Skip to content

Commit

Permalink
DOCX Input: When some text has multiple footnotes, insert a space betw…
Browse files Browse the repository at this point in the history
…een the consecutive footnote numbers so that they are distinct. Fixes #2089433 [Separate several footnote/endnote references](https://bugs.launchpad.net/calibre/+bug/2089433)
  • Loading branch information
kovidgoyal committed Nov 24, 2024
1 parent 90b33c9 commit 2dacaf7
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
10 changes: 9 additions & 1 deletion src/calibre/ebooks/docx/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,19 @@ def wrap_contents(tag_name, elem):
elem.append(wrapper)


def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid):
# Apply vertical-align
for span in root.xpath('//span[@data-docx-vert]'):
wrap_contents(span.attrib.pop('data-docx-vert'), span)

for span in root.xpath(f'//*[@data-noteref-container="{uuid}"]'):
span.attrib.pop('data-noteref-container')
parent = span.getparent()
idx = parent.index(span)
if idx + 1 < len(parent) and (ns := parent[idx+1]) and hasattr(ns, 'get') and ns.get('data-noteref-container'):
if len(span) and not span[-1].tail:
span[-1].tail = '\xa0'

# Move <hr>s outside paragraphs, if possible.
pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6')))
for hr in root.xpath('//span/hr'):
Expand Down
4 changes: 3 additions & 1 deletion src/calibre/ebooks/docx/to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, n
self.dest_dir = dest_dir or os.getcwd()
self.mi = self.docx.metadata
self.body = BODY()
self.uuid = uuid.uuid4().hex
self.theme = Theme(self.namespace)
self.settings = Settings(self.namespace)
self.tables = Tables(self.namespace)
Expand Down Expand Up @@ -241,7 +242,7 @@ def __call__(self):
self.fields.polish_markup(self.object_map)

self.log.debug('Cleaning up redundant markup generated by Word')
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath, self.uuid)

return self.write(doc)

Expand Down Expand Up @@ -713,6 +714,7 @@ def convert_run(self, run):
l.set('role', 'doc-noteref')
text.add_elem(l)
ans.append(text.elem)
ans.set('data-noteref-container', self.uuid)
elif self.namespace.is_tag(child, 'w:tab'):
spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
text.add_elem(SPAN(NBSP * spaces))
Expand Down

0 comments on commit 2dacaf7

Please sign in to comment.