From 2dacaf7da287475e84a8e82413a7a32b5911e314 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 24 Nov 2024 13:24:46 +0530 Subject: [PATCH] DOCX Input: When some text has multiple footnotes insert a space between the consecutive foot note numbers so that they are distinct. Fixes #2089433 [Separate several footnote/endnote references](https://bugs.launchpad.net/calibre/+bug/2089433) --- src/calibre/ebooks/docx/cleanup.py | 10 +++++++++- src/calibre/ebooks/docx/to_html.py | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 322e44ba9af9..5507a207724b 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -113,11 +113,19 @@ def wrap_contents(tag_name, elem): elem.append(wrapper) -def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): +def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath, uuid): # Apply vertical-align for span in root.xpath('//span[@data-docx-vert]'): wrap_contents(span.attrib.pop('data-docx-vert'), span) + for span in root.xpath(f'//*[@data-noteref-container="{uuid}"]'): + span.attrib.pop('data-noteref-container') + parent = span.getparent() + idx = parent.index(span) + if idx + 1 < len(parent) and (ns := parent[idx+1]) and hasattr(ns, 'get') and ns.get('data-noteref-container'): + if len(span) and not span[-1].tail: + span[-1].tail = '\xa0' + # Move
<hr>s outside paragraphs, if possible. pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) for hr in root.xpath('//span/hr'): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index a41daae32e4e..54b957857355 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -75,6 +75,7 @@ def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, n self.dest_dir = dest_dir or os.getcwd() self.mi = self.docx.metadata self.body = BODY() + self.uuid = uuid.uuid4().hex self.theme = Theme(self.namespace) self.settings = Settings(self.namespace) self.tables = Tables(self.namespace) @@ -241,7 +242,7 @@ def __call__(self): self.fields.polish_markup(self.object_map) self.log.debug('Cleaning up redundant markup generated by Word') - self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath) + self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath, self.uuid) return self.write(doc) @@ -713,6 +714,7 @@ def convert_run(self, run): l.set('role', 'doc-noteref') text.add_elem(l) ans.append(text.elem) + ans.set('data-noteref-container', self.uuid) elif self.namespace.is_tag(child, 'w:tab'): spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6)) text.add_elem(SPAN(NBSP * spaces))