From 1adf83d388f8f8488d9c79f90a56c3cdee02c686 Mon Sep 17 00:00:00 2001 From: Josef Friedrich Date: Sat, 20 Jan 2024 17:47:24 +0100 Subject: [PATCH] Improve remove_tags --- docs/index.rst | 3 +- docs/submodule-xml.rst | 4 ++ mscxyz/score.py | 3 +- mscxyz/style.py | 4 +- mscxyz/xml.py | 108 ++++++++++++++++++++++++++++++++--------- tests/test_xml.py | 51 +++++++++++++++++++ 6 files changed, 145 insertions(+), 28 deletions(-) create mode 100644 docs/submodule-xml.rst diff --git a/docs/index.rst b/docs/index.rst index 1c00066..9d1ecdc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -5,7 +5,7 @@ Welcome to mscxyz's documentation! Contents: .. toctree:: - :maxdepth: 3 + :maxdepth: 5 cli main-module @@ -13,6 +13,7 @@ Contents: submodule-meta submodule-score submodule-style + submodule-xml other-submodules ****************** diff --git a/docs/submodule-xml.rst b/docs/submodule-xml.rst new file mode 100644 index 0000000..c81e611 --- /dev/null +++ b/docs/submodule-xml.rst @@ -0,0 +1,4 @@ +Submodule ``xml`` +^^^^^^^^^^^^^^^^^ + +.. automodule:: mscxyz.xml diff --git a/mscxyz/score.py b/mscxyz/score.py index 8340d44..c5889b7 100644 --- a/mscxyz/score.py +++ b/mscxyz/score.py @@ -45,7 +45,8 @@ class Score: """Score files created with MuseScore 4 have a separate style file.""" xml_root: _Element - """The root element of the XML tree. See the `lxml API `_.""" + """The root element of the XML tree. It is the ```` Tag. + See the `lxml API `_.""" xml: Xml diff --git a/mscxyz/style.py b/mscxyz/style.py index b3258fd..650f907 100644 --- a/mscxyz/style.py +++ b/mscxyz/style.py @@ -243,8 +243,8 @@ def __get_attributes(self, style_name: str) -> _Attrib: def clean(self) -> None: """Remove the style, the layout breaks, the stem directions and the ``font``, ``b``, ``i``, ``pos``, ``offset`` tags""" - self.score.remove_tags_by_xpath( - "/museScore/Score/Style", "//LayoutBreak", "//StemDirection" + self.score.xml.remove_tags( + "./Score/Style", ".//LayoutBreak", ".//StemDirection" ) strip_tags(self.score.xml_root, "font", "b", "i", "pos", "offset") diff --git a/mscxyz/xml.py b/mscxyz/xml.py index 1dfaf65..e8b9755 100644 --- a/mscxyz/xml.py +++ b/mscxyz/xml.py @@ -7,7 +7,7 @@ import lxml import lxml.etree -from lxml.etree import _Element, _ElementTree +from lxml.etree import _Element, _ElementTree, strip_tags if typing.TYPE_CHECKING: from lxml.etree import _DictAnyStr, _XPathObject @@ -97,45 +97,62 @@ def write(self, path: str | Path, element: ElementLike = None) -> None: with open(path, "w") as document: document.write(self.tostring(element)) - def find_safe(self, path: str, element: ElementLike = None) -> _Element: + def find(self, element_path: str, element: ElementLike = None) -> _Element | None: + """ + :param element_path: A `element path expression + `_ + with limited XPath support, for example ``.//Note`` selects all ```` elements. + """ + return self.__get_element(element).find(element_path) + + def find_safe(self, element_path: str, element: ElementLike = None) -> _Element: """ Find an element in the given XML element using the specified element path. - :param path: The path to the desired element. + :param element_path: A `element path expression + `_ + with limited XPath support, for example ``.//Note`` selects all ```` elements. :param element: The XML element to search within. :return: The found element. :raises ValueError: If the element is not found. """ - element = self.__get_element(element) - result: _Element | None = element.find(path) + result: _Element | None = element.find(element_path) if result is None: - raise ValueError(f"Path {path} not found in element {element}!") + raise ValueError(f"Path {element_path} not found in element {element}!") return result - def xpath(self, path: str, element: ElementLike = None) -> _Element | None: + def findall(self, element_path: str, element: ElementLike = None) -> list[_Element]: + """ + :param element_path: A `element path expression + `_ + with limited XPath support, for example ``.//Note`` selects all ```` elements. + """ + return self.__get_element(element).findall(element_path) + + def xpath(self, xpath: str, element: ElementLike = None) -> _Element | None: """ Find the first matching element in the XML tree using XPath. - :param path: The XPath expression to search for. + :param xpath: The XPath expression to search for. :param element: The root element of the XML tree. :return: The first matching element or None if no match is found. """ element = self.__get_element(element) - output: list[_Element] | None = self.xpathall(path, element) + output: list[_Element] | None = self.xpathall(xpath, element) if output and len(output) > 0: return output[0] return None - def xpath_safe(self, path: str, element: ElementLike = None) -> _Element: + def xpath_safe(self, xpath: str, element: ElementLike = None) -> _Element: """ Safely retrieves the first matching XML element using the given XPath expression. - :param path: The XPath expression to match elements. + :param xpath: The XPath expression to match elements. :param element: The XML element to search within. :return: The first matching XML element.XPath @@ -144,27 +161,29 @@ def xpath_safe(self, path: str, element: ElementLike = None) -> _Element: """ element = self.__get_element(element) output: list[_Element] = self.xpathall_safe( - path, + xpath, element, ) if len(output) > 1: raise ValueError( - f"XPath “{path}” found more than one element in {element}!" + f"XPath “{xpath}” found more than one element in {element}!" ) return output[0] - def xpathall(self, path: str, element: ElementLike = None) -> list[_Element] | None: + def xpathall( + self, xpath: str, element: ElementLike = None + ) -> list[_Element] | None: """ Returns a list of elements matching the given XPath expression. - :param path: The XPath expression to match elements. + :param xpath: The XPath expression to match elements. :param element: The XML element to search within. :return: A list of elements matching the XPath expression, or None if no elements are found. """ element = self.__get_element(element) - result: _XPathObject = element.xpath(path) + result: _XPathObject = element.xpath(xpath) output: list[_Element] = [] if isinstance(result, list): @@ -177,22 +196,22 @@ def xpathall(self, path: str, element: ElementLike = None) -> list[_Element] | N return None - def xpathall_safe(self, path: str, element: ElementLike = None) -> list[_Element]: + def xpathall_safe(self, xpath: str, element: ElementLike = None) -> list[_Element]: """ Safely retrieves a list of elements matching the given XPath expression within the specified element. + :param xpath: The XPath expression to match elements. :param element: The XML element to search within. - :param path: The XPath expression to match elements. :return: A list of elements matching the XPath expression. :raises ValueError: If the XPath expression is not found in the element. """ element = self.__get_element(element) - output: list[_Element] | None = self.xpathall(path, element) + output: list[_Element] | None = self.xpathall(xpath, element) if output is None: - raise ValueError(f"XPath “{path}” not found in element {element}!") + raise ValueError(f"XPath “{xpath}” not found in element {element}!") return output def get_text(self, element: ElementLike = None) -> str | None: @@ -226,18 +245,21 @@ def get_text_safe(self, element: ElementLike = None) -> str: return element.text def set_text( - self, path: str, value: str | int | float, element: ElementLike = None + self, element_path: str, value: str | int | float, element: ElementLike = None ) -> None: """ Set the text value of an XML element at the specified element path. - :param element: The XML element to modify. - :param path: The element path expression to locate the target element. + :param element_path: A `element path expression + `_ + with limited XPath support to locate the target element, + for example ``.//Note`` selects all ```` elements. :param value: The new value to set for the element's text. + :param element: The XML element to modify. :return: None """ - self.find_safe(path, element).text = str(value) + self.find_safe(element_path, element).text = str(value) @staticmethod def replace(old: _Element, new: _Element) -> None: @@ -276,3 +298,41 @@ def create_sub_element( if text: element.text = text return element + + def remove_tags(self, *element_paths: str) -> Xml: + """ + :param element_path: A `element path expression + `_ + with limited XPath support to locate the target element, + for example ``.//Note`` selects all ```` elements. + """ + for path in element_paths: + for element in self.findall(path): + self.remove(element) + return self + + def remove_tags_by_xpath(self, *xpath_strings: str) -> None: + """Remove tags by xpath strings. + + :param xpath_strings: A xpath string. + + .. code:: Python + + tree.remove_tags_by_xpath( + '/museScore/Score/Style', '//LayoutBreak', '//StemDirection' + ) + + """ + for xpath_string in xpath_strings: + x: _XPathObject = self.root.xpath(xpath_string) + if isinstance(x, list): + for rm in x: + if isinstance(rm, _Element): + p: _Element | None = rm.getparent() + if isinstance(p, _Element): + p.remove(rm) + + def strip_tags(self, *tag_names: str) -> Xml: + """TODO remove. Use remove_tags instead.""" + strip_tags(self.root, *tag_names) + return self diff --git a/tests/test_xml.py b/tests/test_xml.py index 889f476..b85364d 100644 --- a/tests/test_xml.py +++ b/tests/test_xml.py @@ -16,6 +16,11 @@ xml = Xml.new(xml_file) +@pytest.fixture +def custom_xml() -> Xml: + return Xml(Xml.parse_string("some text")) + + def test_read() -> None: element = xml.parse_file(xml_file) assert element.tag == "museScore" @@ -67,3 +72,49 @@ def test_xml_write(tmp_path: Path) -> None: '\n' "\n" ) + + +class TestRemove: + def test_element_with_childs(self, custom_xml: Xml) -> None: + assert ( + "some text" + in custom_xml.remove_tags("a").tostring() + ) + + def test_dot_double_slash_notation(self, custom_xml: Xml) -> None: + assert ( + "some text" + in custom_xml.remove_tags(".//a").tostring() + ) + + def test_double_slash_notation(self, custom_xml: Xml) -> None: + with pytest.raises(SyntaxError): + custom_xml.remove_tags("//b") + + def test_childs(self, custom_xml: Xml) -> None: + assert ( + "some text" + in custom_xml.remove_tags(".//b", ".//c").tostring() + ) + + def test_with_text(self, custom_xml: Xml) -> None: + assert ( + "" in custom_xml.remove_tags(".//d").tostring() + ) + + def test_navigate_in_tree(self, custom_xml: Xml) -> None: + assert "" in custom_xml.remove_tags("./a/b").tostring() + + +class TestStripTags: + def test_element_with_childs(self, custom_xml: Xml) -> None: + custom_xml.strip_tags("a") + assert "some text" in custom_xml.tostring() + + def test_child_element(self, custom_xml: Xml) -> None: + custom_xml.strip_tags("b", "c", "d") + assert "some text" in custom_xml.tostring() + + def test_containing_text(self, custom_xml: Xml) -> None: + custom_xml.strip_tags("d") + assert "some text" in custom_xml.tostring()