Skip to content

Commit

Permalink
Improve remove_tags
Browse files Browse the repository at this point in the history
  • Loading branch information
Josef-Friedrich committed Jan 20, 2024
1 parent 02aa354 commit 1adf83d
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 28 deletions.
3 changes: 2 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ Welcome to mscxyz's documentation!
Contents:

.. toctree::
:maxdepth: 3
:maxdepth: 5

cli
main-module
submodule-lyrics
submodule-meta
submodule-score
submodule-style
submodule-xml
other-submodules

******************
Expand Down
4 changes: 4 additions & 0 deletions docs/submodule-xml.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Submodule ``xml``
^^^^^^^^^^^^^^^^^

.. automodule:: mscxyz.xml
3 changes: 2 additions & 1 deletion mscxyz/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ class Score:
"""Score files created with MuseScore 4 have a separate style file."""

xml_root: _Element
"""The root element of the XML tree. See the `lxml API <https://lxml.de/api.html>`_."""
"""The root element of the XML tree. It is the ``<museScore version="X.X">`` Tag.
See the `lxml API <https://lxml.de/api.html>`_."""

xml: Xml

Expand Down
4 changes: 2 additions & 2 deletions mscxyz/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ def __get_attributes(self, style_name: str) -> _Attrib:
def clean(self) -> None:
    """Remove the style, the layout breaks, the stem directions and the
    ``font``, ``b``, ``i``, ``pos``, ``offset`` tags.

    The ``Style``, ``LayoutBreak`` and ``StemDirection`` elements are
    selected with element path expressions and handed to
    ``self.score.xml.remove_tags``. The formatting tags are passed to
    ``strip_tags`` together with ``self.score.xml_root``.
    """
    # Element path notation (``./`` and ``.//``) replaces the absolute XPath
    # strings that the former ``remove_tags_by_xpath`` call used.
    self.score.xml.remove_tags(
        "./Score/Style", ".//LayoutBreak", ".//StemDirection"
    )
    strip_tags(self.score.xml_root, "font", "b", "i", "pos", "offset")

Expand Down
108 changes: 84 additions & 24 deletions mscxyz/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import lxml
import lxml.etree
from lxml.etree import _Element, _ElementTree
from lxml.etree import _Element, _ElementTree, strip_tags

if typing.TYPE_CHECKING:
from lxml.etree import _DictAnyStr, _XPathObject
Expand Down Expand Up @@ -97,45 +97,62 @@ def write(self, path: str | Path, element: ElementLike = None) -> None:
with open(path, "w") as document:
document.write(self.tostring(element))

def find_safe(self, path: str, element: ElementLike = None) -> _Element:
def find(self, element_path: str, element: ElementLike = None) -> _Element | None:
    """
    Look up an element using an element path expression.

    :param element_path: An `element path expression
      <https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath>`_
      with limited XPath support, for example ``.//Note`` to match
      ``<Note>`` elements.
    :param element: The XML element to search within. It is resolved by the
      private ``__get_element`` helper before the search starts.

    :return: Whatever the ``find`` call on the resolved element returns:
      an element or ``None``.
    """
    start = self.__get_element(element)
    return start.find(element_path)

def find_safe(self, element_path: str, element: ElementLike = None) -> _Element:
    """
    Find an element in the given XML element using the specified element path.

    :param element_path: An `element path expression
      <https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath>`_
      with limited XPath support, for example ``.//Note`` to match
      ``<Note>`` elements.
    :param element: The XML element to search within.

    :return: The found element.

    :raises ValueError: If the element is not found.
    """
    element = self.__get_element(element)
    result: _Element | None = element.find(element_path)
    if result is None:
        # Fail loudly so that callers never have to handle ``None``.
        raise ValueError(f"Path {element_path} not found in element {element}!")
    return result

def xpath(self, path: str, element: ElementLike = None) -> _Element | None:
def findall(self, element_path: str, element: ElementLike = None) -> list[_Element]:
    """
    Collect the elements that match an element path expression.

    :param element_path: An `element path expression
      <https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath>`_
      with limited XPath support, for example ``.//Note`` selects all
      ``<Note>`` elements.
    :param element: The XML element to search within. It is resolved by the
      private ``__get_element`` helper before the search starts.

    :return: The list returned by the ``findall`` call on the resolved
      element.
    """
    start = self.__get_element(element)
    matches = start.findall(element_path)
    return matches

def xpath(self, xpath: str, element: ElementLike = None) -> _Element | None:
    """
    Find the first matching element in the XML tree using XPath.

    :param xpath: The XPath expression to search for.
    :param element: The root element of the XML tree.

    :return: The first matching element or None if no match is found.
    """
    element = self.__get_element(element)
    output: list[_Element] | None = self.xpathall(xpath, element)
    # ``None`` and an empty list both mean that nothing matched.
    if not output:
        return None
    return output[0]

def xpath_safe(self, path: str, element: ElementLike = None) -> _Element:
def xpath_safe(self, xpath: str, element: ElementLike = None) -> _Element:
"""
Safely retrieves the first matching XML element using the given XPath expression.
:param path: The XPath expression to match elements.
:param xpath: The XPath expression to match elements.
:param element: The XML element to search within.
:return: The first matching XML element.XPath
Expand All @@ -144,27 +161,29 @@ def xpath_safe(self, path: str, element: ElementLike = None) -> _Element:
"""
element = self.__get_element(element)
output: list[_Element] = self.xpathall_safe(
path,
xpath,
element,
)
if len(output) > 1:
raise ValueError(
f"XPath “{path}” found more than one element in {element}!"
f"XPath “{xpath}” found more than one element in {element}!"
)
return output[0]

def xpathall(self, path: str, element: ElementLike = None) -> list[_Element] | None:
def xpathall(
self, xpath: str, element: ElementLike = None
) -> list[_Element] | None:
"""
Returns a list of elements matching the given XPath expression.
:param path: The XPath expression to match elements.
:param xpath: The XPath expression to match elements.
:param element: The XML element to search within.
:return: A list of elements matching the XPath expression, or None if no
elements are found.
"""
element = self.__get_element(element)
result: _XPathObject = element.xpath(path)
result: _XPathObject = element.xpath(xpath)
output: list[_Element] = []

if isinstance(result, list):
Expand All @@ -177,22 +196,22 @@ def xpathall(self, path: str, element: ElementLike = None) -> list[_Element] | N

return None

def xpathall_safe(self, path: str, element: ElementLike = None) -> list[_Element]:
def xpathall_safe(self, xpath: str, element: ElementLike = None) -> list[_Element]:
    """
    Safely retrieves a list of elements matching the given XPath expression within
    the specified element.

    :param xpath: The XPath expression to match elements.
    :param element: The XML element to search within.

    :return: A list of elements matching the XPath expression.

    :raises ValueError: If the XPath expression is not found in the element.
    """
    element = self.__get_element(element)
    output: list[_Element] | None = self.xpathall(xpath, element)
    if output is None:
        # ``xpathall`` signals "nothing found" with ``None``; turn that into
        # an exception so the return type stays a plain list.
        raise ValueError(f"XPath “{xpath}” not found in element {element}!")
    return output

def get_text(self, element: ElementLike = None) -> str | None:
Expand Down Expand Up @@ -226,18 +245,21 @@ def get_text_safe(self, element: ElementLike = None) -> str:
return element.text

def set_text(
    self, element_path: str, value: str | int | float, element: ElementLike = None
) -> None:
    """
    Set the text value of an XML element at the specified element path.

    :param element_path: An `element path expression
      <https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath>`_
      with limited XPath support to locate the target element,
      for example ``.//Note`` to match ``<Note>`` elements.
    :param value: The new value to set for the element's text. It is
      converted with ``str()`` before it is assigned.
    :param element: The XML element to modify.

    :return: None

    :raises ValueError: If ``find_safe`` cannot locate the target element.
    """
    target = self.find_safe(element_path, element)
    target.text = str(value)

@staticmethod
def replace(old: _Element, new: _Element) -> None:
Expand Down Expand Up @@ -276,3 +298,41 @@ def create_sub_element(
if text:
element.text = text
return element

def remove_tags(self, *element_paths: str) -> Xml:
    """
    Remove all elements that are matched by the given element paths.

    Each path is looked up with ``self.findall`` and every match is handed
    to ``self.remove``.

    :param element_paths: One or more `element path expressions
      <https://docs.python.org/3/library/xml.etree.elementtree.html#elementtree-xpath>`_
      with limited XPath support to locate the target elements,
      for example ``.//Note`` selects all ``<Note>`` elements.

    :return: The object itself, which allows chaining further calls.
    """
    for element_path in element_paths:
        matches = self.findall(element_path)
        for match in matches:
            self.remove(match)
    return self

def remove_tags_by_xpath(self, *xpath_strings: str) -> None:
    """Remove the elements selected by XPath strings.

    Every XPath string is evaluated on ``self.root``. Each selected element
    that has a parent element is removed from that parent.

    :param xpath_strings: One or more XPath strings.

    .. code:: Python

        tree.remove_tags_by_xpath(
            '/museScore/Score/Style', '//LayoutBreak', '//StemDirection'
        )
    """
    for expression in xpath_strings:
        selected: _XPathObject = self.root.xpath(expression)
        # Only list results are processed; any other result type is ignored.
        if not isinstance(selected, list):
            continue
        for candidate in selected:
            if not isinstance(candidate, _Element):
                continue
            parent: _Element | None = candidate.getparent()
            # Elements without a parent element are left untouched.
            if isinstance(parent, _Element):
                parent.remove(candidate)

def strip_tags(self, *tag_names: str) -> Xml:
    """TODO remove. Use remove_tags instead.

    Pass ``self.root`` and the tag names to the ``strip_tags`` function
    imported from ``lxml.etree``.

    :param tag_names: The names of the tags to strip.

    :return: The object itself, which allows chaining further calls.
    """
    root = self.root
    strip_tags(root, *tag_names)
    return self
51 changes: 51 additions & 0 deletions tests/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
xml = Xml.new(xml_file)


@pytest.fixture
def custom_xml() -> Xml:
    """Provide an ``Xml`` object built from a small inline XML document."""
    markup = "<root><a><b/><c/></a><d>some text<e/></d></root>"
    root = Xml.parse_string(markup)
    return Xml(root)


def test_read() -> None:
element = xml.parse_file(xml_file)
assert element.tag == "museScore"
Expand Down Expand Up @@ -67,3 +72,49 @@ def test_xml_write(tmp_path: Path) -> None:
'<?xml version="1.0" encoding="UTF-8"?>\n'
"<root><a><b/><c/></a><d><e/></d></root>\n"
)


class TestRemove:
    """Tests for ``Xml.remove_tags`` using the ``custom_xml`` fixture."""

    def test_element_with_childs(self, custom_xml: Xml) -> None:
        # A bare tag name removes the element together with its children.
        result = custom_xml.remove_tags("a").tostring()
        assert "<root><d>some text<e/></d></root>" in result

    def test_dot_double_slash_notation(self, custom_xml: Xml) -> None:
        result = custom_xml.remove_tags(".//a").tostring()
        assert "<root><d>some text<e/></d></root>" in result

    def test_double_slash_notation(self, custom_xml: Xml) -> None:
        # A path starting with ``//`` is rejected with a SyntaxError.
        with pytest.raises(SyntaxError):
            custom_xml.remove_tags("//b")

    def test_childs(self, custom_xml: Xml) -> None:
        result = custom_xml.remove_tags(".//b", ".//c").tostring()
        assert "<root><a/><d>some text<e/></d></root>" in result

    def test_with_text(self, custom_xml: Xml) -> None:
        result = custom_xml.remove_tags(".//d").tostring()
        assert "<root><a><b/><c/></a></root>" in result

    def test_navigate_in_tree(self, custom_xml: Xml) -> None:
        result = custom_xml.remove_tags("./a/b").tostring()
        assert "<root><a><c/></a>" in result


class TestStripTags:
    """Tests for ``Xml.strip_tags`` using the ``custom_xml`` fixture."""

    def test_element_with_childs(self, custom_xml: Xml) -> None:
        # The children of the stripped element stay in the document.
        custom_xml.strip_tags("a")
        result = custom_xml.tostring()
        assert "<root><b/><c/><d>some text<e/></d></root>" in result

    def test_child_element(self, custom_xml: Xml) -> None:
        custom_xml.strip_tags("b", "c", "d")
        result = custom_xml.tostring()
        assert "<root><a/>some text<e/></root>" in result

    def test_containing_text(self, custom_xml: Xml) -> None:
        # The text of the stripped element stays in the document.
        custom_xml.strip_tags("d")
        result = custom_xml.tostring()
        assert "<root><a><b/><c/></a>some text<e/></root>" in result

0 comments on commit 1adf83d

Please sign in to comment.