Skip to content

Commit

Permalink
try to make the code more robust (and use less memory)
Browse files Browse the repository at this point in the history
  • Loading branch information
rbri committed Nov 20, 2024
1 parent abeae64 commit ea87ce4
Show file tree
Hide file tree
Showing 4 changed files with 317 additions and 15 deletions.
4 changes: 1 addition & 3 deletions src/main/java/org/htmlunit/html/DomElement.java
Original file line number Diff line number Diff line change
Expand Up @@ -656,9 +656,7 @@ protected List<E> provideElements() {
*/
public <E extends HtmlElement> List<E> getStaticElementsByTagName(final String tagName) {
final List<E> res = new ArrayList<>();
for (final Iterator<HtmlElement> iterator
= this.new DescendantElementsIterator<>(HtmlElement.class);
iterator.hasNext();) {
for (final Iterator<HtmlElement> iterator = this.new DescendantHtmlElementsIterator(); iterator.hasNext();) {
final HtmlElement elem = iterator.next();
if (elem.getLocalName().equalsIgnoreCase(tagName)) {
res.add((E) elem);
Expand Down
317 changes: 311 additions & 6 deletions src/main/java/org/htmlunit/html/DomNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -1047,9 +1047,7 @@ private void fireAddition(final DomNode domNode) {

// a node that is already "complete" (ie not being parsed) and not yet attached
if (!domNode.isBodyParsed() && !wasAlreadyAttached) {
for (final Iterator<DomNode> iterator
= domNode.new DescendantElementsIterator<>(DomNode.class);
iterator.hasNext();) {
for (final Iterator<DomNode> iterator = domNode.new DescendantDomNodesIterator(); iterator.hasNext();) {
final DomNode child = iterator.next();
child.attachedToPage_ = true;
child.onAllChildrenAddedToPage(true);
Expand Down Expand Up @@ -1359,7 +1357,7 @@ public void remove() {
* @return an {@link Iterable} that will recursively iterate over all of this node's descendants
*/
public final Iterable<DomNode> getDescendants() {
return () -> new DescendantElementsIterator<>(DomNode.class);
return () -> new DescendantDomNodesIterator();
}

/**
Expand All @@ -1371,7 +1369,7 @@ public final Iterable<DomNode> getDescendants() {
* @see #getDomElementDescendants()
*/
public final Iterable<HtmlElement> getHtmlElementDescendants() {
return () -> new DescendantElementsIterator<>(HtmlElement.class);
return () -> new DescendantHtmlElementsIterator();
}

/**
Expand All @@ -1383,13 +1381,17 @@ public final Iterable<HtmlElement> getHtmlElementDescendants() {
* @see #getHtmlElementDescendants()
*/
public final Iterable<DomElement> getDomElementDescendants() {
return () -> new DescendantElementsIterator<>(DomElement.class);
return () -> new DescendantDomElementsIterator();
}

/**
* Iterates over all descendants of a specific type, in document order.
* @param <T> the type of nodes over which to iterate
*
* @deprecated as of version 4.7.0; use {@link DescendantDomNodesIterator},
* {@link DescendantDomElementsIterator}, or {@link DescendantHtmlElementsIterator} instead.
*/
@Deprecated
protected class DescendantElementsIterator<T extends DomNode> implements Iterator<T> {

private DomNode currentNode_;
Expand Down Expand Up @@ -1494,6 +1496,309 @@ private DomNode getNextDomSibling(final DomNode element) {
}
}

/**
 * Iterates over all descendant {@link DomNode}s of the enclosing node, in document order.
 * The traversal visits a node, then its first child, then its next sibling, and finally
 * climbs towards the enclosing node looking for the next sibling of an ancestor.
 */
protected final class DescendantDomNodesIterator implements Iterator<DomNode> {
    /** The node most recently returned by {@link #next()}; {@code null} before the first call and after a removal. */
    private DomNode currentNode_;
    /** The node the next call to {@link #next()} will return; {@code null} once the iteration is exhausted. */
    private DomNode nextNode_;

    /**
     * Creates a new instance which iterates over all descendant nodes of the enclosing node.
     */
    public DescendantDomNodesIterator() {
        nextNode_ = DomNode.this.getFirstChild();
    }

    /** {@inheritDoc} */
    @Override
    public boolean hasNext() {
        return nextNode_ != null;
    }

    /** {@inheritDoc} */
    @Override
    public DomNode next() {
        return nextNode();
    }

    /**
     * Removes the node most recently returned by {@link #next()} from its parent.
     * Descendants of that node which are still pending are skipped first, so the
     * iteration continues with the first node outside of the removed subtree.
     *
     * @throws IllegalStateException if there is no current node, i.e. {@link #next()} has not
     *         been called yet or the current node was already removed
     */
    @Override
    public void remove() {
        if (currentNode_ == null) {
            throw new IllegalStateException("Unable to remove current node, because there is no current node.");
        }
        final DomNode current = currentNode_;
        // advance past everything below the node we are about to detach
        while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
            next();
        }
        current.remove();
        // per the Iterator contract remove() may only be called once per call to next()
        currentNode_ = null;
    }

    /**
     * Advances the iteration by one node.
     *
     * @return the next node
     * @throws java.util.NoSuchElementException if the iteration has no more nodes
     */
    public DomNode nextNode() {
        if (nextNode_ == null) {
            throw new java.util.NoSuchElementException();
        }
        currentNode_ = nextNode_;

        // document order: children first, then siblings, then siblings of the ancestors
        DomNode next = nextNode_.getFirstChild();
        if (next == null) {
            next = nextNode_.getNextSibling();
        }
        if (next == null) {
            next = getNextElementUpwards(nextNode_);
        }
        nextNode_ = next;

        return currentNode_;
    }

    /**
     * Climbs from the given node towards the enclosing node and returns the first
     * next sibling found on one of the ancestors.
     *
     * @param startingNode the node to start climbing from
     * @return the next sibling of the closest ancestor that has one, or {@code null}
     *         if the enclosing node (or the top of the tree) is reached first
     */
    private DomNode getNextElementUpwards(final DomNode startingNode) {
        if (startingNode == DomNode.this) {
            return null;
        }

        DomNode parent = startingNode.getParentNode();
        while (parent != null && parent != DomNode.this) {
            // every DomNode is accepted by this iterator, so no filtering is required
            final DomNode next = parent.getNextSibling();
            if (next != null) {
                return next;
            }
            parent = parent.getParentNode();
        }
        return null;
    }
}

/**
 * Iterates over all descendant {@link DomElement}s of the enclosing node, in document order.
 * Nodes that are not a {@link DomElement} are skipped and their children are not explored.
 */
protected final class DescendantDomElementsIterator implements Iterator<DomElement> {
    /** The node most recently returned by {@link #next()}; {@code null} before the first call and after a removal. */
    private DomNode currentNode_;
    /** The node the next call to {@link #next()} will return; {@code null} once the iteration is exhausted. */
    private DomNode nextNode_;

    /**
     * Creates a new instance which iterates over the {@link DomElement} descendants
     * of the enclosing node.
     */
    public DescendantDomElementsIterator() {
        nextNode_ = getFirstChildElement(DomNode.this);
    }

    /** {@inheritDoc} */
    @Override
    public boolean hasNext() {
        return nextNode_ != null;
    }

    /** {@inheritDoc} */
    @Override
    public DomElement next() {
        return nextNode();
    }

    /**
     * Removes the element most recently returned by {@link #next()} from its parent.
     * Descendants of that element which are still pending are skipped first, so the
     * iteration continues with the first element outside of the removed subtree.
     *
     * @throws IllegalStateException if there is no current node, i.e. {@link #next()} has not
     *         been called yet or the current node was already removed
     */
    @Override
    public void remove() {
        if (currentNode_ == null) {
            throw new IllegalStateException("Unable to remove current node, because there is no current node.");
        }
        final DomNode current = currentNode_;
        // advance past everything below the node we are about to detach
        while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
            next();
        }
        current.remove();
        // per the Iterator contract remove() may only be called once per call to next()
        currentNode_ = null;
    }

    /**
     * Advances the iteration by one element.
     *
     * @return the next element
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    public DomElement nextNode() {
        if (nextNode_ == null) {
            throw new java.util.NoSuchElementException();
        }
        currentNode_ = nextNode_;

        // document order: children first, then siblings, then siblings of the ancestors
        DomNode next = getFirstChildElement(nextNode_);
        if (next == null) {
            next = getNextDomSibling(nextNode_);
        }
        if (next == null) {
            next = getNextElementUpwards(nextNode_);
        }
        nextNode_ = next;

        // only accepted nodes are ever stored in nextNode_, so this cast cannot fail
        return (DomElement) currentNode_;
    }

    /**
     * Climbs from the given node towards the enclosing node and returns the first
     * accepted node among the following siblings of one of the ancestors.
     *
     * @param startingNode the node to start climbing from
     * @return the next accepted node, or {@code null} if the enclosing node
     *         (or the top of the tree) is reached first
     */
    private DomNode getNextElementUpwards(final DomNode startingNode) {
        if (startingNode == DomNode.this) {
            return null;
        }

        DomNode parent = startingNode.getParentNode();
        while (parent != null && parent != DomNode.this) {
            final DomNode next = firstAcceptedFrom(parent.getNextSibling());
            if (next != null) {
                return next;
            }
            parent = parent.getParentNode();
        }
        return null;
    }

    /**
     * @param parent the node whose children are inspected
     * @return the first accepted child of the given node, or {@code null} if there is none
     */
    private DomNode getFirstChildElement(final DomNode parent) {
        return firstAcceptedFrom(parent.getFirstChild());
    }

    /**
     * @param element the node whose following siblings are inspected
     * @return the first accepted following sibling of the given node, or {@code null} if there is none
     */
    private DomNode getNextDomSibling(final DomNode element) {
        return firstAcceptedFrom(element.getNextSibling());
    }

    /**
     * Walks the sibling chain starting at (and including) the given node until an
     * accepted node is found.
     *
     * @param start the first candidate, may be {@code null}
     * @return the first accepted node of the chain, or {@code null} if there is none
     */
    private DomNode firstAcceptedFrom(final DomNode start) {
        DomNode node = start;
        while (node != null && !isAccepted(node)) {
            node = node.getNextSibling();
        }
        return node;
    }

    /**
     * Indicates if the node is accepted. If not it won't be explored at all.
     * @param node the node to test
     * @return {@code true} if accepted
     */
    private boolean isAccepted(final DomNode node) {
        return node instanceof DomElement;
    }
}

/**
 * Iterates over all descendant {@link HtmlElement}s of the enclosing node, in document order.
 * Nodes that are not an {@link HtmlElement} are skipped and their children are not explored.
 */
protected final class DescendantHtmlElementsIterator implements Iterator<HtmlElement> {
    /** The node most recently returned by {@link #next()}; {@code null} before the first call and after a removal. */
    private DomNode currentNode_;
    /** The node the next call to {@link #next()} will return; {@code null} once the iteration is exhausted. */
    private DomNode nextNode_;

    /**
     * Creates a new instance which iterates over the {@link HtmlElement} descendants
     * of the enclosing node.
     */
    public DescendantHtmlElementsIterator() {
        nextNode_ = getFirstChildElement(DomNode.this);
    }

    /** {@inheritDoc} */
    @Override
    public boolean hasNext() {
        return nextNode_ != null;
    }

    /** {@inheritDoc} */
    @Override
    public HtmlElement next() {
        return nextNode();
    }

    /**
     * Removes the element most recently returned by {@link #next()} from its parent.
     * Descendants of that element which are still pending are skipped first, so the
     * iteration continues with the first element outside of the removed subtree.
     *
     * @throws IllegalStateException if there is no current node, i.e. {@link #next()} has not
     *         been called yet or the current node was already removed
     */
    @Override
    public void remove() {
        if (currentNode_ == null) {
            throw new IllegalStateException("Unable to remove current node, because there is no current node.");
        }
        final DomNode current = currentNode_;
        // advance past everything below the node we are about to detach
        while (nextNode_ != null && current.isAncestorOf(nextNode_)) {
            next();
        }
        current.remove();
        // per the Iterator contract remove() may only be called once per call to next()
        currentNode_ = null;
    }

    /**
     * Advances the iteration by one element.
     *
     * @return the next element
     * @throws java.util.NoSuchElementException if the iteration has no more elements
     */
    public HtmlElement nextNode() {
        if (nextNode_ == null) {
            throw new java.util.NoSuchElementException();
        }
        currentNode_ = nextNode_;

        // document order: children first, then siblings, then siblings of the ancestors
        DomNode next = getFirstChildElement(nextNode_);
        if (next == null) {
            next = getNextDomSibling(nextNode_);
        }
        if (next == null) {
            next = getNextElementUpwards(nextNode_);
        }
        nextNode_ = next;

        // only accepted nodes are ever stored in nextNode_, so this cast cannot fail
        return (HtmlElement) currentNode_;
    }

    /**
     * Climbs from the given node towards the enclosing node and returns the first
     * accepted node among the following siblings of one of the ancestors.
     *
     * @param startingNode the node to start climbing from
     * @return the next accepted node, or {@code null} if the enclosing node
     *         (or the top of the tree) is reached first
     */
    private DomNode getNextElementUpwards(final DomNode startingNode) {
        if (startingNode == DomNode.this) {
            return null;
        }

        DomNode parent = startingNode.getParentNode();
        while (parent != null && parent != DomNode.this) {
            final DomNode next = firstAcceptedFrom(parent.getNextSibling());
            if (next != null) {
                return next;
            }
            parent = parent.getParentNode();
        }
        return null;
    }

    /**
     * @param parent the node whose children are inspected
     * @return the first accepted child of the given node, or {@code null} if there is none
     */
    private DomNode getFirstChildElement(final DomNode parent) {
        return firstAcceptedFrom(parent.getFirstChild());
    }

    /**
     * @param element the node whose following siblings are inspected
     * @return the first accepted following sibling of the given node, or {@code null} if there is none
     */
    private DomNode getNextDomSibling(final DomNode element) {
        return firstAcceptedFrom(element.getNextSibling());
    }

    /**
     * Walks the sibling chain starting at (and including) the given node until an
     * accepted node is found.
     *
     * @param start the first candidate, may be {@code null}
     * @return the first accepted node of the chain, or {@code null} if there is none
     */
    private DomNode firstAcceptedFrom(final DomNode start) {
        DomNode node = start;
        while (node != null && !isAccepted(node)) {
            node = node.getNextSibling();
        }
        return node;
    }

    /**
     * Indicates if the node is accepted. If not it won't be explored at all.
     * @param node the node to test
     * @return {@code true} if accepted
     */
    private boolean isAccepted(final DomNode node) {
        return node instanceof HtmlElement;
    }
}

/**
* Returns this node's ready state (IE only).
* @return this node's ready state
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/org/htmlunit/html/HtmlPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -1745,9 +1745,8 @@ void notifyNodeAdded(final DomNode node) {
frameElements_.add((BaseFrameElement) node);
}

for (final Iterator<HtmlElement> iterator
= node.new DescendantElementsIterator<>(HtmlElement.class);
iterator.hasNext();) {
for (final Iterator<HtmlElement> iterator = node.new DescendantHtmlElementsIterator();
iterator.hasNext();) {
final HtmlElement child = iterator.next();
if (child instanceof BaseFrameElement) {
frameElements_.add((BaseFrameElement) child);
Expand Down
Loading

0 comments on commit ea87ce4

Please sign in to comment.