summary | refs | log | tree | commit | diff | stats
path: root/tests
diff options
context:
space:
mode:
author: Karl Lorey <git@karllorey.com>, 2022-06-23 21:53:23 +0200
committer: Karl Lorey <git@karllorey.com>, 2022-06-23 21:53:23 +0200
commit: a2f481c3481f6445e520e6bbdfafae3bbf94f96b (patch)
tree: ea6b44ed7ac418af1a9f11e6891319288a5a475a /tests
parent: 789e635aabd126e934af7dbf0b2769bef28d9683 (diff)
Improve performance by fixing hashing and root computation
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_html.py | 27
1 file changed, 16 insertions, 11 deletions
diff --git a/tests/test_html.py b/tests/test_html.py
index e976ed5..7780504 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1,20 +1,25 @@
-from bs4 import BeautifulSoup
-from mlscraper.html import _get_root_of_nodes
+from mlscraper.html import get_root_node
from mlscraper.html import HTMLExactTextMatch
from mlscraper.html import Page
from mlscraper.html import selector_matches_nodes
from mlscraper.matches import AttributeValueExtractor
-def test_get_root_of_nodes():
- soup = BeautifulSoup(
- b'<html><body><div><p id="one"></p><p><span id="two"></span></p></div></body></html>',
- "lxml",
- )
- node_1 = soup.select_one("#one")
- node_2 = soup.select_one("#two")
- root = _get_root_of_nodes([node_1, node_2])
- assert root == soup.select_one("div")
+def test_get_root_nodes():
+ html = b'<html><body><div><p id="one"></p><p><span id="two"></span></p></div></body></html>'
+ page = Page(html)
+ node_1 = page.select("#one")[0]
+ node_2 = page.select("#two")[0]
+ root = get_root_node([node_1, node_2])
+ assert root == page.select("div")[0]
+
+
+def test_node_set():
+ html = b"<html><body><p>test</p></body></html>"
+ page = Page(html)
+ node_1 = page.select("p")[0]
+ node_2 = node_1.parent.select("p")[0]
+ assert node_1.parent == node_2.parent
class TestPage: