diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-23 21:53:23 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-23 21:53:23 +0200 |
commit | a2f481c3481f6445e520e6bbdfafae3bbf94f96b (patch) | |
tree | ea6b44ed7ac418af1a9f11e6891319288a5a475a /tests | |
parent | 789e635aabd126e934af7dbf0b2769bef28d9683 (diff) |
Improve performance by fixing hashing and root computation
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_html.py | 27 |
1 file changed, 16 insertions, 11 deletions
```diff
diff --git a/tests/test_html.py b/tests/test_html.py
index e976ed5..7780504 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -1,20 +1,25 @@
-from bs4 import BeautifulSoup
-from mlscraper.html import _get_root_of_nodes
+from mlscraper.html import get_root_node
 from mlscraper.html import HTMLExactTextMatch
 from mlscraper.html import Page
 from mlscraper.html import selector_matches_nodes
 from mlscraper.matches import AttributeValueExtractor
 
 
-def test_get_root_of_nodes():
-    soup = BeautifulSoup(
-        b'<html><body><div><p id="one"></p><p><span id="two"></span></p></div></body></html>',
-        "lxml",
-    )
-    node_1 = soup.select_one("#one")
-    node_2 = soup.select_one("#two")
-    root = _get_root_of_nodes([node_1, node_2])
-    assert root == soup.select_one("div")
+def test_get_root_nodes():
+    html = b'<html><body><div><p id="one"></p><p><span id="two"></span></p></div></body></html>'
+    page = Page(html)
+    node_1 = page.select("#one")[0]
+    node_2 = page.select("#two")[0]
+    root = get_root_node([node_1, node_2])
+    assert root == page.select("div")[0]
+
+
+def test_node_set():
+    html = b"<html><body><p>test</p></body></html>"
+    page = Page(html)
+    node_1 = page.select("p")[0]
+    node_2 = node_1.parent.select("p")[0]
+    assert node_1.parent == node_2.parent
 
 
 class TestPage:
```