summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
author Karl Lorey <git@karllorey.com> 2022-06-24 12:58:48 +0200
committer Karl Lorey <git@karllorey.com> 2022-06-24 12:58:48 +0200
commit aa1ac21a0ede6f4f6a4282fcb07f87d706186817 (patch)
tree 011a746160630d89b545c1a36b01051c09d7a727 /tests
parent f0d841c49891355259b510de7d3490b20fc3e61b (diff)
Avoid matching numbers inside image dimensions
Diffstat (limited to 'tests')
-rw-r--r-- tests/test_matches.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/tests/test_matches.py b/tests/test_matches.py
index e69de29..f361d5b 100644
--- a/tests/test_matches.py
+++ b/tests/test_matches.py
@@ -0,0 +1,19 @@
+from mlscraper.html import Page
+from mlscraper.matches import AttributeValueExtractor
+from mlscraper.matches import generate_all_value_matches
+from mlscraper.matches import is_dimensions_match
+from mlscraper.matches import ValueMatch
+
+
+def test_is_dimensions_match_plain():
+    extractor = AttributeValueExtractor("height")
+    value_match = ValueMatch(None, extractor)
+    assert is_dimensions_match(value_match)
+
+
+def test_is_dimensions_match_generation():
+    page = Page(b'<html><body><img height="20" width="20"</body></html>')
+    matches_unfiltered = list(generate_all_value_matches(page, "20"))
+    assert matches_unfiltered
+    matches = [m for m in matches_unfiltered if not is_dimensions_match(m)]
+    assert not matches