diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-24 12:58:48 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-24 12:58:48 +0200 |
commit | aa1ac21a0ede6f4f6a4282fcb07f87d706186817 (patch) | |
tree | 011a746160630d89b545c1a36b01051c09d7a727 /tests | |
parent | f0d841c49891355259b510de7d3490b20fc3e61b (diff) |
Avoid matching numbers inside image dimensions
Diffstat (limited to 'tests')
-rw-r--r-- | tests/test_matches.py | 19 |
1 files changed, 19 insertions, 0 deletions
```diff
diff --git a/tests/test_matches.py b/tests/test_matches.py
index e69de29..f361d5b 100644
--- a/tests/test_matches.py
+++ b/tests/test_matches.py
@@ -0,0 +1,19 @@
+from mlscraper.html import Page
+from mlscraper.matches import AttributeValueExtractor
+from mlscraper.matches import generate_all_value_matches
+from mlscraper.matches import is_dimensions_match
+from mlscraper.matches import ValueMatch
+
+
+def test_is_dimensions_match_plain():
+    extractor = AttributeValueExtractor("height")
+    value_match = ValueMatch(None, extractor)
+    assert is_dimensions_match(value_match)
+
+
+def test_is_dimensions_match_generation():
+    page = Page(b'<html><body><img height="20" width="20"</body></html>')
+    matches_unfiltered = list(generate_all_value_matches(page, "20"))
+    assert matches_unfiltered
+    matches = [m for m in matches_unfiltered if not is_dimensions_match(m)]
+    assert not matches
```