diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-15 17:53:57 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-15 17:53:57 +0200 |
commit | 19bc07c8b780b23dc200644f048d5c0a74725d9a (patch) | |
tree | 486c1e3f4c269486b0074c9d263da1aa36efe6a4 | |
parent | 7d7a07ea8baf7ee2af2a93b41ac42ee73d16fbda (diff) |
Loop through all possible selectors when training ListScraper
-rw-r--r-- | mlscraper/training.py | 3 | ||||
-rw-r--r-- | tests/test_training.py | 2 |
2 files changed, 2 insertions, 3 deletions
diff --git a/mlscraper/training.py b/mlscraper/training.py
index 38b15d9..fc80770 100644
--- a/mlscraper/training.py
+++ b/mlscraper/training.py
@@ -103,8 +103,7 @@ def train_scraper_for_matches(matches, roots):
         # todo can be one of the parents
         match_roots = [m.root for m in matches]
         logging.info(f"{match_roots=}")
-        selector = first(generate_selector_for_nodes(match_roots, roots))
-        if selector:
+        for selector in generate_selector_for_nodes(match_roots, roots):
             # for all the item_matches, create a tuple
             # that contains the item_match and the new root
             matches_and_roots = [
diff --git a/tests/test_training.py b/tests/test_training.py
index 49288a3..3e48756 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -16,7 +16,7 @@ def test_train_scraper_simple_list():
     train_scraper(training_set)
 
 
-@pytest.mark.skip("fucking fails")
+@pytest.mark.skip("does not work yet")
 def test_train_scraper(stackoverflow_samples):
     training_set = TrainingSet()
     for s in stackoverflow_samples: