diff options
author | Karl Lorey <git@karllorey.com> | 2022-06-15 18:01:51 +0200 |
---|---|---|
committer | Karl Lorey <git@karllorey.com> | 2022-06-15 18:01:51 +0200 |
commit | 26e96a8e4e306bf350dc2f6d6b379d9509d18198 (patch) | |
tree | ed91d1938f1a01216ca92544c12de81c89edaea7 | |
parent | 19bc07c8b780b23dc200644f048d5c0a74725d9a (diff) |
Revert previous commit and add comment why
-rw-r--r-- | mlscraper/training.py | 9 |
1 file changed, 8 insertions, 1 deletion
```diff
diff --git a/mlscraper/training.py b/mlscraper/training.py
index fc80770..8512682 100644
--- a/mlscraper/training.py
+++ b/mlscraper/training.py
@@ -103,7 +103,12 @@ def train_scraper_for_matches(matches, roots):
         # todo can be one of the parents
         match_roots = [m.root for m in matches]
         logging.info(f"{match_roots=}")
-        for selector in generate_selector_for_nodes(match_roots, roots):
+
+        # first selector is fine as it matches perfectly
+        # no need to try other selectors
+        # -> item_scraper would be the same
+        selector = first(generate_selector_for_nodes(match_roots, roots))
+        if selector:
             # for all the item_matches, create a tuple
             # that contains the item_match and the new root
             matches_and_roots = [
@@ -116,5 +121,7 @@ def train_scraper_for_matches(matches, roots):
                 list(item_matches), list(list_roots)
             )
             return ListScraper(selector, item_scraper)
+        else:
+            raise NoScraperFoundException()
     else:
         raise RuntimeError(f"type not matched: {found_type}")
```