blob: 9ba5f7209ccdc8bcac4c8c4cce93484533aa9fb1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
|
from mlscraper import RuleBasedSingleItemScraper, SingleItemPageSample
from mlscraper.parser import make_soup_page, ExtractionResult
def test_basic():
    """Train a rule-based scraper on one labelled page and scrape it back.

    The markup contains two elements with class ``item``. Only the first
    match returned by ``select(".item")`` is labelled, under the key
    ``"res"``. The scraper built from that single sample is then run on the
    same markup and must yield ``"result"`` for that key.
    """
    # Keep the raw markup around: it is both the training page source and
    # the input handed to the trained scraper.
    markup = '<html><body><div class="parent"><p class="item">result</p></div><p class="item">not a result</p></body></html>'
    soup_page = make_soup_page(markup)

    # Label the first ".item" match as the value for "res".
    target_node = soup_page.select(".item")[0]
    labelled_item = {"res": ExtractionResult(target_node)}
    training_sample = SingleItemPageSample(soup_page, labelled_item)

    trained_scraper = RuleBasedSingleItemScraper.build([training_sample])

    extracted = trained_scraper.scrape(markup)
    assert extracted["res"] == "result"
|