diff options
Diffstat (limited to 'src/stackexchange/scraper.rs')
-rw-r--r-- | src/stackexchange/scraper.rs | 31 |
1 files changed, 16 insertions, 15 deletions
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs index e67f0f6..97c4bef 100644 --- a/src/stackexchange/scraper.rs +++ b/src/stackexchange/scraper.rs @@ -85,7 +85,7 @@ impl Scraper for Google { sites: &HashMap<String, String>, limit: u16, ) -> Result<ScrapedData> { - let anchors = Selector::parse("div.r > a").unwrap(); + let anchors = Selector::parse("a").unwrap(); parse_with_selector(anchors, html, sites, limit) } @@ -139,23 +139,24 @@ fn parse_with_selector( let mut ordering: HashMap<String, usize> = HashMap::new(); let mut count = 0; for anchor in fragment.select(&anchors) { - let url = anchor + if let Some(url) = anchor .value() .attr("href") - .ok_or_else(|| Error::ScrapingError("Anchor with no href".to_string())) - .map(|href| percent_decode_str(href).decode_utf8_lossy().into_owned())?; - sites.iter().find_map(|(site_code, site_url)| { - let id = question_url_to_id(site_url, &url)?; - ordering.insert(id.to_owned(), count); - match question_ids.entry(site_code.to_owned()) { - Entry::Occupied(mut o) => o.get_mut().push(id), - Entry::Vacant(o) => { - o.insert(vec![id]); + .map(|href| percent_decode_str(href).decode_utf8_lossy()) + { + sites.iter().find_map(|(site_code, site_url)| { + let id = question_url_to_id(site_url, &url)?; + ordering.insert(id.to_owned(), count); + match question_ids.entry(site_code.to_owned()) { + Entry::Occupied(mut o) => o.get_mut().push(id), + Entry::Vacant(o) => { + o.insert(vec![id]); + } } - } - count += 1; - Some(()) - }); + count += 1; + Some(()) + }); + } if count >= limit as usize { break; } |