summary refs log tree commit diff stats
path: root/src/stackexchange/scraper.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/stackexchange/scraper.rs')
-rw-r--r-- src/stackexchange/scraper.rs 31
1 files changed, 16 insertions, 15 deletions
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs
index e67f0f6..97c4bef 100644
--- a/src/stackexchange/scraper.rs
+++ b/src/stackexchange/scraper.rs
@@ -85,7 +85,7 @@ impl Scraper for Google {
sites: &HashMap<String, String>,
limit: u16,
) -> Result<ScrapedData> {
- let anchors = Selector::parse("div.r > a").unwrap();
+ let anchors = Selector::parse("a").unwrap();
parse_with_selector(anchors, html, sites, limit)
}
@@ -139,23 +139,24 @@ fn parse_with_selector(
let mut ordering: HashMap<String, usize> = HashMap::new();
let mut count = 0;
for anchor in fragment.select(&anchors) {
- let url = anchor
+ if let Some(url) = anchor
.value()
.attr("href")
- .ok_or_else(|| Error::ScrapingError("Anchor with no href".to_string()))
- .map(|href| percent_decode_str(href).decode_utf8_lossy().into_owned())?;
- sites.iter().find_map(|(site_code, site_url)| {
- let id = question_url_to_id(site_url, &url)?;
- ordering.insert(id.to_owned(), count);
- match question_ids.entry(site_code.to_owned()) {
- Entry::Occupied(mut o) => o.get_mut().push(id),
- Entry::Vacant(o) => {
- o.insert(vec![id]);
+ .map(|href| percent_decode_str(href).decode_utf8_lossy())
+ {
+ sites.iter().find_map(|(site_code, site_url)| {
+ let id = question_url_to_id(site_url, &url)?;
+ ordering.insert(id.to_owned(), count);
+ match question_ids.entry(site_code.to_owned()) {
+ Entry::Occupied(mut o) => o.get_mut().push(id),
+ Entry::Vacant(o) => {
+ o.insert(vec![id]);
+ }
}
- }
- count += 1;
- Some(())
- });
+ count += 1;
+ Some(())
+ });
+ }
if count >= limit as usize {
break;
}