diff options
author | Sam Tay <sam.chong.tay@gmail.com> | 2020-06-27 20:09:42 -0700 |
---|---|---|
committer | Sam Tay <sam.chong.tay@gmail.com> | 2020-06-27 20:09:42 -0700 |
commit | e2814eeccecfb98bd899d2c69bee23cd57541880 (patch) | |
tree | 11a0019b50c9a71ade8ee25dbc6cef47d9a62682 | |
parent | e760ec8e603b91f0e01b680150432a070ebab50c (diff) |
Fix capturing incorrect subdomain sites
-rw-r--r-- | src/stackexchange/scraper.rs | 12 |
1 file changed, 8 insertions, 4 deletions
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs index 0ac6baf..34a25ce 100644 --- a/src/stackexchange/scraper.rs +++ b/src/stackexchange/scraper.rs @@ -180,7 +180,11 @@ fn parse_with_selector( fn question_url_to_id(site_url: &str, input: &str) -> Option<String> { ["/questions/", "/q/"].iter().find_map(|segment| { let fragment = site_url.trim_end_matches('/').to_owned() + segment; - let ix = input.find(&fragment)? + fragment.len(); + let mut ix = input.find(&fragment)?; + if ix > 0 && input.chars().nth(ix - 1) == Some('.') { + return None; + } + ix += fragment.len(); let input = &input[ix..]; let id = if let Some(end) = input.find('/') { input[0..end].to_string() @@ -361,8 +365,8 @@ mod tests { // Different site // TODO get this to pass; then test tagged.html - //let site_url = "meta.stackexchange.com"; - //let input = "/l/?kh=-1&uddg=https://math.meta.stackexchange.com/q/11828270"; - //assert_eq!(question_url_to_id(site_url, input), None); + let site_url = "meta.stackexchange.com"; + let input = "/l/?kh=-1&uddg=https://math.meta.stackexchange.com/q/11828270"; + assert_eq!(question_url_to_id(site_url, input), None); } } |