summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sam Tay <sam.chong.tay@gmail.com> 2020-06-27 20:09:42 -0700
committer Sam Tay <sam.chong.tay@gmail.com> 2020-06-27 20:09:42 -0700
commit e2814eeccecfb98bd899d2c69bee23cd57541880 (patch)
tree 11a0019b50c9a71ade8ee25dbc6cef47d9a62682
parent e760ec8e603b91f0e01b680150432a070ebab50c (diff)
Fix capturing incorrect subdomain sites
-rw-r--r-- src/stackexchange/scraper.rs 12
1 files changed, 8 insertions, 4 deletions
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs
index 0ac6baf..34a25ce 100644
--- a/src/stackexchange/scraper.rs
+++ b/src/stackexchange/scraper.rs
@@ -180,7 +180,11 @@ fn parse_with_selector(
fn question_url_to_id(site_url: &str, input: &str) -> Option<String> {
["/questions/", "/q/"].iter().find_map(|segment| {
let fragment = site_url.trim_end_matches('/').to_owned() + segment;
- let ix = input.find(&fragment)? + fragment.len();
+ let mut ix = input.find(&fragment)?;
+ if ix > 0 && input.chars().nth(ix - 1) == Some('.') {
+ return None;
+ }
+ ix += fragment.len();
let input = &input[ix..];
let id = if let Some(end) = input.find('/') {
input[0..end].to_string()
@@ -361,8 +365,8 @@ mod tests {
// Different site
// TODO get this to pass; then test tagged.html
- //let site_url = "meta.stackexchange.com";
- //let input = "/l/?kh=-1&uddg=https://math.meta.stackexchange.com/q/11828270";
- //assert_eq!(question_url_to_id(site_url, input), None);
+ let site_url = "meta.stackexchange.com";
+ let input = "/l/?kh=-1&uddg=https://math.meta.stackexchange.com/q/11828270";
+ assert_eq!(question_url_to_id(site_url, input), None);
}
}