summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sam Tay <sam.chong.tay@gmail.com>, 2020-06-26 09:34:06 -0700
committer: Sam Tay <sam.chong.tay@gmail.com>, 2020-06-26 10:19:06 -0700
commit: e760ec8e603b91f0e01b680150432a070ebab50c (patch)
tree: 08b93c1c7aba0ba7e12a926242b39f7f86eea203
parent: 1b410e561cd932fb10f1c0884609a29889822a0f (diff)
Fix parsing /tagged links
-rw-r--r-- src/stackexchange/scraper.rs 37
1 file changed, 30 insertions, 7 deletions
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs
index cdeef3c..0ac6baf 100644
--- a/src/stackexchange/scraper.rs
+++ b/src/stackexchange/scraper.rs
@@ -182,14 +182,21 @@ fn question_url_to_id(site_url: &str, input: &str) -> Option<String> {
let fragment = site_url.trim_end_matches('/').to_owned() + segment;
let ix = input.find(&fragment)? + fragment.len();
let input = &input[ix..];
- if let Some(end) = input.find('/') {
- Some(input[0..end].to_string())
+ let id = if let Some(end) = input.find('/') {
+ input[0..end].to_string()
} else {
- Some(input[0..].to_string())
+ input[0..].to_string()
+ };
+ if id.chars().into_iter().all(|c| c.is_digit(10)) {
+ Some(id)
+ } else {
+ None
}
})
}
+// TODO Get blocked google request html
+// note: this may only be possible at search.rs level (with non-200 code)
#[cfg(test)]
mod tests {
use super::*;
@@ -329,17 +336,33 @@ mod tests {
}
}
- // TODO Get blocked google request html
- // note: this may only be possible at search.rs level (with non-200 code)
-
#[test]
fn test_question_url_to_id() {
+ // Happy path
let site_url = "stackoverflow.com";
let input = "/l/?kh=-1&uddg=https://stackoverflow.com/questions/11828270/how-do-i-exit-the-vim-editor";
assert_eq!(question_url_to_id(site_url, input).unwrap(), "11828270");
+ // Happy path with variant /q/
let site_url = "stackoverflow.com";
- let input = "/l/?kh=-1&uddg=https://askubuntu.com/questions/24406/how-to-close-vim-from-the-command-line";
+ let input = "/l/?kh=-1&uddg=https://stackoverflow.com/q/11828270";
+ assert_eq!(question_url_to_id(site_url, input).unwrap(), "11828270");
+
+ // Base site
+ let site_url = "unix.stackoverflow.com";
+ let input = "/l/?kh=-1&uddg=https://unix.stackoverflow.com";
+ assert_eq!(question_url_to_id(site_url, input), None);
+
+ // Tagged link
+ let site_url = "meta.stackexchange.com";
+ let input =
+ "/l/?kh=-1&amp;uddg=https://meta.stackexchange.com/questions/tagged/stackexchange-tour";
assert_eq!(question_url_to_id(site_url, input), None);
+
+ // Different site
+ // TODO get this to pass; then test tagged.html
+ //let site_url = "meta.stackexchange.com";
+ //let input = "/l/?kh=-1&uddg=https://math.meta.stackexchange.com/q/11828270";
+ //assert_eq!(question_url_to_id(site_url, input), None);
}
}