diff options
Diffstat (limited to 'benches')
-rw-r--r-- | benches/parsing.rs | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/benches/parsing.rs b/benches/parsing.rs index 0bfe44c..ae6ccf0 100644 --- a/benches/parsing.rs +++ b/benches/parsing.rs @@ -3,6 +3,22 @@ use so::stackexchange::scraper::{DuckDuckGo, Google, Scraper}; use std::collections::HashMap; use std::time::Duration; +/// Note: these benchmarks show that replacing question_url_to_id with regex, i.e. +/// ```rust +/// fn question_url_to_id(site_url: &str, input: &str) -> Option<String> { +/// let re: Regex = Regex::new(&format!( +/// "[^\\.]{}/(:?q|questions)/(?P<id>\\d+)", +/// site_url.replace('.', "\\.") +/// )) +/// .unwrap(); +/// Some(re.captures(input)?.name("id")?.as_str().to_owned()) +/// } +/// ``` +/// **greatly** degrades performance (maybe due to the fact that the regex depends on configuration +/// and can't be compiled with lazy_static?). +/// +/// Still, I could try creating a regex that captures the url encoded SE url and question id and +/// multiline regex the entire HTML document. It might be faster than the scraper library? fn bench_parsers(c: &mut Criterion) { let limit: u16 = 10; let mut sites = HashMap::new(); |