summaryrefslogtreecommitdiffstats
path: root/benches/parsing.rs
diff options
context:
space:
mode:
Diffstat (limited to 'benches/parsing.rs')
-rw-r--r--benches/parsing.rs16
1 files changed, 16 insertions, 0 deletions
diff --git a/benches/parsing.rs b/benches/parsing.rs
index 0bfe44c..ae6ccf0 100644
--- a/benches/parsing.rs
+++ b/benches/parsing.rs
@@ -3,6 +3,22 @@ use so::stackexchange::scraper::{DuckDuckGo, Google, Scraper};
use std::collections::HashMap;
use std::time::Duration;
+/// Note: these benchmarks show that replacing question_url_to_id with regex, i.e.
+/// ```rust
+/// fn question_url_to_id(site_url: &str, input: &str) -> Option<String> {
+/// let re: Regex = Regex::new(&format!(
+///         "[^\\.]{}/(?:q|questions)/(?P<id>\\d+)",
+/// site_url.replace('.', "\\.")
+/// ))
+/// .unwrap();
+/// Some(re.captures(input)?.name("id")?.as_str().to_owned())
+/// }
+/// ```
+/// **greatly** degrades performance (maybe due to the fact that the regex depends on configuration
+/// and can't be compiled with lazy_static?).
+///
+/// Still, I could try creating a regex that captures the url encoded SE url and question id and
+/// multiline regex the entire HTML document. It might be faster than the scraper library?
fn bench_parsers(c: &mut Criterion) {
let limit: u16 = 10;
let mut sites = HashMap::new();