From 307e1973e799ace3303184f40aaf5c205194fb33 Mon Sep 17 00:00:00 2001 From: Sam Tay Date: Sat, 27 Jun 2020 20:33:59 -0700 Subject: Add benchmarks for HTML parsing --- benches/parsing.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 benches/parsing.rs (limited to 'benches') diff --git a/benches/parsing.rs b/benches/parsing.rs new file mode 100644 index 0000000..0bfe44c --- /dev/null +++ b/benches/parsing.rs @@ -0,0 +1,58 @@ +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use so::stackexchange::scraper::{DuckDuckGo, Google, Scraper}; +use std::collections::HashMap; +use std::time::Duration; + +fn bench_parsers(c: &mut Criterion) { + let limit: u16 = 10; + let mut sites = HashMap::new(); + sites.insert( + String::from("stackoverflow"), + String::from("stackoverflow.com"), + ); + sites.insert(String::from("askubuntu"), String::from("askubuntu.com")); + + let mut group = c.benchmark_group("Scraping html"); + + group.sample_size(80); + group.measurement_time(Duration::from_secs(10)); + group.throughput(Throughput::Elements(limit as u64)); + + group.bench_with_input( + BenchmarkId::new("Google.parse", "exit-vim"), + include_str!("../test/google/exit-vim.html"), + |b, html| b.iter(|| Google.parse(html, &sites, limit)), + ); + + group.bench_with_input( + BenchmarkId::new("DuckDuckGo.parse", "exit-vim"), + include_str!("../test/duckduckgo/exit-vim.html"), + |b, html| b.iter(|| DuckDuckGo.parse(html, &sites, limit)), + ); + + let mut sites = HashMap::new(); + sites.insert( + String::from("stackoverflow"), + String::from("stackoverflow.com"), + ); + + group.bench_with_input( + BenchmarkId::new("Google.parse", "/q/"), + include_str!("../test/google/parsing-q.html"), + |b, html| b.iter(|| Google.parse(html, &sites, limit)), + ); + + let mut sites = HashMap::new(); + sites.insert(String::from("meta"), String::from("meta.stackexchange.com")); + + group.bench_with_input( + BenchmarkId::new("DuckDuckGo.parse", "tagged"), + include_str!("../test/duckduckgo/tagged.html"), + |b, html| b.iter(|| DuckDuckGo.parse(html, &sites, limit)), + ); + + group.finish(); +} + +criterion_group!(benches, bench_parsers); +criterion_main!(benches); -- cgit v1.2.3