author    Sam Tay <sam.chong.tay@gmail.com>    2020-06-24 11:58:34 -0700
committer Sam Tay <sam.chong.tay@gmail.com>    2020-06-24 11:58:34 -0700
commit    eba4e14ea0420e9919379d3aba23efd95426429f (patch)
tree      6a93842b345b61ef50bcaf6ab3e475590c9c6665
parent    95f429041ee505497f36530e1895c2ea3554d37b (diff)
Refactor for future search engines
-rw-r--r--  src/cli.rs                    | 36
-rw-r--r--  src/config.rs                 | 30
-rw-r--r--  src/stackexchange/scraper.rs  | 27
-rw-r--r--  src/stackexchange/search.rs   | 23
4 files changed, 56 insertions(+), 60 deletions(-)
diff --git a/src/cli.rs b/src/cli.rs
index 7abde66..fa9d42a 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -18,6 +18,7 @@ pub fn get_opts() -> Result<Opts> {
let config = config::user_config()?;
let limit = &config.limit.to_string();
let sites = &config.sites.join(";");
+ let engine = &config.search_engine.to_string();
let matches = App::new("so")
.setting(AppSettings::ColoredHelp)
.version(clap::crate_version!())
@@ -38,6 +39,7 @@ pub fn get_opts() -> Result<Opts> {
.long("set-api-key")
.number_of_values(1)
.takes_value(true)
+ .value_name("key")
.help("Set StackExchange API key"),
)
.arg(
@@ -48,6 +50,7 @@ pub fn get_opts() -> Result<Opts> {
.number_of_values(1)
.takes_value(true)
.default_value(sites)
+ .value_name("site-code")
.help("StackExchange site code to search"),
)
.arg(
@@ -57,7 +60,8 @@ pub fn get_opts() -> Result<Opts> {
.number_of_values(1)
.takes_value(true)
.default_value(limit)
- .validator(|s| s.parse::<u32>().map_err(|e| e.to_string()).map(|_| ()))
+ .value_name("int")
+ .validator(|s| s.parse::<u32>().map(|_| ()).map_err(|e| e.to_string()))
.help("Question limit"),
)
.arg(
@@ -79,16 +83,16 @@ pub fn get_opts() -> Result<Opts> {
.required_unless_one(&["list-sites", "update-sites", "set-api-key"]),
)
.arg(
- Arg::with_name("duckduckgo")
- .long("duckduckgo")
- .help("Use DuckDuckGo as a search engine"),
- )
- .arg(
- Arg::with_name("no-duckduckgo")
- .long("no-duckduckgo")
- .help("Disable duckduckgo")
- .conflicts_with("duckduckgo")
- .hidden(!config.duckduckgo),
+ Arg::with_name("search-engine")
+ .long("search-engine")
+ .short("e")
+ .number_of_values(1)
+ .takes_value(true)
+ .default_value(engine)
+ .value_name("engine")
+ .possible_values(&["duckduckgo", "stackexchange"])
+ .help("Use specified search engine")
+ .next_line_help(true),
)
.get_matches();
let lucky = match (matches.is_present("lucky"), matches.is_present("no-lucky")) {
@@ -96,14 +100,6 @@ pub fn get_opts() -> Result<Opts> {
(_, true) => false,
_ => config.lucky,
};
- let duckduckgo = match (
- matches.is_present("duckduckgo"),
- matches.is_present("no-duckduckgo"),
- ) {
- (true, _) => true,
- (_, true) => false,
- _ => config.duckduckgo,
- };
Ok(Opts {
list_sites: matches.is_present("list-sites"),
update_sites: matches.is_present("update-sites"),
@@ -114,6 +110,7 @@ pub fn get_opts() -> Result<Opts> {
config: Config {
// these unwraps are safe via clap default values & validators
limit: matches.value_of("limit").unwrap().parse::<u16>().unwrap(),
+ search_engine: serde_yaml::from_str(matches.value_of("search-engine").unwrap())?,
sites: matches
.values_of("site")
.unwrap()
@@ -126,7 +123,6 @@ pub fn get_opts() -> Result<Opts> {
.map(String::from)
.or(config.api_key),
lucky,
- duckduckgo,
},
})
}
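In the new Opts construction above, the chosen engine string is turned into the enum with serde_yaml::from_str, so the accepted spellings are exactly the serde-renamed variant names, and possible_values keeps anything else from ever reaching that parse. A minimal standalone sketch of that conversion (not from the repo; assumes serde and serde_yaml as dependencies, with the enum redeclared here for illustration):

    use serde::Deserialize;

    #[derive(Deserialize, Debug)]
    #[serde(rename_all = "lowercase")]
    enum SearchEngine {
        DuckDuckGo,
        StackExchange,
    }

    fn main() {
        // "duckduckgo" and "stackexchange" are the only values clap lets through
        let engine: SearchEngine = serde_yaml::from_str("duckduckgo").unwrap();
        println!("{:?}", engine); // prints: DuckDuckGo
    }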
diff --git a/src/config.rs b/src/config.rs
index 79cbd74..3102a87 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,5 +1,6 @@
use directories::ProjectDirs;
use serde::{Deserialize, Serialize};
+use std::fmt;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
@@ -8,12 +9,37 @@ use crate::error::{Error, Result};
use crate::utils;
#[derive(Deserialize, Serialize, Debug, Clone)]
+#[serde(rename_all = "lowercase")] // TODO test this
+pub enum SearchEngine {
+ DuckDuckGo,
+ //Google,
+ StackExchange,
+}
+
+#[derive(Deserialize, Serialize, Debug, Clone)]
+#[serde(default)]
pub struct Config {
pub api_key: Option<String>,
pub limit: u16,
pub lucky: bool,
pub sites: Vec<String>,
- pub duckduckgo: bool,
+ pub search_engine: SearchEngine,
+}
+
+impl fmt::Display for SearchEngine {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let s = match &self {
+ SearchEngine::DuckDuckGo => "duckduckgo",
+ SearchEngine::StackExchange => "stackexchange",
+ };
+ write!(f, "{}", s)
+ }
+}
+
+impl Default for SearchEngine {
+ fn default() -> Self {
+ SearchEngine::DuckDuckGo
+ }
}
// TODO make a friender config file, like the colors.toml below
@@ -24,7 +50,7 @@ impl Default for Config {
limit: 20,
lucky: true,
sites: vec![String::from("stackoverflow")],
- duckduckgo: true,
+ search_engine: SearchEngine::default(),
}
}
}
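The "// TODO test this" note above could be covered by a round-trip check, since cli.rs feeds the Display output back into serde_yaml::from_str as the flag's default value, so the two representations must agree. A sketch of such a test (hypothetical, not part of this commit; it would sit alongside the types in src/config.rs):

    #[cfg(test)]
    mod tests {
        use super::SearchEngine;

        #[test]
        fn search_engine_display_round_trips_through_serde() {
            for engine in &[SearchEngine::DuckDuckGo, SearchEngine::StackExchange] {
                // Display must produce exactly the string serde expects back
                let parsed: SearchEngine = serde_yaml::from_str(&engine.to_string()).unwrap();
                assert_eq!(parsed.to_string(), engine.to_string());
            }
        }
    }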
diff --git a/src/stackexchange/scraper.rs b/src/stackexchange/scraper.rs
index a0d29ab..e6376fa 100644
--- a/src/stackexchange/scraper.rs
+++ b/src/stackexchange/scraper.rs
@@ -10,10 +10,6 @@ use crate::error::{Error, Result};
/// DuckDuckGo URL
const DUCKDUCKGO_URL: &str = "https://duckduckgo.com";
-pub enum SearchEngine {
- DuckDuckGo,
-}
-
// Is question_id unique across all sites? If not, then this edge case is
// unaccounted for when sorting.
//
@@ -40,28 +36,7 @@ pub trait Scraper {
I: IntoIterator<Item = &'a String>;
}
-impl Scraper for SearchEngine {
- fn parse(
- &self,
- html: &str,
- sites: &HashMap<String, String>,
- limit: u16,
- ) -> Result<ScrapedData> {
- match &self {
- Self::DuckDuckGo => DuckDuckGo.parse(html, sites, limit),
- }
- }
- fn get_url<'a, I>(&self, query: &str, sites: I) -> Url
- where
- I: IntoIterator<Item = &'a String>,
- {
- match &self {
- Self::DuckDuckGo => DuckDuckGo.get_url(query, sites),
- }
- }
-}
-
-struct DuckDuckGo;
+pub struct DuckDuckGo;
impl Scraper for DuckDuckGo {
/// Parse (site, question_id) pairs out of duckduckgo search results html
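With SearchEngine moved out of this module and Scraper left as a plain trait, adding another engine later only means another implementor. A hypothetical sketch of what that could look like inside scraper.rs (Google and its parsing are illustrative only, not part of this commit; the signatures mirror the removed SearchEngine impl above):

    pub struct Google;

    impl Scraper for Google {
        fn parse(
            &self,
            html: &str,
            sites: &HashMap<String, String>,
            limit: u16,
        ) -> Result<ScrapedData> {
            // would pull (site, question_id) pairs out of Google result HTML
            todo!()
        }

        fn get_url<'a, I>(&self, query: &str, sites: I) -> Url
        where
            I: IntoIterator<Item = &'a String>,
        {
            // would build a site-restricted query URL for the engine
            todo!()
        }
    }

search.rs would then only need a matching SearchEngine::Google config variant and a self.search_by_scraper(Google) arm in its dispatch.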
diff --git a/src/stackexchange/search.rs b/src/stackexchange/search.rs
index ed89e15..530b665 100644
--- a/src/stackexchange/search.rs
+++ b/src/stackexchange/search.rs
@@ -4,14 +4,14 @@ use reqwest::header;
use reqwest::Client;
use std::collections::HashMap;
-use crate::config::Config;
+use crate::config::{Config, SearchEngine};
use crate::error::{Error, Result};
use crate::tui::markdown;
use crate::tui::markdown::Markdown;
use super::api::{Answer, Api, Question};
use super::local_storage::LocalStorage;
-use super::scraper::{ScrapedData, Scraper, SearchEngine};
+use super::scraper::{DuckDuckGo, ScrapedData, Scraper};
/// Limit on concurrent requests (gets passed to `buffer_unordered`)
const CONCURRENT_REQUESTS_LIMIT: usize = 8;
@@ -55,8 +55,9 @@ impl Search {
let original_config = self.config.clone();
// Temp set lucky config
self.config.limit = 1;
- if !self.config.duckduckgo {
- self.config.sites.truncate(1);
+ match self.config.search_engine {
+ SearchEngine::StackExchange => self.config.sites.truncate(1),
+ _ => (),
}
// Run search with temp config
let result = self.search().await;
@@ -81,17 +82,15 @@ impl Search {
/// Search using the configured search engine
pub async fn search(&self) -> Result<Vec<Question<String>>> {
- if self.config.duckduckgo {
- self.search_by_engine(SearchEngine::DuckDuckGo).await
- } else {
- // TODO after duckduck go finished, refactor to _not_ thread this limit, its unnecessary
- self.parallel_search_advanced().await
+ match self.config.search_engine {
+ SearchEngine::DuckDuckGo => self.search_by_scraper(DuckDuckGo).await,
+ SearchEngine::StackExchange => self.parallel_search_advanced().await,
}
}
/// Search query at duckduckgo and then fetch the resulting questions from SE.
- async fn search_by_engine(&self, search_engine: impl Scraper) -> Result<Vec<Question<String>>> {
- let url = search_engine.get_url(&self.query, self.sites.values());
+ async fn search_by_scraper(&self, scraper: impl Scraper) -> Result<Vec<Question<String>>> {
+ let url = scraper.get_url(&self.query, self.sites.values());
let html = Client::new()
.get(url)
.header(header::USER_AGENT, USER_AGENT)
@@ -99,7 +98,7 @@ impl Search {
.await?
.text()
.await?;
- let data = search_engine.parse(&html, &self.sites, self.config.limit)?;
+ let data = scraper.parse(&html, &self.sites, self.config.limit)?;
self.parallel_questions(data).await
}