diff options
author | Sam Tay <sam.chong.tay@gmail.com> | 2020-06-05 13:59:34 -0700 |
---|---|---|
committer | Sam Tay <sam.chong.tay@gmail.com> | 2020-06-06 19:56:12 -0700 |
commit | b06f305db319b90ff55e159a8538bac853ca2168 (patch) | |
tree | ac7e6559346f28c5cb4852a66b213732ab8b6d8e | |
parent | ca754e6254d9e0453dd0e02d700ea0ac3dd9d6a5 (diff) |
Fetch and cache SE sites

mode | file | lines changed |
---|---|---|
-rw-r--r-- | README.md | 10 |
-rw-r--r-- | TODO.md | 4 |
-rw-r--r-- | src/config.rs | 11 |
-rw-r--r-- | src/main.rs | 11 |
-rw-r--r-- | src/stackexchange.rs | 88 |

5 files changed, 113 insertions, 11 deletions
@@ -1,3 +1,13 @@ # so **Note:** under development, not ready for prime time. + +### api keys +According to the [StackExchange +docs](https://api.stackexchange.com/docs/throttle), most users should be fine +without generating a personal API key (10k requests per IP per day). If you do +run into throttling issues, get a key +[here](https://stackapps.com/apps/oauth/register) and tell `so` to use it: +``` +so --set-api-key <KEY> +``` @@ -4,8 +4,10 @@ 0. Install sites when file not found 0. Implement --update-sites command 3. Parse markdown (`pulldown_cmark`) +4. Maybe default --validate-sites off (parsing 30k file a big hit) ### resources for later +0. [Intro to async rust](http://jamesmcm.github.io/blog/2020/05/06/a-practical-introduction-to-async-programming-in-rust/) 1. Async API calls [tokio](https://stackoverflow.com/a/57770687) 2. Parallel calls against multiple sites [vid](https://www.youtube.com/watch?v=O-LagKc0MPA) 3. [config mgmt](https://github.com/rust-cli/confy) or just use directories @@ -13,3 +15,5 @@ 5. Add sort option, e.g. relevance|votes|date 6. Google stuff [scraping with reqwest](https://rust-lang-nursery.github.io/rust-cookbook/web/scraping.html)) 7. App Distribution [cross-platform binaries](https://github.com/rustwasm/wasm-pack/blob/51e6351c28fbd40745719e6d4a7bf26dadd30c85/.travis.yml#L74-L91) + +8. Keep track of quota in a data file, inform user when getting close? 
diff --git a/src/config.rs b/src/config.rs index 4eea499..02d03bc 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,9 +1,18 @@ pub struct Config { - pub api_key: String, + pub api_key: Option<String>, pub limit: u16, pub site: String, } +/// Get project directory; might panic on unexpected OS +pub fn project_dir() -> ProjectDirs { + ProjectDirs::from("io", "Sam Tay", "so").expect( + "Couldn't find + a suitable project directory to store cache and configuration; this + application may not be supported on your operating system.", + ) +} + #[cfg(test)] mod tests { // TODO test malformed filter string diff --git a/src/main.rs b/src/main.rs index 8dd0b59..f6f4399 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,14 +3,19 @@ mod config; mod stackexchange; use config::Config; -use stackexchange::StackExchange; +use stackexchange::{LocalStorage, StackExchange}; fn main() { - let matches = cli::mk_app().get_matches(); + let config = config::user_config(); + let matches = cli::mk_app(&config).get_matches(); + + if matches.is_present("update-sites") { + LocalStorage::new().update_sites(); + } // TODO merge config from ArgMatch let se = StackExchange::new(Config { - api_key: String::from("8o9g7WcfwnwbB*Qp4VsGsw(("), + api_key: Some(String::from("8o9g7WcfwnwbB*Qp4VsGsw((")), limit: 1, site: String::from("stackoverflow"), }); diff --git a/src/stackexchange.rs b/src/stackexchange.rs index 79b389f..d98643d 100644 --- a/src/stackexchange.rs +++ b/src/stackexchange.rs @@ -2,10 +2,13 @@ use anyhow; use flate2::read::GzDecoder; use reqwest::blocking::Client; use reqwest::Url; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use std::collections::HashMap; +use std::fs; +use std::fs::File; +use std::path::PathBuf; -use crate::config::Config; +use crate::config::{project_dir, Config}; /// StackExchange API v2.2 URL const SE_URL: &str = "http://api.stackexchange.com/2.2/"; @@ -15,13 +18,22 @@ const SE_URL: &str = "http://api.stackexchange.com/2.2/"; /// [create 
filter](https://api.stackexchange.com/docs/create-filter). const SE_FILTER: &str = ".DND5X2VHHUH8HyJzpjo)5NvdHI3w6auG"; -/// This structure allows intercting with parts of the StackExchange +/// Pagesize when fetching all SE sites. Should be good for many years... +const SE_SITES_PAGESIZE: u16 = 10000; + +/// This structure allows interacting with parts of the StackExchange /// API, using the `Config` struct to determine certain API settings and options. pub struct StackExchange { client: Client, config: Config, } +/// This structure allows interacting with locally cached StackExchange metadata. +pub struct LocalStorage { + sites: Option<Vec<Site>>, + filename: PathBuf, +} + #[derive(Deserialize, Serialize, Debug)] pub struct Site { api_site_parameter: String, @@ -55,8 +67,8 @@ pub struct Question { /// Internal struct that represents the boilerplate response wrapper from SE API. #[derive(Deserialize, Debug)] -struct ResponseWrapper { - items: Vec<Question>, +struct ResponseWrapper<T> { + items: Vec<T>, } impl StackExchange { @@ -85,7 +97,7 @@ impl StackExchange { ]) .send()?; let gz = GzDecoder::new(resp_body); - let wrapper: ResponseWrapper = serde_json::from_reader(gz)?; + let wrapper: ResponseWrapper<Question> = serde_json::from_reader(gz)?; let qs = wrapper .items .into_iter() @@ -100,12 +112,74 @@ impl StackExchange { fn get_default_opts(&self) -> HashMap<&str, &str> { let mut params = HashMap::new(); params.insert("site", self.config.site.as_str()); - params.insert("key", self.config.api_key.as_str()); params.insert("filter", &SE_FILTER); + if let Some(key) = &self.config.api_key { + params.insert("key", key.as_str()); + } params } } +impl LocalStorage { + pub fn new() -> Self { + let project = project_dir(); + let dir = project.cache_dir(); + fs::create_dir_all(&dir); + LocalStorage { + sites: None, + filename: dir.join("sites.json"), + } + } + + // TODO this function is disgusting; how do in idiomatic rust? 
+ // TODO make this async, inform user if we are downloading + pub fn sites(&mut self) -> &Vec<Site> { + if let Some(ref sites) = self.sites { + return sites; + } + self.fetch_local_sites(); + if let Some(ref sites) = self.sites { + return sites; + } + self.fetch_remote_sites(); + self.sites.as_ref().unwrap() + } + + pub fn update_sites(&mut self) { + self.fetch_remote_sites() + } + + fn fetch_local_sites(&mut self) { + if let Ok(file) = File::open(&self.filename) { + self.sites = serde_json::from_reader(file) + .expect("Local cache corrupted; try running `so --update-sites`") + } + } + + // TODO decide whether or not I should give LocalStorage an api key.. + // TODO cool loading animation? + fn fetch_remote_sites(&mut self) { + let resp_body = Client::new() + .get(stackechange_url("sites")) + .header("Accepts", "application/json") + .query(&[ + ("pagesize", SE_SITES_PAGESIZE.to_string()), + ("page", "1".to_string()), + ]) + .send() + .unwrap(); // TODO inspect response for errors e.g. throttle + let gz = GzDecoder::new(resp_body); + let wrapper: ResponseWrapper<Site> = serde_json::from_reader(gz).unwrap(); // TODO + self.sites = Some(wrapper.items); + self.store_local_sites(); + } + + fn store_local_sites(&self) { + let file = File::create(&self.filename).unwrap(); + serde_json::to_writer(file, &self.sites).unwrap(); + } +} + /// Creates url from const string; can technically panic fn stackechange_url(path: &str) -> Url { let mut url = Url::parse(SE_URL).unwrap(); |