summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sam Tay <sam.chong.tay@gmail.com> 2020-06-05 13:59:34 -0700
committer Sam Tay <sam.chong.tay@gmail.com> 2020-06-06 19:56:12 -0700
commit b06f305db319b90ff55e159a8538bac853ca2168 (patch)
tree ac7e6559346f28c5cb4852a66b213732ab8b6d8e
parent ca754e6254d9e0453dd0e02d700ea0ac3dd9d6a5 (diff)
Fetch and cache SE sites
-rw-r--r-- README.md 10
-rw-r--r-- TODO.md 4
-rw-r--r-- src/config.rs 11
-rw-r--r-- src/main.rs 11
-rw-r--r-- src/stackexchange.rs 88
5 files changed, 113 insertions, 11 deletions
diff --git a/README.md b/README.md
index 8a8c1c8..defc45f 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,13 @@
# so
**Note:** under development, not ready for prime time.
+
+### api keys
+According to the [StackExchange
+docs](https://api.stackexchange.com/docs/throttle), most users should be fine
+without generating a personal API key (10k requests per IP per day). If you do
+run into throttling issues, get a key
+[here](https://stackapps.com/apps/oauth/register) and tell `so` to use it:
+```
+so --set-api-key <KEY>
+```
diff --git a/TODO.md b/TODO.md
index dac2516..14bb052 100644
--- a/TODO.md
+++ b/TODO.md
@@ -4,8 +4,10 @@
0. Install sites when file not found
0. Implement --update-sites command
3. Parse markdown (`pulldown_cmark`)
+4. Maybe default `--validate-sites` to off (parsing a 30k file is a big performance hit)
### resources for later
+0. [Intro to async rust](http://jamesmcm.github.io/blog/2020/05/06/a-practical-introduction-to-async-programming-in-rust/)
1. Async API calls [tokio](https://stackoverflow.com/a/57770687)
2. Parallel calls against multiple sites [vid](https://www.youtube.com/watch?v=O-LagKc0MPA)
3. [config mgmt](https://github.com/rust-cli/confy) or just use directories
@@ -13,3 +15,5 @@
5. Add sort option, e.g. relevance|votes|date
6. Google stuff [scraping with reqwest](https://rust-lang-nursery.github.io/rust-cookbook/web/scraping.html)
7. App Distribution [cross-platform binaries](https://github.com/rustwasm/wasm-pack/blob/51e6351c28fbd40745719e6d4a7bf26dadd30c85/.travis.yml#L74-L91)
+
+8. Keep track of quota in a data file, inform user when getting close?
diff --git a/src/config.rs b/src/config.rs
index 4eea499..02d03bc 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,9 +1,18 @@
pub struct Config {
- pub api_key: String,
+ pub api_key: Option<String>,
pub limit: u16,
pub site: String,
}
+/// Get project directory; might panic on unexpected OS
+pub fn project_dir() -> ProjectDirs {
+ ProjectDirs::from("io", "Sam Tay", "so").expect(
+ "Couldn't find
+ a suitable project directory to store cache and configuration; this
+ application may not be supported on your operating system.",
+ )
+}
+
#[cfg(test)]
mod tests {
// TODO test malformed filter string
diff --git a/src/main.rs b/src/main.rs
index 8dd0b59..f6f4399 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,14 +3,19 @@ mod config;
mod stackexchange;
use config::Config;
-use stackexchange::StackExchange;
+use stackexchange::{LocalStorage, StackExchange};
fn main() {
- let matches = cli::mk_app().get_matches();
+ let config = config::user_config();
+ let matches = cli::mk_app(&config).get_matches();
+
+ if matches.is_present("update-sites") {
+ LocalStorage::new().update_sites();
+ }
// TODO merge config from ArgMatch
let se = StackExchange::new(Config {
- api_key: String::from("8o9g7WcfwnwbB*Qp4VsGsw(("),
+ api_key: Some(String::from("8o9g7WcfwnwbB*Qp4VsGsw((")),
limit: 1,
site: String::from("stackoverflow"),
});
diff --git a/src/stackexchange.rs b/src/stackexchange.rs
index 79b389f..d98643d 100644
--- a/src/stackexchange.rs
+++ b/src/stackexchange.rs
@@ -2,10 +2,13 @@ use anyhow;
use flate2::read::GzDecoder;
use reqwest::blocking::Client;
use reqwest::Url;
-use serde::Deserialize;
+use serde::{Deserialize, Serialize};
use std::collections::HashMap;
+use std::fs;
+use std::fs::File;
+use std::path::PathBuf;
-use crate::config::Config;
+use crate::config::{project_dir, Config};
/// StackExchange API v2.2 URL
const SE_URL: &str = "http://api.stackexchange.com/2.2/";
@@ -15,13 +18,22 @@ const SE_URL: &str = "http://api.stackexchange.com/2.2/";
/// [create filter](https://api.stackexchange.com/docs/create-filter).
const SE_FILTER: &str = ".DND5X2VHHUH8HyJzpjo)5NvdHI3w6auG";
-/// This structure allows intercting with parts of the StackExchange
+/// Pagesize when fetching all SE sites. Should be good for many years...
+const SE_SITES_PAGESIZE: u16 = 10000;
+
+/// This structure allows interacting with parts of the StackExchange
/// API, using the `Config` struct to determine certain API settings and options.
pub struct StackExchange {
client: Client,
config: Config,
}
+/// This structure allows interacting with locally cached StackExchange metadata.
+pub struct LocalStorage {
+ sites: Option<Vec<Site>>,
+ filename: PathBuf,
+}
+
#[derive(Deserialize, Serialize, Debug)]
pub struct Site {
api_site_parameter: String,
@@ -55,8 +67,8 @@ pub struct Question {
/// Internal struct that represents the boilerplate response wrapper from SE API.
#[derive(Deserialize, Debug)]
-struct ResponseWrapper {
- items: Vec<Question>,
+struct ResponseWrapper<T> {
+ items: Vec<T>,
}
impl StackExchange {
@@ -85,7 +97,7 @@ impl StackExchange {
])
.send()?;
let gz = GzDecoder::new(resp_body);
- let wrapper: ResponseWrapper = serde_json::from_reader(gz)?;
+ let wrapper: ResponseWrapper<Question> = serde_json::from_reader(gz)?;
let qs = wrapper
.items
.into_iter()
@@ -100,12 +112,74 @@ impl StackExchange {
fn get_default_opts(&self) -> HashMap<&str, &str> {
let mut params = HashMap::new();
params.insert("site", self.config.site.as_str());
- params.insert("key", self.config.api_key.as_str());
params.insert("filter", &SE_FILTER);
+ if let Some(key) = &self.config.api_key {
+ params.insert("key", key.as_str());
+ }
params
}
}
+impl LocalStorage {
+ pub fn new() -> Self {
+ let project = project_dir();
+ let dir = project.cache_dir();
+ fs::create_dir_all(&dir);
+ LocalStorage {
+ sites: None,
+ filename: dir.join("sites.json"),
+ }
+ }
+
+ // TODO this function is ugly; how should this be written in idiomatic Rust?
+ // TODO make this async, inform user if we are downloading
+ pub fn sites(&mut self) -> &Vec<Site> {
+ if let Some(ref sites) = self.sites {
+ return sites;
+ }
+ self.fetch_local_sites();
+ if let Some(ref sites) = self.sites {
+ return sites;
+ }
+ self.fetch_remote_sites();
+ self.sites.as_ref().unwrap()
+ }
+
+ pub fn update_sites(&mut self) {
+ self.fetch_remote_sites()
+ }
+
+ fn fetch_local_sites(&mut self) {
+ if let Ok(file) = File::open(&self.filename) {
+ self.sites = serde_json::from_reader(file)
+ .expect("Local cache corrupted; try running `so --update-sites`")
+ }
+ }
+
+ // TODO decide whether or not I should give LocalStorage an api key..
+ // TODO cool loading animation?
+ fn fetch_remote_sites(&mut self) {
+ let resp_body = Client::new()
+ .get(stackechange_url("sites"))
+ .header("Accepts", "application/json")
+ .query(&[
+ ("pagesize", SE_SITES_PAGESIZE.to_string()),
+ ("page", "1".to_string()),
+ ])
+ .send()
+ .unwrap(); // TODO inspect response for errors e.g. throttle
+ let gz = GzDecoder::new(resp_body);
+ let wrapper: ResponseWrapper<Site> = serde_json::from_reader(gz).unwrap(); // TODO
+ self.sites = Some(wrapper.items);
+ self.store_local_sites();
+ }
+
+ fn store_local_sites(&self) {
+ let file = File::create(&self.filename).unwrap();
+ serde_json::to_writer(file, &self.sites).unwrap();
+ }
+}
+
/// Creates url from const string; can technically panic
fn stackechange_url(path: &str) -> Url {
let mut url = Url::parse(SE_URL).unwrap();