From 2fea044264da4a35e112110164eb98f6b19295ce Mon Sep 17 00:00:00 2001 From: Sam Tay Date: Thu, 18 Jun 2020 18:19:37 -0700 Subject: Parallelize markdown parsing, and do it upfront --- Cargo.lock | 32 +++++++++++++++ Cargo.toml | 1 + src/stackexchange.rs | 111 +++++++++++++++++++++++++++++++++++++++++---------- src/tui/app.rs | 24 +++++------ src/tui/markdown.rs | 68 +++++++++++++++---------------- src/tui/mod.rs | 2 +- src/tui/views.rs | 10 ++--- 7 files changed, 172 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7cdc66d..1b4ea90 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -422,6 +422,12 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3" +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" + [[package]] name = "encoding_rs" version = "0.8.23" @@ -1320,6 +1326,31 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280" +dependencies = [ + "crossbeam-deque", + "crossbeam-queue", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.1.56" @@ -1546,6 +1577,7 @@ dependencies = [ "minimad", "phf", "pulldown-cmark", + "rayon", "reqwest", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index ddb10cd..b9126e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ serde_yaml = "0.8" reqwest = { version = "0.10", features = ["gzip", "json"] } futures = "0.3" tokio = { version = "0.2", features = ["full"] } +rayon = "1.3" lazy_static = "1.4" minimad = "0.6" diff --git a/src/stackexchange.rs b/src/stackexchange.rs index ddc3c48..1d4789a 100644 --- a/src/stackexchange.rs +++ b/src/stackexchange.rs @@ -1,4 +1,5 @@ use futures::stream::StreamExt; +use rayon::prelude::*; use reqwest::Client; use reqwest::Url; use serde::{Deserialize, Serialize}; @@ -8,6 +9,8 @@ use std::path::PathBuf; use crate::config::{project_dir, Config}; use crate::error::{Error, Result}; +use crate::tui::markdown; +use crate::tui::markdown::Markdown; use crate::utils; /// StackExchange API v2.2 URL @@ -50,12 +53,12 @@ pub struct Site { /// Represents a StackExchange answer with a custom selection of fields from /// the [StackExchange docs](https://api.stackexchange.com/docs/types/answer) #[derive(Clone, Deserialize, Debug)] -pub struct Answer { +pub struct Answer { #[serde(rename = "answer_id")] pub id: u32, pub score: i32, #[serde(rename = "body_markdown")] - pub body: String, + pub body: S, pub is_accepted: bool, } @@ -64,14 +67,14 @@ pub struct Answer { // TODO container over answers should be generic iterator // TODO let body be a generic that implements Display! #[derive(Clone, Deserialize, Debug)] -pub struct Question { +pub struct Question { #[serde(rename = "question_id")] pub id: u32, pub score: i32, - pub answers: Vec, + pub answers: Vec>, pub title: String, #[serde(rename = "body_markdown")] - pub body: String, + pub body: S, } /// Internal struct that represents the boilerplate response wrapper from SE API. @@ -110,12 +113,12 @@ impl StackExchange { } /// Search query at stack exchange and get a list of relevant questions - pub async fn search(&self) -> Result> { + pub async fn search(&self) -> Result>> { self.search_advanced(self.config.limit).await } /// Parallel searches against the search/advanced endpoint across all configured sites - async fn search_advanced(&self, limit: u16) -> Result> { + async fn search_advanced(&self, limit: u16) -> Result>> { futures::stream::iter(self.config.sites.clone()) .map(|site| { let clone = self.clone(); @@ -131,18 +134,18 @@ impl StackExchange { .map(|r| r.map_err(Error::from).and_then(|x| x)) .collect::>>>() .map(|v| { - let mut all_qs: Vec = v.into_iter().flatten().collect(); + let mut qs: Vec> = v.into_iter().flatten().collect(); if self.config.sites.len() > 1 { - all_qs.sort_unstable_by_key(|q| -q.score); + qs.sort_unstable_by_key(|q| -q.score); } - all_qs + Self::parse_markdown(qs) }) } /// Search against the site's search/advanced endpoint with a given query. /// Only fetches questions that have at least one answer. - async fn search_advanced_site(&self, site: &str, limit: u16) -> Result> { - Ok(self + async fn search_advanced_site(&self, site: &str, limit: u16) -> Result>> { + let qs = self .client .get(stackexchange_url("search/advanced")) .header("Accepts", "application/json") @@ -158,16 +161,10 @@ impl StackExchange { ]) .send() .await? - .json::>() + .json::>>() .await? - .items - .into_iter() - .map(|mut q| { - // TODO parallelize this (and preprocess stuff too) - q.answers.sort_unstable_by_key(|a| -a.score); - q - }) - .collect()) + .items; + Ok(Self::preprocess(qs)) } fn get_default_opts(&self) -> HashMap<&str, &str> { @@ -178,6 +175,78 @@ impl StackExchange { } params } + + /// Sorts answers by score + /// Preprocess SE markdown to "cmark" markdown (or something closer to it) + fn preprocess(qs: Vec>) -> Vec> { + qs.par_iter() + .map(|q| { + let Question { + id, + score, + title, + answers, + body, + } = q; + answers.to_vec().par_sort_unstable_by_key(|a| -a.score); + let answers = answers + .par_iter() + .map(|a| Answer { + body: markdown::preprocess(a.body.clone()), + ..*a + }) + .collect(); + Question { + answers, + body: markdown::preprocess(body.to_string()), + id: *id, + score: *score, + title: title.to_string(), + } + }) + .collect::>() + } + + /// Parse all markdown fields + fn parse_markdown(qs: Vec>) -> Vec> { + qs.par_iter() + .map(|q| { + let Question { + id, + score, + title, + answers, + body, + } = q; + let body = markdown::parse(body); + let answers = answers + .par_iter() + .map(|a| { + let Answer { + id, + score, + is_accepted, + body, + } = a; + let body = markdown::parse(body); + Answer { + body, + id: *id, + score: *score, + is_accepted: *is_accepted, + } + }) + .collect::>(); + Question { + body, + answers, + id: *id, + score: *score, + title: title.to_string(), + } + }) + .collect::>() + } } impl LocalStorage { diff --git a/src/tui/app.rs b/src/tui/app.rs index bb0a923..ad1a1ea 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -4,11 +4,11 @@ use cursive::utils::markup::StyledString; use cursive::utils::span::SpannedString; use cursive::Cursive; use cursive::XY; -use std::cmp; use std::collections::HashMap; use std::sync::Arc; use super::markdown; +use super::markdown::Markdown; use super::views::{ LayoutView, ListView, MdView, Name, Vimable, NAME_ANSWER_LIST, NAME_ANSWER_VIEW, NAME_QUESTION_LIST, NAME_QUESTION_VIEW, @@ -17,16 +17,14 @@ use crate::config; use crate::error::Result; use crate::stackexchange::{Answer, Question}; -// TODO maybe a struct like Tui::new(stackexchange) creates App::new and impls tui.run()? -// TODO take async questions -// TODO take the entire SE struct for future questions -pub fn run(qs: Vec) -> Result<()> { +pub fn run(qs: Vec>) -> Result<()> { let mut siv = cursive::default(); siv.load_theme_file(config::theme_file_name()?).unwrap(); // TODO dont unwrap - let question_map: HashMap = qs.clone().into_iter().map(|q| (q.id, q)).collect(); + let question_map: HashMap> = + qs.clone().into_iter().map(|q| (q.id, q)).collect(); let question_map = Arc::new(question_map); - let answer_map: HashMap = qs + let answer_map: HashMap> = qs .clone() .into_iter() .map(|q| q.answers.into_iter().map(|a| (a.id, a))) @@ -74,15 +72,16 @@ pub fn run(qs: Vec) -> Result<()> { } fn question_selected_callback( - question_map: Arc>, + question_map: Arc>>, mut s: &mut Cursive, qid: u32, ) { let q = question_map.get(&qid).unwrap(); + let body = &q.body; let XY { x, y: _y } = s.screen_size(); // Update question view s.call_on_name(NAME_QUESTION_VIEW, |v: &mut MdView| { - v.set_content(&q.body); + v.set_content(body); }) .expect("Panic: setting question view content failed"); // Update answer list view @@ -94,15 +93,14 @@ fn question_selected_callback( cb(&mut s) } -fn preview_question(q: &Question) -> StyledString { +fn preview_question(q: &Question) -> StyledString { let mut preview = pretty_score(q.score); preview.append_plain(&q.title); preview } -fn preview_answer(screen_width: usize, a: &Answer) -> StyledString { - let width = cmp::min(a.body.len(), screen_width); - let md = markdown::preview(width, a.body.to_owned()); +fn preview_answer(screen_width: usize, a: &Answer) -> StyledString { + let md = markdown::preview(screen_width, &a.body); let mut preview = pretty_score(a.score); if a.is_accepted { preview.append_styled( diff --git a/src/tui/markdown.rs b/src/tui/markdown.rs index 6c44684..0330696 100644 --- a/src/tui/markdown.rs +++ b/src/tui/markdown.rs @@ -17,50 +17,23 @@ use unicode_width::UnicodeWidthStr; use super::entities::is_entity; +pub type Markdown = StyledString; + /// Parses the given string as markdown text. +/// **Note**: Assumes preprocessing has taken place pub fn parse(input: S) -> StyledString where S: Into, { - let input = preprocess(input.into()); + let input = input.into(); let spans = parse_spans(&input); //let output = build_output(&spans); StyledString::with_spans(input, spans) } -/// Preview markdown. Largely heuristic. -pub fn preview(size: usize, input: S) -> StyledString -where - S: Into, -{ - // DO the initial parsing here too, not just in `parse` - let generous_size = (size as f32) * 1.2; - let generous_size = generous_size.ceil(); - let generous_size = generous_size as usize; - let mut input = input.into(); - input.truncate(generous_size); - let input = preprocess(input); - let spans = parse_spans(&input) - .into_iter() - // Filter out newlines - .map(|ix_span| match ix_span { - IndexedSpan { width: 0, .. } => IndexedSpan { - content: IndexedCow::Owned(" ".to_owned()), - width: 1, - ..ix_span - }, - is => is, - }) - .collect(); - - let mut prev = StyledString::with_spans(input, spans); - prev.append_plain("..."); - prev -} - -fn preprocess(input: String) -> String { - // TODO handle other stackexchange oddities here ENTITIES - // TODO then benchmark +// TODO handle other stackexchange oddities here ENTITIES +// TODO then benchmark +pub fn preprocess(input: String) -> String { input .as_str() .trim() @@ -68,8 +41,33 @@ fn preprocess(input: String) -> String { .replace("", "]**") } +/// Preview markdown of the given length +pub fn preview(width: usize, input: &StyledString) -> StyledString { + let mut w = 0; + let mut new_spans = Vec::new(); + for span in input.spans_raw() { + // Filter newlines + if span.width == 0 { + w += 1; + new_spans.push(IndexedSpan { + content: IndexedCow::Owned(" ".to_owned()), + width: 1, + ..*span + }); + } else { + w += span.width; + new_spans.push(span.clone()); + } + if w > width { + break; + } + } + let mut prev = StyledString::with_spans(input.source(), new_spans); + prev.append_plain("..."); + prev +} + /// Parse the given markdown text into a list of spans. -/// Assumes preprocessing has taken place /// This is a shortcut for `Parser::new(preprocessed_input).collect()`. fn parse_spans(input: &str) -> Vec { Parser::new(input).collect() diff --git a/src/tui/mod.rs b/src/tui/mod.rs index 634cb29..be3764f 100644 --- a/src/tui/mod.rs +++ b/src/tui/mod.rs @@ -1,7 +1,7 @@ mod app; mod entities; mod enumerable; -mod markdown; +pub mod markdown; mod ui; mod views; diff --git a/src/tui/views.rs b/src/tui/views.rs index 2fc2136..e4c8649 100644 --- a/src/tui/views.rs +++ b/src/tui/views.rs @@ -11,7 +11,7 @@ use std::fmt; use std::fmt::Display; use std::rc::Rc; -use super::markdown; +use super::markdown::Markdown; use crate::error::Result; pub const NAME_QUESTION_LIST: &str = "question_list"; @@ -243,13 +243,10 @@ impl MdView { } /// Panics for now, to explore when result is None - pub fn set_content(&mut self, content: S) - where - S: Into, - { + pub fn set_content(&mut self, content: &Markdown) { self.view .call_on_name(&self.inner_name, |tv: &mut TextView| { - tv.set_content(markdown::parse(content)) + tv.set_content(content.clone()) }) .expect("unwrap failed in MdView.set_content") } @@ -310,6 +307,7 @@ impl ViewWrapper for LayoutView { fn wrap_on_event(&mut self, event: Event) -> EventResult { match event { Event::WindowResize => { + println!("window resized"); self.size_invalidated = true; } Event::Char(' ') => { -- cgit v1.2.3