Parallelize markdown parsing, and do it upfront

author: Sam Tay <sam.chong.tay@gmail.com> 2020-06-18 18:19:37 -0700
committer: Sam Tay <sam.chong.tay@gmail.com> 2020-06-18 18:19:37 -0700
commit: 2fea044264da4a35e112110164eb98f6b19295ce (patch)
tree: e7bad01268f1d0ea3cdf381f48b0f4dbe870c3eb
parent: 99bc14e8adb80d96ba1896b79a6c2ddec32c2513 (diff)
7 files changed, 172 insertions, 76 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 7cdc66d..1b4ea90 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -423,6 +423,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3"
 
 [[package]]
+name = "either"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
+
+[[package]]
 name = "encoding_rs"
 version = "0.8.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1321,6 +1327,31 @@ dependencies = [
 ]
 
 [[package]]
+name = "rayon"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080"
+dependencies = [
+ "autocfg",
+ "crossbeam-deque",
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-queue",
+ "crossbeam-utils",
+ "lazy_static",
+ "num_cpus",
+]
+
+[[package]]
 name = "redox_syscall"
 version = "0.1.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1546,6 +1577,7 @@ dependencies = [
  "minimad",
  "phf",
  "pulldown-cmark",
+ "rayon",
  "reqwest",
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index ddb10cd..b9126e9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ serde_yaml = "0.8"
 reqwest = { version = "0.10", features = ["gzip", "json"] }
 futures = "0.3"
 tokio = { version = "0.2", features = ["full"] }
+rayon = "1.3"
 
 lazy_static = "1.4"
 minimad = "0.6"
diff --git a/src/stackexchange.rs b/src/stackexchange.rs
index ddc3c48..1d4789a 100644
--- a/src/stackexchange.rs
+++ b/src/stackexchange.rs
@@ -1,4 +1,5 @@
 use futures::stream::StreamExt;
+use rayon::prelude::*;
 use reqwest::Client;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
@@ -8,6 +9,8 @@ use std::path::PathBuf;
 
 use crate::config::{project_dir, Config};
 use crate::error::{Error, Result};
+use crate::tui::markdown;
+use crate::tui::markdown::Markdown;
 use crate::utils;
 
 /// StackExchange API v2.2 URL
@@ -50,12 +53,12 @@ pub struct Site {
 /// Represents a StackExchange answer with a custom selection of fields from
 /// the [StackExchange docs](https://api.stackexchange.com/docs/types/answer)
 #[derive(Clone, Deserialize, Debug)]
-pub struct Answer {
+pub struct Answer<S> {
     #[serde(rename = "answer_id")]
     pub id: u32,
     pub score: i32,
     #[serde(rename = "body_markdown")]
-    pub body: String,
+    pub body: S,
     pub is_accepted: bool,
 }
 
@@ -64,14 +67,14 @@ pub struct Answer {
 // TODO container over answers should be generic iterator
 // TODO let body be a generic that implements Display!
 #[derive(Clone, Deserialize, Debug)]
-pub struct Question {
+pub struct Question<S> {
     #[serde(rename = "question_id")]
     pub id: u32,
     pub score: i32,
-    pub answers: Vec<Answer>,
+    pub answers: Vec<Answer<S>>,
     pub title: String,
     #[serde(rename = "body_markdown")]
-    pub body: String,
+    pub body: S,
 }
 
 /// Internal struct that represents the boilerplate response wrapper from SE API.
@@ -110,12 +113,12 @@ impl StackExchange {
     }
 
     /// Search query at stack exchange and get a list of relevant questions
-    pub async fn search(&self) -> Result<Vec<Question>> {
+    pub async fn search(&self) -> Result<Vec<Question<Markdown>>> {
         self.search_advanced(self.config.limit).await
     }
 
     /// Parallel searches against the search/advanced endpoint across all configured sites
-    async fn search_advanced(&self, limit: u16) -> Result<Vec<Question>> {
+    async fn search_advanced(&self, limit: u16) -> Result<Vec<Question<Markdown>>> {
         futures::stream::iter(self.config.sites.clone())
             .map(|site| {
                 let clone = self.clone();
@@ -131,18 +134,18 @@ impl StackExchange {
             .map(|r| r.map_err(Error::from).and_then(|x| x))
             .collect::<Result<Vec<Vec<_>>>>()
             .map(|v| {
-                let mut all_qs: Vec<Question> = v.into_iter().flatten().collect();
+                let mut qs: Vec<Question<String>> = v.into_iter().flatten().collect();
                 if self.config.sites.len() > 1 {
-                    all_qs.sort_unstable_by_key(|q| -q.score);
+                    qs.sort_unstable_by_key(|q| -q.score);
                 }
-                all_qs
+                Self::parse_markdown(qs)
             })
     }
 
     /// Search against the site's search/advanced endpoint with a given query.
     /// Only fetches questions that have at least one answer.
-    async fn search_advanced_site(&self, site: &str, limit: u16) -> Result<Vec<Question>> {
-        Ok(self
+    async fn search_advanced_site(&self, site: &str, limit: u16) -> Result<Vec<Question<String>>> {
+        let qs = self
             .client
             .get(stackexchange_url("search/advanced"))
             .header("Accepts", "application/json")
@@ -158,16 +161,10 @@ impl StackExchange {
             ])
             .send()
             .await?
-            .json::<ResponseWrapper<Question>>()
+            .json::<ResponseWrapper<Question<String>>>()
             .await?
-            .items
-            .into_iter()
-            .map(|mut q| {
-                // TODO parallelize this (and preprocess <kbd> stuff too)
-                q.answers.sort_unstable_by_key(|a| -a.score);
-                q
-            })
-            .collect())
+            .items;
+        Ok(Self::preprocess(qs))
     }
 
     fn get_default_opts(&self) -> HashMap<&str, &str> {
@@ -178,6 +175,78 @@ impl StackExchange {
         }
         params
     }
+
+    /// Sorts each question's answers by descending score (in place, so the order
+    /// sticks) and preprocesses SE markdown to "cmark" markdown (or closer to it)
+    fn preprocess(qs: Vec<Question<String>>) -> Vec<Question<String>> {
+        qs.into_par_iter()
+            .map(|q| {
+                let Question {
+                    id,
+                    score,
+                    title,
+                    mut answers,
+                    body,
+                } = q;
+                answers.par_sort_unstable_by_key(|a| -a.score);
+                let answers = answers
+                    .into_par_iter()
+                    .map(|a| Answer {
+                        body: markdown::preprocess(a.body),
+                        ..a
+                    })
+                    .collect();
+                Question {
+                    answers,
+                    body: markdown::preprocess(body),
+                    id,
+                    score,
+                    title,
+                }
+            })
+            .collect::<Vec<_>>()
+    }
+
+    /// Parse all (already preprocessed) markdown fields in parallel, moving rather than cloning
+    fn parse_markdown(qs: Vec<Question<String>>) -> Vec<Question<Markdown>> {
+        qs.into_par_iter()
+            .map(|q| {
+                let Question {
+                    id,
+                    score,
+                    title,
+                    answers,
+                    body,
+                } = q;
+                let body = markdown::parse(body);
+                let answers = answers
+                    .into_par_iter()
+                    .map(|a| {
+                        let Answer {
+                            id,
+                            score,
+                            is_accepted,
+                            body,
+                        } = a;
+                        let body = markdown::parse(body);
+                        Answer {
+                            body,
+                            id,
+                            score,
+                            is_accepted,
+                        }
+                    })
+                    .collect::<Vec<_>>();
+                Question {
+                    body,
+                    answers,
+                    id,
+                    score,
+                    title,
+                }
+            })
+            .collect::<Vec<_>>()
+    }
 }
 
 impl LocalStorage {
diff --git a/src/tui/app.rs b/src/tui/app.rs
index bb0a923..ad1a1ea 100644
--- a/src/tui/app.rs
+++ b/src/tui/app.rs
@@ -4,11 +4,11 @@ use cursive::utils::markup::StyledString;
 use cursive::utils::span::SpannedString;
 use cursive::Cursive;
 use cursive::XY;
-use std::cmp;
 use std::collections::HashMap;
 use std::sync::Arc;
 
 use super::markdown;
+use super::markdown::Markdown;
 use super::views::{
     LayoutView, ListView, MdView, Name, Vimable, NAME_ANSWER_LIST, NAME_ANSWER_VIEW,
     NAME_QUESTION_LIST, NAME_QUESTION_VIEW,
@@ -17,16 +17,14 @@ use crate::config;
 use crate::error::Result;
 use crate::stackexchange::{Answer, Question};
 
-// TODO maybe a struct like Tui::new(stackexchange) creates App::new and impls tui.run()?
-// TODO take async questions
-// TODO take the entire SE struct for future questions
-pub fn run(qs: Vec<Question>) -> Result<()> {
+pub fn run(qs: Vec<Question<Markdown>>) -> Result<()> {
     let mut siv = cursive::default();
     siv.load_theme_file(config::theme_file_name()?).unwrap(); // TODO dont unwrap
 
-    let question_map: HashMap<u32, Question> = qs.clone().into_iter().map(|q| (q.id, q)).collect();
+    let question_map: HashMap<u32, Question<Markdown>> =
+        qs.clone().into_iter().map(|q| (q.id, q)).collect();
     let question_map = Arc::new(question_map);
-    let answer_map: HashMap<u32, Answer> = qs
+    let answer_map: HashMap<u32, Answer<Markdown>> = qs
         .clone()
         .into_iter()
         .map(|q| q.answers.into_iter().map(|a| (a.id, a)))
@@ -74,15 +72,16 @@ pub fn run(qs: Vec<Question>) -> Result<()> {
 }
 
 fn question_selected_callback(
-    question_map: Arc<HashMap<u32, Question>>,
+    question_map: Arc<HashMap<u32, Question<Markdown>>>,
     mut s: &mut Cursive,
     qid: u32,
 ) {
     let q = question_map.get(&qid).unwrap();
+    let body = &q.body;
     let XY { x, y: _y } = s.screen_size();
     // Update question view
     s.call_on_name(NAME_QUESTION_VIEW, |v: &mut MdView| {
-        v.set_content(&q.body);
+        v.set_content(body);
     })
     .expect("Panic: setting question view content failed");
     // Update answer list view
@@ -94,15 +93,14 @@ fn question_selected_callback(
     cb(&mut s)
 }
 
-fn preview_question(q: &Question) -> StyledString {
+fn preview_question(q: &Question<Markdown>) -> StyledString {
     let mut preview = pretty_score(q.score);
     preview.append_plain(&q.title);
     preview
 }
 
-fn preview_answer(screen_width: usize, a: &Answer) -> StyledString {
-    let width = cmp::min(a.body.len(), screen_width);
-    let md = markdown::preview(width, a.body.to_owned());
+fn preview_answer(screen_width: usize, a: &Answer<Markdown>) -> StyledString {
+    let md = markdown::preview(screen_width, &a.body);
     let mut preview = pretty_score(a.score);
     if a.is_accepted {
         preview.append_styled(
diff --git a/src/tui/markdown.rs b/src/tui/markdown.rs
index 6c44684..0330696 100644
--- a/src/tui/markdown.rs
+++ b/src/tui/markdown.rs
@@ -17,50 +17,23 @@ use unicode_width::UnicodeWidthStr;
 
 use super::entities::is_entity;
 
+pub type Markdown = StyledString;
+
 /// Parses the given string as markdown text.
+/// **Note**: Assumes preprocessing has taken place
 pub fn parse<S>(input: S) -> StyledString
 where
     S: Into<String>,
 {
-    let input = preprocess(input.into());
+    let input = input.into();
     let spans = parse_spans(&input);
     //let output = build_output(&spans);
     StyledString::with_spans(input, spans)
 }
 
-/// Preview markdown. Largely heuristic.
-pub fn preview<S>(size: usize, input: S) -> StyledString
-where
-    S: Into<String>,
-{
-    // DO the initial parsing here too, not just in `parse`
-    let generous_size = (size as f32) * 1.2;
-    let generous_size = generous_size.ceil();
-    let generous_size = generous_size as usize;
-    let mut input = input.into();
-    input.truncate(generous_size);
-    let input = preprocess(input);
-    let spans = parse_spans(&input)
-        .into_iter()
-        // Filter out newlines
-        .map(|ix_span| match ix_span {
-            IndexedSpan { width: 0, .. } => IndexedSpan {
-                content: IndexedCow::Owned(" ".to_owned()),
-                width: 1,
-                ..ix_span
-            },
-            is => is,
-        })
-        .collect();
-
-    let mut prev = StyledString::with_spans(input, spans);
-    prev.append_plain("...");
-    prev
-}
-
-fn preprocess(input: String) -> String {
-    // TODO handle other stackexchange oddities here ENTITIES
-    // TODO then benchmark
+// TODO handle other stackexchange oddities here ENTITIES
+// TODO then benchmark
+pub fn preprocess(input: String) -> String {
     input
         .as_str()
         .trim()
@@ -68,8 +41,33 @@ fn preprocess(input: String) -> String {
         .replace("</kbd>", "]**")
 }
 
+/// Preview markdown: keep spans until their summed width exceeds `width`, then append "..."
+pub fn preview(width: usize, input: &StyledString) -> StyledString {
+    let mut w = 0;
+    let mut new_spans = Vec::new();
+    for span in input.spans_raw() {
+        // Zero-width spans are treated as newlines and replaced by a single-column space
+        if span.width == 0 {
+            w += 1;
+            new_spans.push(IndexedSpan {
+                content: IndexedCow::Owned(" ".to_owned()),
+                width: 1,
+                ..*span
+            });
+        } else {
+            w += span.width;
+            new_spans.push(span.clone());
+        }
+        if w > width {
+            break;
+        }
+    }
+    let mut prev = StyledString::with_spans(input.source(), new_spans);
+    prev.append_plain("...");
+    prev
+}
+
 /// Parse the given markdown text into a list of spans.
-/// Assumes preprocessing has taken place
 /// This is a shortcut for `Parser::new(preprocessed_input).collect()`.
 fn parse_spans(input: &str) -> Vec<StyledIndexedSpan> {
     Parser::new(input).collect()
diff --git a/src/tui/mod.rs b/src/tui/mod.rs
index 634cb29..be3764f 100644
--- a/src/tui/mod.rs
+++ b/src/tui/mod.rs
@@ -1,7 +1,7 @@
 mod app;
 mod entities;
 mod enumerable;
-mod markdown;
+pub mod markdown;
 mod ui;
 mod views;
 
diff --git a/src/tui/views.rs b/src/tui/views.rs
index 2fc2136..e4c8649 100644
--- a/src/tui/views.rs
+++ b/src/tui/views.rs
@@ -11,7 +11,7 @@ use std::fmt;
 use std::fmt::Display;
 use std::rc::Rc;
 
-use super::markdown;
+use super::markdown::Markdown;
 use crate::error::Result;
 
 pub const NAME_QUESTION_LIST: &str = "question_list";
@@ -243,13 +243,10 @@ impl MdView {
     }
 
     /// Panics for now, to explore when result is None
-    pub fn set_content<S>(&mut self, content: S)
-    where
-        S: Into<String>,
-    {
+    pub fn set_content(&mut self, content: &Markdown) {
         self.view
             .call_on_name(&self.inner_name, |tv: &mut TextView| {
-                tv.set_content(markdown::parse(content))
+                tv.set_content(content.clone())
             })
             .expect("unwrap failed in MdView.set_content")
     }
@@ -310,6 +307,7 @@ impl ViewWrapper for LayoutView {
     fn wrap_on_event(&mut self, event: Event) -> EventResult {
         match event {
             Event::WindowResize => {
+                println!("window resized");
                 self.size_invalidated = true;
             }
             Event::Char(' ') => {
author	Sam Tay <sam.chong.tay@gmail.com>	2020-06-18 18:19:37 -0700
committer	Sam Tay <sam.chong.tay@gmail.com>	2020-06-18 18:19:37 -0700
commit	2fea044264da4a35e112110164eb98f6b19295ce (patch)
tree	e7bad01268f1d0ea3cdf381f48b0f4dbe870c3eb
parent	99bc14e8adb80d96ba1896b79a6c2ddec32c2513 (diff)