summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sam Tay <sam.chong.tay@gmail.com> 2020-06-18 18:19:37 -0700
committer Sam Tay <sam.chong.tay@gmail.com> 2020-06-18 18:19:37 -0700
commit 2fea044264da4a35e112110164eb98f6b19295ce (patch)
tree e7bad01268f1d0ea3cdf381f48b0f4dbe870c3eb
parent 99bc14e8adb80d96ba1896b79a6c2ddec32c2513 (diff)
Parallelize markdown parsing, and do it upfront
-rw-r--r-- Cargo.lock 32
-rw-r--r-- Cargo.toml 1
-rw-r--r-- src/stackexchange.rs 111
-rw-r--r-- src/tui/app.rs 24
-rw-r--r-- src/tui/markdown.rs 68
-rw-r--r-- src/tui/mod.rs 2
-rw-r--r-- src/tui/views.rs 10
7 files changed, 172 insertions, 76 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 7cdc66d..1b4ea90 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -423,6 +423,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4358a9e11b9a09cf52383b451b49a169e8d797b68aa02301ff586d70d9661ea3"
[[package]]
+name = "either"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
+
+[[package]]
name = "encoding_rs"
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1321,6 +1327,31 @@ dependencies = [
]
[[package]]
+name = "rayon"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62f02856753d04e03e26929f820d0a0a337ebe71f849801eea335d464b349080"
+dependencies = [
+ "autocfg",
+ "crossbeam-deque",
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e92e15d89083484e11353891f1af602cc661426deb9564c298b270c726973280"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-queue",
+ "crossbeam-utils",
+ "lazy_static",
+ "num_cpus",
+]
+
+[[package]]
name = "redox_syscall"
version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1546,6 +1577,7 @@ dependencies = [
"minimad",
"phf",
"pulldown-cmark",
+ "rayon",
"reqwest",
"serde",
"serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index ddb10cd..b9126e9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,6 +17,7 @@ serde_yaml = "0.8"
reqwest = { version = "0.10", features = ["gzip", "json"] }
futures = "0.3"
tokio = { version = "0.2", features = ["full"] }
+rayon = "1.3"
lazy_static = "1.4"
minimad = "0.6"
diff --git a/src/stackexchange.rs b/src/stackexchange.rs
index ddc3c48..1d4789a 100644
--- a/src/stackexchange.rs
+++ b/src/stackexchange.rs
@@ -1,4 +1,5 @@
use futures::stream::StreamExt;
+use rayon::prelude::*;
use reqwest::Client;
use reqwest::Url;
use serde::{Deserialize, Serialize};
@@ -8,6 +9,8 @@ use std::path::PathBuf;
use crate::config::{project_dir, Config};
use crate::error::{Error, Result};
+use crate::tui::markdown;
+use crate::tui::markdown::Markdown;
use crate::utils;
/// StackExchange API v2.2 URL
@@ -50,12 +53,12 @@ pub struct Site {
/// Represents a StackExchange answer with a custom selection of fields from
/// the [StackExchange docs](https://api.stackexchange.com/docs/types/answer)
#[derive(Clone, Deserialize, Debug)]
-pub struct Answer {
+pub struct Answer<S> {
#[serde(rename = "answer_id")]
pub id: u32,
pub score: i32,
#[serde(rename = "body_markdown")]
- pub body: String,
+ pub body: S,
pub is_accepted: bool,
}
@@ -64,14 +67,14 @@ pub struct Answer {
// TODO container over answers should be generic iterator
// TODO let body be a generic that implements Display!
#[derive(Clone, Deserialize, Debug)]
-pub struct Question {
+pub struct Question<S> {
#[serde(rename = "question_id")]
pub id: u32,
pub score: i32,
- pub answers: Vec<Answer>,
+ pub answers: Vec<Answer<S>>,
pub title: String,
#[serde(rename = "body_markdown")]
- pub body: String,
+ pub body: S,
}
/// Internal struct that represents the boilerplate response wrapper from SE API.
@@ -110,12 +113,12 @@ impl StackExchange {
}
/// Search query at stack exchange and get a list of relevant questions
- pub async fn search(&self) -> Result<Vec<Question>> {
+ pub async fn search(&self) -> Result<Vec<Question<Markdown>>> {
self.search_advanced(self.config.limit).await
}
/// Parallel searches against the search/advanced endpoint across all configured sites
- async fn search_advanced(&self, limit: u16) -> Result<Vec<Question>> {
+ async fn search_advanced(&self, limit: u16) -> Result<Vec<Question<Markdown>>> {
futures::stream::iter(self.config.sites.clone())
.map(|site| {
let clone = self.clone();
@@ -131,18 +134,18 @@ impl StackExchange {
.map(|r| r.map_err(Error::from).and_then(|x| x))
.collect::<Result<Vec<Vec<_>>>>()
.map(|v| {
- let mut all_qs: Vec<Question> = v.into_iter().flatten().collect();
+ let mut qs: Vec<Question<String>> = v.into_iter().flatten().collect();
if self.config.sites.len() > 1 {
- all_qs.sort_unstable_by_key(|q| -q.score);
+ qs.sort_unstable_by_key(|q| -q.score);
}
- all_qs
+ Self::parse_markdown(qs)
})
}
/// Search against the site's search/advanced endpoint with a given query.
/// Only fetches questions that have at least one answer.
- async fn search_advanced_site(&self, site: &str, limit: u16) -> Result<Vec<Question>> {
- Ok(self
+ async fn search_advanced_site(&self, site: &str, limit: u16) -> Result<Vec<Question<String>>> {
+ let qs = self
.client
.get(stackexchange_url("search/advanced"))
.header("Accepts", "application/json")
@@ -158,16 +161,10 @@ impl StackExchange {
])
.send()
.await?
- .json::<ResponseWrapper<Question>>()
+ .json::<ResponseWrapper<Question<String>>>()
.await?
- .items
- .into_iter()
- .map(|mut q| {
- // TODO parallelize this (and preprocess <kbd> stuff too)
- q.answers.sort_unstable_by_key(|a| -a.score);
- q
- })
- .collect())
+ .items;
+ Ok(Self::preprocess(qs))
}
fn get_default_opts(&self) -> HashMap<&str, &str> {
@@ -178,6 +175,78 @@ impl StackExchange {
}
params
}
+
+    /// Sorts each question's answers by descending score and preprocesses
+    /// every markdown body (the question's and each answer's) from SE markdown
+    /// to "cmark" markdown (or something closer to it).
+    ///
+    /// Takes ownership of `qs`, so titles and bodies are moved through the
+    /// pipeline rather than cloned. Questions keep their input order.
+    fn preprocess(qs: Vec<Question<String>>) -> Vec<Question<String>> {
+        qs.into_par_iter()
+            .map(|q| {
+                let Question {
+                    id,
+                    score,
+                    title,
+                    mut answers,
+                    body,
+                } = q;
+                // Sort the owned vector in place. Sorting a `to_vec()` copy
+                // would throw the result away and leave the answers in the
+                // order the API returned them.
+                answers.par_sort_unstable_by_key(|a| -a.score);
+                let answers = answers
+                    .into_par_iter()
+                    .map(|a| Answer {
+                        body: markdown::preprocess(a.body),
+                        ..a
+                    })
+                    .collect();
+                Question {
+                    answers,
+                    body: markdown::preprocess(body),
+                    id,
+                    score,
+                    title,
+                }
+            })
+            .collect()
+    }
+
+    /// Parse all markdown fields.
+    ///
+    /// Converts every question body and answer body from a raw `String` into
+    /// parsed `Markdown` via `markdown::parse`, which assumes its input has
+    /// already been preprocessed. Questions and answers keep their input
+    /// order; ids, scores, titles and acceptance flags are carried over as is.
+    ///
+    /// Takes ownership of `qs`, so titles and bodies are moved into the result
+    /// instead of being cloned.
+    fn parse_markdown(qs: Vec<Question<String>>) -> Vec<Question<Markdown>> {
+        qs.into_par_iter()
+            .map(|q| {
+                let Question {
+                    id,
+                    score,
+                    title,
+                    answers,
+                    body,
+                } = q;
+                let answers = answers
+                    .into_par_iter()
+                    .map(|a| {
+                        let Answer {
+                            id,
+                            score,
+                            is_accepted,
+                            body,
+                        } = a;
+                        Answer {
+                            id,
+                            score,
+                            is_accepted,
+                            body: markdown::parse(body),
+                        }
+                    })
+                    .collect::<Vec<_>>();
+                Question {
+                    id,
+                    score,
+                    title,
+                    answers,
+                    body: markdown::parse(body),
+                }
+            })
+            .collect()
+    }
}
impl LocalStorage {
diff --git a/src/tui/app.rs b/src/tui/app.rs
index bb0a923..ad1a1ea 100644
--- a/src/tui/app.rs
+++ b/src/tui/app.rs
@@ -4,11 +4,11 @@ use cursive::utils::markup::StyledString;
use cursive::utils::span::SpannedString;
use cursive::Cursive;
use cursive::XY;
-use std::cmp;
use std::collections::HashMap;
use std::sync::Arc;
use super::markdown;
+use super::markdown::Markdown;
use super::views::{
LayoutView, ListView, MdView, Name, Vimable, NAME_ANSWER_LIST, NAME_ANSWER_VIEW,
NAME_QUESTION_LIST, NAME_QUESTION_VIEW,
@@ -17,16 +17,14 @@ use crate::config;
use crate::error::Result;
use crate::stackexchange::{Answer, Question};
-// TODO maybe a struct like Tui::new(stackexchange) creates App::new and impls tui.run()?
-// TODO take async questions
-// TODO take the entire SE struct for future questions
-pub fn run(qs: Vec<Question>) -> Result<()> {
+pub fn run(qs: Vec<Question<Markdown>>) -> Result<()> {
let mut siv = cursive::default();
siv.load_theme_file(config::theme_file_name()?).unwrap(); // TODO dont unwrap
- let question_map: HashMap<u32, Question> = qs.clone().into_iter().map(|q| (q.id, q)).collect();
+ let question_map: HashMap<u32, Question<Markdown>> =
+ qs.clone().into_iter().map(|q| (q.id, q)).collect();
let question_map = Arc::new(question_map);
- let answer_map: HashMap<u32, Answer> = qs
+ let answer_map: HashMap<u32, Answer<Markdown>> = qs
.clone()
.into_iter()
.map(|q| q.answers.into_iter().map(|a| (a.id, a)))
@@ -74,15 +72,16 @@ pub fn run(qs: Vec<Question>) -> Result<()> {
}
fn question_selected_callback(
- question_map: Arc<HashMap<u32, Question>>,
+ question_map: Arc<HashMap<u32, Question<Markdown>>>,
mut s: &mut Cursive,
qid: u32,
) {
let q = question_map.get(&qid).unwrap();
+ let body = &q.body;
let XY { x, y: _y } = s.screen_size();
// Update question view
s.call_on_name(NAME_QUESTION_VIEW, |v: &mut MdView| {
- v.set_content(&q.body);
+ v.set_content(body);
})
.expect("Panic: setting question view content failed");
// Update answer list view
@@ -94,15 +93,14 @@ fn question_selected_callback(
cb(&mut s)
}
-fn preview_question(q: &Question) -> StyledString {
+fn preview_question(q: &Question<Markdown>) -> StyledString {
let mut preview = pretty_score(q.score);
preview.append_plain(&q.title);
preview
}
-fn preview_answer(screen_width: usize, a: &Answer) -> StyledString {
- let width = cmp::min(a.body.len(), screen_width);
- let md = markdown::preview(width, a.body.to_owned());
+fn preview_answer(screen_width: usize, a: &Answer<Markdown>) -> StyledString {
+ let md = markdown::preview(screen_width, &a.body);
let mut preview = pretty_score(a.score);
if a.is_accepted {
preview.append_styled(
diff --git a/src/tui/markdown.rs b/src/tui/markdown.rs
index 6c44684..0330696 100644
--- a/src/tui/markdown.rs
+++ b/src/tui/markdown.rs
@@ -17,50 +17,23 @@ use unicode_width::UnicodeWidthStr;
use super::entities::is_entity;
+pub type Markdown = StyledString;
+
/// Parses the given string as markdown text.
+/// **Note**: Assumes preprocessing has taken place
pub fn parse<S>(input: S) -> StyledString
where
S: Into<String>,
{
- let input = preprocess(input.into());
+ let input = input.into();
let spans = parse_spans(&input);
//let output = build_output(&spans);
StyledString::with_spans(input, spans)
}
-/// Preview markdown. Largely heuristic.
-pub fn preview<S>(size: usize, input: S) -> StyledString
-where
- S: Into<String>,
-{
- // DO the initial parsing here too, not just in `parse`
- let generous_size = (size as f32) * 1.2;
- let generous_size = generous_size.ceil();
- let generous_size = generous_size as usize;
- let mut input = input.into();
- input.truncate(generous_size);
- let input = preprocess(input);
- let spans = parse_spans(&input)
- .into_iter()
- // Filter out newlines
- .map(|ix_span| match ix_span {
- IndexedSpan { width: 0, .. } => IndexedSpan {
- content: IndexedCow::Owned(" ".to_owned()),
- width: 1,
- ..ix_span
- },
- is => is,
- })
- .collect();
-
- let mut prev = StyledString::with_spans(input, spans);
- prev.append_plain("...");
- prev
-}
-
-fn preprocess(input: String) -> String {
- // TODO handle other stackexchange oddities here ENTITIES
- // TODO then benchmark
+// TODO handle other stackexchange oddities here ENTITIES
+// TODO then benchmark
+pub fn preprocess(input: String) -> String {
input
.as_str()
.trim()
@@ -68,8 +41,33 @@ fn preprocess(input: String) -> String {
.replace("</kbd>", "]**")
}
+/// Builds a short preview of already-parsed markdown, roughly `width` columns wide.
+///
+/// Copies spans from `input` in order, accumulating their widths, and stops
+/// once the running total exceeds `width`. The span that crosses the limit is
+/// still included, so the preview can be wider than `width`; truncation is
+/// per span, not per character.
+///
+/// Zero-width spans (presumably line breaks -- confirm against the parser) are
+/// replaced by a single space so the preview stays on one line.
+///
+/// A plain "..." is always appended, even when all of `input` fit.
+pub fn preview(width: usize, input: &StyledString) -> StyledString {
+ // Running total of the display width copied so far.
+ let mut w = 0;
+ let mut new_spans = Vec::new();
+ for span in input.spans_raw() {
+ // Filter newlines
+ if span.width == 0 {
+ // The replacement space counts as one column.
+ w += 1;
+ new_spans.push(IndexedSpan {
+ content: IndexedCow::Owned(" ".to_owned()),
+ width: 1,
+ ..*span
+ });
+ } else {
+ w += span.width;
+ new_spans.push(span.clone());
+ }
+ // Checked after pushing, so the span that overflows `width` is kept.
+ if w > width {
+ break;
+ }
+ }
+ // Reuse the original source text with the selected spans.
+ let mut prev = StyledString::with_spans(input.source(), new_spans);
+ prev.append_plain("...");
+ prev
+}
+
/// Parse the given markdown text into a list of spans.
-/// Assumes preprocessing has taken place
/// This is a shortcut for `Parser::new(preprocessed_input).collect()`.
fn parse_spans(input: &str) -> Vec<StyledIndexedSpan> {
Parser::new(input).collect()
diff --git a/src/tui/mod.rs b/src/tui/mod.rs
index 634cb29..be3764f 100644
--- a/src/tui/mod.rs
+++ b/src/tui/mod.rs
@@ -1,7 +1,7 @@
mod app;
mod entities;
mod enumerable;
-mod markdown;
+pub mod markdown;
mod ui;
mod views;
diff --git a/src/tui/views.rs b/src/tui/views.rs
index 2fc2136..e4c8649 100644
--- a/src/tui/views.rs
+++ b/src/tui/views.rs
@@ -11,7 +11,7 @@ use std::fmt;
use std::fmt::Display;
use std::rc::Rc;
-use super::markdown;
+use super::markdown::Markdown;
use crate::error::Result;
pub const NAME_QUESTION_LIST: &str = "question_list";
@@ -243,13 +243,10 @@ impl MdView {
}
/// Panics for now, to explore when result is None
- pub fn set_content<S>(&mut self, content: S)
- where
- S: Into<String>,
- {
+ pub fn set_content(&mut self, content: &Markdown) {
self.view
.call_on_name(&self.inner_name, |tv: &mut TextView| {
- tv.set_content(markdown::parse(content))
+ tv.set_content(content.clone())
})
.expect("unwrap failed in MdView.set_content")
}
@@ -310,6 +307,7 @@ impl ViewWrapper for LayoutView {
fn wrap_on_event(&mut self, event: Event) -> EventResult {
match event {
Event::WindowResize => {
+ println!("window resized");
self.size_invalidated = true;
}
Event::Char(' ') => {