summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- crate_db/Cargo.toml 1
-rw-r--r-- crate_db/src/lib_crate_db.rs 22
-rw-r--r-- kitchen_sink/src/lib_kitchen_sink.rs 2
-rw-r--r-- reindex/src/bin/reindex_crates.rs 13
m--------- render_readme 0
-rw-r--r-- rich_crate/src/rich_crate_version.rs 6
6 files changed, 22 insertions, 22 deletions
diff --git a/crate_db/Cargo.toml b/crate_db/Cargo.toml
index f76a110..e6f5a73 100644
--- a/crate_db/Cargo.toml
+++ b/crate_db/Cargo.toml
@@ -20,3 +20,4 @@ chrono = "0.4.2"
thread_local = "0.3.6"
parking_lot = "0.9"
rake = { git = "https://github.com/kornelski/rake-rs" }
+render_readme = { git = "https://gitlab.com/crates.rs/render_readme.git", version = "0.6.3" }
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs
index 52c76d6..527460e 100644
--- a/crate_db/src/lib_crate_db.rs
+++ b/crate_db/src/lib_crate_db.rs
@@ -13,11 +13,11 @@ extern crate lazy_static;
use chrono::prelude::*;
use failure::ResultExt;
use rich_crate::Include;
-use rich_crate::Markup;
use rich_crate::Origin;
use rich_crate::Repo;
use rich_crate::RichCrate;
use rich_crate::RichCrateVersion;
+use render_readme::Renderer;
use rusqlite::*;
use std::cell::RefCell;
use std::collections::HashMap;
@@ -129,7 +129,7 @@ impl CrateDb {
insert_keyword.add_synonyms(&self.tag_synonyms);
{
- let d = Self::extract_text_phrases(&c);
+ let d = Self::extract_text_phrases(&c, &Renderer::new(None));
let mut sw = rake::StopWords::new();
sw.reserve(STOPWORDS.len());
sw.extend(STOPWORDS.iter().map(|s| s.to_string())); // TODO: use real stopwords, THEN filter via STOPWORDS again, because multiple Rust-y words are fine
@@ -814,7 +814,7 @@ impl CrateDb {
}
// returns an array of lowercase phrases
- fn extract_text_phrases(krate: &RichCrateVersion) -> Vec<(f64, String)> {
+ fn extract_text_phrases(krate: &RichCrateVersion, renderer: &Renderer) -> Vec<(f64, String)> {
let mut out = Vec::new();
let mut len = 0;
if let Some(s) = krate.description() {
@@ -827,9 +827,7 @@ impl CrateDb {
}
if let Ok(Some(r)) = krate.readme() {
// render readme to DOM, extract nodes
- let sub = match r.markup {
- Markup::Markdown(ref s) | Markup::Rst(ref s) => s,
- };
+ let sub = renderer.visible_text(&r.markup);
for par in sub.split('\n') {
if len > 200 {
break;
@@ -840,13 +838,15 @@ impl CrateDb {
continue;
}
let par = par.replace("http://", " ").replace("https://", " ");
- len += par.len();
- out.push((0.4, par.to_lowercase()));
- }
- }
- out
+ if !par.is_empty() {
+ len += par.len();
+ out.push((0.4, par.to_lowercase()));
}
+ }
}
+ out
+ }
+}
pub enum RepoChange {
Removed { crate_name: String, weight: f64 },
diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs
index 4b0d191..f82ccd8 100644
--- a/kitchen_sink/src/lib_kitchen_sink.rs
+++ b/kitchen_sink/src/lib_kitchen_sink.rs
@@ -639,7 +639,7 @@ impl KitchenSink {
pub fn is_readme_short(&self, readme: Result<Option<&Readme>, ()>) -> bool {
if let Ok(Some(ref r)) = readme {
match r.markup {
- Markup::Markdown(ref s) | Markup::Rst(ref s) => s.len() < 1000,
+ Markup::Markdown(ref s) | Markup::Rst(ref s) | Markup::Html(ref s) => s.len() < 1000,
}
} else {
true
diff --git a/reindex/src/bin/reindex_crates.rs b/reindex/src/bin/reindex_crates.rs
index 189daa8..6419969 100644
--- a/reindex/src/bin/reindex_crates.rs
+++ b/reindex/src/bin/reindex_crates.rs
@@ -7,7 +7,7 @@ use rand::{seq::SliceRandom, thread_rng};
use ranking::CrateTemporalInputs;
use ranking::CrateVersionInputs;
use rayon;
-use render_readme::{Markup, Renderer};
+use render_readme::Renderer;
use search_index::*;
use std::collections::HashSet;
use std::sync::mpsc;
@@ -31,12 +31,13 @@ fn main() {
let (tx, rx) = mpsc::sync_channel(64);
let index_thread = std::thread::spawn({
+ let renderer = renderer.clone();
move || -> Result<(), failure::Error> {
let mut n = 0;
let mut next_n = 100;
while let Ok((ver, downloads_per_month, score)) = rx.recv() {
if stopped() {break;}
- index_search(&mut indexer, &ver, downloads_per_month, score)?;
+ index_search(&mut indexer, &renderer, &ver, downloads_per_month, score)?;
n += 1;
if n == next_n {
next_n *= 2;
@@ -117,7 +118,7 @@ fn index_crate(crates: &KitchenSink, c: &Origin, renderer: &Renderer, search_sen
Ok(v)
}
-fn index_search(indexer: &mut Indexer, k: &RichCrateVersion, downloads_per_month: usize, score: f64) -> Result<(), failure::Error> {
+fn index_search(indexer: &mut Indexer, renderer: &Renderer, k: &RichCrateVersion, downloads_per_month: usize, score: f64) -> Result<(), failure::Error> {
let keywords: Vec<_> = k.keywords(Include::Cleaned).collect();
let mut lib_tmp = None;
@@ -125,13 +126,11 @@ fn index_search(indexer: &mut Indexer, k: &RichCrateVersion, downloads_per_month
lib_tmp = k.lib_file_markdown();
lib_tmp.as_ref()
}).map(|markup| {
- match markup {
- Markup::Markdown(ref s) | Markup::Rst(ref s) => s.as_str(),
- }
+ renderer.visible_text(markup)
});
let version = k.version();
- indexer.add(k.short_name(), version, k.description().unwrap_or(""), &keywords, readme, downloads_per_month as u64, score);
+ indexer.add(k.short_name(), version, k.description().unwrap_or(""), &keywords, readme.as_ref().map(|s| s.as_str()), downloads_per_month as u64, score);
Ok(())
}
diff --git a/render_readme b/render_readme
-Subproject bb36743b5340cdeaf94b351272ac9c63e1241ee
+Subproject 8fc48291857f3b52b94df310aeb9134ff7f8384
diff --git a/rich_crate/src/rich_crate_version.rs b/rich_crate/src/rich_crate_version.rs
index 873a7c8..10a2d4f 100644
--- a/rich_crate/src/rich_crate_version.rs
+++ b/rich_crate/src/rich_crate_version.rs
@@ -7,6 +7,7 @@ pub use cargo_toml::{DepsSet, Edition, FeatureSet, MaintenanceStatus, TargetDeps
use categories::Categories;
use crates_index::Version;
use repo_url::Repo;
+use render_readme::Renderer;
use semver;
use std::borrow::Cow;
use std::collections::BTreeMap;
@@ -152,9 +153,8 @@ impl RichCrateVersion {
}
};
if let Ok(Some(r)) = self.readme() {
- match r.markup {
- Markup::Markdown(ref s) | Markup::Rst(ref s) => add_words(s),
- }
+ let s = Renderer::new(None).visible_text(&r.markup);
+ add_words(&s);
}
add_words(self.short_name());
if let Some(s) = self.description() {add_words(s);}