summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kornel <kornel@geekhood.net> 2019-08-05 00:13:00 +0100
committer: Kornel <kornel@geekhood.net> 2019-08-05 00:13:58 +0100
commit: 7baffaaf1bc3b7bb1d97b13e54827d72cca52379 (patch)
tree: 87d6687b5622558a1821bed32384273286e385a8
parent: 1ceb9448fbfe80194ccf2f292034109e1350adae (diff)
Load crates from the DB
-rw-r--r-- crate_db/Cargo.toml 1
-rw-r--r-- crate_db/src/lib_crate_db.rs 161
-rw-r--r-- crate_db/src/schema.rs 19
-rw-r--r-- kitchen_sink/src/lib_kitchen_sink.rs 22
m--------- render_readme 0
-rw-r--r-- rich_crate/src/rich_crate_version.rs 3
-rw-r--r-- udedokei/src/lib.rs 4
7 files changed, 189 insertions, 21 deletions
diff --git a/crate_db/Cargo.toml b/crate_db/Cargo.toml
index 6667fe4..747f263 100644
--- a/crate_db/Cargo.toml
+++ b/crate_db/Cargo.toml
@@ -20,6 +20,7 @@ chrono = "0.4.2"
thread_local = "0.3.6"
parking_lot = "0.9"
rake = { git = "https://github.com/kornelski/rake-rs" }
+rmp-serde = "0.14"
[dev-dependencies]
tempfile = "3.1.0"
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs
index 4a4336f..4d1e777 100644
--- a/crate_db/src/lib_crate_db.rs
+++ b/crate_db/src/lib_crate_db.rs
@@ -17,6 +17,8 @@ use rich_crate::Derived;
use rich_crate::Manifest;
use rich_crate::Origin;
use rich_crate::Repo;
+use rich_crate::Readme;
+use rich_crate::Markup;
use rich_crate::RichCrate;
use rich_crate::ManifestExt;
@@ -113,6 +115,89 @@ impl CrateDb {
Ok(db)
}
+ pub fn rich_crate_version_data(&self, origin: &Origin) -> FResult<(Manifest, Derived)> { // Loads the stored manifest and rebuilds `Derived` for `origin` from the `crates` + `crate_derived` tables.
+ struct Row { // Scalar `crate_derived` columns copied out of the joined row.
+ capitalized_name: String,
+ crate_compressed_size: u32,
+ crate_decompressed_size: u32,
+ github_keywords: Option<Vec<String>>,
+ lib_file: Option<String>,
+ has_buildrs: bool,
+ is_nightly: bool,
+ is_yanked: bool,
+ has_code_of_conduct: bool,
+ }
+ self.with_connection(|conn| { // the same `conn` is reused for every query issued below
+ let args: &[&dyn ToSql] = &[&origin.to_str()]; // bound to `?1` in the query
+ let (manifest, readme, row, language_stats): (Manifest, _, Row, _) = conn.query_row("SELECT * FROM crates c JOIN crate_derived d ON (c.id = d.crate_id)
+ WHERE origin = ?1", args, |row| {
+ let readme = match row.get_raw("readme_format").as_str() { // Err (presumably a NULL/non-text column — TODO confirm) is treated as "no readme stored"
+ Err(_) => None,
+ Ok(ty) => {
+ let txt: String = row.get("readme")?;
+ Some(Readme {
+ markup: match ty {
+ "html" => rich_crate::Markup::Html(txt),
+ "md" => rich_crate::Markup::Markdown(txt),
+ "rst" => rich_crate::Markup::Rst(txt),
+ _ => unimplemented!(), // NOTE(review): panics on any stored format other than "html", "md" or "rst"
+ },
+ base_url: row.get("readme_base_url")?,
+ base_image_url: row.get("readme_base_image_url")?,
+ })
+ },
+ };
+
+ let manifest = row.get_raw("manifest").as_blob().expect("manifest col"); // NOTE(review): panics (rather than returning Err) if the column is not a blob
+ let manifest = rmp_serde::from_slice(manifest).expect("manifest parse"); // blob is decoded with rmp_serde; a decode failure also panics
+ let language_stats = row.get_raw("language_stats").as_blob().expect("language_stats col"); // same panic-on-mismatch handling as `manifest`
+ let language_stats = rmp_serde::from_slice(language_stats).expect("language_stats parse");
+ Ok((manifest, readme, Row {
+ lib_file: row.get("lib_file")?,
+ capitalized_name: row.get("capitalized_name")?,
+ crate_compressed_size: row.get("crate_compressed_size")?,
+ crate_decompressed_size: row.get("crate_decompressed_size")?,
+ github_keywords: row.get_raw("github_keywords").as_str().ok().map(|k| k.split(",").map(String::from).collect()), // one comma-separated string; a value that is not text yields None
+ has_buildrs: row.get("has_buildrs")?,
+ is_nightly: row.get("is_nightly")?,
+ is_yanked: row.get("is_yanked")?,
+ has_code_of_conduct: row.get("has_code_of_conduct")?,
+ }, language_stats))
+ })?; // errors from query_row propagate to the caller
+
+ let package = manifest.package.as_ref().expect("package in manifest"); // NOTE(review): panics if the decoded manifest has no `package`
+ let name = &package.name;
+ let maybe_repo = package.repository.as_ref().and_then(|r| Repo::new(r).ok()); // a repository URL that Repo::new rejects is ignored, not reported
+ let path_in_repo = match maybe_repo.as_ref() {
+ Some(repo) => self.path_in_repo_tx(conn, repo, name)?,
+ None => None,
+ };
+
+ let keywords = package.keywords.iter().map(|s| s.to_lowercase()).collect(); // manifest keywords, lowercased
+ let categories =
+ Some(self.guess_crate_categories_tx(conn, &origin, keywords, 0.1).context("catdb")? // categories are recomputed on each load, not read from crate_derived
+ .into_iter().map(|(_, c)| c).collect()); // keep only the second element of each returned pair
+
+ Ok((manifest, Derived {
+ path_in_repo,
+ readme,
+ categories,
+ capitalized_name: row.capitalized_name,
+ crate_compressed_size: row.crate_compressed_size,
+ crate_decompressed_size: row.crate_decompressed_size,
+ github_description: None, // never read from the row in this function, so always None here
+ github_keywords: row.github_keywords,
+ keywords: Some(self.keywords_tx(conn, &origin).context("keywordsdb2")?), // looked up via keywords_tx on the same connection
+ lib_file: row.lib_file,
+ has_buildrs: row.has_buildrs,
+ is_nightly: row.is_nightly,
+ is_yanked: row.is_yanked,
+ has_code_of_conduct: row.has_code_of_conduct,
+ language_stats,
+ }))
+ })
+ }
+
pub fn latest_crate_update_timestamp(&self) -> FResult<Option<u32>> {
self.with_connection(|conn| {
let nope: [u8; 0] = [];
@@ -224,10 +309,50 @@ impl CrateDb {
let mut clear_categories = tx.prepare_cached("DELETE FROM categories WHERE crate_id = ?1")?;
let mut insert_category = tx.prepare_cached("INSERT OR IGNORE INTO categories (crate_id, slug, rank_weight, relevance_weight) VALUES (?1, ?2, ?3, ?4)")?;
let mut get_crate_id = tx.prepare_cached("SELECT id, recent_downloads FROM crates WHERE origin = ?1")?;
+ let mut insert_derived = tx.prepare_cached("INSERT OR REPLACE INTO crate_derived (
+ crate_id, readme, readme_format, readme_base_url, readme_base_image_url, crate_compressed_size, crate_decompressed_size, github_keywords, capitalized_name, lib_file, has_buildrs, is_nightly, is_yanked, has_code_of_conduct, manifest, language_stats)
+ VALUES (
+ :crate_id,:readme,:readme_format,:readme_base_url,:readme_base_image_url,:crate_compressed_size,:crate_decompressed_size,:github_keywords,:capitalized_name,:lib_file,:has_buildrs,:is_nightly,:is_yanked,:has_code_of_conduct,:manifest,:language_stats)
+ ")?;
let args: &[&dyn ToSql] = &[&origin, &0, &0];
insert_crate.execute(args).context("insert crate")?;
let (crate_id, downloads): (u32, u32) = get_crate_id.query_row(&[&origin], |row| Ok((row.get_unwrap(0), row.get_unwrap(1)))).context("crate_id")?;
+
+ let (readme, readme_format, readme_base_url, readme_base_image_url) = match &c.derived.readme {
+ Some(Readme {base_url, base_image_url, markup}) => {
+ let (markup, format) = match markup {
+ Markup::Html(s) => (s, "html"),
+ Markup::Markdown(s) => (s, "md"),
+ Markup::Rst(s) => (s, "rst"),
+ };
+ (Some(markup), Some(format), Some(base_url), Some(base_image_url))
+ },
+ None => (None, None, None, None),
+ };
+
+ let manifest = rmp_serde::encode::to_vec_named(c.manifest).context("manifest rmp")?;
+ let language_stats = rmp_serde::encode::to_vec_named(&c.derived.language_stats).context("lang rmp")?;
+ let named_args: &[(&str, &dyn ToSql)] = &[
+ (":crate_id", &crate_id),
+ (":readme", &readme),
+ (":readme_format", &readme_format),
+ (":readme_base_url", &readme_base_url),
+ (":readme_base_image_url", &readme_base_image_url),
+ (":crate_compressed_size", &c.derived.crate_compressed_size),
+ (":crate_decompressed_size", &c.derived.crate_decompressed_size),
+ (":github_keywords", &c.derived.github_keywords.as_ref().map(|s| s.join(","))),
+ (":capitalized_name", &c.derived.capitalized_name),
+ (":lib_file", &c.derived.lib_file),
+ (":has_buildrs", &c.derived.has_buildrs),
+ (":is_nightly", &c.derived.is_nightly),
+ (":is_yanked", &c.derived.is_yanked),
+ (":has_code_of_conduct", &c.derived.has_code_of_conduct),
+ (":manifest", &manifest),
+ (":language_stats", &language_stats),
+ ];
+ insert_derived.execute_named(named_args).context("insert_derived")?;
+
let is_important_ish = downloads > 2000;
if let Some(repo) = c.repository {
@@ -444,12 +569,16 @@ impl CrateDb {
}
pub fn path_in_repo(&self, repo: &Repo, crate_name: &str) -> FResult<Option<String>> {
- let repo = repo.canonical_git_url();
self.with_connection(|conn| {
+ self.path_in_repo_tx(conn, repo, crate_name)
+ })
+ }
+
+ pub fn path_in_repo_tx(&self, conn: &Connection, repo: &Repo, crate_name: &str) -> FResult<Option<String>> {
+ let repo = repo.canonical_git_url();
let mut get_path = conn.prepare_cached("SELECT path FROM repo_crates WHERE repo = ?1 AND crate_name = ?2")?;
let args: &[&dyn ToSql] = &[&repo, &crate_name];
Ok(none_rows(get_path.query_row(args, |row| row.get(0))).context("path_in_repo")?)
- })
}
/// Update download counts of the crate
@@ -636,6 +765,11 @@ impl CrateDb {
/// Find keywords that may be most relevant to the crate
pub fn keywords(&self, origin: &Origin) -> FResult<Vec<String>> {
self.with_connection(|conn| {
+ self.keywords_tx(conn, origin)
+ })
+ }
+
+ pub fn keywords_tx(&self, conn: &Connection, origin: &Origin) -> FResult<Vec<String>> {
let mut query = conn.prepare_cached(r#"
select avg(ck.weight) * srck.weight, k.keyword
-- find the crate to categorize
@@ -666,7 +800,6 @@ impl CrateDb {
None
}
}).collect())
- })
}
/// Find most relevant/popular keywords in the category
@@ -991,7 +1124,10 @@ fn try_indexing() {
let db = CrateDb::new_with_synonyms(t.as_ref(), Path::new("../data/tag-synonyms.csv")).unwrap();
let origin = Origin::from_crates_io_name("cratedbtest");
- let derived = Default::default();
+ let derived = Derived {
+ capitalized_name: "captname".into(),
+ ..Default::default()
+ };
let manifest = cargo_toml::Manifest::from_str(r#"[package]
name="crates-indexing-unit-test-hi"
version="1.2.3"
@@ -1011,5 +1147,22 @@ keywords = ["test-CRATE"]
readme_text: None,
}).unwrap();
assert_eq!(1, db.crates_with_keyword("test-crate").unwrap());
+ let (new_manifest, new_derived) = db.rich_crate_version_data(&origin).unwrap();
+ assert_eq!(manifest.package().name, new_manifest.package().name);
+ assert_eq!(manifest.package().keywords, new_manifest.package().keywords);
+
+ assert_eq!(new_derived.github_keywords, derived.github_keywords);
+ assert_eq!(new_derived.github_description, derived.github_description);
+ assert_eq!(new_derived.language_stats, derived.language_stats);
+ assert_eq!(new_derived.crate_compressed_size, derived.crate_compressed_size);
+ assert_eq!(new_derived.crate_decompressed_size, derived.crate_decompressed_size);
+ assert_eq!(new_derived.is_nightly, derived.is_nightly);
+ assert_eq!(new_derived.capitalized_name, derived.capitalized_name);
+ assert_eq!(new_derived.readme, derived.readme);
+ assert_eq!(new_derived.lib_file, derived.lib_file);
+ assert_eq!(new_derived.path_in_repo, derived.path_in_repo);
+ assert_eq!(new_derived.has_buildrs, derived.has_buildrs);
+ assert_eq!(new_derived.has_code_of_conduct, derived.has_code_of_conduct);
+ assert_eq!(new_derived.is_yanked, derived.is_yanked);
}
diff --git a/crate_db/src/schema.rs b/crate_db/src/schema.rs
index b21e0c1..17200b3 100644
--- a/crate_db/src/schema.rs
+++ b/crate_db/src/schema.rs
@@ -53,6 +53,25 @@ impl CrateDb {
);
CREATE UNIQUE INDEX IF NOT EXISTS crate_versions_idx on crate_versions(crate_id, version);
+ CREATE TABLE IF NOT EXISTS crate_derived (
+ crate_id INTEGER NOT NULL UNIQUE,
+ readme TEXT,
+ readme_format TEXT,
+ readme_base_url TEXT,
+ readme_base_image_url TEXT,
+ crate_compressed_size INTEGER NOT NULL,
+ crate_decompressed_size INTEGER NOT NULL,
+ github_keywords TEXT,
+ capitalized_name TEXT NOT NULL,
+ lib_file TEXT,
+ has_buildrs INTEGER, -- bool
+ is_nightly INTEGER, -- bool
+ is_yanked INTEGER, -- bool
+ has_code_of_conduct INTEGER, -- bool
+ manifest BLOB NOT NULL,
+ language_stats BLOB NOT NULL
+ );
+
CREATE TABLE IF NOT EXISTS crate_downloads (
crate_id INTEGER NOT NULL,
period INTEGER NOT NULL,
diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs
index 8450f19..d383d54 100644
--- a/kitchen_sink/src/lib_kitchen_sink.rs
+++ b/kitchen_sink/src/lib_kitchen_sink.rs
@@ -136,7 +136,7 @@ pub struct KitchenSink {
user_db: user_db::UserDb,
gh: github_info::GitHub,
crate_derived_cache: TempCache<(String, RichCrateVersionCacheData, Warnings)>,
- loaded_rich_crate_version_cache: RwLock<FxHashMap<Box<str>, RichCrateVersion>>,
+ loaded_rich_crate_version_cache: RwLock<FxHashMap<Origin, RichCrateVersion>>,
category_crate_counts: LazyOnce<Option<HashMap<String, u32>>>,
removals: LazyOnce<HashMap<Origin, f64>>,
top_crates_cached: RwLock<FxHashMap<String, Arc<Vec<Origin>>>>,
@@ -416,21 +416,19 @@ impl KitchenSink {
/// There's no support for getting anything else than the latest version.
pub fn rich_crate_version(&self, origin: &Origin) -> CResult<RichCrateVersion> {
if stopped() {Err(KitchenSinkErr::Stopped)?;}
- let ver = self.index.crate_version_latest_unstable(origin).context("rich_crate_version")?;
-
- self.rich_crate_version_from_index( ver)
- }
- fn rich_crate_version_from_index(&self, krate: &Version) -> CResult<RichCrateVersion> {
- let cache_key = format!("{}-{}", krate.name(), krate.version()).into_boxed_str();
-
- if let Some(krate) = self.loaded_rich_crate_version_cache.read().get(&cache_key) {
+ if let Some(krate) = self.loaded_rich_crate_version_cache.read().get(origin) {
return Ok(krate.clone());
}
- let krate = self.rich_crate_version_verbose(krate).map(|(krate, _)| krate)?;
- self.loaded_rich_crate_version_cache.write().insert(cache_key, krate.clone());
+ let krate = if let Ok((manifest, derived)) = self.crate_db.rich_crate_version_data(origin) {
+ RichCrateVersion::new(origin.clone(), manifest, derived)
+ } else {
+ let ver = self.index.crate_version_latest_unstable(origin).context("rich_crate_version")?;
+ self.rich_crate_version_verbose(ver).map(|(krate, _)| krate)?
+ };
+ self.loaded_rich_crate_version_cache.write().insert(origin.clone(), krate.clone());
Ok(krate)
}
@@ -572,7 +570,7 @@ impl KitchenSink {
derived_categories = Some({
let keywords_iter = package.keywords.iter().map(|s| s.as_str());
self.crate_db.guess_crate_categories(&origin, keywords_iter).context("catdb")?
- .into_iter().map(|(_, c)| c).collect()
+ .into_iter().map(|(_, c)| c).collect()
});
}
diff --git a/render_readme b/render_readme
-Subproject 2740f09557fe65b79f36a0b392d9e0773f1b3c1
+Subproject 270191a5d610e9b4567eb6ec5442a740db6b0fa
diff --git a/rich_crate/src/rich_crate_version.rs b/rich_crate/src/rich_crate_version.rs
index 264e7dd..c693303 100644
--- a/rich_crate/src/rich_crate_version.rs
+++ b/rich_crate/src/rich_crate_version.rs
@@ -6,11 +6,9 @@ use cargo_toml::{Dependency, Manifest, Package};
pub use cargo_toml::{DepsSet, Edition, FeatureSet, MaintenanceStatus, TargetDepsSet};
use categories::Categories;
use repo_url::Repo;
-use render_readme::Renderer;
use semver;
use std::borrow::Cow;
use std::collections::BTreeMap;
-use std::collections::HashMap;
use std::collections::HashSet;
use udedokei;
@@ -479,7 +477,6 @@ pub struct Derived {
pub crate_decompressed_size: u32,
pub is_nightly: bool,
pub capitalized_name: String,
-
pub readme: Option<Readme>,
pub lib_file: Option<String>,
pub path_in_repo: Option<String>,
diff --git a/udedokei/src/lib.rs b/udedokei/src/lib.rs
index 3b0b36d..7a5fcc5 100644
--- a/udedokei/src/lib.rs
+++ b/udedokei/src/lib.rs
@@ -7,13 +7,13 @@ use tokei;
pub use tokei::LanguageType as Language;
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+#[derive(Debug, PartialEq, Clone, Default, Serialize, Deserialize)]
pub struct Stats {
pub langs: HashMap<Language, Lines>,
pub has_old_try: bool,
}
-#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
+#[derive(Debug, PartialEq, Clone, Copy, Default, Serialize, Deserialize)]
pub struct Lines {
pub comments: u32,
pub code: u32,