diff options
author | Kornel <kornel@geekhood.net> | 2019-08-05 04:07:33 +0100 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2019-08-05 04:07:33 +0100 |
commit | bc06fb2a2d8cb4b2a53f082b610e69a16e0d79f8 (patch) | |
tree | c16a2d6eddcb1a8573317e4f8802e79af6b370d3 | |
parent | 53e49237d4a8f0be10abf110c26bcc1e7f91216e (diff) |
Fix categories filtering
-rw-r--r-- | categories/src/categories.rs | 7 | ||||
-rw-r--r-- | crate_db/src/lib_crate_db.rs | 9 | ||||
-rw-r--r-- | crate_db/src/stopwords.rs | 1 |
3 files changed, 15 insertions, 2 deletions
```diff
diff --git a/categories/src/categories.rs b/categories/src/categories.rs
index 7840457..367e940 100644
--- a/categories/src/categories.rs
+++ b/categories/src/categories.rs
@@ -127,6 +127,7 @@ impl Categories {
 
     pub fn fixed_category_slugs(cats: &[String]) -> Vec<Cow<'_, str>> {
         let mut cats = Self::filtered_category_slugs(cats).enumerate().filter_map(|(idx, s)| {
+            let s = s.trim_matches(':');
             let mut chars = s.chars().peekable();
             while let Some(cur) = chars.next() {
                 // look for a:b instead of a::b
@@ -150,6 +151,12 @@ impl Categories {
             }
             let depth = s.split("::").count();
             Some((depth, idx, Cow::Borrowed(s.as_ref())))
+        }).filter(|(_, _, s)| {
+            if CATEGORIES.from_slug(s).next().is_none() {
+                println!("invalid cat name {}", s);
+                return false;
+            }
+            true
         }).collect::<Vec<_>>();
 
         // depth, then original order
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs
index bbc6c93..f4d4a6c 100644
--- a/crate_db/src/lib_crate_db.rs
+++ b/crate_db/src/lib_crate_db.rs
@@ -177,9 +177,12 @@ impl CrateDb {
         };
 
         let keywords = package.keywords.iter().map(|s| s.to_lowercase()).collect();
-        let categories =
+        let categories = if categories::Categories::fixed_category_slugs(&package.categories).is_empty() {
             Some(self.guess_crate_categories_tx(conn, &origin, keywords, 0.1).context("catdb")?
-                .into_iter().map(|(_, c)| c).collect());
+                .into_iter().map(|(_, c)| c).collect())
+        } else {
+            None
+        };
 
         Ok((manifest, Derived {
             path_in_repo,
@@ -1135,6 +1138,7 @@ fn try_indexing() {
 name="crates-indexing-unit-test-hi"
 version="1.2.3"
 keywords = ["test-CRATE"]
+categories = ["1", "two", "GAMES", "science", "::science::math::"]
 "#).unwrap();
     db.index_latest(CrateVersionData {
         derived: &derived,
@@ -1153,6 +1157,7 @@ keywords = ["test-CRATE"]
     let (new_manifest, new_derived) = db.rich_crate_version_data(&origin).unwrap();
     assert_eq!(manifest.package().name, new_manifest.package().name);
     assert_eq!(manifest.package().keywords, new_manifest.package().keywords);
+    assert_eq!(manifest.package().categories, new_manifest.package().categories);
 
     assert_eq!(new_derived.github_keywords, derived.github_keywords);
     assert_eq!(new_derived.github_description, derived.github_description);
diff --git a/crate_db/src/stopwords.rs b/crate_db/src/stopwords.rs
index 6046b7c..266d33e 100644
--- a/crate_db/src/stopwords.rs
+++ b/crate_db/src/stopwords.rs
@@ -60,6 +60,7 @@ lazy_static! {
         ("machine-learning", Some(&["math", "statistics"])),
         ("neural-networks", Some(&["math", "statistics"])),
         ("database", Some(&["embedded"])),
+        ("robotics", Some(&["localization"])),
         ("thread", Some(&["storage"])),
         ("bitcoin", Some(&["http", "day", "database", "key-value", "network", "wasm"])),
         ("exonum", Some(&["http", "day", "database", "key-value", "network", "wasm"])),
```