summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kornel <kornel@geekhood.net> 2019-08-05 04:07:33 +0100
committer Kornel <kornel@geekhood.net> 2019-08-05 04:07:33 +0100
commit bc06fb2a2d8cb4b2a53f082b610e69a16e0d79f8 (patch)
tree c16a2d6eddcb1a8573317e4f8802e79af6b370d3
parent 53e49237d4a8f0be10abf110c26bcc1e7f91216e (diff)
Fix categories filtering
-rw-r--r-- categories/src/categories.rs 7
-rw-r--r-- crate_db/src/lib_crate_db.rs 9
-rw-r--r-- crate_db/src/stopwords.rs 1
3 files changed, 15 insertions, 2 deletions
diff --git a/categories/src/categories.rs b/categories/src/categories.rs
index 7840457..367e940 100644
--- a/categories/src/categories.rs
+++ b/categories/src/categories.rs
@@ -127,6 +127,7 @@ impl Categories {
pub fn fixed_category_slugs(cats: &[String]) -> Vec<Cow<'_, str>> {
let mut cats = Self::filtered_category_slugs(cats).enumerate().filter_map(|(idx, s)| {
+ let s = s.trim_matches(':');
let mut chars = s.chars().peekable();
while let Some(cur) = chars.next() {
// look for a:b instead of a::b
@@ -150,6 +151,12 @@ impl Categories {
}
let depth = s.split("::").count();
Some((depth, idx, Cow::Borrowed(s.as_ref())))
+ }).filter(|(_, _, s)| {
+ if CATEGORIES.from_slug(s).next().is_none() {
+ println!("invalid cat name {}", s);
+ return false;
+ }
+ true
}).collect::<Vec<_>>();
// depth, then original order
diff --git a/crate_db/src/lib_crate_db.rs b/crate_db/src/lib_crate_db.rs
index bbc6c93..f4d4a6c 100644
--- a/crate_db/src/lib_crate_db.rs
+++ b/crate_db/src/lib_crate_db.rs
@@ -177,9 +177,12 @@ impl CrateDb {
};
let keywords = package.keywords.iter().map(|s| s.to_lowercase()).collect();
- let categories =
+ let categories = if categories::Categories::fixed_category_slugs(&package.categories).is_empty() {
Some(self.guess_crate_categories_tx(conn, &origin, keywords, 0.1).context("catdb")?
- .into_iter().map(|(_, c)| c).collect());
+ .into_iter().map(|(_, c)| c).collect())
+ } else {
+ None
+ };
Ok((manifest, Derived {
path_in_repo,
@@ -1135,6 +1138,7 @@ fn try_indexing() {
name="crates-indexing-unit-test-hi"
version="1.2.3"
keywords = ["test-CRATE"]
+categories = ["1", "two", "GAMES", "science", "::science::math::"]
"#).unwrap();
db.index_latest(CrateVersionData {
derived: &derived,
@@ -1153,6 +1157,7 @@ keywords = ["test-CRATE"]
let (new_manifest, new_derived) = db.rich_crate_version_data(&origin).unwrap();
assert_eq!(manifest.package().name, new_manifest.package().name);
assert_eq!(manifest.package().keywords, new_manifest.package().keywords);
+ assert_eq!(manifest.package().categories, new_manifest.package().categories);
assert_eq!(new_derived.github_keywords, derived.github_keywords);
assert_eq!(new_derived.github_description, derived.github_description);
diff --git a/crate_db/src/stopwords.rs b/crate_db/src/stopwords.rs
index 6046b7c..266d33e 100644
--- a/crate_db/src/stopwords.rs
+++ b/crate_db/src/stopwords.rs
@@ -60,6 +60,7 @@ lazy_static! {
("machine-learning", Some(&["math", "statistics"])),
("neural-networks", Some(&["math", "statistics"])),
("database", Some(&["embedded"])),
+ ("robotics", Some(&["localization"])),
("thread", Some(&["storage"])),
("bitcoin", Some(&["http", "day", "database", "key-value", "network", "wasm"])),
("exonum", Some(&["http", "day", "database", "key-value", "network", "wasm"])),