diff options
author | Kornel <kornel@geekhood.net> | 2020-02-27 19:34:21 +0000 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2020-02-27 22:19:37 +0000 |
commit | ca018e8cec2f5b96044a87c66f32913b8c40c126 (patch) | |
tree | 5b72d40de19b82ce9cb452cb43fbe818e9c4a12a /kitchen_sink | |
parent | db676cddcc1984bb84c89c81c3bb84fc91d4cda6 (diff) |
Move deps stats
Diffstat (limited to 'kitchen_sink')
-rw-r--r-- | kitchen_sink/Cargo.toml | 4 | ||||
-rw-r--r-- | kitchen_sink/src/deps_stats.rs | 250 | ||||
-rw-r--r-- | kitchen_sink/src/git_crates_index.rs | 33 | ||||
-rw-r--r-- | kitchen_sink/src/index.rs | 522 | ||||
-rw-r--r-- | kitchen_sink/src/lib_kitchen_sink.rs | 28 |
5 files changed, 19 insertions, 818 deletions
diff --git a/kitchen_sink/Cargo.toml b/kitchen_sink/Cargo.toml index 824e476..4d01659 100644 --- a/kitchen_sink/Cargo.toml +++ b/kitchen_sink/Cargo.toml @@ -1,7 +1,7 @@ [package] edition = "2018" name = "kitchen_sink" -version = "0.8.3" +version = "0.9.0" authors = ["Kornel <kornel@geekhood.net>"] publish = false @@ -10,8 +10,8 @@ name = "kitchen_sink" path = "src/lib_kitchen_sink.rs" [dependencies] -crates-index = "0.13.3" crates_io_client = { path = "../crates_io_client" } +deps_index = { path = "../deps_index" } docs_rs_client = { git = "https://gitlab.com/crates.rs/docs_rs_client.git", version = "0.4.0" } github_info = { path = "../github_info", version = "0.8.0" } crate_git_checkout = { git = "https://gitlab.com/crates.rs/crate_git_checkout.git", version = "0.4.3" } diff --git a/kitchen_sink/src/deps_stats.rs b/kitchen_sink/src/deps_stats.rs deleted file mode 100644 index 437bbed..0000000 --- a/kitchen_sink/src/deps_stats.rs +++ /dev/null @@ -1,250 +0,0 @@ -use crate::index::*; -use crate::KitchenSinkErr; -use parking_lot::Mutex; -use rayon::prelude::*; -use string_interner::Sym; - -type FxHashMap<K, V> = std::collections::HashMap<K, V, ahash::RandomState>; -type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; - -pub type DepInfMap = FxHashMap<Box<str>, (DepInf, MiniVer)>; - -pub struct DepsStats { - pub total: usize, - pub counts: FxHashMap<Box<str>, RevDependencies>, -} - -#[derive(Debug, Clone, Default)] -pub struct RevDepCount { - pub def: u16, - pub opt: u16, -} - -impl RevDepCount { - pub fn all(&self) -> u32 { - self.def as u32 + self.opt as u32 - } -} - -#[derive(Debug, Clone, Default)] -pub struct DirectDepCount { - pub runtime: u16, - pub build: u16, - pub dev: u16, -} - -impl DirectDepCount { - pub fn all(&self) -> u32 { - self.runtime as u32 + self.build as u32 + self.dev as u32 - } -} - -#[derive(Debug, Clone, Default)] -pub struct RevDependencies { - /// Default, optional - pub runtime: RevDepCount, - pub build: RevDepCount, - pub dev: u16, - pub direct: DirectDepCount, - pub versions: FxHashMap<MiniVer, u16>, - pub rev_dep_names: CompactStringSet, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub enum DepTy { - Runtime, - Build, - Dev, -} - -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] -pub struct DepInf { - pub direct: bool, - pub default: bool, - pub ty: DepTy, -} - -pub struct DepVisitor { - node_visited: FxHashSet<(DepInf, *const Mutex<DepSet>)>, -} - -impl DepVisitor { - pub fn new() -> Self { - Self { - node_visited: FxHashSet::with_capacity_and_hasher(120, Default::default()), - } - } - - pub fn visit(&mut self, depset: &ArcDepSet, depinf: DepInf, mut cb: impl FnMut(&mut Self, &DepName, &Dep)) { - let target_addr: &Mutex<FxHashMap<DepName, Dep>> = &*depset; - if self.node_visited.insert((depinf, target_addr as *const _)) { - if let Some(depset) = depset.try_lock() { - for (name, dep) in depset.iter() { - cb(self, name, dep); - } - } - } - } - - #[inline] - pub fn start(&mut self, dep: &Dep, depinf: DepInf, cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { - self.recurse_inner(dep, DepInf { direct: true, ..depinf }, cb) - } - - #[inline] - pub fn recurse(&mut self, dep: &Dep, depinf: DepInf, cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { - self.recurse_inner(dep, DepInf { direct: false, ..depinf }, cb) - } - - #[inline] - fn recurse_inner(&mut self, dep: &Dep, depinf: DepInf, mut cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { - cb(self, &dep.runtime, depinf); - let ty = if depinf.ty == DepTy::Dev { DepTy::Dev } else { DepTy::Build }; - cb(self, &dep.build, DepInf { ty, ..depinf }); - } -} - -impl Index { - pub(crate) fn all_dependencies_flattened(&self, c: &impl ICrate) -> Result<DepInfMap, KitchenSinkErr> { - let mut collected = FxHashMap::with_capacity_and_hasher(120, Default::default()); - let mut visitor = DepVisitor::new(); - - flatten(&self.deps_of_crate(c, DepQuery { - default: true, - all_optional: false, - dev: false, - })?, DepInf { - default: true, - direct: true, - ty: DepTy::Runtime, - }, &mut collected, &mut visitor); - - flatten(&self.deps_of_crate(c, DepQuery { - default: true, - all_optional: true, - dev: false, - })?, DepInf { - default: false, // false, because real defaults have already been set - direct: true, - ty: DepTy::Runtime, - }, &mut collected, &mut visitor); - - flatten(&self.deps_of_crate(c, DepQuery { - default: true, - all_optional: true, - dev: true, - })?, DepInf { - default: false, // false, because real defaults have already been set - direct: true, - ty: DepTy::Dev, - }, &mut collected, &mut visitor); - - if collected.is_empty() { - return Ok(FxHashMap::default()); - } - - - let inter = self.inter.read(); - let mut converted = FxHashMap::with_capacity_and_hasher(collected.len(), Default::default()); - converted.extend(collected.into_iter().map(|(k, v)| { - let name = inter.resolve(k).expect("resolve"); - debug_assert_eq!(name, name.to_ascii_lowercase()); - (name.into(), v) - })); - Ok(converted) - } - - pub(crate) async fn get_deps_stats(&self) -> DepsStats { - let crates = self.crates_io_crates(); - let crates: Vec<(Box<str>, FxHashMap<_,_>)> = crates - .par_iter() - .filter_map(|(_, c)| { - self.all_dependencies_flattened(c) - .ok() - .filter(|collected| !collected.is_empty()) - .map(|dep| { - (c.name().to_ascii_lowercase().into(), dep) - }) - }).collect(); - - self.clear_cache(); - - let total = crates.len(); - let mut counts = FxHashMap::with_capacity_and_hasher(total, Default::default()); - for (parent_name, deps) in crates { - for (name, (depinf, semver)) in deps { - let n = counts.entry(name).or_insert_with(RevDependencies::default); - let t = n.versions.entry(semver).or_insert(0); - *t = t.checked_add(1).expect("overflow"); - if depinf.direct { - n.rev_dep_names.push(&parent_name); - } - match depinf.ty { - DepTy::Runtime => { - if depinf.direct {n.direct.runtime = n.direct.runtime.checked_add(1).expect("overflow"); } - if depinf.default { - n.runtime.def = n.runtime.def.checked_add(1).expect("overflow"); - } else { - n.runtime.opt = n.runtime.opt.checked_add(1).expect("overflow"); - } - }, - DepTy::Build => { - if depinf.direct {n.direct.build = n.direct.build.checked_add(1).expect("overflow"); } - if depinf.default { - n.build.def = n.build.def.checked_add(1).expect("overflow"); - } else { - n.build.opt = n.build.opt.checked_add(1).expect("overflow"); - } - }, - DepTy::Dev => { - if depinf.direct {n.direct.dev = n.direct.dev.checked_add(1).expect("overflow"); } - n.dev = n.dev.checked_add(1).expect("overflow"); - }, - } - } - } - - DepsStats { total, counts } - } -} - -fn flatten(dep: &Dep, depinf: DepInf, collected: &mut FxHashMap<Sym, (DepInf, MiniVer)>, visitor: &mut DepVisitor) { - visitor.start(dep, depinf, |vis, dep, depinf| flatten_set(dep, depinf, collected, vis)); -} - -fn flatten_set(depset: &ArcDepSet, depinf: DepInf, collected: &mut FxHashMap<Sym, (DepInf, MiniVer)>, visitor: &mut DepVisitor) { - visitor.visit(depset, depinf, |vis, (name, _), dep| { - collected.entry(name.clone()) - .and_modify(|(old, semver)| { - if depinf.default {old.default = true;} - if depinf.direct { - old.direct = true; - *semver = dep.semver.clone(); // direct version is most important; used for estimating out-of-date versions - } - match (old.ty, depinf.ty) { - (_, DepTy::Runtime) => {old.ty = DepTy::Runtime;}, - (DepTy::Dev, DepTy::Build) => {old.ty = DepTy::Build;}, - _ => {}, - } - }) - .or_insert((depinf, dep.semver.clone())); - vis.recurse(dep, depinf, |vis, dep, depinf| flatten_set(dep, depinf, collected, vis)); - }) -} - -#[derive(Debug, Clone, Default)] -pub struct CompactStringSet(String); - -impl CompactStringSet { - pub fn push(&mut self, s: &str) { - if !self.0.is_empty() { - self.0.reserve(1 + s.len()); - self.0.push('\0'); - } - self.0.push_str(s); - } - - pub fn iter(&self) -> impl Iterator<Item = &str> { - self.0.split('\0') - } -} diff --git a/kitchen_sink/src/git_crates_index.rs b/kitchen_sink/src/git_crates_index.rs deleted file mode 100644 index 814e538..0000000 --- a/kitchen_sink/src/git_crates_index.rs +++ /dev/null @@ -1,33 +0,0 @@ -use crate::KitchenSinkErr; -use crate::Origin; -use std::fs; -use std::path::Path; - -type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; - -pub struct GitIndex { - index: FxHashSet<Origin>, -} - -impl GitIndex { - pub fn new(dir: &Path) -> Result<Self, KitchenSinkErr> { - let path = dir.join("git_crates.txt"); - let index = if path.exists() { - match fs::read_to_string(&path) { - Ok(file) => file.split('\n').map(|s| s.trim()).filter(|s| !s.is_empty()).map(Origin::from_str).collect(), - Err(e) => return Err(KitchenSinkErr::GitIndexFile(path, e.to_string())), - } - } else { - Default::default() - }; - Ok(Self { index }) - } - - pub fn has(&self, origin: &Origin) -> bool { - self.index.get(origin).is_some() - } - - pub fn crates(&self) -> impl Iterator<Item = &Origin> { - self.index.iter() - } -} diff --git a/kitchen_sink/src/index.rs b/kitchen_sink/src/index.rs deleted file mode 100644 index 4ec3c88..0000000 --- a/kitchen_sink/src/index.rs +++ /dev/null @@ -1,522 +0,0 @@ -use crate::deps_stats::DepsStats; -use crate::git_crates_index::*; -use crate::KitchenSink; -use crate::KitchenSinkErr; -use crates_index; -use crates_index::Crate; -use crates_index::Dependency; -pub use crates_index::Version; -use double_checked_cell_async::DoubleCheckedCell; -use parking_lot::Mutex; -use parking_lot::RwLock; -use rich_crate::Origin; -use rich_crate::RichCrateVersion; -use rich_crate::RichDep; -use semver::Version as SemVer; -use semver::VersionReq; -use std::iter; -use std::path::Path; -use std::sync::Arc; -use string_interner::StringInterner; -use string_interner::Sym; -use std::time::Duration; -use rayon::prelude::*; - -type FxHashMap<K, V> = std::collections::HashMap<K, V, ahash::RandomState>; -type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; - -#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] -pub struct MiniVer { - pub major: u16, - pub minor: u16, - pub patch: u16, - pub build: u16, - pub pre: Box<[semver::Identifier]>, -} - -impl MiniVer { - pub fn to_semver(&self) -> SemVer { - SemVer { - major: self.major.into(), - minor: self.minor.into(), - patch: self.patch.into(), - pre: self.pre.clone().into(), - build: if self.build > 0 { vec![semver::Identifier::Numeric(self.build.into())] } else { Vec::new() }, - } - } -} - -pub(crate) trait FeatureGetter { - fn get(&self, key: &str) -> Option<&Vec<String>>; -} -impl FeatureGetter for std::collections::HashMap<String, Vec<String>> { - fn get(&self, key: &str) -> Option<&Vec<String>> { - self.get(key) - } -} -impl FeatureGetter for std::collections::BTreeMap<String, Vec<String>> { - fn get(&self, key: &str) -> Option<&Vec<String>> { - self.get(key) - } -} - -pub(crate) trait IVersion { - type Features: FeatureGetter; - fn name(&self) -> &str; - fn version(&self) -> &str; - fn dependencies(&self) -> Fudge; - fn features(&self) -> &Self::Features; - fn is_yanked(&self) -> bool; -} - -impl IVersion for Version { - type Features = std::collections::HashMap<String, Vec<String>>; - fn name(&self) -> &str {self.name()} - fn version(&self) -> &str {self.version()} - fn dependencies(&self) -> Fudge {Fudge::CratesIo(self.dependencies())} - fn features(&self) -> &Self::Features {self.features()} - fn is_yanked(&self) -> bool {self.is_yanked()} -} - -pub(crate) trait ICrate { - type Ver: IVersion; - fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>); -} - -impl ICrate for Crate { - type Ver = Version; - fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>) { - let latest = Index::highest_crates_io_version(self, true); - let mut features = Vec::with_capacity(if all_optional { - latest.features().len() + latest.dependencies().iter().filter(|d| d.is_optional()).count() - } else { 0 }); - if all_optional { - features.extend(latest.features().iter().filter(|(_, v)| !v.is_empty()).map(|(c, _)| c.to_string().into_boxed_str())); - // optional dependencis make implicit features - features.extend(latest.dependencies().iter().filter(|d| d.is_optional()).map(|d| d.name().to_string().into_boxed_str())); - }; - let features = features.into_boxed_slice(); - (latest, features) - } -} - -pub(crate) enum Fudge<'a> { - CratesIo(&'a [Dependency]), - Manifest((Vec<RichDep>, Vec<RichDep>, Vec<RichDep>)), -} - -impl IVersion for RichCrateVersion { - type Features = std::collections::BTreeMap<String, Vec<String>>; - fn name(&self) -> &str {self.short_name()} - fn version(&self) -> &str {self.version()} - fn dependencies(&self) -> Fudge {Fudge::Manifest(self.direct_dependencies().unwrap())} - fn features(&self) -> &Self::Features {self.features()} - fn is_yanked(&self) -> bool {self.is_yanked()} -} - -impl ICrate for RichCrateVersion { - type Ver = RichCrateVersion; - fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>) { - let mut features = Vec::with_capacity(if all_optional { self.features().len() } else { 0 }); - if all_optional { - features.extend(self.features().iter().filter(|(_, v)| !v.is_empty()).map(|(c, _)| c.to_string().into_boxed_str())); - }; - let features = features.into_boxed_slice(); - (self, features) - } -} - -pub struct Index { - indexed_crates: FxHashMap<Box<str>, Crate>, - pub(crate) crates_io_index: crates_index::Index, - git_index: GitIndex, - - pub(crate) inter: RwLock<StringInterner<Sym>>, - pub(crate) cache: RwLock<FxHashMap<(Box<str>, Features), ArcDepSet>>, - deps_stats: DoubleCheckedCell<DepsStats>, -} - -impl Index { - pub fn new_default() -> Result<Self, KitchenSinkErr> { - Self::new(&KitchenSink::data_path()?) - } - - pub fn new(data_dir: &Path) -> Result<Self, KitchenSinkErr> { - let crates_io_index = crates_index::Index::new(data_dir.join("index")); - let indexed_crates = crates_io_index.crate_index_paths().par_bridge() - .filter_map(|path| { - let c = crates_index::Crate::new_checked(path).ok()?; - Some((c.name().to_ascii_lowercase().into(), c)) - }) - .collect(); - Ok(Self { - git_index: GitIndex::new(data_dir)?, - cache: RwLock::new(FxHashMap::with_capacity_and_hasher(5000, Default::default())), - inter: RwLock::new(StringInterner::new()), - deps_stats: DoubleCheckedCell::new(), - indexed_crates, - crates_io_index, - }) - } - - pub fn update(&self) { - let _ = self.crates_io_index.update().map_err(|e| eprintln!("{}", e)); - } - - /// Crates available in the crates.io index - /// - /// It returns only a thin and mostly useless data from the index itself, - /// so `rich_crate`/`rich_crate_version` is needed to do more. - pub fn crates_io_crates(&self) -> &FxHashMap<Box<str>, Crate> { - &self.indexed_crates - } - - pub fn crate_exists(&self, origin: &Origin) -> bool { - match origin { - Origin::CratesIo(lowercase_name) => self.crates_io_crate_by_lowercase_name(lowercase_name).is_ok(), - Origin::GitHub { .. } | Origin::GitLab { .. } => self.git_index.has(origin), - } - } - - /// All crates available in the crates.io index and our index - /// - pub fn all_crates(&self) -> impl Iterator<Item = Origin> + '_ { - self.git_index.crates().cloned().chain(self.crates_io_crates().keys().map(|n| Origin::from_crates_io_name(&n))) - } - - pub async fn deps_stats(&self) -> Result<&DepsStats, KitchenSinkErr> { - Ok(tokio::time::timeout(Duration::from_secs(30), self.deps_stats.get_or_init(self.get_deps_stats())).await - .map_err(|_| KitchenSinkErr::DepsNotAvailable)?) - } - - #[inline] - pub fn crates_io_crate_by_lowercase_name(&self, name: &str) -> Result<&Crate, KitchenSinkErr> { - debug_assert_eq!(name, name.to_ascii_lowercase()); - self.crates_io_crates() - .get(name) - .ok_or_else(|| KitchenSinkErr::CrateNotFound(Origin::from_crates_io_name(name))) - } - - pub fn crate_highest_version(&self, name: &str, stable_only: bool) -> Result<&Version, KitchenSinkErr> { - debug_assert_eq!(name, name.to_ascii_lowercase()); - Ok(Self::highest_crates_io_version(self.crates_io_crate_by_lowercase_name(name)?, stable_only)) - } - - fn highest_crates_io_version(krate: &Crate, stable_only: bool) -> &Version { - krate.versions() - .iter() - .max_by_key(|a| { - let ver = SemVer::parse(a.version()) - .map_err(|e| eprintln!("{} has invalid version {}: {}", krate.name(), a.version(), e)) - .ok(); - let bad = a.is_yanked() || (stable_only && !ver.as_ref().map_or(false, |v| v.pre.is_empty())); - (!bad, ver) - }) - .unwrap_or_else(|| krate.latest_version()) // latest_version = most recently published version - } - - pub(crate) fn deps_of_crate(&self, krate: &impl ICrate, query: DepQuery) -> Result<Dep, KitchenSinkErr> { - let (latest, features) = krate.latest_version_with_features(query.all_optional); - self.deps_of_crate_int(latest, features, query) - } - - fn deps_of_crate_int(&self, latest: &impl IVersion, features: Box<[Box<str>]>, DepQuery { default, all_optional, dev }: DepQuery) -> Result<Dep, KitchenSinkErr> { - Ok(Dep { - semver: semver_parse(latest.version()).into(), - runtime: self.deps_of_ver(latest, Features { - all_targets: all_optional, - default, - build: false, - dev, - features: features.clone(), - })?, - build: self.deps_of_ver(latest, Features { - all_targets: all_optional, - default, - build: true, - dev, - features, - })?, - }) - } - - pub(crate) fn deps_of_ver<'a>(&self, ver: &'a impl IVersion, wants: Features) -> Result<ArcDepSet, KitchenSinkErr> { - let key = (format!("{}-{}", ver.name(), ver.version()).into(), wants); - if let Some(cached) = self.cache.read().get(&key) { - return Ok(cached.clone()); - } - let (key_id_part, wants) = key; - - let ver_features = ver.features(); // available features - let mut to_enable = FxHashMap::with_capacity_and_hasher(wants.features.len(), Default::default()); - let all_wanted_features = wants.features.iter() - .map(|s| s.as_ref()) - .chain(iter::repeat("default").take(if wants.default {1} else {0})); - for feat in all_wanted_features { - if let Some(enable) = ver_features.get(feat) { - for enable in enable { - let mut t = enable.splitn(2, '/'); - let dep_name = t.next().unwrap(); - let enabled = to_enable.entry(dep_name.to_owned()) - .or_insert(FxHashSet::default()); - if let Some(enable) = t.next() { - enabled.insert(enable); - } - } - } else { - to_enable.entry(feat.to_owned()).or_insert_with(FxHashSet::default); - } - } - - let deps = ver.dependencies(); - let mut set: FxHashMap<DepName, (_, _, SemVer, FxHashSet<String>)> = FxHashMap::with_capacity_and_hasher(60, Default::default()); - let mut iter1; - let mut iter2; - let deps: &mut dyn Iterator<Item=_> = match deps { - Fudge::CratesIo(dep) => { - iter1 = dep.iter().map(|d| { - (d.crate_name().to_ascii_lowercase(), d.kind().unwrap_or("normal"), d.target().is_some(), d.is_optional(), d.requirement(), d.has_default_features(), d.features()) - }); - &mut iter1 - }, - Fudge::Manifest((ref run, ref dev, ref build)) => { - iter2 = run.iter().map(|r| (r, "normal")) - .chain(dev.iter().map(|r| (r, "dev"))) - .chain(build.iter().map(|r| (r, "build"))) - .map(|(r, kind)| { - (r.package.to_ascii_lowercase(), kind, !r.only_for_targets.is_empty(), r.is_optional(), r.dep.req(), true, &r.with_features[..]) - }); - &mut iter2 - }, - }; - for (crate_name, kind, target_specific, is_optional, requirement, has_default_features, features) in deps { - debug_assert_eq!(crate_name, crate_name.to_ascii_lowercase()); - - // people forget to include winapi conditionally - let is_target_specific = crate_name == "winapi" || target_specific; - if !wants.all_targets && is_target_specific { - continue; // FIXME: allow common targets? - } - // hopefully nobody uses clippy at runtime, they just fail to make it dev dep - if !wants.dev && crate_name == "clippy" && is_optional { - continue; - } - - match kind { - "normal" => (), - "build" if wants.build => (), - "dev" if wants.dev => (), - _ => continue, - } - - let enable_dep_features = to_enable.get(&crate_name); - if is_optional && enable_dep_features.is_none() { - continue; - } - - let req = VersionReq::parse(requirement).map_err(|_| KitchenSinkErr::SemverParsingError)?; - let krate = match self.crates_io_crate_by_lowercase_name(&crate_name) { - Ok(k) => k, - Err(e) => { - eprintln!("{}@{} depends on missing crate {} (@{}): {}", ver.name(), ver.version(), crate_name, req, e); - continue; - }, - }; - let (matched, semver) = krate.versions().iter().rev() - .filter(|v| !v.is_yanked()) - .filter_map(|v| Some((v, SemVer::parse(v.version()).ok()?))) - .find(|(_, semver)| { - req.matches(&semver) - }) - .unwrap_or_else(|| { - let fallback = krate.latest_version(); // bad version, but it shouldn't happen anyway - let semver = semver_parse(fallback.version()); - (fallback, semver) - }); - - let key = { - let mut inter = self.inter.write(); - debug_assert_eq!(crate_name, crate_name.to_ascii_lowercase()); - (inter.get_or_intern(crate_name), inter.get_or_intern(matched.version())) - }; - - let (_, _, _, all_features) = set.entry(key) - .or_insert_with(|| (has_default_features, matched.clone(), semver, FxHashSet::default())); - all_features.extend(features.iter().cloned()); - if let Some(s) = enable_dep_features { - all_features.extend(s.iter().map(|s| s.to_string())); - } - } - - // break infinite recursion. Must be inserted first, since depth-first search - // may end up requesting it. - let result = Arc::new(Mutex::new(FxHashMap::default())); - let key = (key_id_part, wants.clone()); - self.cache.write().insert(key, result.clone()); - - let set: Result<_,_> = set.into_iter().map(|(k, (has_default_features, matched, semver, all_features))| { - let all_features = all_features.into_iter().map(Into::into).collect::<Vec<_>>().into_boxed_slice(); - let runtime = self.deps_of_ver(&matched, Features { - all_targets: wants.all_targets, - build: false, - dev: false, // dev is only for top-level - default: has_default_features, - features: all_features.clone(), - })?; - let build = self.deps_of_ver(&matched, Features { - all_targets: wants.all_targets, - build: true, - dev: false, // dev is only for top-level - default: has_default_features, - features: all_features, - })?; - Ok((k, Dep { - semver: semver.into(), - runtime, - build, - })) - }).collect(); - - *result.lock() = set?; - Ok(result) - } - - pub fn clear_cache(&self) { - self.cache.write().clear(); - *self.inter.write() = StringInterner::new(); - } - - /// For crate being outdated. Returns (is_latest, popularity) - /// 0 = not used *or deprecated* - /// 1 = everyone uses it - pub async fn version_popularity(&self, crate_name: &str, requirement: &VersionReq) -> Result<Option<(bool, f32)>, KitchenSinkErr> { - if is_deprecated(crate_name) { - return Ok(Some((false, 0.))); - } - - let krate = self.crates_io_crate_by_lowercase_name(&crate_name.to_ascii_lowercase())?; - - fn matches(ver: &Version, req: &VersionReq) -> bool { - ver.version().parse().ok().map_or(false, |ver| req.matches(&ver)) - } - - let matches_latest = matches(Self::highest_crates_io_version(krate, true), requirement) || - // or match latest unstable - matches(Self::highest_crates_io_version(krate, false), requirement); - - let stats = self.deps_stats().await?; - let pop = stats.counts.get(crate_name) - .map(|stats| { - let mut matches = 0; - let mut unmatches = 0; - for (ver, count) in &stats.versions { - if requirement.matches(&ver.to_semver()) { - matches += count; // TODO: this should be (slighly) weighed by crate's popularity? - } else { - unmatches += count; - } - } - matches += 1; // one to denoise unpopular crates; div/0 - matches as f32 / (matches + unmatches) as f32 - }) - .unwrap_or(0.); - - Ok(Some((matches_latest, pop))) - } - - /// How likely it is that this exact crate will be installed in any project - pub async fn version_global_popularity(&self, crate_name: &str, version: &MiniVer) -> Result<Option<f32>, KitchenSinkErr> { - match crate_name { - // bindings' SLoC looks heavier than actual overhead of standard system libs - "libc" | "winapi" | "kernel32-sys" | "winapi-i686-pc-windows-gnu" | "winapi-x86_64-pc-windows-gnu" => return Ok(Some(0.99)), - _ => {}, - } - - let stats = self.deps_stats().await?; - Ok(stats.counts.get(crate_name) - .and_then(|c| { - c.versions.get(&version) - .map(|&ver| ver as f32 / stats.total as f32) - })) - } -} - -use std::fmt; -impl fmt::Debug for Dep { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "Dep {{ {}, runtime: x{}, build: x{} }}", self.semver, self.runtime.lock().len(), self.build.lock().len()) - } -} - -/// TODO: check if the repo is rust-lang-deprecated. -/// Note: the repo URL in the crate is outdated, and it may be a redirect to the deprecated -pub fn is_deprecated(name: &str) -> bool { - match name { - "rustc-serialize" | "gcc" | "rustc-benchmarks" | "time" | "rust-crypto" | - "flate2-crc" | "complex" | "simple_stats" | "concurrent" | "feed" | - "isatty" | "thread-scoped" | "target_build_utils" | "chan" | "chan-signal" | - "glsl-to-spirv" => true, - // fundamentally unsound - "str-concat" => true, - // uses old winapi - "user32-sys" | "shell32-sys" | "advapi32-sys" | "gdi32-sys" | "ole32-sys" | "ws2_32-sys" | "kernel32-sys" | "userenv-sys" => true, - _ => false, - } -} - -fn semver_parse(ver: &str) -> SemVer { - SemVer::parse(ver).unwrap_or_else(|_| SemVer::parse("0.0.0").expect("must parse")) -} - -impl From<SemVer> for MiniVer { - fn from(s: SemVer) -> Self { - Self { - major: s.major as u16, - minor: s.minor as u16, - patch: s.patch as u16, - pre: s.pre.into_boxed_slice(), - build: if let Some(semver::Identifier::Numeric(m)) = s.build.get(0) { *m as u16 } else { 0 }, - } - } -} - -impl fmt::Display for MiniVer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}.{}.{}-{}", self.major, self.minor, self.patch, self.build) - } -} - -#[derive(Debug, Clone, Eq, PartialEq, Hash)] -pub struct Features { - pub all_targets: bool, - pub default: bool, - pub build: bool, - pub dev: bool, - pub features: Box<[Box<str>]>, -} - -pub type DepName = (Sym, Sym); -pub type DepSet = FxHashMap<DepName, Dep>; -pub type ArcDepSet = Arc<Mutex<DepSet>>; - -pub struct Dep { - pub semver: MiniVer, - pub runtime: ArcDepSet, - pub build: ArcDepSet, -} - -#[derive(Debug, Copy, Clone)] -pub struct DepQuery { - pub default: bool, - pub all_optional: bool, - pub dev: bool, -} - -#[tokio::test] -async fn index_test() { - let idx = Index::new_default().unwrap(); - let stats = idx.deps_stats().await.unwrap(); - assert!(stats.total > 13800); - let lode = stats.counts.get("lodepng").unwrap(); - assert_eq!(12, lode.runtime.def); -} diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs index 72f7f44..1fe68f5 100644 --- a/kitchen_sink/src/lib_kitchen_sink.rs +++ b/kitchen_sink/src/lib_kitchen_sink.rs @@ -3,23 +3,18 @@ #[macro_use] extern crate serde_derive; -mod index; -pub use crate::index::*; use futures::stream::StreamExt; mod yearly; pub use crate::yearly::*; -mod deps_stats; -pub use crate::deps_stats::*; +pub use deps_index::*; pub mod filter; mod ctrlcbreak; -mod git_crates_index; mod tarball; pub use crate::ctrlcbreak::*; pub use crate_db::builddb::Compat; pub use crate_db::builddb::CompatibilityInfo; -pub use crates_index::Crate as CratesIndexCrate; use crates_io_client::CrateMetaFile; pub use crates_io_client::CrateDepKind; pub use crates_io_client::CrateDependency; @@ -140,6 +135,8 @@ pub enum KitchenSinkErr { GitIndexFile(PathBuf, String), #[fail(display = "Git crate '{:?}' can't be indexed, because it's not on the list", _0)] GitCrateNotAllowed(Origin), + #[fail(display = "Deps err: {}", _0)] + Deps(DepsErr), } #[derive(Debug, Clone)] @@ -781,7 +778,7 @@ impl KitchenSink { Ok(tarball) } - async fn rich_crate_version_data_from_crates_io(&self, latest: &crates_index::Version) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { + async fn rich_crate_version_data_from_crates_io(&self, latest: &CratesIndexVersion) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { let _f = self.throttle.acquire().await; let mut warnings = HashSet::new(); @@ -1184,10 +1181,10 @@ impl KitchenSink { pub fn all_dependencies_flattened(&self, krate: &RichCrateVersion) -> Result<DepInfMap, KitchenSinkErr> { match krate.origin() { Origin::CratesIo(name) => { - self.index.all_dependencies_flattened(self.index.crates_io_crate_by_lowercase_name(name)?) + self.index.all_dependencies_flattened(self.index.crates_io_crate_by_lowercase_name(name).map_err(KitchenSinkErr::Deps)?).map_err(KitchenSinkErr::Deps) }, _ => { - self.index.all_dependencies_flattened(krate) + self.index.all_dependencies_flattened(krate).map_err(KitchenSinkErr::Deps) } } } @@ -1203,7 +1200,7 @@ impl KitchenSink { pub async fn crates_io_dependents_stats_of(&self, origin: &Origin) -> Result<Option<&RevDependencies>, KitchenSinkErr> { match origin { - Origin::CratesIo(crate_name) => Ok(self.index.deps_stats().await?.counts.get(crate_name)), + Origin::CratesIo(crate_name) => Ok(self.index.deps_stats().await.map_err(KitchenSinkErr::Deps)?.counts.get(crate_name)), _ => Ok(None), } } @@ -1212,7 +1209,7 @@ impl KitchenSink { /// 0 = not used /// 1 = everyone uses it pub async fn version_popularity(&self, crate_name: &str, requirement: &VersionReq) -> Result<Option<(bool, f32)>, KitchenSinkErr> { - self.index.version_popularity(crate_name, requirement).await + self.index.version_popularity(crate_name, requirement).await.map_err(KitchenSinkErr::Deps) } /// "See also" @@ -2110,3 +2107,12 @@ fn fetch_uppercase_name() { })).unwrap(); } + +#[ |