diff options
author | Kornel <kornel@geekhood.net> | 2020-02-27 19:34:21 +0000 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2020-02-27 22:19:37 +0000 |
commit | ca018e8cec2f5b96044a87c66f32913b8c40c126 (patch) | |
tree | 5b72d40de19b82ce9cb452cb43fbe818e9c4a12a /deps_index/src | |
parent | db676cddcc1984bb84c89c81c3bb84fc91d4cda6 (diff) |
Move deps stats
Diffstat (limited to 'deps_index/src')
-rw-r--r-- | deps_index/src/deps_stats.rs | 250 | ||||
-rw-r--r-- | deps_index/src/git_crates_index.rs | 33 | ||||
-rw-r--r-- | deps_index/src/index.rs | 509 | ||||
-rw-r--r-- | deps_index/src/lib.rs | 33 |
4 files changed, 825 insertions, 0 deletions
diff --git a/deps_index/src/deps_stats.rs b/deps_index/src/deps_stats.rs new file mode 100644 index 0000000..4c641f5 --- /dev/null +++ b/deps_index/src/deps_stats.rs @@ -0,0 +1,250 @@ +use crate::index::*; +use crate::DepsErr; +use parking_lot::Mutex; +use rayon::prelude::*; +use string_interner::Sym; + +type FxHashMap<K, V> = std::collections::HashMap<K, V, ahash::RandomState>; +type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; + +pub type DepInfMap = FxHashMap<Box<str>, (DepInf, MiniVer)>; + +pub struct DepsStats { + pub total: usize, + pub counts: FxHashMap<Box<str>, RevDependencies>, +} + +#[derive(Debug, Clone, Default)] +pub struct RevDepCount { + pub def: u16, + pub opt: u16, +} + +impl RevDepCount { + pub fn all(&self) -> u32 { + self.def as u32 + self.opt as u32 + } +} + +#[derive(Debug, Clone, Default)] +pub struct DirectDepCount { + pub runtime: u16, + pub build: u16, + pub dev: u16, +} + +impl DirectDepCount { + pub fn all(&self) -> u32 { + self.runtime as u32 + self.build as u32 + self.dev as u32 + } +} + +#[derive(Debug, Clone, Default)] +pub struct RevDependencies { + /// Default, optional + pub runtime: RevDepCount, + pub build: RevDepCount, + pub dev: u16, + pub direct: DirectDepCount, + pub versions: FxHashMap<MiniVer, u16>, + pub rev_dep_names: CompactStringSet, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum DepTy { + Runtime, + Build, + Dev, +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub struct DepInf { + pub direct: bool, + pub default: bool, + pub ty: DepTy, +} + +pub struct DepVisitor { + node_visited: FxHashSet<(DepInf, *const Mutex<DepSet>)>, +} + +impl DepVisitor { + pub fn new() -> Self { + Self { + node_visited: FxHashSet::with_capacity_and_hasher(120, Default::default()), + } + } + + pub fn visit(&mut self, depset: &ArcDepSet, depinf: DepInf, mut cb: impl FnMut(&mut Self, &DepName, &Dep)) { + let target_addr: &Mutex<FxHashMap<DepName, Dep>> = &*depset; + if 
self.node_visited.insert((depinf, target_addr as *const _)) { + if let Some(depset) = depset.try_lock() { + for (name, dep) in depset.iter() { + cb(self, name, dep); + } + } + } + } + + #[inline] + pub fn start(&mut self, dep: &Dep, depinf: DepInf, cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { + self.recurse_inner(dep, DepInf { direct: true, ..depinf }, cb) + } + + #[inline] + pub fn recurse(&mut self, dep: &Dep, depinf: DepInf, cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { + self.recurse_inner(dep, DepInf { direct: false, ..depinf }, cb) + } + + #[inline] + fn recurse_inner(&mut self, dep: &Dep, depinf: DepInf, mut cb: impl FnMut(&mut DepVisitor, &ArcDepSet, DepInf)) { + cb(self, &dep.runtime, depinf); + let ty = if depinf.ty == DepTy::Dev { DepTy::Dev } else { DepTy::Build }; + cb(self, &dep.build, DepInf { ty, ..depinf }); + } +} + +impl Index { + pub fn all_dependencies_flattened(&self, c: &impl ICrate) -> Result<DepInfMap, DepsErr> { + let mut collected = FxHashMap::with_capacity_and_hasher(120, Default::default()); + let mut visitor = DepVisitor::new(); + + flatten(&self.deps_of_crate(c, DepQuery { + default: true, + all_optional: false, + dev: false, + })?, DepInf { + default: true, + direct: true, + ty: DepTy::Runtime, + }, &mut collected, &mut visitor); + + flatten(&self.deps_of_crate(c, DepQuery { + default: true, + all_optional: true, + dev: false, + })?, DepInf { + default: false, // false, because real defaults have already been set + direct: true, + ty: DepTy::Runtime, + }, &mut collected, &mut visitor); + + flatten(&self.deps_of_crate(c, DepQuery { + default: true, + all_optional: true, + dev: true, + })?, DepInf { + default: false, // false, because real defaults have already been set + direct: true, + ty: DepTy::Dev, + }, &mut collected, &mut visitor); + + if collected.is_empty() { + return Ok(FxHashMap::default()); + } + + + let inter = self.inter.read(); + let mut converted = FxHashMap::with_capacity_and_hasher(collected.len(), 
Default::default()); + converted.extend(collected.into_iter().map(|(k, v)| { + let name = inter.resolve(k).expect("resolve"); + debug_assert_eq!(name, name.to_ascii_lowercase()); + (name.into(), v) + })); + Ok(converted) + } + + pub async fn get_deps_stats(&self) -> DepsStats { + let crates = self.crates_io_crates(); + let crates: Vec<(Box<str>, FxHashMap<_,_>)> = crates + .par_iter() + .filter_map(|(_, c)| { + self.all_dependencies_flattened(c) + .ok() + .filter(|collected| !collected.is_empty()) + .map(|dep| { + (c.name().to_ascii_lowercase().into(), dep) + }) + }).collect(); + + self.clear_cache(); + + let total = crates.len(); + let mut counts = FxHashMap::with_capacity_and_hasher(total, Default::default()); + for (parent_name, deps) in crates { + for (name, (depinf, semver)) in deps { + let n = counts.entry(name).or_insert_with(RevDependencies::default); + let t = n.versions.entry(semver).or_insert(0); + *t = t.checked_add(1).expect("overflow"); + if depinf.direct { + n.rev_dep_names.push(&parent_name); + } + match depinf.ty { + DepTy::Runtime => { + if depinf.direct {n.direct.runtime = n.direct.runtime.checked_add(1).expect("overflow"); } + if depinf.default { + n.runtime.def = n.runtime.def.checked_add(1).expect("overflow"); + } else { + n.runtime.opt = n.runtime.opt.checked_add(1).expect("overflow"); + } + }, + DepTy::Build => { + if depinf.direct {n.direct.build = n.direct.build.checked_add(1).expect("overflow"); } + if depinf.default { + n.build.def = n.build.def.checked_add(1).expect("overflow"); + } else { + n.build.opt = n.build.opt.checked_add(1).expect("overflow"); + } + }, + DepTy::Dev => { + if depinf.direct {n.direct.dev = n.direct.dev.checked_add(1).expect("overflow"); } + n.dev = n.dev.checked_add(1).expect("overflow"); + }, + } + } + } + + DepsStats { total, counts } + } +} + +fn flatten(dep: &Dep, depinf: DepInf, collected: &mut FxHashMap<Sym, (DepInf, MiniVer)>, visitor: &mut DepVisitor) { + visitor.start(dep, depinf, |vis, dep, depinf| 
flatten_set(dep, depinf, collected, vis)); +} + +fn flatten_set(depset: &ArcDepSet, depinf: DepInf, collected: &mut FxHashMap<Sym, (DepInf, MiniVer)>, visitor: &mut DepVisitor) { + visitor.visit(depset, depinf, |vis, (name, _), dep| { + collected.entry(name.clone()) + .and_modify(|(old, semver)| { + if depinf.default {old.default = true;} + if depinf.direct { + old.direct = true; + *semver = dep.semver.clone(); // direct version is most important; used for estimating out-of-date versions + } + match (old.ty, depinf.ty) { + (_, DepTy::Runtime) => {old.ty = DepTy::Runtime;}, + (DepTy::Dev, DepTy::Build) => {old.ty = DepTy::Build;}, + _ => {}, + } + }) + .or_insert((depinf, dep.semver.clone())); + vis.recurse(dep, depinf, |vis, dep, depinf| flatten_set(dep, depinf, collected, vis)); + }) +} + +#[derive(Debug, Clone, Default)] +pub struct CompactStringSet(String); + +impl CompactStringSet { + pub fn push(&mut self, s: &str) { + if !self.0.is_empty() { + self.0.reserve(1 + s.len()); + self.0.push('\0'); + } + self.0.push_str(s); + } + + pub fn iter(&self) -> impl Iterator<Item = &str> { + self.0.split('\0') + } +} diff --git a/deps_index/src/git_crates_index.rs b/deps_index/src/git_crates_index.rs new file mode 100644 index 0000000..51b3adb --- /dev/null +++ b/deps_index/src/git_crates_index.rs @@ -0,0 +1,33 @@ +use crate::DepsErr; +use crate::Origin; +use std::fs; +use std::path::Path; + +type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; + +pub struct GitIndex { + index: FxHashSet<Origin>, +} + +impl GitIndex { + pub fn new(dir: &Path) -> Result<Self, DepsErr> { + let path = dir.join("git_crates.txt"); + let index = if path.exists() { + match fs::read_to_string(&path) { + Ok(file) => file.split('\n').map(|s| s.trim()).filter(|s| !s.is_empty()).map(Origin::from_str).collect(), + Err(e) => return Err(DepsErr::GitIndexFile(path, e.to_string())), + } + } else { + Default::default() + }; + Ok(Self { index }) + } + + pub fn has(&self, origin: &Origin) -> 
bool { + self.index.get(origin).is_some() + } + + pub fn crates(&self) -> impl Iterator<Item = &Origin> { + self.index.iter() + } +} diff --git a/deps_index/src/index.rs b/deps_index/src/index.rs new file mode 100644 index 0000000..c53ab8e --- /dev/null +++ b/deps_index/src/index.rs @@ -0,0 +1,509 @@ +use crate::deps_stats::DepsStats; +use crate::git_crates_index::*; +use crate::DepsErr; +use crates_index; +use crates_index::Crate; +use crates_index::Dependency; +pub use crates_index::Version; +use double_checked_cell_async::DoubleCheckedCell; +use parking_lot::Mutex; +use parking_lot::RwLock; +use rich_crate::Origin; +use rich_crate::RichCrateVersion; +use rich_crate::RichDep; +use semver::Version as SemVer; +use semver::VersionReq; +use std::iter; +use std::path::Path; +use std::sync::Arc; +use string_interner::StringInterner; +use string_interner::Sym; +use std::time::Duration; +use rayon::prelude::*; +use serde_derive::*; + +type FxHashMap<K, V> = std::collections::HashMap<K, V, ahash::RandomState>; +type FxHashSet<V> = std::collections::HashSet<V, ahash::RandomState>; + +#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct MiniVer { + pub major: u16, + pub minor: u16, + pub patch: u16, + pub build: u16, + pub pre: Box<[semver::Identifier]>, +} + +impl MiniVer { + pub fn to_semver(&self) -> SemVer { + SemVer { + major: self.major.into(), + minor: self.minor.into(), + patch: self.patch.into(), + pre: self.pre.clone().into(), + build: if self.build > 0 { vec![semver::Identifier::Numeric(self.build.into())] } else { Vec::new() }, + } + } +} + +pub trait FeatureGetter { + fn get(&self, key: &str) -> Option<&Vec<String>>; +} +impl FeatureGetter for std::collections::HashMap<String, Vec<String>> { + fn get(&self, key: &str) -> Option<&Vec<String>> { + self.get(key) + } +} +impl FeatureGetter for std::collections::BTreeMap<String, Vec<String>> { + fn get(&self, key: &str) -> Option<&Vec<String>> { + self.get(key) + } +} + +pub trait 
IVersion { + type Features: FeatureGetter; + fn name(&self) -> &str; + fn version(&self) -> &str; + fn dependencies(&self) -> Fudge; + fn features(&self) -> &Self::Features; + fn is_yanked(&self) -> bool; +} + +impl IVersion for Version { + type Features = std::collections::HashMap<String, Vec<String>>; + fn name(&self) -> &str {self.name()} + fn version(&self) -> &str {self.version()} + fn dependencies(&self) -> Fudge {Fudge::CratesIo(self.dependencies())} + fn features(&self) -> &Self::Features {self.features()} + fn is_yanked(&self) -> bool {self.is_yanked()} +} + +pub trait ICrate { + type Ver: IVersion; + fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>); +} + +impl ICrate for Crate { + type Ver = Version; + fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>) { + let latest = Index::highest_crates_io_version(self, true); + let mut features = Vec::with_capacity(if all_optional { + latest.features().len() + latest.dependencies().iter().filter(|d| d.is_optional()).count() + } else { 0 }); + if all_optional { + features.extend(latest.features().iter().filter(|(_, v)| !v.is_empty()).map(|(c, _)| c.to_string().into_boxed_str())); + // optional dependencis make implicit features + features.extend(latest.dependencies().iter().filter(|d| d.is_optional()).map(|d| d.name().to_string().into_boxed_str())); + }; + let features = features.into_boxed_slice(); + (latest, features) + } +} + +pub enum Fudge<'a> { + CratesIo(&'a [Dependency]), + Manifest((Vec<RichDep>, Vec<RichDep>, Vec<RichDep>)), +} + +impl IVersion for RichCrateVersion { + type Features = std::collections::BTreeMap<String, Vec<String>>; + fn name(&self) -> &str {self.short_name()} + fn version(&self) -> &str {self.version()} + fn dependencies(&self) -> Fudge {Fudge::Manifest(self.direct_dependencies().unwrap())} + fn features(&self) -> &Self::Features {self.features()} + fn is_yanked(&self) -> bool {self.is_yanked()} +} + +impl 
ICrate for RichCrateVersion { + type Ver = RichCrateVersion; + fn latest_version_with_features(&self, all_optional: bool) -> (&Self::Ver, Box<[Box<str>]>) { + let mut features = Vec::with_capacity(if all_optional { self.features().len() } else { 0 }); + if all_optional { + features.extend(self.features().iter().filter(|(_, v)| !v.is_empty()).map(|(c, _)| c.to_string().into_boxed_str())); + }; + let features = features.into_boxed_slice(); + (self, features) + } +} + +pub struct Index { + indexed_crates: FxHashMap<Box<str>, Crate>, + pub crates_io_index: crates_index::Index, + git_index: GitIndex, + + pub inter: RwLock<StringInterner<Sym>>, + pub cache: RwLock<FxHashMap<(Box<str>, Features), ArcDepSet>>, + deps_stats: DoubleCheckedCell<DepsStats>, +} + +impl Index { + pub fn new(data_dir: &Path) -> Result<Self, DepsErr> { + let crates_io_index = crates_index::Index::new(data_dir.join("index")); + let indexed_crates = crates_io_index.crate_index_paths().par_bridge() + .filter_map(|path| { + let c = crates_index::Crate::new_checked(path).ok()?; + Some((c.name().to_ascii_lowercase().into(), c)) + }) + .collect(); + Ok(Self { + git_index: GitIndex::new(data_dir)?, + cache: RwLock::new(FxHashMap::with_capacity_and_hasher(5000, Default::default())), + inter: RwLock::new(StringInterner::new()), + deps_stats: DoubleCheckedCell::new(), + indexed_crates, + crates_io_index, + }) + } + + pub fn update(&self) { + let _ = self.crates_io_index.update().map_err(|e| eprintln!("{}", e)); + } + + /// Crates available in the crates.io index + /// + /// It returns only a thin and mostly useless data from the index itself, + /// so `rich_crate`/`rich_crate_version` is needed to do more. + pub fn crates_io_crates(&self) -> &FxHashMap<Box<str>, Crate> { + &self.indexed_crates + } + + pub fn crate_exists(&self, origin: &Origin) -> bool { + match origin { + Origin::CratesIo(lowercase_name) => self.crates_io_crate_by_lowercase_name(lowercase_name).is_ok(), + Origin::GitHub { .. 
} | Origin::GitLab { .. } => self.git_index.has(origin), + } + } + + /// All crates available in the crates.io index and our index + /// + pub fn all_crates(&self) -> impl Iterator<Item = Origin> + '_ { + self.git_index.crates().cloned().chain(self.crates_io_crates().keys().map(|n| Origin::from_crates_io_name(&n))) + } + + pub async fn deps_stats(&self) -> Result<&DepsStats, DepsErr> { + Ok(tokio::time::timeout(Duration::from_secs(30), self.deps_stats.get_or_init(self.get_deps_stats())).await + .map_err(|_| DepsErr::DepsNotAvailable)?) + } + + #[inline] + pub fn crates_io_crate_by_lowercase_name(&self, name: &str) -> Result<&Crate, DepsErr> { + debug_assert_eq!(name, name.to_ascii_lowercase()); + self.crates_io_crates() + .get(name) + .ok_or_else(|| DepsErr::CrateNotFound(Origin::from_crates_io_name(name))) + } + + pub fn crate_highest_version(&self, name: &str, stable_only: bool) -> Result<&Version, DepsErr> { + debug_assert_eq!(name, name.to_ascii_lowercase()); + Ok(Self::highest_crates_io_version(self.crates_io_crate_by_lowercase_name(name)?, stable_only)) + } + + fn highest_crates_io_version(krate: &Crate, stable_only: bool) -> &Version { + krate.versions() + .iter() + .max_by_key(|a| { + let ver = SemVer::parse(a.version()) + .map_err(|e| eprintln!("{} has invalid version {}: {}", krate.name(), a.version(), e)) + .ok(); + let bad = a.is_yanked() || (stable_only && !ver.as_ref().map_or(false, |v| v.pre.is_empty())); + (!bad, ver) + }) + .unwrap_or_else(|| krate.latest_version()) // latest_version = most recently published version + } + + pub fn deps_of_crate(&self, krate: &impl ICrate, query: DepQuery) -> Result<Dep, DepsErr> { + let (latest, features) = krate.latest_version_with_features(query.all_optional); + self.deps_of_crate_int(latest, features, query) + } + + fn deps_of_crate_int(&self, latest: &impl IVersion, features: Box<[Box<str>]>, DepQuery { default, all_optional, dev }: DepQuery) -> Result<Dep, DepsErr> { + Ok(Dep { + semver: 
semver_parse(latest.version()).into(), + runtime: self.deps_of_ver(latest, Features { + all_targets: all_optional, + default, + build: false, + dev, + features: features.clone(), + })?, + build: self.deps_of_ver(latest, Features { + all_targets: all_optional, + default, + build: true, + dev, + features, + })?, + }) + } + + pub fn deps_of_ver<'a>(&self, ver: &'a impl IVersion, wants: Features) -> Result<ArcDepSet, DepsErr> { + let key = (format!("{}-{}", ver.name(), ver.version()).into(), wants); + if let Some(cached) = self.cache.read().get(&key) { + return Ok(cached.clone()); + } + let (key_id_part, wants) = key; + + let ver_features = ver.features(); // available features + let mut to_enable = FxHashMap::with_capacity_and_hasher(wants.features.len(), Default::default()); + let all_wanted_features = wants.features.iter() + .map(|s| s.as_ref()) + .chain(iter::repeat("default").take(if wants.default {1} else {0})); + for feat in all_wanted_features { + if let Some(enable) = ver_features.get(feat) { + for enable in enable { + let mut t = enable.splitn(2, '/'); + let dep_name = t.next().unwrap(); + let enabled = to_enable.entry(dep_name.to_owned()) + .or_insert(FxHashSet::default()); + if let Some(enable) = t.next() { + enabled.insert(enable); + } + } + } else { + to_enable.entry(feat.to_owned()).or_insert_with(FxHashSet::default); + } + } + + let deps = ver.dependencies(); + let mut set: FxHashMap<DepName, (_, _, SemVer, FxHashSet<String>)> = FxHashMap::with_capacity_and_hasher(60, Default::default()); + let mut iter1; + let mut iter2; + let deps: &mut dyn Iterator<Item=_> = match deps { + Fudge::CratesIo(dep) => { + iter1 = dep.iter().map(|d| { + (d.crate_name().to_ascii_lowercase(), d.kind().unwrap_or("normal"), d.target().is_some(), d.is_optional(), d.requirement(), d.has_default_features(), d.features()) + }); + &mut iter1 + }, + Fudge::Manifest((ref run, ref dev, ref build)) => { + iter2 = run.iter().map(|r| (r, "normal")) + .chain(dev.iter().map(|r| (r, 
"dev"))) + .chain(build.iter().map(|r| (r, "build"))) + .map(|(r, kind)| { + (r.package.to_ascii_lowercase(), kind, !r.only_for_targets.is_empty(), r.is_optional(), r.dep.req(), true, &r.with_features[..]) + }); + &mut iter2 + }, + }; + for (crate_name, kind, target_specific, is_optional, requirement, has_default_features, features) in deps { + debug_assert_eq!(crate_name, crate_name.to_ascii_lowercase()); + + // people forget to include winapi conditionally + let is_target_specific = crate_name == "winapi" || target_specific; + if !wants.all_targets && is_target_specific { + continue; // FIXME: allow common targets? + } + // hopefully nobody uses clippy at runtime, they just fail to make it dev dep + if !wants.dev && crate_name == "clippy" && is_optional { + continue; + } + + match kind { + "normal" => (), + "build" if wants.build => (), + "dev" if wants.dev => (), + _ => continue, + } + + let enable_dep_features = to_enable.get(&crate_name); + if is_optional && enable_dep_features.is_none() { + continue; + } + + let req = VersionReq::parse(requirement).map_err(|_| DepsErr::SemverParsingError)?; + let krate = match self.crates_io_crate_by_lowercase_name(&crate_name) { + Ok(k) => k, + Err(e) => { + eprintln!("{}@{} depends on missing crate {} (@{}): {}", ver.name(), ver.version(), crate_name, req, e); + continue; + }, + }; + let (matched, semver) = krate.versions().iter().rev() + .filter(|v| !v.is_yanked()) + .filter_map(|v| Some((v, SemVer::parse(v.version()).ok()?))) + .find(|(_, semver)| { + req.matches(&semver) + }) + .unwrap_or_else(|| { + let fallback = krate.latest_version(); // bad version, but it shouldn't happen anyway + let semver = semver_parse(fallback.version()); + (fallback, semver) + }); + + let key = { + let mut inter = self.inter.write(); + debug_assert_eq!(crate_name, crate_name.to_ascii_lowercase()); + (inter.get_or_intern(crate_name), inter.get_or_intern(matched.version())) + }; + + let (_, _, _, all_features) = set.entry(key) + 
.or_insert_with(|| (has_default_features, matched.clone(), semver, FxHashSet::default())); + all_features.extend(features.iter().cloned()); + if let Some(s) = enable_dep_features { + all_features.extend(s.iter().map(|s| s.to_string())); + } + } + + // break infinite recursion. Must be inserted first, since depth-first search + // may end up requesting it. + let result = Arc::new(Mutex::new(FxHashMap::default())); + let key = (key_id_part, wants.clone()); + self.cache.write().insert(key, result.clone()); + + let set: Result<_,_> = set.into_iter().map(|(k, (has_default_features, matched, semver, all_features))| { + let all_features = all_features.into_iter().map(Into::into).collect::<Vec<_>>().into_boxed_slice(); + let runtime = self.deps_of_ver(&matched, Features { + all_targets: wants.all_targets, + build: false, + dev: false, // dev is only for top-level + default: has_default_features, + features: all_features.clone(), + })?; + let build = self.deps_of_ver(&matched, Features { + all_targets: wants.all_targets, + build: true, + dev: false, // dev is only for top-level + default: has_default_features, + features: all_features, + })?; + Ok((k, Dep { + semver: semver.into(), + runtime, + build, + })) + }).collect(); + + *result.lock() = set?; + Ok(result) + } + + pub fn clear_cache(&self) { + self.cache.write().clear(); + *self.inter.write() = StringInterner::new(); + } + + /// For crate being outdated. 
Returns (is_latest, popularity) + /// 0 = not used *or deprecated* + /// 1 = everyone uses it + pub async fn version_popularity(&self, crate_name: &str, requirement: &VersionReq) -> Result<Option<(bool, f32)>, DepsErr> { + if is_deprecated(crate_name) { + return Ok(Some((false, 0.))); + } + + let krate = self.crates_io_crate_by_lowercase_name(&crate_name.to_ascii_lowercase())?; + + fn matches(ver: &Version, req: &VersionReq) -> bool { + ver.version().parse().ok().map_or(false, |ver| req.matches(&ver)) + } + + let matches_latest = matches(Self::highest_crates_io_version(krate, true), requirement) || + // or match latest unstable + matches(Self::highest_crates_io_version(krate, false), requirement); + + let stats = self.deps_stats().await?; + let pop = stats.counts.get(crate_name) + .map(|stats| { + let mut matches = 0; + let mut unmatches = 0; + for (ver, count) in &stats.versions { + if requirement.matches(&ver.to_semver()) { + matches += count; // TODO: this should be (slighly) weighed by crate's popularity? 
+ } else { + unmatches += count; + } + } + matches += 1; // one to denoise unpopular crates; div/0 + matches as f32 / (matches + unmatches) as f32 + }) + .unwrap_or(0.); + + Ok(Some((matches_latest, pop))) + } + + /// How likely it is that this exact crate will be installed in any project + pub async fn version_global_popularity(&self, crate_name: &str, version: &MiniVer) -> Result<Option<f32>, DepsErr> { + match crate_name { + // bindings' SLoC looks heavier than actual overhead of standard system libs + "libc" | "winapi" | "kernel32-sys" | "winapi-i686-pc-windows-gnu" | "winapi-x86_64-pc-windows-gnu" => return Ok(Some(0.99)), + _ => {}, + } + + let stats = self.deps_stats().await?; + Ok(stats.counts.get(crate_name) + .and_then(|c| { + c.versions.get(&version) + .map(|&ver| ver as f32 / stats.total as f32) + })) + } +} + +use std::fmt; +impl fmt::Debug for Dep { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Dep {{ {}, runtime: x{}, build: x{} }}", self.semver, self.runtime.lock().len(), self.build.lock().len()) + } +} + +/// TODO: check if the repo is rust-lang-deprecated. 
+/// Note: the repo URL in the crate is outdated, and it may be a redirect to the deprecated +pub fn is_deprecated(name: &str) -> bool { + match name { + "rustc-serialize" | "gcc" | "rustc-benchmarks" | "time" | "rust-crypto" | + "flate2-crc" | "complex" | "simple_stats" | "concurrent" | "feed" | + "isatty" | "thread-scoped" | "target_build_utils" | "chan" | "chan-signal" | + "glsl-to-spirv" => true, + // fundamentally unsound + "str-concat" => true, + // uses old winapi + "user32-sys" | "shell32-sys" | "advapi32-sys" | "gdi32-sys" | "ole32-sys" | "ws2_32-sys" | "kernel32-sys" | "userenv-sys" => true, + _ => false, + } +} + +fn semver_parse(ver: &str) -> SemVer { + SemVer::parse(ver).unwrap_or_else(|_| SemVer::parse("0.0.0").expect("must parse")) +} + +impl From<SemVer> for MiniVer { + fn from(s: SemVer) -> Self { + Self { + major: s.major as u16, + minor: s.minor as u16, + patch: s.patch as u16, + pre: s.pre.into_boxed_slice(), + build: if let Some(semver::Identifier::Numeric(m)) = s.build.get(0) { *m as u16 } else { 0 }, + } + } +} + +impl fmt::Display for MiniVer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}.{}.{}-{}", self.major, self.minor, self.patch, self.build) + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Features { + pub all_targets: bool, + pub default: bool, + pub build: bool, + pub dev: bool, + pub features: Box<[Box<str>]>, +} + +pub type DepName = (Sym, Sym); +pub type DepSet = FxHashMap<DepName, Dep>; +pub type ArcDepSet = Arc<Mutex<DepSet>>; + +pub struct Dep { + pub semver: MiniVer, + pub runtime: ArcDepSet, + pub build: ArcDepSet, +} + +#[derive(Debug, Copy, Clone)] +pub struct DepQuery { + pub default: bool, + pub all_optional: bool, + pub dev: bool, +} diff --git a/deps_index/src/lib.rs b/deps_index/src/lib.rs new file mode 100644 index 0000000..b6e6ffd --- /dev/null +++ b/deps_index/src/lib.rs @@ -0,0 +1,33 @@ +mod index; +use rich_crate::Origin; +use std::path::PathBuf; +pub use index::*; 
+use failure::Fail;
+
+mod deps_stats;
+mod git_crates_index;
+pub use deps_stats::*;
+pub use crates_index::Crate as CratesIndexCrate;
+pub use crates_index::Version as CratesIndexVersion;
+
+/// Errors returned by the dependency index (`Index`, `GitIndex`).
+#[derive(Debug, Clone, Fail)]
+pub enum DepsErr {
+    #[fail(display = "crate not found: {:?}", _0)]
+    CrateNotFound(Origin), // no crate with this origin's name in the crates.io index
+    #[fail(display = "crate {} not found in repo {}", _0, _1)]
+    CrateNotFoundInRepo(String, String), // (crate, repo), per the display string
+    #[fail(display = "crate is not a package: {:?}", _0)]
+    NotAPackage(Origin),
+
+    #[fail(display = "Error when parsing version")]
+    SemverParsingError, // a dependency's version requirement failed to parse
+    #[fail(display = "Stopped")]
+    Stopped,
+    #[fail(display = "Deps stats timeout")]
+    DepsNotAvailable, // `Index::deps_stats` did not finish within its 30 s timeout
+    #[fail(display = "Can't read git crates index file {:?}: {}", _0, _1)]
+    GitIndexFile(PathBuf, String), // (path of `git_crates.txt`, I/O error text) from `GitIndex::new`
+    #[fail(display = "Git crate '{:?}' can't be indexed, because it's not on the list", _0)]
+    GitCrateNotAllowed(Origin),
+} |