summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKornel <kornel@geekhood.net>2020-02-23 19:27:51 +0000
committerKornel <kornel@geekhood.net>2020-02-23 19:27:51 +0000
commit97a9e602b44871572d040e8208c63190a7e74612 (patch)
treec099ebd45c6fea0304dd55d848881ca1acfd3941
parentf617258e79b9bd02d267184bc0c33b8908cc39b9 (diff)
Owner info from data dump
-rw-r--r--crates_io_client/src/crate_owners.rs9
-rw-r--r--datadump/src/main.rs75
-rw-r--r--kitchen_sink/src/lib_kitchen_sink.rs23
3 files changed, 98 insertions, 9 deletions
diff --git a/crates_io_client/src/crate_owners.rs b/crates_io_client/src/crate_owners.rs
index 8df6c69..4dad6d7 100644
--- a/crates_io_client/src/crate_owners.rs
+++ b/crates_io_client/src/crate_owners.rs
@@ -25,6 +25,15 @@ pub struct CrateOwner {
pub url: String, // "https://github.com/rust-bus",
pub name: Option<String>, // "maintainers",
pub avatar: Option<String>, // "https://avatars1.githubusercontent.com/u/38887296?v=4"
+
+ #[serde(default)]
+ pub github_id: Option<u32>,
+
+ #[serde(default)]
+ pub invited_at: Option<String>,
+
+ #[serde(default)]
+ pub invited_by_github_id: Option<u32>,
}
impl CrateOwner {
diff --git a/datadump/src/main.rs b/datadump/src/main.rs
index 09018d8..e25963d 100644
--- a/datadump/src/main.rs
+++ b/datadump/src/main.rs
@@ -1,7 +1,10 @@
#![allow(unused)]
#![allow(dead_code)]
+use std::convert::TryInto;
use chrono::prelude::*;
use kitchen_sink::KitchenSink;
+use kitchen_sink::OwnerKind;
+use kitchen_sink::CrateOwner;
use libflate::gzip::Decoder;
use serde_derive::Deserialize;
use std::collections::HashMap;
@@ -15,6 +18,7 @@ type BoxErr = Box<dyn std::error::Error + Sync + Send>;
#[tokio::main]
async fn main() -> Result<(), BoxErr> {
+ tokio::runtime::Handle::current().spawn(async move {
let mut a = Archive::new(Decoder::new(BufReader::new(File::open("db-dump.tar.gz")?))?);
let ksink = KitchenSink::new_default().await?;
@@ -70,9 +74,16 @@ async fn main() -> Result<(), BoxErr> {
index_downloads(crates, versions, &downloads, &ksink)?;
}
}
+ if let (Some(crates), Some(teams), Some(users)) = (&crates, &teams, &users) {
+ if let Some(crate_owners) = crate_owners.take() {
+ eprintln!("Indexing {} owners", crate_owners.len());
+ index_owners(crates, crate_owners, teams, users, &ksink)?;
+ }
+ }
}
}
Ok(())
+ }).await.unwrap()
}
#[inline(never)]
@@ -93,6 +104,50 @@ fn index_downloads(crates: &CratesMap, versions: &VersionsMap, downloads: &Versi
Ok(())
}
+/// Stores the owners of every crate found in the data dump in the kitchen sink.
+///
+/// `owners` maps a crate id to its ownership rows. Crate ids that are absent from
+/// `crates` are skipped. Each row is resolved against `users` (`owner_kind == 0`)
+/// or `teams` (`owner_kind == 1`), converted into a `CrateOwner`, and the resulting
+/// list is saved with `set_crates_io_crate_owners` under the ASCII-lowercased crate name.
+///
+/// # Errors
+///
+/// Returns an error when a row refers to a user or team missing from the dump,
+/// when `owner_kind` is neither 0 nor 1, or when saving the owner list fails.
+#[inline(never)]
+fn index_owners(crates: &CratesMap, owners: CrateOwners, teams: &Teams, users: &Users, ksink: &KitchenSink) -> Result<(), BoxErr> {
+    for (crate_id, rows) in owners {
+        let crate_name = match crates.get(&crate_id) {
+            Some(name) => name,
+            None => continue,
+        };
+        let owners = rows.into_iter().map(|o| -> Result<CrateOwner, BoxErr> {
+            // The dump contains negative user github ids (-1). A plain `as u32` cast would
+            // turn those into 4294967295, so convert with a range check and keep `None` instead.
+            let invited_by_github_id: Option<u32> = o.created_by_id.and_then(|id| match users.get(&id) {
+                Some(u) => u.github_id.try_into().ok(),
+                None => teams.get(&id).map(|t| t.github_id),
+            });
+            match o.owner_kind {
+                0 => {
+                    let u = users.get(&o.owner_id)
+                        .ok_or_else(|| format!("owner consistency: user {} is missing", o.owner_id))?;
+                    Ok(CrateOwner {
+                        id: o.owner_id as _,
+                        login: u.login.to_owned(),
+                        invited_at: Some(o.created_at),
+                        invited_by_github_id,
+                        github_id: u.github_id.try_into().ok(),
+                        name: Some(u.name.to_owned()),
+                        avatar: None,
+                        url: String::new(),
+                        kind: OwnerKind::User,
+                    })
+                },
+                1 => {
+                    let t = teams.get(&o.owner_id)
+                        .ok_or_else(|| format!("owner consistency: team {} is missing", o.owner_id))?;
+                    Ok(CrateOwner {
+                        id: o.owner_id as _,
+                        login: t.login.to_owned(),
+                        invited_at: Some(o.created_at),
+                        github_id: Some(t.github_id),
+                        invited_by_github_id,
+                        name: Some(t.name.to_owned()),
+                        avatar: None,
+                        url: String::new(),
+                        kind: OwnerKind::Team,
+                    })
+                },
+                other => Err(format!("bad owner type {}", other).into()),
+            }
+        }).collect::<Result<Vec<_>, BoxErr>>()?;
+        // The setter reports failure as `()`, so name the crate here to keep the error useful.
+        ksink.set_crates_io_crate_owners(&crate_name.to_ascii_lowercase(), owners)
+            .map_err(|_| format!("unable to store owners of crate {}", crate_name))?;
+    }
+    Ok(())
+}
+
+
#[derive(Deserialize)]
struct CrateOwnerRow {
crate_id: u32,
@@ -102,14 +157,16 @@ struct CrateOwnerRow {
owner_kind: u8,
}
+type CrateOwners = HashMap<u32, Vec<CrateOwnerRow>>;
+
#[inline(never)]
-fn parse_crate_owners(file: impl Read) -> Result<HashMap<u32, CrateOwnerRow>, BoxErr> {
+fn parse_crate_owners(file: impl Read) -> Result<CrateOwners, BoxErr> {
let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file);
let mut out = HashMap::with_capacity(NUM_CRATES);
for r in csv.records() {
let r = r?;
let r = r.deserialize::<CrateOwnerRow>(None).map_err(|e| format!("wat? {:#?} {}", r, e))?;
- out.insert(r.crate_id, r);
+ out.entry(r.crate_id).or_insert_with(|| Vec::with_capacity(1)).push(r);
}
Ok(out)
}
@@ -119,12 +176,14 @@ struct TeamRow {
avatar: String,
github_id: u32,
id: u32,
- login: String,
- name: String,
+ login: String, // in the funny format
+ name: String, // human str
}
+type Teams = HashMap<u32, TeamRow>;
+
#[inline(never)]
-fn parse_teams(file: impl Read) -> Result<HashMap<u32, TeamRow>, BoxErr> {
+fn parse_teams(file: impl Read) -> Result<Teams, BoxErr> {
let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file);
let mut out = HashMap::with_capacity(NUM_CRATES);
for r in csv.records() {
@@ -138,14 +197,16 @@ fn parse_teams(file: impl Read) -> Result<HashMap<u32, TeamRow>, BoxErr> {
#[derive(Deserialize)]
struct UserRow {
avatar: String,
- github_id: i32, // -1 happens :(
+ github_id: i32, // there is -1 :(
login: String,
id: u32,
name: String,
}
+type Users = HashMap<u32, UserRow>;
+
#[inline(never)]
-fn parse_users(file: impl Read) -> Result<HashMap<u32, UserRow>, BoxErr> {
+fn parse_users(file: impl Read) -> Result<Users, BoxErr> {
let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file);
let mut out = HashMap::with_capacity(NUM_CRATES);
for r in csv.records() {
diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs
index 70ec683..ce480f7 100644
--- a/kitchen_sink/src/lib_kitchen_sink.rs
+++ b/kitchen_sink/src/lib_kitchen_sink.rs
@@ -46,7 +46,7 @@ use categories::Category;
use chrono::prelude::*;
use chrono::DateTime;
use crate_db::{builddb::BuildDb, CrateDb, CrateVersionData, RepoChange};
-use crates_io_client::CrateOwner;
+pub use crates_io_client::CrateOwner;
use double_checked_cell_async::DoubleCheckedCell;
use failure::ResultExt;
use futures::future::join_all;
@@ -165,6 +165,7 @@ pub struct KitchenSink {
main_cache_dir: PathBuf,
yearly: AllDownloads,
category_overrides: HashMap<String, Vec<Cow<'static, str>>>,
+ crates_io_owners_cache: TempCache<Vec<CrateOwner>>,
}
impl KitchenSink {
@@ -205,6 +206,7 @@ impl KitchenSink {
yearly: AllDownloads::new(&main_cache_dir),
main_cache_dir,
category_overrides: Self::load_category_overrides(&data_path.join("category_overrides.txt"))?,
+ crates_io_owners_cache: TempCache::new(&data_path.join("cio-owners.tmp"))?,
})
})
}
@@ -424,6 +426,10 @@ impl KitchenSink {
login: o.login,
kind: OwnerKind::User, // FIXME: crates-io uses teams, and we'd need to find the right team? is "owners" a guaranteed thing?
name: o.name,
+ github_id: Some(o.id),
+
+ invited_at: None,
+ invited_by_github_id: None,
}
}).collect(),
format!("github/{}/{}", repo.owner, package),
@@ -1713,7 +1719,12 @@ impl KitchenSink {
async fn crate_owners(&self, krate: &RichCrateVersion) -> CResult<Vec<CrateOwner>> {
match krate.origin() {
- Origin::CratesIo(name) => self.crates_io_crate_owners(name, krate.version()).await,
+ Origin::CratesIo(name) => {
+ if let Some(o) = self.crates_io_owners_cache.get(krate.name())? {
+ return Ok(o);
+ }
+ self.crates_io_crate_owners(name, krate.version()).await
+ },
Origin::GitLab {..} => Ok(vec![]),
Origin::GitHub {repo, ..} => Ok(vec![
CrateOwner {
@@ -1725,6 +1736,10 @@ impl KitchenSink {
login: repo.owner.to_string(),
kind: OwnerKind::User, // FIXME: crates-io uses teams, and we'd need to find the right team? is "owners" a guaranteed thing?
name: None,
+
+ invited_at: None,
+ github_id: None,
+ invited_by_github_id: None,
}
]),
}
@@ -1734,6 +1749,10 @@ impl KitchenSink {
Ok(self.crates_io.crate_owners(crate_name, version).await.context("crate_owners")?.unwrap_or_default())
}
+    /// Saves `owners` as the owner list of `crate_name` in `crates_io_owners_cache`.
+    ///
+    /// Whatever error the cache reports is discarded; callers only learn that the
+    /// write failed, via `Err(())`.
+    pub fn set_crates_io_crate_owners(&self, crate_name: &str, owners: Vec<CrateOwner>) -> Result<(), ()> {
+        let stored = self.crates_io_owners_cache.set(crate_name, owners);
+        stored.map_err(|_| ())
+    }
+
// Sorted from the top, returns origins
pub async fn top_crates_in_category(&self, slug: &str) -> CResult<Arc<Vec<Origin>>> {
{