diff options
author | Kornel <kornel@geekhood.net> | 2020-02-23 19:27:51 +0000 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2020-02-23 19:27:51 +0000 |
commit | 97a9e602b44871572d040e8208c63190a7e74612 (patch) | |
tree | c099ebd45c6fea0304dd55d848881ca1acfd3941 /datadump | |
parent | f617258e79b9bd02d267184bc0c33b8908cc39b9 (diff) |
Owner info from data dump
Diffstat (limited to 'datadump')
-rw-r--r-- | datadump/src/main.rs | 75 |
1 file changed, 68 insertions, 7 deletions
diff --git a/datadump/src/main.rs b/datadump/src/main.rs index 09018d8..e25963d 100644 --- a/datadump/src/main.rs +++ b/datadump/src/main.rs @@ -1,7 +1,10 @@ #![allow(unused)] #![allow(dead_code)] +use std::convert::TryInto; use chrono::prelude::*; use kitchen_sink::KitchenSink; +use kitchen_sink::OwnerKind; +use kitchen_sink::CrateOwner; use libflate::gzip::Decoder; use serde_derive::Deserialize; use std::collections::HashMap; @@ -15,6 +18,7 @@ type BoxErr = Box<dyn std::error::Error + Sync + Send>; #[tokio::main] async fn main() -> Result<(), BoxErr> { + tokio::runtime::Handle::current().spawn(async move { let mut a = Archive::new(Decoder::new(BufReader::new(File::open("db-dump.tar.gz")?))?); let ksink = KitchenSink::new_default().await?; @@ -70,9 +74,16 @@ async fn main() -> Result<(), BoxErr> { index_downloads(crates, versions, &downloads, &ksink)?; } } + if let (Some(crates), Some(teams), Some(users)) = (&crates, &teams, &users) { + if let Some(crate_owners) = crate_owners.take() { + eprintln!("Indexing {} owners", crate_owners.len()); + index_owners(crates, crate_owners, teams, users, &ksink)?; + } + } } } Ok(()) + }).await.unwrap() } #[inline(never)] @@ -93,6 +104,50 @@ fn index_downloads(crates: &CratesMap, versions: &VersionsMap, downloads: &Versi Ok(()) } +#[inline(never)] +fn index_owners(crates: &CratesMap, owners: CrateOwners, teams: &Teams, users: &Users, ksink: &KitchenSink) -> Result<(), BoxErr> { + for (crate_id, owners) in owners { + if let Some(k) = crates.get(&crate_id) { + let owners: Vec<_> = owners.into_iter().map(|o| { + let invited_by_github_id = o.created_by_id.and_then(|id| users.get(&id).map(|u| u.github_id as u32).or_else(|| teams.get(&id).map(|t| t.github_id))); + match o.owner_kind { + 0 => { + let u = users.get(&o.owner_id).expect("owner consistency"); + CrateOwner { + id: o.owner_id as _, + login: u.login.to_owned(), + invited_at: Some(o.created_at), + invited_by_github_id, + github_id: u.github_id.try_into().ok(), + name: 
Some(u.name.to_owned()), + avatar: None, + url: String::new(), + kind: OwnerKind::User, + } + }, + 1 => { + let u = teams.get(&o.owner_id).expect("owner consistency"); + CrateOwner { + id: o.owner_id as _, + login: u.login.to_owned(), + invited_at: Some(o.created_at), + github_id: Some(u.github_id), + invited_by_github_id, + name: Some(u.name.to_owned()), + avatar: None, + url: String::new(), + kind: OwnerKind::Team, + } + }, + _ => panic!("bad owner type"), + } + }).collect(); + ksink.set_crates_io_crate_owners(&k.to_ascii_lowercase(), owners).map_err(|_| "ugh")?; + } + } + Ok(()) +} + #[derive(Deserialize)] struct CrateOwnerRow { crate_id: u32, @@ -102,14 +157,16 @@ struct CrateOwnerRow { owner_kind: u8, } +type CrateOwners = HashMap<u32, Vec<CrateOwnerRow>>; + #[inline(never)] -fn parse_crate_owners(file: impl Read) -> Result<HashMap<u32, CrateOwnerRow>, BoxErr> { +fn parse_crate_owners(file: impl Read) -> Result<CrateOwners, BoxErr> { let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file); let mut out = HashMap::with_capacity(NUM_CRATES); for r in csv.records() { let r = r?; let r = r.deserialize::<CrateOwnerRow>(None).map_err(|e| format!("wat? 
{:#?} {}", r, e))?; - out.insert(r.crate_id, r); + out.entry(r.crate_id).or_insert_with(|| Vec::with_capacity(1)).push(r); } Ok(out) } @@ -119,12 +176,14 @@ struct TeamRow { avatar: String, github_id: u32, id: u32, - login: String, - name: String, + login: String, // in the funny format + name: String, // human str } +type Teams = HashMap<u32, TeamRow>; + #[inline(never)] -fn parse_teams(file: impl Read) -> Result<HashMap<u32, TeamRow>, BoxErr> { +fn parse_teams(file: impl Read) -> Result<Teams, BoxErr> { let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file); let mut out = HashMap::with_capacity(NUM_CRATES); for r in csv.records() { @@ -138,14 +197,16 @@ fn parse_teams(file: impl Read) -> Result<HashMap<u32, TeamRow>, BoxErr> { #[derive(Deserialize)] struct UserRow { avatar: String, - github_id: i32, // -1 happens :( + github_id: i32, // there is -1 :( login: String, id: u32, name: String, } +type Users = HashMap<u32, UserRow>; + #[inline(never)] -fn parse_users(file: impl Read) -> Result<HashMap<u32, UserRow>, BoxErr> { +fn parse_users(file: impl Read) -> Result<Users, BoxErr> { let mut csv = csv::ReaderBuilder::new().has_headers(true).flexible(false).from_reader(file); let mut out = HashMap::with_capacity(NUM_CRATES); for r in csv.records() { |