summaryrefslogtreecommitdiffstats
path: root/datadump/src
diff options
context:
space:
mode:
author Kornel <kornel@geekhood.net> 2020-03-01 15:54:23 +0000
committer Kornel <kornel@geekhood.net> 2020-03-01 22:14:40 +0000
commit 01ad124c4872ed24751d5c36a8444bddbc4a4a64 (patch)
tree 66c44047fb79f03b3ac45a7ccf5fabf0667d61e5 /datadump/src
parent a38758e88c390600fef6a2eda243296bf8a198f6 (diff)
fmt
Diffstat (limited to 'datadump/src')
-rw-r--r-- datadump/src/main.rs 231
1 files changed, 120 insertions, 111 deletions
diff --git a/datadump/src/main.rs b/datadump/src/main.rs
index ac2a485..6f97a63 100644
--- a/datadump/src/main.rs
+++ b/datadump/src/main.rs
@@ -1,13 +1,13 @@
#![allow(unused)]
#![allow(dead_code)]
-use std::convert::TryInto;
use chrono::prelude::*;
+use kitchen_sink::CrateOwner;
use kitchen_sink::KitchenSink;
use kitchen_sink::OwnerKind;
-use kitchen_sink::CrateOwner;
use libflate::gzip::Decoder;
use serde_derive::Deserialize;
use std::collections::HashMap;
+use std::convert::TryInto;
use std::fs::File;
use std::io::BufReader;
use std::io::Read;
@@ -18,87 +18,92 @@ type BoxErr = Box<dyn std::error::Error + Sync + Send>;
#[tokio::main]
async fn main() -> Result<(), BoxErr> {
- tokio::runtime::Handle::current().spawn(async move {
- let mut a = Archive::new(Decoder::new(BufReader::new(File::open("db-dump.tar.gz")?))?);
- let ksink = KitchenSink::new_default().await?;
-
- tokio::task::block_in_place(move || {
+ tokio::runtime::Handle::current()
+ .spawn(async move {
+ let mut a = Archive::new(Decoder::new(BufReader::new(File::open("db-dump.tar.gz")?))?);
+ let ksink = KitchenSink::new_default().await?;
- let mut crate_owners = None;
- let mut crates = None;
- let mut metadata = None;
- let mut teams = None;
- let mut users = None;
- let mut downloads = None;
- let mut versions = None;
+ tokio::task::block_in_place(move || {
+ let mut crate_owners = None;
+ let mut crates = None;
+ let mut metadata = None;
+ let mut teams = None;
+ let mut users = None;
+ let mut downloads = None;
+ let mut versions = None;
- for file in a.entries()? {
- let file = file?;
- if !file.header().entry_type().is_file() {
- continue;
- }
- if let Some(path) = file.path()?.file_name().and_then(|f| f.to_str()) {
- eprint!("{} ({}KB): ", path, file.header().size()? / 1000);
- match path {
- "crate_owners.csv" => {
- eprintln!("parse_crate_owners…");
- crate_owners = Some(parse_crate_owners(file)?);
- },
- "crates.csv" => {
- eprintln!("parse_crates…");
- crates = Some(parse_crates(file)?);
- },
- "metadata.csv" => {
- eprintln!("parse_metadata…");
- metadata = Some(parse_metadata(file)?);
- },
- "teams.csv" => {
- eprintln!("parse_teams…");
- teams = Some(parse_teams(file)?);
- },
- "users.csv" => {
- eprintln!("parse_users…");
- users = Some(parse_users(file)?);
- },
- "version_downloads.csv" => {
- eprintln!("parse_version_downloads…");
- downloads = Some(parse_version_downloads(file)?);
- },
- "versions.csv" => {
- eprintln!("parse_versions…");
- versions = Some(parse_versions(file)?);
- },
- p => eprintln!("Ignored file {}", p),
- };
- if let (Some(crates), Some(versions)) = (&crates, &versions) {
- if let Some(downloads) = downloads.take() {
- eprintln!("Indexing {} crates, {} versions, {} downloads", crates.len(), versions.len(), downloads.len());
- index_downloads(crates, versions, &downloads, &ksink)?;
- }
- }
- if let (Some(crates), Some(teams), Some(users)) = (&crates, &teams, &users) {
- if let Some(crate_owners) = crate_owners.take() {
- eprintln!("Indexing {} owners", crate_owners.len());
- index_owners(crates, crate_owners, teams, users, &ksink)?;
+ for file in a.entries()? {
+ let file = file?;
+ if !file.header().entry_type().is_file() {
+ continue;
+ }
+ if let Some(path) = file.path()?.file_name().and_then(|f| f.to_str()) {
+ eprint!("{} ({}KB): ", path, file.header().size()? / 1000);
+ match path {
+ "crate_owners.csv" => {
+ eprintln!("parse_crate_owners…");
+ crate_owners = Some(parse_crate_owners(file)?);
+ },
+ "crates.csv" => {
+ eprintln!("parse_crates…");
+ crates = Some(parse_crates(file)?);
+ },
+ "metadata.csv" => {
+ eprintln!("parse_metadata…");
+ metadata = Some(parse_metadata(file)?);
+ },
+ "teams.csv" => {
+ eprintln!("parse_teams…");
+ teams = Some(parse_teams(file)?);
+ },
+ "users.csv" => {
+ eprintln!("parse_users…");
+ users = Some(parse_users(file)?);
+ },
+ "version_downloads.csv" => {
+ eprintln!("parse_version_downloads…");
+ downloads = Some(parse_version_downloads(file)?);
+ },
+ "versions.csv" => {
+ eprintln!("parse_versions…");
+ versions = Some(parse_versions(file)?);
+ },
+ p => eprintln!("Ignored file {}", p),
+ };
+ if let (Some(crates), Some(versions)) = (&crates, &versions) {
+ if let Some(downloads) = downloads.take() {
+ eprintln!("Indexing {} crates, {} versions, {} downloads", crates.len(), versions.len(), downloads.len());
+ index_downloads(crates, versions, &downloads, &ksink)?;
+ }
+ }
+ if let (Some(crates), Some(teams), Some(users)) = (&crates, &teams, &users) {
+ if let Some(crate_owners) = crate_owners.take() {
+ eprintln!("Indexing {} owners", crate_owners.len());
+ index_owners(crates, crate_owners, teams, users, &ksink)?;
+ }
+ }
+ }
}
- }
- }
- }
- Ok(())
- })
- }).await.unwrap()
+ Ok(())
+ })
+ })
+ .await
+ .unwrap()
}
#[inline(never)]
fn index_downloads(crates: &CratesMap, versions: &VersionsMap, downloads: &VersionDownloads, ksink: &KitchenSink) -> Result<(), BoxErr> {
for (crate_id, name) in crates {
if let Some(vers) = versions.get(crate_id) {
- let data = vers.iter().filter_map(|version| {
- if let Some(d) = downloads.get(&version.id) {
- return Some((version.num.as_str(), d.as_slice()));
- }
- None
- }).collect();
+ let data = vers
+ .iter()
+ .filter_map(|version| {
+ if let Some(d) = downloads.get(&version.id) {
+ return Some((version.num.as_str(), d.as_slice()));
+ }
+ None
+ })
+ .collect();
ksink.index_crate_downloads(name, &data)?;
} else {
eprintln!("Bad crate? {} {}", crate_id, name);
@@ -111,43 +116,47 @@ fn index_downloads(crates: &CratesMap, versions: &VersionsMap, downloads: &Versi
fn index_owners(crates: &CratesMap, owners: CrateOwners, teams: &Teams, users: &Users, ksink: &KitchenSink) -> Result<(), BoxErr> {
for (crate_id, owners) in owners {
if let Some(k) = crates.get(&crate_id) {
- let owners: Vec<_> = owners.into_iter().filter_map(|o| {
- let invited_by_github_id = o.created_by_id.and_then(|id| users.get(&id).map(|u| u.github_id as u32).or_else(|| teams.get(&id).map(|t| t.github_id)));
- Some(match o.owner_kind {
- 0 => {
- let u = users.get(&o.owner_id).expect("owner consistency");
- if u.github_id <= 0 {
- return None;
- }
- CrateOwner {
- id: o.owner_id as _,
- login: u.login.to_owned(),
- invited_at: Some(o.created_at),
- invited_by_github_id,
- github_id: u.github_id.try_into().ok(),
- name: Some(u.name.to_owned()),
- avatar: None,
- url: None,
- kind: OwnerKind::User,
- }
- },
- 1 => {
- let u = teams.get(&o.owner_id).expect("owner consistency");
- CrateOwner {
- id: o.owner_id as _,
- login: u.login.to_owned(),
- invited_at: Some(o.created_at),
- github_id: Some(u.github_id),
- invited_by_github_id,
- name: Some(u.name.to_owned()),
- avatar: None,
- url: None,
- kind: OwnerKind::Team,
- }
- },
- _ => panic!("bad owner type"),
+ let owners: Vec<_> = owners
+ .into_iter()
+ .filter_map(|o| {
+ let invited_by_github_id =
+ o.created_by_id.and_then(|id| users.get(&id).map(|u| u.github_id as u32).or_else(|| teams.get(&id).map(|t| t.github_id)));
+ Some(match o.owner_kind {
+ 0 => {
+ let u = users.get(&o.owner_id).expect("owner consistency");
+ if u.github_id <= 0 {
+ return None;
+ }
+ CrateOwner {
+ id: o.owner_id as _,
+ login: u.login.to_owned(),
+ invited_at: Some(o.created_at),
+ invited_by_github_id,
+ github_id: u.github_id.try_into().ok(),
+ name: Some(u.name.to_owned()),
+ avatar: None,
+ url: None,
+ kind: OwnerKind::User,
+ }
+ },
+ 1 => {
+ let u = teams.get(&o.owner_id).expect("owner consistency");
+ CrateOwner {
+ id: o.owner_id as _,
+ login: u.login.to_owned(),
+ invited_at: Some(o.created_at),
+ github_id: Some(u.github_id),
+ invited_by_github_id,
+ name: Some(u.name.to_owned()),
+ avatar: None,
+ url: None,
+ kind: OwnerKind::Team,
+ }
+ },
+ _ => panic!("bad owner type"),
+ })
})
- }).collect();
+ .collect();
ksink.set_crates_io_crate_owners(&k.to_ascii_lowercase(), owners).map_err(|_| "ugh")?;
}
}
@@ -183,7 +192,7 @@ struct TeamRow {
github_id: u32,
id: u32,
login: String, // in the funny format
- name: String, // human str
+ name: String, // human str
}
type Teams = HashMap<u32, TeamRow>;