From 3dcb89f0038ba8a0b805957ac0e2cc5a9af3f0b0 Mon Sep 17 00:00:00 2001 From: Kornel Date: Mon, 4 Feb 2019 21:21:44 +0100 Subject: Move folder --- github_info/.gitignore | 1 + github_info/Cargo.toml | 23 +++ github_info/README.md | 4 + github_info/src/lib_github.rs | 375 ++++++++++++++++++++++++++++++++++++++++++ github_info/src/model.rs | 181 ++++++++++++++++++++ 5 files changed, 584 insertions(+) create mode 100644 github_info/.gitignore create mode 100644 github_info/Cargo.toml create mode 100644 github_info/README.md create mode 100644 github_info/src/lib_github.rs create mode 100644 github_info/src/model.rs (limited to 'github_info') diff --git a/github_info/.gitignore b/github_info/.gitignore new file mode 100644 index 0000000..2f7896d --- /dev/null +++ b/github_info/.gitignore @@ -0,0 +1 @@ +target/ diff --git a/github_info/Cargo.toml b/github_info/Cargo.toml new file mode 100644 index 0000000..7bd0df8 --- /dev/null +++ b/github_info/Cargo.toml @@ -0,0 +1,23 @@ +[package] +edition = "2018" +name = "github_info" +version = "0.8.0" +authors = ["Kornel "] + +[lib] +name = "github_info" +path = "src/lib_github.rs" + +[dependencies] +repo_url = { git = "https://gitlab.com/crates.rs/repo_url.git" } +simple_cache = { git = "https://gitlab.com/crates.rs/simple_cache.git", version = "0.6.0" } +serde = "1.0.53" +serde_derive = "1.0.53" +serde_json = "1.0.17" +file = "1.1.2" +urlencoding = "1.0.0" +quick-error = "1.2.2" +hyper = "0.12.0" + +[dependencies.github-rs] +git = "https://github.com/mgattozzi/github-rs" diff --git a/github_info/README.md b/github_info/README.md new file mode 100644 index 0000000..fb3c7e8 --- /dev/null +++ b/github_info/README.md @@ -0,0 +1,4 @@ +# Getting data for crates.rs from GitHub API + +This is a small caching wrapper around github-rs that gets information about crate contributors (to display "and N contributors" on the crate page) as well as information about commit authors and user search (to merge lists of crate authors and owners). + diff --git a/github_info/src/lib_github.rs b/github_info/src/lib_github.rs new file mode 100644 index 0000000..75a1ea9 --- /dev/null +++ b/github_info/src/lib_github.rs @@ -0,0 +1,375 @@ +use github_rs; +use hyper::header::{HeaderValue, ACCEPT}; + +use serde; + +#[macro_use] extern crate serde_derive; +use serde_json; + +use simple_cache; +#[macro_use] extern crate quick_error; + +use std::path::Path; + +use urlencoding::encode; +use repo_url::SimpleRepo; +use github_rs::client; +use github_rs::{HeaderMap, StatusCode}; +use github_rs::client::Executor; +use std::time::{SystemTime, UNIX_EPOCH}; +use std::time::Duration; +use std::thread; +use simple_cache::TempCache; +use github_rs::headers::{rate_limit_remaining, rate_limit_reset}; + +mod model; +pub use crate::model::*; + +pub type CResult = Result; + +quick_error! { + #[derive(Debug)] + pub enum Error { + NoBody { + display("Reponse with no body") + } + TryAgainLater { + display("Accepted, but no data available yet") + } + Cache(err: Box) { + display("GH can't decode cache: {}", err) + from(e: simple_cache::Error) -> (Box::new(e)) + cause(err) + } + GitHub(err: String) { + display("{}", err) + from(e: github_rs::errors::Error) -> (e.to_string()) // non-Sync + } + Json(err: Box, call: Option<&'static str>) { + display("JSON decode error {} in {}", err, call.unwrap_or("github_info")) + from(e: serde_json::Error) -> (Box::new(e), None) + cause(err) + } + Time(err: std::time::SystemTimeError) { + display("{}", err) + from() + cause(err) + } + } +} + +impl Error { + pub fn context(self, ctx: &'static str) -> Self { + match self { + Error::Json(e, _) => Error::Json(e, Some(ctx)), + as_is => as_is, + } + } +} + +pub struct GitHub { + token: String, + orgs: TempCache<(String, Option>)>, + users: TempCache<(String, Option)>, + commits: TempCache<(String, Option>)>, + releases: TempCache<(String, Option>)>, + contribs: TempCache<(String, Option>)>, + topics: TempCache<(String, Option>)>, + repos: TempCache<(String, Option)>, + emails: TempCache<(String, Option>)>, +} + +impl GitHub { + pub fn new(cache_path: impl AsRef, token: impl Into) -> CResult { + Ok(Self { + token: token.into(), + orgs: TempCache::new(&cache_path.as_ref().with_file_name("github_orgs.bin"))?, + users: TempCache::new(&cache_path.as_ref().with_file_name("github_users.bin"))?, + commits: TempCache::new(&cache_path.as_ref().with_file_name("github_commits.bin"))?, + releases: TempCache::new(&cache_path.as_ref().with_file_name("github_releases.bin"))?, + contribs: TempCache::new(&cache_path.as_ref().with_file_name("github_contribs.bin"))?, + topics: TempCache::new(&cache_path.as_ref().with_file_name("github_topics.bin"))?, + repos: TempCache::new(&cache_path.as_ref().with_file_name("github_repos.bin"))?, + emails: TempCache::new(&cache_path.as_ref().with_file_name("github_emails.bin"))?, + }) + } + + fn client(&self) -> CResult { + Ok(client::Github::new(&self.token)?) + } + + pub fn user_by_email(&self, email: &str) -> CResult>> { + let std_suffix = "@users.noreply.github.com"; + if email.ends_with(std_suffix) { + let login = email[0..email.len() - std_suffix.len()].split('+').last().unwrap(); + if let Some(user) = self.user_by_login(login)? { + return Ok(Some(vec![user])); + } + } + let enc_email = encode(email); + self.get_cached(&self.emails, (email, ""), |client| client.get() + .custom_endpoint(&format!("search/users?q=in:email%20{}", enc_email)) + .execute(), |res: SearchResults| { + println!("Found {} = {:#?}", email, res.items); + res.items + }) + } + + pub fn user_by_login(&self, login: &str) -> CResult> { + let key = login.to_ascii_lowercase(); + self.get_cached(&self.users, (&key, ""), |client| client.get() + .users().username(login) + .execute(), id).map_err(|e| e.context("user_by_login")) + } + + pub fn user_by_id(&self, user_id: u32) -> CResult> { + let user_id = user_id.to_string(); + self.get_cached(&self.users, (&user_id, ""), |client| client.get() + .users().username(&user_id) + .execute(), id).map_err(|e| e.context("user_by_id")) + } + + pub fn user_orgs(&self, login: &str) -> CResult>> { + let key = login.to_ascii_lowercase(); + self.get_cached(&self.orgs, (&key, ""), |client| client.get() + .users().username(login).orgs() + .execute(), id).map_err(|e| e.context("user_orgs")) + } + + pub fn commits(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult>> { + let key = format!("commits/{}/{}", repo.owner, repo.repo); + self.get_cached(&self.commits, (&key, as_of_version), |client| client.get() + .repos().owner(&repo.owner).repo(&repo.repo) + .commits() + .execute(), id).map_err(|e| e.context("commits")) + } + + pub fn releases(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult>> { + let key = format!("release/{}/{}", repo.owner, repo.repo); + let path = format!("repos/{}/{}/releases", repo.owner, repo.repo); + self.get_cached(&self.releases, (&key, as_of_version), |client| client.get() + .custom_endpoint(&path) + .execute(), id).map_err(|e| e.context("releases")) + } + + pub fn topics(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult>> { + let key = format!("{}/{}", repo.owner, repo.repo); + let path = format!("repos/{}/{}/topics", repo.owner, repo.repo); + self.get_cached(&self.topics, (&key, as_of_version), |client| client.get() + .custom_endpoint(&path) + .set_header(ACCEPT, HeaderValue::from_static("application/vnd.github.mercy-preview+json")) + .execute(), |t: Topics| t.names).map_err(|e| e.context("topics")) + } + + pub fn repo(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult> { + let key = format!("{}/{}", repo.owner, repo.repo); + self.get_cached(&self.repos, (&key, as_of_version), |client| client.get() + .repos().owner(&repo.owner).repo(&repo.repo) + .execute(), |mut ghdata: GitHubRepo| { + // Keep GH-specific logic in here + if ghdata.has_pages { + // Name is case-sensitive + ghdata.github_page_url = Some(format!("https://{}.github.io/{}/", repo.owner, ghdata.name)); + } + // Some homepages are empty strings + if ghdata.homepage.as_ref().map_or(false, |h| !h.starts_with("http")) { + ghdata.homepage = None; + } + if !ghdata.has_issues { + ghdata.open_issues_count = None; + } + ghdata + }) + .map_err(|e| e.context("repo")) + } + + pub fn contributors(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult>> { + let path = format!("repos/{}/{}/stats/contributors", repo.owner, repo.repo); + let key = (path.as_str(), as_of_version); + let callback = |client: &client::Github| { + client.get().custom_endpoint(&path).execute() + }; + match self.get_cached(&self.contribs, key, callback, id) { + Err(Error::TryAgainLater) => { + thread::sleep(Duration::from_secs(1)); + match self.get_cached(&self.contribs, key, callback, id) { + Err(Error::TryAgainLater) => { + thread::sleep(Duration::from_secs(4)); + self.get_cached(&self.contribs, key, callback, id) + }, + res => res, + } + }, + Err(e) => Err(e.context("contributors")), + res => res, + } + } + + fn get_cached(&self, cache: &TempCache<(String, Option)>, key: (&str, &str), cb: F, postproc: P) -> CResult> + where P: FnOnce(B) -> R, + F: FnOnce(&client::Github) -> Result<(HeaderMap, StatusCode, Option), github_rs::errors::Error>, + B: for <'de> serde::Deserialize<'de> + serde::Serialize + Clone + Send + 'static, + R: for <'de> serde::Deserialize<'de> + serde::Serialize + Clone + Send + 'static + { + if let Some((ver, payload)) = cache.get(key.0)? { + if ver == key.1 { + return Ok(payload); + } + eprintln!("Cache near miss {}@{} vs {}", key.0, ver, key.1); + } + + let client = &self.client()?; + // eprintln!("Cache miss {}@{}", key.0, key.1); + let (headers, status, body) = cb(&*client)?; + eprintln!("Recvd {}@{} {:?} {:?}", key.0, key.1, status, headers); + if let (Some(rl), Some(rs)) = (rate_limit_remaining(&headers), rate_limit_reset(&headers)) { + let end_timestamp = Duration::from_secs(rs.into()); + let now = SystemTime::now().duration_since(UNIX_EPOCH)?; + let wait = (end_timestamp.checked_sub(now)).and_then(|d| d.checked_div(rl + 2)); + if let Some(wait) = wait { + if wait.as_secs() > 2 && (rl < 8 || wait.as_secs() < 15) { + eprintln!("need to wait! {:?}", wait); + thread::sleep(wait); + } + } + } + + let non_parsable_body = match status { + StatusCode::ACCEPTED | + StatusCode::CREATED => return Err(Error::TryAgainLater), + StatusCode::NO_CONTENT | + StatusCode::NOT_FOUND | + StatusCode::GONE | + StatusCode::MOVED_PERMANENTLY => true, + _ => false, + }; + + let keep_cached = match status { + StatusCode::NOT_FOUND | + StatusCode::GONE | + StatusCode::MOVED_PERMANENTLY => true, + _ => status.is_success(), + }; + + match body.ok_or(Error::NoBody).and_then(|stats| Ok(postproc(serde_json::from_value(stats)?))) { + Ok(val) => { + let res = (key.1.to_string(), Some(val)); + if keep_cached { + cache.set(key.0, &res)?; + } + Ok(res.1) + }, + Err(_) if non_parsable_body => { + if keep_cached { + cache.set(key.0, (key.1.to_string(), None))?; + } + Ok(None) + }, + Err(err) => Err(err)? + } + } +} + +fn id(v: T) -> T { + v +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +enum Payload { + Meta(Vec), + Contrib(Vec), + Res(SearchResults), + User(User), + Topics(Topics), + GitHubRepo(GitHubRepo), + Dud, +} + +impl Payloadable for Vec { + fn to(&self) -> Payload { + Payload::Meta(self.clone()) + } + + fn from(p: Payload) -> Option { + match p { + Payload::Meta(d) => Some(d), _ => None, + } + } +} + +impl Payloadable for Vec { + fn to(&self) -> Payload { + Payload::Contrib(self.clone()) + } + + fn from(p: Payload) -> Option { + match p { + Payload::Contrib(d) => Some(d), _ => None, + } + } +} + +impl Payloadable for SearchResults { + fn to(&self) -> Payload { + Payload::Res(self.clone()) + } + + fn from(p: Payload) -> Option { + match p { + Payload::Res(d) => Some(d), _ => None, + } + } +} + +impl Payloadable for User { + fn to(&self) -> Payload { + Payload::User(self.clone()) + } + + fn from(p: Payload) -> Option { + match p { + Payload::User(d) => Some(d), _ => None, + } + } +} + +pub(crate) trait Payloadable: Sized { + fn to(&self) -> Payload; + fn from(val: Payload) -> Option; +} + + +#[test] +fn github_contrib() { + let gh = GitHub::new( + "../data/github.db", + std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var")).unwrap(); + let repo = SimpleRepo{ + owner:"visionmedia".into(), + repo:"superagent".into(), + }; + gh.contributors(&repo, "").unwrap(); + gh.commits(&repo, "").unwrap(); +} + +#[test] +fn github_releases() { + let gh = GitHub::new( + "../data/github.db", + std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var")).unwrap(); + let repo = SimpleRepo{ + owner:"kornelski".into(), + repo:"pngquant".into(), + }; + assert!(gh.releases(&repo, "").unwrap().unwrap().len() > 2); +} + +#[test] +fn test_user_by_email() { + let gh = GitHub::new( + "../data/github.db", + std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var")).unwrap(); + let user = gh.user_by_email("github@pornel.net").unwrap().unwrap(); + assert_eq!("kornelski", user[0].login); +} + diff --git a/github_info/src/model.rs b/github_info/src/model.rs new file mode 100644 index 0000000..f01219f --- /dev/null +++ b/github_info/src/model.rs @@ -0,0 +1,181 @@ + +#[derive(Debug, Copy, Eq, PartialEq, Clone)] +pub enum UserType { + Org, + User, + Bot, +} + +use serde::Serializer; +use serde::de; +use serde::de::{Deserializer, Visitor}; +use serde::Serialize; +use serde::Deserialize; +use std::fmt; + +/// Case-insensitive enum +impl<'de> Deserialize<'de> for UserType { + fn deserialize(deserializer: D) -> Result + where D: Deserializer<'de>, + { + struct UserTypeVisitor; + + impl<'a> Visitor<'a> for UserTypeVisitor { + type Value = UserType; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("user/org/bot") + } + + fn visit_str(self, v: &str) -> Result { + match v.to_ascii_lowercase().as_str() { + "org" | "organization" => Ok(UserType::Org), + "user" => Ok(UserType::User), + "bot" => Ok(UserType::Bot), + x => Err(de::Error::unknown_variant(x, &["user", "org", "bot"])), + } + } + + fn visit_string(self, v: String) -> Result { + self.visit_str(&v) + } + } + + deserializer.deserialize_string(UserTypeVisitor) + } +} + +impl Serialize for UserType { + fn serialize(&self, serializer: S) -> Result + where S: Serializer, + { + serializer.serialize_str(match *self { + UserType::User => "user", + UserType::Org => "org", + UserType::Bot => "bot", + }) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct User { + pub id: u32, + pub login: String, + pub name: Option, + pub avatar_url: Option, // "https://avatars0.githubusercontent.com/u/1111?v=4", + pub gravatar_id: Option, // "", + pub html_url: String, // "https://github.com/zzzz", + pub blog: Option, // "https://example.com + #[serde(rename="type")] + pub user_type: UserType, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContribWeek { + #[serde(rename="w")] + pub week_timestamp: u32, + #[serde(rename="a")] + pub added: u32, + #[serde(rename="d")] + pub deleted: u32, + #[serde(rename="c")] + pub commits: u32, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResults { + pub items: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UserContrib { + pub total: u32, + pub weeks: Vec, + pub author: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GitCommitAuthor { + pub date: String, // "2018-04-30T16:24:52Z", + pub email: String, + pub name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GitCommit { + pub author: GitCommitAuthor, + pub committer: GitCommitAuthor, + pub message: String, + pub comment_count: u32, + // tree.sha +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CommitMeta { + pub sha: String, // TODO: deserialize to bin + pub author: Option, + pub committer: Option, + pub commit: GitCommit, + // parents: [{sha}] +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GitHubRepo { + pub name: String, + pub description: Option, + pub fork: bool, + pub created_at: String, + pub updated_at: Option, + pub pushed_at: Option, + pub homepage: Option, + pub stargazers_count: u32, // Stars + pub forks_count: u32, // Real number of forks + pub subscribers_count: u32, // Real number of watches + pub has_issues: bool, + pub open_issues_count: Option, + // language: JavaScript, + pub has_downloads: bool, + // has_wiki: true, + pub has_pages: bool, + pub archived: bool, + // default_branch: master, + + /// My custom addition! + pub github_page_url: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GitHubRelease { + // url: Option, // "https://api.github.com/repos/octocat/Hello-World/releases/1", + // html_url: Option, // "https://github.com/octocat/Hello-World/releases/v1.0.0", + // assets_url: Option, // "https://api.github.com/repos/octocat/Hello-World/releases/1/assets", + // upload_url: Option, // "https://uploads.github.com/repos/octocat/Hello-World/releases/1/assets{?name,label}", + // tarball_url: Option, // "https://api.github.com/repos/octocat/Hello-World/tarball/v1.0.0", + // zipball_url: Option, // "https://api.github.com/repos/octocat/Hello-World/zipball/v1.0.0", + // id: Option, // 1, + // node_id: Option, // "MDc6UmVsZWFzZTE=", + pub tag_name: Option, // "v1.0.0", + // target_commitish: Option, // "master", + // name: Option, // "v1.0.0", + pub body: Option, // "Description of the release", + pub draft: Option, // false, + pub prerelease: Option, // false, + pub created_at: Option, // "2013-02-27T19:35:32Z", + pub published_at: Option, // "2013-02-27T19:35:32Z", +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Topics { + pub names: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UserOrg { + pub login: String, // "github", + //id: String, // 1, + // node_id: String, // "MDEyOk9yZ2FuaXphdGlvbjE=", + pub url: String, // "https://api.github.com/orgs/github", + // public_members_url: String, // "https://api.github.com/orgs/github/public_members{/member}", + // avatar_url: String, // "https://github.com/images/error/octocat_happy.gif", + pub description: Option, // "A great organization" +} -- cgit v1.2.3