summaryrefslogtreecommitdiffstats
path: root/github_info
diff options
context:
space:
mode:
authorKornel <kornel@geekhood.net>2019-02-04 21:21:44 +0100
committerKornel <kornel@geekhood.net>2019-02-04 21:21:44 +0100
commit3dcb89f0038ba8a0b805957ac0e2cc5a9af3f0b0 (patch)
tree27391fa35a4b4d2bb7ba16bfff23a11b5965b186 /github_info
parent7c60b356f687eb48f2f0156c3b0260f66d251289 (diff)
Move folder
Diffstat (limited to 'github_info')
-rw-r--r--github_info/.gitignore1
-rw-r--r--github_info/Cargo.toml23
-rw-r--r--github_info/README.md4
-rw-r--r--github_info/src/lib_github.rs375
-rw-r--r--github_info/src/model.rs181
5 files changed, 584 insertions, 0 deletions
diff --git a/github_info/.gitignore b/github_info/.gitignore
new file mode 100644
index 0000000..2f7896d
--- /dev/null
+++ b/github_info/.gitignore
@@ -0,0 +1 @@
+target/
diff --git a/github_info/Cargo.toml b/github_info/Cargo.toml
new file mode 100644
index 0000000..7bd0df8
--- /dev/null
+++ b/github_info/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+edition = "2018"
+name = "github_info"
+version = "0.8.0"
+authors = ["Kornel <kornel@geekhood.net>"]
+
+[lib]
+name = "github_info"
+path = "src/lib_github.rs"
+
+[dependencies]
+repo_url = { git = "https://gitlab.com/crates.rs/repo_url.git" }
+simple_cache = { git = "https://gitlab.com/crates.rs/simple_cache.git", version = "0.6.0" }
+serde = "1.0.53"
+serde_derive = "1.0.53"
+serde_json = "1.0.17"
+file = "1.1.2"
+urlencoding = "1.0.0"
+quick-error = "1.2.2"
+hyper = "0.12.0"
+
+[dependencies.github-rs]
+git = "https://github.com/mgattozzi/github-rs"
diff --git a/github_info/README.md b/github_info/README.md
new file mode 100644
index 0000000..fb3c7e8
--- /dev/null
+++ b/github_info/README.md
@@ -0,0 +1,4 @@
+# Getting data for crates.rs from GitHub API
+
+This is a small caching wrapper around github-rs that gets information about crate contributors (to display "and N contributors" on the crate page) as well as information about commit authors and user search (to merge lists of crate authors and owners).
+
diff --git a/github_info/src/lib_github.rs b/github_info/src/lib_github.rs
new file mode 100644
index 0000000..75a1ea9
--- /dev/null
+++ b/github_info/src/lib_github.rs
@@ -0,0 +1,375 @@
+use github_rs;
+use hyper::header::{HeaderValue, ACCEPT};
+
+use serde;
+
+#[macro_use] extern crate serde_derive;
+use serde_json;
+
+use simple_cache;
+#[macro_use] extern crate quick_error;
+
+use std::path::Path;
+
+use urlencoding::encode;
+use repo_url::SimpleRepo;
+use github_rs::client;
+use github_rs::{HeaderMap, StatusCode};
+use github_rs::client::Executor;
+use std::time::{SystemTime, UNIX_EPOCH};
+use std::time::Duration;
+use std::thread;
+use simple_cache::TempCache;
+use github_rs::headers::{rate_limit_remaining, rate_limit_reset};
+
+mod model;
+pub use crate::model::*;
+
+pub type CResult<T> = Result<T, Error>;
+
+quick_error! {
+ #[derive(Debug)]
+ pub enum Error {
+ NoBody {
+ display("Reponse with no body")
+ }
+ TryAgainLater {
+ display("Accepted, but no data available yet")
+ }
+ Cache(err: Box<simple_cache::Error>) {
+ display("GH can't decode cache: {}", err)
+ from(e: simple_cache::Error) -> (Box::new(e))
+ cause(err)
+ }
+ GitHub(err: String) {
+ display("{}", err)
+ from(e: github_rs::errors::Error) -> (e.to_string()) // non-Sync
+ }
+ Json(err: Box<serde_json::Error>, call: Option<&'static str>) {
+ display("JSON decode error {} in {}", err, call.unwrap_or("github_info"))
+ from(e: serde_json::Error) -> (Box::new(e), None)
+ cause(err)
+ }
+ Time(err: std::time::SystemTimeError) {
+ display("{}", err)
+ from()
+ cause(err)
+ }
+ }
+}
+
+impl Error {
+    /// Tags a JSON decoding error with the name of the API call that produced it.
+    ///
+    /// Only `Error::Json` carries a call name; every other variant is returned unchanged.
+    pub fn context(self, ctx: &'static str) -> Self {
+        if let Error::Json(source, _) = self {
+            Error::Json(source, Some(ctx))
+        } else {
+            self
+        }
+    }
+}
+
+/// Caching front-end for the GitHub API calls this crate makes.
+///
+/// Holds the API token plus one `TempCache` per kind of data. Every cache entry
+/// is a `(version, payload)` pair: the stored version string is compared with
+/// the caller-supplied one on lookup, and a `None` payload is stored when a
+/// cacheable response (e.g. 404) had no decodable body.
+pub struct GitHub {
+ /// Token passed to `client::Github::new` for each request.
+ token: String,
+ /// Filled by `user_orgs`.
+ orgs: TempCache<(String, Option<Vec<UserOrg>>)>,
+ /// Filled by `user_by_login` and `user_by_id`.
+ users: TempCache<(String, Option<User>)>,
+ /// Filled by `commits`.
+ commits: TempCache<(String, Option<Vec<CommitMeta>>)>,
+ /// Filled by `releases`.
+ releases: TempCache<(String, Option<Vec<GitHubRelease>>)>,
+ /// Filled by `contributors`.
+ contribs: TempCache<(String, Option<Vec<UserContrib>>)>,
+ /// Filled by `topics`.
+ topics: TempCache<(String, Option<Vec<String>>)>,
+ /// Filled by `repo`.
+ repos: TempCache<(String, Option<GitHubRepo>)>,
+ /// Filled by `user_by_email`.
+ emails: TempCache<(String, Option<Vec<User>>)>,
+}
+
+impl GitHub {
+ /// Opens all caches and stores the API token.
+ ///
+ /// Only the directory part of `cache_path` matters: its final component is
+ /// replaced with a fixed `github_*.bin` file name for each cache.
+ ///
+ /// Fails if any of the caches cannot be opened.
+ pub fn new(cache_path: impl AsRef<Path>, token: impl Into<String>) -> CResult<Self> {
+     let base = cache_path.as_ref();
+     let sibling = |file_name: &str| base.with_file_name(file_name);
+     Ok(Self {
+         token: token.into(),
+         orgs: TempCache::new(&sibling("github_orgs.bin"))?,
+         users: TempCache::new(&sibling("github_users.bin"))?,
+         commits: TempCache::new(&sibling("github_commits.bin"))?,
+         releases: TempCache::new(&sibling("github_releases.bin"))?,
+         contribs: TempCache::new(&sibling("github_contribs.bin"))?,
+         topics: TempCache::new(&sibling("github_topics.bin"))?,
+         repos: TempCache::new(&sibling("github_repos.bin"))?,
+         emails: TempCache::new(&sibling("github_emails.bin"))?,
+     })
+ }
+
+ /// Builds a fresh `github_rs` client authenticated with the stored token.
+ fn client(&self) -> CResult<client::Github> {
+     client::Github::new(&self.token).map_err(Error::from)
+ }
+
+ /// Finds GitHub users matching an e-mail address.
+ ///
+ /// Addresses ending in `@users.noreply.github.com` carry the login (the part
+ /// after the last `+`, if any), so that login is tried through
+ /// `user_by_login` first. Otherwise, or when that lookup finds nobody, the
+ /// `search/users` endpoint is queried and the result is cached under the
+ /// e-mail address.
+ pub fn user_by_email(&self, email: &str) -> CResult<Option<Vec<User>>> {
+     let std_suffix = "@users.noreply.github.com";
+     if email.ends_with(std_suffix) {
+         let local_part = &email[..email.len() - std_suffix.len()];
+         // `rsplit` always yields at least one item, so the fallback is never taken.
+         let login = local_part.rsplit('+').next().unwrap_or(local_part);
+         // An empty login would query the bare `users/` endpoint, which is not a single user.
+         if !login.is_empty() {
+             if let Some(user) = self.user_by_login(login)? {
+                 return Ok(Some(vec![user]));
+             }
+         }
+     }
+     let enc_email = encode(email);
+     self.get_cached(&self.emails, (email, ""), |client| client.get()
+         .custom_endpoint(&format!("search/users?q=in:email%20{}", enc_email))
+         .execute(), |res: SearchResults<User>| {
+             // Diagnostics go to stderr, like the rest of this crate's logging.
+             eprintln!("Found {} = {:#?}", email, res.items);
+             res.items
+         })
+         .map_err(|e| e.context("user_by_email"))
+ }
+
+ /// Fetches a user by login name.
+ ///
+ /// The cache key is the ASCII-lowercased login; the request itself uses the
+ /// login exactly as given.
+ pub fn user_by_login(&self, login: &str) -> CResult<Option<User>> {
+     let cache_key = login.to_ascii_lowercase();
+     let fetch = |gh: &client::Github| gh.get().users().username(login).execute();
+     self.get_cached(&self.users, (cache_key.as_str(), ""), fetch, id)
+         .map_err(|err| err.context("user_by_login"))
+ }
+
+ /// Fetches a user by numeric GitHub account id.
+ ///
+ /// Ids are resolved through the `user/{id}` endpoint. `users/{name}` only
+ /// accepts logins, so sending an id there would fetch whichever account
+ /// happens to use those digits as its login.
+ pub fn user_by_id(&self, user_id: u32) -> CResult<Option<User>> {
+     // Ids share the `users` cache with logins; the "id:" prefix keeps id 123
+     // apart from a login spelled "123" (':' cannot occur in a GitHub login).
+     let key = format!("id:{}", user_id);
+     let path = format!("user/{}", user_id);
+     self.get_cached(&self.users, (&key, ""), |client| client.get()
+         .custom_endpoint(&path)
+         .execute(), id).map_err(|e| e.context("user_by_id"))
+ }
+
+ /// Lists the organizations returned for a user's `orgs` endpoint.
+ ///
+ /// Cached under the ASCII-lowercased login.
+ pub fn user_orgs(&self, login: &str) -> CResult<Option<Vec<UserOrg>>> {
+     let cache_key = login.to_ascii_lowercase();
+     let fetch = |gh: &client::Github| gh.get().users().username(login).orgs().execute();
+     self.get_cached(&self.orgs, (cache_key.as_str(), ""), fetch, id)
+         .map_err(|err| err.context("user_orgs"))
+ }
+
+ /// Fetches the commit list of `repo`.
+ ///
+ /// `as_of_version` is the cache version tag: a cached entry is reused only
+ /// when it was stored under the same tag.
+ pub fn commits(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult<Option<Vec<CommitMeta>>> {
+     let cache_key = format!("commits/{}/{}", repo.owner, repo.repo);
+     let fetch = |gh: &client::Github| {
+         gh.get().repos().owner(&repo.owner).repo(&repo.repo).commits().execute()
+     };
+     self.get_cached(&self.commits, (cache_key.as_str(), as_of_version), fetch, id)
+         .map_err(|err| err.context("commits"))
+ }
+
+ /// Fetches the releases of `repo` from `repos/{owner}/{repo}/releases`.
+ ///
+ /// `as_of_version` is the cache version tag: a cached entry is reused only
+ /// when it was stored under the same tag.
+ pub fn releases(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult<Option<Vec<GitHubRelease>>> {
+     let cache_key = format!("release/{}/{}", repo.owner, repo.repo);
+     let endpoint = format!("repos/{}/{}/releases", repo.owner, repo.repo);
+     let fetch = |gh: &client::Github| gh.get().custom_endpoint(&endpoint).execute();
+     self.get_cached(&self.releases, (cache_key.as_str(), as_of_version), fetch, id)
+         .map_err(|err| err.context("releases"))
+ }
+
+ /// Fetches the topic names of `repo` from `repos/{owner}/{repo}/topics`.
+ ///
+ /// The request sets the `mercy-preview` Accept header; only the `names`
+ /// list of the response is kept and cached.
+ pub fn topics(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult<Option<Vec<String>>> {
+     let cache_key = format!("{}/{}", repo.owner, repo.repo);
+     let endpoint = format!("repos/{}/{}/topics", repo.owner, repo.repo);
+     let fetch = |gh: &client::Github| {
+         gh.get()
+             .custom_endpoint(&endpoint)
+             .set_header(ACCEPT, HeaderValue::from_static("application/vnd.github.mercy-preview+json"))
+             .execute()
+     };
+     let names_only = |t: Topics| t.names;
+     self.get_cached(&self.topics, (cache_key.as_str(), as_of_version), fetch, names_only)
+         .map_err(|err| err.context("topics"))
+ }
+
+ /// Fetches repository metadata and normalizes a few GitHub-specific fields.
+ ///
+ /// Before caching, the decoded data is adjusted:
+ /// - `github_page_url` is filled in when the repository has Pages enabled,
+ /// - a `homepage` that does not start with `http` is dropped,
+ /// - `open_issues_count` is cleared when issues are disabled.
+ pub fn repo(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult<Option<GitHubRepo>> {
+     let cache_key = format!("{}/{}", repo.owner, repo.repo);
+     let fetch = |gh: &client::Github| {
+         gh.get().repos().owner(&repo.owner).repo(&repo.repo).execute()
+     };
+     // Keep GH-specific logic in here
+     let normalize = |mut data: GitHubRepo| {
+         if data.has_pages {
+             // Name is case-sensitive
+             data.github_page_url = Some(format!("https://{}.github.io/{}/", repo.owner, data.name));
+         }
+         // Some homepages are empty strings
+         data.homepage = data.homepage.take().filter(|h| h.starts_with("http"));
+         if !data.has_issues {
+             data.open_issues_count = None;
+         }
+         data
+     };
+     self.get_cached(&self.repos, (cache_key.as_str(), as_of_version), fetch, normalize)
+         .map_err(|err| err.context("repo"))
+ }
+
+ /// Fetches per-contributor statistics from `repos/{owner}/{repo}/stats/contributors`.
+ ///
+ /// When the request reports `Error::TryAgainLater` it is retried after 1 second
+ /// and then after 4 more seconds; if the data is still not ready after that,
+ /// `Error::TryAgainLater` is returned. JSON errors from any attempt, including
+ /// the retries, are tagged with the "contributors" context.
+ pub fn contributors(&self, repo: &SimpleRepo, as_of_version: &str) -> CResult<Option<Vec<UserContrib>>> {
+     let path = format!("repos/{}/{}/stats/contributors", repo.owner, repo.repo);
+     let key = (path.as_str(), as_of_version);
+     let callback = |client: &client::Github| {
+         client.get().custom_endpoint(&path).execute()
+     };
+     let mut result = self.get_cached(&self.contribs, key, callback, id);
+     // Back-off delays (seconds) before the second and third attempts.
+     for &delay_secs in &[1u64, 4] {
+         match result {
+             Err(Error::TryAgainLater) => {
+                 thread::sleep(Duration::from_secs(delay_secs));
+                 result = self.get_cached(&self.contribs, key, callback, id);
+             },
+             _ => break,
+         }
+     }
+     result.map_err(|e| e.context("contributors"))
+ }
+
+ /// Returns the cached value for `key`, or performs the request and caches its result.
+ ///
+ /// `key` is `(cache key, version tag)`. A cached entry is returned only when its
+ /// stored tag equals the requested one. Otherwise `cb` performs the request, the
+ /// JSON body is decoded as `B`, and `postproc` turns it into the cached type `R`.
+ ///
+ /// After each request the rate-limit headers are inspected and the thread may
+ /// sleep to spread the remaining quota over the time left until the reset.
+ ///
+ /// Status handling:
+ /// - 201/202 return `Error::TryAgainLater` and cache nothing.
+ /// - 204/404/410/301 tolerate a missing or undecodable body and yield `Ok(None)`.
+ /// - Results are written to the cache only for success statuses and 404/410/301.
+ fn get_cached<F, P, B, R>(&self, cache: &TempCache<(String, Option<R>)>, key: (&str, &str), cb: F, postproc: P) -> CResult<Option<R>>
+     where P: FnOnce(B) -> R,
+     F: FnOnce(&client::Github) -> Result<(HeaderMap, StatusCode, Option<serde_json::Value>), github_rs::errors::Error>,
+     B: for <'de> serde::Deserialize<'de> + serde::Serialize + Clone + Send + 'static,
+     R: for <'de> serde::Deserialize<'de> + serde::Serialize + Clone + Send + 'static
+ {
+     let (cache_key, wanted_ver) = key;
+     if let Some((stored_ver, payload)) = cache.get(cache_key)? {
+         if stored_ver == wanted_ver {
+             return Ok(payload);
+         }
+         eprintln!("Cache near miss {}@{} vs {}", cache_key, stored_ver, wanted_ver);
+     }
+
+     let client = self.client()?;
+     let (headers, status, body) = cb(&client)?;
+     eprintln!("Recvd {}@{} {:?} {:?}", cache_key, wanted_ver, status, headers);
+
+     if let (Some(remaining), Some(reset_at)) = (rate_limit_remaining(&headers), rate_limit_reset(&headers)) {
+         let reset_time = Duration::from_secs(reset_at.into());
+         let now = SystemTime::now().duration_since(UNIX_EPOCH)?;
+         // Time left until the quota resets, split evenly over the remaining requests.
+         let pause = reset_time.checked_sub(now).and_then(|left| left.checked_div(remaining + 2));
+         match pause {
+             Some(pause) if pause.as_secs() > 2 && (remaining < 8 || pause.as_secs() < 15) => {
+                 eprintln!("need to wait! {:?}", pause);
+                 thread::sleep(pause);
+             },
+             _ => {},
+         }
+     }
+
+     if status == StatusCode::ACCEPTED || status == StatusCode::CREATED {
+         return Err(Error::TryAgainLater);
+     }
+     let permanently_absent = status == StatusCode::NOT_FOUND
+         || status == StatusCode::GONE
+         || status == StatusCode::MOVED_PERMANENTLY;
+     let non_parsable_body = permanently_absent || status == StatusCode::NO_CONTENT;
+     let keep_cached = permanently_absent || status.is_success();
+
+     let decoded: CResult<R> = match body {
+         Some(json) => serde_json::from_value::<B>(json).map(postproc).map_err(Error::from),
+         None => Err(Error::NoBody),
+     };
+     match decoded {
+         Ok(val) => {
+             let entry = (wanted_ver.to_string(), Some(val));
+             if keep_cached {
+                 cache.set(cache_key, &entry)?;
+             }
+             Ok(entry.1)
+         },
+         Err(_) if non_parsable_body => {
+             if keep_cached {
+                 cache.set(cache_key, (wanted_ver.to_string(), None))?;
+             }
+             Ok(None)
+         },
+         Err(err) => Err(err),
+     }
+ }
+}
+
+/// Identity function; passed as the post-processing step when a response needs none.
+fn id<T>(value: T) -> T {
+    value
+}
+
+/// Serializable enum with one variant per kind of payload.
+///
+/// The `Payloadable` impls in this file convert to and from the `Meta`,
+/// `Contrib`, `Res` and `User` variants.
+// NOTE(review): nothing visible in this file constructs `Topics`, `GitHubRepo`
+// or `Dud` — confirm whether they are still needed.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+enum Payload {
+ Meta(Vec<CommitMeta>),
+ Contrib(Vec<UserContrib>),
+ Res(SearchResults<User>),
+ User(User),
+ Topics(Topics),
+ GitHubRepo(GitHubRepo),
+ Dud,
+}
+
+impl Payloadable for Vec<CommitMeta> {
+    /// Wraps a copy of the commit list in `Payload::Meta`.
+    fn to(&self) -> Payload {
+        Payload::Meta(self.clone())
+    }
+
+    /// Returns the commit list if `p` is `Payload::Meta`, otherwise `None`.
+    fn from(p: Payload) -> Option<Self> {
+        if let Payload::Meta(commits) = p {
+            Some(commits)
+        } else {
+            None
+        }
+    }
+}
+
+impl Payloadable for Vec<UserContrib> {
+    /// Wraps a copy of the contributor list in `Payload::Contrib`.
+    fn to(&self) -> Payload {
+        Payload::Contrib(self.clone())
+    }
+
+    /// Returns the contributor list if `p` is `Payload::Contrib`, otherwise `None`.
+    fn from(p: Payload) -> Option<Self> {
+        if let Payload::Contrib(contribs) = p {
+            Some(contribs)
+        } else {
+            None
+        }
+    }
+}
+
+impl Payloadable for SearchResults<User> {
+    /// Wraps a copy of the search results in `Payload::Res`.
+    fn to(&self) -> Payload {
+        Payload::Res(self.clone())
+    }
+
+    /// Returns the search results if `p` is `Payload::Res`, otherwise `None`.
+    fn from(p: Payload) -> Option<Self> {
+        if let Payload::Res(results) = p {
+            Some(results)
+        } else {
+            None
+        }
+    }
+}
+
+impl Payloadable for User {
+    /// Wraps a copy of the user in `Payload::User`.
+    fn to(&self) -> Payload {
+        Payload::User(self.clone())
+    }
+
+    /// Returns the user if `p` is `Payload::User`, otherwise `None`.
+    fn from(p: Payload) -> Option<Self> {
+        if let Payload::User(user) = p {
+            Some(user)
+        } else {
+            None
+        }
+    }
+}
+
+/// Conversion between a concrete payload type and the `Payload` enum.
+pub(crate) trait Payloadable: Sized {
+ /// Wraps a copy of `self` in the matching `Payload` variant.
+ fn to(&self) -> Payload;
+ /// Unwraps `val` when it holds this type's variant; returns `None` for any other variant.
+ fn from(val: Payload) -> Option<Self>;
+}
+
+
+/// Live-API smoke test: contributor stats and commits can be fetched and decoded.
+/// Requires the `GITHUB_TOKEN` environment variable.
+#[test]
+fn github_contrib() {
+    let token = std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var");
+    let gh = GitHub::new("../data/github.db", token).unwrap();
+    let repo = SimpleRepo {
+        owner: "visionmedia".into(),
+        repo: "superagent".into(),
+    };
+    gh.contributors(&repo, "").unwrap();
+    gh.commits(&repo, "").unwrap();
+}
+
+/// Live-API smoke test: a repository known to have releases returns more than two.
+/// Requires the `GITHUB_TOKEN` environment variable.
+#[test]
+fn github_releases() {
+    let token = std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var");
+    let gh = GitHub::new("../data/github.db", token).unwrap();
+    let repo = SimpleRepo {
+        owner: "kornelski".into(),
+        repo: "pngquant".into(),
+    };
+    let releases = gh.releases(&repo, "").unwrap().unwrap();
+    assert!(releases.len() > 2);
+}
+
+/// Live-API smoke test: e-mail search resolves to the expected login.
+/// Requires the `GITHUB_TOKEN` environment variable.
+#[test]
+fn test_user_by_email() {
+    let token = std::env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN env var");
+    let gh = GitHub::new("../data/github.db", token).unwrap();
+    let matches = gh.user_by_email("github@pornel.net").unwrap().unwrap();
+    assert_eq!("kornelski", matches[0].login);
+}
+
diff --git a/github_info/src/model.rs b/github_info/src/model.rs
new file mode 100644
index 0000000..f01219f
--- /dev/null
+++ b/github_info/src/model.rs
@@ -0,0 +1,181 @@
+
+/// Kind of GitHub account.
+///
+/// Serialized as the lowercase strings "org", "user" and "bot"; deserialized
+/// case-insensitively, with "organization" also accepted for `Org`.
+#[derive(Debug, Copy, Eq, PartialEq, Clone)]
+pub enum UserType {
+ Org,
+ User,
+ Bot,
+}
+
+use serde::Serializer;
+use serde::de;
+use serde::de::{Deserializer, Visitor};
+use serde::Serialize;
+use serde::Deserialize;
+use std::fmt;
+
+/// Case-insensitive enum
+///
+/// Accepts "user", "bot", "org" and "organization" in any ASCII letter case.
+impl<'de> Deserialize<'de> for UserType {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        /// Visitor that maps a string to a `UserType`.
+        struct UserTypeVisitor;
+
+        impl<'a> Visitor<'a> for UserTypeVisitor {
+            type Value = UserType;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+                formatter.write_str("user/org/bot")
+            }
+
+            fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
+                const EXPECTED: &[&str] = &["user", "org", "bot"];
+                let lowered = v.to_ascii_lowercase();
+                let parsed = match lowered.as_str() {
+                    "user" => UserType::User,
+                    "bot" => UserType::Bot,
+                    "org" | "organization" => UserType::Org,
+                    // The error reports the lowercased spelling.
+                    unknown => return Err(de::Error::unknown_variant(unknown, EXPECTED)),
+                };
+                Ok(parsed)
+            }
+
+            fn visit_string<E: de::Error>(self, v: String) -> Result<Self::Value, E> {
+                self.visit_str(v.as_str())
+            }
+        }
+
+        deserializer.deserialize_string(UserTypeVisitor)
+    }
+}
+
+/// Writes the variant as one of the lowercase strings "user", "org" or "bot".
+impl Serialize for UserType {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let label = match *self {
+            UserType::Org => "org",
+            UserType::Bot => "bot",
+            UserType::User => "user",
+        };
+        serializer.serialize_str(label)
+    }
+}
+
+/// A GitHub account (user, organization or bot, per `user_type`) as decoded from API JSON.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct User {
+ pub id: u32,
+ pub login: String,
+ pub name: Option<String>,
+ pub avatar_url: Option<String>, // "https://avatars0.githubusercontent.com/u/1111?v=4",
+ pub gravatar_id: Option<String>, // "",
+ pub html_url: String, // "https://github.com/zzzz",
+ pub blog: Option<String>, // "https://example.com
+ /// Read from and written to the JSON field named `type`.
+ #[serde(rename="type")]
+ pub user_type: UserType,
+}
+
+/// One element of `UserContrib::weeks`; the JSON uses the one-letter keys `w`, `a`, `d`, `c`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContribWeek {
+ /// JSON key `w`.
+ // NOTE(review): presumably a Unix timestamp for the start of the week — confirm against the API docs.
+ #[serde(rename="w")]
+ pub week_timestamp: u32,
+ /// JSON key `a`.
+ #[serde(rename="a")]
+ pub added: u32,
+ /// JSON key `d`.
+ #[serde(rename="d")]
+ pub deleted: u32,
+ /// JSON key `c`.
+ #[serde(rename="c")]
+ pub commits: u32,
+}
+
+/// Wrapper for a search response; only the `items` list is decoded.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SearchResults<T> {
+ pub items: Vec<T>,
+}
+
+/// One contributor entry: a total, a per-week breakdown and, when present, the author's account.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct UserContrib {
+ pub total: u32,
+ pub weeks: Vec<ContribWeek>,
+ pub author: Option<User>,
+}
+
+/// Author or committer identity recorded in a commit (see `GitCommit`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GitCommitAuthor {
+ pub date: String, // "2018-04-30T16:24:52Z",
+ pub email: String,
+ pub name: Option<String>,
+}
+
+/// The `commit` object nested inside `CommitMeta`: author, committer, message and comment count.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GitCommit {
+ pub author: GitCommitAuthor,
+ pub committer: GitCommitAuthor,
+ pub message: String,
+ pub comment_count: u32,
+ // tree.sha
+}
+
+/// One entry of a repository's commit list, as returned by `GitHub::commits`.
+///
+/// `author` and `committer` are optional `User` accounts; the identities
+/// recorded in the commit itself are in `commit`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CommitMeta {
+ pub sha: String, // TODO: deserialize to bin
+ pub author: Option<User>,
+ pub committer: Option<User>,
+ pub commit: GitCommit,
+ // parents: [{sha}]
+}
+
+/// Repository metadata, as returned by `GitHub::repo`.
+///
+/// `GitHub::repo` adjusts `homepage`, `open_issues_count` and `github_page_url`
+/// after decoding, so cached values may differ from the raw API response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GitHubRepo {
+ pub name: String,
+ pub description: Option<String>,
+ pub fork: bool,
+ pub created_at: String,
+ pub updated_at: Option<String>,
+ pub pushed_at: Option<String>,
+ pub homepage: Option<String>,
+ pub stargazers_count: u32, // Stars
+ pub forks_count: u32, // Real number of forks
+ pub subscribers_count: u32, // Real number of watches
+ pub has_issues: bool,
+ pub open_issues_count: Option<u32>,
+ // language: JavaScript,
+ pub has_downloads: bool,
+ // has_wiki: true,
+ pub has_pages: bool,
+ pub archived: bool,
+ // default_branch: master,
+
+ /// My custom addition!
+ /// Not part of the GitHub response: `GitHub::repo` sets it to
+ /// `https://{owner}.github.io/{name}/` when `has_pages` is true.
+ pub github_page_url: Option<String>,
+}
+
+/// One release entry, as returned by `GitHub::releases`.
+///
+/// Every decoded field is optional; the commented-out lines list response
+/// fields that are deliberately not decoded.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GitHubRelease {
+ // url: Option<String>, // "https://api.github.com/repos/octocat/Hello-World/releases/1",
+ // html_url: Option<String>, // "https://github.com/octocat/Hello-World/releases/v1.0.0",
+ // assets_url: Option<String>, // "https://api.github.com/repos/octocat/Hello-World/releases/1/assets",
+ // upload_url: Option<String>, // "https://uploads.github.com/repos/octocat/Hello-World/releases/1/assets{?name,label}",
+ // tarball_url: Option<String>, // "https://api.github.com/repos/octocat/Hello-World/tarball/v1.0.0",
+ // zipball_url: Option<String>, // "https://api.github.com/repos/octocat/Hello-World/zipball/v1.0.0",
+ // id: Option<String>, // 1,
+ // node_id: Option<String>, // "MDc6UmVsZWFzZTE=",
+ pub tag_name: Option<String>, // "v1.0.0",
+ // target_commitish: Option<String>, // "master",
+ // name: Option<String>, // "v1.0.0",
+ pub body: Option<String>, // "Description of the release",
+ pub draft: Option<bool>, // false,
+ pub prerelease: Option<bool>, // false,
+ pub created_at: Option<String>, // "2013-02-27T19:35:32Z",
+ pub published_at: Option<String>, // "2013-02-27T19:35:32Z",
+}
+
+/// Response body of the repository topics endpoint; `GitHub::topics` keeps only `names`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Topics {
+ pub names: Vec<String>,
+}
+
+/// One organization entry, as returned by `GitHub::user_orgs`.
+///
+/// The commented-out lines list response fields that are not decoded.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct UserOrg {
+ pub login: String, // "github",
+ //id: String, // 1,
+ // node_id: String, // "MDEyOk9yZ2FuaXphdGlvbjE=",
+ pub url: String, // "https://api.github.com/orgs/github",
+ // public_members_url: String, // "https://api.github.com/orgs/github/public_members{/member}",
+ // avatar_url: String, // "https://github.com/images/error/octocat_happy.gif",
+ pub description: Option<String>, // "A great organization"
+}