summaryrefslogtreecommitdiffstats
path: root/repo_url
diff options
context:
space:
mode:
authorKornel <kornel@geekhood.net>2020-02-21 12:03:31 +0000
committerKornel <kornel@geekhood.net>2020-02-21 12:03:31 +0000
commite20761f8c048608600445d2df846cb0009a4c66f (patch)
treea5c4f3737b044deb8f1f1afb36ef53bbba8acebd /repo_url
parent3baf4d44afa12c787790a2bb0a4581f34343b243 (diff)
Moved
Diffstat (limited to 'repo_url')
-rw-r--r--repo_url/.gitignore1
-rw-r--r--repo_url/Cargo.toml14
-rw-r--r--repo_url/README.md6
-rw-r--r--repo_url/src/repo.rs282
4 files changed, 303 insertions, 0 deletions
diff --git a/repo_url/.gitignore b/repo_url/.gitignore
new file mode 100644
index 0000000..2f7896d
--- /dev/null
+++ b/repo_url/.gitignore
@@ -0,0 +1 @@
+target/
diff --git a/repo_url/Cargo.toml b/repo_url/Cargo.toml
new file mode 100644
index 0000000..1acc06b
--- /dev/null
+++ b/repo_url/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+edition = "2018"
+name = "repo_url"
+version = "0.3.1"
+authors = ["Kornel <kornel@geekhood.net>"]
+description = "Basic properties and formatting of URLs of GitHub and GitLab repositories"
+license = "CC0-1.0"
+
+[lib]
+name = "repo_url"
+path = "src/repo.rs"
+
+[dependencies]
+url = "2.1"
diff --git a/repo_url/README.md b/repo_url/README.md
new file mode 100644
index 0000000..9cb0811
--- /dev/null
+++ b/repo_url/README.md
@@ -0,0 +1,6 @@
+# Understanding URLs to git repositories
+
+This is a helper library to parse git URLs, with some special knowledge of GitHub and GitLab URLs (more URL schemes welcome).
+
+It's needed because Cargo allows arbitrary URLs in the metadata, and people put all kinds of stuff in there. crates.rs needs to have canonical Git URLs and be able to query the GitHub API about them.
+
diff --git a/repo_url/src/repo.rs b/repo_url/src/repo.rs
new file mode 100644
index 0000000..a03a7eb
--- /dev/null
+++ b/repo_url/src/repo.rs
@@ -0,0 +1,282 @@
+use std::borrow::Cow;
+use std::convert::TryFrom;
+use url;
+use url::Url;
+
+pub type GResult<T> = Result<T, GitError>;
+
+#[derive(Debug, Clone)]
+pub struct Repo {
+ // as set by the crate author
+ pub url: Url,
+ pub host: RepoHost,
+}
+
+/// Hosting service detected for a repository URL.
+///
+/// The known-host variants carry the owner/repository pair taken from the URL path.
+#[derive(Debug, Clone, Hash, Eq, PartialEq)]
+pub enum RepoHost {
+ /// Repository hosted on github.com
+ GitHub(SimpleRepo),
+ /// Repository hosted on gitlab.com
+ GitLab(SimpleRepo),
+ /// Repository hosted on bitbucket.org
+ BitBucket(SimpleRepo),
+ /// Any host that is not one of the recognized services
+ Other,
+}
+
+/// Owner and repository name identifying a repository on a hosting service.
+#[derive(Debug, Clone, Hash, Eq, PartialEq)]
+pub struct SimpleRepo {
+ /// Account or organization part of the repository path
+ pub owner: Box<str>,
+ /// Repository name part of the repository path
+ pub repo: Box<str>,
+}
+
+impl SimpleRepo {
+    /// Builds a `SimpleRepo` from any values convertible into `Box<str>`.
+    ///
+    /// Both values are stored exactly as given; no normalization happens here.
+    pub fn new(owner: impl Into<Box<str>>, repo: impl Into<Box<str>>) -> Self {
+        let owner: Box<str> = owner.into();
+        let repo: Box<str> = repo.into();
+        SimpleRepo { owner, repo }
+    }
+}
+
+/// Errors returned when a repository URL cannot be interpreted.
+#[derive(Debug, Clone)]
+pub enum GitError {
+ /// The URL is on a recognized host but lacks an owner or repository path segment
+ IncompleteUrl,
+ /// The string could not be parsed as a URL
+ InvalidUrl(url::ParseError),
+}
+
+impl std::error::Error for GitError {
+    /// Fixed short label for this error type; the detailed message comes from `Display`.
+    fn description(&self) -> &str {
+        "git"
+    }
+}
+
+impl std::fmt::Display for GitError {
+    /// Writes a fixed message for incomplete URLs, and the URL parser's own
+    /// message for invalid ones.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            GitError::InvalidUrl(parse_err) => std::fmt::Display::fmt(parse_err, f),
+            GitError::IncompleteUrl => f.write_str("Incomplete URL"),
+        }
+    }
+}
+
+impl Repo {
+    /// Parse the given URL
+    ///
+    /// The hosting service is detected from the host name. For GitHub, GitLab and
+    /// BitBucket the first two path segments are taken as owner and repository name;
+    /// any other host is kept as `RepoHost::Other`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `GitError::InvalidUrl` when the string is not a valid URL, and
+    /// `GitError::IncompleteUrl` when a recognized host is missing (or has an empty)
+    /// owner or repository path segment.
+    pub fn new(url: &str) -> GResult<Self> {
+        let url = Url::parse(url).map_err(GitError::InvalidUrl)?;
+        // Computed before `url` is moved into the struct, because it borrows from it.
+        let host = match (url.host_str(), url.path_segments()) {
+            (Some("www.github.com"), Some(path)) |
+            (Some("github.com"), Some(path)) => {
+                RepoHost::GitHub(Self::repo_from_path(path)?)
+            },
+            (Some("www.gitlab.com"), Some(path)) |
+            (Some("gitlab.com"), Some(path)) => {
+                RepoHost::GitLab(Self::repo_from_path(path)?)
+            },
+            (Some("bitbucket.org"), Some(path)) => {
+                RepoHost::BitBucket(Self::repo_from_path(path)?)
+            },
+            _ => RepoHost::Other,
+        };
+        Ok(Repo { url, host })
+    }
+
+    /// Takes the first two path segments as owner and repository name.
+    ///
+    /// Both are lowercased, and a `.git` suffix is removed from the repository name.
+    /// Empty segments (e.g. from a trailing slash in `https://github.com/foo/`) are
+    /// rejected, so a repository with an empty owner or name is never produced.
+    fn repo_from_path<'a>(mut path: impl Iterator<Item = &'a str>) -> GResult<SimpleRepo> {
+        let owner = path.next()
+            .filter(|segment| !segment.is_empty())
+            .ok_or(GitError::IncompleteUrl)?;
+        let repo = path.next()
+            .map(|segment| segment.trim_end_matches(".git"))
+            // checked after trimming, so a bare ".git" segment is rejected too
+            .filter(|name| !name.is_empty())
+            .ok_or(GitError::IncompleteUrl)?;
+        Ok(SimpleRepo {
+            owner: owner.to_ascii_lowercase().into_boxed_str(),
+            repo: repo.to_ascii_lowercase().into_boxed_str(),
+        })
+    }
+
+    /// Trailing `/` needed after a non-empty base dir that doesn't already end with one.
+    fn dir_suffix(base_dir_in_repo: &str) -> &'static str {
+        if !base_dir_in_repo.is_empty() && !base_dir_in_repo.ends_with('/') {
+            "/"
+        } else {
+            ""
+        }
+    }
+
+    /// True if the URL may be a well-known git repository URL
+    ///
+    /// Only the host name is checked; strings that don't parse as a URL give `false`.
+    pub fn looks_like_repo_url(url: &str) -> bool {
+        Url::parse(url).ok().map_or(false, |url| match url.host_str() {
+            Some("github.com") | Some("www.github.com") => true,
+            Some("gitlab.com") | Some("www.gitlab.com") => true,
+            Some("bitbucket.org") => true,
+            _ => false,
+        })
+    }
+
+    /// The parsed URL as a string
+    pub fn raw_url(&self) -> &str {
+        self.url.as_str()
+    }
+
+    /// Enum with details of git hosting service
+    pub fn host(&self) -> &RepoHost {
+        &self.host
+    }
+
+    /// URL to view who contributed to the repository
+    ///
+    /// For unrecognized hosts this is just the repository URL itself.
+    pub fn contributors_http_url(&self) -> Cow<'_, str> {
+        match &self.host {
+            RepoHost::GitHub(SimpleRepo { owner, repo }) => {
+                format!("https://github.com/{}/{}/graphs/contributors", owner, repo).into()
+            },
+            RepoHost::GitLab(SimpleRepo { owner, repo }) => {
+                format!("https://gitlab.com/{}/{}/graphs/master", owner, repo).into()
+            },
+            RepoHost::BitBucket(SimpleRepo { owner, repo }) => {
+                // not really…
+                format!("https://bitbucket.org/{}/{}/commits/all", owner, repo).into()
+            },
+            RepoHost::Other => self.url.as_str().into(),
+        }
+    }
+
+    /// Name of the hosting service
+    pub fn site_link_label(&self) -> &'static str {
+        match self.host {
+            RepoHost::GitHub(..) => "GitHub",
+            RepoHost::GitLab(..) => "GitLab",
+            RepoHost::BitBucket(..) => "BitBucket",
+            RepoHost::Other => "Source Code",
+        }
+    }
+
+    /// URL for links in readmes hosted on the git website
+    ///
+    /// Base dir is without leading or trailing `/`, i.e. `""` for root, `"foo/bar"`, etc.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base_dir_in_repo` starts with `/`.
+    pub fn readme_base_url(&self, base_dir_in_repo: &str) -> String {
+        assert!(!base_dir_in_repo.starts_with('/'));
+        let slash = Self::dir_suffix(base_dir_in_repo);
+        match &self.host {
+            RepoHost::GitHub(SimpleRepo { owner, repo }) => {
+                format!("https://github.com/{}/{}/blob/master/{}{}", owner, repo, base_dir_in_repo, slash)
+            },
+            RepoHost::GitLab(SimpleRepo { owner, repo }) => {
+                format!("https://gitlab.com/{}/{}/blob/master/{}{}", owner, repo, base_dir_in_repo, slash)
+            },
+            RepoHost::BitBucket(_) | // FIXME: needs commit hash!
+            RepoHost::Other => self.url.to_string() // FIXME: how to add base dir?
+        }
+    }
+
+    /// URL for image embeds in readmes hosted on the git website
+    ///
+    /// Base dir is without leading or trailing `/`, i.e. `""` for root, `"foo/bar"`, etc.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base_dir_in_repo` starts with `/`.
+    pub fn readme_base_image_url(&self, base_dir_in_repo: &str) -> String {
+        assert!(!base_dir_in_repo.starts_with('/'));
+        let slash = Self::dir_suffix(base_dir_in_repo);
+        match &self.host {
+            RepoHost::GitHub(SimpleRepo { owner, repo }) => {
+                format!("https://raw.githubusercontent.com/{}/{}/master/{}{}", owner, repo, base_dir_in_repo, slash)
+            },
+            RepoHost::GitLab(SimpleRepo { owner, repo }) => {
+                format!("https://gitlab.com/{}/{}/raw/master/{}{}", owner, repo, base_dir_in_repo, slash)
+            },
+            RepoHost::BitBucket(_) | // FIXME: needs commit hash!
+            RepoHost::Other => self.url.to_string() // FIXME: how to add base dir?
+        }
+    }
+
+    /// URL for browsing the repository via web browser
+    ///
+    /// Falls back to the repository URL itself for unrecognized hosts.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base_dir_in_repo` starts with `/`.
+    pub fn canonical_http_url(&self, base_dir_in_repo: &str) -> Cow<'_, str> {
+        self.host.canonical_http_url(base_dir_in_repo)
+            .unwrap_or_else(|| self.url.as_str().into()) // FIXME: how to add base dir?
+    }
+
+    /// URL for cloning the repository via git
+    ///
+    /// Falls back to the repository URL itself for unrecognized hosts.
+    pub fn canonical_git_url(&self) -> Cow<'_, str> {
+        match self.host.canonical_git_url() {
+            Some(canonical) => Cow::Owned(canonical),
+            None => Cow::Borrowed(self.url.as_str()),
+        }
+    }
+
+    /// Owner of the repository, if the host is a recognized service
+    pub fn owner_name(&self) -> Option<&str> {
+        self.host.owner_name()
+    }
+
+    /// Name of the repository, if the host is a recognized service
+    pub fn repo_name(&self) -> Option<&str> {
+        self.host.repo_name()
+    }
+}
+
+impl RepoHost {
+    /// URL for cloning the repository via git
+    ///
+    /// Returns `None` for `RepoHost::Other`.
+    pub fn canonical_git_url(&self) -> Option<String> {
+        match self {
+            RepoHost::GitHub(SimpleRepo { owner, repo }) => {
+                Some(format!("https://github.com/{}/{}.git", owner, repo))
+            },
+            RepoHost::GitLab(SimpleRepo { owner, repo }) => {
+                Some(format!("https://gitlab.com/{}/{}.git", owner, repo))
+            },
+            RepoHost::BitBucket(SimpleRepo { owner, repo }) => {
+                Some(format!("https://bitbucket.org/{}/{}", owner, repo))
+            },
+            RepoHost::Other => None,
+        }
+    }
+
+    /// URL for browsing the repository via web browser
+    ///
+    /// A non-empty `base_dir_in_repo` is appended under `/tree/master/` for GitHub
+    /// and GitLab; it is currently ignored for BitBucket. Returns `None` for
+    /// `RepoHost::Other`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `base_dir_in_repo` starts with `/`.
+    pub fn canonical_http_url(&self, base_dir_in_repo: &str) -> Option<Cow<'_, str>> {
+        assert!(!base_dir_in_repo.starts_with('/'));
+        // Only link into the tree when a subdirectory was actually requested.
+        let tree_prefix = if base_dir_in_repo.is_empty() { "" } else { "/tree/master/" };
+        match self {
+            RepoHost::GitHub(SimpleRepo { owner, repo }) => {
+                Some(format!("https://github.com/{}/{}{}{}", owner, repo, tree_prefix, base_dir_in_repo).into())
+            },
+            RepoHost::GitLab(SimpleRepo { owner, repo }) => {
+                Some(format!("https://gitlab.com/{}/{}{}{}", owner, repo, tree_prefix, base_dir_in_repo).into())
+            },
+            RepoHost::BitBucket(SimpleRepo { owner, repo }) => {
+                Some(format!("https://bitbucket.org/{}/{}", owner, repo).into()) // FIXME: needs hash
+            },
+            RepoHost::Other => None,
+        }
+    }
+
+    /// Owner of the repository, or `None` for `RepoHost::Other`
+    pub fn owner_name(&self) -> Option<&str> {
+        self.repo().map(|r| &*r.owner)
+    }
+
+    /// Name of the repository, or `None` for `RepoHost::Other`
+    pub fn repo_name(&self) -> Option<&str> {
+        self.repo().map(|r| &*r.repo)
+    }
+
+    /// Owner/name pair of the repository, or `None` for `RepoHost::Other`
+    pub fn repo(&self) -> Option<&SimpleRepo> {
+        match self {
+            RepoHost::GitHub(repo) |
+            RepoHost::BitBucket(repo) |
+            RepoHost::GitLab(repo) => Some(repo),
+            RepoHost::Other => None,
+        }
+    }
+}
+
+impl TryFrom<RepoHost> for Repo {
+    type Error = &'static str;
+
+    /// Builds a `Repo` whose URL is the host's canonical git URL.
+    ///
+    /// Fails when the host has no canonical git URL, or when that URL does not parse.
+    fn try_from(host: RepoHost) -> Result<Self, Self::Error> {
+        let parsed: Option<Url> = match host.canonical_git_url() {
+            Some(git_url) => git_url.parse().ok(),
+            None => None,
+        };
+        match parsed {
+            Some(url) => Ok(Repo { host, url }),
+            None => Err("not a known git host"),
+        }
+    }
+}
+
+#[test]
+fn repo_parse() {
+    // GitHub: upper-case input comes out lowercased in the canonical URLs.
+    let github = Repo::new("HTTPS://GITHUB.COM/FOO/BAR").unwrap();
+    assert_eq!("https://github.com/foo/bar.git", github.canonical_git_url());
+    assert_eq!("https://github.com/foo/bar", github.canonical_http_url(""));
+    assert_eq!("https://github.com/foo/bar/tree/master/subdir", github.canonical_http_url("subdir"));
+    assert_eq!("https://github.com/foo/bar/tree/master/sub/dir", github.canonical_http_url("sub/dir"));
+
+    // GitLab: readme base URLs always end with exactly one slash.
+    let gitlab = Repo::new("HTTPS://GITlaB.COM/FOO/BAR").unwrap();
+    assert_eq!("https://gitlab.com/foo/bar.git", gitlab.canonical_git_url());
+    assert_eq!("https://gitlab.com/foo/bar/blob/master/", gitlab.readme_base_url(""));
+    assert_eq!("https://gitlab.com/foo/bar/blob/master/foo/", gitlab.readme_base_url("foo"));
+    assert_eq!("https://gitlab.com/foo/bar/blob/master/foo/bar/", gitlab.readme_base_url("foo/bar"));
+    assert_eq!("https://gitlab.com/foo/bar/raw/master/baz/", gitlab.readme_base_image_url("baz/"));
+    assert_eq!("https://gitlab.com/foo/bar/tree/master/sub/dir", gitlab.canonical_http_url("sub/dir"));
+
+    // Unrecognized host: the URL is passed through unchanged.
+    let other = Repo::new("http://priv@example.com/#111").unwrap();
+    assert_eq!("http://priv@example.com/#111", other.canonical_git_url());
+    assert_eq!("http://priv@example.com/#111", other.canonical_http_url(""));
+
+    // Not a URL at all.
+    assert!(Repo::new("N/A").is_err());
+}