diff options
author | Kornel <kornel@geekhood.net> | 2020-03-21 23:15:56 +0000 |
---|---|---|
committer | Kornel <kornel@geekhood.net> | 2020-03-21 23:30:03 +0000 |
commit | 097f7ea595a150710adeea0e606112c07b9ceb3e (patch) | |
tree | 1f637e338406e1156df706efda0df5c6c6bfbc92 | |
parent | d77831e5ac7f03d6470c8146c6e7f7fa5f48d8aa (diff) |
2-stage readme path resolution
-rw-r--r-- | crate_git_checkout/Cargo.toml | 2 | ||||
-rw-r--r-- | crate_git_checkout/src/crate_git_checkout.rs | 14 | ||||
-rw-r--r-- | front_end/src/crate_page.rs | 2 | ||||
-rw-r--r-- | kitchen_sink/Cargo.toml | 2 | ||||
-rw-r--r-- | kitchen_sink/src/lib_kitchen_sink.rs | 53 | ||||
-rw-r--r-- | kitchen_sink/src/tarball.rs | 20 |
6 files changed, 48 insertions, 45 deletions
diff --git a/crate_git_checkout/Cargo.toml b/crate_git_checkout/Cargo.toml index 4e285b3..c90aeca 100644 --- a/crate_git_checkout/Cargo.toml +++ b/crate_git_checkout/Cargo.toml @@ -2,7 +2,7 @@ edition = "2018" description = "Analyze git repository containing Cargo crates" name = "crate_git_checkout" -version = "0.4.4" +version = "0.4.5" authors = ["Kornel <kornel@geekhood.net>"] [lib] diff --git a/crate_git_checkout/src/crate_git_checkout.rs b/crate_git_checkout/src/crate_git_checkout.rs index 68af2b2..1a97f46 100644 --- a/crate_git_checkout/src/crate_git_checkout.rs +++ b/crate_git_checkout/src/crate_git_checkout.rs @@ -9,7 +9,7 @@ pub use git2::Repository; use git2::{Blob, ObjectType, Reference, Tree}; use lazy_static::lazy_static; use render_readme; -use render_readme::{Markup, Readme}; +use render_readme::Markup; use repo_url::Repo; use std::collections::hash_map::Entry::Vacant; use std::collections::{HashMap, HashSet}; @@ -310,7 +310,7 @@ pub fn find_dependency_changes(repo: &Repository, mut cb: impl FnMut(HashSet<Str } // FIXME: buggy, barely works -pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme>, failure::Error> { +pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<(String, Markup)>, failure::Error> { let head = repo.head()?; let tree = head.peel_to_tree()?; let mut readme = None; @@ -344,7 +344,7 @@ pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme } else { Markup::Markdown(text) }; - readme = Some(readme_from_repo(markup, &package.repository, base)); + readme = Some((base.to_owned(), markup)); found_best = is_correct_dir; } Ok(()) @@ -352,14 +352,6 @@ pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme Ok(readme) } -fn readme_from_repo(markup: Markup, repo_url: &Option<String>, base_dir_in_repo: &str) -> Readme { - let repo = repo_url.as_ref().and_then(|url| Repo::new(url).ok()); - let base_url = repo.as_ref().map(|r| r.readme_base_url(base_dir_in_repo)); - let base_image_url = repo.map(|r| r.readme_base_image_url(base_dir_in_repo)); - - Readme::new(markup, base_url, base_image_url) -} - /// Check if given filename is a README. If `package` is missing, guess. fn is_readme_filename(path: &Path, package: Option<&Package>) -> bool { path.to_str().map_or(false, |s| { diff --git a/front_end/src/crate_page.rs b/front_end/src/crate_page.rs index 57286a8..6a5a44e 100644 --- a/front_end/src/crate_page.rs +++ b/front_end/src/crate_page.rs @@ -250,7 +250,7 @@ impl<'a> CratePage<'a> { } pub fn is_readme_short(&self) -> bool { - self.kitchen_sink.is_readme_short(self.ver.readme()) + self.kitchen_sink.is_readme_short(self.ver.readme().as_ref().map(|r| &r.markup)) } pub fn has_no_readme_or_lib(&self) -> bool { diff --git a/kitchen_sink/Cargo.toml b/kitchen_sink/Cargo.toml index ea9c79b..0d46977 100644 --- a/kitchen_sink/Cargo.toml +++ b/kitchen_sink/Cargo.toml @@ -15,7 +15,7 @@ deps_index = { path = "../deps_index" } feat_extractor = { path = "../feat_extractor" } docs_rs_client = { path = "../docs_rs_client", version = "0.4.0" } github_info = { path = "../github_info", version = "0.9" } -crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.3" } +crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.5" } user_db = { path = "../user_db", version = "0.3" } crate_db = { path = "../crate_db", version = "0.4.0" } categories = { path = "../categories" } diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs index 20b7115..d10c16d 100644 --- a/kitchen_sink/src/lib_kitchen_sink.rs +++ b/kitchen_sink/src/lib_kitchen_sink.rs @@ -790,11 +790,7 @@ impl KitchenSink { warnings.extend(self.add_readme_from_repo(&mut meta, maybe_repo.as_ref())); } - if let Some(readme) = meta.readme.as_mut() { - readme.base_url = Some(repo.readme_base_url(&path_in_repo)); - readme.base_image_url = Some(repo.readme_base_image_url(&path_in_repo)); - } - Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, warnings)) + Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, path_in_repo, warnings)) })?.await } @@ -841,6 +837,8 @@ impl KitchenSink { } } + eprintln!("R1 {:?}", meta.readme); + let maybe_repo = package.repository.as_ref().and_then(|r| Repo::new(r).ok()); let has_readme_file = meta.readme.is_some(); if !has_readme_file { @@ -872,11 +870,16 @@ impl KitchenSink { } } - self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), warnings).await + let path_in_repo = match maybe_repo.as_ref() { + Some(r) => self.crate_db.path_in_repo(r, name).await?, + None => None, + }.unwrap_or_default(); + + self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), path_in_repo, warnings).await } ///// Fixing and faking the data - async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { + async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, path_in_repo: String, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { Self::override_bad_categories(&mut meta.manifest); let mut github_keywords = None; @@ -946,7 +949,7 @@ impl KitchenSink { } // lib file takes majority of space in cache, so remove it if it won't be used - if !self.is_readme_short(meta.readme.as_ref()) { + if !self.is_readme_short(meta.readme.as_ref().map(|r| &r.1)) { meta.lib_file = None; } @@ -954,7 +957,7 @@ impl KitchenSink { let mut words = vec![package.name.as_str()]; let readme_txt; if let Some(ref r) = meta.readme { - readme_txt = render_readme::Renderer::new(None).visible_text(&r.markup); + readme_txt = render_readme::Renderer::new(None).visible_text(&r.1); words.push(&readme_txt); } if let Some(ref s) = package.description {words.push(s);} @@ -968,6 +971,26 @@ impl KitchenSink { let has_buildrs = meta.has("build.rs"); let has_code_of_conduct = meta.has("CODE_OF_CONDUCT.md") || meta.has("docs/CODE_OF_CONDUCT.md") || meta.has(".github/CODE_OF_CONDUCT.md"); + + let readme = meta.readme.map(|(readme_path, markup)| { + let (base_url, base_image_url) = match maybe_repo { + Some(repo) => { + // Not parsing github URL, because "aboslute" path should not be allowed to escape the repo path, + // but it needs to normalize ../readme paths + let url = url::Url::parse(&format!("http://localhost/{}", path_in_repo)).and_then(|u| u.join(&readme_path)); + let in_repo_url_path = url.as_ref().map_or("", |u| u.path().trim_start_matches('/')); + eprintln!("{} + {} = {:?} = {}", path_in_repo, readme_path, url, in_repo_url_path); + (Some(repo.readme_base_url(in_repo_url_path)), Some(repo.readme_base_image_url(in_repo_url_path))) + }, + None => (None, None), + }; + Readme { + markup, + base_url, + base_image_url, + } + }); + let src = CrateVersionSourceData { capitalized_name, language_stats: meta.language_stats, @@ -977,7 +1000,7 @@ impl KitchenSink { is_nightly: meta.is_nightly, has_buildrs, has_code_of_conduct, - readme: meta.readme, + readme, lib_file: meta.lib_file, github_description, github_keywords, @@ -1053,9 +1076,9 @@ impl KitchenSink { }) } - pub fn is_readme_short(&self, readme: Option<&Readme>) -> bool { + pub fn is_readme_short(&self, readme: Option<&Markup>) -> bool { if let Some(r) = readme { - match r.markup { + match r { Markup::Markdown(ref s) | Markup::Rst(ref s) | Markup::Html(ref s) => s.len() < 1000, } } else { @@ -1111,11 +1134,7 @@ impl KitchenSink { async fn add_readme_from_crates_io(&self, meta: &mut CrateFile, name: &str, ver: &str) { if let Ok(Some(html)) = self.crates_io.readme(name, ver).await { eprintln!("Found readme on crates.io {}@{}", name, ver); - meta.readme = Some(Readme { - markup: Markup::Html(String::from_utf8_lossy(&html).to_string()), - base_url: None, - base_image_url: None, - }); + meta.readme = Some((String::new(), Markup::Html(String::from_utf8_lossy(&html).to_string()))); } else { eprintln!("No readme on crates.io for {}@{}", name, ver); } diff --git a/kitchen_sink/src/tarball.rs b/kitchen_sink/src/tarball.rs index d20530e..c46d1da 100644 --- a/kitchen_sink/src/tarball.rs +++ b/kitchen_sink/src/tarball.rs @@ -2,8 +2,6 @@ use cargo_toml::Manifest; use cargo_toml::Package; use libflate::gzip::Decoder; use render_readme::Markup; -use render_readme::Readme; -use repo_url::Repo; use std::collections::HashSet; use std::io; use std::io::Read; @@ -180,7 +178,7 @@ impl Collector { Ok(CrateFile { decompressed_size: self.decompressed_size, - readme: self.markup.map(|(path, m)| readme_from_repo(m, manifest.package.as_ref().and_then(|r| r.repository.as_ref()), &path)), + readme: self.markup, manifest, files: self.files, lib_file: self.lib_file, @@ -232,7 +230,8 @@ pub struct CrateFile { pub manifest: Manifest, pub lib_file: Option<String>, pub files: Vec<PathBuf>, - pub readme: Option<Readme>, + // relative path and markdown + pub readme: Option<(String, Markup)>, pub language_stats: udedokei::Stats, pub decompressed_size: usize, pub is_nightly: bool, @@ -247,14 +246,6 @@ impl CrateFile { } } -fn readme_from_repo(markup: Markup, repo_url: Option<&String>, base_path: &str) -> Readme { - let repo = repo_url.and_then(|url| Repo::new(url).ok()); - let base_url = repo.as_ref().map(|r| r.readme_base_url(base_path)); - let base_image_url = repo.map(|r| r.readme_base_image_url(base_path)); - - Readme::new(markup, base_url, base_image_url) -} - /// Check if given filename is a README. If `package` is missing, guess. fn is_readme_filename(path: &Path, package: Option<&Package>) -> bool { path.to_str().map_or(false, |pathstr| { @@ -275,7 +266,7 @@ fn unpack_crate() { assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1"); assert!(d.lib_file.unwrap().contains("fn nothing")); assert_eq!(d.files.len(), 5); - assert!(match d.readme.unwrap().markup { + assert!(match d.readme.unwrap().1 { Markup::Rst(a) => a == "o hi\n", _ => false, }); @@ -288,6 +279,7 @@ fn unpack_crate() { #[test] fn unpack_repo() { + use repo_url::Repo; let test_repo_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("test.repo"); let repo = Repo::new("http://example.invalid/foo.git").unwrap(); let checkout = crate_git_checkout::checkout(&repo, &test_repo_path).unwrap(); @@ -300,7 +292,7 @@ fn unpack_repo() { assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1"); assert!(d.lib_file.unwrap().contains("fn nothing")); assert_eq!(d.files.len(), 5); - assert!(match d.readme.unwrap().markup { + assert!(match d.readme.unwrap().1 { Markup::Rst(a) => a == "o hi\n", _ => false, }); |