diff options
Diffstat (limited to 'kitchen_sink')
-rw-r--r-- | kitchen_sink/Cargo.toml | 2 | ||||
-rw-r--r-- | kitchen_sink/src/lib_kitchen_sink.rs | 53 | ||||
-rw-r--r-- | kitchen_sink/src/tarball.rs | 20 |
3 files changed, 43 insertions, 32 deletions
diff --git a/kitchen_sink/Cargo.toml b/kitchen_sink/Cargo.toml index ea9c79b..0d46977 100644 --- a/kitchen_sink/Cargo.toml +++ b/kitchen_sink/Cargo.toml @@ -15,7 +15,7 @@ deps_index = { path = "../deps_index" } feat_extractor = { path = "../feat_extractor" } docs_rs_client = { path = "../docs_rs_client", version = "0.4.0" } github_info = { path = "../github_info", version = "0.9" } -crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.3" } +crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.5" } user_db = { path = "../user_db", version = "0.3" } crate_db = { path = "../crate_db", version = "0.4.0" } categories = { path = "../categories" } diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs index 20b7115..d10c16d 100644 --- a/kitchen_sink/src/lib_kitchen_sink.rs +++ b/kitchen_sink/src/lib_kitchen_sink.rs @@ -790,11 +790,7 @@ impl KitchenSink { warnings.extend(self.add_readme_from_repo(&mut meta, maybe_repo.as_ref())); } - if let Some(readme) = meta.readme.as_mut() { - readme.base_url = Some(repo.readme_base_url(&path_in_repo)); - readme.base_image_url = Some(repo.readme_base_image_url(&path_in_repo)); - } - Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, warnings)) + Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, path_in_repo, warnings)) })?.await } @@ -841,6 +837,8 @@ impl KitchenSink { } } + eprintln!("R1 {:?}", meta.readme); + let maybe_repo = package.repository.as_ref().and_then(|r| Repo::new(r).ok()); let has_readme_file = meta.readme.is_some(); if !has_readme_file { @@ -872,11 +870,16 @@ impl KitchenSink { } } - self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), warnings).await + let path_in_repo = match maybe_repo.as_ref() { + Some(r) => self.crate_db.path_in_repo(r, name).await?, + None => None, + }.unwrap_or_default(); + + self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), path_in_repo, warnings).await } ///// Fixing and faking the data - async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { + async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, path_in_repo: String, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> { Self::override_bad_categories(&mut meta.manifest); let mut github_keywords = None; @@ -946,7 +949,7 @@ impl KitchenSink { } // lib file takes majority of space in cache, so remove it if it won't be used - if !self.is_readme_short(meta.readme.as_ref()) { + if !self.is_readme_short(meta.readme.as_ref().map(|r| &r.1)) { meta.lib_file = None; } @@ -954,7 +957,7 @@ impl KitchenSink { let mut words = vec![package.name.as_str()]; let readme_txt; if let Some(ref r) = meta.readme { - readme_txt = render_readme::Renderer::new(None).visible_text(&r.markup); + readme_txt = render_readme::Renderer::new(None).visible_text(&r.1); words.push(&readme_txt); } if let Some(ref s) = package.description {words.push(s);} @@ -968,6 +971,26 @@ impl KitchenSink { let has_buildrs = meta.has("build.rs"); let has_code_of_conduct = meta.has("CODE_OF_CONDUCT.md") || meta.has("docs/CODE_OF_CONDUCT.md") || meta.has(".github/CODE_OF_CONDUCT.md"); + + let readme = meta.readme.map(|(readme_path, markup)| { + let (base_url, base_image_url) = match maybe_repo { + Some(repo) => { + // Not parsing github URL, because "aboslute" path should not be allowed to escape the repo path, + // but it needs to normalize ../readme paths + let url = url::Url::parse(&format!("http://localhost/{}", path_in_repo)).and_then(|u| u.join(&readme_path)); + let in_repo_url_path = url.as_ref().map_or("", |u| u.path().trim_start_matches('/')); + eprintln!("{} + {} = {:?} = {}", path_in_repo, readme_path, url, in_repo_url_path); + (Some(repo.readme_base_url(in_repo_url_path)), Some(repo.readme_base_image_url(in_repo_url_path))) + }, + None => (None, None), + }; + Readme { + markup, + base_url, + base_image_url, + } + }); + let src = CrateVersionSourceData { capitalized_name, language_stats: meta.language_stats, @@ -977,7 +1000,7 @@ impl KitchenSink { is_nightly: meta.is_nightly, has_buildrs, has_code_of_conduct, - readme: meta.readme, + readme, lib_file: meta.lib_file, github_description, github_keywords, @@ -1053,9 +1076,9 @@ impl KitchenSink { }) } - pub fn is_readme_short(&self, readme: Option<&Readme>) -> bool { + pub fn is_readme_short(&self, readme: Option<&Markup>) -> bool { if let Some(r) = readme { - match r.markup { + match r { Markup::Markdown(ref s) | Markup::Rst(ref s) | Markup::Html(ref s) => s.len() < 1000, } } else { @@ -1111,11 +1134,7 @@ impl KitchenSink { async fn add_readme_from_crates_io(&self, meta: &mut CrateFile, name: &str, ver: &str) { if let Ok(Some(html)) = self.crates_io.readme(name, ver).await { eprintln!("Found readme on crates.io {}@{}", name, ver); - meta.readme = Some(Readme { - markup: Markup::Html(String::from_utf8_lossy(&html).to_string()), - base_url: None, - base_image_url: None, - }); + meta.readme = Some((String::new(), Markup::Html(String::from_utf8_lossy(&html).to_string()))); } else { eprintln!("No readme on crates.io for {}@{}", name, ver); } diff --git a/kitchen_sink/src/tarball.rs b/kitchen_sink/src/tarball.rs index d20530e..c46d1da 100644 --- a/kitchen_sink/src/tarball.rs +++ b/kitchen_sink/src/tarball.rs @@ -2,8 +2,6 @@ use cargo_toml::Manifest; use cargo_toml::Package; use libflate::gzip::Decoder; use render_readme::Markup; -use render_readme::Readme; -use repo_url::Repo; use std::collections::HashSet; use std::io; use std::io::Read; @@ -180,7 +178,7 @@ impl Collector { Ok(CrateFile { decompressed_size: self.decompressed_size, - readme: self.markup.map(|(path, m)| readme_from_repo(m, manifest.package.as_ref().and_then(|r| r.repository.as_ref()), &path)), + readme: self.markup, manifest, files: self.files, lib_file: self.lib_file, @@ -232,7 +230,8 @@ pub struct CrateFile { pub manifest: Manifest, pub lib_file: Option<String>, pub files: Vec<PathBuf>, - pub readme: Option<Readme>, + // relative path and markdown + pub readme: Option<(String, Markup)>, pub language_stats: udedokei::Stats, pub decompressed_size: usize, pub is_nightly: bool, @@ -247,14 +246,6 @@ impl CrateFile { } } -fn readme_from_repo(markup: Markup, repo_url: Option<&String>, base_path: &str) -> Readme { - let repo = repo_url.and_then(|url| Repo::new(url).ok()); - let base_url = repo.as_ref().map(|r| r.readme_base_url(base_path)); - let base_image_url = repo.map(|r| r.readme_base_image_url(base_path)); - - Readme::new(markup, base_url, base_image_url) -} - /// Check if given filename is a README. If `package` is missing, guess. fn is_readme_filename(path: &Path, package: Option<&Package>) -> bool { path.to_str().map_or(false, |pathstr| { @@ -275,7 +266,7 @@ fn unpack_crate() { assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1"); assert!(d.lib_file.unwrap().contains("fn nothing")); assert_eq!(d.files.len(), 5); - assert!(match d.readme.unwrap().markup { + assert!(match d.readme.unwrap().1 { Markup::Rst(a) => a == "o hi\n", _ => false, }); @@ -288,6 +279,7 @@ fn unpack_crate() { #[test] fn unpack_repo() { + use repo_url::Repo; let test_repo_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("test.repo"); let repo = Repo::new("http://example.invalid/foo.git").unwrap(); let checkout = crate_git_checkout::checkout(&repo, &test_repo_path).unwrap(); @@ -300,7 +292,7 @@ fn unpack_repo() { assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1"); assert!(d.lib_file.unwrap().contains("fn nothing")); assert_eq!(d.files.len(), 5); - assert!(match d.readme.unwrap().markup { + assert!(match d.readme.unwrap().1 { Markup::Rst(a) => a == "o hi\n", _ => false, }); |