summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKornel <kornel@geekhood.net>2020-03-21 23:15:56 +0000
committerKornel <kornel@geekhood.net>2020-03-21 23:30:03 +0000
commit097f7ea595a150710adeea0e606112c07b9ceb3e (patch)
tree1f637e338406e1156df706efda0df5c6c6bfbc92
parentd77831e5ac7f03d6470c8146c6e7f7fa5f48d8aa (diff)
2-stage readme path resolution
-rw-r--r--crate_git_checkout/Cargo.toml2
-rw-r--r--crate_git_checkout/src/crate_git_checkout.rs14
-rw-r--r--front_end/src/crate_page.rs2
-rw-r--r--kitchen_sink/Cargo.toml2
-rw-r--r--kitchen_sink/src/lib_kitchen_sink.rs53
-rw-r--r--kitchen_sink/src/tarball.rs20
6 files changed, 48 insertions, 45 deletions
diff --git a/crate_git_checkout/Cargo.toml b/crate_git_checkout/Cargo.toml
index 4e285b3..c90aeca 100644
--- a/crate_git_checkout/Cargo.toml
+++ b/crate_git_checkout/Cargo.toml
@@ -2,7 +2,7 @@
edition = "2018"
description = "Analyze git repository containing Cargo crates"
name = "crate_git_checkout"
-version = "0.4.4"
+version = "0.4.5"
authors = ["Kornel <kornel@geekhood.net>"]
[lib]
diff --git a/crate_git_checkout/src/crate_git_checkout.rs b/crate_git_checkout/src/crate_git_checkout.rs
index 68af2b2..1a97f46 100644
--- a/crate_git_checkout/src/crate_git_checkout.rs
+++ b/crate_git_checkout/src/crate_git_checkout.rs
@@ -9,7 +9,7 @@ pub use git2::Repository;
use git2::{Blob, ObjectType, Reference, Tree};
use lazy_static::lazy_static;
use render_readme;
-use render_readme::{Markup, Readme};
+use render_readme::Markup;
use repo_url::Repo;
use std::collections::hash_map::Entry::Vacant;
use std::collections::{HashMap, HashSet};
@@ -310,7 +310,7 @@ pub fn find_dependency_changes(repo: &Repository, mut cb: impl FnMut(HashSet<Str
}
// FIXME: buggy, barely works
-pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme>, failure::Error> {
+pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<(String, Markup)>, failure::Error> {
let head = repo.head()?;
let tree = head.peel_to_tree()?;
let mut readme = None;
@@ -344,7 +344,7 @@ pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme
} else {
Markup::Markdown(text)
};
- readme = Some(readme_from_repo(markup, &package.repository, base));
+ readme = Some((base.to_owned(), markup));
found_best = is_correct_dir;
}
Ok(())
@@ -352,14 +352,6 @@ pub fn find_readme(repo: &Repository, package: &Package) -> Result<Option<Readme
Ok(readme)
}
-fn readme_from_repo(markup: Markup, repo_url: &Option<String>, base_dir_in_repo: &str) -> Readme {
- let repo = repo_url.as_ref().and_then(|url| Repo::new(url).ok());
- let base_url = repo.as_ref().map(|r| r.readme_base_url(base_dir_in_repo));
- let base_image_url = repo.map(|r| r.readme_base_image_url(base_dir_in_repo));
-
- Readme::new(markup, base_url, base_image_url)
-}
-
/// Check if given filename is a README. If `package` is missing, guess.
fn is_readme_filename(path: &Path, package: Option<&Package>) -> bool {
path.to_str().map_or(false, |s| {
diff --git a/front_end/src/crate_page.rs b/front_end/src/crate_page.rs
index 57286a8..6a5a44e 100644
--- a/front_end/src/crate_page.rs
+++ b/front_end/src/crate_page.rs
@@ -250,7 +250,7 @@ impl<'a> CratePage<'a> {
}
pub fn is_readme_short(&self) -> bool {
- self.kitchen_sink.is_readme_short(self.ver.readme())
+ self.kitchen_sink.is_readme_short(self.ver.readme().as_ref().map(|r| &r.markup))
}
pub fn has_no_readme_or_lib(&self) -> bool {
diff --git a/kitchen_sink/Cargo.toml b/kitchen_sink/Cargo.toml
index ea9c79b..0d46977 100644
--- a/kitchen_sink/Cargo.toml
+++ b/kitchen_sink/Cargo.toml
@@ -15,7 +15,7 @@ deps_index = { path = "../deps_index" }
feat_extractor = { path = "../feat_extractor" }
docs_rs_client = { path = "../docs_rs_client", version = "0.4.0" }
github_info = { path = "../github_info", version = "0.9" }
-crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.3" }
+crate_git_checkout = { path = "../crate_git_checkout", version = "0.4.5" }
user_db = { path = "../user_db", version = "0.3" }
crate_db = { path = "../crate_db", version = "0.4.0" }
categories = { path = "../categories" }
diff --git a/kitchen_sink/src/lib_kitchen_sink.rs b/kitchen_sink/src/lib_kitchen_sink.rs
index 20b7115..d10c16d 100644
--- a/kitchen_sink/src/lib_kitchen_sink.rs
+++ b/kitchen_sink/src/lib_kitchen_sink.rs
@@ -790,11 +790,7 @@ impl KitchenSink {
warnings.extend(self.add_readme_from_repo(&mut meta, maybe_repo.as_ref()));
}
- if let Some(readme) = meta.readme.as_mut() {
- readme.base_url = Some(repo.readme_base_url(&path_in_repo));
- readme.base_image_url = Some(repo.readme_base_image_url(&path_in_repo));
- }
- Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, warnings))
+ Ok::<_, CError>(self.rich_crate_version_data_common(origin.clone(), meta, 0, false, path_in_repo, warnings))
})?.await
}
@@ -841,6 +837,8 @@ impl KitchenSink {
}
}
+ eprintln!("R1 {:?}", meta.readme);
+
let maybe_repo = package.repository.as_ref().and_then(|r| Repo::new(r).ok());
let has_readme_file = meta.readme.is_some();
if !has_readme_file {
@@ -872,11 +870,16 @@ impl KitchenSink {
}
}
- self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), warnings).await
+ let path_in_repo = match maybe_repo.as_ref() {
+ Some(r) => self.crate_db.path_in_repo(r, name).await?,
+ None => None,
+ }.unwrap_or_default();
+
+ self.rich_crate_version_data_common(origin, meta, crate_compressed_size as u32, latest.is_yanked(), path_in_repo, warnings).await
}
///// Fixing and faking the data
- async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> {
+ async fn rich_crate_version_data_common(&self, origin: Origin, mut meta: CrateFile, crate_compressed_size: u32, is_yanked: bool, path_in_repo: String, mut warnings: Warnings) -> CResult<(CrateVersionSourceData, Manifest, Warnings)> {
Self::override_bad_categories(&mut meta.manifest);
let mut github_keywords = None;
@@ -946,7 +949,7 @@ impl KitchenSink {
}
// lib file takes majority of space in cache, so remove it if it won't be used
- if !self.is_readme_short(meta.readme.as_ref()) {
+ if !self.is_readme_short(meta.readme.as_ref().map(|r| &r.1)) {
meta.lib_file = None;
}
@@ -954,7 +957,7 @@ impl KitchenSink {
let mut words = vec![package.name.as_str()];
let readme_txt;
if let Some(ref r) = meta.readme {
- readme_txt = render_readme::Renderer::new(None).visible_text(&r.markup);
+ readme_txt = render_readme::Renderer::new(None).visible_text(&r.1);
words.push(&readme_txt);
}
if let Some(ref s) = package.description {words.push(s);}
@@ -968,6 +971,26 @@ impl KitchenSink {
let has_buildrs = meta.has("build.rs");
let has_code_of_conduct = meta.has("CODE_OF_CONDUCT.md") || meta.has("docs/CODE_OF_CONDUCT.md") || meta.has(".github/CODE_OF_CONDUCT.md");
+
+ let readme = meta.readme.map(|(readme_path, markup)| {
+ let (base_url, base_image_url) = match maybe_repo {
+ Some(repo) => {
// Not parsing github URL, because "absolute" path should not be allowed to escape the repo path,
+ // but it needs to normalize ../readme paths
+ let url = url::Url::parse(&format!("http://localhost/{}", path_in_repo)).and_then(|u| u.join(&readme_path));
+ let in_repo_url_path = url.as_ref().map_or("", |u| u.path().trim_start_matches('/'));
+ eprintln!("{} + {} = {:?} = {}", path_in_repo, readme_path, url, in_repo_url_path);
+ (Some(repo.readme_base_url(in_repo_url_path)), Some(repo.readme_base_image_url(in_repo_url_path)))
+ },
+ None => (None, None),
+ };
+ Readme {
+ markup,
+ base_url,
+ base_image_url,
+ }
+ });
+
let src = CrateVersionSourceData {
capitalized_name,
language_stats: meta.language_stats,
@@ -977,7 +1000,7 @@ impl KitchenSink {
is_nightly: meta.is_nightly,
has_buildrs,
has_code_of_conduct,
- readme: meta.readme,
+ readme,
lib_file: meta.lib_file,
github_description,
github_keywords,
@@ -1053,9 +1076,9 @@ impl KitchenSink {
})
}
- pub fn is_readme_short(&self, readme: Option<&Readme>) -> bool {
+ pub fn is_readme_short(&self, readme: Option<&Markup>) -> bool {
if let Some(r) = readme {
- match r.markup {
+ match r {
Markup::Markdown(ref s) | Markup::Rst(ref s) | Markup::Html(ref s) => s.len() < 1000,
}
} else {
@@ -1111,11 +1134,7 @@ impl KitchenSink {
async fn add_readme_from_crates_io(&self, meta: &mut CrateFile, name: &str, ver: &str) {
if let Ok(Some(html)) = self.crates_io.readme(name, ver).await {
eprintln!("Found readme on crates.io {}@{}", name, ver);
- meta.readme = Some(Readme {
- markup: Markup::Html(String::from_utf8_lossy(&html).to_string()),
- base_url: None,
- base_image_url: None,
- });
+ meta.readme = Some((String::new(), Markup::Html(String::from_utf8_lossy(&html).to_string())));
} else {
eprintln!("No readme on crates.io for {}@{}", name, ver);
}
diff --git a/kitchen_sink/src/tarball.rs b/kitchen_sink/src/tarball.rs
index d20530e..c46d1da 100644
--- a/kitchen_sink/src/tarball.rs
+++ b/kitchen_sink/src/tarball.rs
@@ -2,8 +2,6 @@ use cargo_toml::Manifest;
use cargo_toml::Package;
use libflate::gzip::Decoder;
use render_readme::Markup;
-use render_readme::Readme;
-use repo_url::Repo;
use std::collections::HashSet;
use std::io;
use std::io::Read;
@@ -180,7 +178,7 @@ impl Collector {
Ok(CrateFile {
decompressed_size: self.decompressed_size,
- readme: self.markup.map(|(path, m)| readme_from_repo(m, manifest.package.as_ref().and_then(|r| r.repository.as_ref()), &path)),
+ readme: self.markup,
manifest,
files: self.files,
lib_file: self.lib_file,
@@ -232,7 +230,8 @@ pub struct CrateFile {
pub manifest: Manifest,
pub lib_file: Option<String>,
pub files: Vec<PathBuf>,
- pub readme: Option<Readme>,
+ // relative path and markup
+ pub readme: Option<(String, Markup)>,
pub language_stats: udedokei::Stats,
pub decompressed_size: usize,
pub is_nightly: bool,
@@ -247,14 +246,6 @@ impl CrateFile {
}
}
-fn readme_from_repo(markup: Markup, repo_url: Option<&String>, base_path: &str) -> Readme {
- let repo = repo_url.and_then(|url| Repo::new(url).ok());
- let base_url = repo.as_ref().map(|r| r.readme_base_url(base_path));
- let base_image_url = repo.map(|r| r.readme_base_image_url(base_path));
-
- Readme::new(markup, base_url, base_image_url)
-}
-
/// Check if given filename is a README. If `package` is missing, guess.
fn is_readme_filename(path: &Path, package: Option<&Package>) -> bool {
path.to_str().map_or(false, |pathstr| {
@@ -275,7 +266,7 @@ fn unpack_crate() {
assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1");
assert!(d.lib_file.unwrap().contains("fn nothing"));
assert_eq!(d.files.len(), 5);
- assert!(match d.readme.unwrap().markup {
+ assert!(match d.readme.unwrap().1 {
Markup::Rst(a) => a == "o hi\n",
_ => false,
});
@@ -288,6 +279,7 @@ fn unpack_crate() {
#[test]
fn unpack_repo() {
+ use repo_url::Repo;
let test_repo_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("test.repo");
let repo = Repo::new("http://example.invalid/foo.git").unwrap();
let checkout = crate_git_checkout::checkout(&repo, &test_repo_path).unwrap();
@@ -300,7 +292,7 @@ fn unpack_repo() {
assert_eq!(d.manifest.package.as_ref().unwrap().version, "0.5.1");
assert!(d.lib_file.unwrap().contains("fn nothing"));
assert_eq!(d.files.len(), 5);
- assert!(match d.readme.unwrap().markup {
+ assert!(match d.readme.unwrap().1 {
Markup::Rst(a) => a == "o hi\n",
_ => false,
});