diff options
author | Dan Davison <dandavison7@gmail.com> | 2021-08-29 16:52:48 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2021-08-29 22:51:06 -0400 |
commit | 97203ce2338a54b62080116f17b9b928279ae8c8 (patch) | |
tree | 09bbc096f9fba24fe7b9045f3aa459fce20dd5ea /src/parse.rs | |
parent | 93aabce9490d566d184579a0f9a182fd3cbf1d48 (diff) |
Refactor: state machine handlers module
Diffstat (limited to 'src/parse.rs')
-rw-r--r-- | src/parse.rs | 536 |
1 files changed, 0 insertions, 536 deletions
diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index c6a0860f..00000000 --- a/src/parse.rs +++ /dev/null @@ -1,536 +0,0 @@ -use lazy_static::lazy_static; -use regex::Regex; -use std::borrow::Cow; -use std::path::Path; -use unicode_segmentation::UnicodeSegmentation; - -use crate::config::Config; -use crate::features; - -// https://git-scm.com/docs/git-config#Documentation/git-config.txt-diffmnemonicPrefix -const DIFF_PREFIXES: [&str; 6] = ["a/", "b/", "c/", "i/", "o/", "w/"]; - -#[allow(clippy::tabs_in_doc_comments)] -/// Given input like -/// "--- one.rs 2019-11-20 06:16:08.000000000 +0100" -/// Return "rs" -pub fn get_file_extension_from_marker_line(line: &str) -> Option<&str> { - line.split('\t') - .next() - .and_then(|column| column.split(' ').nth(1)) - .and_then(|file| file.split('.').last()) -} - -#[derive(Debug, PartialEq)] -pub enum FileEvent { - Change, - Copy, - Rename, - ModeChange(String), - NoEvent, -} - -pub fn parse_file_meta_line( - line: &str, - git_diff_name: bool, - relative_path_base: Option<&str>, -) -> (String, FileEvent) { - let (mut path_or_mode, file_event) = match line { - line if line.starts_with("--- ") || line.starts_with("+++ ") => { - let offset = 4; - let file = _parse_file_path(&line[offset..], git_diff_name); - (file, FileEvent::Change) - } - line if line.starts_with("rename from ") => { - (line[12..].to_string(), FileEvent::Rename) // "rename from ".len() - } - line if line.starts_with("rename to ") => { - (line[10..].to_string(), FileEvent::Rename) // "rename to ".len() - } - line if line.starts_with("copy from ") => { - (line[10..].to_string(), FileEvent::Copy) // "copy from ".len() - } - line if line.starts_with("copy to ") => { - (line[8..].to_string(), FileEvent::Copy) // "copy to ".len() - } - line if line.starts_with("old mode ") => { - ("".to_string(), FileEvent::ModeChange(line[9..].to_string())) // "old mode ".len() - } - line if line.starts_with("new mode ") => { - ("".to_string(), FileEvent::ModeChange(line[9..].to_string())) // "new mode ".len() - } - _ => ("".to_string(), FileEvent::NoEvent), - }; - - if let Some(base) = relative_path_base { - if let FileEvent::ModeChange(_) = file_event { - } else if let Some(relative_path) = pathdiff::diff_paths(&path_or_mode, base) { - if let Some(relative_path) = relative_path.to_str() { - path_or_mode = relative_path.to_owned(); - } - } - } - - (path_or_mode, file_event) -} - -/// Given input like "diff --git a/src/my file.rs b/src/my file.rs" -/// return Some("src/my file.rs") -pub fn get_repeated_file_path_from_diff_line(line: &str) -> Option<String> { - if let Some(line) = line.strip_prefix("diff --git ") { - let line: Vec<&str> = line.graphemes(true).collect(); - let midpoint = line.len() / 2; - if line[midpoint] == " " { - let first_path = _parse_file_path(&line[..midpoint].join(""), true); - let second_path = _parse_file_path(&line[midpoint + 1..].join(""), true); - if first_path == second_path { - return Some(first_path); - } - } - } - None -} - -fn _parse_file_path(s: &str, git_diff_name: bool) -> String { - // It appears that, if the file name contains a space, git appends a tab - // character in the diff metadata lines, e.g. - // $ git diff --no-index "a b" "c d" | cat -A - // diff·--git·a/a·b·b/c·d␊ - // index·d00491f..0cfbf08·100644␊ - // ---·a/a·b├──┤␊ - // +++·b/c·d├──┤␊ - match s.strip_suffix('\t').unwrap_or(s) { - path if path == "/dev/null" => "/dev/null", - path if git_diff_name && DIFF_PREFIXES.iter().any(|s| path.starts_with(s)) => &path[2..], - path if git_diff_name => path, - path => path.split('\t').next().unwrap_or(""), - } - .to_string() -} - -// A regex to capture the path, and the content from the pipe onwards, in lines -// like these: -// " src/delta.rs | 14 ++++++++++----" -// " src/config.rs | 2 ++" -lazy_static! { - static ref DIFF_STAT_LINE_REGEX: Regex = - Regex::new(r" ([^\| ][^\|]+[^\| ]) +(\| +[0-9]+ .+)").unwrap(); -} - -pub fn relativize_path_in_diff_stat_line( - line: &str, - cwd_relative_to_repo_root: &str, - diff_stat_align_width: usize, -) -> Option<String> { - if let Some(caps) = DIFF_STAT_LINE_REGEX.captures(line) { - let path_relative_to_repo_root = caps.get(1).unwrap().as_str(); - if let Some(relative_path) = - pathdiff::diff_paths(path_relative_to_repo_root, cwd_relative_to_repo_root) - { - if let Some(relative_path) = relative_path.to_str() { - let suffix = caps.get(2).unwrap().as_str(); - let pad_width = diff_stat_align_width.saturating_sub(relative_path.len()); - let padding = " ".repeat(pad_width); - return Some(format!(" {}{}{}", relative_path, padding, suffix)); - } - } - } - None -} - -pub fn get_file_extension_from_file_meta_line_file_path(path: &str) -> Option<&str> { - if path.is_empty() || path == "/dev/null" { - None - } else { - get_extension(path).map(|ex| ex.trim()) - } -} - -pub fn get_file_change_description_from_file_paths( - minus_file: &str, - plus_file: &str, - comparing: bool, - minus_file_event: &FileEvent, - plus_file_event: &FileEvent, - config: &Config, -) -> String { - if comparing { - format!("comparing: {} ⟶ {}", minus_file, plus_file) - } else { - let format_label = |label: &str| { - if !label.is_empty() { - format!("{} ", label) - } else { - "".to_string() - } - }; - let format_file = |file| { - if config.hyperlinks { - features::hyperlinks::format_osc8_file_hyperlink(file, None, file, config) - } else { - Cow::from(file) - } - }; - match (minus_file, plus_file, minus_file_event, plus_file_event) { - ( - minus_file, - plus_file, - FileEvent::ModeChange(old_mode), - FileEvent::ModeChange(new_mode), - ) if minus_file == plus_file => match (old_mode.as_str(), new_mode.as_str()) { - // 100755 for executable and 100644 for non-executable are the only file modes Git records. - // https://medium.com/@tahteche/how-git-treats-changes-in-file-permissions-f71874ca239d - ("100644", "100755") => format!("{}: mode +x", plus_file), - ("100755", "100644") => format!("{}: mode -x", plus_file), - _ => format!("{}: {} ⟶ {}", plus_file, old_mode, new_mode), - }, - (minus_file, plus_file, _, _) if minus_file == plus_file => format!( - "{}{}", - format_label(&config.file_modified_label), - format_file(minus_file) - ), - (minus_file, "/dev/null", _, _) => format!( - "{}{}", - format_label(&config.file_removed_label), - format_file(minus_file) - ), - ("/dev/null", plus_file, _, _) => format!( - "{}{}", - format_label(&config.file_added_label), - format_file(plus_file) - ), - // minus_file_event == plus_file_event, except in the ModeChange - // case above. - (minus_file, plus_file, file_event, _) => format!( - "{}{} ⟶ {}", - format_label(match file_event { - FileEvent::Rename => &config.file_renamed_label, - FileEvent::Copy => &config.file_copied_label, - _ => "", - }), - format_file(minus_file), - format_file(plus_file) - ), - } - } -} - -lazy_static! { - static ref HUNK_HEADER_REGEX: Regex = Regex::new(r"@+ ([^@]+)@+(.*\s?)").unwrap(); -} - -// Parse unified diff hunk header format. See -// https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html -// https://www.artima.com/weblogs/viewpost.jsp?thread=164293 -lazy_static! { - static ref HUNK_HEADER_FILE_COORDINATE_REGEX: Regex = Regex::new( - r"(?x) -[-+] -(\d+) # 1. Hunk start line number -(?: # Start optional hunk length section (non-capturing) - , # Literal comma - (\d+) # 2. Optional hunk length (defaults to 1) -)?" - ) - .unwrap(); -} - -/// Given input like -/// "@@ -74,15 +74,14 @@ pub fn delta(" -/// Return " pub fn delta(" and a vector of (line_number, hunk_length) tuples. -pub fn parse_hunk_header(line: &str) -> (String, Vec<(usize, usize)>) { - let caps = HUNK_HEADER_REGEX.captures(line).unwrap(); - let file_coordinates = &caps[1]; - let line_numbers_and_hunk_lengths = HUNK_HEADER_FILE_COORDINATE_REGEX - .captures_iter(file_coordinates) - .map(|caps| { - ( - caps[1].parse::<usize>().unwrap(), - caps.get(2) - .map(|m| m.as_str()) - // Per the specs linked above, if the hunk length is absent then it is 1. - .unwrap_or("1") - .parse::<usize>() - .unwrap(), - ) - }) - .collect(); - let code_fragment = &caps[2]; - (code_fragment.to_string(), line_numbers_and_hunk_lengths) -} - -/// Attempt to parse input as a file path and return extension as a &str. -fn get_extension(s: &str) -> Option<&str> { - let path = Path::new(s); - path.extension() - .and_then(|e| e.to_str()) - // E.g. 'Makefile' is the file name and also the extension - .or_else(|| path.file_name().and_then(|s| s.to_str())) -} - -lazy_static! { - static ref SUBMODULE_SHORT_LINE_REGEX: Regex = - Regex::new("^[-+]Subproject commit ([0-9a-f]{40})$").unwrap(); -} - -pub fn get_submodule_short_commit(line: &str) -> Option<&str> { - match SUBMODULE_SHORT_LINE_REGEX.captures(line) { - Some(caps) => Some(caps.get(1).unwrap().as_str()), - None => None, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_get_repeated_file_path_from_diff_line() { - assert_eq!( - get_repeated_file_path_from_diff_line("diff --git a/src/main.rs b/src/main.rs"), - Some("src/main.rs".to_string()) - ); - assert_eq!( - get_repeated_file_path_from_diff_line("diff --git a/a b/a"), - Some("a".to_string()) - ); - assert_eq!( - get_repeated_file_path_from_diff_line("diff --git a/a b b/a b"), - Some("a b".to_string()) - ); - assert_eq!( - get_repeated_file_path_from_diff_line("diff --git a/a b/aa"), - None - ); - assert_eq!( - get_repeated_file_path_from_diff_line("diff --git a/.config/Code - Insiders/User/settings.json b/.config/Code - Insiders/User/settings.json"), - Some(".config/Code - Insiders/User/settings.json".to_string()) - ); - } - - #[test] - fn test_get_file_extension_from_marker_line() { - assert_eq!( - get_file_extension_from_marker_line( - "--- src/one.rs 2019-11-20 06:47:56.000000000 +0100" - ), - Some("rs") - ); - } - - #[test] - fn test_get_file_extension_from_file_meta_line() { - assert_eq!( - get_file_extension_from_file_meta_line_file_path("a/src/parse.rs"), - Some("rs") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("b/src/pa rse.rs"), - Some("rs") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("src/pa rse.rs"), - Some("rs") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("wat hello.rs"), - Some("rs") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("/dev/null"), - None - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("Dockerfile"), - Some("Dockerfile") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("Makefile"), - Some("Makefile") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("a/src/Makefile"), - Some("Makefile") - ); - assert_eq!( - get_file_extension_from_file_meta_line_file_path("src/Makefile"), - Some("Makefile") - ); - } - - // We should only strip the prefixes if they are "a/" or "b/". This will be correct except for - // the case of a user with `diff.noprefix = true` who has directories named "a" or "b", which - // is an irresolvable ambiguity. Ideally one would only strip the prefixes if we have confirmed - // that we are looking at something like - // - // --- a/src/parse.rs - // +++ b/src/parse.rs - // - // as opposed to something like - // - // --- a/src/parse.rs - // +++ sibling_of_a/src/parse.rs - // - // but we don't attempt that currently. - #[test] - fn test_get_file_path_from_git_file_meta_line() { - assert_eq!( - parse_file_meta_line("--- /dev/null", true, None), - ("/dev/null".to_string(), FileEvent::Change) - ); - for prefix in &DIFF_PREFIXES { - assert_eq!( - parse_file_meta_line(&format!("--- {}src/delta.rs", prefix), true, None), - ("src/delta.rs".to_string(), FileEvent::Change) - ); - } - assert_eq!( - parse_file_meta_line("--- src/delta.rs", true, None), - ("src/delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ src/delta.rs", true, None), - ("src/delta.rs".to_string(), FileEvent::Change) - ); - } - - #[test] - fn test_get_file_path_from_git_file_meta_line_containing_spaces() { - assert_eq!( - parse_file_meta_line("+++ a/my src/delta.rs", true, None), - ("my src/delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ my src/delta.rs", true, None), - ("my src/delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ a/src/my delta.rs", true, None), - ("src/my delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ a/my src/my delta.rs", true, None), - ("my src/my delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ b/my src/my enough/my delta.rs", true, None), - ( - "my src/my enough/my delta.rs".to_string(), - FileEvent::Change - ) - ); - } - - #[test] - fn test_get_file_path_from_git_file_meta_line_rename() { - assert_eq!( - parse_file_meta_line("rename from nospace/file2.el", true, None), - ("nospace/file2.el".to_string(), FileEvent::Rename) - ); - } - - #[test] - fn test_get_file_path_from_git_file_meta_line_rename_containing_spaces() { - assert_eq!( - parse_file_meta_line("rename from with space/file1.el", true, None), - ("with space/file1.el".to_string(), FileEvent::Rename) - ); - } - - #[test] - fn test_parse_file_meta_line() { - assert_eq!( - parse_file_meta_line("--- src/delta.rs", false, None), - ("src/delta.rs".to_string(), FileEvent::Change) - ); - assert_eq!( - parse_file_meta_line("+++ src/delta.rs", false, None), - ("src/delta.rs".to_string(), FileEvent::Change) - ); - } - - #[test] - fn test_parse_hunk_header() { - let parsed = parse_hunk_header("@@ -74,15 +75,14 @@ pub fn delta(\n"); - let code_fragment = parsed.0; - let line_numbers_and_hunk_lengths = parsed.1; - assert_eq!(code_fragment, " pub fn delta(\n"); - assert_eq!(line_numbers_and_hunk_lengths[0], (74, 15),); - assert_eq!(line_numbers_and_hunk_lengths[1], (75, 14),); - } - - #[test] - fn test_parse_hunk_header_with_omitted_hunk_lengths() { - let parsed = parse_hunk_header("@@ -74 +75,2 @@ pub fn delta(\n"); - let code_fragment = parsed.0; - let line_numbers_and_hunk_lengths = parsed.1; - assert_eq!(code_fragment, " pub fn delta(\n"); - assert_eq!(line_numbers_and_hunk_lengths[0], (74, 1),); - assert_eq!(line_numbers_and_hunk_lengths[1], (75, 2),); - } - - #[test] - fn test_parse_hunk_header_added_file() { - let parsed = parse_hunk_header("@@ -1,22 +0,0 @@"); - let code_fragment = parsed.0; - let line_numbers_and_hunk_lengths = parsed.1; - assert_eq!(code_fragment, "",); - assert_eq!(line_numbers_and_hunk_lengths[0], (1, 22),); - assert_eq!(line_numbers_and_hunk_lengths[1], (0, 0),); - } - - #[test] - fn test_parse_hunk_header_deleted_file() { - let parsed = parse_hunk_header("@@ -0,0 +1,3 @@"); - let code_fragment = parsed.0; - let line_numbers_and_hunk_lengths = parsed.1; - assert_eq!(code_fragment, "",); - assert_eq!(line_numbers_and_hunk_lengths[0], (0, 0),); - assert_eq!(line_numbers_and_hunk_lengths[1], (1, 3),); - } - - #[test] - fn test_parse_hunk_header_merge() { - let parsed = parse_hunk_header("@@@ -293,11 -358,15 +358,16 @@@ dependencies ="); - let code_fragment = parsed.0; - let line_numbers_and_hunk_lengths = parsed.1; - assert_eq!(code_fragment, " dependencies ="); - assert_eq!(line_numbers_and_hunk_lengths[0], (293, 11),); - assert_eq!(line_numbers_and_hunk_lengths[1], (358, 15),); - assert_eq!(line_numbers_and_hunk_lengths[2], (358, 16),); - } - - #[test] - fn test_relative_path() { - for (path, cwd_relative_to_repo_root, expected) in &[ - ("file.rs", "", "file.rs"), - ("file.rs", "a/", "../file.rs"), - ("a/file.rs", "a/", "file.rs"), - ("a/b/file.rs", "a", "b/file.rs"), - ("c/d/file.rs", "a/b/", "../../c/d/file.rs"), - ] { - assert_eq!( - pathdiff::diff_paths(path, cwd_relative_to_repo_root), - Some(expected.into()) - ) - } - } - - #[test] - fn test_diff_stat_line_regex_1() { - let caps = DIFF_STAT_LINE_REGEX.captures(" src/delta.rs | 14 ++++++++++----"); - assert!(caps.is_some()); - let caps = caps.unwrap(); - assert_eq!(caps.get(1).unwrap().as_str(), "src/delta.rs"); - assert_eq!(caps.get(2).unwrap().as_str(), "| 14 ++++++++++----"); - } - - #[test] - fn test_diff_stat_line_regex_2() { - let caps = DIFF_STAT_LINE_REGEX.captures(" src/config.rs | 2 ++"); - assert!(caps.is_some()); - let caps = caps.unwrap(); - assert_eq!(caps.get(1).unwrap().as_str(), "src/config.rs"); - assert_eq!(caps.get(2).unwrap().as_str(), "| 2 ++"); - } -} |