diff options
author | Dan Davison <dandavison7@gmail.com> | 2021-12-16 07:13:10 -0500 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2021-12-16 07:13:10 -0500 |
commit | 3c26eeb00a5b97488c511e65605ccf82740b151a (patch) | |
tree | 0d02cb57d1b6ddbaf62d75d23e0e67906f997955 | |
parent | ab54c116ecf1947f3e6b8bda400e2c896d982232 (diff) |
Add example diff reproducing #677677-binary-data
-rw-r--r-- | etc/examples/677-diff-with-binary-data.diff | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/etc/examples/677-diff-with-binary-data.diff b/etc/examples/677-diff-with-binary-data.diff new file mode 100644 index 00000000..2d10bb91 --- /dev/null +++ b/etc/examples/677-diff-with-binary-data.diff @@ -0,0 +1,236 @@ +diff --git a/src/binary.rs b/src/binary.rs +new file mode 100644 +index 0000000..e6627bb +--- /dev/null ++++ b/src/binary.rs +@@ -0,0 +1,230 @@ ++use std::borrow::Cow; ++use std::collections::HashMap; ++use std::io::BufRead; ++use std::io::Write; ++ ++use bytelines::ByteLines; ++ ++use crate::ansi; ++use crate::config::delta_unreachable; ++use crate::config::Config; ++use crate::features; ++use crate::handlers::hunk_header::ParsedHunkHeader; ++use crate::handlers::{self, merge_conflict}; ++use crate::paint::Painter; ++use crate::style::DecorationStyle; ++ ++#[derive(Clone, Debug, PartialEq)] ++pub enum State { ++ CommitMeta, // In commit metadata section ++ DiffHeader(DiffType), // In diff metadata section, between (possible) commit metadata and first hunk ++ HunkHeader(DiffType, ParsedHunkHeader, String, String), // In hunk metadata line (diff_type, parsed, line, raw_line) ++ HunkZero(DiffType, Option<String>), // In hunk; unchanged line (prefix, raw_line) ++ HunkMinus(DiffType, Option<String>), // In hunk; removed line (diff_type, raw_line) ++ HunkPlus(DiffType, Option<String>), // In hunk; added line (diff_type, raw_line) ++ MergeConflict(MergeParents, merge_conflict::MergeConflictCommit), ++ SubmoduleLog, // In a submodule section, with gitconfig diff.submodule = log ++ SubmoduleShort(String), // In a submodule section, with gitconfig diff.submodule = short ++ Blame(String, Option<String>), // In a line of `git blame` output (commit, repeat_blame_line). ++ GitShowFile, // In a line of `git show $revision:./path/to/file.ext` output ++ Grep, // In a line of `git grep` output ++ Unknown, ++ // The following elements are created when a line is wrapped to display it: ++ HunkZeroWrapped, // Wrapped unchanged line ++ HunkMinusWrapped, // Wrapped removed line ++ HunkPlusWrapped, // Wrapped added line ++} ++ ++#[derive(Clone, Debug, PartialEq)] ++pub enum DiffType { ++ Unified, ++ // https://git-scm.com/docs/git-diff#_combined_diff_format ++ Combined(MergeParents, InMergeConflict), ++} ++ ++#[derive(Clone, Debug, PartialEq)] ++pub enum MergeParents { ++ Number(usize), // Number of parent commits == (number of @s in hunk header) - 1 ++ Prefix(String), // Hunk line prefix, length == number of parent commits ++ Unknown, ++} ++ ++#[derive(Clone, Debug, PartialEq)] ++pub enum InMergeConflict { ++ Yes, ++ No, ++} ++ ++impl DiffType { ++ pub fn n_parents(&self) -> usize { ++ use DiffType::*; ++ use MergeParents::*; ++ match self { ++ Combined(Prefix(prefix), _) => prefix.len(), ++ Combined(Number(n_parents), _) => *n_parents, ++ Unified => 1, ++ Combined(Unknown, _) => delta_unreachable("Number of merge parents must be known."), ++ } ++ } ++} ++ ++#[derive(Debug, PartialEq)] ++pub enum Source { ++ GitDiff, // Coming from a `git diff` command ++ DiffUnified, // Coming from a `diff -u` command ++ Unknown, ++} ++ ++// Possible transitions, with actions on entry: ++// ++// ++// | from \ to | CommitMeta | DiffHeader | HunkHeader | HunkZero | HunkMinus | HunkPlus | ++// |-------------+-------------+-------------+-------------+-------------+-------------+----------| ++// | CommitMeta | emit | emit | | | | | ++// | DiffHeader | | emit | emit | | | | ++// | HunkHeader | | | | emit | push | push | ++// | HunkZero | emit | emit | emit | emit | push | push | ++// | HunkMinus | flush, emit | flush, emit | flush, emit | flush, emit | push | push | ++// | HunkPlus | flush, emit | flush, emit | flush, emit | flush, emit | flush, push | push | ++ ++pub struct StateMachine<'a> { ++ pub line: String, ++ pub raw_line: String, ++ pub state: State, ++ pub source: Source, ++ pub minus_file: String, ++ pub plus_file: String, ++ pub minus_file_event: handlers::diff_header::FileEvent, ++ pub plus_file_event: handlers::diff_header::FileEvent, ++ pub diff_line: String, ++ pub painter: Painter<'a>, ++ pub config: &'a Config, ++ ++ // When a file is modified, we use lines starting with '---' or '+++' to obtain the file name. ++ // When a file is renamed without changes, we use lines starting with 'rename' to obtain the ++ // file name (there is no diff hunk and hence no lines starting with '---' or '+++'). But when ++ // a file is renamed with changes, both are present, and we rely on the following variables to ++ // avoid emitting the file meta header line twice (#245). ++ pub current_file_pair: Option<(String, String)>, ++ pub handled_diff_header_header_line_file_pair: Option<(String, String)>, ++ pub blame_commit_colors: HashMap<String, String>, ++} ++ ++pub fn delta<I>(lines: ByteLines<I>, writer: &mut dyn Write, config: &Config) -> std::io::Result<()> ++where ++ I: BufRead, ++{ ++ StateMachine::new(writer, config).consume(lines) ++} ++ ++impl<'a> StateMachine<'a> { ++ pub fn new(writer: &'a mut dyn Write, config: &'a Config) -> Self { ++ Self { ++ line: "".to_string(), ++ raw_line: "".to_string(), ++ state: State::Unknown, ++ source: Source::Unknown, ++ minus_file: "".to_string(), ++ plus_file: "".to_string(), ++ minus_file_event: handlers::diff_header::FileEvent::NoEvent, ++ plus_file_event: handlers::diff_header::FileEvent::NoEvent, ++ diff_line: "".to_string(), ++ current_file_pair: None, ++ handled_diff_header_header_line_file_pair: None, ++ painter: Painter::new(writer, config), ++ config, ++ blame_commit_colors: HashMap::new(), ++ } ++ } ++ ++ fn consume<I>(&mut self, mut lines: ByteLines<I>) -> std::io::Result<()> ++ where ++ I: BufRead, ++ { ++ while let Some(Ok(raw_line_bytes)) = lines.next() { ++ self.ingest_line(raw_line_bytes); ++ ++ if self.source == Source::Unknown { ++ self.source = detect_source(&self.line); ++ } ++ ++ // Every method named handle_* must return std::io::Result<bool>. ++ // The bool indicates whether the line has been handled by that ++ // method (in which case no subsequent handlers are permitted to ++ // handle it). ++ let _ = self.handle_commit_meta_header_line()? ++ || self.handle_diff_stat_line()? ++ || self.handle_diff_header_diff_line()? ++ || self.handle_diff_header_minus_line()? ++ || self.handle_diff_header_plus_line()? ++ || self.handle_hunk_header_line()? ++ || self.handle_diff_header_misc_line()? ++ || self.handle_submodule_log_line()? ++ || self.handle_submodule_short_line()? ++ || self.handle_merge_conflict_line()? ++ || self.handle_hunk_line()? ++ || self.handle_git_show_file_line()? ++ || self.handle_blame_line()? ++ || self.handle_grep_line()? ++ || self.should_skip_line() ++ || self.emit_line_unchanged()?; ++ } ++ ++ self.painter.paint_buffered_minus_and_plus_lines(); ++ self.painter.emit()?; ++ Ok(()) ++ } ++ ++ fn ingest_line(&mut self, raw_line_bytes: &[u8]) { ++ // TODO: retain raw_line as Cow ++ self.raw_line = String::from_utf8_lossy(raw_line_bytes).to_string(); ++ // When a file has \r\n line endings, git sometimes adds ANSI escape sequences between the ++ // \r and \n, in which case byte_lines does not remove the \r. Remove it now. ++ if let Some(cr_index) = self.raw_line.rfind('\r') { ++ if ansi::strip_ansi_codes(&self.raw_line[cr_index + 1..]).is_empty() { ++ self.raw_line = format!( ++ "{}{}", ++ &self.raw_line[..cr_index], ++ &self.raw_line[cr_index + 1..] ++ ); ++ } ++ } ++ if self.config.max_line_length > 0 ++ && self.raw_line.len() > self.config.max_line_length ++ // Do not truncate long hunk headers ++ && !self.raw_line.starts_with("@@") ++ // Do not truncate ripgrep --json output ++ && !self.raw_line.starts_with('{') ++ { ++ self.raw_line = ansi::truncate_str( ++ &self.raw_line, ++ ++HP]鶴 ++ ++ ++IL9t'Kt&HtK<& ++fu ++fHtfHH!ƀ<2 ++fu( ++fHtfHH!ƀ<2 ++fu+ ++fHtfHH!ƀ<2 ++It$/Ht ++It$(Ht ++It$ Ht ++It$Ht ++It$Ht ++I4$I|$H| ++Ht ++H [A\A^A_]H}LH+ ++fu$ ++fHtfHH!ƀ<2 ++ ++ ++ ++ ++ ++ ++ ++u H5dD ++D |