summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2021-12-16 07:13:10 -0500
committerDan Davison <dandavison7@gmail.com>2021-12-16 07:13:10 -0500
commit3c26eeb00a5b97488c511e65605ccf82740b151a (patch)
tree0d02cb57d1b6ddbaf62d75d23e0e67906f997955
parentab54c116ecf1947f3e6b8bda400e2c896d982232 (diff)
Add example diff reproducing #677677-binary-data
-rw-r--r--etc/examples/677-diff-with-binary-data.diff236
1 files changed, 236 insertions, 0 deletions
diff --git a/etc/examples/677-diff-with-binary-data.diff b/etc/examples/677-diff-with-binary-data.diff
new file mode 100644
index 00000000..2d10bb91
--- /dev/null
+++ b/etc/examples/677-diff-with-binary-data.diff
@@ -0,0 +1,236 @@
+diff --git a/src/binary.rs b/src/binary.rs
+new file mode 100644
+index 0000000..e6627bb
+--- /dev/null
++++ b/src/binary.rs
+@@ -0,0 +1,230 @@
++use std::borrow::Cow;
++use std::collections::HashMap;
++use std::io::BufRead;
++use std::io::Write;
++
++use bytelines::ByteLines;
++
++use crate::ansi;
++use crate::config::delta_unreachable;
++use crate::config::Config;
++use crate::features;
++use crate::handlers::hunk_header::ParsedHunkHeader;
++use crate::handlers::{self, merge_conflict};
++use crate::paint::Painter;
++use crate::style::DecorationStyle;
++
++#[derive(Clone, Debug, PartialEq)]
++pub enum State {
++ CommitMeta, // In commit metadata section
++ DiffHeader(DiffType), // In diff metadata section, between (possible) commit metadata and first hunk
++ HunkHeader(DiffType, ParsedHunkHeader, String, String), // In hunk metadata line (diff_type, parsed, line, raw_line)
++ HunkZero(DiffType, Option<String>), // In hunk; unchanged line (prefix, raw_line)
++ HunkMinus(DiffType, Option<String>), // In hunk; removed line (diff_type, raw_line)
++ HunkPlus(DiffType, Option<String>), // In hunk; added line (diff_type, raw_line)
++ MergeConflict(MergeParents, merge_conflict::MergeConflictCommit),
++ SubmoduleLog, // In a submodule section, with gitconfig diff.submodule = log
++ SubmoduleShort(String), // In a submodule section, with gitconfig diff.submodule = short
++ Blame(String, Option<String>), // In a line of `git blame` output (commit, repeat_blame_line).
++ GitShowFile, // In a line of `git show $revision:./path/to/file.ext` output
++ Grep, // In a line of `git grep` output
++ Unknown,
++ // The following elements are created when a line is wrapped to display it:
++ HunkZeroWrapped, // Wrapped unchanged line
++ HunkMinusWrapped, // Wrapped removed line
++ HunkPlusWrapped, // Wrapped added line
++}
++
++#[derive(Clone, Debug, PartialEq)]
++pub enum DiffType {
++ Unified,
++ // https://git-scm.com/docs/git-diff#_combined_diff_format
++ Combined(MergeParents, InMergeConflict),
++}
++
++#[derive(Clone, Debug, PartialEq)]
++pub enum MergeParents {
++ Number(usize), // Number of parent commits == (number of @s in hunk header) - 1
++ Prefix(String), // Hunk line prefix, length == number of parent commits
++ Unknown,
++}
++
++#[derive(Clone, Debug, PartialEq)]
++pub enum InMergeConflict {
++ Yes,
++ No,
++}
++
++impl DiffType {
++ pub fn n_parents(&self) -> usize {
++ use DiffType::*;
++ use MergeParents::*;
++ match self {
++ Combined(Prefix(prefix), _) => prefix.len(),
++ Combined(Number(n_parents), _) => *n_parents,
++ Unified => 1,
++ Combined(Unknown, _) => delta_unreachable("Number of merge parents must be known."),
++ }
++ }
++}
++
++#[derive(Debug, PartialEq)]
++pub enum Source {
++ GitDiff, // Coming from a `git diff` command
++ DiffUnified, // Coming from a `diff -u` command
++ Unknown,
++}
++
++// Possible transitions, with actions on entry:
++//
++//
++// | from \ to | CommitMeta | DiffHeader | HunkHeader | HunkZero | HunkMinus | HunkPlus |
++// |-------------+-------------+-------------+-------------+-------------+-------------+----------|
++// | CommitMeta | emit | emit | | | | |
++// | DiffHeader | | emit | emit | | | |
++// | HunkHeader | | | | emit | push | push |
++// | HunkZero | emit | emit | emit | emit | push | push |
++// | HunkMinus | flush, emit | flush, emit | flush, emit | flush, emit | push | push |
++// | HunkPlus | flush, emit | flush, emit | flush, emit | flush, emit | flush, push | push |
++
++pub struct StateMachine<'a> {
++ pub line: String,
++ pub raw_line: String,
++ pub state: State,
++ pub source: Source,
++ pub minus_file: String,
++ pub plus_file: String,
++ pub minus_file_event: handlers::diff_header::FileEvent,
++ pub plus_file_event: handlers::diff_header::FileEvent,
++ pub diff_line: String,
++ pub painter: Painter<'a>,
++ pub config: &'a Config,
++
++ // When a file is modified, we use lines starting with '---' or '+++' to obtain the file name.
++ // When a file is renamed without changes, we use lines starting with 'rename' to obtain the
++ // file name (there is no diff hunk and hence no lines starting with '---' or '+++'). But when
++ // a file is renamed with changes, both are present, and we rely on the following variables to
++ // avoid emitting the file meta header line twice (#245).
++ pub current_file_pair: Option<(String, String)>,
++ pub handled_diff_header_header_line_file_pair: Option<(String, String)>,
++ pub blame_commit_colors: HashMap<String, String>,
++}
++
++pub fn delta<I>(lines: ByteLines<I>, writer: &mut dyn Write, config: &Config) -> std::io::Result<()>
++where
++ I: BufRead,
++{
++ StateMachine::new(writer, config).consume(lines)
++}
++
++impl<'a> StateMachine<'a> {
++ pub fn new(writer: &'a mut dyn Write, config: &'a Config) -> Self {
++ Self {
++ line: "".to_string(),
++ raw_line: "".to_string(),
++ state: State::Unknown,
++ source: Source::Unknown,
++ minus_file: "".to_string(),
++ plus_file: "".to_string(),
++ minus_file_event: handlers::diff_header::FileEvent::NoEvent,
++ plus_file_event: handlers::diff_header::FileEvent::NoEvent,
++ diff_line: "".to_string(),
++ current_file_pair: None,
++ handled_diff_header_header_line_file_pair: None,
++ painter: Painter::new(writer, config),
++ config,
++ blame_commit_colors: HashMap::new(),
++ }
++ }
++
++ fn consume<I>(&mut self, mut lines: ByteLines<I>) -> std::io::Result<()>
++ where
++ I: BufRead,
++ {
++ while let Some(Ok(raw_line_bytes)) = lines.next() {
++ self.ingest_line(raw_line_bytes);
++
++ if self.source == Source::Unknown {
++ self.source = detect_source(&self.line);
++ }
++
++ // Every method named handle_* must return std::io::Result<bool>.
++ // The bool indicates whether the line has been handled by that
++ // method (in which case no subsequent handlers are permitted to
++ // handle it).
++ let _ = self.handle_commit_meta_header_line()?
++ || self.handle_diff_stat_line()?
++ || self.handle_diff_header_diff_line()?
++ || self.handle_diff_header_minus_line()?
++ || self.handle_diff_header_plus_line()?
++ || self.handle_hunk_header_line()?
++ || self.handle_diff_header_misc_line()?
++ || self.handle_submodule_log_line()?
++ || self.handle_submodule_short_line()?
++ || self.handle_merge_conflict_line()?
++ || self.handle_hunk_line()?
++ || self.handle_git_show_file_line()?
++ || self.handle_blame_line()?
++ || self.handle_grep_line()?
++ || self.should_skip_line()
++ || self.emit_line_unchanged()?;
++ }
++
++ self.painter.paint_buffered_minus_and_plus_lines();
++ self.painter.emit()?;
++ Ok(())
++ }
++
++ fn ingest_line(&mut self, raw_line_bytes: &[u8]) {
++ // TODO: retain raw_line as Cow
++ self.raw_line = String::from_utf8_lossy(raw_line_bytes).to_string();
++ // When a file has \r\n line endings, git sometimes adds ANSI escape sequences between the
++ // \r and \n, in which case byte_lines does not remove the \r. Remove it now.
++ if let Some(cr_index) = self.raw_line.rfind('\r') {
++ if ansi::strip_ansi_codes(&self.raw_line[cr_index + 1..]).is_empty() {
++ self.raw_line = format!(
++ "{}{}",
++ &self.raw_line[..cr_index],
++ &self.raw_line[cr_index + 1..]
++ );
++ }
++ }
++ if self.config.max_line_length > 0
++ && self.raw_line.len() > self.config.max_line_length
++ // Do not truncate long hunk headers
++ && !self.raw_line.starts_with("@@")
++ // Do not truncate ripgrep --json output
++ && !self.raw_line.starts_with('{')
++ {
++ self.raw_line = ansi::truncate_str(
++ &self.raw_line,
++
++HP]鶴
++
++
++IL9t'Kt&HtK<&
++fu 
++fHtfHH!ƀ<2
++fu(
++fHtfHH!ƀ<2
++fu+
++fHtfHH!ƀ<2
++It$/Ht
++It$(Ht
++It$ Ht
++It$Ht
++It$Ht
++I4$I|$H|
++Ht
++H [A\A^A_]H}LH+
++fu$
++fHtfHH!ƀ<2
++
++
++
++
++
++
++
++u H5dD
++D