From a814e01d229db717fbcd5c2b98a4daa247b07c02 Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Wed, 12 Jul 2023 12:12:12 -0700 Subject: Improve word diffing heuristic and add another sample file --- sample_files/compare.expected | 5 ++++- sample_files/string_subwords_after.el | 16 ++++++++++++++++ sample_files/string_subwords_before.el | 13 +++++++++++++ src/parse/syntax.rs | 31 ++++++++++++++++--------------- 4 files changed, 49 insertions(+), 16 deletions(-) create mode 100644 sample_files/string_subwords_after.el create mode 100644 sample_files/string_subwords_before.el diff --git a/sample_files/compare.expected b/sample_files/compare.expected index bba2e0fc7..056ee8a80 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -158,7 +158,7 @@ sample_files/pascal_before.pascal sample_files/pascal_after.pascal dfea5599b7f5e180d0fafab326f612cc - sample_files/perl_before.pl sample_files/perl_after.pl -09034cdf9cc4853ba7527de6d633e9be - +62ed7685bdfad901d1087e8bad399d86 - sample_files/prefer_outer_before.el sample_files/prefer_outer_after.el de31a80dc8a06987aeff4aaa04ce3b87 - @@ -199,6 +199,9 @@ sample_files/slow_before.rs sample_files/slow_after.rs sample_files/small_before.js sample_files/small_after.js b4300bfc0203acd8f2603b504b859dc8 - +sample_files/string_subwords_before.el sample_files/string_subwords_after.el +1154702ee8bc90407728871b94d12878 - + sample_files/strings_before.el sample_files/strings_after.el adc1c8734906b83deff25b1567e46b56 - diff --git a/sample_files/string_subwords_after.el b/sample_files/string_subwords_after.el new file mode 100644 index 000000000..870f65428 --- /dev/null +++ b/sample_files/string_subwords_after.el @@ -0,0 +1,16 @@ +(format "%s: %s" (site-name) name) + +(defcustom deadgrep-max-buffers + 4 + "The maximum number of deadgrep results buffers. + +If the number of results buffers exceeds this value, deadgrep +will kill results buffers. The least recently used buffers are +killed first. 
+ +To disable cleanup entirely, set this variable to nil." + :type '(choice + (number :tag "Maximum of buffers allowed") + (const :tag "Disable cleanup" nil)) + :group 'deadgrep) + diff --git a/sample_files/string_subwords_before.el b/sample_files/string_subwords_before.el new file mode 100644 index 000000000..940aa96ad --- /dev/null +++ b/sample_files/string_subwords_before.el @@ -0,0 +1,13 @@ +(format "SoloWiki Viewing: %s" name) + +(defcustom deadgrep-max-buffers + 4 + "Deadgrep will kill the least recently used results buffer +if there are more than this many. + +To disable cleanup entirely, set this variable to nil." + :type '(choice + (number :tag "Maximum of buffers allowed") + (const :tag "Disable cleanup" nil)) + :group 'deadgrep) + diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index a64d18764..f93992ad7 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -790,29 +790,30 @@ fn split_atom_words( /// Are there sufficient common words that we should only highlight /// individual changed words? fn has_common_words(word_diffs: &Vec>) -> bool { - let mut word_count = 0; + let mut novel_count = 0; + let mut unchanged_count = 0; + for word_diff in word_diffs { match word_diff { myers_diff::DiffResult::Both(word, _) => { - // If we have at least one long word (i.e. not just - // punctuation), that's sufficient. - if word.len() > 2 { - return true; - } - - // If we have lots of common short words, not just the - // beginning/end comment delimiter, that qualifies - // too. - word_count += 1; - if word_count > 4 { - return true; + if **word != " " { + unchanged_count += 1; } } - _ => {} + _ => { + novel_count += 1; + } } } - false + // We want more than two unchanged words, because the text content + // includes the comment or string delimiters. + // + // A sufficiently similar set of words is when more than 50% of + // the words are common between the two sides. 
We multiply by two + // because non-matching words give us two novel words, whereas + // matched words only give us one unchanged word. + unchanged_count > 2 && unchanged_count * 2 >= novel_count } impl MatchedPos { -- cgit v1.2.3