diff options
author | Wilfred Hughes <me@wilfred.me.uk> | 2023-07-12 12:12:12 -0700 |
---|---|---|
committer | Wilfred Hughes <me@wilfred.me.uk> | 2023-07-12 12:12:32 -0700 |
commit | a814e01d229db717fbcd5c2b98a4daa247b07c02 (patch) | |
tree | 73beb2cc0d83ad2f29a00935c6eac6bd28a7f803 | |
parent | 1d3b6836ef1ab4fd2200871c0f4a8c92583997f3 (diff) |
Improve word diffing heuristic and add another sample file
-rw-r--r-- | sample_files/compare.expected | 5 | ||||
-rw-r--r-- | sample_files/string_subwords_after.el | 16 | ||||
-rw-r--r-- | sample_files/string_subwords_before.el | 13 | ||||
-rw-r--r-- | src/parse/syntax.rs | 31 |
4 files changed, 49 insertions, 16 deletions
diff --git a/sample_files/compare.expected b/sample_files/compare.expected index bba2e0fc7..056ee8a80 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -158,7 +158,7 @@ sample_files/pascal_before.pascal sample_files/pascal_after.pascal dfea5599b7f5e180d0fafab326f612cc - sample_files/perl_before.pl sample_files/perl_after.pl -09034cdf9cc4853ba7527de6d633e9be - +62ed7685bdfad901d1087e8bad399d86 - sample_files/prefer_outer_before.el sample_files/prefer_outer_after.el de31a80dc8a06987aeff4aaa04ce3b87 - @@ -199,6 +199,9 @@ sample_files/slow_before.rs sample_files/slow_after.rs sample_files/small_before.js sample_files/small_after.js b4300bfc0203acd8f2603b504b859dc8 - +sample_files/string_subwords_before.el sample_files/string_subwords_after.el +1154702ee8bc90407728871b94d12878 - + sample_files/strings_before.el sample_files/strings_after.el adc1c8734906b83deff25b1567e46b56 - diff --git a/sample_files/string_subwords_after.el b/sample_files/string_subwords_after.el new file mode 100644 index 000000000..870f65428 --- /dev/null +++ b/sample_files/string_subwords_after.el @@ -0,0 +1,16 @@ +(format "%s: %s" (site-name) name) + +(defcustom deadgrep-max-buffers + 4 + "The maximum number of deadgrep results buffers. + +If the number of results buffers exceeds this value, deadgrep +will kill results buffers. The least recently used buffers are +killed first. + +To disable cleanup entirely, set this variable to nil." + :type '(choice + (number :tag "Maximum of buffers allowed") + (const :tag "Disable cleanup" nil)) + :group 'deadgrep) + diff --git a/sample_files/string_subwords_before.el b/sample_files/string_subwords_before.el new file mode 100644 index 000000000..940aa96ad --- /dev/null +++ b/sample_files/string_subwords_before.el @@ -0,0 +1,13 @@ +(format "SoloWiki Viewing: %s" name) + +(defcustom deadgrep-max-buffers + 4 + "Deadgrep will kill the least recently used results buffer +if there are more than this many. + +To disable cleanup entirely, set this variable to nil." + :type '(choice + (number :tag "Maximum of buffers allowed") + (const :tag "Disable cleanup" nil)) + :group 'deadgrep) + diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index a64d18764..f93992ad7 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -790,29 +790,30 @@ fn split_atom_words( /// Are there sufficient common words that we should only highlight /// individual changed words? fn has_common_words(word_diffs: &Vec<myers_diff::DiffResult<&&str>>) -> bool { - let mut word_count = 0; + let mut novel_count = 0; + let mut unchanged_count = 0; + for word_diff in word_diffs { match word_diff { myers_diff::DiffResult::Both(word, _) => { - // If we have at least one long word (i.e. not just - // punctuation), that's sufficient. - if word.len() > 2 { - return true; - } - - // If we have lots of common short words, not just the - // beginning/end comment delimiter, that qualifies - // too. - word_count += 1; - if word_count > 4 { - return true; + if **word != " " { + unchanged_count += 1; } } - _ => {} + _ => { + novel_count += 1; + } } } - false + // We want more than two unchanged words, because the text content + // includes the comment or string delimiters. + // + // A sufficiently similar set of words is when more than 50% of + // the words are common between the two sides. We multiply by two + // because non-matching words gives us two novel words, whereas + // matched words only gives us one unchanged word. + unchanged_count > 2 && unchanged_count * 2 >= novel_count } impl MatchedPos { |