summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilfred Hughes <me@wilfred.me.uk>2023-07-12 12:12:12 -0700
committerWilfred Hughes <me@wilfred.me.uk>2023-07-12 12:12:32 -0700
commita814e01d229db717fbcd5c2b98a4daa247b07c02 (patch)
tree73beb2cc0d83ad2f29a00935c6eac6bd28a7f803
parent1d3b6836ef1ab4fd2200871c0f4a8c92583997f3 (diff)
Improve word diffing heuristic and add another sample file
-rw-r--r--sample_files/compare.expected5
-rw-r--r--sample_files/string_subwords_after.el16
-rw-r--r--sample_files/string_subwords_before.el13
-rw-r--r--src/parse/syntax.rs31
4 files changed, 49 insertions, 16 deletions
diff --git a/sample_files/compare.expected b/sample_files/compare.expected
index bba2e0fc7..056ee8a80 100644
--- a/sample_files/compare.expected
+++ b/sample_files/compare.expected
@@ -158,7 +158,7 @@ sample_files/pascal_before.pascal sample_files/pascal_after.pascal
dfea5599b7f5e180d0fafab326f612cc -
sample_files/perl_before.pl sample_files/perl_after.pl
-09034cdf9cc4853ba7527de6d633e9be -
+62ed7685bdfad901d1087e8bad399d86 -
sample_files/prefer_outer_before.el sample_files/prefer_outer_after.el
de31a80dc8a06987aeff4aaa04ce3b87 -
@@ -199,6 +199,9 @@ sample_files/slow_before.rs sample_files/slow_after.rs
sample_files/small_before.js sample_files/small_after.js
b4300bfc0203acd8f2603b504b859dc8 -
+sample_files/string_subwords_before.el sample_files/string_subwords_after.el
+1154702ee8bc90407728871b94d12878 -
+
sample_files/strings_before.el sample_files/strings_after.el
adc1c8734906b83deff25b1567e46b56 -
diff --git a/sample_files/string_subwords_after.el b/sample_files/string_subwords_after.el
new file mode 100644
index 000000000..870f65428
--- /dev/null
+++ b/sample_files/string_subwords_after.el
@@ -0,0 +1,16 @@
+(format "%s: %s" (site-name) name)
+
+(defcustom deadgrep-max-buffers
+ 4
+ "The maximum number of deadgrep results buffers.
+
+If the number of results buffers exceeds this value, deadgrep
+will kill results buffers. The least recently used buffers are
+killed first.
+
+To disable cleanup entirely, set this variable to nil."
+ :type '(choice
+ (number :tag "Maximum of buffers allowed")
+ (const :tag "Disable cleanup" nil))
+ :group 'deadgrep)
+
diff --git a/sample_files/string_subwords_before.el b/sample_files/string_subwords_before.el
new file mode 100644
index 000000000..940aa96ad
--- /dev/null
+++ b/sample_files/string_subwords_before.el
@@ -0,0 +1,13 @@
+(format "SoloWiki Viewing: %s" name)
+
+(defcustom deadgrep-max-buffers
+ 4
+ "Deadgrep will kill the least recently used results buffer
+if there are more than this many.
+
+To disable cleanup entirely, set this variable to nil."
+ :type '(choice
+ (number :tag "Maximum of buffers allowed")
+ (const :tag "Disable cleanup" nil))
+ :group 'deadgrep)
+
diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs
index a64d18764..f93992ad7 100644
--- a/src/parse/syntax.rs
+++ b/src/parse/syntax.rs
@@ -790,29 +790,30 @@ fn split_atom_words(
/// Are there sufficient common words that we should only highlight
/// individual changed words?
fn has_common_words(word_diffs: &Vec<myers_diff::DiffResult<&&str>>) -> bool {
- let mut word_count = 0;
+ let mut novel_count = 0;
+ let mut unchanged_count = 0;
+
for word_diff in word_diffs {
match word_diff {
myers_diff::DiffResult::Both(word, _) => {
- // If we have at least one long word (i.e. not just
- // punctuation), that's sufficient.
- if word.len() > 2 {
- return true;
- }
-
- // If we have lots of common short words, not just the
- // beginning/end comment delimiter, that qualifies
- // too.
- word_count += 1;
- if word_count > 4 {
- return true;
+ if **word != " " {
+ unchanged_count += 1;
}
}
- _ => {}
+ _ => {
+ novel_count += 1;
+ }
}
}
- false
+ // We want more than two unchanged words, because the text content
+ // includes the comment or string delimiters.
+ //
+ // A sufficiently similar set of words is when at least 50% of
+ // the words are common between the two sides. We multiply by two
+ // because a non-matching word gives us two novel words, whereas
+ // a matched word only gives us one unchanged word.
+ unchanged_count > 2 && unchanged_count * 2 >= novel_count
}
impl MatchedPos {