summary | refs | log | tree | commit | diff | stats
path: root/src/edits.rs
diff options
context:
space:
mode:
author: Dan Davison <dandavison7@gmail.com>, 2020-05-18 09:25:46 -0400
committer: Dan Davison <dandavison7@gmail.com>, 2020-05-18 09:25:46 -0400
commit: 2cb3e403e802250308ef05e37e33105ad68eaef6 (patch)
tree: d4a7a707937cd063175c59112ac2ff26e82d5f0a /src/edits.rs
parent: 1380ef6dfbafe0598a46bdd7a4a7608c794c9621 (diff)
Performance: create regexp as a top-level static
This seems to be a huge performance improvement: `make benchmark` changes from 1.7s to 0.9s.
Diffstat (limited to 'src/edits.rs')
-rw-r--r-- src/edits.rs | 8
1 file changed, 6 insertions, 2 deletions
diff --git a/src/edits.rs b/src/edits.rs
index 1788afd5..2e16d2be 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -1,5 +1,6 @@
use regex::Regex;
+use lazy_static::lazy_static;
use unicode_width::UnicodeWidthStr;
use crate::align;
@@ -75,13 +76,16 @@ where
(annotated_minus_lines, annotated_plus_lines)
}
+lazy_static! {
+ static ref TOKENIZATION_REGEXP: Regex = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
+}
+
/// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings;
/// not individual characters.
fn tokenize(line: &str) -> Vec<&str> {
- let separators = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
let mut tokens = Vec::new();
let mut offset = 0;
- for m in separators.find_iter(line) {
+ for m in TOKENIZATION_REGEXP.find_iter(line) {
tokens.push(&line[offset..m.start()]);
// Align separating text as multiple single-character tokens.
for i in m.start()..m.end() {