diff options
author | Dan Davison <dandavison7@gmail.com> | 2020-05-18 09:25:46 -0400 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2020-05-18 09:25:46 -0400 |
commit | 2cb3e403e802250308ef05e37e33105ad68eaef6 (patch) | |
tree | d4a7a707937cd063175c59112ac2ff26e82d5f0a /src/edits.rs | |
parent | 1380ef6dfbafe0598a46bdd7a4a7608c794c9621 (diff) |
Performance: create regexp as a top-level static
This seems to be a huge performance improvement: `make benchmark`
drops from 1.7s to 0.9s, because the tokenization regex is now compiled
once instead of on every call to `tokenize`.
Diffstat (limited to 'src/edits.rs')
-rw-r--r-- | src/edits.rs | 8 |
1 file changed, 6 insertions, 2 deletions
diff --git a/src/edits.rs b/src/edits.rs index 1788afd5..2e16d2be 100644 --- a/src/edits.rs +++ b/src/edits.rs @@ -1,5 +1,6 @@ use regex::Regex; +use lazy_static::lazy_static; use unicode_width::UnicodeWidthStr; use crate::align; @@ -75,13 +76,16 @@ where (annotated_minus_lines, annotated_plus_lines) } +lazy_static! { + static ref TOKENIZATION_REGEXP: Regex = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap(); +} + /// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings; /// not individual characters. fn tokenize(line: &str) -> Vec<&str> { - let separators = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap(); let mut tokens = Vec::new(); let mut offset = 0; - for m in separators.find_iter(line) { + for m in TOKENIZATION_REGEXP.find_iter(line) { tokens.push(&line[offset..m.start()]); // Align separating text as multiple single-character tokens. for i in m.start()..m.end() { |