From 2cb3e403e802250308ef05e37e33105ad68eaef6 Mon Sep 17 00:00:00 2001
From: Dan Davison
Date: Mon, 18 May 2020 09:25:46 -0400
Subject: Performance: create regexp as a top-level static

This seems to be a huge performance improvement: `make benchmark` changes from 1.7s to 0.9s.
---
 src/edits.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/edits.rs')

diff --git a/src/edits.rs b/src/edits.rs
index 1788afd5..2e16d2be 100644
--- a/src/edits.rs
+++ b/src/edits.rs
@@ -1,5 +1,6 @@
 use regex::Regex;
 
+use lazy_static::lazy_static;
 use unicode_width::UnicodeWidthStr;
 
 use crate::align;
@@ -75,13 +76,16 @@ where
     (annotated_minus_lines, annotated_plus_lines)
 }
 
+lazy_static! {
+    static ref TOKENIZATION_REGEXP: Regex = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
+}
+
 /// Split line into tokens for alignment. The alignment algorithm aligns sequences of substrings;
 /// not individual characters.
 fn tokenize(line: &str) -> Vec<&str> {
-    let separators = Regex::new(r#"[\t ,;.:()\[\]<>/'"-]+"#).unwrap();
     let mut tokens = Vec::new();
     let mut offset = 0;
-    for m in separators.find_iter(line) {
+    for m in TOKENIZATION_REGEXP.find_iter(line) {
         tokens.push(&line[offset..m.start()]);
         // Align separating text as multiple single-character tokens.
         for i in m.start()..m.end() {
-- 
cgit v1.2.3