summary | refs | log | tree | commit | diff | stats
path: root/crates/globset
diff options
context:
space:
mode:
author: Tobias Decking <Tobias.Decking@gmail.com> 2024-06-05 15:56:00 +0200
committer: GitHub <noreply@github.com> 2024-06-05 09:56:00 -0400
commit: c9ebcbd8abe48c8336fb4826df7e9b6fb179de03 (patch)
tree: 144219db56f26dcb24323b649ca73a3ca68d6b46 /crates/globset
parent: dec0dc319653364d7a2c23dd78e65c8b0532f270 (diff)
globset: optimize character escaping (HEAD, master)
Rewrites the char_to_escaped_literal and bytes_to_escaped_literal functions in a way that minimizes heap allocations. After this, the resulting string is the only allocation remaining. I believe when this code was originally written, the routines available to avoid heap allocations didn't exist. I'm skeptical that this matters in the grand scheme of things, but I think this is still worth doing for "good sense" reasons. PR #2833
Diffstat (limited to 'crates/globset')
-rw-r--r-- crates/globset/src/glob.rs 12
1 file changed, 8 insertions, 4 deletions
diff --git a/crates/globset/src/glob.rs b/crates/globset/src/glob.rs
index 83c08344..c25e3f22 100644
--- a/crates/globset/src/glob.rs
+++ b/crates/globset/src/glob.rs
@@ -1,3 +1,4 @@
+use std::fmt::Write;
use std::path::{is_separator, Path};
use regex_automata::meta::Regex;
@@ -732,7 +733,9 @@ impl Tokens {
/// Convert a Unicode scalar value to an escaped string suitable for use as
/// a literal in a non-Unicode regex.
fn char_to_escaped_literal(c: char) -> String {
- bytes_to_escaped_literal(&c.to_string().into_bytes())
+ let mut buf = [0; 4];
+ let bytes = c.encode_utf8(&mut buf).as_bytes();
+ bytes_to_escaped_literal(bytes)
}
/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
@@ -741,11 +744,12 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
let mut s = String::with_capacity(bs.len());
for &b in bs {
if b <= 0x7F {
- s.push_str(&regex_syntax::escape(
+ regex_syntax::escape_into(
char::from(b).encode_utf8(&mut [0; 4]),
- ));
+ &mut s,
+ );
} else {
- s.push_str(&format!("\\x{:02x}", b));
+ write!(&mut s, "\\x{:02x}", b).unwrap();
}
}
s