diff options
author | Tobias Decking <Tobias.Decking@gmail.com> | 2024-06-05 15:56:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-05 09:56:00 -0400 |
commit | c9ebcbd8abe48c8336fb4826df7e9b6fb179de03 (patch) | |
tree | 144219db56f26dcb24323b649ca73a3ca68d6b46 /crates/globset | |
parent | dec0dc319653364d7a2c23dd78e65c8b0532f270 (diff) |
Rewrites the char_to_escaped_literal and bytes_to_escaped_literal
functions in a way that minimizes heap allocations. After this, the
resulting string is the only allocation remaining.
I believe when this code was originally written, the routines available
to avoid heap allocations didn't exist.
I'm skeptical that this matters in the grand scheme of things, but I
think this is still worth doing for "good sense" reasons.
PR #2833
Diffstat (limited to 'crates/globset')
-rw-r--r-- | crates/globset/src/glob.rs | 12 |
1 files changed, 8 insertions, 4 deletions
```diff
diff --git a/crates/globset/src/glob.rs b/crates/globset/src/glob.rs
index 83c08344..c25e3f22 100644
--- a/crates/globset/src/glob.rs
+++ b/crates/globset/src/glob.rs
@@ -1,3 +1,4 @@
+use std::fmt::Write;
 use std::path::{is_separator, Path};
 
 use regex_automata::meta::Regex;
@@ -732,7 +733,9 @@ impl Tokens {
 /// Convert a Unicode scalar value to an escaped string suitable for use as
 /// a literal in a non-Unicode regex.
 fn char_to_escaped_literal(c: char) -> String {
-    bytes_to_escaped_literal(&c.to_string().into_bytes())
+    let mut buf = [0; 4];
+    let bytes = c.encode_utf8(&mut buf).as_bytes();
+    bytes_to_escaped_literal(bytes)
 }
 
 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
@@ -741,11 +744,12 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
     let mut s = String::with_capacity(bs.len());
     for &b in bs {
         if b <= 0x7F {
-            s.push_str(&regex_syntax::escape(
+            regex_syntax::escape_into(
                 char::from(b).encode_utf8(&mut [0; 4]),
-            ));
+                &mut s,
+            );
         } else {
-            s.push_str(&format!("\\x{:02x}", b));
+            write!(&mut s, "\\x{:02x}", b).unwrap();
         }
     }
     s
```