use std::str; use memchr::memchr; /// Interpolate capture references in `replacement` and write the interpolation /// result to `dst`. References in `replacement` take the form of $N or $name, /// where `N` is a capture group index and `name` is a capture group name. The /// function provided, `name_to_index`, maps capture group names to indices. /// /// The `append` function given is responsible for writing the replacement /// to the `dst` buffer. That is, it is called with the capture group index /// of a capture group reference and is expected to resolve the index to its /// corresponding matched text. If no such match exists, then `append` should /// not write anything to its given buffer. pub fn interpolate( mut replacement: &[u8], mut append: A, mut name_to_index: N, dst: &mut Vec, ) where A: FnMut(usize, &mut Vec), N: FnMut(&str) -> Option { while !replacement.is_empty() { match memchr(b'$', replacement) { None => break, Some(i) => { dst.extend(&replacement[..i]); replacement = &replacement[i..]; } } if replacement.get(1).map_or(false, |&b| b == b'$') { dst.push(b'$'); replacement = &replacement[2..]; continue; } debug_assert!(!replacement.is_empty()); let cap_ref = match find_cap_ref(replacement) { Some(cap_ref) => cap_ref, None => { dst.push(b'$'); replacement = &replacement[1..]; continue; } }; replacement = &replacement[cap_ref.end..]; match cap_ref.cap { Ref::Number(i) => append(i, dst), Ref::Named(name) => { if let Some(i) = name_to_index(name) { append(i, dst); } } } } dst.extend(replacement); } /// `CaptureRef` represents a reference to a capture group inside some text. /// The reference is either a capture group name or a number. /// /// It is also tagged with the position in the text immediately proceding the /// capture reference. #[derive(Clone, Copy, Debug, Eq, PartialEq)] struct CaptureRef<'a> { cap: Ref<'a>, end: usize, } /// A reference to a capture group in some text. /// /// e.g., `$2`, `$foo`, `${foo}`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum Ref<'a> { Named(&'a str), Number(usize), } impl<'a> From<&'a str> for Ref<'a> { fn from(x: &'a str) -> Ref<'a> { Ref::Named(x) } } impl From for Ref<'static> { fn from(x: usize) -> Ref<'static> { Ref::Number(x) } } /// Parses a possible reference to a capture group name in the given text, /// starting at the beginning of `replacement`. /// /// If no such valid reference could be found, None is returned. fn find_cap_ref(replacement: &[u8]) -> Option { let mut i = 0; if replacement.len() <= 1 || replacement[0] != b'$' { return None; } let mut brace = false; i += 1; if replacement[i] == b'{' { brace = true; i += 1; } let mut cap_end = i; while replacement.get(cap_end).map_or(false, is_valid_cap_letter) { cap_end += 1; } if cap_end == i { return None; } // We just verified that the range 0..cap_end is valid ASCII, so it must // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 // check with an unchecked conversion or by parsing the number straight // from &[u8]. let cap = str::from_utf8(&replacement[i..cap_end]) .expect("valid UTF-8 capture name"); if brace { if !replacement.get(cap_end).map_or(false, |&b| b == b'}') { return None; } cap_end += 1; } Some(CaptureRef { cap: match cap.parse::() { Ok(i) => Ref::Number(i as usize), Err(_) => Ref::Named(cap), }, end: cap_end, }) } /// Returns true if and only if the given byte is allowed in a capture name. fn is_valid_cap_letter(b: &u8) -> bool { match *b { b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true, _ => false, } } #[cfg(test)] mod tests { use super::{CaptureRef, find_cap_ref, interpolate}; macro_rules! find { ($name:ident, $text:expr) => { #[test] fn $name() { assert_eq!(None, find_cap_ref($text.as_bytes())); } }; ($name:ident, $text:expr, $capref:expr) => { #[test] fn $name() { assert_eq!(Some($capref), find_cap_ref($text.as_bytes())); } }; } macro_rules! c { ($name_or_number:expr, $pos:expr) => { CaptureRef { cap: $name_or_number.into(), end: $pos } }; } find!(find_cap_ref1, "$foo", c!("foo", 4)); find!(find_cap_ref2, "${foo}", c!("foo", 6)); find!(find_cap_ref3, "$0", c!(0, 2)); find!(find_cap_ref4, "$5", c!(5, 2)); find!(find_cap_ref5, "$10", c!(10, 3)); find!(find_cap_ref6, "$42a", c!("42a", 4)); find!(find_cap_ref7, "${42}a", c!(42, 5)); find!(find_cap_ref8, "${42"); find!(find_cap_ref9, "${42 "); find!(find_cap_ref10, " $0 "); find!(find_cap_ref11, "$"); find!(find_cap_ref12, " "); find!(find_cap_ref13, ""); // A convenience routine for using interpolate's unwieldy but flexible API. fn interpolate_string( mut name_to_index: Vec<(&'static str, usize)>, caps: Vec<&'static str>, replacement: &str, ) -> String { name_to_index.sort_by_key(|x| x.0); let mut dst = vec![]; interpolate( replacement.as_bytes(), |i, dst| { if let Some(&s) = caps.get(i) { dst.extend(s.as_bytes()); } }, |name| -> Option { name_to_index .binary_search_by_key(&name, |x| x.0) .ok() .map(|i| name_to_index[i].1) }, &mut dst, ); String::from_utf8(dst).unwrap() } macro_rules! interp { ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => { #[test] fn $name() { assert_eq!($expected, interpolate_string($map, $caps, $hay)); } } } interp!( interp1, vec![("foo", 2)], vec!["", "", "xxx"], "test $foo test", "test xxx test", ); interp!( interp2, vec![("foo", 2)], vec!["", "", "xxx"], "test$footest", "test", ); interp!( interp3, vec![("foo", 2)], vec!["", "", "xxx"], "test${foo}test", "testxxxtest", ); interp!( interp4, vec![("foo", 2)], vec!["", "", "xxx"], "test$2test", "test", ); interp!( interp5, vec![("foo", 2)], vec!["", "", "xxx"], "test${2}test", "testxxxtest", ); interp!( interp6, vec![("foo", 2)], vec!["", "", "xxx"], "test $$foo test", "test $foo test", ); interp!( interp7, vec![("foo", 2)], vec!["", "", "xxx"], "test $foo", "test xxx", ); interp!( interp8, vec![("foo", 2)], vec!["", "", "xxx"], "$foo test", "xxx test", ); interp!( interp9, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test $bar$foo", "test yyyxxx", ); interp!( interp10, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test $ test", "test $ test", ); interp!( interp11, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test ${} test", "test ${} test", ); interp!( interp12, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test ${ } test", "test ${ } test", ); interp!( interp13, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test ${a b} test", "test ${a b} test", ); interp!( interp14, vec![("bar", 1), ("foo", 2)], vec!["", "yyy", "xxx"], "test ${a} test", "test test", ); }