summaryrefslogtreecommitdiffstats
path: root/src/unescape.rs
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2018-08-29 20:53:52 -0400
committerAndrew Gallant <jamslam@gmail.com>2018-09-04 23:18:55 -0400
commit4846d63539690047fa58ec582d94bcba16da1c09 (patch)
tree61a2cf9de3d62ea6524659893ab9a2c7800c3286 /src/unescape.rs
parent13c47530a6e685d2dee1953a64f055936e6a2ba8 (diff)
grep-cli: introduce new grep-cli crate
This commit moves a lot of "utility" code from ripgrep core into grep-cli. Any one of these things might not be worth creating a new crate, but combining everything together results in a fair number of convenience routines that make up a decent-sized crate. There is potentially more we could move into the crate, but much of what remains in ripgrep core is almost entirely dealing with the number of flags we support. In the course of moving things to the grep-cli crate, we clean up a lot of gunk and improve failure modes in a number of cases. In particular, we've fixed a bug where other processes could deadlock if they write too much to stderr. Fixes #990
Diffstat (limited to 'src/unescape.rs')
-rw-r--r--src/unescape.rs137
1 files changed, 0 insertions, 137 deletions
diff --git a/src/unescape.rs b/src/unescape.rs
deleted file mode 100644
index 0c7f1c8d..00000000
--- a/src/unescape.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-/// A single state in the state machine used by `unescape`.
-#[derive(Clone, Copy, Eq, PartialEq)]
-enum State {
- /// The state after seeing a `\`.
- Escape,
- /// The state after seeing a `\x`.
- HexFirst,
- /// The state after seeing a `\x[0-9A-Fa-f]`.
- HexSecond(char),
- /// Default state.
- Literal,
-}
-
-/// Escapes an arbitrary byte slice such that it can be presented as a human
-/// readable string.
-pub fn escape(bytes: &[u8]) -> String {
- use std::ascii::escape_default;
-
- let escaped = bytes.iter().flat_map(|&b| escape_default(b)).collect();
- String::from_utf8(escaped).unwrap()
-}
-
-/// Unescapes a string given on the command line. It supports a limited set of
-/// escape sequences:
-///
-/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
-/// * `\xZZ` hexadecimal escapes are mapped to their byte.
-pub fn unescape(s: &str) -> Vec<u8> {
- use self::State::*;
-
- let mut bytes = vec![];
- let mut state = Literal;
- for c in s.chars() {
- match state {
- Escape => {
- match c {
- 'n' => { bytes.push(b'\n'); state = Literal; }
- 'r' => { bytes.push(b'\r'); state = Literal; }
- 't' => { bytes.push(b'\t'); state = Literal; }
- 'x' => { state = HexFirst; }
- c => {
- bytes.extend(format!(r"\{}", c).into_bytes());
- state = Literal;
- }
- }
- }
- HexFirst => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- state = HexSecond(c);
- }
- c => {
- bytes.extend(format!(r"\x{}", c).into_bytes());
- state = Literal;
- }
- }
- }
- HexSecond(first) => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- let ordinal = format!("{}{}", first, c);
- let byte = u8::from_str_radix(&ordinal, 16).unwrap();
- bytes.push(byte);
- state = Literal;
- }
- c => {
- let original = format!(r"\x{}{}", first, c);
- bytes.extend(original.into_bytes());
- state = Literal;
- }
- }
- }
- Literal => {
- match c {
- '\\' => { state = Escape; }
- c => { bytes.extend(c.to_string().as_bytes()); }
- }
- }
- }
- }
- match state {
- Escape => bytes.push(b'\\'),
- HexFirst => bytes.extend(b"\\x"),
- HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
- Literal => {}
- }
- bytes
-}
-
-#[cfg(test)]
-mod tests {
- use super::unescape;
-
- fn b(bytes: &'static [u8]) -> Vec<u8> {
- bytes.to_vec()
- }
-
- #[test]
- fn unescape_nul() {
- assert_eq!(b(b"\x00"), unescape(r"\x00"));
- }
-
- #[test]
- fn unescape_nl() {
- assert_eq!(b(b"\n"), unescape(r"\n"));
- }
-
- #[test]
- fn unescape_tab() {
- assert_eq!(b(b"\t"), unescape(r"\t"));
- }
-
- #[test]
- fn unescape_carriage() {
- assert_eq!(b(b"\r"), unescape(r"\r"));
- }
-
- #[test]
- fn unescape_nothing_simple() {
- assert_eq!(b(b"\\a"), unescape(r"\a"));
- }
-
- #[test]
- fn unescape_nothing_hex0() {
- assert_eq!(b(b"\\x"), unescape(r"\x"));
- }
-
- #[test]
- fn unescape_nothing_hex1() {
- assert_eq!(b(b"\\xz"), unescape(r"\xz"));
- }
-
- #[test]
- fn unescape_nothing_hex2() {
- assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
- }
-}