// path: root/src/utils/regex_replacement.rs
// blob: eec8480f5fc50373e11737a335db6728b2c04030 (plain)
use std::borrow::Cow;

use regex::{Regex, RegexBuilder};

/// A compiled sed-style substitution: a regular expression together with the
/// text that replaces its matches.
#[derive(Clone, Debug)]
pub struct RegexReplacement {
    /// Compiled pattern that is searched for in the input.
    regex: Regex,
    /// Replacement text handed to the regex `replace` / `replace_all` calls.
    replacement: String,
    /// When `true` every match is replaced (the `g` flag); otherwise only one.
    replace_all: bool,
}

impl RegexReplacement {
    /// Parses a sed-style substitution command of the form
    /// `s<sep>regex<sep>replacement<sep>flags`, e.g. `s/foo/bar/gi`.
    ///
    /// The first character is skipped without being inspected; the second
    /// character is taken as the separator. The remainder is split on every
    /// occurrence of that separator (there is no escaping), and the first
    /// three pieces become the pattern, the replacement and the flags. Any
    /// further pieces are ignored.
    ///
    /// Recognised flags:
    ///
    /// * `g` - replace every match instead of only one
    /// * `i` - case-insensitive matching
    /// * `m` - multi-line mode
    /// * `s` - `.` also matches a newline
    /// * `U` - swap greediness
    /// * `x` - ignore whitespace in the pattern
    ///
    /// Unknown flag characters are silently ignored.
    ///
    /// Returns `None` when the command has fewer than two characters, when the
    /// replacement or the (possibly empty) flags section is missing, i.e. the
    /// closing separator is absent, or when the pattern fails to compile.
    pub fn from_sed_command(sed_command: &str) -> Option<Self> {
        let mut chars = sed_command.chars();
        // Skip the command character (normally `s`); it is not validated.
        chars.next()?;
        let sep = chars.next()?;
        // `as_str` returns the text following the separator on a character
        // boundary. Slicing at a fixed byte offset instead would panic or
        // mis-parse when either leading character is wider than one byte.
        let mut parts = chars.as_str().split(sep);
        let pattern = parts.next()?;
        let replacement = parts.next()?.to_string();
        let flags = parts.next()?;

        let mut re_builder = RegexBuilder::new(pattern);
        let mut replace_all = false;
        for flag in flags.chars() {
            match flag {
                'g' => replace_all = true,
                'i' => {
                    re_builder.case_insensitive(true);
                }
                'm' => {
                    re_builder.multi_line(true);
                }
                's' => {
                    re_builder.dot_matches_new_line(true);
                }
                'U' => {
                    re_builder.swap_greed(true);
                }
                'x' => {
                    re_builder.ignore_whitespace(true);
                }
                _ => {}
            }
        }

        let regex = re_builder.build().ok()?;
        Some(RegexReplacement {
            regex,
            replacement,
            replace_all,
        })
    }

    /// Applies the substitution to `s`.
    ///
    /// Replaces every match when the command carried the `g` flag, otherwise
    /// only a single match. Capture references in the replacement text (such
    /// as `$name`) are expanded by the regex engine. The result is whatever
    /// `Cow` the underlying `replace` / `replace_all` call returns.
    pub fn execute<'t>(&self, s: &'t str) -> Cow<'t, str> {
        if self.replace_all {
            self.regex.replace_all(s, &self.replacement)
        } else {
            self.regex.replace(s, &self.replacement)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `command`, panicking when it is rejected.
    fn parse(command: &str) -> RegexReplacement {
        RegexReplacement::from_sed_command(command).unwrap()
    }

    /// A command without flags keeps pattern and replacement verbatim and
    /// performs a single replacement.
    #[test]
    fn test_sed_command() {
        let parsed = parse("s,foo,bar,");
        assert_eq!(parsed.regex.as_str(), "foo");
        assert_eq!(parsed.replacement, "bar");
        assert!(!parsed.replace_all);
        assert_eq!(parsed.execute("foo"), "bar");
    }

    /// Matching is case-sensitive unless the `i` flag is given.
    #[test]
    fn test_sed_command_i_flag() {
        let case_sensitive = parse("s,FOO,bar,");
        assert_eq!(case_sensitive.execute("foo"), "foo");

        let case_insensitive = parse("s,FOO,bar,i");
        assert_eq!(case_insensitive.execute("foo"), "bar");
    }

    /// Only the first match is replaced unless the `g` flag is given.
    #[test]
    fn test_sed_command_g_flag() {
        let first_only = parse("s,foo,bar,");
        assert_eq!(first_only.execute("foofoo"), "barfoo");

        let every_match = parse("s,foo,bar,g");
        assert_eq!(every_match.execute("foofoo"), "barbar");
    }

    /// Named capture groups can be referenced from the replacement text.
    #[test]
    fn test_sed_command_with_named_captures() {
        let swap_names = parse(r"s/(?P<last>[^,\s]+),\s+(?P<first>\S+)/$first $last/");
        assert_eq!(swap_names.execute("Springsteen, Bruce"), "Bruce Springsteen");
    }

    /// Commands lacking the closing separator are rejected; empty fields are
    /// accepted.
    #[test]
    fn test_sed_command_invalid() {
        for rejected in ["", "s", "s,", "s,,", "s,,i"].iter() {
            assert!(
                RegexReplacement::from_sed_command(rejected).is_none(),
                "{:?} should be rejected",
                rejected
            );
        }
        for accepted in ["s,,,", "s,,,i"].iter() {
            assert!(
                RegexReplacement::from_sed_command(accepted).is_some(),
                "{:?} should be accepted",
                accepted
            );
        }
    }
}