diff options
author | Alexander Batischev <eual.jp@gmail.com> | 2019-10-02 16:30:18 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-02 16:30:18 +0300 |
commit | b3d0b966b09d1ca6cdc70e8fa7ad614041ef5bff (patch) | |
tree | a156bbfea2697c76b0d851f8f2957151cb4409fc | |
parent | c63da29f70de21f045a1e40d45bcbf332ef71a4c (diff) | |
parent | 6b080e68d6d255684321a8dca669fd850152c75b (diff) |
Merge pull request #658 from newsboat/bugfix/652-quoted-pound-sign-treated-as-comment
Ignore # characters inside double quotes and backticks
Fixes #652.
-rw-r--r-- | doc/chapter-firststeps.txt | 10 | ||||
-rw-r--r-- | include/rs_utils.h | 2 | ||||
-rw-r--r-- | rust/libnewsboat-ffi/src/utils.rs | 15 | ||||
-rw-r--r-- | rust/libnewsboat/src/utils.rs | 90 | ||||
-rw-r--r-- | src/utils.cpp | 9 | ||||
-rw-r--r-- | test/configparser.cpp | 28 | ||||
-rw-r--r-- | test/utils.cpp | 130 |
7 files changed, 271 insertions, 13 deletions
diff --git a/doc/chapter-firststeps.txt b/doc/chapter-firststeps.txt index 725ad49c..01e71a50 100644 --- a/doc/chapter-firststeps.txt +++ b/doc/chapter-firststeps.txt @@ -92,12 +92,12 @@ also contain comments, which start with the `#` character and go as far as the end of line. If you need to enter a configuration argument that contains spaces, use quotes (`"`) around the whole argument. It's even possible to integrate the output of external commands into the configuration. The text -between two backticks (+\`+) is evaluated as shell command, and its output is -put on its place instead. This works like backtick evaluation in +between two backticks (+`+) is evaluated as shell command, and its output +is put on its place instead. This works like backtick evaluation in Bourne-compatible shells and allows users to use external information from the -system within the configuration. Backticks can be escaped with a backslash -(+\`+); in that case, they'll be replaced with a literal backtick in the -configuration. +system within the configuration. Backticks and `#` characters can be escaped +with a backslash (e.g. `\`` and `\#`); in that case, they'll be replaced with +literal +`+ or `#` in the configuration. Searching for articles is possible in newsboat, too. Just press the "/" key, enter your search phrase, and the title and content of all articles are diff --git a/include/rs_utils.h b/include/rs_utils.h index b35468ff..cbef8e0a 100644 --- a/include/rs_utils.h +++ b/include/rs_utils.h @@ -82,6 +82,8 @@ unsigned int rs_newsboat_version_major(); int rs_mkdir_parents(const char* path, const std::uint32_t mode); +char* rs_strip_comments(const char* line); + class RustString { private: char* str; diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs index 1da94c53..b75c94dc 100644 --- a/rust/libnewsboat-ffi/src/utils.rs +++ b/rust/libnewsboat-ffi/src/utils.rs @@ -489,3 +489,18 @@ pub extern "C" fn rs_program_version() -> *mut c_char { pub extern "C" fn rs_newsboat_version_major() -> u32 { abort_on_panic(|| utils::newsboat_major_version()) } + +#[no_mangle] +pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char { + abort_on_panic(|| { + let line = CStr::from_ptr(line); + let line = line.to_str().expect("line contained invalid UTF-8"); + + let result = utils::strip_comments(line); + + // `result` contains a subset of `line`, which is a C string. Thus, we conclude that + // `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`. + let result = CString::new(result).unwrap(); + result.into_raw() + }) +} diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 91ee9cfe..d1ccca80 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -545,6 +545,44 @@ pub fn newsboat_major_version() -> u32 { env!("CARGO_PKG_VERSION_MAJOR").parse::<u32>().unwrap() } +/// Returns the part of the string before first # character (or the whole input string if there are +/// no # character in it). Pound characters inside double quotes and backticks are ignored. +pub fn strip_comments(line: &str) -> &str { + let mut prev_was_backslash = false; + let mut inside_quotes = false; + let mut inside_backticks = false; + + let mut first_pound_chr_idx = line.len(); + + for (idx, chr) in line.char_indices() { + if chr == '\\' { + prev_was_backslash = true; + continue; + } else if chr == '"' { + // If the quote is escaped or we're inside backticks, do nothing + if !prev_was_backslash && !inside_backticks { + inside_quotes = !inside_quotes; + } + } else if chr == '`' { + // If the backtick is escaped, do nothing + if !prev_was_backslash { + inside_backticks = !inside_backticks; + } + } else if chr == '#' { + if !prev_was_backslash && !inside_quotes && !inside_backticks { + first_pound_chr_idx = idx; + break; + } + } + + // We call `continue` when we run into a backslash; here, we handle all the other + // characters, which clearly *aren't* a backslash + prev_was_backslash = false; + } + + &line[0..first_pound_chr_idx] +} + #[cfg(test)] mod tests { extern crate tempfile; @@ -827,6 +865,7 @@ mod tests { get_command_output("a-program-that-is-guaranteed-to-not-exists"), "".to_string() ); + assert_eq!(get_command_output("echo c\" d e"), "".to_string()); } #[test] @@ -1042,4 +1081,55 @@ mod tests { assert_eq!(strnaturalcmp("aa10", "aa2"), Ordering::Greater); } + + #[test] + fn t_strip_comments() { + // no comments in line + assert_eq!(strip_comments(""), ""); + assert_eq!(strip_comments("\t\n"), "\t\n"); + assert_eq!(strip_comments("some directive "), "some directive "); + + // fully commented line + assert_eq!(strip_comments("#"), ""); + assert_eq!(strip_comments("# #"), ""); + assert_eq!(strip_comments("# comment"), ""); + + // partially commented line + assert_eq!(strip_comments("directive # comment"), "directive "); + assert_eq!( + strip_comments("directive # comment # another"), + "directive " + ); + assert_eq!(strip_comments("directive#comment"), "directive"); + + // ignores # characters inside double quotes (#652) + let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#; + let input = expected.to_owned() + "# this is a comment"; + assert_eq!(strip_comments(&input), expected); + + let expected = + r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#; + let input = expected.to_owned() + "#heresacomment"; + assert_eq!(strip_comments(&input), expected); + + // Escaped double quote inside double quotes is not treated as closing quote + let expected = r#"test "here \"goes # nothing\" etc" hehe"#; + let input = expected.to_owned() + "# and here is a comment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores # characters inside backticks + let expected = r#"one `two # three` four"#; + let input = expected.to_owned() + "# and a comment, of course"; + assert_eq!(strip_comments(&input), expected); + + // Escaped backtick inside backticks is not treated as closing + let expected = r#"some `other \` tricky # test` hehe"#; + let input = expected.to_owned() + "#here goescomment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores escaped # characters (\\#) + let expected = r#"one two \# three four"#; + let input = expected.to_owned() + "# and a comment"; + assert_eq!(strip_comments(&input), expected); + } } diff --git a/src/utils.cpp b/src/utils.cpp index d7351425..9b50a492 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -78,14 +78,7 @@ void append_escapes(std::string& str, char c) std::string utils::strip_comments(const std::string& line) { - /* - * This functions returns only the non-comment part of the line, - * which can then be safely evaluated and tokenized. - * - * If no comments are present on the line, the whole line is returned. - */ - auto hash_pos = line.find_first_of("#"); - return line.substr(0, hash_pos); + return RustString(rs_strip_comments(line.c_str())); } std::vector<std::string> utils::tokenize_quoted(const std::string& str, diff --git a/test/configparser.cpp b/test/configparser.cpp index 1133090d..f50625d8 100644 --- a/test/configparser.cpp +++ b/test/configparser.cpp @@ -59,6 +59,34 @@ TEST_CASE("evaluate_backticks replaces command in backticks with its output", REQUIRE_NOTHROW(cfgparser.parse("data/config-space-backticks")); REQUIRE_FALSE(keys.get_operation("s", "all") == OP_NIL); } + + SECTION("Unbalanced backtick does *not* start a command") + { + const auto input1 = std::string("one `echo two three"); + REQUIRE(ConfigParser::evaluate_backticks(input1) == input1); + + const auto input2 = std::string("this `echo is a` test `here"); + const auto expected2 = std::string("this is a test `here"); + REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2); + } + + // One might think that putting one or both backticks inside a string will + // "escape" them, the same way as backslash does. But it doesn't, and + // shouldn't: when parsing a config, we need to evaluate *all* commands + // there are, no matter where they're placed. + SECTION("Backticks inside double quotes are not ignored") + { + const auto input1 = std::string(R"#("`echo hello`")#"); + REQUIRE(ConfigParser::evaluate_backticks(input1) == R"#("hello")#"); + + const auto input2 = std::string(R"#(a "b `echo c" d e` f)#"); + // The line above asks the shell to run 'echo c" d e', which is an + // invalid command--the double quotes are not closed. The standard + // output of that command would be empty, so nothing will be inserted + // in place of backticks. + const auto expected2 = std::string(R"#(a "b f)#"); + REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2); + } } TEST_CASE("\"unbind-key -a\" removes all key bindings", "[ConfigParser]") diff --git a/test/utils.cpp b/test/utils.cpp index e9c8c587..f516f65c 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -150,6 +150,62 @@ TEST_CASE("tokenize_quoted() doesn't un-escape escaped backticks", "[utils]") REQUIRE(tokens[1] == "\\`foobar `bla`\\`"); } +TEST_CASE("tokenize_quoted stops tokenizing once it found a # character " + "(outside of double quotes)", + "[utils]") +{ + std::vector<std::string> tokens; + + SECTION("A string consisting of just a comment") + { + tokens = utils::tokenize_quoted("# just a comment"); + REQUIRE(tokens.empty()); + } + + SECTION("A string with one quoted substring") + { + tokens = utils::tokenize_quoted(R"#("a test substring" # !!!)#"); + REQUIRE(tokens.size() == 1); + REQUIRE(tokens[0] == "a test substring"); + } + + SECTION("A string with two quoted substrings") + { + tokens = utils::tokenize_quoted(R"#("first sub" "snd" # comment)#"); + REQUIRE(tokens.size() == 2); + REQUIRE(tokens[0] == "first sub"); + REQUIRE(tokens[1] == "snd"); + } + + SECTION("A comment containing # character") + { + tokens = utils::tokenize_quoted(R"#(one # a comment with # char)#"); + REQUIRE(tokens.size() == 1); + REQUIRE(tokens[0] == "one"); + } + + SECTION("A # character inside quoted substring is ignored") + { + tokens = utils::tokenize_quoted(R"#(this "will # be" ignored)#"); + REQUIRE(tokens.size() == 3); + REQUIRE(tokens[0] == "this"); + REQUIRE(tokens[1] == "will # be"); + REQUIRE(tokens[2] == "ignored"); + } +} + +TEST_CASE("tokenize_quoted does not consider escaped pound sign (\\#) " + "a beginning of a comment", + "[utils]") +{ + const auto tokens = utils::tokenize_quoted(R"#(one \# two three # ???)#"); + REQUIRE(tokens.size() == 4); + REQUIRE(tokens[0] == "one"); + REQUIRE(tokens[1] == "\\#"); + REQUIRE(tokens[2] == "two"); + REQUIRE(tokens[3] == "three"); +} + TEST_CASE("tokenize_nl() split a string into delimiters and fields", "[utils]") { std::vector<std::string> tokens; @@ -219,6 +275,79 @@ TEST_CASE( } } +TEST_CASE("strip_comments ignores escaped # characters (\\#)") +{ + const auto expected = + std::string(R"#(one two \# three four)#"); + const auto input = expected + "# and a comment"; + REQUIRE(utils::strip_comments(input) == expected); +} + +TEST_CASE("strip_comments ignores # characters inside double quotes", + "[utils][issue652]") +{ + SECTION("Real-world cases from issue 652") { + const auto expected1 = + std::string(R"#(highlight article "[-=+#_*~]{3,}.*" green default)#"); + const auto input1 = expected1 + "# this is a comment"; + REQUIRE(utils::strip_comments(input1) == expected1); + + const auto expected2 = + std::string(R"#(highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold)#"); + const auto input2 = expected2 + "#heresacomment"; + REQUIRE(utils::strip_comments(input2) == expected2); + } + + SECTION("Escaped double quote inside double quotes is not treated " + "as closing quote") + { + const auto expected = + std::string(R"#(test "here \"goes # nothing\" etc" hehe)#"); + const auto input = expected + "# and here is a comment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments ignores # characters inside backticks", "[utils]") +{ + SECTION("Simple case") { + const auto expected = std::string(R"#(one `two # three` four)#"); + const auto input = expected + "# and a comment, of course"; + REQUIRE(utils::strip_comments(input) == expected); + } + + SECTION("Escaped backtick inside backticks is not treated as closing") + { + const auto expected = + std::string(R"#(some `other \` tricky # test` hehe)#"); + const auto input = expected + "#here goescomment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments is not confused by nested double quotes and backticks", + "[utils]") +{ + { + const auto expected = std::string(R"#("`" ... ` `"` ")#"); + const auto input = expected + "#comment"; + REQUIRE(utils::strip_comments(input) == expected); + } + + { + const auto expected = std::string(R"#(aaa ` bbb "ccc ddd" e` dd)#"); + const auto input = expected + "# a comment string"; + REQUIRE(utils::strip_comments(input) == expected); + } + + { + const auto expected = + std::string(R"#(option "this `weird " command` for value")#"); + const auto input = expected + "#and a comment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + TEST_CASE( "consolidate_whitespace replaces multiple consecutive" "whitespace with a single space", @@ -249,6 +378,7 @@ TEST_CASE("get_command_output()", "[utils]") "a-program-that-is-guaranteed-to-not-exists")); REQUIRE(utils::get_command_output( "a-program-that-is-guaranteed-to-not-exists") == ""); + REQUIRE(utils::get_command_output("echo c\" d e") == ""); } TEST_CASE("extract_filter()", "[utils]") |