From 1e08618a01d0521d1972de0a8cb1a100c857e660 Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sat, 28 Sep 2019 21:46:03 +0300 Subject: Port utils::strip_comments to Rust --- include/rs_utils.h | 2 ++ rust/libnewsboat-ffi/src/utils.rs | 15 +++++++++++++++ rust/libnewsboat/src/utils.rs | 32 ++++++++++++++++++++++++++++++++ src/utils.cpp | 9 +-------- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/include/rs_utils.h b/include/rs_utils.h index b35468ff..cbef8e0a 100644 --- a/include/rs_utils.h +++ b/include/rs_utils.h @@ -82,6 +82,8 @@ unsigned int rs_newsboat_version_major(); int rs_mkdir_parents(const char* path, const std::uint32_t mode); +char* rs_strip_comments(const char* line); + class RustString { private: char* str; diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs index 1da94c53..40a7946d 100644 --- a/rust/libnewsboat-ffi/src/utils.rs +++ b/rust/libnewsboat-ffi/src/utils.rs @@ -489,3 +489,18 @@ pub extern "C" fn rs_program_version() -> *mut c_char { pub extern "C" fn rs_newsboat_version_major() -> u32 { abort_on_panic(|| utils::newsboat_major_version()) } + +#[no_mangle] +pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char { + abort_on_panic(|| { + let line = CStr::from_ptr(line); + let line = line.to_str().expect("line contained invalid UTF-8"); + + let result = utils::strip_comments(line); + + // `result` contains a subset of `line`, which is a C string. Thus, we conclude that + // `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`. + let result = CString::new(result.into_owned()).unwrap(); + result.into_raw() + }) +} diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 91ee9cfe..40f6ad39 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -16,6 +16,7 @@ use self::url::percent_encoding::*; use self::url::Url; use libc::c_ulong; use logger::{self, Level}; +use std::borrow::Cow; use std::fs::DirBuilder; use std::io::{self, Write}; use std::os::unix::fs::DirBuilderExt; @@ -545,6 +546,16 @@ pub fn newsboat_major_version() -> u32 { env!("CARGO_PKG_VERSION_MAJOR").parse::().unwrap() } +/// Returns the part of the string before first # character (or the whole input string if there are +/// no # character in it). +pub fn strip_comments(line: &str) -> Cow { + if let Some(index) = line.find('#') { + Cow::from(&line[0..index]) + } else { + Cow::from(line) + } +} + #[cfg(test)] mod tests { extern crate tempfile; @@ -1042,4 +1053,25 @@ mod tests { assert_eq!(strnaturalcmp("aa10", "aa2"), Ordering::Greater); } + + #[test] + fn t_strip_comments() { + // no comments in line + assert_eq!(strip_comments(""), ""); + assert_eq!(strip_comments("\t\n"), "\t\n"); + assert_eq!(strip_comments("some directive "), "some directive "); + + // fully commented line + assert_eq!(strip_comments("#"), ""); + assert_eq!(strip_comments("# #"), ""); + assert_eq!(strip_comments("# comment"), ""); + + // partially commented line + assert_eq!(strip_comments("directive # comment"), "directive "); + assert_eq!( + strip_comments("directive # comment # another"), + "directive " + ); + assert_eq!(strip_comments("directive#comment"), "directive"); + } } diff --git a/src/utils.cpp b/src/utils.cpp index d7351425..9b50a492 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -78,14 +78,7 @@ void append_escapes(std::string& str, char c) std::string utils::strip_comments(const std::string& line) { - /* - * This functions returns only the non-comment part of the line, - * which can then be safely evaluated and tokenized. - * - * If no comments are present on the line, the whole line is returned. - */ - auto hash_pos = line.find_first_of("#"); - return line.substr(0, hash_pos); + return RustString(rs_strip_comments(line.c_str())); } std::vector utils::tokenize_quoted(const std::string& str, -- cgit v1.2.3 From 2aa982e67a2602bdfab78ceaa0344c880385392d Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sat, 28 Sep 2019 23:17:16 +0300 Subject: Ignore # chars inside double quotes and backticks Fixes #652. --- rust/libnewsboat-ffi/src/utils.rs | 2 +- rust/libnewsboat/src/utils.rs | 66 ++++++++++++++++++++++++++++++++++----- test/utils.cpp | 58 ++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 8 deletions(-) diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs index 40a7946d..b75c94dc 100644 --- a/rust/libnewsboat-ffi/src/utils.rs +++ b/rust/libnewsboat-ffi/src/utils.rs @@ -500,7 +500,7 @@ pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char { // `result` contains a subset of `line`, which is a C string. Thus, we conclude that // `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`. - let result = CString::new(result.into_owned()).unwrap(); + let result = CString::new(result).unwrap(); result.into_raw() }) } diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 40f6ad39..c28047d9 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -16,7 +16,6 @@ use self::url::percent_encoding::*; use self::url::Url; use libc::c_ulong; use logger::{self, Level}; -use std::borrow::Cow; use std::fs::DirBuilder; use std::io::{self, Write}; use std::os::unix::fs::DirBuilderExt; @@ -547,13 +546,41 @@ pub fn newsboat_major_version() -> u32 { } /// Returns the part of the string before first # character (or the whole input string if there are -/// no # character in it). -pub fn strip_comments(line: &str) -> Cow { - if let Some(index) = line.find('#') { - Cow::from(&line[0..index]) - } else { - Cow::from(line) +/// no # character in it). Pound characters inside double quotes and backticks are ignored. +pub fn strip_comments(line: &str) -> &str { + let mut prev_was_backslash = false; + let mut inside_quotes = false; + let mut inside_backticks = false; + + let mut first_pound_chr_idx = line.len(); + + for (idx, chr) in line.char_indices() { + if chr == '\\' { + prev_was_backslash = true; + continue; + } else if chr == '"' { + // If the quote is escaped, do nothing + if !prev_was_backslash { + inside_quotes = !inside_quotes; + } + } else if chr == '`' { + // If the backtick is escaped, do nothing + if !prev_was_backslash { + inside_backticks = !inside_backticks; + } + } else if chr == '#' { + if !inside_quotes && !inside_backticks { + first_pound_chr_idx = idx; + break; + } + } + + // We call `continue` when we run into a backslash; here, we handle all the other + // characters, which clearly *aren't* a backslash + prev_was_backslash = false; } + + &line[0..first_pound_chr_idx] } #[cfg(test)] @@ -1073,5 +1100,30 @@ mod tests { "directive " ); assert_eq!(strip_comments("directive#comment"), "directive"); + + // ignores # characters inside double quotes (#652) + let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#; + let input = expected.to_owned() + "# this is a comment"; + assert_eq!(strip_comments(&input), expected); + + let expected = + r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#; + let input = expected.to_owned() + "#heresacomment"; + assert_eq!(strip_comments(&input), expected); + + // Escaped double quote inside double quotes is not treated as closing quote + let expected = r#"test "here \"goes # nothing\" etc" hehe"#; + let input = expected.to_owned() + "# and here is a comment"; + assert_eq!(strip_comments(&input), expected); + + // Ignores # characters inside backticks + let expected = r#"one `two # three` four"#; + let input = expected.to_owned() + "# and a comment, of course"; + assert_eq!(strip_comments(&input), expected); + + // Escaped backtick inside backticks is not treated as closing + let expected = r#"some `other \` tricky # test` hehe"#; + let input = expected.to_owned() + "#here goescomment"; + assert_eq!(strip_comments(&input), expected); } } diff --git a/test/utils.cpp b/test/utils.cpp index e9c8c587..97f7b0b6 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -219,6 +219,64 @@ TEST_CASE( } } +TEST_CASE("strip_comments ignores # characters inside double quotes", + "[utils][issue652]") +{ + SECTION("Real-world cases from issue 652") { + const auto expected1 = + std::string(R"#(highlight article "[-=+#_*~]{3,}.*" green default)#"); + const auto input1 = expected1 + "# this is a comment"; + REQUIRE(utils::strip_comments(input1) == expected1); + + const auto expected2 = + std::string(R"#(highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold)#"); + const auto input2 = expected2 + "#heresacomment"; + REQUIRE(utils::strip_comments(input2) == expected2); + } + + SECTION("Escaped double quote inside double quotes is not treated " + "as closing quote") + { + const auto expected = + std::string(R"#(test "here \"goes # nothing\" etc" hehe)#"); + const auto input = expected + "# and here is a comment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments ignores # characters inside backticks", "[utils]") +{ + SECTION("Simple case") { + const auto expected = std::string(R"#(one `two # three` four)#"); + const auto input = expected + "# and a comment, of course"; + REQUIRE(utils::strip_comments(input) == expected); + } + + SECTION("Escaped backtick inside backticks is not treated as closing") + { + const auto expected = + std::string(R"#(some `other \` tricky # test` hehe)#"); + const auto input = expected + "#here goescomment"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + +TEST_CASE("strip_comments is not confused by nested double quotes and backticks", + "[utils]") +{ + { + const auto expected = std::string(R"#("`" ... ` )#"); + const auto input = expected + "#comment"; + REQUIRE(utils::strip_comments(input) == expected); + } + + { + const auto expected = std::string(R"#(aaa ` bbb "ccc ddd" e` dd)#"); + const auto input = expected + "# a comment string"; + REQUIRE(utils::strip_comments(input) == expected); + } +} + TEST_CASE( "consolidate_whitespace replaces multiple consecutive" "whitespace with a single space", -- cgit v1.2.3 From 2b6bcaa85581e2cf58e1a2581d073ff3d12fad0c Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 21:18:14 +0300 Subject: Test how evaluate_backtick() deals with unbalanced backticks --- test/configparser.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/configparser.cpp b/test/configparser.cpp index 1133090d..0757d027 100644 --- a/test/configparser.cpp +++ b/test/configparser.cpp @@ -59,6 +59,16 @@ TEST_CASE("evaluate_backticks replaces command in backticks with its output", REQUIRE_NOTHROW(cfgparser.parse("data/config-space-backticks")); REQUIRE_FALSE(keys.get_operation("s", "all") == OP_NIL); } + + SECTION("Unbalanced backtick does *not* start a command") + { + const auto input1 = std::string("one `echo two three"); + REQUIRE(ConfigParser::evaluate_backticks(input1) == input1); + + const auto input2 = std::string("this `echo is a` test `here"); + const auto expected2 = std::string("this is a test `here"); + REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2); + } } TEST_CASE("\"unbind-key -a\" removes all key bindings", "[ConfigParser]") -- cgit v1.2.3 From 7f71a13f1c99b5ac5c218f2a1a50a8545cfee7e9 Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 21:19:13 +0300 Subject: Test how backticks inside double quotes work --- rust/libnewsboat/src/utils.rs | 1 + test/configparser.cpp | 18 ++++++++++++++++++ test/utils.cpp | 1 + 3 files changed, 20 insertions(+) diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index c28047d9..54328072 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -865,6 +865,7 @@ mod tests { get_command_output("a-program-that-is-guaranteed-to-not-exists"), "".to_string() ); + assert_eq!(get_command_output("echo c\" d e"), "".to_string()); } #[test] diff --git a/test/configparser.cpp b/test/configparser.cpp index 0757d027..f50625d8 100644 --- a/test/configparser.cpp +++ b/test/configparser.cpp @@ -69,6 +69,24 @@ TEST_CASE("evaluate_backticks replaces command in backticks with its output", const auto expected2 = std::string("this is a test `here"); REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2); } + + // One might think that putting one or both backticks inside a string will + // "escape" them, the same way as backslash does. But it doesn't, and + // shouldn't: when parsing a config, we need to evaluate *all* commands + // there are, no matter where they're placed. + SECTION("Backticks inside double quotes are not ignored") + { + const auto input1 = std::string(R"#("`echo hello`")#"); + REQUIRE(ConfigParser::evaluate_backticks(input1) == R"#("hello")#"); + + const auto input2 = std::string(R"#(a "b `echo c" d e` f)#"); + // The line above asks the shell to run 'echo c" d e', which is an + // invalid command--the double quotes are not closed. The standard + // output of that command would be empty, so nothing will be inserted + // in place of backticks. + const auto expected2 = std::string(R"#(a "b f)#"); + REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2); + } } TEST_CASE("\"unbind-key -a\" removes all key bindings", "[ConfigParser]") diff --git a/test/utils.cpp b/test/utils.cpp index 97f7b0b6..b1dd9b50 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -307,6 +307,7 @@ TEST_CASE("get_command_output()", "[utils]") "a-program-that-is-guaranteed-to-not-exists")); REQUIRE(utils::get_command_output( "a-program-that-is-guaranteed-to-not-exists") == ""); + REQUIRE(utils::get_command_output("echo c\" d e") == ""); } TEST_CASE("extract_filter()", "[utils]") -- cgit v1.2.3 From 43a8e9effec37af18b037cd3dce57a601af7b8ab Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 21:35:26 +0300 Subject: Test how tokenize_quoted treats pound sign --- test/utils.cpp | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/test/utils.cpp b/test/utils.cpp index b1dd9b50..b5e9c037 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -150,6 +150,62 @@ TEST_CASE("tokenize_quoted() doesn't un-escape escaped backticks", "[utils]") REQUIRE(tokens[1] == "\\`foobar `bla`\\`"); } +TEST_CASE("tokenize_quoted stops tokenizing once it found a # character " + "(outside of double quotes)", + "[utils]") +{ + std::vector tokens; + + SECTION("A string consisting of just a comment") + { + tokens = utils::tokenize_quoted("# just a comment"); + REQUIRE(tokens.empty()); + } + + SECTION("A string with one quoted substring") + { + tokens = utils::tokenize_quoted(R"#("a test substring" # !!!)#"); + REQUIRE(tokens.size() == 1); + REQUIRE(tokens[0] == "a test substring"); + } + + SECTION("A string with two quoted substrings") + { + tokens = utils::tokenize_quoted(R"#("first sub" "snd" # comment)#"); + REQUIRE(tokens.size() == 2); + REQUIRE(tokens[0] == "first sub"); + REQUIRE(tokens[1] == "snd"); + } + + SECTION("A comment containing # character") + { + tokens = utils::tokenize_quoted(R"#(one # a comment with # char)#"); + REQUIRE(tokens.size() == 1); + REQUIRE(tokens[0] == "one"); + } + + SECTION("A # character inside quoted substring is ignored") + { + tokens = utils::tokenize_quoted(R"#(this "will # be" ignored)#"); + REQUIRE(tokens.size() == 3); + REQUIRE(tokens[0] == "this"); + REQUIRE(tokens[1] == "will # be"); + REQUIRE(tokens[2] == "ignored"); + } +} + +TEST_CASE("tokenize_quoted does not consider escaped pound sign (\\#) " + "a beginning of a comment", + "[utils]") +{ + const auto tokens = utils::tokenize_quoted(R"#(one \# two three # ???)#"); + REQUIRE(tokens.size() == 4); + REQUIRE(tokens[0] == "one"); + REQUIRE(tokens[1] == "\\#"); + REQUIRE(tokens[2] == "two"); + REQUIRE(tokens[3] == "three"); +} + TEST_CASE("tokenize_nl() split a string into delimiters and fields", "[utils]") { std::vector tokens; -- cgit v1.2.3 From b4af58a1f2f3eb97668d9e2ed4c9558ff2a8d09b Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 21:52:42 +0300 Subject: Ignore escaped pound characters --- rust/libnewsboat/src/utils.rs | 7 ++++++- test/utils.cpp | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 54328072..5a79d94b 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -569,7 +569,7 @@ pub fn strip_comments(line: &str) -> &str { inside_backticks = !inside_backticks; } } else if chr == '#' { - if !inside_quotes && !inside_backticks { + if !prev_was_backslash && !inside_quotes && !inside_backticks { first_pound_chr_idx = idx; break; } @@ -1126,5 +1126,10 @@ mod tests { let expected = r#"some `other \` tricky # test` hehe"#; let input = expected.to_owned() + "#here goescomment"; assert_eq!(strip_comments(&input), expected); + + // Ignores escaped # characters (\\#) + let expected = r#"one two \# three four"#; + let input = expected.to_owned() + "# and a comment"; + assert_eq!(strip_comments(&input), expected); } } diff --git a/test/utils.cpp b/test/utils.cpp index b5e9c037..7469dfc0 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -275,6 +275,14 @@ TEST_CASE( } } +TEST_CASE("strip_comments ignores escaped # characters (\\#)") +{ + const auto expected = + std::string(R"#(one two \# three four)#"); + const auto input = expected + "# and a comment"; + REQUIRE(utils::strip_comments(input) == expected); +} + TEST_CASE("strip_comments ignores # characters inside double quotes", "[utils][issue652]") { -- cgit v1.2.3 From 649d56548c5c2449ef3f4e371bb7aa50c7c655f3 Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 21:54:44 +0300 Subject: Document that pound char can be escaped in config --- doc/chapter-firststeps.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/chapter-firststeps.txt b/doc/chapter-firststeps.txt index 725ad49c..01e71a50 100644 --- a/doc/chapter-firststeps.txt +++ b/doc/chapter-firststeps.txt @@ -92,12 +92,12 @@ also contain comments, which start with the `#` character and go as far as the end of line. If you need to enter a configuration argument that contains spaces, use quotes (`"`) around the whole argument. It's even possible to integrate the output of external commands into the configuration. The text -between two backticks (+\`+) is evaluated as shell command, and its output is -put on its place instead. This works like backtick evaluation in +between two backticks (+`+) is evaluated as shell command, and its output +is put on its place instead. This works like backtick evaluation in Bourne-compatible shells and allows users to use external information from the -system within the configuration. Backticks can be escaped with a backslash -(+\`+); in that case, they'll be replaced with a literal backtick in the -configuration. +system within the configuration. Backticks and `#` characters can be escaped +with a backslash (e.g. `\`` and `\#`); in that case, they'll be replaced with +literal +`+ or `#` in the configuration. Searching for articles is possible in newsboat, too. Just press the "/" key, enter your search phrase, and the title and content of all articles are -- cgit v1.2.3 From 6b080e68d6d255684321a8dca669fd850152c75b Mon Sep 17 00:00:00 2001 From: Alexander Batischev Date: Sun, 29 Sep 2019 22:15:15 +0300 Subject: Double quotes inside backticks do not start a substring Had to tweak an existing test, because it codified the wrong behaviour. --- rust/libnewsboat/src/utils.rs | 4 ++-- test/utils.cpp | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs index 5a79d94b..d1ccca80 100644 --- a/rust/libnewsboat/src/utils.rs +++ b/rust/libnewsboat/src/utils.rs @@ -559,8 +559,8 @@ pub fn strip_comments(line: &str) -> &str { prev_was_backslash = true; continue; } else if chr == '"' { - // If the quote is escaped, do nothing - if !prev_was_backslash { + // If the quote is escaped or we're inside backticks, do nothing + if !prev_was_backslash && !inside_backticks { inside_quotes = !inside_quotes; } } else if chr == '`' { diff --git a/test/utils.cpp b/test/utils.cpp index 7469dfc0..f516f65c 100644 --- a/test/utils.cpp +++ b/test/utils.cpp @@ -329,7 +329,7 @@ TEST_CASE("strip_comments is not confused by nested double quotes and backticks" "[utils]") { { - const auto expected = std::string(R"#("`" ... ` )#"); + const auto expected = std::string(R"#("`" ... ` `"` ")#"); const auto input = expected + "#comment"; REQUIRE(utils::strip_comments(input) == expected); } @@ -339,6 +339,13 @@ TEST_CASE("strip_comments is not confused by nested double quotes and backticks" const auto input = expected + "# a comment string"; REQUIRE(utils::strip_comments(input) == expected); } + + { + const auto expected = + std::string(R"#(option "this `weird " command` for value")#"); + const auto input = expected + "#and a comment"; + REQUIRE(utils::strip_comments(input) == expected); + } } TEST_CASE( -- cgit v1.2.3