summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Batischev <eual.jp@gmail.com>2019-10-02 16:30:18 +0300
committerGitHub <noreply@github.com>2019-10-02 16:30:18 +0300
commitb3d0b966b09d1ca6cdc70e8fa7ad614041ef5bff (patch)
treea156bbfea2697c76b0d851f8f2957151cb4409fc
parentc63da29f70de21f045a1e40d45bcbf332ef71a4c (diff)
parent6b080e68d6d255684321a8dca669fd850152c75b (diff)
Merge pull request #658 from newsboat/bugfix/652-quoted-pound-sign-treated-as-comment
Ignore # characters inside double quotes and backticks Fixes #652.
-rw-r--r--doc/chapter-firststeps.txt10
-rw-r--r--include/rs_utils.h2
-rw-r--r--rust/libnewsboat-ffi/src/utils.rs15
-rw-r--r--rust/libnewsboat/src/utils.rs90
-rw-r--r--src/utils.cpp9
-rw-r--r--test/configparser.cpp28
-rw-r--r--test/utils.cpp130
7 files changed, 271 insertions, 13 deletions
diff --git a/doc/chapter-firststeps.txt b/doc/chapter-firststeps.txt
index 725ad49c..01e71a50 100644
--- a/doc/chapter-firststeps.txt
+++ b/doc/chapter-firststeps.txt
@@ -92,12 +92,12 @@ also contain comments, which start with the `#` character and go as far as the
end of line. If you need to enter a configuration argument that contains
spaces, use quotes (`"`) around the whole argument. It's even possible to
integrate the output of external commands into the configuration. The text
-between two backticks (+\`+) is evaluated as shell command, and its output is
-put on its place instead. This works like backtick evaluation in
+between two backticks (+&#96;+) is evaluated as shell command, and its output
+is put on its place instead. This works like backtick evaluation in
Bourne-compatible shells and allows users to use external information from the
-system within the configuration. Backticks can be escaped with a backslash
-(+\`+); in that case, they'll be replaced with a literal backtick in the
-configuration.
+system within the configuration. Backticks and `#` characters can be escaped
+with a backslash (e.g. `\`` and `\#`); in that case, they'll be replaced with
+literal +&#96;+ or `#` in the configuration.
Searching for articles is possible in newsboat, too. Just press the "/" key,
enter your search phrase, and the title and content of all articles are
diff --git a/include/rs_utils.h b/include/rs_utils.h
index b35468ff..cbef8e0a 100644
--- a/include/rs_utils.h
+++ b/include/rs_utils.h
@@ -82,6 +82,8 @@ unsigned int rs_newsboat_version_major();
int rs_mkdir_parents(const char* path, const std::uint32_t mode);
+char* rs_strip_comments(const char* line);
+
class RustString {
private:
char* str;
diff --git a/rust/libnewsboat-ffi/src/utils.rs b/rust/libnewsboat-ffi/src/utils.rs
index 1da94c53..b75c94dc 100644
--- a/rust/libnewsboat-ffi/src/utils.rs
+++ b/rust/libnewsboat-ffi/src/utils.rs
@@ -489,3 +489,18 @@ pub extern "C" fn rs_program_version() -> *mut c_char {
pub extern "C" fn rs_newsboat_version_major() -> u32 {
abort_on_panic(|| utils::newsboat_major_version())
}
+
+#[no_mangle]
+pub unsafe extern "C" fn rs_strip_comments(line: *const c_char) -> *mut c_char {
+ abort_on_panic(|| {
+ let line = CStr::from_ptr(line);
+ let line = line.to_str().expect("line contained invalid UTF-8");
+
+ let result = utils::strip_comments(line);
+
+ // `result` contains a subset of `line`, which is a C string. Thus, we conclude that
+ // `result` doesn't contain null bytes. Therefore, `CString::new` always returns `Some`.
+ let result = CString::new(result).unwrap();
+ result.into_raw()
+ })
+}
diff --git a/rust/libnewsboat/src/utils.rs b/rust/libnewsboat/src/utils.rs
index 91ee9cfe..d1ccca80 100644
--- a/rust/libnewsboat/src/utils.rs
+++ b/rust/libnewsboat/src/utils.rs
@@ -545,6 +545,44 @@ pub fn newsboat_major_version() -> u32 {
env!("CARGO_PKG_VERSION_MAJOR").parse::<u32>().unwrap()
}
+/// Returns the part of the string before first # character (or the whole input string if there are
+/// no # character in it). Pound characters inside double quotes and backticks are ignored.
+pub fn strip_comments(line: &str) -> &str {
+ let mut prev_was_backslash = false;
+ let mut inside_quotes = false;
+ let mut inside_backticks = false;
+
+ let mut first_pound_chr_idx = line.len();
+
+ for (idx, chr) in line.char_indices() {
+ if chr == '\\' {
+ prev_was_backslash = true;
+ continue;
+ } else if chr == '"' {
+ // If the quote is escaped or we're inside backticks, do nothing
+ if !prev_was_backslash && !inside_backticks {
+ inside_quotes = !inside_quotes;
+ }
+ } else if chr == '`' {
+ // If the backtick is escaped, do nothing
+ if !prev_was_backslash {
+ inside_backticks = !inside_backticks;
+ }
+ } else if chr == '#' {
+ if !prev_was_backslash && !inside_quotes && !inside_backticks {
+ first_pound_chr_idx = idx;
+ break;
+ }
+ }
+
+ // We call `continue` when we run into a backslash; here, we handle all the other
+ // characters, which clearly *aren't* a backslash
+ prev_was_backslash = false;
+ }
+
+ &line[0..first_pound_chr_idx]
+}
+
#[cfg(test)]
mod tests {
extern crate tempfile;
@@ -827,6 +865,7 @@ mod tests {
get_command_output("a-program-that-is-guaranteed-to-not-exists"),
"".to_string()
);
+ assert_eq!(get_command_output("echo c\" d e"), "".to_string());
}
#[test]
@@ -1042,4 +1081,55 @@ mod tests {
assert_eq!(strnaturalcmp("aa10", "aa2"), Ordering::Greater);
}
+
+ #[test]
+ fn t_strip_comments() {
+ // no comments in line
+ assert_eq!(strip_comments(""), "");
+ assert_eq!(strip_comments("\t\n"), "\t\n");
+ assert_eq!(strip_comments("some directive "), "some directive ");
+
+ // fully commented line
+ assert_eq!(strip_comments("#"), "");
+ assert_eq!(strip_comments("# #"), "");
+ assert_eq!(strip_comments("# comment"), "");
+
+ // partially commented line
+ assert_eq!(strip_comments("directive # comment"), "directive ");
+ assert_eq!(
+ strip_comments("directive # comment # another"),
+ "directive "
+ );
+ assert_eq!(strip_comments("directive#comment"), "directive");
+
+ // ignores # characters inside double quotes (#652)
+ let expected = r#"highlight article "[-=+#_*~]{3,}.*" green default"#;
+ let input = expected.to_owned() + "# this is a comment";
+ assert_eq!(strip_comments(&input), expected);
+
+ let expected =
+ r#"highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold"#;
+ let input = expected.to_owned() + "#heresacomment";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Escaped double quote inside double quotes is not treated as closing quote
+ let expected = r#"test "here \"goes # nothing\" etc" hehe"#;
+ let input = expected.to_owned() + "# and here is a comment";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Ignores # characters inside backticks
+ let expected = r#"one `two # three` four"#;
+ let input = expected.to_owned() + "# and a comment, of course";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Escaped backtick inside backticks is not treated as closing
+ let expected = r#"some `other \` tricky # test` hehe"#;
+ let input = expected.to_owned() + "#here goescomment";
+ assert_eq!(strip_comments(&input), expected);
+
+ // Ignores escaped # characters (\\#)
+ let expected = r#"one two \# three four"#;
+ let input = expected.to_owned() + "# and a comment";
+ assert_eq!(strip_comments(&input), expected);
+ }
}
diff --git a/src/utils.cpp b/src/utils.cpp
index d7351425..9b50a492 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -78,14 +78,7 @@ void append_escapes(std::string& str, char c)
std::string utils::strip_comments(const std::string& line)
{
- /*
- * This functions returns only the non-comment part of the line,
- * which can then be safely evaluated and tokenized.
- *
- * If no comments are present on the line, the whole line is returned.
- */
- auto hash_pos = line.find_first_of("#");
- return line.substr(0, hash_pos);
+ return RustString(rs_strip_comments(line.c_str()));
}
std::vector<std::string> utils::tokenize_quoted(const std::string& str,
diff --git a/test/configparser.cpp b/test/configparser.cpp
index 1133090d..f50625d8 100644
--- a/test/configparser.cpp
+++ b/test/configparser.cpp
@@ -59,6 +59,34 @@ TEST_CASE("evaluate_backticks replaces command in backticks with its output",
REQUIRE_NOTHROW(cfgparser.parse("data/config-space-backticks"));
REQUIRE_FALSE(keys.get_operation("s", "all") == OP_NIL);
}
+
+ SECTION("Unbalanced backtick does *not* start a command")
+ {
+ const auto input1 = std::string("one `echo two three");
+ REQUIRE(ConfigParser::evaluate_backticks(input1) == input1);
+
+ const auto input2 = std::string("this `echo is a` test `here");
+ const auto expected2 = std::string("this is a test `here");
+ REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2);
+ }
+
+ // One might think that putting one or both backticks inside a string will
+ // "escape" them, the same way as backslash does. But it doesn't, and
+ // shouldn't: when parsing a config, we need to evaluate *all* commands
+ // there are, no matter where they're placed.
+ SECTION("Backticks inside double quotes are not ignored")
+ {
+ const auto input1 = std::string(R"#("`echo hello`")#");
+ REQUIRE(ConfigParser::evaluate_backticks(input1) == R"#("hello")#");
+
+ const auto input2 = std::string(R"#(a "b `echo c" d e` f)#");
+ // The line above asks the shell to run 'echo c" d e', which is an
+ // invalid command--the double quotes are not closed. The standard
+ // output of that command would be empty, so nothing will be inserted
+ // in place of backticks.
+ const auto expected2 = std::string(R"#(a "b f)#");
+ REQUIRE(ConfigParser::evaluate_backticks(input2) == expected2);
+ }
}
TEST_CASE("\"unbind-key -a\" removes all key bindings", "[ConfigParser]")
diff --git a/test/utils.cpp b/test/utils.cpp
index e9c8c587..f516f65c 100644
--- a/test/utils.cpp
+++ b/test/utils.cpp
@@ -150,6 +150,62 @@ TEST_CASE("tokenize_quoted() doesn't un-escape escaped backticks", "[utils]")
REQUIRE(tokens[1] == "\\`foobar `bla`\\`");
}
+TEST_CASE("tokenize_quoted stops tokenizing once it found a # character "
+ "(outside of double quotes)",
+ "[utils]")
+{
+ std::vector<std::string> tokens;
+
+ SECTION("A string consisting of just a comment")
+ {
+ tokens = utils::tokenize_quoted("# just a comment");
+ REQUIRE(tokens.empty());
+ }
+
+ SECTION("A string with one quoted substring")
+ {
+ tokens = utils::tokenize_quoted(R"#("a test substring" # !!!)#");
+ REQUIRE(tokens.size() == 1);
+ REQUIRE(tokens[0] == "a test substring");
+ }
+
+ SECTION("A string with two quoted substrings")
+ {
+ tokens = utils::tokenize_quoted(R"#("first sub" "snd" # comment)#");
+ REQUIRE(tokens.size() == 2);
+ REQUIRE(tokens[0] == "first sub");
+ REQUIRE(tokens[1] == "snd");
+ }
+
+ SECTION("A comment containing # character")
+ {
+ tokens = utils::tokenize_quoted(R"#(one # a comment with # char)#");
+ REQUIRE(tokens.size() == 1);
+ REQUIRE(tokens[0] == "one");
+ }
+
+ SECTION("A # character inside quoted substring is ignored")
+ {
+ tokens = utils::tokenize_quoted(R"#(this "will # be" ignored)#");
+ REQUIRE(tokens.size() == 3);
+ REQUIRE(tokens[0] == "this");
+ REQUIRE(tokens[1] == "will # be");
+ REQUIRE(tokens[2] == "ignored");
+ }
+}
+
+TEST_CASE("tokenize_quoted does not consider escaped pound sign (\\#) "
+ "a beginning of a comment",
+ "[utils]")
+{
+ const auto tokens = utils::tokenize_quoted(R"#(one \# two three # ???)#");
+ REQUIRE(tokens.size() == 4);
+ REQUIRE(tokens[0] == "one");
+ REQUIRE(tokens[1] == "\\#");
+ REQUIRE(tokens[2] == "two");
+ REQUIRE(tokens[3] == "three");
+}
+
TEST_CASE("tokenize_nl() split a string into delimiters and fields", "[utils]")
{
std::vector<std::string> tokens;
@@ -219,6 +275,79 @@ TEST_CASE(
}
}
+TEST_CASE("strip_comments ignores escaped # characters (\\#)")
+{
+ const auto expected =
+ std::string(R"#(one two \# three four)#");
+ const auto input = expected + "# and a comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+}
+
+TEST_CASE("strip_comments ignores # characters inside double quotes",
+ "[utils][issue652]")
+{
+ SECTION("Real-world cases from issue 652") {
+ const auto expected1 =
+ std::string(R"#(highlight article "[-=+#_*~]{3,}.*" green default)#");
+ const auto input1 = expected1 + "# this is a comment";
+ REQUIRE(utils::strip_comments(input1) == expected1);
+
+ const auto expected2 =
+ std::string(R"#(highlight all "(https?|ftp)://[\-\.,/%~_:?&=\#a-zA-Z0-9]+" blue default bold)#");
+ const auto input2 = expected2 + "#heresacomment";
+ REQUIRE(utils::strip_comments(input2) == expected2);
+ }
+
+ SECTION("Escaped double quote inside double quotes is not treated "
+ "as closing quote")
+ {
+ const auto expected =
+ std::string(R"#(test "here \"goes # nothing\" etc" hehe)#");
+ const auto input = expected + "# and here is a comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
+TEST_CASE("strip_comments ignores # characters inside backticks", "[utils]")
+{
+ SECTION("Simple case") {
+ const auto expected = std::string(R"#(one `two # three` four)#");
+ const auto input = expected + "# and a comment, of course";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+
+ SECTION("Escaped backtick inside backticks is not treated as closing")
+ {
+ const auto expected =
+ std::string(R"#(some `other \` tricky # test` hehe)#");
+ const auto input = expected + "#here goescomment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
+TEST_CASE("strip_comments is not confused by nested double quotes and backticks",
+ "[utils]")
+{
+ {
+ const auto expected = std::string(R"#("`" ... ` `"` ")#");
+ const auto input = expected + "#comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+
+ {
+ const auto expected = std::string(R"#(aaa ` bbb "ccc ddd" e` dd)#");
+ const auto input = expected + "# a comment string";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+
+ {
+ const auto expected =
+ std::string(R"#(option "this `weird " command` for value")#");
+ const auto input = expected + "#and a comment";
+ REQUIRE(utils::strip_comments(input) == expected);
+ }
+}
+
TEST_CASE(
"consolidate_whitespace replaces multiple consecutive"
"whitespace with a single space",
@@ -249,6 +378,7 @@ TEST_CASE("get_command_output()", "[utils]")
"a-program-that-is-guaranteed-to-not-exists"));
REQUIRE(utils::get_command_output(
"a-program-that-is-guaranteed-to-not-exists") == "");
+ REQUIRE(utils::get_command_output("echo c\" d e") == "");
}
TEST_CASE("extract_filter()", "[utils]")