summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Unterwaditzer <markus@unterwaditzer.net>2016-02-20 19:32:30 +0100
committerMarkus Unterwaditzer <markus@unterwaditzer.net>2016-02-20 19:32:30 +0100
commit8d4971ef24b34d67ac91152aa6048de1d75362b6 (patch)
tree0282ab8afa9d8725f1d293e5493cc85f6c01ab4c
parent10b30e94ba8de939e052839f9b2a4a832521721e (diff)
parent0cb9d2c3987fae3119f59b7c3ef74f9596f355e2 (diff)
Merge pull request #9 from gracinet/master
Generalized line unfolding in parser (issue #8)
-rw-r--r--src/vobject/lib.rs170
-rw-r--r--tests/lib.rs35
2 files changed, 176 insertions, 29 deletions
diff --git a/src/vobject/lib.rs b/src/vobject/lib.rs
index 90343c4..04ce8e3 100644
--- a/src/vobject/lib.rs
+++ b/src/vobject/lib.rs
@@ -127,8 +127,38 @@ impl<'s> Parser<'s> {
}
}
- fn peek(&self) -> Option<char> {
- self.input[self.pos..].chars().next()
+ /// Look ahead for the next char at the given offset from the current
+ /// position (`self.pos`), taking
+ /// [line unfolding](https://tools.ietf.org/html/rfc5545#section-3.1)
+ /// into account, without actually
+ /// consuming it (immutable `self`).
+ ///
+ /// Returns the next char, if any, together with the increment needed to
+ /// consume it from the current position.
+ /// CR characters are always skipped, so CRLF is simplified to LF.
+ /// This seems acceptable because
+ /// - the rest of the lib accepts a lone LF as a line termination
+ ///   (a bit laxer than RFC 5545)
+ /// - CR alone
+ ///   [is not acceptable content](https://tools.ietf.org/html/rfc5545#section-3.1)
+ fn peek_at(&self, at: usize) -> Option<(char, usize)> {
+ match self.input[self.pos+at..].chars().next() {
+ None => None,
+ Some('\r') => self.peek_at(at + 1),
+ Some('\n') => {
+ match self.peek_at(at + 1) {
+ Some((' ', offset)) | Some(('\t', offset)) =>
+ self.peek_at(offset),
+ _ => Some(('\n', at + 1))
+ }
+ },
+ Some(x) => { Some((x, at + x.len_utf8())) }
+ }
+ }
+
+ #[inline]
+ fn peek(&self) -> Option<(char, usize)> {
+ self.peek_at(0)
}
pub fn eof(&self) -> bool {
@@ -137,7 +167,7 @@ impl<'s> Parser<'s> {
fn assert_char(&self, c: char) -> ParseResult<()> {
let real_c = match self.peek() {
- Some(x) => x,
+ Some((x, _)) => x,
None => return Err(ParseError::new(format!("Expected {}, found EOL", c))),
};
@@ -150,11 +180,20 @@ impl<'s> Parser<'s> {
fn consume_char(&mut self) -> Option<char> {
match self.peek() {
- Some(x) => { self.pos += x.len_utf8(); Some(x) },
+ Some((c, offset)) => { self.pos += offset; Some(c) },
None => None
}
}
+ /// If next peeked char is the given `c`, consume it and return `true`,
+ /// otherwise return `false`.
+ fn consume_only_char(&mut self, c: char) -> bool {
+ match self.peek() {
+ Some((d, offset)) if d == c => {self.pos += offset; true},
+ _ => false
+ }
+ }
+
fn consume_eol(&mut self) -> ParseResult<()> {
let start_pos = self.pos;
@@ -185,12 +224,39 @@ impl<'s> Parser<'s> {
Ok(())
}
- fn consume_while<'a, F: Fn(char) -> bool>(&'a mut self, test: F) -> &'a str {
- let start_pos = self.pos;
- while !self.eof() && test(self.peek().unwrap()) {
- self.consume_char();
+ // GR: this used to return just a slice from the input, but line unfolding
+ // makes that contradictory, unless one wanted to rescan everything.
+ // Since the actually useful calls used to_owned() on the result, which
+ // copies into a String's buffer, let's create a String right away.
+ // Implementation detail: instead of pushing char after char, we push
+ // the biggest contiguous slices possible, because I believe it
+ // to be more efficient (fewer checks for reallocation etc.).
+ fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
+ let mut sl_start_pos = self.pos;
+ let mut res = String::new();
+ while !self.eof() {
+ match self.peek() {
+ Some((c, offset)) => {
+ if !test(c) {
+ break
+ } else {
+ if offset > c.len_utf8() {
+ // we have some skipping and therefore need to flush
+ res.push_str(&self.input[sl_start_pos..self.pos]);
+ res.push(c);
+ sl_start_pos = self.pos + offset;
+ }
+ self.pos += offset;
+ }
+ },
+ _ => break
+ }
}
- &self.input[start_pos..self.pos]
+ // Final flush
+ if sl_start_pos < self.pos {
+ res.push_str(&self.input[sl_start_pos..self.pos])
+ }
+ res
}
pub fn consume_property(&mut self) -> ParseResult<Property> {
@@ -216,7 +282,7 @@ impl<'s> Parser<'s> {
if rv.len() == 0 {
Err(ParseError::new("No property name found."))
} else {
- Ok(rv.to_owned())
+ Ok(rv)
}
}
@@ -240,16 +306,8 @@ impl<'s> Parser<'s> {
}
fn consume_property_value<'a>(&'a mut self) -> ParseResult<String> {
- let mut rv = String::new();
- loop {
- rv.push_str(self.consume_while(|x| x != '\r' && x != '\n'));
- try!(self.sloppy_terminate_line());
-
- match self.peek() {
- Some(' ') | Some('\t') => self.consume_char(),
- _ => break,
- };
- }
+ let rv = self.consume_while(|x| x != '\r' && x != '\n');
+ try!(self.sloppy_terminate_line());
Ok(rv)
}
@@ -269,22 +327,20 @@ impl<'s> Parser<'s> {
x > '\u{1F}'
};
- if self.peek() == Some('"') {
- self.consume_char();
- let rv = self.consume_while(qsafe).to_owned();
+ if self.consume_only_char('"') {
+ let rv = self.consume_while(qsafe);
try!(self.assert_char('"'));
self.consume_char();
Ok(rv)
} else {
- Ok(self.consume_while(|x| qsafe(x) && x != ';' && x != ':').to_owned())
+ Ok(self.consume_while(|x| qsafe(x) && x != ';' && x != ':'))
}
}
fn consume_param<'a>(&'a mut self) -> ParseResult<(String, String)> {
let name = try!(self.consume_param_name());
- let value = if self.peek() == Some('=') {
- let start_pos = self.pos;
- self.consume_char();
+ let start_pos = self.pos;
+ let value = if self.consume_only_char('=') {
match self.consume_param_value() {
Ok(x) => x,
Err(e) => { self.pos = start_pos; return Err(e); }
@@ -298,8 +354,7 @@ impl<'s> Parser<'s> {
fn consume_params(&mut self) -> HashMap<String, String> {
let mut rv: HashMap<String, String> = HashMap::new();
- while self.peek() == Some(';') {
- self.consume_char();
+ while self.consume_only_char(';') {
match self.consume_param() {
Ok((name, value)) => { rv.insert(name.to_owned(), value.to_owned()); },
Err(_) => break,
@@ -470,3 +525,60 @@ impl ParseError {
self.desc
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::Parser;
+
+ #[test]
+ fn test_unfold1() {
+ let mut p = Parser{input: "ab\r\n c", pos: 2};
+ assert_eq!(p.consume_char(), Some('c'));
+ assert_eq!(p.pos, 6);
+ }
+
+ #[test]
+ fn test_unfold2() {
+ let mut p = Parser{input: "ab\n\tc\nx", pos: 2};
+ assert_eq!(p.consume_char(), Some('c'));
+ assert_eq!(p.consume_char(), Some('\n'));
+ assert_eq!(p.consume_char(), Some('x'));
+ }
+
+ #[test]
+ fn test_consume_while() {
+ let mut p = Parser{input:"af\n oo:bar", pos: 1};
+ assert_eq!(p.consume_while(|x| x != ':'), "foo");
+ assert_eq!(p.consume_char(), Some(':'));
+ assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+ }
+
+ #[test]
+ fn test_consume_while2() {
+ let mut p = Parser{input:"af\n oo\n\t:bar", pos: 1};
+ assert_eq!(p.consume_while(|x| x != ':'), "foo");
+ assert_eq!(p.consume_char(), Some(':'));
+ assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+ }
+
+ #[test]
+ fn test_consume_while3() {
+ let mut p = Parser{input:"af\n oo:\n bar", pos: 1};
+ assert_eq!(p.consume_while(|x| x != ':'), "foo");
+ assert_eq!(p.consume_char(), Some(':'));
+ assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+ }
+
+ #[test]
+ fn test_consume_only_char() {
+ let mut p = Parser{input:"\n \"bar", pos: 0};
+ assert!(p.consume_only_char('"'));
+ assert_eq!(p.pos, 3);
+ assert!(!p.consume_only_char('"'));
+ assert_eq!(p.pos, 3);
+ assert!(p.consume_only_char('b'));
+ assert_eq!(p.pos, 4);
+ }
+
+}
+
diff --git a/tests/lib.rs b/tests/lib.rs
index ea5c499..d5e4f24 100644
--- a/tests/lib.rs
+++ b/tests/lib.rs
@@ -40,6 +40,8 @@ fn test_line_cont() {
VERSION:2.1\n\
N;ENCODING=QUOTED-PRINTABLE:Nikdo;Nikdo=\n\t\
vic\n\
+ FN;ENCODING=QUOTED-PRINT\n \
+ ABLE:Alice;Alice=vic\n\
NOTE:This ends with equal sign=\n\
TEL;WORK:5555\n \
4444\n\
@@ -48,6 +50,7 @@ fn test_line_cont() {
assert_eq!(item.name, s!("VCARD"));
assert_eq!(item.single_prop("TEL").unwrap().raw_value, s!("55554444"));
assert_eq!(item.single_prop("N").unwrap().raw_value, s!("Nikdo;Nikdo=vic"));
+ assert_eq!(item.single_prop("FN").unwrap().raw_value, s!("Alice;Alice=vic"));
}
#[test]
@@ -81,6 +84,38 @@ fn test_icalendar_basic() {
}
#[test]
+fn test_icalendar_multline() {
+ // Adapted from a very popular provider's export
+ // this used to give ParseError { desc: "Expected :, found \n" }
+ let event = parse_component(
+ "BEGIN:VEVENT\n\
+ ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=ACCEPTED;CN=Jo\n \
+ hn Doe;X-NUM-GUESTS=0:mailto:jd@cal.test\n\
+ SUMMARY:Important meeting\n\
+ END:VEVENT\n").unwrap();
+
+ assert_eq!(event.name, s!("VEVENT"));
+ assert_eq!(event.single_prop("SUMMARY").unwrap().raw_value,
+ s!("Important meeting"));
+}
+
+#[test]
+fn test_icalendar_multline2() {
+ // Adapted from a very popular provider's export
+ // this used to give ParseError { desc: "No property name found." }
+ let event = parse_component(
+ "BEGIN:VCALENDAR\n\
+ BEGIN:VEVENT\n\
+ ATTENDEE;CUTYPE=INDIVIDUAL;ROLE=REQ-PARTICIPANT;PARTSTAT=ACCEPTED;CN=Jo\n \
+ hn Doe;X-NUM-GUESTS=0:mailto:jd@cal.test\n\
+ SUMMARY:Important meeting\n\
+ END:VEVENT\n\
+ END:VCALENDAR\n").unwrap();
+
+ assert_eq!(event.name, s!("VCALENDAR"));
+}
+
+#[test]
fn test_escaping() {
let item = parse_component(
"BEGIN:VCALENDAR\n\