summary | refs | log | tree | commit | diff | stats
path: root/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/parser.rs')
-rw-r--r-- src/parser.rs 374
1 files changed, 374 insertions, 0 deletions
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0276ec3
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,374 @@
+use std::collections::HashMap;
+
+use component::Component;
+use property::Property;
+use error::*;
+
+/// Cursor over a vobject (iCalendar / vCard) text buffer.
+///
+/// The parser borrows `input` and only ever advances (or rewinds) `pos`.
+/// `pos` is a byte offset and must sit on a UTF-8 character boundary,
+/// otherwise the string slicing done by the methods below panics.
+#[derive(Debug, Clone)]
+pub struct Parser<'s> {
+    /// Complete text being parsed.
+    pub input: &'s str,
+    /// Byte offset into `input` of the next unread character.
+    pub pos: usize,
+}
+
+impl<'s> Parser<'s> {
+    /// Creates a parser positioned at the very start of `input`.
+    pub fn new(input: &'s str) -> Self {
+        Parser {
+            input: input,
+            pos: 0,
+        }
+    }
+
+    /// Builds the `Err` value used for every syntax problem reported by
+    /// this parser, so call sites only have to supply the message.
+    fn parse_error<T>(msg: String) -> Result<T> {
+        Err(VObjectError::from_kind(VObjectErrorKind::ParserError(msg)))
+    }
+
+    /// look-ahead for next char at given offset from current position
+    /// (self.pos), taking [line unfolding]
+    /// (https://tools.ietf.org/html/rfc5545#section-3.1) into account,
+    /// without actually consuming it (immutable self).
+    ///
+    /// Return an option for next char, and needed increment to consume it
+    /// from current position.
+    /// CR characters get always skipped, resulting in CRLF to be simplified as
+    /// LF, which seems to be acceptable because
+    /// - the remainders of the lib do accept a lone LF as a line termination
+    ///   (a bit laxer than RFC 5545)
+    /// - CR alone [is not acceptable content]
+    ///   (https://tools.ietf.org/html/rfc5545#section-3.1)
+    ///
+    /// Implemented as a loop rather than by recursion so that long runs of
+    /// CRs or folds cannot exhaust the stack.
+    fn peek_at(&self, at: usize) -> Option<(char, usize)> {
+        let bytes = self.input.as_bytes();
+        let mut at = at;
+        loop {
+            match self.input[self.pos + at..].chars().next() {
+                None => return None,
+                Some('\r') => at += 1,
+                Some('\n') => {
+                    // Decide whether this LF starts a fold by looking at the
+                    // *raw* character that follows it (CRs skipped). Using
+                    // the unfolded look-ahead here would mistake
+                    // "\n\n  x" for a fold and swallow a real line break.
+                    let mut next = self.pos + at + 1;
+                    while bytes.get(next) == Some(&b'\r') {
+                        next += 1;
+                    }
+                    match bytes.get(next) {
+                        // LF followed by a single space or tab: folded
+                        // line, resume scanning right after the marker.
+                        Some(&b' ') | Some(&b'\t') => at = next + 1 - self.pos,
+                        _ => return Some(('\n', at + 1)),
+                    }
+                }
+                Some(x) => return Some((x, at + x.len_utf8())),
+            }
+        }
+    }
+
+    /// Look-ahead at the current position; see `peek_at`.
+    #[inline]
+    fn peek(&self) -> Option<(char, usize)> {
+        self.peek_at(0)
+    }
+
+    /// Returns `true` once `pos` has reached (or passed) the end of `input`.
+    ///
+    /// Note that `peek` may already return `None` before this turns `true`
+    /// when only skipped characters (CRs) remain.
+    pub fn eof(&self) -> bool {
+        self.pos >= self.input.len()
+    }
+
+    /// Checks, without consuming anything, that the next (unfolded) char
+    /// is `c`.
+    ///
+    /// Returns a parser error naming the expected and the found char. When
+    /// no char is left the message says "EOL" although the condition is in
+    /// fact the end of the input.
+    fn assert_char(&self, c: char) -> Result<()> {
+        match self.peek() {
+            Some((real_c, _)) if real_c == c => Ok(()),
+            Some((real_c, _)) => {
+                Self::parse_error(format!("Expected {}, found {}", c, real_c))
+            }
+            None => Self::parse_error(format!("Expected {}, found EOL", c)),
+        }
+    }
+
+    /// Consumes and returns the next (unfolded) char, or `None` if nothing
+    /// is left to read; `pos` is only moved in the former case.
+    pub fn consume_char(&mut self) -> Option<char> {
+        match self.peek() {
+            Some((c, offset)) => {
+                self.pos += offset;
+                Some(c)
+            }
+            None => None,
+        }
+    }
+
+    /// If next peeked char is the given `c`, consume it and return `true`,
+    /// otherwise return `false`.
+    pub fn consume_only_char(&mut self, c: char) -> bool {
+        match self.peek() {
+            Some((d, offset)) if d == c => {
+                self.pos += offset;
+                true
+            }
+            _ => false,
+        }
+    }
+
+    /// Consumes exactly one line terminator.
+    ///
+    /// `peek` skips CRs, so both LF and CRLF show up here as a single
+    /// `'\n'`. On failure nothing is consumed.
+    fn consume_eol(&mut self) -> Result<()> {
+        if self.consume_only_char('\n') {
+            Ok(())
+        } else {
+            Self::parse_error("Expected EOL.".to_owned())
+        }
+    }
+
+    /// Ends the current line: unless at end of input, requires one line
+    /// terminator and then silently eats any further empty lines.
+    fn sloppy_terminate_line(&mut self) -> Result<()> {
+        if !self.eof() {
+            try!(self.consume_eol());
+            while self.consume_only_char('\n') {}
+        }
+
+        Ok(())
+    }
+
+    /// Consumes chars for as long as `test` accepts them and returns them
+    /// as an owned, already unfolded `String`.
+    ///
+    /// A slice of `input` cannot be returned because unfolding removes
+    /// characters from the middle of the text. Instead of pushing char
+    /// after char, the result is assembled from the biggest contiguous
+    /// slices possible, flushing whenever `peek` had to skip something.
+    pub fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
+        let mut res = String::new();
+        // Start of the not-yet-copied contiguous run of accepted chars.
+        let mut run_start = self.pos;
+
+        while let Some((c, offset)) = self.peek() {
+            if !test(c) {
+                break;
+            }
+            if offset > c.len_utf8() {
+                // Something was skipped in front of `c`: flush the run
+                // collected so far, add `c` itself and restart after it.
+                res.push_str(&self.input[run_start..self.pos]);
+                res.push(c);
+                run_start = self.pos + offset;
+            }
+            self.pos += offset;
+        }
+
+        // Final flush (a no-op when the run is empty).
+        res.push_str(&self.input[run_start..self.pos]);
+        res
+    }
+
+    /// Parses one content line: `[group "."] name *(";" param) ":" value`
+    /// followed by its line terminator(s).
+    ///
+    /// Fails if no property name is found, if the `:` separator is missing
+    /// or if the line is not properly terminated.
+    pub fn consume_property(&mut self) -> Result<Property> {
+        let group = self.consume_property_group().ok();
+        let name = try!(self.consume_property_name());
+        let params = self.consume_params();
+
+        try!(self.assert_char(':'));
+        self.consume_char();
+
+        let value = try!(self.consume_property_value());
+
+        Ok(Property {
+            name: name,
+            params: params,
+            raw_value: value,
+            prop_group: group,
+        })
+    }
+
+    /// Consumes a run of alphanumeric chars and `-`; an empty run is an
+    /// error (and consumes nothing).
+    fn consume_property_name(&mut self) -> Result<String> {
+        let rv = self.consume_while(|x| x == '-' || x.is_alphanumeric());
+        if rv.is_empty() {
+            Self::parse_error("No property name found.".to_owned())
+        } else {
+            Ok(rv)
+        }
+    }
+
+    /// Consumes a `group.` prefix and returns the group name.
+    ///
+    /// If there is no name, or the name is not followed by a `.`, the
+    /// position is restored and the underlying error returned.
+    fn consume_property_group(&mut self) -> Result<String> {
+        let start_pos = self.pos;
+
+        let name = match self.consume_property_name() {
+            Ok(name) => name,
+            Err(e) => {
+                self.pos = start_pos;
+                return Err(e);
+            }
+        };
+
+        if let Err(e) = self.assert_char('.') {
+            self.pos = start_pos;
+            return Err(e);
+        }
+
+        self.consume_char();
+        Ok(name)
+    }
+
+    /// Consumes everything up to the end of the (unfolded) line, then the
+    /// line terminator itself.
+    fn consume_property_value(&mut self) -> Result<String> {
+        let rv = self.consume_while(|x| x != '\r' && x != '\n');
+        try!(self.sloppy_terminate_line());
+        Ok(rv)
+    }
+
+    /// Like `consume_property_name`, but with an error message that talks
+    /// about parameters.
+    fn consume_param_name(&mut self) -> Result<String> {
+        match self.consume_property_name() {
+            Ok(x) => Ok(x),
+            Err(e) => Self::parse_error(format!("No param name found: {}", e)),
+        }
+    }
+
+    /// Consumes a parameter value, either quoted (`"..."`, quotes not part
+    /// of the result) or bare (stops at `;` or `:`).
+    ///
+    /// Accepted chars follow QSAFE-CHAR / SAFE-CHAR of RFC 5545 section
+    /// 3.1: anything but control chars and `"`. HTAB is whitespace, not
+    /// a control char in that grammar, and is therefore accepted.
+    fn consume_param_value(&mut self) -> Result<String> {
+        let qsafe = |x: char| {
+            // `> '\u{1F}'` already rules out CR and LF.
+            x == '\t' || (x > '\u{1F}' && x != '\u{7F}' && x != '"')
+        };
+
+        if self.consume_only_char('"') {
+            let rv = self.consume_while(qsafe);
+            try!(self.assert_char('"'));
+            self.consume_char();
+            Ok(rv)
+        } else {
+            Ok(self.consume_while(|x| qsafe(x) && x != ';' && x != ':'))
+        }
+    }
+
+    /// Consumes `name[=value]` and returns both parts; a parameter without
+    /// `=` gets an empty value.
+    ///
+    /// If the value is malformed the position is rewound to just after
+    /// the name.
+    fn consume_param(&mut self) -> Result<(String, String)> {
+        let name = try!(self.consume_param_name());
+        let start_pos = self.pos;
+
+        if !self.consume_only_char('=') {
+            return Ok((name, String::new()));
+        }
+
+        match self.consume_param_value() {
+            Ok(value) => Ok((name, value)),
+            Err(e) => {
+                self.pos = start_pos;
+                Err(e)
+            }
+        }
+    }
+
+    /// Consumes all `;name[=value]` parameters that follow a property name.
+    ///
+    /// Stops quietly at the first parameter that fails to parse; whatever
+    /// was collected until then is returned.
+    fn consume_params(&mut self) -> HashMap<String, String> {
+        let mut rv: HashMap<String, String> = HashMap::new();
+        while self.consume_only_char(';') {
+            match self.consume_param() {
+                Ok((name, value)) => {
+                    rv.insert(name, value);
+                }
+                Err(_) => break,
+            }
+        }
+        rv
+    }
+
+    /// Parses a complete `BEGIN:x` ... `END:x` component, including nested
+    /// components.
+    ///
+    /// The position is restored to where it was on entry when the first
+    /// line is not a `BEGIN` property or when the closing `END` names a
+    /// different component. Errors raised while parsing the lines in
+    /// between are propagated as they are.
+    pub fn consume_component(&mut self) -> Result<Component> {
+        let start_pos = self.pos;
+        let begin = try!(self.consume_property());
+        if begin.name != "BEGIN" {
+            self.pos = start_pos;
+            return Self::parse_error("Expected BEGIN tag.".to_owned());
+        }
+
+        // Create a component with the name of the BEGIN tag's value
+        let mut component = Component::new(begin.raw_value);
+
+        loop {
+            let previous_pos = self.pos;
+            let property = try!(self.consume_property());
+            if property.name == "BEGIN" {
+                // Nested component: rewind and let the recursive call
+                // read its BEGIN line again.
+                self.pos = previous_pos;
+                component.subcomponents.push(try!(self.consume_component()));
+            } else if property.name == "END" {
+                if property.raw_value != component.name {
+                    self.pos = start_pos;
+                    return Self::parse_error(format!(
+                        "Mismatched tags: BEGIN:{} vs END:{}",
+                        component.name,
+                        property.raw_value
+                    ));
+                }
+
+                break;
+            } else {
+                component.push(property);
+            }
+        }
+
+        Ok(component)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use error::*;
+    use super::Parser;
+
+    /// A CRLF followed by a space is a fold: the next char is the one
+    /// after the fold and the whole marker gets consumed with it.
+    #[test]
+    fn test_unfold1() {
+        let mut p = Parser{input: "ab\r\n c", pos: 2};
+        assert_eq!(p.consume_char(), Some('c'));
+        assert_eq!(p.pos, 6);
+    }
+
+    /// A lone LF followed by a tab is a fold too, while an LF followed by
+    /// anything else is a real line break.
+    #[test]
+    fn test_unfold2() {
+        let mut p = Parser{input: "ab\n\tc\nx", pos: 2};
+        assert_eq!(p.consume_char(), Some('c'));
+        assert_eq!(p.consume_char(), Some('\n'));
+        assert_eq!(p.consume_char(), Some('x'));
+    }
+
+    /// A fold in the middle of the consumed text is removed.
+    #[test]
+    fn test_consume_while() {
+        let mut p = Parser{input:"af\n oo:bar", pos: 1};
+        assert_eq!(p.consume_while(|x| x != ':'), "foo");
+        assert_eq!(p.consume_char(), Some(':'));
+        assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+    }
+
+    /// A fold right in front of the first rejected char is skipped.
+    #[test]
+    fn test_consume_while2() {
+        let mut p = Parser{input:"af\n oo\n\t:bar", pos: 1};
+        assert_eq!(p.consume_while(|x| x != ':'), "foo");
+        assert_eq!(p.consume_char(), Some(':'));
+        assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+    }
+
+    /// A fold at the very start of the consumed text is removed.
+    #[test]
+    fn test_consume_while3() {
+        let mut p = Parser{input:"af\n oo:\n bar", pos: 1};
+        assert_eq!(p.consume_while(|x| x != ':'), "foo");
+        assert_eq!(p.consume_char(), Some(':'));
+        assert_eq!(p.consume_while(|x| x != '\n'), "bar");
+    }
+
+    /// `consume_only_char` looks through folds and leaves the position
+    /// untouched when the char does not match.
+    #[test]
+    fn test_consume_only_char() {
+        let mut p = Parser{input:"\n \"bar", pos: 0};
+        assert!(p.consume_only_char('"'));
+        assert_eq!(p.pos, 3);
+        assert!(!p.consume_only_char('"'));
+        assert_eq!(p.pos, 3);
+        assert!(p.consume_only_char('b'));
+        assert_eq!(p.pos, 4);
+    }
+
+    /// Unbalanced BEGIN/END tags must produce a parser error instead of
+    /// making the parser spin forever; parsing runs on its own thread so
+    /// that a hang shows up as a timeout.
+    #[test]
+    fn mismatched_begin_end_tags_returns_error() {
+        use std::sync::mpsc::{channel, RecvTimeoutError};
+        use std::time::Duration;
+        let mut p = Parser {input: "BEGIN:a\nBEGIN:b\nEND:a", pos: 0};
+
+        let (tx, rx) = channel();
+        ::std::thread::spawn(move|| { tx.send(p.consume_component()) });
+
+        match rx.recv_timeout(Duration::from_millis(50)) {
+            Ok(Err(VObjectError(VObjectErrorKind::ParserError{..}, _ ))) => {},
+            Err(RecvTimeoutError::Timeout) => {
+                panic!("parser did not finish within 50ms (infinite loop?)")
+            },
+            _ => panic!("expected a ParserError for mismatched BEGIN/END tags"),
+        }
+    }
+
+}