summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matthias Beyer <mail@beyermatthias.de>, 2017-10-27 18:49:45 +0200
committer: Markus Unterwaditzer <markus@unterwaditzer.net>, 2017-10-27 18:49:45 +0200
commit: 451c0e98a12110579df19709108c053bfa873060 (patch)
tree: dbe5cc392e95a2d73a6fefd31dd02ee32fb91190
parent: bf457b5b321791d6a2c1bcfb63be2831e32d8c78 (diff)
Refactoring (#15)
* Add dependency: error-chain, refactor to use error chain infrastructure
* Split code into modules
* Move tests to appropriate modules
-rw-r--r-- Cargo.toml | 4
-rw-r--r-- src/component.rs | 173
-rw-r--r-- src/error.rs | 16
-rw-r--r-- src/lib.rs | 628
-rw-r--r-- src/parser.rs | 374
-rw-r--r-- src/property.rs | 64
6 files changed, 643 insertions, 616 deletions
diff --git a/Cargo.toml b/Cargo.toml
index dabcf08..c24fea9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,3 +11,7 @@ keywords = ["vobject", "icalendar", "calendar", "contacts"]
version = "0.3.0"
authors = ["Markus Unterwaditzer <markus@unterwaditzer.net>"]
license = "MIT"
+
+[dependencies]
+error-chain = "0.10"
+
diff --git a/src/component.rs b/src/component.rs
new file mode 100644
index 0000000..4c46067
--- /dev/null
+++ b/src/component.rs
@@ -0,0 +1,173 @@
+use std::str::FromStr;
+use std::collections::HashMap;
+
+use property::Property;
+use parser::Parser;
+use error::*;
+
+/// A named vobject component together with its properties and nested
+/// sub-components.
+#[derive(Clone, Debug)]
+pub struct Component {
+ /// The name of the component, such as `VCARD` or `VEVENT`.
+ pub name: String,
+
+ /// The component's properties, keyed by property name. Each key maps to
+ /// every property stored under that name.
+ pub props: HashMap<String, Vec<Property>>,
+
+ /// The component's child- or sub-components.
+ pub subcomponents: Vec<Component>
+}
+
+impl Component {
+ /// Create an empty component with the given name: no properties and no
+ /// sub-components.
+ pub fn new<N: Into<String>>(name: N) -> Component {
+ Component {
+ name: name.into(),
+ props: HashMap::new(),
+ subcomponents: vec![]
+ }
+ }
+
+ /// Append the given property under its own name, preserving any other
+ /// properties already stored under that name.
+ pub fn push(&mut self, prop: Property) {
+ // The name is cloned for the key because `prop` itself is moved into the Vec.
+ self.props.entry(prop.name.clone()).or_insert_with(Vec::new).push(prop);
+ }
+
+ /// Set the given property, replacing every property previously stored
+ /// under the same name.
+ pub fn set(&mut self, prop: Property) {
+ self.props.insert(prop.name.clone(), vec![prop]);
+ }
+
+ /// Retrieve one property by key. Returns `None` if not exactly one property was found
+ /// (that is, if the key is missing or holds zero or several properties).
+ pub fn get_only<P: AsRef<str>>(&self, name: P) -> Option<&Property> {
+ match self.props.get(name.as_ref()) {
+ Some(x) if x.len() == 1 => Some(&x[0]),
+ _ => None
+ }
+ }
+
+ /// Retrieve properties by key. Returns an empty slice if key doesn't exist.
+ pub fn get_all<P: AsRef<str>>(&self, name: P) -> &[Property] {
+ // Shared empty slice so a missing key can be answered without allocating.
+ static EMPTY: &'static [Property] = &[];
+ match self.props.get(name.as_ref()) {
+ Some(values) => &values[..],
+ None => EMPTY
+ }
+ }
+
+ /// Remove a single property: the last one stored under `name`.
+ ///
+ /// Returns `None` if the name is unknown or no property is left under it.
+ /// The map entry for `name` is kept even when its list becomes empty.
+ pub fn pop<P: AsRef<str>>(&mut self, name: P) -> Option<Property> {
+ match self.props.get_mut(name.as_ref()) {
+ Some(values) => values.pop(),
+ None => None
+ }
+ }
+
+ /// Remove all properties stored under `name` and return them, or `None`
+ /// if the name is not present.
+ pub fn remove<P: AsRef<str>>(&mut self, name: P) -> Option<Vec<Property>> {
+ self.props.remove(name.as_ref())
+ }
+}
+
+/// Allows `"...".parse::<Component>()`; parsing is delegated to
+/// `parse_component`.
+impl FromStr for Component {
+ type Err = VObjectError;
+
+ /// Same as `vobject::parse_component`
+ fn from_str(s: &str) -> Result<Component> {
+ parse_component(s)
+ }
+}
+
+/// Parse exactly one component. Trailing data generates errors.
+///
+/// Any error from consuming the component is propagated unchanged. If input
+/// is left over afterwards, a `ParserError` is returned whose message quotes
+/// the unparsed remainder.
+pub fn parse_component(s: &str) -> Result<Component> {
+ let mut parser = Parser::new(s);
+ let rv = try!(parser.consume_component());
+ if !parser.eof() {
+ // Everything from the parser's current position onward was not consumed.
+ let s = format!("Trailing data: `{}`", &parser.input[parser.pos..]);
+ let kind = VObjectErrorKind::ParserError(s);
+ Err(VObjectError::from_kind(kind))
+ } else {
+ Ok(rv)
+ }
+}
+
+/// Write a component to a String.
+///
+/// The output is `BEGIN:<name>`, one line per property, every sub-component
+/// (written recursively in the same format), then `END:<name>`. Each line is
+/// terminated with CRLF. A property line has the shape
+/// `[group.]name[;key=value]*:value`.
+///
+/// Properties are visited by iterating the `props` map; since that map is a
+/// `HashMap`, the order in which differently-named properties are written is
+/// not specified.
+pub fn write_component(c: &Component) -> String {
+ // Recursive worker that appends one component (and its children) to `buf`.
+ fn inner(buf: &mut String, c: &Component) {
+ buf.push_str("BEGIN:");
+ buf.push_str(&c.name);
+ buf.push_str("\r\n");
+
+ for (prop_name, props) in &c.props {
+ for prop in props.iter() {
+ if let Some(ref x) = prop.prop_group {
+ buf.push_str(&x);
+ buf.push('.');
+ };
+ // The map key is written as the property name, not `prop.name`.
+ buf.push_str(&prop_name);
+ // Parameter keys and values are written verbatim.
+ for (param_key, param_value) in &prop.params {
+ buf.push(';');
+ buf.push_str(&param_key);
+ buf.push('=');
+ buf.push_str(&param_value);
+ }
+ buf.push(':');
+ // Only the raw value is folded; the group, name and parameters
+ // written above are not counted towards the fold limit.
+ buf.push_str(&fold_line(&prop.raw_value));
+ buf.push_str("\r\n");
+ }
+ }
+
+ for subcomponent in &c.subcomponents {
+ inner(buf, subcomponent);
+ }
+
+ buf.push_str("END:");
+ buf.push_str(&c.name);
+ buf.push_str("\r\n");
+ }
+
+ let mut buf = String::new();
+ inner(&mut buf, c);
+ buf
+}
+
+/// Fold contentline to 75 bytes or less. This function assumes the input
+/// to be unfolded, which means no '\n' or '\r' in it.
+///
+/// The input is cut into chunks of at most 75 bytes, never splitting a
+/// UTF-8 character, and the chunks are joined with `"\r\n "` (CRLF followed
+/// by a single space). Input of 75 bytes or less is returned unchanged.
+///
+/// NOTE(review): the 75-byte limit applies to each chunk of input; the space
+/// that starts a continuation line is added on top, so a continuation line
+/// can be 76 bytes long. Confirm whether that is acceptable for consumers
+/// that enforce the 75-octet recommendation of RFC 5545.
+pub fn fold_line(line: &str) -> String {
+ let limit = 75;
+ let len = line.len();
+ let mut bytes_remaining = len;
+ // Capacity estimate: the input plus 3 bytes ("\r\n ") per expected fold.
+ let mut ret = String::with_capacity(len + (len / limit * 3));
+
+ // `pos` is where the current chunk starts, `next_pos` where it would end.
+ let mut pos = 0;
+ let mut next_pos = limit;
+ while bytes_remaining > limit {
+ // Back up until the cut no longer falls inside a multi-byte character.
+ while line.is_char_boundary(next_pos) == false {
+ next_pos -= 1;
+ }
+ ret.push_str(&line[pos..next_pos]);
+ ret.push_str("\r\n ");
+
+ bytes_remaining -= next_pos - pos;
+ pos = next_pos;
+ next_pos += limit;
+ }
+
+ // `len - bytes_remaining` equals `pos`: append whatever is left (<= 75 bytes).
+ ret.push_str(&line[len - bytes_remaining..]);
+ ret
+}
+
+
+// Unit tests for the line-folding helper of this module.
+#[cfg(test)]
+mod tests {
+ use component::fold_line;
+
+ /// A long line mixing ASCII and multi-byte characters must be folded on
+ /// character boundaries, and a short line must come back unchanged.
+ #[test]
+ fn test_fold() {
+ let line = "This should be multiple lines and fold on char boundaries. 毎害止\
+ 加食下組多地将写館来局必第。東証細再記得玲祉込吉宣会法授";
+ // Expected output: the same text with "\r\n " inserted at each fold.
+ let expected = "This should be multiple lines and fold on char boundaries. 毎害止\
+ 加食\r\n 下組多地将写館来局必第。東証細再記得玲祉込吉宣会法\r\n 授";
+ assert_eq!(expected, fold_line(line));
+ assert_eq!("ab", fold_line("ab"));
+ }
+
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..d772e5e
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,16 @@
+
+// Error types of the crate, generated by the `error_chain!` macro.
+error_chain! {
+
+ // Names given to the generated items: the error type, the error-kind
+ // enum, the result extension trait and the `Result` alias.
+ types {
+ VObjectError, VObjectErrorKind, ResultExt, Result;
+ }
+
+ errors {
+ // Raised by the parser; `desc` carries the message, which is also
+ // what the error displays as.
+ ParserError(desc: String) {
+ description("Parser error")
+ display("{}", desc)
+ }
+ }
+
+
+}
diff --git a/src/lib.rs b/src/lib.rs
index e485f86..2626c53 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,621 +1,17 @@
// DOCS
-use std::collections::BTreeMap;
-use std::borrow::ToOwned;
-use std::str::FromStr;
-use std::fmt;
-use std::error::Error;
+#[macro_use]
+extern crate error_chain;
+pub mod component;
+pub mod error;
+mod parser;
+pub mod property;
-#[derive(Clone, Debug)]
-pub struct Property {
- /// Key in component.
- pub name: String,
+pub use component::Component;
+pub use component::parse_component;
+pub use component::write_component;
+pub use property::Property;
+pub use property::escape_chars;
+pub use property::unescape_chars;
- /// Parameters.
- pub params: BTreeMap<String, String>,
-
- /// Value as unparsed string.
- pub raw_value: String,
-
- /// Property group. E.g. a contentline like `foo.FN:Markus` would result in the group being
- /// `"foo"`.
- pub prop_group: Option<String>
-}
-
-impl Property {
- /// Create property from unescaped string.
- pub fn new<N, V>(name: N, value: V) -> Property
- where N: Into<String>,
- V: AsRef<str>
- {
- Property {
- name: name.into(),
- params: BTreeMap::new(),
- raw_value: escape_chars(value.as_ref()),
- prop_group: None
- }
- }
-
- /// Get value as unescaped string.
- pub fn value_as_string(&self) -> String {
- unescape_chars(&self.raw_value)
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct Component {
- /// The name of the component, such as `VCARD` or `VEVENT`.
- pub name: String,
-
- /// The component's properties.
- pub props: BTreeMap<String, Vec<Property>>,
-
- /// The component's child- or sub-components.
- pub subcomponents: Vec<Component>
-}
-
-impl Component {
- pub fn new<N: Into<String>>(name: N) -> Component {
- Component {
- name: name.into(),
- props: BTreeMap::new(),
- subcomponents: vec![]
- }
- }
-
- /// Append the given property, preserve other same-named properties.
- pub fn push(&mut self, prop: Property) {
- self.props.entry(prop.name.clone()).or_insert_with(Vec::new).push(prop);
- }
-
- /// Set the given property, remove other same-named properties.
- pub fn set(&mut self, prop: Property) {
- self.props.insert(prop.name.clone(), vec![prop]);
- }
-
- /// Retrieve one property by key. Returns `None` if not exactly one property was found.
- pub fn get_only<P: AsRef<str>>(&self, name: P) -> Option<&Property> {
- match self.props.get(name.as_ref()) {
- Some(x) if x.len() == 1 => Some(&x[0]),
- _ => None
- }
- }
-
- /// Retrieve properties by key. Returns an empty slice if key doesn't exist.
- pub fn get_all<P: AsRef<str>>(&self, name: P) -> &[Property] {
- static EMPTY: &'static [Property] = &[];
- match self.props.get(name.as_ref()) {
- Some(values) => &values[..],
- None => EMPTY
- }
- }
-
- /// Remove a single property.
- pub fn pop<P: AsRef<str>>(&mut self, name: P) -> Option<Property> {
- match self.props.get_mut(name.as_ref()) {
- Some(values) => values.pop(),
- None => None
- }
- }
-
- /// Remove all properties
- pub fn remove<P: AsRef<str>>(&mut self, name: P) -> Option<Vec<Property>> {
- self.props.remove(name.as_ref())
- }
-}
-
-impl FromStr for Component {
- type Err = ParseError;
-
- /// Same as `vobject::parse_component`
- fn from_str(s: &str) -> ParseResult<Component> {
- parse_component(s)
- }
-}
-
-
-
-struct Parser<'s> {
- pub input: &'s str,
- pub pos: usize,
-}
-
-impl<'s> Parser<'s> {
- pub fn new(input: &'s str) -> Self {
- Parser {
- input: input,
- pos: 0,
- }
- }
-
- /// look-ahead for next char at given offset from current position
- /// (self.pos), taking [line unfolding]
- /// (https://tools.ietf.org/html/rfc5545#section-3.1) into account,
- /// without actually
- /// consuming it (immutable self).
- ///
- /// Return an option for next char, and needed increment to consume it
- /// from current position.
- /// CR characters get always skipped, resulting in CRLF to be simplified as
- /// LF, which seems to be acceptable because
- /// - the remainders of the lib do accept a lone LF as a line termination
- /// (a bit laxer than RFC 5545)
- /// - CR alone [is not acceptable content]
- /// (https://tools.ietf.org/html/rfc5545#section-3.1)
- fn peek_at(&self, at: usize) -> Option<(char, usize)> {
- match self.input[self.pos+at..].chars().next() {
- None => None,
- Some('\r') => self.peek_at(at + 1),
- Some('\n') => {
- match self.peek_at(at + 1) {
- Some((' ', offset)) |
- Some(('\t', offset)) => self.peek_at(offset),
- _ => Some(('\n', at + 1)),
- }
- }
- Some(x) => Some((x, at + x.len_utf8()))
- }
- }
-
- #[inline]
- fn peek(&self) -> Option<(char, usize)> {
- self.peek_at(0)
- }
-
- pub fn eof(&self) -> bool {
- self.pos >= self.input.len()
- }
-
- fn assert_char(&self, c: char) -> ParseResult<()> {
- let real_c = match self.peek() {
- Some((x, _)) => x,
- None => return Err(ParseError::new(format!("Expected {}, found EOL", c))),
- };
-
- if real_c != c {
- return Err(ParseError::new(format!("Expected {}, found {}", c, real_c)))
- };
-
- Ok(())
- }
-
- fn consume_char(&mut self) -> Option<char> {
- match self.peek() {
- Some((c, offset)) => { self.pos += offset; Some(c) },
- None => None
- }
- }
-
- /// If next peeked char is the given `c`, consume it and return `true`,
- /// otherwise return `false`.
- fn consume_only_char(&mut self, c: char) -> bool {
- match self.peek() {
- Some((d, offset)) if d == c => { self.pos += offset; true },
- _ => false
- }
- }
-
- fn consume_eol(&mut self) -> ParseResult<()> {
- let start_pos = self.pos;
-
- let consumed = match self.consume_char() {
- Some('\n') => true,
- Some('\r') => match self.consume_char() {
- Some('\n') => true,
- _ => false,
- },
- _ => false,
- };
-
- if consumed {
- Ok(())
- } else {
- self.pos = start_pos;
- Err(ParseError::new("Expected EOL."))
- }
- }
-
- fn sloppy_terminate_line(&mut self) -> ParseResult<()> {
- if !self.eof() {
- try!(self.consume_eol());
- while let Ok(_) = self.consume_eol() {}
- };
-
- Ok(())
- }
-
- // GR this used to return just a slice from input, but line unfolding
- // makes it contradictory, unless one'd want to rescan everything.
- // Since actually useful calls used to_owned() on the result, which
- // does copy into a String's buffer, let's create a String right away
- // implementation detail : instead of pushing char after char, we
- // do it by the biggest contiguous slices possible, because I believe it
- // to be more efficient (less checks for reallocation etc).
- fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
- let mut sl_start_pos = self.pos;
- let mut res = String::new();
- while !self.eof() {
- match self.peek() {
- Some((c, offset)) => {
- if !test(c) {
- break
- } else {
- if offset > c.len_utf8() {
- // we have some skipping and therefore need to flush
- res.push_str(&self.input[sl_start_pos..self.pos]);
- res.push(c);
- sl_start_pos = self.pos + offset;
- }
- self.pos += offset;
- }
- },
- _ => break
- }
- }
- // Final flush
- if sl_start_pos < self.pos {
- res.push_str(&self.input[sl_start_pos..self.pos])
- }
- res
- }
-
- pub fn consume_property(&mut self) -> ParseResult<Property> {
- let group = self.consume_property_group().ok();
- let name = try!(self.consume_property_name());
- let params = self.consume_params();
-
- try!(self.assert_char(':'));
- self.consume_char();
-
- let value = try!(self.consume_property_value());
-
- Ok(Property {
- name: name,
- params: params,
- raw_value: value,
- prop_group: group,
- })
- }
-
- fn consume_property_name(&mut self) -> ParseResult<String> {
- let rv = self.consume_while(|x| x == '-' || x.is_alphanumeric());
- if rv.is_empty() {
- Err(ParseError::new("No property name found."))
- } else {
- Ok(rv)
- }
- }
-
- fn consume_property_group(&mut self) -> ParseResult<String> {
- let start_pos = self.pos;
- let name = self.consume_property_name();
-
- let e = match name {
- Ok(name) => match self.assert_char('.') {
- Ok(_) => {
- self.consume_char();
- return Ok(name);
- },
- Err(e) => Err(e),
- },
- Err(e) => Err(e),
- };
-
- self.pos = start_pos;
- e
- }
-
- fn consume_property_value(&mut self) -> ParseResult<String> {
- let rv = self.consume_while(|x| x != '\r' && x != '\n');
- try!(self.sloppy_terminate_line());
- Ok(rv)
- }
-
- fn consume_param_name(&mut self) -> ParseResult<String> {
- match self.consume_property_name() {
- Ok(x) => Ok(x),
- Err(e) => Err(ParseError::new(format!("No param name found: {}", e))),
- }
- }
-
- fn consume_param_value(&mut self) -> ParseResult<String> {
- let qsafe = |x| {
- x != '"' &&
- x != '\r' &&
- x != '\n' &&
- x != '\u{7F}' &&
- x > '\u{1F}'
- };
-
- if self.consume_only_char('"') {
- let rv = self.consume_while(qsafe);
- try!(self.assert_char('"'));
- self.consume_char();
- Ok(rv)
- } else {
- Ok(self.consume_while(|x| qsafe(x) && x != ';' && x != ':'))
- }
- }
-
- fn consume_param(&mut self) -> ParseResult<(String, String)> {
- let name = try!(self.consume_param_name());
- let start_pos = self.pos;
- let value = if self.consume_only_char('=') {
- match self.consume_param_value() {
- Ok(x) => x,
- Err(e) => { self.pos = start_pos; return Err(e); }
- }
- } else {
- String::new()
- };
-
- Ok((name, value))
- }
-
- fn consume_params(&mut self) -> BTreeMap<String, String> {
- let mut rv: BTreeMap<String, String> = BTreeMap::new();
- while self.consume_only_char(';') {
- match self.consume_param() {
- Ok((name, value)) => { rv.insert(name.to_owned(), value.to_owned()); },
- Err(_) => break,
- }
- }
- rv
- }
-
- fn consume_component(&mut self) -> ParseResult<Component> {
- let start_pos = self.pos;
- let mut property = try!(self.consume_property());
- if property.name != "BEGIN" {
- self.pos = start_pos;
- return Err(ParseError::new("Expected BEGIN tag."));
- };
-
- // Create a component with the name of the BEGIN tag's value
- let mut component = Component::new(property.raw_value);
-
- loop {
- let previous_pos = self.pos;
- property = try!(self.consume_property());
- if property.name == "BEGIN" {
- self.pos = previous_pos;
- component.subcomponents.push(try!(self.consume_component()));
- } else if property.name == "END" {
- if property.raw_value != component.name {
- self.pos = start_pos;
- return Err(ParseError::new(format!(
- "Mismatched tags: BEGIN:{} vs END:{}",
- component.name, property.raw_value
- )))
- }
-
- break;
- } else {
- component.push(property);
- }
- }
-
- Ok(component)
- }
-}
-
-/// Parse exactly one component. Trailing data generates errors.
-pub fn parse_component(s: &str) -> ParseResult<Component> {
- let mut parser = Parser::new(s);
- let rv = try!(parser.consume_component());
- if !parser.eof() {
- Err(ParseError::new(format!("Trailing data: `{}`", &parser.input[parser.pos..])))
- } else {
- Ok(rv)
- }
-}
-
-/// Write a component to a String.
-pub fn write_component(c: &Component) -> String {
- fn inner(buf: &mut String, c: &Component) {
- buf.push_str("BEGIN:");
- buf.push_str(&c.name);
- buf.push_str("\r\n");
-
- for (prop_name, props) in &c.props {
- for prop in props.iter() {
- if let Some(ref x) = prop.prop_group {
- buf.push_str(&x);
- buf.push('.');
- };
- buf.push_str(&prop_name);
- for (param_key, param_value) in &prop.params {
- buf.push(';');
- buf.push_str(&param_key);
- buf.push('=');
- buf.push_str(&param_value);
- }
- buf.push(':');
- buf.push_str(&fold_line(&prop.raw_value));
- buf.push_str("\r\n");
- }
- }
-
- for subcomponent in &c.subcomponents {
- inner(buf, subcomponent);
- }
-
- buf.push_str("END:");
- buf.push_str(&c.name);
- buf.push_str("\r\n");
- }
-
- let mut buf = String::new();
- inner(&mut buf, c);
- buf
-}
-
-/// Escape text for a VObject property value.
-pub fn escape_chars(s: &str) -> String {
- // Order matters! Lifted from icalendar.parser
- // https://github.com/collective/icalendar/
- s
- .replace("\\N", "\n")
- .replace("\\", "\\\\")
- .replace(";", "\\;")
- .replace(",", "\\,")
- .replace("\r\n", "\\n")
- .replace("\n", "\\n")
-}
-
-/// Unescape text from a VObject property value.
-pub fn unescape_chars(s: &str) -> String {
- // Order matters! Lifted from icalendar.parser
- // https://github.com/collective/icalendar/
- s
- .replace("\\N", "\\n")
- .replace("\r\n", "\n")
- .replace("\\n", "\n")
- .replace("\\,", ",")
- .replace("\\;", ";")
- .replace("\\\\", "\\")
-}
-
-/// Fold contentline to 75 bytes or less. This function assumes the input
-/// to be unfolded, which means no '\n' or '\r' in it.
-pub fn fold_line(line: &str) -> String {
- let limit = 75;
- let len = line.len();
- let mut bytes_remaining = len;
- let mut ret = String::with_capacity(len + (len / limit * 3));
-
- let mut pos = 0;
- let mut next_pos = limit;
- while bytes_remaining > limit {
- while line.is_char_boundary(next_pos) == false {
- next_pos -= 1;
- }
- ret.push_str(&line[pos..next_pos]);
- ret.push_str("\r\n ");
-
- bytes_remaining -= next_pos - pos;
- pos = next_pos;
- next_pos += limit;
- }
-
- ret.push_str(&line[len - bytes_remaining..]);
- ret
-}
-
-#[derive(PartialEq, Eq, Debug)]
-pub struct ParseError {
- desc: String
-}
-
-pub type ParseResult<T> = Result<T, ParseError>;
-
-impl Error for ParseError {
- fn description(&self) -> &str {
- &self.desc[..]
- }
-
- fn cause(&self) -> Option<&Error> {
- None
- }
-}
-
-impl fmt::Display for ParseError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- self.description().fmt(f)
- }
-}
-
-impl ParseError {
- pub fn new<T: Into<String>>(desc: T) -> Self {
- ParseError {
- desc: desc.into(),
- }
- }
-
- pub fn into_string(self) -> String {
- self.desc
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::{Parser, ParseError, fold_line};
-
- #[test]
- fn test_unfold1() {
- let mut p = Parser{input: "ab\r\n c", pos: 2};
- assert_eq!(p.consume_char(), Some('c'));
- assert_eq!(p.pos, 6);
- }
-
- #[test]
- fn test_unfold2() {
- let mut p = Parser{input: "ab\n\tc\nx", pos: 2};
- assert_eq!(p.consume_char(), Some('c'));
- assert_eq!(p.consume_char(), Some('\n'));
- assert_eq!(p.consume_char(), Some('x'));
- }
-
- #[test]
- fn test_fold() {
- let line = "This should be multiple lines and fold on char boundaries. 毎害止\
- 加食下組多地将写館来局必第。東証細再記得玲祉込吉宣会法授";
- let expected = "This should be multiple lines and fold on char boundaries. 毎害止\
- 加食\r\n 下組多地将写館来局必第。東証細再記得玲祉込吉宣会法\r\n 授";
- assert_eq!(expected, fold_line(line));
- assert_eq!("ab", fold_line("ab"));
- }
-
- #[test]
- fn test_consume_while() {
- let mut p = Parser{input:"af\n oo:bar", pos: 1};
- assert_eq!(p.consume_while(|x| x != ':'), "foo");
- assert_eq!(p.consume_char(), Some(':'));
- assert_eq!(p.consume_while(|x| x != '\n'), "bar");
- }
-
- #[test]
- fn test_consume_while2() {
- let mut p = Parser{input:"af\n oo\n\t:bar", pos: 1};
- assert_eq!(p.consume_while(|x| x != ':'), "foo");
- assert_eq!(p.consume_char(), Some(':'));
- assert_eq!(p.consume_while(|x| x != '\n'), "bar");
- }
-
- #[test]
- fn test_consume_while3() {
- let mut p = Parser{input:"af\n oo:\n bar", pos: 1};
- assert_eq!(p.consume_while(|x| x != ':'), "foo");
- assert_eq!(p.consume_char(), Some(':'));
- assert_eq!(p.consume_while(|x| x != '\n'), "bar");
- }
-
- #[test]
- fn test_consume_only_char() {
- let mut p = Parser{input:"\n \"bar", pos: 0};
- assert!(p.consume_only_char('"'));
- assert_eq!(p.pos, 3);
- assert!(!p.consume_only_char('"'));
- assert_eq!(p.pos, 3);
- assert!(p.consume_only_char('b'));
- assert_eq!(p.pos, 4);
- }
-
- #[test]
- fn mismatched_begin_end_tags_returns_error() {
- // Test for infinite loops as well
- use std::sync::mpsc::{channel, RecvTimeoutError};
- use std::time::Duration;
- let mut p = Parser {input: "BEGIN:a\nBEGIN:b\nEND:a", pos: 0};
-
- let (tx, rx) = channel();
- ::std::thread::spawn(move|| { tx.send(p.consume_component()) });
-
- match rx.recv_timeout(Duration::from_millis(50)) {
- Err(RecvTimeoutError::Timeout) => assert!(false),
- Ok(Err(ParseError {desc: _} )) => assert!(true),
- _ => assert!(false),
- }
- }
-}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..0276ec3
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,374 @@
+use std::collections::HashMap;
+
+use component::Component;
+use property::Property;
+use error::*;
+
+/// Parser state: the text being parsed and the current position in it.
+pub struct Parser<'s> {
+ /// The complete input being parsed.
+ pub input: &'s str,
+ /// Current position, as a byte offset into `input`.
+ pub pos: usize,
+}
+
+impl<'s> Parser<'s> {
+ /// Create a parser positioned at the start of `input`.
+ pub fn new(input: &'s str) -> Self {
+ Parser {
+ input: input,
+ pos: 0,
+ }
+ }
+
+ /// Look ahead for the next char at the given offset from the current
+ /// position (`self.pos`), taking [line unfolding]
+ /// (https://tools.ietf.org/html/rfc5545#section-3.1) into account,
+ /// without actually consuming it (immutable `self`).
+ ///
+ /// Returns the next char together with the increment needed to consume it
+ /// from the current position, or `None` if no char is left.
+ ///
+ /// CR characters are always skipped, so CRLF is simplified to LF. This
+ /// seems acceptable because
+ /// - the rest of the library accepts a lone LF as a line termination
+ /// (a bit laxer than RFC 5545)
+ /// - CR alone [is not acceptable content]
+ /// (https://tools.ietf.org/html/rfc5545#section-3.1)
+ fn peek_at(&self, at: usize) -> Option<(char, usize)> {
+ match self.input[self.pos+at..].chars().next() {
+ None => None,
+ // Skip the CR and look at whatever follows it.
+ Some('\r') => self.peek_at(at + 1),
+ Some('\n') => {
+ match self.peek_at(at + 1) {
+ // LF followed by a space or tab is a fold: skip both and
+ // return the char that comes after the whitespace.
+ Some((' ', offset)) |
+ Some(('\t', offset)) => self.peek_at(offset),
+ // A real line break.
+ _ => Some(('\n', at + 1)),
+ }
+ }
+ Some(x) => Some((x, at + x.len_utf8()))
+ }
+ }
+
+ /// Look at the next char from the current position without consuming it;
+ /// shorthand for `peek_at(0)`.
+ #[inline]
+ fn peek(&self) -> Option<(char, usize)> {
+ self.peek_at(0)
+ }
+
+ /// Returns `true` once the position has reached (or passed) the end of
+ /// the input.
+ pub fn eof(&self) -> bool {
+ self.pos >= self.input.len()
+ }
+
+ /// Check that the next peeked char is `c`, without consuming anything.
+ ///
+ /// Returns a `ParserError` if a different char is found or if no char is
+ /// left.
+ fn assert_char(&self, c: char) -> Result<()> {
+ let real_c = match self.peek() {
+ Some((x, _)) => x,
+ None => {
+ // `peek` yielded nothing, i.e. there is no further char to read;
+ // the message reports this case as "EOL".
+ let kind = VObjectErrorKind::ParserError(format!("Expected {}, found EOL", c));
+ return Err(VObjectError::from_kind(kind))
+ }
+ };
+
+ if real_c != c {
+ let kind = VObjectErrorKind::ParserError(format!("Expected {}, found {}", c, real_c));
+ return Err(VObjectError::from_kind(kind))
+ };
+
+ Ok(())
+ }
+
+ /// Consume the next peeked char: advance the position past it (including
+ /// anything `peek` skipped on the way) and return it. Returns `None`, and
+ /// leaves the position untouched, if no char is left.
+ pub fn consume_char(&mut self) -> Option<char> {
+ match self.peek() {
+ Some((c, offset)) => { self.pos += offset; Some(c) },
+ None => None
+ }
+ }
+
+ /// If the next peeked char is the given `c`, consume it and return `true`;
+ /// otherwise leave the position untouched and return `false`.
+ pub fn consume_only_char(&mut self, c: char) -> bool {
+ match self.peek() {
+ Some((d, offset)) if d == c => { self.pos += offset; true },
+ _ => false
+ }
+ }
+
+ /// Consume one end-of-line.
+ ///
+ /// On failure the position is restored to where it was before the call
+ /// and a `ParserError` ("Expected EOL.") is returned.
+ fn consume_eol(&mut self) -> Result<()> {
+ // Remembered so a failed attempt can be rolled back.
+ let start_pos = self.pos;
+
+ let consumed = match self.consume_char() {
+ Some('\n') => true,
+ // NOTE(review): `peek_at` skips every '\r', so `consume_char` does
+ // not appear able to return one; this arm looks unreachable. Confirm.
+ Some('\r') => match self.consume_char() {
+ Some('\n') => true,
+ _ => false,
+ },
+ _ => false,
+ };
+
+ if consumed {
+ Ok(())
+ } else {
+ self.pos = start_pos;
+ let kind = VObjectErrorKind::ParserError("Expected EOL.".to_owned());
+ Err(VObjectError::from_kind(kind))
+ }
+ }
+
+ /// Terminate the current line leniently.
+ ///
+ /// At the end of the input nothing is required. Otherwise one end-of-line
+ /// must follow (its error is propagated if it does not), and any further
+ /// consecutive end-of-lines are consumed as well.
+ fn sloppy_terminate_line(&mut self) -> Result<()> {
+ if !self.eof() {
+ try!(self.consume_eol());
+ // Swallow additional line breaks; the first failure ends the loop.
+ while let Ok(_) = self.consume_eol() {}
+ };
+
+ Ok(())
+ }
+
+ /// Consume chars for as long as `test` accepts them and return them as a
+ /// `String`. Stops, without consuming, at the first rejected char or when
+ /// no char is left.
+ // GR: this used to return just a slice of the input, but line unfolding
+ // makes that contradictory, unless one wanted to rescan everything.
+ // Since the actually useful calls used to_owned() on the result, which
+ // copies into a String's buffer anyway, a String is created right away.
+ // Implementation detail: instead of pushing char after char, the text is
+ // copied in the biggest contiguous slices possible, because this is
+ // believed to be more efficient (fewer checks for reallocation etc).
+ pub fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
+ // Start of the contiguous input slice that has not been copied yet.
+ let mut sl_start_pos = self.pos;
+ let mut res = String::new();
+ while !self.eof() {
+ match self.peek() {
+ Some((c, offset)) => {
+ if !test(c) {
+ break
+ } else {
+ // An offset larger than the char itself means `peek`
+ // skipped bytes before `c`, so the slice is interrupted.
+ if offset > c.len_utf8() {
+ // we have some skipping and therefore need to flush
+ res.push_str(&self.input[sl_start_pos..self.pos]);
+ res.push(c);
+ // The next slice starts right after `c`.
+ sl_start_pos = self.pos + offset;
+ }
+ self.pos += offset;
+ }
+ },
+ _ => break
+ }
+ }
+ // Final flush
+ if sl_start_pos < self.pos {
+ res.push_str(&self.input[sl_start_pos..self.pos])
+ }
+ res
+ }
+
+ /// Consume one property: an optional group, the property name, the
+ /// parameters, a `:` and finally the value.
+ ///
+ /// Errors from the name, the `:` check or the value are propagated.
+ pub fn consume_property(&mut self) -> Result<Property> {
+ // The group is optional: a failure to read one is simply discarded.
+ let group = self.consume_property_group().ok();
+ let name = try!(self.consume_property_name());
+ let params = self.consume_params();
+
+ // The value must be introduced by ':'; check first, then consume it.
+ try!(self.assert_char(':'));
+ self.consume_char();
+
+ let value = try!(self.consume_property_value());
+
+ Ok(Property {
+ name: name,
+ params: params,
+ raw_value: value,
+ prop_group: group,
+ })
+ }
+
+ /// Consume a property name: a run of alphanumeric chars and `-`.
+ ///
+ /// Returns a `ParserError` ("No property name found.") if the run is empty.
+ fn consume_property_name(&mut self) -> Result<String> {
+ let rv = self.consume_while(|x| x == '-' || x.is_alphanumeric());
+ if rv.is_empty() {
+ let kind = VObjectErrorKind::ParserError("No property name found.".to_owned());
+ Err(VObjectError::from_kind(kind))
+ } else {
+ Ok(rv)
+ }
+ }
+
+ fn consume_property_group(&mut self) -> Result<String> {
+ let start_pos = self.pos;
+ let name = self.consume_property_name();
+
+ let e = match name {
+ Ok(name) => match self.assert_char('.') {
<