summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Unterwaditzer <markus@unterwaditzer.net>2015-05-31 23:03:15 +0200
committerMarkus Unterwaditzer <markus@unterwaditzer.net>2015-06-02 15:21:09 +0200
commit083620129bd316b5211e61b658a7d2b8a696b975 (patch)
treea9d07874e2f8502098f18a93d6103ab015b38433
parent31df4da79bf53862045bc39de135fac80f6681b5 (diff)
Rewrite rust-vobject without peg
-rw-r--r--Cargo.toml3
-rw-r--r--src/vobject/lib.rs417
2 files changed, 249 insertions, 171 deletions
diff --git a/Cargo.toml b/Cargo.toml
index ad0f2fb..cdb0ec3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,3 @@ license = "MIT"
[lib]
name = "vobject"
path = "src/vobject/lib.rs"
-
-[dependencies]
-peg = "*"
diff --git a/src/vobject/lib.rs b/src/vobject/lib.rs
index b77daf3..b2d4cb9 100644
--- a/src/vobject/lib.rs
+++ b/src/vobject/lib.rs
@@ -1,8 +1,5 @@
// DOCS
-#![feature(plugin,collections,str_char)]
-#![plugin(peg_syntax_ext)]
-
use std::collections::HashMap;
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::borrow::ToOwned;
@@ -66,9 +63,9 @@ pub struct Component {
}
impl Component {
- pub fn new(name: &str) -> Component {
+ pub fn new<T: Into<String>>(name: T) -> Component {
Component {
- name: name.to_string(),
+ name: name.into(),
props: HashMap::new(),
subcomponents: vec![]
}
@@ -89,8 +86,8 @@ impl Component {
/// Retrieve a mutable vector of properties for this key. Creates one (and inserts it into the
/// component) if none exists.
- pub fn all_props_mut(&mut self, key: &str) -> &mut Vec<Property> {
- match self.props.entry(String::from_str(key)) {
+ pub fn all_props_mut<T: Into<String>>(&mut self, key: T) -> &mut Vec<Property> {
+ match self.props.entry(key.into()) {
Occupied(values) => values.into_mut(),
Vacant(values) => values.insert(vec![])
}
@@ -118,14 +115,248 @@ impl FromStr for Component {
}
}
-/// Parse a component. The error value is a human-readable message.
+
+
+struct Parser<'s> { // Cursor-style parser state over a borrowed input string.
+ pub input: &'s str, // The complete text being parsed.
+ pub pos: usize, // Byte offset into `input` of the next unread character.
+}
+
+impl<'s> Parser<'s> {
+ pub fn new<'a>(input: &'a str) -> Parser<'a> { // Create a parser positioned at the start of `input`.
+ Parser {
+ input: input,
+ pos: 0, // Nothing has been consumed yet.
+ }
+ }
+
+ fn peek(&self) -> Option<char> { // Return the character at `pos` without consuming it; `None` when no input remains.
+ self.input[self.pos..].chars().next()
+ }
+
+ pub fn eof(&self) -> bool { // True once `pos` has reached (or passed) the end of `input`.
+ self.pos >= self.input.len()
+ }
+
+ fn assert_char(&self, c: char) -> ParseResult<()> { // Check that the next character equals `c`; never advances `pos`.
+ let real_c = match self.peek() {
+ Some(x) => x,
+ None => return Err(ParseError::new(format!("Expected {}, found EOL", c))), // NOTE(review): this branch is reached at end of input, yet the message says "EOL" - confirm wording.
+ };
+
+ if real_c != c {
+ return Err(ParseError::new(format!("Expected {}, found {}", c, real_c))) // Report both the expected and the actual character.
+ };
+
+ Ok(())
+ }
+
+ fn consume_char(&mut self) -> Option<char> { // Consume and return the next character; `None` (and no movement) when no input remains.
+ match self.peek() {
+ Some(x) => { self.pos += x.len_utf8(); Some(x) }, // Advance by the character's UTF-8 byte length so `pos` stays a byte offset.
+ None => None
+ }
+ }
+
+ fn consume_eol(&mut self) -> ParseResult<()> { // Consume exactly one line ending: "\n" or "\r\n".
+ // On failure `pos` is restored below, so a failed attempt consumes nothing.
+ let start_pos = self.pos;
+
+ let consumed = match self.consume_char() {
+ Some('\n') => true,
+ Some('\r') => match self.consume_char() {
+ Some('\n') => true,
+ _ => false, // NOTE(review): a lone '\r' is rejected here, whereas the removed peg `eol` rule also accepted "\r" - confirm this is intended.
+ },
+ _ => false,
+ };
+
+ if consumed {
+ Ok(())
+ } else {
+ self.pos = start_pos; // Undo the one or two characters consumed while probing.
+ Err(ParseError::new("Expected EOL."))
+ }
+ }
+
+ fn sloppy_terminate_line(&mut self) -> ParseResult<()> { // End the current line leniently; at end of input this is a successful no-op.
+ if !self.eof() {
+ try!(self.consume_eol()); // At least one line ending is required when input remains.
+ while let Ok(_) = self.consume_eol() {}; // Swallow any further consecutive line endings (i.e. blank lines).
+ };
+
+ Ok(())
+ }
+
+ fn consume_while<'a, F: Fn(char) -> bool>(&'a mut self, test: F) -> &'a str { // Consume characters for as long as `test` accepts them and return the consumed slice (possibly empty).
+ let start_pos = self.pos;
+ while !self.eof() && test(self.peek().unwrap()) { // The `eof()` check runs first, so `unwrap()` is only reached when `pos` is before the end of `input`.
+ self.consume_char();
+ }
+ &self.input[start_pos..self.pos]
+ }
+
+ pub fn consume_property(&mut self) -> ParseResult<Property> { // Parse one property of the form `[group.]name[;params]:value`.
+ let group = self.consume_property_group().ok(); // The group prefix is optional: a failed attempt rewinds `pos` and becomes `None`.
+ let name = try!(self.consume_property_name());
+ let params = self.consume_params(); // Never fails; yields an empty map when no ';' follows the name.
+
+ try!(self.assert_char(':')); // The name/params part must be followed by ':'.
+ self.consume_char(); // Step over the ':' that was just verified.
+
+ let value = try!(self.consume_property_value());
+
+ Ok(Property {
+ name: name,
+ params: params,
+ raw_value: value,
+ prop_group: group,
+ })
+ }
+
+ fn consume_property_name<'a>(&'a mut self) -> ParseResult<String> { // Consume a non-empty run of '-' or alphanumeric characters and return it as an owned string.
+ let rv = self.consume_while(|x| x == '-' || x.is_alphanumeric()); // NOTE(review): `is_alphanumeric` is Unicode-aware, while the removed peg `iana_token` was limited to [a-zA-Z0-9] - confirm the wider set is wanted.
+ if rv.len() == 0 {
+ Err(ParseError::new("No property name found.")) // Nothing matched, so `pos` has not moved.
+ } else {
+ Ok(rv.to_owned())
+ }
+ }
+
+ fn consume_property_group<'a>(&'a mut self) -> ParseResult<String> { // Consume a `group.` prefix and return the group name; on failure `pos` is restored.
+ let start_pos = self.pos;
+ let name = self.consume_property_name();
+
+ let e = match name {
+ Ok(name) => match self.assert_char('.') {
+ Ok(_) => {
+ self.consume_char(); // Step over the '.' separating group and property name.
+ return Ok(name);
+ },
+ Err(e) => Err(e), // A name was read but no '.' follows, so it was not a group.
+ },
+ Err(e) => Err(e),
+ };
+
+ self.pos = start_pos; // Rewind so the caller can re-read the same text as a property name.
+ e
+ }
+
+ fn consume_property_value<'a>(&'a mut self) -> ParseResult<String> { // Read a property value, joining continuation lines (lines that start with a space or tab).
+ let mut rv = String::new();
+ loop {
+ rv.push_str(self.consume_while(|x| x != '\r' && x != '\n')); // Take everything up to the end of the current physical line.
+ try!(self.sloppy_terminate_line()); // Consume the line ending(s); errors if input remains but no valid EOL follows.
+
+ match self.peek() {
+ Some(' ') | Some('\t') => self.consume_char(), // Continuation line: drop the single leading whitespace character and keep reading.
+ _ => break, // Any other character (or end of input) means the value is complete.
+ };
+ }
+ Ok(rv)
+ }
+
+ fn consume_param_name<'a>(&'a mut self) -> ParseResult<String> { // Consume a parameter name using the same character rules as a property name.
+ match self.consume_property_name() {
+ Ok(x) => Ok(x),
+ Err(e) => Err(ParseError::new(format!("No param name found: {}", e))), // Wrap the underlying error with parameter-specific wording.
+ }
+ }
+
+ fn consume_param_value<'a>(&'a mut self) -> ParseResult<String> { // Consume a parameter value, either double-quoted or bare.
+ let qsafe = |x| { // Characters allowed inside a quoted value: no '"', CR, LF, DEL (U+007F) or anything at or below U+001F.
+ x != '"' &&
+ x != '\r' &&
+ x != '\n' &&
+ x != '\u{7F}' &&
+ x > '\u{1F}'
+ };
+
+ if self.peek() == Some('"') {
+ self.consume_char(); // Skip the opening quote.
+ let rv = self.consume_while(qsafe).to_owned();
+ try!(self.assert_char('"')); // The quoted value must be closed; otherwise this is an error.
+ self.consume_char(); // Skip the closing quote.
+ Ok(rv)
+ } else {
+ Ok(self.consume_while(|x| qsafe(x) && x != ';' && x != ':').to_owned()) // A bare value additionally stops at ';' or ':' and may be empty.
+ }
+ }
+
+ fn consume_param<'a>(&'a mut self) -> ParseResult<(String, String)> { // Parse `name` or `name=value` and return the pair.
+ let name = try!(self.consume_param_name());
+ let value = if self.peek() == Some('=') {
+ let start_pos = self.pos; // Position of the '=' (the name has already been consumed).
+ self.consume_char();
+ match self.consume_param_value() {
+ Ok(x) => x,
+ Err(e) => { self.pos = start_pos; return Err(e); } // Rewinds only to the '='; the parameter name stays consumed.
+ }
+ } else {
+ String::new() // A parameter without '=' gets an empty value.
+ };
+
+ Ok((name, value))
+ }
+
+ fn consume_params(&mut self) -> HashMap<String, String> { // Collect zero or more `;name[=value]` parameters into a map; never returns an error.
+ let mut rv: HashMap<String, String> = HashMap::new();
+ while self.peek() == Some(';') {
+ self.consume_char(); // Step over the ';' introducing the next parameter.
+ match self.consume_param() {
+ Ok((name, value)) => { rv.insert(name.to_owned(), value.to_owned()); }, // A repeated name replaces the earlier value (HashMap::insert).
+ Err(_) => break, // NOTE(review): the error is dropped and the ';' above stays consumed - confirm the caller's ':' check is expected to report this.
+ }
+ };
+ rv
+ }
+
+ fn consume_component(&mut self) -> ParseResult<Component> { // Parse one `BEGIN:<name>` ... `END:<name>` component, including nested subcomponents.
+ let begin_pos = self.pos;
+ let mut property = try!(self.consume_property());
+ if property.name != "BEGIN" {
+ self.pos = begin_pos; // Not a component start: rewind so the caller can re-read this property.
+ return Err(ParseError::new("Expected BEGIN tag."));
+ };
+
+ let c_name = property.raw_value; // The value of the BEGIN property is the component's name.
+ let mut component = Component::new(&c_name[..]);
+
+ loop {
+ let previous_pos = self.pos;
+ property = try!(self.consume_property()); // Any property error (including running out of input before END) aborts the component.
+ if property.name == "BEGIN" {
+ self.pos = previous_pos; // Rewind to before the nested BEGIN and parse it as a subcomponent.
+ while let Ok(subcomponent) = self.consume_component() { // NOTE(review): errors from a malformed subcomponent are discarded here, and `pos` is not rewound on the `try!` paths - confirm.
+ component.subcomponents.push(subcomponent);
+ };
+ } else if property.name == "END" {
+ if property.raw_value != c_name {
+ self.pos = begin_pos; // Mismatched END: rewind to the start of this component before reporting.
+ return Err(ParseError::new(format!(
+ "Mismatched tags: BEGIN:{} vs END:{}",
+ c_name, property.raw_value
+ )));
+ };
+
+ break; // Matching END found; the component is complete.
+ } else {
+ component.all_props_mut(property.name.to_owned()).push(property); // Ordinary property: append it to the list stored under its name.
+ }
+ };
+
+ Ok(component)
+ }
+}
+
+/// Parse exactly one component from `s`. Any input left over after the component is reported as a
+/// "Trailing data" error; other failures carry the parser's human-readable message.
pub fn parse_component(s: &str) -> ParseResult<Component> {
- // XXX: The unfolding should be worked into the PEG
- // See feature request: https://github.com/kevinmehall/rust-peg/issues/26
- let unfolded = unfold_lines(s);
- match parser::component(&unfolded[..]) {
- Ok(x) => Ok(x),
- Err(e) => Err(ParseError::from_peg_error(e))
+ let mut parser = Parser::new(s);
+ let rv = try!(parser.consume_component()); // Propagate any parse error unchanged.
+ if !parser.eof() {
+ Err(ParseError::new(format!("Trailing data: `{}`", &parser.input[parser.pos..]))) // Include the unparsed remainder in the message.
+ } else {
+ Ok(rv)
}
}
@@ -195,14 +426,6 @@ pub fn unescape_chars(s: &str) -> String {
.replace("\\\\", "\\")
}
-/// Unfold contentline.
-pub fn unfold_lines(s: &str) -> String {
- s
- .replace("\r\n ", "").replace("\r\n\t", "")
- .replace("\n ", "").replace("\n\t", "")
- .replace("\r ", "").replace("\r\t", "")
-}
-
/// Fold contentline to 75 chars. This function assumes the input to be unfolded, which means no
/// '\n' or '\r' in it.
pub fn fold_line(s: &str) -> String {
@@ -218,8 +441,7 @@ pub fn fold_line(s: &str) -> String {
#[derive(PartialEq, Eq, Debug)]
pub struct ParseError {
- desc: String,
- orig: Option<parser::ParseError>
+ desc: String
}
pub type ParseResult<T> = Result<T, ParseError>;
@@ -230,10 +452,7 @@ impl Error for ParseError {
}
fn cause(&self) -> Option<&Error> {
- match self.parser_error() {
- Some(x) => Some(&*x),
- None => None
- }
+ None
}
}
@@ -244,151 +463,13 @@ impl fmt::Display for ParseError {
}
impl ParseError {
- pub fn new(desc: String, cause: Option<parser::ParseError>) -> Self {
+ pub fn new<T: Into<String>>(desc: T) -> Self {
ParseError {
- desc: desc,
- orig: cause
- }
- }
-
- pub fn from_peg_error(e: parser::ParseError) -> Self {
- ParseError {
- desc: format!("{}", e).to_owned(),
- orig: Some(e)
+ desc: desc.into(),
}
}
pub fn into_string(self) -> String {
self.desc
}
-
- /// Access the underlying parser error.
- pub fn parser_error(&self) -> Option<&parser::ParseError> {
- self.orig.as_ref()
- }
-
-
- /// The line where the error occured.
- ///
- /// The value might be warped because content lines are unfolded before the parser keeps track
- /// of line numbers.
- pub fn line(&self) -> Option<&usize> {
- match self.parser_error() {
- Some(e) => Some(&e.line),
- None => None
- }
- }
-
- /// The column where the error occured.
- ///
- /// The value might be warped because content lines are unfolded before the parser keeps track
- /// of line numbers.
- pub fn column(&self) -> Option<&usize> {
- match self.parser_error() {
- Some(e) => Some(&e.column),
- None => None
- }
- }
}
-
-
-peg! parser(r#"
-use super::{Component,Property};
-use std::collections::HashMap;
-
-components -> Vec<Component>
- = cs:component ** eols __ { cs }
-
- #[pub]
- component -> Component
- = name:component_begin
- ps:props
- cs:components
- component_end {
- let mut rv = Component::new(name);
- rv.subcomponents = cs;
-
- for (k, v) in ps.into_iter() {
- rv.all_props_mut(k).push(v);
- };
-
- rv
- }
-
- component_begin -> &'input str
- = "BEGIN:" v:value __ { v }
-
- component_end -> &'input str
- = "END:" v:value __ { v }
-
-props -> Vec<(&'input str, Property)>
- = ps:prop ++ eols __ { ps }
-
- prop -> (&'input str, Property)
- = !"BEGIN:" !"END:" g:group? k:name p:params ":" v:value {
- (k, Property { name: k.to_string(), params: p, raw_value: v.to_string(), prop_group: g })
- }
-
- group -> String
- = g:group_name "." { g.to_string() }
-
- group_name -> &'input str
- = group_char+ { match_str }
-
- name -> &'input str
- = iana_token+ { match_str }
-
- params -> HashMap<String, String>
- = ps:(";" p:param {p})* {
- let mut rv: HashMap<String, String> = HashMap::with_capacity(ps.len());
- rv.extend(ps.into_iter().map(|(k, v)| (k.to_string(), v.to_string())));
- rv
- }
-
- param -> (&'input str, &'input str)
- // FIXME: Doesn't handle comma-separated values
- = k:param_name v:("=" v:param_value { v })? {
- (k, match v {
- Some(x) => x,
- None => ""
- })
- }
-
- param_name -> &'input str
- = iana_token+ { match_str }
-
- param_value -> &'input str
- = x:(quoted_string / param_text) { x }
-
- param_text -> &'input str
- = safe_char* { match_str }
-
- value -> &'input str
- = value_char+ { match_str }
-
-
-quoted_string -> &'input str
- = dquote x:quoted_content dquote { x }
-
-quoted_content -> &'input str
- = qsafe_char* { match_str }
-
-iana_token = ([a-zA-Z0-9] / "-")+
-group_char = ([a-zA-Z0-9] / "-")
-qsafe_char = !dquote !ctl value_char
-safe_char = !";" !":" qsafe_char
-
-value_char = !eol .
-
-eol = "\r\n" / "\n" / "\r"
-dquote = "\""
-eols = eol+
-
-// Taken from vCard. vCalendar's is a subset. Together with the definition of "qsafe_char" this
-// might reject a bunch of valid iCalendars, but I can't imagine one.
-ctl = [\u{00}-\u{1F}] / "\u{7F}"
-
-whitespace = " " / "\t"
-__ = (eol / whitespace)*
-
-"#);