diff options
author | Neal H. Walfield <neal@pep.foundation> | 2019-11-21 16:34:28 +0100 |
---|---|---|
committer | Neal H. Walfield <neal@pep.foundation> | 2019-11-21 16:50:44 +0100 |
commit | 28be944bd9f6548b2eee5fc222ffeb52c9d2f8aa (patch) | |
tree | 247030a61766a0c664a8466c74ad7e258f435e3e | |
parent | b251f9e8857fba284f515061ac62013519997e30 (diff) |
openpgp: When parsing User IDs, recognize URIs.
- Restore the functionality removed in 8693a005 when replacing the
RFC 2822 mailbox parser.
-rw-r--r-- | openpgp-ffi/include/sequoia/openpgp.h | 13 | ||||
-rw-r--r-- | openpgp-ffi/src/packet/userid.rs | 40 | ||||
-rw-r--r-- | openpgp/src/packet/userid/mod.rs | 455 |
3 files changed, 431 insertions, 77 deletions
diff --git a/openpgp-ffi/include/sequoia/openpgp.h b/openpgp-ffi/include/sequoia/openpgp.h index 6dadcbc0..fccc79d4 100644 --- a/openpgp-ffi/include/sequoia/openpgp.h +++ b/openpgp-ffi/include/sequoia/openpgp.h @@ -1266,6 +1266,19 @@ pgp_status_t pgp_user_id_email_normalized(pgp_error_t *errp, pgp_packet_t uid, char **emailp); /*/ +/// Returns the User ID's URI, if any. +/// +/// The User ID is parsed according to de factor convention, and the +/// URI is extracted. +/// +/// If the User ID cannot be parsed, then an error is returned. +/// +/// If the User ID does not contain a URI, *urip is set to NULL. +/*/ +pgp_status_t pgp_user_id_uri(pgp_error_t *errp, pgp_packet_t uid, + char **uri); + +/*/ /// Returns the value of the User Attribute Packet. /// /// The returned pointer is valid until `ua` is deallocated. If diff --git a/openpgp-ffi/src/packet/userid.rs b/openpgp-ffi/src/packet/userid.rs index 168227c4..9935c2ca 100644 --- a/openpgp-ffi/src/packet/userid.rs +++ b/openpgp-ffi/src/packet/userid.rs @@ -249,6 +249,46 @@ fn pgp_user_id_email( Status::Success } +/// Returns the User ID's URI, if any. +/// +/// The User ID is parsed according to de factor convention, and the +/// URI is extracted. +/// +/// If the User ID cannot be parsed, then an error is returned. +/// +/// If the User ID does not contain a URI, *urip is set to NULL. +#[::sequoia_ffi_macros::extern_fn] #[no_mangle] +pub extern "C" +fn pgp_user_id_uri( + errp: Option<&mut *mut crate::error::Error>, uid: *const Packet, + urip: &mut *mut c_char) + -> Status +{ + ffi_make_fry_from_errp!(errp); + let uid = uid.ref_raw(); + + if let &openpgp::Packet::UserID(ref uid) = uid { + match uid.uri() { + Ok(Some(uri)) => + *urip = ffi_return_string!(uri), + Ok(None) => + *urip = ::std::ptr::null_mut(), + Err(err) => { + use crate::MoveIntoRaw; + let status = crate::error::Status::from(&err); + if let Some(errp) = errp { + *errp = err.move_into_raw(); + } + return status; + } + } + } else { + panic!("Not a UserID packet"); + } + + Status::Success +} + /// Returns a normalized version of the UserID's email address. /// /// Normalized email addresses are primarily needed when email diff --git a/openpgp/src/packet/userid/mod.rs b/openpgp/src/packet/userid/mod.rs index b4ed1432..ac4cb9f1 100644 --- a/openpgp/src/packet/userid/mod.rs +++ b/openpgp/src/packet/userid/mod.rs @@ -25,6 +25,12 @@ use crate::Error; /// - <name@example.org> /// - name@example.org /// +/// - Name (Comment) <scheme://hostname/path> +/// - Name (Comment) <mailto:user@example.org> +/// - Name <scheme://hostname/path> +/// - <scheme://hostname/path> +/// - scheme://hostname/path +/// /// Names consist of UTF-8 non-control characters and may include /// punctuation. For instance, the following names are valid: /// @@ -52,6 +58,36 @@ use crate::Error; /// /// - (foo (bar)) /// +/// URIs +/// ---- +/// +/// The URI parser recognizes URIs using a regular expression similar +/// to the one recommended in [RFC 3986] with the following extensions +/// and restrictions: +/// +/// - UTF-8 characters are in the range \u{80}-\u{10ffff} are +/// allowed wherever percent-encoded characters are allowed (i.e., +/// everywhere but the schema). +/// +/// - The scheme component and its trailing ":" are required. +/// +/// - The URI must have an authority component ("//domain") or a +/// path component ("/path/to/resource"). +/// +/// - Although the RFC does not allow it, in practice, the '[' and +/// ']' characters are allowed wherever percent-encoded characters +/// are allowed (i.e., everywhere but the schema). +/// +/// URIs are neither normalized nor interpreted. For instance, dot +/// segments are not removed, escape sequences are not decoded, etc. +/// +/// Note: the recommended regular expression is less strict than the +/// grammar. For instance, a percent encoded character must consist +/// of three characters: the percent character followed by two hex +/// digits. The parser that we use does not enforce this either. +/// +/// [RFC 3986]: https://tools.ietf.org/html/rfc3986 +/// /// Formal Grammar /// -------------- /// @@ -102,8 +138,12 @@ use crate::Error; /// /// addr-spec = dot-atom-text "@" dot-atom-text /// +/// uri = See [RFC 3986] and the note on URIs above. +/// /// pgp-uid-convention = addr-spec / +/// uri / /// *WS [name] *WS [comment] *WS "<" addr-spec ">" / +/// *WS [name] *WS [comment] *WS "<" uri ">" / /// *WS name *WS [comment] *WS #[derive(Clone, Debug)] pub struct ConventionallyParsedUserID { @@ -112,9 +152,7 @@ pub struct ConventionallyParsedUserID { name: Option<(usize, usize)>, comment: Option<(usize, usize)>, email: Option<(usize, usize)>, - - // XXX: Add support for URIs. - // uri: Option<(usize, usize)>, + uri: Option<(usize, usize)>, } impl ConventionallyParsedUserID { @@ -140,6 +178,14 @@ impl ConventionallyParsedUserID { self.email.map(|(s, e)| &self.userid[s..e]) } + /// Returns the User ID's URI component, if any. + /// + /// Note: the URI is returned as is; dot segments are not removed, + /// escape sequences are not unescaped, etc. + pub fn uri(&self) -> Option<&str> { + self.uri.map(|(s, e)| &self.userid[s..e]) + } + fn parse(userid: String) -> Result<Self> { lazy_static!{ static ref USER_ID_PARSER: Regex = { @@ -191,22 +237,96 @@ impl ConventionallyParsedUserID { let addr_spec = format!("(?:{}@{})", dot_atom_text, dot_atom_text); + let uri = |prefix| { + // The regex suggested from the RFC: + // + // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + // ^schema ^authority ^path ^query ^fragment + // + // Since only the path component is required, and + // the path matches everything but the '?' and '#' + // characters, this regular expression will match + // almost any string. + // + // This regular expression is good for picking + // apart strings that are known to be URIs. But, + // we want to detect URIs and distinguish them + // from things that are almost certainly not URIs, + // like email addresses. + // + // As such, we require the URI to have a + // well-formed schema, and the schema must be + // followed by a non-empty component. Further, we + // restrict the alphabet to approximately what the + // grammar permits. + + // Looking at the productions for the schema, + // authority, path, query, and fragment + // components, we can distil the following useful + // alphabets (the symbols are drawn from the + // following pct-encoded, unreserved, gen-delims, + // sub-delims, pchar, and IP-literal productions): + let symbols = "-{}0-9._~%!$&'()*+,;=:@\\[\\]"; + let ascii_alpha = "a-zA-Z"; + let utf8_alpha = "a-zA-Z\u{80}-\u{10ffff}"; + + // We strictly match the schema production: + // + // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + let schema + = format!("(?:[{}][-+.{}0-9]*:)", + ascii_alpha, ascii_alpha); + + // The symbols that can occur in a fragment are a + // superset of those that can occur in a query and + // its delimiters. Likewise, the symbols that can + // occur in a query are a superset of those that + // can occur in a path and its delimiters. The + // symbols that can occur in a path are *almost* a + // subset of those that can occur in an authority: + // '[' and ']' can occur in an authority component + // (via the IP-literal production, e.g., + // '[2001:db8::7]'), but not in a path. But, URI + // parsers appear to accept '[' and ']' as part of + // a path. So, we accept them too. + // + // Given this, a fragment matches all components + // and everything that precedes it. Since we + // don't need to distinguish the individual parts + // here, matching what follows the schema in a URI + // is straightforward: + let rest = format!("(?:[{}{}/\\?#]+)", + symbols, utf8_alpha); + + format!("(?P<{}_uri>{}{})", + prefix, schema, rest) + }; - let addr_spec_raw + let raw_addr_spec = format!("(?P<raw_addr_spec>{})", addr_spec); + let raw_uri = format!("(?:{})", uri("raw")); + // whitespace is ignored. It is allowed (but not // required) at the start and between components, but // it is not allowed after the closing '>'. space is // not allowed. - let addr_spec_wrapped - = format!("{}(?P<wrapped_name>{})?{}\ + let wrapped_addr_spec + = format!("{}(?P<wrapped_addr_spec_name>{})?{}\ (:?{})?{}\ <(?P<wrapped_addr_spec>{})>", optional_ws, name, optional_ws, - comment("wrapped"), optional_ws, + comment("wrapped_addr_spec"), optional_ws, addr_spec); + let wrapped_uri + = format!("{}(?P<wrapped_uri_name>{})?{}\ + (?:{})?{}\ + <(?:{})>", + optional_ws, name, optional_ws, + comment("wrapped_uri"), optional_ws, + uri("wrapped")); + let bare_name = format!("{}(?P<bare_name>{}){}\ (?:{})?{}", @@ -217,8 +337,10 @@ impl ConventionallyParsedUserID { // prefer addr-spec-raw to bare-name when the match is // ambiguous. let pgp_uid_convention - = format!("^(?:{}|{}|{})$", - addr_spec_raw, addr_spec_wrapped, bare_name); + = format!("^(?:{}|{}|{}|{}|{})$", + raw_addr_spec, raw_uri, + wrapped_addr_spec, wrapped_uri, + bare_name); Regex::new(&pgp_uid_convention).unwrap() }; @@ -229,45 +351,71 @@ impl ConventionallyParsedUserID { if let Some(cap) = USER_ID_PARSER.captures_iter(&userid).nth(0) { let to_range = |m: regex::Match| (m.start(), m.end()); - match (cap.name("raw_addr_spec"), cap.name("bare_name")) { - // addr-spec-raw - (Some(email), None) => { - let email = Some(to_range(email)); - let comment = cap.name("bare_comment").map(to_range); - - Ok(ConventionallyParsedUserID { - userid: userid, - name: None, - comment: comment, - email: email, - }) - } - // addr-spec-wrapped - (None, None) => { - let name = cap.name("wrapped_name").map(to_range); - let comment = cap.name("wrapped_comment").map(to_range); - let email = cap.name("wrapped_addr_spec").map(to_range); - - Ok(ConventionallyParsedUserID { - userid: userid, - name: name, - comment: comment, - email: email, - }) - } - // bare name - (None, Some(name)) => { - let name = Some(to_range(name)); - let comment = cap.name("bare_comment").map(to_range); - - Ok(ConventionallyParsedUserID { - userid: userid, - name: name, - comment: comment, - email: None, - }) - } - _ => panic!("Unexpected result"), + // We need to figure out which branch matched. Match on a + // required capture for each branch. + + if let Some(email) = cap.name("raw_addr_spec") { + // raw-addr-spec + let email = Some(to_range(email)); + + Ok(ConventionallyParsedUserID { + userid: userid, + name: None, + comment: None, + email: email, + uri: None, + }) + } else if let Some(uri) = cap.name("raw_uri") { + // raw-uri + let uri = Some(to_range(uri)); + + Ok(ConventionallyParsedUserID { + userid: userid, + name: None, + comment: None, + email: None, + uri: uri, + }) + } else if let Some(email) = cap.name("wrapped_addr_spec") { + // wrapped-addr-spec + let name = cap.name("wrapped_addr_spec_name").map(to_range); + let comment = cap.name("wrapped_addr_spec_comment").map(to_range); + let email = Some(to_range(email)); + + Ok(ConventionallyParsedUserID { + userid: userid, + name: name, + comment: comment, + email: email, + uri: None, + }) + } else if let Some(uri) = cap.name("wrapped_uri") { + // uri-wrapped + let name = cap.name("wrapped_uri_name").map(to_range); + let comment = cap.name("wrapped_uri_comment").map(to_range); + let uri = Some(to_range(uri)); + + Ok(ConventionallyParsedUserID { + userid: userid, + name: name, + comment: comment, + email: None, + uri: uri, + }) + } else if let Some(name) = cap.name("bare_name") { + // name-bare + let name = to_range(name); + let comment = cap.name("bare_comment").map(to_range); + + Ok(ConventionallyParsedUserID { + userid: userid, + name: Some(name), + comment: comment, + email: None, + uri: None, + }) + } else { + panic!("Unexpected result"); } } else { return Err(Error::InvalidArgument( @@ -609,6 +757,16 @@ impl UserID { } } + /// Parses the User ID according to de facto conventions, and + /// returns the URI, if any. + pub fn uri(&self) -> Result<Option<String>> { + self.do_parse()?; + match *self.parsed.lock().unwrap().borrow() { + Some(ref puid) => Ok(puid.uri().map(|s| s.to_string())), + None => unreachable!(), + } + } + /// Returns a normalized version of the UserID's email address. /// /// Normalized email addresses are primarily needed when email @@ -690,14 +848,16 @@ mod tests { tracer!(true, "decompose", 0); fn c(userid: &str, - name: Option<&str>, comment: Option<&str>, email: Option<&str>) + name: Option<&str>, comment: Option<&str>, + email: Option<&str>, uri: Option<&str>) -> bool { match ConventionallyParsedUserID::new(userid) { Ok(puid) => { let good = puid.name() == name && puid.comment() == comment - && puid.email() == email; + && puid.email() == email + && puid.uri() == uri; if ! good { t!("userid: {}", userid); @@ -714,6 +874,10 @@ mod tests { puid.email(), if puid.email() == email { "=" } else { "!" }, email); + t!(" {:?} {}= {:?}", + puid.uri(), + if puid.uri() == uri { "=" } else { "!" }, + uri); t!(" -> BAD PARSE"); } @@ -730,85 +894,213 @@ mod tests { // Conventional User IDs: g &= c("First Last (Comment) <name@example.org>", - Some("First Last"), Some("Comment"), Some("name@example.org")); + Some("First Last"), Some("Comment"), Some("name@example.org"), None); g &= c("First Last <name@example.org>", - Some("First Last"), None, Some("name@example.org")); - g &= c("First Last", Some("First Last"), None, None); + Some("First Last"), None, Some("name@example.org"), None); + g &= c("First Last", Some("First Last"), None, None, None); g &= c("name@example.org <name@example.org>", - Some("name@example.org"), None, Some("name@example.org")); + Some("name@example.org"), None, Some("name@example.org"), None); g &= c("<name@example.org>", - None, None, Some("name@example.org")); + None, None, Some("name@example.org"), None); g &= c("name@example.org", - None, None, Some("name@example.org")); + None, None, Some("name@example.org"), None); // Examples from dkg's mail: g &= c("Björn Björnson <bjoern@example.net>", - Some("Björn Björnson"), None, Some("bjoern@example.net")); + Some("Björn Björnson"), None, Some("bjoern@example.net"), None); // We explicitly don't support RFC 2047 so the following is // correctly not escaped. g &= c("Bj=?utf-8?q?=C3=B6?=rn Bj=?utf-8?q?=C3=B6?=rnson \ <bjoern@example.net>", Some("Bj=?utf-8?q?=C3=B6?=rn Bj=?utf-8?q?=C3=B6?=rnson"), - None, Some("bjoern@example.net")); + None, Some("bjoern@example.net"), None); g &= c("Acme Industries, Inc. <info@acme.example>", - Some("Acme Industries, Inc."), None, Some("info@acme.example")); + Some("Acme Industries, Inc."), None, Some("info@acme.example"), None); g &= c("Michael O'Brian <obrian@example.biz>", - Some("Michael O'Brian"), None, Some("obrian@example.biz")); + Some("Michael O'Brian"), None, Some("obrian@example.biz"), None); g &= c("Smith, John <jsmith@example.com>", - Some("Smith, John"), None, Some("jsmith@example.com")); + Some("Smith, John"), None, Some("jsmith@example.com"), None); g &= c("mariag@example.org", - None, None, Some("mariag@example.org")); + None, None, Some("mariag@example.org"), None); g &= c("joe@example.net <joe@example.net>", - Some("joe@example.net"), None, Some("joe@example.net")); + Some("joe@example.net"), None, Some("joe@example.net"), None); g &= c("иван.сергеев@пример.рф", - None, None, Some("иван.сергеев@пример.рф")); + None, None, Some("иван.сергеев@пример.рф"), None); g &= c("Dörte@Sörensen.example.com", - None, None, Some("Dörte@Sörensen.example.com")); + None, None, Some("Dörte@Sörensen.example.com"), None); // Some craziness. g &= c("Vorname Nachname, Dr.", - Some("Vorname Nachname, Dr."), None, None); + Some("Vorname Nachname, Dr."), None, None, None); g &= c("Vorname Nachname, Dr. <dr@example.org>", - Some("Vorname Nachname, Dr."), None, Some("dr@example.org")); + Some("Vorname Nachname, Dr."), None, Some("dr@example.org"), None); // Only the last comment counts as a comment. The rest if // part of the name. g &= c("Foo (Bar) (Baz)", - Some("Foo (Bar)"), Some("Baz"), None); + Some("Foo (Bar)"), Some("Baz"), None, None); // The same with extra whitespace. g &= c("Foo (Bar) (Baz)", - Some("Foo (Bar)"), Some("Baz"), None); + Some("Foo (Bar)"), Some("Baz"), None, None); g &= c("Foo (Bar (Baz)", - Some("Foo (Bar"), Some("Baz"), None); + Some("Foo (Bar"), Some("Baz"), None, None); // Make sure whitespace is stripped. g &= c(" Name Last ( some comment ) <name@example.org>", Some("Name Last"), Some("some comment"), - Some("name@example.org")); + Some("name@example.org"), None); // Make sure an email is a comment is recognized as a comment. g &= c(" Name Last (email@example.org)", - Some("Name Last"), Some("email@example.org"), None); + Some("Name Last"), Some("email@example.org"), None, None); // Quoting in the local part of the email address is not // allowed, but it is recognized as a name. That's fine. g &= c("\"user\"@example.org", - Some("\"user\"@example.org"), None, None); + Some("\"user\"@example.org"), None, None, None); // Even unbalanced quotes. g &= c("\"user@example.org", - Some("\"user@example.org"), None, None); + Some("\"user@example.org"), None, None, None); g &= c("Henry Ford (CEO) <henry@ford.com>", - Some("Henry Ford"), Some("CEO"), Some("henry@ford.com")); + Some("Henry Ford"), Some("CEO"), Some("henry@ford.com"), None); g &= c("Thomas \"Tomakin\" (DHC) <thomas@clh.co.uk>", Some("Thomas \"Tomakin\""), Some("DHC"), - Some("thomas@clh.co.uk")); + Some("thomas@clh.co.uk"), None); g &= c("Aldous L. Huxley <huxley@old-world.org>", Some("Aldous L. Huxley"), None, - Some("huxley@old-world.org")); + Some("huxley@old-world.org"), None); + + + // Some URIs. + + // Examples from https://tools.ietf.org/html/rfc3986#section-1.1.2 + g &= c("<ftp://ftp.is.co.za/rfc/rfc1808.txt>", + None, None, + None, Some("ftp://ftp.is.co.za/rfc/rfc1808.txt")); + + g &= c("<http://www.ietf.org/rfc/rfc2396.txt>", + None, None, + None, Some("http://www.ietf.org/rfc/rfc2396.txt")); + + g &= c("<ldap://[2001:db8::7]/c=GB?objectClass?one>", + None, None, + None, Some("ldap://[2001:db8::7]/c=GB?objectClass?one")); + + g &= c("<mailto:John.Doe@example.com>", + None, None, + None, Some("mailto:John.Doe@example.com")); + + g &= c("<news:comp.infosystems.www.servers.unix>", + None, None, + None, Some("news:comp.infosystems.www.servers.unix")); + + g &= c("<tel:+1-816-555-1212>", + None, None, + None, Some("tel:+1-816-555-1212")); + + g &= c("<telnet://192.0.2.16:80/>", + None, None, + None, Some("telnet://192.0.2.16:80/")); + + g &= c("<urn:oasis:names:specification:docbook:dtd:xml:4.1.2>", + None, None, + None, Some("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")); + + + + g &= c("Foo's ssh server <ssh://hostname>", + Some("Foo's ssh server"), None, + None, Some("ssh://hostname")); + + g &= c("Foo (ssh server) <ssh://hostname>", + Some("Foo"), Some("ssh server"), + None, Some("ssh://hostname")); + + g &= c("<ssh://hostname>", + None, None, + None, Some("ssh://hostname")); + + g &= c("Warez <ftp://127.0.0.1>", + Some("Warez"), None, + None, Some("ftp://127.0.0.1")); + + g &= c("ssh://hostname", + None, None, + None, Some("ssh://hostname")); + + g &= c("ssh:hostname", + None, None, + None, Some("ssh:hostname")); + + g &= c("Frank Füber <ssh://ïntérnätïònál.eu>", + Some("Frank Füber"), None, + None, Some("ssh://ïntérnätïònál.eu")); + + g &= c("ssh://ïntérnätïònál.eu", + None, None, + None, Some("ssh://ïntérnätïònál.eu")); + + g &= c("<foo://domain.org>", + None, None, + None, Some("foo://domain.org")); + + g &= c("<foo-bar://domain.org>", + None, None, + None, Some("foo-bar://domain.org")); + + g &= c("<foo+bar://domain.org>", + None, None, + None, Some("foo+bar://domain.org")); + + g &= c("<foo.bar://domain.org>", + None, None, + None, Some("foo.bar://domain.org")); + + g &= c("<foo.bar://domain.org#anchor?query>", + None, None, + None, Some("foo.bar://domain.org#anchor?query")); + + // Is it an email address or a URI? It should show up as a URI. + g &= c("<foo://user:password@domain.org>", + None, None, + None, Some("foo://user:password@domain.org")); + + // Ports... + g &= c("<foo://domain.org:348>", + None, None, + None, Some("foo://domain.org:348")); + + g &= c("<foo://domain.org:348/>", + None, None, + None, Some("foo://domain.org:348/")); + + // Some test vectors from + // https://github.com/cweb/iri-tests/blob/master/iris.txt + g &= c("<http://[:]>", None, None, None, Some("http://[:]")); + g &= c("<http://2001:db8::1>", None, None, None, Some("http://2001:db8::1")); + g &= c("<http://[www.google.com]/>", None, None, None, Some("http://[www.google.com]/")); + g &= c("<http:////////user:@google.com:99?foo>", None, None, None, Some("http:////////user:@google.com:99?foo")); + g &= c("<http:path>", None, None, None, Some("http:path")); + g &= c("<http:/path>", None, None, None, Some("http:/path")); + g &= c("<http:host>", None, None, None, Some("http:host")); + g &= c("<http://user:pass@foo:21/bar;par?b#c>", None, None, None, + Some("http://user:pass@foo:21/bar;par?b#c")); + g &= c("<http:foo.com>", None, None, None, Some("http:foo.com")); + g &= c("<http://f:/c>", None, None, None, Some("http://f:/c")); + g &= c("<http://f:0/c>", None, None, None, Some("http://f:0/c")); + g &= c("<http://f:00000000000000/c>", None, None, None, Some("http://f:00000000000000/c")); + g &= c("<http://f:
/c>", None, None, None, Some("http://f:
/c")); + g &= c("<http://f:fifty-two/c>", None, None, None, Some("http://f:fifty-two/c")); + g &= c("<foo://>", None, None, None, Some("foo://")); + g &= c("<http://a:b@c:29/d>", None, None, None, Some("http://a:b@c:29/d")); + g &= c("<http::@c:29>", None, None, None, Some("http::@c:29")); + g &= c("<http://&a:foo(b]c@d:2/>", None, None, None, Some("http://&a:foo(b]c@d:2/")); + g &= c("<http://iris.test.ing/résumé/résumé.html>", None, None, None, Some("http://iris.test.ing/résumé/résumé.html")); + g &= c("<http://google.com/foo[bar]>", None, None, None, Some("http://google.com/foo[bar]")); if !g { panic!("Parse error"); @@ -877,6 +1169,15 @@ mod tests { // // assert!(ConventionallyParsedUserID::new( // "@old-world.org").is_err()); + + + // URI schemas must be ASCII. + assert!(ConventionallyParsedUserID::new( + "<über://domain.org>").is_err()); + + // Whitespace is not allowed. + assert!(ConventionallyParsedUserID::new( + "<http://some domain.org>").is_err()); } #[test] |