summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Henri Sivonen <hsivonen@hsivonen.fi> 2018-11-17 11:56:22 +0200
committer Kartikaya Gupta (kats) <staktrace@users.noreply.github.com> 2019-05-23 21:40:49 -0400
commit a60c8493d3c921ab3e87646866fd3fdd0ee03cb6 (patch)
tree e172d425631c5440f6fd37c147730a4828f7728f
parent 33eb96aeb20283e3691da393d321a0c5df2a64dc (diff)
download mailparse-a60c8493d3c921ab3e87646866fd3fdd0ee03cb6.tar.gz
mailparse-a60c8493d3c921ab3e87646866fd3fdd0ee03cb6.tar.xz
Use the charset crate instead of the encoding crate
Adds support for UTF-7 and drops support for HZ.
-rw-r--r-- Cargo.toml 2
-rw-r--r-- src/lib.rs 31
2 files changed, 17 insertions, 16 deletions
diff --git a/Cargo.toml b/Cargo.toml
index d6582c4..f07c114 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,4 +19,4 @@ maintenance = { status = "passively-maintained" }
[dependencies]
base64 = "0.9.0"
quoted_printable = "0.4.0"
-encoding = "0.2.32"
+charset = "0.1.1"
diff --git a/src/lib.rs b/src/lib.rs
index ec3905b..f707a99 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,5 @@
extern crate base64;
-extern crate encoding;
+extern crate charset;
extern crate quoted_printable;
use std::error;
@@ -7,7 +7,9 @@ use std::fmt;
use std::ops::Deref;
use std::collections::BTreeMap;
-use encoding::Encoding;
+use charset::Charset;
+use charset::decode_latin1;
+use charset::decode_ascii;
mod dateparse;
@@ -138,10 +140,7 @@ fn test_find_from_u8() {
impl<'a> MailHeader<'a> {
/// Get the name of the header. Note that header names are case-insensitive.
pub fn get_key(&self) -> Result<String, MailParseError> {
- encoding::all::ISO_8859_1
- .decode(self.key, encoding::DecoderTrap::Strict)
- .map(|s| s.trim().to_string())
- .map_err(|e| e.into())
+ Ok(decode_latin1(self.key).into_owned())
}
fn decode_word(&self, encoded: &str) -> Option<String> {
@@ -171,10 +170,9 @@ impl<'a> MailHeader<'a> {
}
_ => return None,
};
- let charset_conv = encoding::label::encoding_from_whatwg_label(charset)?;
- charset_conv
- .decode(&decoded, encoding::DecoderTrap::Replace)
- .ok()
+ let charset = Charset::for_label_no_replacement(charset.as_bytes())?;
+ let (cow, _) = charset.decode_without_bom_handling(&decoded);
+ Some(cow.into_owned())
}
/// Get the value of the header. Any sequences of newlines characters followed
@@ -192,7 +190,7 @@ impl<'a> MailHeader<'a> {
/// ```
pub fn get_value(&self) -> Result<String, MailParseError> {
let mut result = String::new();
- let chars = encoding::all::ISO_8859_1.decode(self.value, encoding::DecoderTrap::Strict)?;
+ let chars = decode_latin1(self.value);
let mut lines = chars.lines();
let mut add_space = false;
while let Some(line) = lines.next().map(str::trim_left) {
@@ -656,10 +654,13 @@ impl<'a> ParsedMail<'a> {
/// ```
pub fn get_body(&self) -> Result<String, MailParseError> {
let decoded = self.get_body_raw()?;
- let charset_conv = encoding::label::encoding_from_whatwg_label(&self.ctype.charset)
- .unwrap_or(encoding::all::ASCII);
- charset_conv.decode(&decoded, encoding::DecoderTrap::Replace)
- .map_err(|e| e.into())
+ let cow = if let Some(charset) = Charset::for_label(self.ctype.charset.as_bytes()) {
+ let (cow, _, _) = charset.decode(&decoded);
+ cow
+ } else {
+ decode_ascii(&decoded)
+ };
+ Ok(cow.into_owned())
}
/// Get the body of the message as a Rust Vec<u8>. This function tries to