summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Manos Pitsidianakis <el13635@mail.ntua.gr>, 2019-07-05 18:58:46 +0300
committer: Manos Pitsidianakis <el13635@mail.ntua.gr>, 2019-07-06 00:19:25 +0300
commit: 6906142278c69a250811d2691dbe96d5fa39732e (patch)
tree: b378845da52e1e8b04e979aeee9fd74626f536f5
parent: ff2c030c0fd71b3ef9405df62e2131df0417c465 (diff)
melib: don't exclude whitespaces in mime encoded words
-rw-r--r-- melib/src/email/compose/mime.rs 70
1 files changed, 60 insertions, 10 deletions
diff --git a/melib/src/email/compose/mime.rs b/melib/src/email/compose/mime.rs
index fa6b8517..7eddb8c0 100644
--- a/melib/src/email/compose/mime.rs
+++ b/melib/src/email/compose/mime.rs
@@ -1,18 +1,68 @@
use super::*;
+use crate::grapheme_clusters::Graphemes;
pub fn encode_header(value: &str) -> String {
- eprintln!("encoding \"{}\"", value);
let mut ret = String::with_capacity(value.len());
- for word in value.split_whitespace() {
- if word.is_ascii() {
- ret.push_str(word);
- } else {
- ret.push_str(
- format!("=?UTF-8?B?{}?=", BASE64_MIME.encode(word.trim().as_bytes())).trim(),
- );
+ let graphemes = value.graphemes_indices();
+ let mut is_current_window_ascii = true;
+ let mut current_window_start = 0;
+ for (idx, g) in graphemes {
+ match (g.is_ascii(), is_current_window_ascii) {
+ (true, true) => {
+ ret.push_str(g);
+ }
+ (false, true) => {
+ current_window_start = idx;
+ is_current_window_ascii = false;
+ }
+ (true, false) => {
+ /* If !g.is_whitespace()
+ *
+ * Whitespaces inside encoded tokens must be greedily taken,
+ * instead of splitting each non-ascii word into separate encoded tokens. */
+ if !g.split_whitespace().collect::<Vec<&str>>().is_empty() {
+ ret.push_str(&format!(
+ "=?UTF-8?B?{}?=",
+ BASE64_MIME
+ .encode(value[current_window_start..idx].as_bytes())
+ .trim()
+ ));
+ if idx != value.len() - 1 {
+ ret.push(' ');
+ }
+ is_current_window_ascii = true;
+ }
+ }
+ /* RFC2047 recommends:
+ * 'While there is no limit to the length of a multiple-line header field, each line of
+ * a header field that contains one or more 'encoded-word's is limited to 76
+ * characters.'
+ * This is a rough compliance.
+ */
+ (false, false) if (((4 * (idx - current_window_start) / 3) + 3) & !3) > 33 => {
+ ret.push_str(&format!(
+ "=?UTF-8?B?{}?=",
+ BASE64_MIME
+ .encode(value[current_window_start..idx].as_bytes())
+ .trim()
+ ));
+ if idx != value.len() - 1 {
+ ret.push(' ');
+ }
+ current_window_start = idx;
+ }
+ (false, false) => {}
}
- ret.push(' ');
}
- ret.pop();
+ /* If the last part of the header value is encoded, it won't be pushed inside the previous for
+ * block */
+ if !is_current_window_ascii {
+ ret.push_str(&format!(
+ "=?UTF-8?B?{}?=",
+ BASE64_MIME
+ .encode(value[current_window_start..].as_bytes())
+ .trim()
+ ));
+ }
ret
}