From a054789ddb60ed1fab26e6d4e6bd36ed926273f1 Mon Sep 17 00:00:00 2001 From: Alexandros Frantzis Date: Tue, 1 Oct 2019 23:06:38 +0300 Subject: Initial public release --- .github/workflows/build.yml | 19 ++ .gitignore | 2 + Cargo.toml | 24 +++ LICENSE | 373 ++++++++++++++++++++++++++++++++++ README.md | 35 ++++ examples/normalize.rs | 18 ++ examples/personal-mda.rs | 55 +++++ src/decode.rs | 224 +++++++++++++++++++++ src/deliver.rs | 176 ++++++++++++++++ src/lib.rs | 373 ++++++++++++++++++++++++++++++++++ src/normalize.rs | 477 ++++++++++++++++++++++++++++++++++++++++++++ src/processing.rs | 95 +++++++++ src/regex.rs | 114 +++++++++++ tests/test_charset.rs | 104 ++++++++++ tests/test_deliver.rs | 86 ++++++++ tests/test_encoded_words.rs | 78 ++++++++ tests/test_encoding.rs | 136 +++++++++++++ tests/test_fields.rs | 135 +++++++++++++ tests/test_processing.rs | 39 ++++ tests/test_regex.rs | 185 +++++++++++++++++ 20 files changed, 2748 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 examples/normalize.rs create mode 100644 examples/personal-mda.rs create mode 100644 src/decode.rs create mode 100644 src/deliver.rs create mode 100644 src/lib.rs create mode 100644 src/normalize.rs create mode 100644 src/processing.rs create mode 100644 src/regex.rs create mode 100644 tests/test_charset.rs create mode 100644 tests/test_deliver.rs create mode 100644 tests/test_encoded_words.rs create mode 100644 tests/test_encoding.rs create mode 100644 tests/test_fields.rs create mode 100644 tests/test_processing.rs create mode 100644 tests/test_regex.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..4b0e23f --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,19 @@ +name: build + +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + 
runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: Build + run: cargo build --verbose + - name: Run tests + run: cargo test --verbose diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..33b0b15 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "mda" +version = "0.1.0" +authors = ["Alexandros Frantzis "] +edition = "2018" +description = "A library for creating custom Mail Delivery Agents" +license = "MPL-2.0" +repository = "https://github.com/afrantzis/mda-rs" +documentation = "https://docs.rs/mda" +homepage = "https://github.com/afrantzis/mda-rs" +readme = "README.md" +categories = ["email"] +exclude = ["/.github/**"] + +[dependencies] +regex = "1" +libc = "0.2" +gethostname = "0.2" +memchr = "2.2" +charset = "0.1" +lazy_static = "1.4" + +[dev-dependencies] +tempfile = "3" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..14e2f77 --- /dev/null +++ b/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. 
"Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. 
For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. 
+Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f757ae6 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +mda-rs +====== + +mda-rs is a Rust library for writing custom Mail Delivery Agents. + +![](https://github.com/afrantzis/mda-rs/workflows/build/badge.svg) + +### Documentation + +The detailed module documentation, including code examples for all features, +can be found at [https://docs.rs/mda](https://docs.rs/mda). + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +mda = "0.1" +``` + +If you are using Rust 2015 add the following to your crate root file (Rust 2018 +doesn't require this): + +```rust +extern crate mda; +``` + +See [examples/personal-mda.rs](examples/personal-mda.rs) for an example that +uses mda-rs. + +### License + +This project is licensed under the Mozilla Public License Version 2.0 +([LICENSE](LICENSE) or https://www.mozilla.org/en-US/MPL/2.0/). diff --git a/examples/normalize.rs b/examples/normalize.rs new file mode 100644 index 0000000..359250a --- /dev/null +++ b/examples/normalize.rs @@ -0,0 +1,18 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! Writes out the normalized form of an email.
+ +use std::io::{self, Write}; +use mda::{Email, Result}; + +fn main() -> Result<()> { + let email = Email::from_stdin()?; + io::stdout().lock().write_all(email.data())?; + Ok(()) +} diff --git a/examples/personal-mda.rs b/examples/personal-mda.rs new file mode 100644 index 0000000..aa26431 --- /dev/null +++ b/examples/personal-mda.rs @@ -0,0 +1,55 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! An example of a custom MDA. + +use std::path::PathBuf; + +use mda::{Email, EmailRegex, Result, DeliveryDurability}; + +fn main() -> Result<()> { + // Just some random path to make it highly unlikely that this example will + // inadvertently mess up something. + let root = PathBuf::from("/tmp/my-personal-mail-96f29eb6375cfa37"); + + // If we are sure bogofilter is available, the below can be better written as: + // let mut email = Email::from_stdin_filtered(&["/usr/bin/bogofilter", "-ep"])?; + let mut email = Email::from_stdin()?; + if let Ok(new_email) = email.filter(&["/usr/bin/bogofilter", "-ep"]) { + email = new_email; + } + + // Quicker (but possibly less durable) delivery. + email.set_delivery_durability(DeliveryDurability::FileSyncOnly); + + let from = email.header_field("From").unwrap_or(""); + let bogosity = email.header_field("X-Bogosity").unwrap_or(""); + + if bogosity.contains("Spam, tests=bogofilter") || + from.contains("@banneddomain.com") { + email.deliver_to_maildir(root.join("spam"))?; + return Ok(()); + } + + let cc = email.header_field("Cc").unwrap_or(""); + let to = email.header_field("To").unwrap_or(""); + + if to.contains("myworkemail@example.com") || + cc.contains("myworkemail@example.com") { + if email.body().search("URGENCY RATING: (CRITICAL|URGENT)")?
{ + email.deliver_to_maildir(root.join("inbox/myemail/urgent"))?; + } else { + email.deliver_to_maildir(root.join("inbox/myemail/normal"))?; + } + return Ok(()); + } + + email.deliver_to_maildir(root.join("inbox/unsorted"))?; + + Ok(()) +} diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 0000000..8004f18 --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,224 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! Base64 and quoted-printable decoding. + +use crate::Result; + +const PAD: u8 = 64; // The pseudo-index of the PAD character. +const INV: u8 = 99; // An invalid index. + +static BASE64_INDICES: &'static [u8] = &[ + // 0 1 2 3 4 5 6 7 8 9 A B C D E F +/* 0 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* 1 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* 2 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, 62, INV, INV, INV, 63, +/* 3 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, INV, INV, INV, PAD, INV, INV, +/* 4 */ INV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, +/* 5 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, INV, INV, INV, INV, INV, +/* 6 */ INV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, +/* 7 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, INV, INV, INV, INV, INV, +/* 8 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* 9 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* A */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* B */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* C */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, 
INV, INV, +/* D */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* E */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +/* F */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, +]; + +/// Decodes base64 encoded data, appending the decoded data to a Vec<u8>. +/// +/// During decoding all line breaks and invalid characters are ignored. +/// If an error is encountered during decoding, the already decoded data in the +/// output buffer is left intact. It's up to the caller to deal with the partially +/// decoded data in case of failure. +pub fn base64_decode_into_buf(input: &[u8], output: &mut Vec) -> Result<()> { + let mut num_chars = 0; + let mut cur_triplet = 0; + let mut valid_chars = 0; + + for c in input { + let ci = BASE64_INDICES[*c as usize]; + match ci { + // rfc2045: All line breaks or other characters not + // found in Table 1 must be ignored by decoding software. + INV => continue, + _ if ci < PAD => valid_chars += 1, + _ => {} + } + + cur_triplet = cur_triplet << 6 | ((ci & 0x3f) as u32); + num_chars += 1; + + if num_chars == 4 { + match valid_chars { + 2 => output.push((cur_triplet >> 16) as u8), + 3 => output.extend( + &[(cur_triplet >> 16) as u8, (cur_triplet >> 8) as u8] + ), + 4 => output.extend( + &[(cur_triplet >> 16) as u8, + (cur_triplet >> 8) as u8, + cur_triplet as u8 + ] + ), + _ => return Err("Invalid base64 encoding".into()), + } + + cur_triplet = 0; + num_chars = 0; + valid_chars = 0; + } + } + + // rfc2045: A full encoding quantum is always completed at the end of a body. + if num_chars != 0 { + return Err("Unpadded input".into()); + } + + Ok(()) +} + +/// Converts an ascii byte representing a hex digit to its numerical value.
+fn hexdigit_to_num(mut a: u8) -> Option { + if a.is_ascii_digit() { + return Some(a - b'0'); + } + + a.make_ascii_lowercase(); + + if a >= b'a' && a <= b'f' { + return Some(a - b'a' + 10); + } + + None +} + +/// Decodes quoted-printable encoded data, appending the decoded data to a +/// Vec<u8>. +/// +/// Soft line breaks ('=' followed by CRLF or LF) are dropped, and invalid escape +/// sequences are emitted unchanged. If an error is encountered during decoding, +/// the already decoded data in the output buffer is left intact. It's up to the +/// caller to deal with the partially decoded data in case of failure. +pub fn qp_decode_into_buf(input: &[u8], output: &mut Vec) -> Result<()> { + let mut iter = input.iter().peekable(); + + 'outer: loop { + loop { + match iter.next() { + Some(b'=') => break, + Some(c) => output.push(*c), + None => break 'outer, + } + } + + // At this point we have encountered a '=', so check + // to see what follows. + if let Some(&first) = iter.next() { + // A CRLF/LF after '=' marks a line continuation, and + // is effectively dropped. + if first == b'\r' { + if iter.peek() == Some(&&b'\n') { + iter.next(); + continue; + } + } else if first == b'\n' { + continue; + } else if let Some(first_num) = hexdigit_to_num(first) { + // A valid pair of hexdigits represents the raw byte value. + if let Some(&&second) = iter.peek() { + if let Some(second_num) = hexdigit_to_num(second) { + output.push(first_num * 16 + second_num); + iter.next(); + continue; + } + } + } + + // Emit the raw sequence if it's not one of the + // special cases checked above. + output.extend(&[b'=', first]); + } else { + // Last character in the input was an '=', just emit it.
+ output.push(b'='); + } + } + + + Ok(()) +} + +#[cfg(test)] +mod test_base64 { + use crate::decode::base64_decode_into_buf; + + #[test] + fn decodes_full_length() { + let mut decoded = Vec::new(); + assert!(base64_decode_into_buf("YWJj".as_bytes(), &mut decoded).is_ok()); + assert_eq!(decoded, &[b'a', b'b', b'c']); + } + + #[test] + fn decodes_with_two_padding() { + let mut decoded = Vec::new(); + assert!(base64_decode_into_buf("YWJjZA==".as_bytes(), &mut decoded).is_ok()); + assert_eq!(decoded, &[b'a', b'b', b'c', b'd']); + } + + #[test] + fn decodes_with_one_padding() { + let mut decoded = Vec::new(); + assert!(base64_decode_into_buf("YWJjZGU=".as_bytes(), &mut decoded).is_ok()); + assert_eq!(decoded, &[b'a', b'b', b'c', b'd', b'e']); + } + + #[test] + fn error_with_invalid_paddings() { + let mut decoded = Vec::new(); + assert!(base64_decode_into_buf("YWJj====".as_bytes(), &mut decoded).is_err()); + assert!(base64_decode_into_buf("YWJjZ===".as_bytes(), &mut decoded).is_err()); + assert!(base64_decode_into_buf("====".as_bytes(), &mut decoded).is_err()); + } + + #[test] + fn error_with_unpadded_input() { + let mut decoded = Vec::new(); + assert!(base64_decode_into_buf("YWJjZA=".as_bytes(), &mut decoded).is_err()); + } +} + +#[cfg(test)] +mod test_qp { + use crate::decode::qp_decode_into_buf; + + #[test] + fn decodes_byte() { + let mut decoded = Vec::new(); + assert!(qp_decode_into_buf("a=62c=64".as_bytes(), &mut decoded).is_ok()); + assert_eq!(decoded, &[b'a', b'b', b'c', b'd']); + } + + #[test] + fn decodes_soft_break() { + let mut decoded = Vec::new(); + assert!(qp_decode_into_buf("a=\r\nb=\nc".as_bytes(), &mut decoded).is_ok()); + assert_eq!(decoded, &[b'a', b'b', b'c']); + } + + #[test] + fn invalid_sequences_are_untouched() { + let mut decoded = Vec::new(); + let invalid_sequence = "a=6t= c=".as_bytes(); + assert!(qp_decode_into_buf(invalid_sequence, &mut decoded).is_ok()); + assert_eq!(decoded, invalid_sequence); + } +} diff --git a/src/deliver.rs 
b/src/deliver.rs new file mode 100644 index 0000000..8ade10f --- /dev/null +++ b/src/deliver.rs @@ -0,0 +1,176 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! Email delivery functionality. + +use std::fs::{self, File}; +use std::io::ErrorKind; +use std::io::prelude::*; +use std::os::unix::prelude::*; +use std::path::{PathBuf, Path}; +use std::process; +use std::sync::{Arc, Mutex}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::{DeliveryDurability, Result}; + +use gethostname::gethostname; +use libc; + +/// A generator for likely unique maildir email filenames. +/// +/// Using it as an iterator gets a filename that can be used in a maildir +/// and is likely to be unique. +pub struct EmailFilenameGenerator { + count: usize, + max_seen_unix_time: u64, + hostname: String, +} + +impl EmailFilenameGenerator { + pub fn new() -> Self { + // From https://cr.yp.to/proto/maildir.html: + // "To deal with invalid host names, replace / with \057 and : with \072" + let hostname = + gethostname() + .to_string_lossy() + .into_owned() + .replace("/", r"\057") + .replace(":", r"\072"); + + EmailFilenameGenerator{ + count: 0, + max_seen_unix_time: 0, + hostname: hostname, + } + } +} + +impl Iterator for EmailFilenameGenerator { + type Item = String; + + fn next(&mut self) -> Option { + let unix_time = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_secs(); + let pid = process::id(); + + if self.max_seen_unix_time < unix_time { + self.max_seen_unix_time = unix_time; + self.count = 0; + } else { + self.count += 1; + } + + Some(format!("{}.{}_{}.{}", unix_time, pid, self.count, self.hostname)) + } +} + +/// A representation of a maildir. 
+pub struct Maildir { + root: PathBuf, + email_filename_gen: Arc>, +} + +impl Maildir { + /// Opens, or creates if it doesn't exist, a maildir directory structure + /// at the specified path. + pub fn open_or_create( + mailbox: &Path, + email_filename_gen: Arc> + ) -> Result { + let root = PathBuf::from(mailbox); + for s in &["tmp", "new", "cur"] { + let path = root.join(&s); + fs::create_dir_all(&path)?; + } + + Ok(Maildir{root, email_filename_gen}) + } + + /// Delivers an email to the maildir by creating a new file with the email data, + /// and using the specified DeliveryDurability method. + pub fn deliver( + &self, + data: &[u8], + delivery_durability: DeliveryDurability + ) -> Result { + loop { + let tmp_dir = self.root.join("tmp"); + let new_dir = self.root.join("new"); + + let tmp_email = self.write_email_to_dir(data, &tmp_dir)?; + let new_email = new_dir.join( + tmp_email.file_name().ok_or("")?.to_str().ok_or("")?); + + let result = fs::hard_link(&tmp_email, &new_email); + fs::remove_file(&tmp_email)?; + + match result { + Ok(_) => { + if delivery_durability == DeliveryDurability::FileAndDirSync { + File::open(&new_dir)?.sync_all()?; + File::open(&tmp_dir)?.sync_all()?; + } + return Ok(new_email); + }, + Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {}, + Err(err) => return Err(err.into()), + } + } + } + + /// Delivers an email to the maildir by hard-linking with an existing file, + /// and using the specified DeliveryDurability method.
+ pub fn deliver_with_hard_link( + &self, + src: &Path, + delivery_durability: DeliveryDurability + ) -> Result { + loop { + let new_dir = self.root.join("new"); + let new_email = new_dir.join(self.next_email_filename_candidate()?); + + match fs::hard_link(&src, &new_email) { + Ok(_) => { + if delivery_durability == DeliveryDurability::FileAndDirSync { + File::open(&new_dir)?.sync_all()?; + } + return Ok(new_email); + }, + Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {}, + Err(err) => return Err(err.into()), + } + } + } + + /// Writes email data to a new file in the specified directory. + fn write_email_to_dir(&self, data: &[u8], dir: &Path) -> Result { + loop { + let email = dir.join(self.next_email_filename_candidate()?); + let result = fs::OpenOptions::new() + .create_new(true) + .write(true) + .custom_flags(libc::O_SYNC) + .open(&email); + + match result { + Ok(mut f) => { + f.write_all(&data)?; + return Ok(email); + }, + Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {}, + Err(err) => return Err(err.into()), + } + } + } + + /// Gets the next email filename candidate from the EmailFilenameGenerator. + fn next_email_filename_candidate(&self) -> Result { + let mut gen = self.email_filename_gen.lock().map_err(|_| "")?; + gen.next().ok_or("".into()) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..bdb8c85 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,373 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! The mda crate provides a library for writing custom Mail Deliver Agents. It +//! supports local delivery to maildirs, access to normalized email byte +//! data for easier processing, and access to individual header fields. +//! +//! 
Email data normalization involves ensuring header fields are in single +//! lines, decoding text parts of the message that use some kind of transfer +//! encoding (e.g., base64), and converting all text to UTF-8. The original +//! (non-normalized) email data is used during delivery. +//! +//! This crate also exposes convenience methods for regular expression searching +//! and processing/filtering of emails. +//! +//! # Email construction +//! +//! The [Email struct](struct.Email.html) is the basic abstraction of the `mda` +//! crate. To construct an Email use the +//! [Email::from_stdin](struct.Email.html#method.from_stdin) or +//! [Email::from_vec](struct.Email.html#method.from_vec) method. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let email = Email::from_vec(vec![97, 98, 99])?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Email delivery +//! +//! Use the +//! [Email::deliver_to_maildir](struct.Email.html#method.deliver_to_maildir) +//! method to deliver the email to local maildir directories. Note that +//! the original (non-normalized) email data is used during delivery. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! email.deliver_to_maildir("/my/maildir/path")?; +//! email.deliver_to_maildir("/my/other/maildir/path")?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Accessing email header fields +//! +//! Use the [Email::header_field](struct.Email.html#method.header_field) and +//! [Email::header_field_all_occurrences](struct.Email.html#method.header_field_all_occurrences) +//! methods to access the email header fields. Any MIME encoded words in the +//! header field values are decoded and the field value is converted to UTF-8. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let to = email.header_field("To").unwrap_or(""); +//! if to.contains("me@example.com") { +//! email.deliver_to_maildir("/my/maildir/path")?; +//! } +//! 
# Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Searching with regular expressions +//! +//! The [EmailRegex](trait.EmailRegex.html) trait provides convenience methods +//! for searching the header, the body or the whole email with regular +//! expressions. The convenience functions use case-insensitive, multi-line +//! search (`^` and `$` match beginning and end of lines). If the above don't +//! match your needs, or you require additional functionality, you can perform +//! manual regex search using the email data. +//! +//! ```no_run +//! use mda::{Email, EmailRegex}; +//! let email = Email::from_stdin()?; +//! if email.header().search(r"^To:.*me@example.com")? { +//! email.deliver_to_maildir("/my/maildir/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Processing and filtering the email with external programs +//! +//! Use the [Email::filter](struct.Email.html#method.filter) and +//! [Email::from_stdin_filtered](struct.Email.html#method.from_stdin_filtered) +//! methods to filter the email, in both cases creating a new email. +//! +//! ```no_run +//! use mda::Email; +//! // Filtering directly from stdin is more efficient. +//! let email = Email::from_stdin_filtered(&["bogofilter", "-ep"])?; +//! let bogosity = email.header_field("X-Bogosity").unwrap_or(""); +//! if bogosity.contains("Spam, tests=bogofilter") { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! // We can also filter at any other time. +//! let email = email.filter(&["bogofilter", "-ep"])?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! To perform more general processing use the +//! [Email::process](struct.Email.html#method.process) +//! method: +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let output = email.process(&["bogofilter"])?; +//! if let Some(0) = output.status.code() { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Access to byte data +//! +//! 
Use the [Email::header](struct.Email.html#method.header), +//! [Email::body](struct.Email.html#method.body), and +//! [Email::data](struct.Email.html#method.data) methods to access the +//! normalized byte data of the header, body and whole email respectively. +//! +//! Normalization involves ensuring header fields are in single lines, decoding +//! text parts of the message that use some kind of transfer encoding (e.g., +//! base64), and converting all text to UTF-8 character encoding. +//! +//! If for some reason you need access to non-normalized data use +//! [Email::raw_data](struct.Email.html#method.raw_data). +//! +//! ```no_run +//! use std::str; +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let body_str = String::from_utf8_lossy(email.body()); +//! +//! if body_str.contains("FREE BEER") { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Decide delivery durability vs speed trade-off +//! +//! Use the [Email::set_delivery_durability](struct.Email.html#method.set_delivery_durability) +//! method to decide which [DeliveryDurability](enum.DeliveryDurability.html) method to use. +//! By default the most durable (but also slower) method is used. +//! +//! ```no_run +//! use mda::{Email, DeliveryDurability}; +//! let mut email = Email::from_stdin()?; +//! email.set_delivery_durability(DeliveryDurability::FileSyncOnly); +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! 
``` + +mod deliver; +mod regex; +mod processing; +mod normalize; +mod decode; + +use std::io; +use std::io::prelude::*; +use std::path::{PathBuf, Path}; +use std::sync:: {Arc, Mutex, RwLock}; +use std::collections::HashMap; + +use deliver::{Maildir, EmailFilenameGenerator}; +use normalize::normalize_email; + +pub use crate::regex::EmailRegex; + +pub type Result = std::result::Result>; + +fn find_empty_line(data: &[u8]) -> Option { + data.windows(2).position(|w| w[0]== b'\n' && (w[1] == b'\n' || w[1] == b'\r')) +} + +/// The method to use to try to guarantee durable email delivery. +#[derive(PartialEq, Copy, Clone)] +pub enum DeliveryDurability { + /// Perform both file and directory syncing during delivery. + /// This is the default delivery durability method. + FileAndDirSync, + /// Perform only file sync during delivery. This method is + /// potentially much faster, and is used by many existing + /// MDAs, but, depending on the used filesystem, may not + /// provide the required delivery durability guarantees. + FileSyncOnly, +} + +/// A representation of an email. +pub struct Email { + data: Vec, + normalized_data: Vec, + body_index: usize, + deliver_path: RwLock>, + fields: HashMap>, + email_filename_gen: Arc>, + delivery_durability: DeliveryDurability, +} + +impl Email { + /// Creates an `Email` by reading data from stdin. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_stdin() -> Result { + let stdin = io::stdin(); + let mut data = Vec::new(); + stdin.lock().read_to_end(&mut data)?; + Email::from_vec(data) + } + + /// Creates an `Email` by using data passed in a `Vec`. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_vec(vec![1, 2, 3])?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn from_vec(data: Vec) -> Result { + let (normalized_data, fields) = normalize_email(&data); + let body_index = find_empty_line(&normalized_data).unwrap_or(normalized_data.len()); + let email_filename_gen = Arc::new(Mutex::new(EmailFilenameGenerator::new())); + + Ok( + Email{ + data: data, + normalized_data: normalized_data, + body_index: body_index, + deliver_path: RwLock::new(None), + fields: fields, + email_filename_gen: email_filename_gen, + delivery_durability: DeliveryDurability::FileAndDirSync, + } + ) + } + + /// Sets the durability method for delivery of this email. + /// + /// # Example + /// + /// ```no_run + /// # use mda::{DeliveryDurability, Email}; + /// let mut email = Email::from_stdin()?; + /// email.set_delivery_durability(DeliveryDurability::FileSyncOnly); + /// # Ok::<(), Box>(()) + /// ``` + pub fn set_delivery_durability(&mut self, delivery_durability: DeliveryDurability) { + self.delivery_durability = delivery_durability; + } + + /// Returns the value of a header field, if present. If a field occurs + /// multiple times, the value of the first occurrence is returned. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// let to = email.header_field("To").unwrap_or(""); + /// # Ok::<(), Box>(()) + /// ``` + pub fn header_field(&self, name: &str) -> Option<&str> { + self.fields.get(&name.to_lowercase()).map(|v| v[0].as_str()) + } + + /// Returns the values from all occurrences of a header field, if present. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// if let Some(all_received) = email.header_field_all_occurrences("Received") { + /// // process all_received + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn header_field_all_occurrences(&self, name: &str) -> Option<&Vec> { + self.fields.get(&name.to_lowercase()).map(|v| v) + } + + /// Delivers the email to the specified maildir. If the maildir isn't + /// present it is created. + /// + /// The first delivery of an email involves writing the email data to + /// the target file, whereas subsequent deliveries try to use a hard link + /// to the first delivery, falling back to a normal write if needed. + /// + /// The email is delivered durably by syncing both the file and the + /// associated directories (`DeliveryDurability::FileAndDirSync`), + /// unless a different durability method is specified with + /// `set_delivery_durability`. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// email.deliver_to_maildir("/path/to/maildir/")?; + /// # Ok::<(), Box>(()) + /// ``` + pub fn deliver_to_maildir(&self, path: impl AsRef) -> Result { + self.deliver_to_maildir_path(path.as_ref()) + } + + fn deliver_to_maildir_path(&self, path: &Path) -> Result { + let maildir = Maildir::open_or_create(&path, self.email_filename_gen.clone())?; + + if let Some(deliver_path) = self.deliver_path.read().unwrap().as_ref() { + let email_path_result = + maildir.deliver_with_hard_link( + deliver_path, + self.delivery_durability); + + if email_path_result.is_ok() { + return email_path_result; + } + } + + let email_path = maildir.deliver(&self.data, self.delivery_durability)?; + + *self.deliver_path.write().unwrap() = Some(email_path.clone()); + + Ok(email_path) + } + + /// Returns whether the email has been delivered to at least one maildir. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// if !email.has_been_delivered() { + /// email.deliver_to_maildir("/fallback/maildir/")?; + /// } + /// # Ok::<(), Box>(()) + /// ``` + pub fn has_been_delivered(&self) -> bool { + self.deliver_path.read().unwrap().is_some() + } + + /// Provides access to the normalized email byte data. + pub fn data(&self) -> &[u8] { + &self.normalized_data + } + + /// Provides access to the normalized email header byte data. + pub fn header(&self) -> &[u8] { + &self.normalized_data[..self.body_index] + } + + /// Provides access to the normalized email body byte data. + pub fn body(&self) -> &[u8] { + &self.normalized_data[self.body_index..] + } + + /// Provides access to the raw (non-normalized) email byte data. + pub fn raw_data(&self) -> &[u8] { + &self.data + } +} diff --git a/src/normalize.rs b/src/normalize.rs new file mode 100644 index 0000000..7c8487d --- /dev/null +++ b/src/normalize.rs @@ -0,0 +1,477 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! Normalization of email data for easier processing. +//! +//! Normalization includes: +//! +//! * Placing multi-line header fields on a single line +//! * Decoding base64 or quoted-printable encoded text data, including +//! MIME encoded-words in the header. +//! * Converting all text data to UTF-8. + +use ::regex::bytes::{RegexBuilder, Regex, Captures}; +use std::collections::HashMap; +use std::iter::Peekable; +use memchr::{memchr, memchr_iter}; +use charset::Charset; +use std::borrow::Cow; +use lazy_static::lazy_static; + +use crate::decode::{base64_decode_into_buf, qp_decode_into_buf}; + +/// An element recognized by the [EmailParser](struct.EmailParser.html). 
+enum Element { + HeaderField{data: Vec}, + Body{ + data: Vec, + encoding: Option, + content_type: Option, + charset: Option + }, + Verbatim{data: Vec}, +} + +/// Information about a part in a multi-part email message. +/// The top-level is also considered a part. +struct Part { + encoding: Option, + content_type: Option, + charset: Option, + subpart_boundary: Option>, +} + +impl Part { + fn new() -> Self { + Part{ + encoding: None, + content_type: None, + charset: None, + subpart_boundary: None, + } + } +} + +/// Iterator for the lines contained in a slice of [u8]. +pub struct SliceLines<'a> { + buf: &'a [u8], + last: usize, +} + +impl<'a> Iterator for SliceLines<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option<&'a [u8]> { + match memchr(b'\n', &self.buf[self.last..]) { + Some(m) => { + let line = &self.buf[self.last..=(self.last + m)]; + self.last = self.last + m + 1; + Some(line) + }, + None => { + let line = &self.buf[self.last..]; + if line.is_empty() { + None + } else { + self.last = self.buf.len(); + Some(line) + } + } + } + } +} + +/// A parser for the elements contained in an email. +/// +/// The parsed elements are accessible by iterating over the parser. +/// +/// Every line in the email is contained in a MIME part (which itself may be +/// nested in another part). The top level of the email is also considered +/// to be a part for convenience of processing. +struct EmailParser<'a> { + lines: Peekable>, + // The stack of nested parts the line we are processing is contained in. + part_stack: Vec, + // Whether we currently parsing header lines. + in_header: bool, + // The active multi-part boundary. 
+ active_boundary: Vec, + content_encoding_regex: Regex, + content_type_regex: Regex, + boundary_regex: Regex, +} + +impl<'a> EmailParser<'a> { + fn new(buf: &'a [u8]) -> Self { + let content_encoding_regex = + RegexBuilder::new(r"Content-Transfer-Encoding:\s*([[:alnum:]-]+)") + .case_insensitive(true) + .build().unwrap(); + let content_type_regex = + RegexBuilder::new(r#"^Content-Type:\s*([^;]+)\s*(?:;\s*charset\s*=\s*"?([[:alnum:]_:\-\.]+))?"?"#) + .case_insensitive(true) + .build().unwrap(); + + let boundary_regex = + RegexBuilder::new(r#"^Content-Type:\s*multipart/.*boundary\s*=\s*"?([[:alnum:]'_,/:=\(\)\+\-\.\?]+)"?"#) + .case_insensitive(true) + .build().unwrap(); + + EmailParser{ + lines: SliceLines{buf, last: 0}.peekable(), + // All emails have the top-level part. + part_stack: vec![Part::new()], + in_header: true, + active_boundary: Vec::new(), + content_encoding_regex: content_encoding_regex, + content_type_regex: content_type_regex, + boundary_regex: boundary_regex, + } + } + + // Returns the content type of the active part. + fn active_content_type(&self) -> Option { + self.part_stack.last()?.content_type.clone() + } + + // Returns the encoding of the active part. + fn active_encoding(&self) -> Option { + self.part_stack.last()?.encoding.clone() + } + + // Returns the charset of the active part. + fn active_charset(&self) -> Option { + self.part_stack.last()?.charset.clone() + } + + fn begin_part(&mut self) { + let part = self.part_stack.last().unwrap(); + + // We need to differentiate between the first and subsequent parts in a + // multipart message. The first part creates a new subpart in the + // part_stack... + if part.subpart_boundary.as_ref().is_some() && + part.subpart_boundary.as_ref().unwrap() == &self.active_boundary { + self.part_stack.push(Part::new()) + } else { + // ...whereas subsequent sibling parts just replace the existing + // part in the stack. 
+ let part = self.part_stack.last_mut().unwrap(); + *part = Part::new(); + } + } + + fn end_part(&mut self) { + self.part_stack.pop(); + if let Some(part) = self.part_stack.last_mut() { + part.subpart_boundary = None; + } + for p in self.part_stack.iter().rev() { + if let Some(b) = &p.subpart_boundary { + self.active_boundary = b.clone(); + } + } + } + + fn update_active_part_from_header_field(&mut self, field: &[u8]) { + let mut part = self.part_stack.last_mut().unwrap(); + + if let Some(captures) = self.content_encoding_regex.captures(&field) { + let enc_bytes = captures.get(1).unwrap().as_bytes(); + part.encoding = Some(std::str::from_utf8(&enc_bytes).unwrap().to_lowercase()); + } else if let Some(captures) = self.boundary_regex.captures(&field) { + part.subpart_boundary = Some(captures.get(1).unwrap().as_bytes().to_vec()); + self.active_boundary = part.subpart_boundary.as_ref().unwrap().clone(); + } + else if let Some(captures) = self.content_type_regex.captures(&field) { + let type_bytes = captures.get(1).unwrap().as_bytes(); + part.content_type = Some(std::str::from_utf8(&type_bytes).unwrap().to_lowercase()); + if let Some(charset) = captures.get(2) { + part.charset = Some(std::str::from_utf8(charset.as_bytes()).unwrap().to_lowercase()); + } + } + } +} + +/// Removes newline characters from the end of a byte vector. +fn vec_trim_end_newline(line: &mut Vec) { + while let Some(&b) = line.last() { + if b != b'\n' && b != b'\r' { + break; + } + line.pop(); + } +} + +/// Returns a new slice not including any newline characters from the +/// end of an existing slice. +fn slice_trim_end_newline(mut line: &[u8]) -> &[u8] { + while let Some(&b) = line.last() { + if b != b'\n' && b != b'\r' { + break; + } + line = &line[..line.len()-1]; + } + line +} + +/// Returns whether a line of bytes is a multi-part boundary line for the +/// specified boundary string. 
+fn is_boundary_line(line: &[u8], boundary: &[u8]) -> bool { + line.starts_with(b"--") && + !boundary.is_empty() && + line[2..].starts_with(&boundary) +} + + +impl Iterator for EmailParser<'_> { + type Item = Element; + + fn next(&mut self) -> Option { + let mut inprogress = Vec::new(); + let mut element = None; + + // Loop until we recognize an element (or reach end of input). + loop { + let line = match self.lines.next() { + Some(l) => l, + None => break, + }; + + if self.in_header { + match line[0] { + // Empty lines denote the end of header. + b'\n' | b'\r' => { + self.in_header = false; + element = Some(Element::Verbatim{data: line.to_vec()}); + break; + }, + // Lines beginning with are continuation lines. + b' ' | b'\t' => { + vec_trim_end_newline(&mut inprogress); + inprogress.extend(line); + }, + _ => inprogress = line.to_vec(), + }; + + // If the next line is not a continuation line, break + // to emit the current header field. + if let Some(next_line) = self.lines.peek() { + if next_line[0] != b' ' && next_line[0] != b'\t' { + break; + } + } + + continue; + } + + if is_boundary_line(&line, &self.active_boundary) { + if slice_trim_end_newline(&line).ends_with(b"--") { + self.end_part(); + } else { + self.begin_part(); + // After a boundary start line we expect a header. + self.in_header = true; + } + + element = Some(Element::Verbatim{data: line.to_vec()}); + break; + } + + // If we reached this point, this line is a body line. Append + // it to the inprogress data. + inprogress.extend(line); + + // If next line is a boundary line, break to emit the current + // body. + if let Some(next_line) = self.lines.peek() { + if is_boundary_line(next_line, &self.active_boundary) { + break; + } + } + } + + // Breaking out the loop happens in three cases: + // 1. End of input + // 2. We have recognized a verbatim element. + // 3. We have inprogress data that we have recognized as a header field + // or body. 
+ + // If we have inprogress data, emit it as header or body. + if !inprogress.is_empty() { + // We shouldn't have set an element at this point, since we have + // inprogress data, and this would lead to loss of data. + assert!(element.is_none()); + + if self.in_header { + element = Some(Element::HeaderField{data: inprogress}); + } else { + element = Some( + Element::Body{ + data: inprogress, + encoding: self.active_encoding(), + content_type: self.active_content_type(), + charset: self.active_charset(), + } + ); + } + } + + if let Some(Element::HeaderField{data: field}) = element.as_ref() { + self.update_active_part_from_header_field(&field); + } + + element + } +} + +/// Decodes a byte array slice with the specified content encoding and charset +/// to utf-8 byte data, appending to the specified Vec. +fn decode_text_data_to_buf( + data: &[u8], + encoding: Option<&str>, + charset: Option<&str>, + mut out: &mut Vec, +) { + let should_decode = encoding.is_some(); + let mut should_convert_charset = true; + let initial_len = out.len(); + + if should_decode { + let result = match encoding.unwrap().as_ref() { + "base64" => base64_decode_into_buf(&data, &mut out), + "quoted-printable" => qp_decode_into_buf(&data, &mut out), + "8bit" | "binary" => { out.extend(data); Ok(()) }, + _ => Err("unknown encoding".into()), + }; + + if result.is_err() { + out.resize(initial_len, 0); + should_convert_charset = false; + } + } + + if out.len() == initial_len { + out.extend(data); + } + + if should_convert_charset { + if let Some(chr) = Charset::for_label(charset.unwrap_or("us-ascii").as_bytes()) { + let (cow, _, _) = chr.decode(&out[initial_len..]); + if let Cow::Owned(c) = cow { + out.resize(initial_len, 0); + out.extend(c.bytes()); + } + } + } +} + +/// Returns whether a byte array slice could contain an MIME encoded-wor