summaryrefslogtreecommitdiffstats
path: root/src/decompressor.rs
blob: d25c2f56bb83e0ef68efd520d9384f362bb06cc3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fmt;
use std::io::{self, Read};
use std::path::Path;
use std::process::{self, Stdio};

use globset::{Glob, GlobSet, GlobSetBuilder};

/// A decompression command, contains the command to be spawned as well as any
/// necessary CLI args.
///
/// Both fields borrow `'static` data, so a value is cheap to copy and can be
/// stored in a global lookup table.
#[derive(Clone, Copy, Debug)]
struct DecompressionCommand {
    /// Name of the program to spawn.
    cmd: &'static str,
    /// Fixed arguments handed to `cmd`.
    args: &'static [&'static str],
}

impl DecompressionCommand {
    /// Create a new decompression command from a program name and its fixed
    /// argument list.
    fn new(
        cmd: &'static str,
        args: &'static [&'static str],
    ) -> DecompressionCommand {
        DecompressionCommand { cmd, args }
    }
}

impl fmt::Display for DecompressionCommand {
    /// Writes the program name followed by each argument, separated by
    /// single spaces.
    ///
    /// Each piece is written directly to the formatter, so no temporary
    /// `String` is built, and a command with no arguments is rendered as
    /// just the program name with no trailing space.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(self.cmd)?;
        for arg in self.args {
            write!(f, " {}", arg)?;
        }
        Ok(())
    }
}

lazy_static! {
    // Lookup table from a file extension (without the leading dot) to the
    // command used to decompress files with that extension.
    static ref DECOMPRESSION_COMMANDS: HashMap<
        &'static str,
        DecompressionCommand,
    > = {
        const ARGS: &[&str] = &["-d", "-c"];
        const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"];

        // Each row is (extension, program, arguments).
        const TABLE: &[(&str, &str, &[&str])] = &[
            ("gz", "gzip", ARGS),
            ("bz2", "bzip2", ARGS),
            ("xz", "xz", ARGS),
            ("lz4", "lz4", ARGS),
            ("lzma", "xz", LZMA_ARGS),
        ];

        TABLE
            .iter()
            .map(|&(ext, program, args)| {
                (ext, DecompressionCommand::new(program, args))
            })
            .collect()
    };
    // Globs for file names carrying one of the extensions that have an
    // entry in `DECOMPRESSION_COMMANDS`.
    static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = {
        const PATTERNS: &[&str] = &[
            "*.gz",
            "*.bz2",
            "*.xz",
            "*.lz4",
            "*.lzma",
        ];

        let mut globs = GlobSetBuilder::new();
        for &pattern in PATTERNS {
            globs.add(Glob::new(pattern).unwrap());
        }
        globs.build().unwrap()
    };
    // Globs for the TAR archive file names recognised by this module, both
    // the long (`*.tar.gz`) and the abbreviated (`*.tgz`) spellings.
    static ref TAR_ARCHIVE_FORMATS: GlobSet = {
        const PATTERNS: &[&str] = &[
            "*.tar.gz",
            "*.tar.xz",
            "*.tar.bz2",
            "*.tar.lz4",
            "*.tgz",
            "*.txz",
            "*.tbz2",
        ];

        let mut globs = GlobSetBuilder::new();
        for &pattern in PATTERNS {
            globs.add(Glob::new(pattern).unwrap());
        }
        globs.build().unwrap()
    };
}

/// DecompressionReader provides an `io::Read` implementation for a limited
/// set of compression formats.
///
/// It wraps a spawned decompression process: reads are served from the
/// child's stdout, and the child is waited on once that output reaches
/// end-of-file.
#[derive(Debug)]
pub struct DecompressionReader {
    /// The command that was spawned; used when building error messages.
    cmd: DecompressionCommand,
    /// The spawned child process whose stdout and stderr are read.
    child: process::Child,
    /// Set once the child's stdout has reported end-of-file, after which
    /// every read returns `Ok(0)` without touching the child again.
    done: bool,
}

impl DecompressionReader {
    /// Returns a handle to the stdout of the spawned decompression process for
    /// `path`, which can be directly searched in the worker. When the returned
    /// value is exhausted, the underlying process is reaped. If the underlying
    /// process fails, then its stderr is read and converted into a normal
    /// io::Error.
    ///
    /// The command is chosen by looking up the extension of `path` in
    /// `DECOMPRESSION_COMMANDS`; `path` is passed to it as the final argument.
    ///
    /// If there is any error in spawning the decompression command, then
    /// return `None`, after outputting any necessary debug or error messages.
    /// `None` is also returned when `path` has no UTF-8 extension or when no
    /// command is registered for that extension.
    pub fn from_path(path: &Path) -> Option<DecompressionReader> {
        let extension = match path.extension().and_then(OsStr::to_str) {
            Some(extension) => extension,
            None => {
                debug!(
                    "{}: failed to get compression extension", path.display());
                return None;
            }
        };
        // `DecompressionCommand` is `Copy`, so take it out of the table by
        // value rather than holding a reference into the global map.
        let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) {
            Some(cmd) => *cmd,
            None => {
                debug!(
                    "{}: failed to get decompression command", path.display());
                return None;
            }
        };
        // Both output streams are piped: stdout carries the decompressed
        // data, stderr is kept so a failure can be reported later.
        let spawned = process::Command::new(decompression_cmd.cmd)
            .args(decompression_cmd.args)
            .arg(path)
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn();
        let child = match spawned {
            Ok(child) => child,
            Err(err) => {
                // Only claim the program is missing when the OS said so;
                // any other spawn failure is reported with its real cause.
                if err.kind() == io::ErrorKind::NotFound {
                    debug!(
                        "{}: decompression command '{}' not found",
                        path.display(), decompression_cmd.cmd);
                } else {
                    debug!(
                        "{}: failed to spawn decompression command '{}': {}",
                        path.display(), decompression_cmd.cmd, err);
                }
                return None;
            }
        };
        Some(DecompressionReader::new(decompression_cmd, child))
    }

    /// Wraps an already spawned `child` together with the command that
    /// produced it. The reader starts out not yet exhausted.
    fn new(
        cmd: DecompressionCommand,
        child: process::Child,
    ) -> DecompressionReader {
        DecompressionReader {
            cmd,
            child,
            done: false,
        }
    }

    /// Reads everything the child wrote to stderr and turns it into an
    /// `io::Error` describing the failed command.
    ///
    /// The outer `Result` is `Err` only when reading stderr itself fails;
    /// otherwise the `Ok` value is the error to hand back to the caller. When
    /// stderr is empty (after trimming whitespace), a generic message naming
    /// the command is used instead.
    fn read_error(&mut self) -> io::Result<io::Error> {
        let mut errbytes = vec![];
        // `from_path` always spawns the child with a piped stderr, so the
        // handle is expected to be present here.
        self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
        // stderr is not guaranteed to be valid UTF-8; replace bad bytes
        // rather than fail while reporting another failure.
        let errstr = String::from_utf8_lossy(&errbytes);
        let errstr = errstr.trim();

        let msg = if errstr.is_empty() {
            format!("decompression command failed: '{}'", self.cmd)
        } else {
            format!(
                "decompression command '{}' failed: {}", self.cmd, errstr)
        };
        Ok(io::Error::new(io::ErrorKind::Other, msg))
    }
}

impl io::Read for DecompressionReader {
    /// Reads decompressed bytes from the child's stdout into `buf`.
    ///
    /// When stdout reaches end-of-file the child is reaped with `wait`. If it
    /// exited unsuccessfully, that call returns an error built from the
    /// child's stderr; otherwise it returns `Ok(0)`. Every later call returns
    /// `Ok(0)` without touching the child again.
    ///
    /// NOTE(review): stderr is only read after stdout has reached
    /// end-of-file. A child that writes a large amount to stderr before
    /// closing stdout could presumably stall; confirm whether that needs
    /// handling.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.done {
            return Ok(0);
        }
        // Reading into a zero-length buffer yields `Ok(0)` regardless of
        // whether more data is pending, so it must not be mistaken for
        // end-of-file: doing so would mark the reader as done and wait on a
        // child that may still be producing output.
        if buf.is_empty() {
            return Ok(0);
        }
        let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
        if nread == 0 {
            self.done = true;
            // Reap the child now that we're done reading.
            // If the command failed, report stderr as an error.
            if !self.child.wait()?.success() {
                return Err(self.read_error()?);
            }
        }
        Ok(nread)
    }
}

/// Returns true if the given path contains a supported compression format or
/// is a TAR archive.
///
/// This is the logical OR of `is_supported_compression_format` and
/// `is_tar_archive`; the second check only runs when the first is false.
pub fn is_compressed(path: &Path) -> bool {
    is_supported_compression_format(path) || is_tar_archive(path)
}

/// Returns true if the given path matches any one of the supported compression
/// formats, i.e. any glob in `SUPPORTED_COMPRESSION_FORMATS`.
fn is_supported_compression_format(path: &Path) -> bool {
    SUPPORTED_COMPRESSION_FORMATS.is_match(path)
}

/// Returns true if the given path matches any of the known TAR file formats,
/// i.e. any glob in `TAR_ARCHIVE_FORMATS`.
fn is_tar_archive(path: &Path) -> bool {
    TAR_ARCHIVE_FORMATS.is_match(path)
}