summaryrefslogtreecommitdiffstats
path: root/src/main.rs
blob: 00ac3019cf8581037dcb089cbb8e79a7e564025d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
extern crate humansize;
extern crate ignore;
extern crate num_cpus;
#[macro_use]
extern crate clap;

use std::collections::HashSet;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use std::sync::mpsc::channel;
use std::thread;

use clap::{App, AppSettings, Arg};
use humansize::{file_size_opts, FileSize};
use ignore::WalkBuilder;

/// Walks the tree rooted at `p` in parallel and returns the summed length
/// (as reported by each entry's metadata) of everything that was visited.
///
/// * `p` - root path of the walk.
/// * `num_threads` - value handed to the walker's `threads` setting.
///
/// Regular files with more than one hard link are counted only once, keyed
/// by their `(device, inode)` pair. Entries that cannot be read are reported
/// on stderr and skipped.
///
/// # Panics
///
/// Panics if the summing thread itself panicked.
fn get_size<P: AsRef<Path>>(p: P, num_threads: usize) -> u64 {
    // Switch off every ignore/hidden-file filter the walker offers and do not
    // follow symbolic links.
    let walker = WalkBuilder::new(p)
        .hidden(false)
        .parents(false)
        .ignore(false)
        .git_global(false)
        .git_ignore(false)
        .git_exclude(false)
        .follow_links(false)
        .threads(num_threads)
        .build_parallel();

    // Each message is (optional (device, inode) identity, length in bytes).
    let (tx, rx) = channel::<(Option<(u64, u64)>, u64)>();

    // A single consumer thread accumulates the sizes sent by the walker
    // threads; it finishes once every sender has been dropped.
    let summing_thread = thread::spawn(move || {
        let mut seen_ids = HashSet::new();
        let mut sum: u64 = 0;

        for (identity, bytes) in rx {
            // Entries without an identity always count; entries with one
            // count only the first time that identity shows up.
            let first_sighting = match identity {
                Some(id) => seen_ids.insert(id),
                None => true,
            };

            if first_sighting {
                sum += bytes;
            }
        }

        sum
    });

    walker.run(|| {
        // Every visitor closure gets its own sender handle.
        let sender = tx.clone();
        Box::new(move |result| {
            let entry = match result {
                Ok(entry) => entry,
                Err(err) => {
                    eprintln!("I/O error: {}", err);
                    return ignore::WalkState::Continue;
                }
            };

            let metadata = match entry.metadata() {
                Ok(metadata) => metadata,
                Err(_) => {
                    eprintln!(
                        "Could not get metadata: '{}'",
                        entry.path().to_string_lossy()
                    );
                    return ignore::WalkState::Continue;
                }
            };

            // A file reachable through several hard links would otherwise be
            // counted once per link, so tag it with its device and inode.
            let is_multiply_linked_file = metadata.is_file() && metadata.nlink() > 1;
            let identity = if is_multiply_linked_file {
                Some((metadata.dev(), metadata.ino()))
            } else {
                None
            };

            // A failed send is deliberately ignored.
            let _ = sender.send((identity, metadata.len()));

            ignore::WalkState::Continue
        })
    });

    // Drop the original sender so the receiving loop can terminate.
    drop(tx);
    summing_thread.join().unwrap()
}

/// Prints `size` to stdout as a human-readable figure (formatted with
/// humansize's `DECIMAL` options) followed by the exact byte count.
///
/// # Panics
///
/// Panics if the human-readable formatting returns an error.
fn print_result(size: u64) {
    let human_readable = size.file_size(file_size_opts::DECIMAL).unwrap();
    println!("{} ({} bytes)", human_readable, size);
}

/// Entry point: parses the command line, computes the total size of the
/// current directory (`.`) and prints it.
///
/// The only option is `-j` / `--threads N`. If it is absent, or its value is
/// not a valid unsigned integer, the thread count defaults to three times the
/// number of CPU cores; in the invalid case a warning is printed to stderr
/// first.
fn main() {
    let app = App::new(crate_name!())
        .setting(AppSettings::ColorAuto)
        .setting(AppSettings::ColoredHelp)
        .setting(AppSettings::DeriveDisplayOrder)
        .setting(AppSettings::UnifiedHelpMessage)
        .version(crate_version!())
        .about("Compute disk usage for the current directory")
        .arg(
            Arg::with_name("threads")
                .long("threads")
                .short("j")
                .value_name("N")
                .takes_value(true)
                .help("Set the number of threads (default: 3 x num cores)"),
        );

    let matches = app.get_matches();

    // Setting the number of threads to 3x the number of cores is a good tradeoff between
    // cold-cache and warm-cache runs. For a cold disk cache, we are limited by disk IO and
    // therefore want the number of threads to be rather large in order for the IO scheduler to
    // plan ahead. On the other hand, the number of threads shouldn't be too high for warm disk
    // caches where we would otherwise pay a higher synchronization overhead.
    let default_num_threads = || 3 * num_cpus::get();

    let num_threads = matches
        .value_of("threads")
        .and_then(|raw| match raw.parse::<usize>() {
            Ok(n) => Some(n),
            Err(err) => {
                // Keep the best-effort fallback, but tell the user that the
                // value they passed was not used.
                eprintln!(
                    "Ignoring invalid value '{}' for --threads ({}), using the default",
                    raw, err
                );
                None
            }
        })
        // Only query the CPU count when the default is actually needed.
        .unwrap_or_else(default_num_threads);

    let size = get_size(".", num_threads);
    print_result(size);
}