Skip to main content

coreutils_rs/du/
core.rs

1use std::collections::HashSet;
2use std::io::{self, BufRead, Write};
3use std::os::unix::fs::MetadataExt;
4use std::path::{Path, PathBuf};
5use std::time::SystemTime;
6
7/// Configuration for the `du` command.
8pub struct DuConfig {
9    /// Show counts for all files, not just directories.
10    pub all: bool,
11    /// Print apparent sizes rather than disk usage.
12    pub apparent_size: bool,
13    /// Block size for output scaling.
14    pub block_size: u64,
15    /// Human-readable output (powers of 1024).
16    pub human_readable: bool,
17    /// Human-readable output (powers of 1000).
18    pub si: bool,
19    /// Produce a grand total.
20    pub total: bool,
21    /// Maximum depth of directory traversal to display.
22    pub max_depth: Option<usize>,
23    /// Only display a total for each argument.
24    pub summarize: bool,
25    /// Stay on the same filesystem.
26    pub one_file_system: bool,
27    /// Dereference all symbolic links.
28    pub dereference: bool,
29    /// For directories, do not include size of subdirectories.
30    pub separate_dirs: bool,
31    /// Count sizes of hard-linked files multiple times.
32    pub count_links: bool,
33    /// End output lines with NUL instead of newline.
34    pub null_terminator: bool,
35    /// Exclude entries smaller (or larger if negative) than this threshold.
36    pub threshold: Option<i64>,
37    /// Show modification time of entries.
38    pub show_time: bool,
39    /// Time format style (full-iso, long-iso, iso).
40    pub time_style: String,
41    /// Glob patterns to exclude.
42    pub exclude_patterns: Vec<String>,
43    /// Count inodes instead of sizes.
44    pub inodes: bool,
45}
46
47impl Default for DuConfig {
48    fn default() -> Self {
49        DuConfig {
50            all: false,
51            apparent_size: false,
52            block_size: 1024,
53            human_readable: false,
54            si: false,
55            total: false,
56            max_depth: None,
57            summarize: false,
58            one_file_system: false,
59            dereference: false,
60            separate_dirs: false,
61            count_links: false,
62            null_terminator: false,
63            threshold: None,
64            show_time: false,
65            time_style: "long-iso".to_string(),
66            exclude_patterns: Vec::new(),
67            inodes: false,
68        }
69    }
70}
71
72/// A single entry produced by `du` traversal.
73pub struct DuEntry {
74    /// Size in bytes (or inode count if inodes mode).
75    pub size: u64,
76    /// Path of the entry.
77    pub path: PathBuf,
78    /// Modification time (seconds since epoch), if available.
79    pub mtime: Option<i64>,
80}
81
82/// Traverse `path` and collect `DuEntry` results according to `config`.
83pub fn du_path(path: &Path, config: &DuConfig) -> io::Result<Vec<DuEntry>> {
84    let mut seen_inodes: HashSet<(u64, u64)> = HashSet::new();
85    let mut had_error = false;
86    du_path_with_seen(path, config, &mut seen_inodes, &mut had_error)
87}
88
89/// Traverse `path` with a shared inode set (for deduplication across multiple arguments).
90/// Sets `had_error` to true if any permission or access errors are encountered.
91pub fn du_path_with_seen(
92    path: &Path,
93    config: &DuConfig,
94    seen_inodes: &mut HashSet<(u64, u64)>,
95    had_error: &mut bool,
96) -> io::Result<Vec<DuEntry>> {
97    let mut entries = Vec::new();
98    du_recursive(path, config, seen_inodes, &mut entries, 0, None, had_error)?;
99    Ok(entries)
100}
101
102/// Recursive traversal core. Returns the cumulative size of the subtree at `path`.
103fn du_recursive(
104    path: &Path,
105    config: &DuConfig,
106    seen: &mut HashSet<(u64, u64)>,
107    entries: &mut Vec<DuEntry>,
108    depth: usize,
109    root_dev: Option<u64>,
110    had_error: &mut bool,
111) -> io::Result<u64> {
112    let meta = if config.dereference {
113        std::fs::metadata(path)?
114    } else {
115        std::fs::symlink_metadata(path)?
116    };
117
118    // Check one-file-system: skip entries on different devices.
119    if let Some(dev) = root_dev {
120        if meta.dev() != dev && config.one_file_system {
121            return Ok(0);
122        }
123    }
124
125    // Track hard links: skip files we have already counted (unless --count-links).
126    let ino_key = (meta.dev(), meta.ino());
127    if meta.nlink() > 1 && !config.count_links {
128        if !seen.insert(ino_key) {
129            return Ok(0);
130        }
131    }
132
133    let size = if config.inodes {
134        1
135    } else if config.apparent_size {
136        // GNU du uses 0 for directory entries themselves in apparent-size mode;
137        // only file content sizes are counted.
138        if meta.is_dir() { 0 } else { meta.len() }
139    } else {
140        meta.blocks() * 512
141    };
142
143    let mtime = meta.mtime();
144
145    if meta.is_dir() {
146        // Track the full subtree size (for returning to parent)
147        // and the display size (what we show for this directory).
148        let mut subtree_size: u64 = size;
149        // For separate_dirs, display size only includes this dir + direct files, not subdirs.
150        let mut display_size: u64 = size;
151
152        let read_dir = match std::fs::read_dir(path) {
153            Ok(rd) => rd,
154            Err(e) => {
155                eprintln!(
156                    "du: cannot read directory '{}': {}",
157                    path.display(),
158                    format_io_error(&e)
159                );
160                *had_error = true;
161                // Still report what we can for this directory.
162                if should_report_dir(config, depth) {
163                    entries.push(DuEntry {
164                        size,
165                        path: path.to_path_buf(),
166                        mtime: if config.show_time { Some(mtime) } else { None },
167                    });
168                }
169                return Ok(size);
170            }
171        };
172
173        for entry in read_dir {
174            let entry = match entry {
175                Ok(e) => e,
176                Err(e) => {
177                    eprintln!(
178                        "du: cannot access entry in '{}': {}",
179                        path.display(),
180                        format_io_error(&e)
181                    );
182                    *had_error = true;
183                    continue;
184                }
185            };
186            let child_path = entry.path();
187
188            // Check exclude patterns against the file name.
189            if let Some(name) = child_path.file_name() {
190                let name_str = name.to_string_lossy();
191                if config
192                    .exclude_patterns
193                    .iter()
194                    .any(|pat| glob_match(pat, &name_str))
195                {
196                    continue;
197                }
198            }
199
200            // Check if child is a directory (for separate_dirs logic).
201            let child_is_dir = child_path.symlink_metadata().map_or(false, |m| m.is_dir());
202
203            let child_size = du_recursive(
204                &child_path,
205                config,
206                seen,
207                entries,
208                depth + 1,
209                Some(root_dev.unwrap_or(meta.dev())),
210                had_error,
211            )?;
212            subtree_size += child_size;
213            if config.separate_dirs && child_is_dir {
214                // Don't add subdirectory sizes to display size.
215            } else {
216                display_size += child_size;
217            }
218        }
219
220        // Emit an entry for this directory if within display depth.
221        if should_report_dir(config, depth) {
222            entries.push(DuEntry {
223                size: display_size,
224                path: path.to_path_buf(),
225                mtime: if config.show_time { Some(mtime) } else { None },
226            });
227        }
228
229        Ok(subtree_size)
230    } else {
231        // Regular file / symlink / special file.
232        // Always report top-level arguments (depth 0), or all files if --all.
233        if (depth == 0 || config.all) && within_depth(config, depth) {
234            entries.push(DuEntry {
235                size,
236                path: path.to_path_buf(),
237                mtime: if config.show_time { Some(mtime) } else { None },
238            });
239        }
240        Ok(size)
241    }
242}
243
244/// Whether a directory entry at `depth` should be reported.
245fn should_report_dir(config: &DuConfig, depth: usize) -> bool {
246    if config.summarize {
247        return depth == 0;
248    }
249    within_depth(config, depth)
250}
251
252/// Whether `depth` is within the configured max_depth.
253fn within_depth(config: &DuConfig, depth: usize) -> bool {
254    match config.max_depth {
255        Some(max) => depth <= max,
256        None => true,
257    }
258}
259
260/// Simple glob matching supporting `*` and `?` wildcards.
261pub fn glob_match(pattern: &str, text: &str) -> bool {
262    let pat: Vec<char> = pattern.chars().collect();
263    let txt: Vec<char> = text.chars().collect();
264    glob_match_inner(&pat, &txt)
265}
266
267fn glob_match_inner(pat: &[char], txt: &[char]) -> bool {
268    let mut pi = 0;
269    let mut ti = 0;
270    let mut star_pi = usize::MAX;
271    let mut star_ti = 0;
272
273    while ti < txt.len() {
274        if pi < pat.len() && (pat[pi] == '?' || pat[pi] == txt[ti]) {
275            pi += 1;
276            ti += 1;
277        } else if pi < pat.len() && pat[pi] == '*' {
278            star_pi = pi;
279            star_ti = ti;
280            pi += 1;
281        } else if star_pi != usize::MAX {
282            pi = star_pi + 1;
283            star_ti += 1;
284            ti = star_ti;
285        } else {
286            return false;
287        }
288    }
289
290    while pi < pat.len() && pat[pi] == '*' {
291        pi += 1;
292    }
293    pi == pat.len()
294}
295
296/// Format a size value for display according to the config.
297pub fn format_size(raw_bytes: u64, config: &DuConfig) -> String {
298    if config.human_readable {
299        human_readable(raw_bytes, 1024)
300    } else if config.si {
301        human_readable(raw_bytes, 1000)
302    } else if config.inodes {
303        raw_bytes.to_string()
304    } else {
305        // Scale by block_size, rounding up.
306        let scaled = (raw_bytes + config.block_size - 1) / config.block_size;
307        scaled.to_string()
308    }
309}
310
311/// Format a byte count in human-readable form (e.g., 1.5K, 23M).
312/// GNU du uses ceiling rounding and always shows one decimal for values < 10.
313fn human_readable(bytes: u64, base: u64) -> String {
314    let suffixes = if base == 1024 {
315        &["", "K", "M", "G", "T", "P", "E"]
316    } else {
317        &["", "k", "M", "G", "T", "P", "E"]
318    };
319
320    if bytes < base {
321        return format!("{}", bytes);
322    }
323
324    let mut value = bytes as f64;
325    let mut idx = 0;
326    while value >= base as f64 && idx + 1 < suffixes.len() {
327        value /= base as f64;
328        idx += 1;
329    }
330
331    if value >= 10.0 {
332        format!("{:.0}{}", value.ceil(), suffixes[idx])
333    } else {
334        // Show one decimal place for values < 10 (GNU keeps the .0).
335        let rounded = (value * 10.0).ceil() / 10.0;
336        if rounded >= 10.0 {
337            format!("{:.0}{}", rounded.ceil(), suffixes[idx])
338        } else {
339            format!("{:.1}{}", rounded, suffixes[idx])
340        }
341    }
342}
343
344/// Format a modification time for display.
345pub fn format_time(epoch_secs: i64, style: &str) -> String {
346    // Convert epoch seconds to a broken-down time.
347    let secs = epoch_secs;
348    let st = match SystemTime::UNIX_EPOCH.checked_add(std::time::Duration::from_secs(secs as u64)) {
349        Some(t) => t,
350        None => return String::from("?"),
351    };
352
353    // Use libc localtime_r for correct timezone handling.
354    let mut tm: libc::tm = unsafe { std::mem::zeroed() };
355    let time_t = secs as libc::time_t;
356    unsafe {
357        libc::localtime_r(&time_t, &mut tm);
358    }
359    // Ignore the SystemTime; we use the libc tm directly.
360    let _ = st;
361
362    let year = tm.tm_year + 1900;
363    let mon = tm.tm_mon + 1;
364    let day = tm.tm_mday;
365    let hour = tm.tm_hour;
366    let min = tm.tm_min;
367    let sec = tm.tm_sec;
368
369    match style {
370        "full-iso" => format!(
371            "{:04}-{:02}-{:02} {:02}:{:02}:{:02}.000000000 +0000",
372            year, mon, day, hour, min, sec
373        ),
374        "iso" => format!("{:04}-{:02}-{:02}", year, mon, day),
375        _ => {
376            // long-iso (default)
377            format!("{:04}-{:02}-{:02} {:02}:{:02}", year, mon, day, hour, min)
378        }
379    }
380}
381
382/// Print a single DuEntry.
383pub fn print_entry<W: Write>(out: &mut W, entry: &DuEntry, config: &DuConfig) -> io::Result<()> {
384    // Apply threshold filtering.
385    if let Some(thresh) = config.threshold {
386        let size_signed = entry.size as i64;
387        if thresh >= 0 && size_signed < thresh {
388            return Ok(());
389        }
390        if thresh < 0 && size_signed > thresh.unsigned_abs() as i64 {
391            return Ok(());
392        }
393    }
394
395    let size_str = format_size(entry.size, config);
396
397    if config.show_time {
398        if let Some(mtime) = entry.mtime {
399            let time_str = format_time(mtime, &config.time_style);
400            write!(out, "{}\t{}\t{}", size_str, time_str, entry.path.display())?;
401        } else {
402            write!(out, "{}\t{}", size_str, entry.path.display())?;
403        }
404    } else {
405        write!(out, "{}\t{}", size_str, entry.path.display())?;
406    }
407
408    if config.null_terminator {
409        out.write_all(b"\0")?;
410    } else {
411        out.write_all(b"\n")?;
412    }
413
414    Ok(())
415}
416
417/// Parse a block size string like "1K", "1M", "1G", etc.
418/// Returns the number of bytes per block.
419pub fn parse_block_size(s: &str) -> Result<u64, String> {
420    let s = s.trim();
421    if s.is_empty() {
422        return Err("empty block size".to_string());
423    }
424
425    let mut num_end = 0;
426    for (i, c) in s.char_indices() {
427        if c.is_ascii_digit() {
428            num_end = i + 1;
429        } else {
430            break;
431        }
432    }
433
434    let (num_str, suffix) = s.split_at(num_end);
435    let base_val: u64 = if num_str.is_empty() {
436        1
437    } else {
438        num_str
439            .parse()
440            .map_err(|_| format!("invalid block size: '{}'", s))?
441    };
442
443    let multiplier = match suffix.to_uppercase().as_str() {
444        "" => 1u64,
445        "B" => 1,
446        "K" | "KB" => 1024,
447        "M" | "MB" => 1024 * 1024,
448        "G" | "GB" => 1024 * 1024 * 1024,
449        "T" | "TB" => 1024u64 * 1024 * 1024 * 1024,
450        "P" | "PB" => 1024u64 * 1024 * 1024 * 1024 * 1024,
451        "KB_SI" => 1000,
452        _ => return Err(format!("invalid suffix in block size: '{}'", s)),
453    };
454
455    Ok(base_val * multiplier)
456}
457
458/// Parse a threshold value. Positive means "exclude entries smaller than SIZE".
459/// Negative means "exclude entries larger than -SIZE".
460pub fn parse_threshold(s: &str) -> Result<i64, String> {
461    let s = s.trim();
462    let (negative, rest) = if let Some(stripped) = s.strip_prefix('-') {
463        (true, stripped)
464    } else {
465        (false, s)
466    };
467
468    let val = parse_block_size(rest)? as i64;
469    if negative { Ok(-val) } else { Ok(val) }
470}
471
472/// Read exclude patterns from a file (one per line).
473pub fn read_exclude_file(path: &str) -> io::Result<Vec<String>> {
474    let file = std::fs::File::open(path)?;
475    let reader = io::BufReader::new(file);
476    let mut patterns = Vec::new();
477    for line in reader.lines() {
478        let line = line?;
479        let trimmed = line.trim();
480        if !trimmed.is_empty() {
481            patterns.push(trimmed.to_string());
482        }
483    }
484    Ok(patterns)
485}
486
487/// Format an IO error without the "(os error N)" suffix.
488fn format_io_error(e: &io::Error) -> String {
489    if let Some(raw) = e.raw_os_error() {
490        let os_err = io::Error::from_raw_os_error(raw);
491        let msg = format!("{}", os_err);
492        msg.replace(&format!(" (os error {})", raw), "")
493    } else {
494        format!("{}", e)
495    }
496}