use std::collections::HashSet;
use std::io::{self, BufRead, Write};
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::time::SystemTime;
/// Options that control how the tree is measured and how results are printed.
///
/// The field comments describe how the functions in this file read each option.
pub struct DuConfig {
/// Report non-directory entries below the starting path too (the starting
/// path itself is reported regardless).
pub all: bool,
/// Count file length (`len()`) instead of allocated blocks; directories
/// themselves contribute 0 in this mode.
pub apparent_size: bool,
/// Unit that byte counts are divided by (rounding up) in `format_size`.
pub block_size: u64,
/// Print sizes with power-of-1024 unit suffixes.
pub human_readable: bool,
/// Print sizes with power-of-1000 unit suffixes (consulted only when
/// `human_readable` is false).
pub si: bool,
/// NOTE(review): not read by any function in this section; presumably asks
/// the caller to print a grand total — confirm against the call site.
pub total: bool,
/// Deepest level (starting path = 0) for which entries are reported.
/// `None` means unlimited.
pub max_depth: Option<usize>,
/// Report only the starting directory, not its subdirectories.
pub summarize: bool,
/// Skip entries whose device id differs from the device of the starting path.
pub one_file_system: bool,
/// Follow symbolic links for every path visited.
pub dereference: bool,
/// Follow symbolic links only for the starting path (depth 0).
pub dereference_args: bool,
/// Leave the sizes of child directories out of the size reported for their
/// parent directory.
pub separate_dirs: bool,
/// Count an inode with more than one link every time it is encountered
/// instead of only the first time.
pub count_links: bool,
/// Terminate each printed entry with a NUL byte instead of a newline.
pub null_terminator: bool,
/// Size filter applied in `print_entry`: a non-negative value hides smaller
/// entries, a negative value hides entries larger than its magnitude.
pub threshold: Option<i64>,
/// Record each entry's modification time and print it.
pub show_time: bool,
/// Timestamp layout passed to `format_time` (`"full-iso"`, `"iso"`; any
/// other value selects the date-and-minutes layout).
pub time_style: String,
/// Glob patterns; a path is skipped when its file name or its full path
/// matches any of them.
pub exclude_patterns: Vec<String>,
/// Count one unit per entry instead of measuring bytes.
pub inodes: bool,
}
impl Default for DuConfig {
fn default() -> Self {
DuConfig {
all: false,
apparent_size: false,
block_size: 1024,
human_readable: false,
si: false,
total: false,
max_depth: None,
summarize: false,
one_file_system: false,
dereference: false,
dereference_args: false,
separate_dirs: false,
count_links: false,
null_terminator: false,
threshold: None,
show_time: false,
time_style: "long-iso".to_string(),
exclude_patterns: Vec::new(),
inodes: false,
}
}
}
/// One line of output: a path together with the size computed for it.
pub struct DuEntry {
/// Size attributed to `path` by the traversal: bytes, or a count of entries
/// when `DuConfig::inodes` is set. Not yet scaled to the display block size.
pub size: u64,
/// The path this entry describes.
pub path: PathBuf,
/// Modification time as returned by `MetadataExt::mtime`; populated only
/// when `DuConfig::show_time` is set.
pub mtime: Option<i64>,
}
/// Measures `path` with a fresh set of already-counted inodes.
///
/// This is a convenience wrapper around [`du_path_with_seen`]; the flag that
/// records non-fatal errors is created locally and dropped on return.
///
/// # Errors
/// Returns whatever error `du_path_with_seen` returns.
pub fn du_path(path: &Path, config: &DuConfig) -> io::Result<Vec<DuEntry>> {
    let mut error_flag = false;
    let mut visited = HashSet::<(u64, u64)>::new();
    du_path_with_seen(path, config, &mut visited, &mut error_flag)
}
/// Measures `path` and returns the entries to report.
///
/// `seen_inodes` holds the `(device, inode)` pairs that were already counted
/// and `had_error` is set when a non-fatal problem is reported; both are owned
/// by the caller so they can be shared across several calls.
///
/// # Errors
/// Returns the error produced by the recursive walk, in which case the
/// entries gathered so far are discarded.
pub fn du_path_with_seen(
    path: &Path,
    config: &DuConfig,
    seen_inodes: &mut HashSet<(u64, u64)>,
    had_error: &mut bool,
) -> io::Result<Vec<DuEntry>> {
    let mut collected: Vec<DuEntry> = Vec::new();
    // The walk starts at depth 0 with no root device recorded yet; its numeric
    // total is not needed here, only the entries it pushed.
    du_recursive(path, config, seen_inodes, &mut collected, 0, None, had_error)
        .map(|_total| collected)
}
/// Returns `true` when `path` matches any of `config.exclude_patterns`.
///
/// Each pattern is tried against the final path component first and then
/// against the whole path, both converted lossily to text.
fn is_excluded(path: &Path, config: &DuConfig) -> bool {
    let patterns = &config.exclude_patterns;
    if patterns.is_empty() {
        return false;
    }

    let full = path.to_string_lossy();
    // A path without a final component is matched as the empty string.
    let base = match path.file_name() {
        Some(name) => name.to_string_lossy(),
        None => std::borrow::Cow::Borrowed(""),
    };

    for pattern in patterns {
        if glob_match(pattern, &base) || glob_match(pattern, &full) {
            return true;
        }
    }
    false
}
/// Walks `path` depth-first, pushing the entries to report onto `entries` and
/// returning the size attributed to the whole subtree.
///
/// * `seen` — `(device, inode)` pairs already counted; an inode with more than
///   one link is counted once unless `config.count_links` is set.
/// * `depth` — 0 for the starting path, incremented for each level below it.
/// * `root_dev` — device id of the starting path, `None` while visiting the
///   starting path itself; used for `config.one_file_system`.
/// * `had_error` — set to `true` whenever a problem is reported on stderr and
///   the walk carries on.
///
/// # Errors
/// Returns an error only when the metadata of `path` itself cannot be read.
/// Failures on anything *below* `path` (unreadable directory, entry that
/// cannot be accessed) are reported on stderr, recorded in `had_error` and
/// skipped, so that one bad entry does not discard the rest of the walk.
fn du_recursive(
    path: &Path,
    config: &DuConfig,
    seen: &mut HashSet<(u64, u64)>,
    entries: &mut Vec<DuEntry>,
    depth: usize,
    root_dev: Option<u64>,
    had_error: &mut bool,
) -> io::Result<u64> {
    if is_excluded(path, config) {
        return Ok(0);
    }

    // Symlinks are followed everywhere with `dereference`, or only for the
    // starting path with `dereference_args`.
    let follow_symlink = config.dereference || (depth == 0 && config.dereference_args);
    let meta = if follow_symlink {
        std::fs::metadata(path)?
    } else {
        std::fs::symlink_metadata(path)?
    };

    // Entries living on another device than the starting path are ignored.
    if config.one_file_system && root_dev.map_or(false, |dev| meta.dev() != dev) {
        return Ok(0);
    }

    // `insert` returns false when the inode was already recorded.
    if meta.nlink() > 1 && !config.count_links && !seen.insert((meta.dev(), meta.ino())) {
        return Ok(0);
    }

    let size = if config.inodes {
        1
    } else if config.apparent_size {
        if meta.is_dir() {
            0
        } else {
            meta.len()
        }
    } else {
        // `blocks()` is expressed in 512-byte units.
        meta.blocks() * 512
    };
    let reported_mtime = if config.show_time {
        Some(meta.mtime())
    } else {
        None
    };

    if !meta.is_dir() {
        if (depth == 0 || config.all) && within_depth(config, depth) {
            entries.push(DuEntry {
                size,
                path: path.to_path_buf(),
                mtime: reported_mtime,
            });
        }
        return Ok(size);
    }

    let read_dir = match std::fs::read_dir(path) {
        Ok(rd) => rd,
        Err(e) => {
            eprintln!(
                "du: cannot read directory '{}': {}",
                path.display(),
                format_io_error(&e)
            );
            *had_error = true;
            // The directory itself is still counted and reported.
            if should_report_dir(config, depth) {
                entries.push(DuEntry {
                    size,
                    path: path.to_path_buf(),
                    mtime: reported_mtime,
                });
            }
            return Ok(size);
        }
    };

    let child_root_dev = Some(root_dev.unwrap_or_else(|| meta.dev()));
    // `subtree_size` is what the parent accumulates; `display_size` is what is
    // printed for this directory and may leave out child directories.
    let mut subtree_size: u64 = size;
    let mut display_size: u64 = size;

    for entry in read_dir {
        let entry = match entry {
            Ok(e) => e,
            Err(e) => {
                eprintln!(
                    "du: cannot access entry in '{}': {}",
                    path.display(),
                    format_io_error(&e)
                );
                *had_error = true;
                continue;
            }
        };
        let child_path = entry.path();
        if is_excluded(&child_path, config) {
            continue;
        }

        // The file type only matters for `separate_dirs`, so it is looked up
        // only in that mode.
        let hidden_from_parent = config.separate_dirs
            && entry
                .file_type()
                .or_else(|_| std::fs::symlink_metadata(&child_path).map(|m| m.file_type()))
                .map_or(false, |ft| ft.is_dir());

        let child_size = match du_recursive(
            &child_path,
            config,
            seen,
            entries,
            depth + 1,
            child_root_dev,
            had_error,
        ) {
            Ok(total) => total,
            Err(e) => {
                // Deeper failures are handled by the recursive call itself, so
                // an error here means this child's own metadata was unreadable.
                eprintln!(
                    "du: cannot access '{}': {}",
                    child_path.display(),
                    format_io_error(&e)
                );
                *had_error = true;
                continue;
            }
        };

        subtree_size += child_size;
        if !hidden_from_parent {
            display_size += child_size;
        }
    }

    if should_report_dir(config, depth) {
        entries.push(DuEntry {
            size: display_size,
            path: path.to_path_buf(),
            mtime: reported_mtime,
        });
    }
    Ok(subtree_size)
}
/// Decides whether a directory found at `depth` gets an output entry.
///
/// In summarize mode only the starting directory (depth 0) qualifies;
/// otherwise the decision is left to the depth limit.
fn should_report_dir(config: &DuConfig, depth: usize) -> bool {
    if config.summarize {
        depth == 0
    } else {
        within_depth(config, depth)
    }
}
/// Returns `true` when `depth` does not exceed `config.max_depth`; with no
/// limit configured every depth is accepted.
fn within_depth(config: &DuConfig, depth: usize) -> bool {
    config.max_depth.map_or(true, |limit| depth <= limit)
}
/// Tests whether `text` matches the glob `pattern`.
///
/// Both arguments are split into `char`s so that matching works on whole
/// characters rather than UTF-8 bytes; the actual matching is delegated to
/// `glob_match_inner`.
pub fn glob_match(pattern: &str, text: &str) -> bool {
    let pattern_chars = pattern.chars().collect::<Vec<char>>();
    let text_chars = text.chars().collect::<Vec<char>>();
    glob_match_inner(&pattern_chars[..], &text_chars[..])
}
/// Evaluates the bracket expression that opens at `pat[start]` (which must be
/// `'['`) against the single character `ch`.
///
/// Returns `Some((matched, next))`, where `next` is the index just past the
/// closing `]`, or `None` when the expression is never closed.
///
/// Supported syntax: a leading `^` or `!` negates the set, `a-z` denotes an
/// inclusive range, and a `]` placed first in the set is taken literally.
fn match_bracket_class(pat: &[char], start: usize, ch: char) -> Option<(bool, usize)> {
    let mut i = start + 1;
    if i >= pat.len() {
        return None;
    }
    let negate = matches!(pat[i], '^' | '!');
    if negate {
        i += 1;
    }

    let mut found = false;
    // A `]` seen before any set member is a member itself, not the terminator.
    let mut first = true;
    while i < pat.len() {
        if pat[i] == ']' && !first {
            return Some((found != negate, i + 1));
        }
        // `x-y` is a range unless the `-` is the last member before `]`.
        if i + 2 < pat.len() && pat[i + 1] == '-' && pat[i + 2] != ']' {
            if (pat[i]..=pat[i + 2]).contains(&ch) {
                found = true;
            }
            i += 3;
        } else {
            if pat[i] == ch {
                found = true;
            }
            i += 1;
        }
        first = false;
    }
    None
}

/// Matches `txt` against the glob `pat`, both given as character slices.
///
/// `*` matches any run of characters (including none), `?` matches exactly one
/// character and `[...]` matches one character from a set (see
/// `match_bracket_class`). A `[` that is never closed is an ordinary
/// character. The match is anchored at both ends.
///
/// The pattern character is classified *before* it is compared with the text,
/// so a `*` in the pattern keeps its wildcard meaning even when the text
/// character happens to be `*` as well.
fn glob_match_inner(pat: &[char], txt: &[char]) -> bool {
    let mut pi = 0;
    let mut ti = 0;
    // After the most recent `*`: (pattern index to resume at, text index the
    // star currently extends to). `None` until a `*` has been seen.
    let mut backtrack: Option<(usize, usize)> = None;

    while ti < txt.len() {
        let current = txt[ti];
        let consumed = match pat.get(pi).copied() {
            Some('*') => {
                // Start by letting the star match nothing; widen on mismatch.
                backtrack = Some((pi + 1, ti));
                pi += 1;
                continue;
            }
            Some('?') => {
                pi += 1;
                true
            }
            Some('[') => match match_bracket_class(pat, pi, current) {
                Some((true, end)) => {
                    pi = end;
                    true
                }
                Some((false, _)) => false,
                // Unterminated set: the `[` stands for itself.
                None => {
                    if current == '[' {
                        pi += 1;
                        true
                    } else {
                        false
                    }
                }
            },
            Some(literal) if literal == current => {
                pi += 1;
                true
            }
            _ => false,
        };

        if consumed {
            ti += 1;
            continue;
        }

        // Mismatch: let the last `*` swallow one more character and retry.
        match backtrack {
            Some((resume_pi, star_ti)) => {
                pi = resume_pi;
                ti = star_ti + 1;
                backtrack = Some((resume_pi, star_ti + 1));
            }
            None => return false,
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    pat[pi..].iter().all(|&c| c == '*')
}
/// Renders a raw size for display according to `config`.
///
/// Precedence: `human_readable` (powers of 1024), then `si` (powers of 1000),
/// then `inodes` (the number printed as is); otherwise the value is expressed
/// in units of `config.block_size`, rounded up.
///
/// A `block_size` of 0 is treated as 1 so that formatting can never divide by
/// zero.
pub fn format_size(raw_bytes: u64, config: &DuConfig) -> String {
    if config.human_readable {
        return human_readable(raw_bytes, 1024);
    }
    if config.si {
        return human_readable(raw_bytes, 1000);
    }
    if config.inodes {
        return raw_bytes.to_string();
    }

    let block = config.block_size.max(1);
    // Ceiling division written without `raw_bytes + block - 1`, which could
    // overflow for sizes near `u64::MAX`.
    let blocks = raw_bytes / block + u64::from(raw_bytes % block != 0);
    blocks.to_string()
}
/// Formats `bytes` with a unit suffix, scaling by powers of `base`.
///
/// Values below `base` are printed as plain integers. Larger values are
/// rounded *up*: to one decimal while the scaled value is below 10, to a whole
/// number otherwise. When rounding up reaches a full `base` (for example
/// 1 048 575 bytes with base 1024) the result carries into the next unit and
/// is printed as `1.0M` rather than `1024K`.
///
/// The kilo suffix is `K` for base 1024 and `k` for any other base. All
/// arithmetic is done on integers, so the result is exact for the whole `u64`
/// range. A `base` below 2 cannot be scaled and yields the plain number.
fn human_readable(bytes: u64, base: u64) -> String {
    const BINARY_SUFFIXES: [&str; 7] = ["", "K", "M", "G", "T", "P", "E"];
    const DECIMAL_SUFFIXES: [&str; 7] = ["", "k", "M", "G", "T", "P", "E"];
    let suffixes = if base == 1024 {
        &BINARY_SUFFIXES
    } else {
        &DECIMAL_SUFFIXES
    };

    if base < 2 || bytes < base {
        return bytes.to_string();
    }

    // u128 keeps `unit * base` and `value * 10` free of overflow.
    let value = u128::from(bytes);
    let base = u128::from(base);

    // Largest unit (a power of `base`) that does not exceed the value, capped
    // at the last available suffix.
    let mut unit = base;
    let mut idx = 1;
    while value >= unit * base && idx + 1 < suffixes.len() {
        unit *= base;
        idx += 1;
    }

    let whole = if value < 10 * unit {
        let tenths = (value * 10 + unit - 1) / unit;
        if tenths < 100 {
            return format!("{}.{}{}", tenths / 10, tenths % 10, suffixes[idx]);
        }
        // 9.9x rounded up to 10.0, which is printed without a decimal.
        10
    } else {
        (value + unit - 1) / unit
    };

    // Rounding up reached a full unit of the next magnitude: carry over.
    if whole >= base && idx + 1 < suffixes.len() {
        return format!("1.0{}", suffixes[idx + 1]);
    }
    format!("{}{}", whole, suffixes[idx])
}
/// Formats a Unix timestamp as local time.
///
/// `style` selects the layout:
/// * `"full-iso"` — `YYYY-MM-DD HH:MM:SS.000000000 ±HHMM`, where the offset is
///   the UTC offset that applies to the converted local time (only whole
///   seconds are available, so the fractional part is always zero);
/// * `"iso"` — `YYYY-MM-DD`;
/// * anything else — `YYYY-MM-DD HH:MM`.
///
/// Returns `"?"` when the timestamp cannot be represented or converted.
/// Timestamps before the epoch (negative values) are supported.
pub fn format_time(epoch_secs: i64, style: &str) -> String {
    // Reject values the platform's time type cannot represent, on either side
    // of the epoch, before handing them to libc.
    let magnitude = std::time::Duration::from_secs(epoch_secs.unsigned_abs());
    let representable = if epoch_secs >= 0 {
        SystemTime::UNIX_EPOCH.checked_add(magnitude)
    } else {
        SystemTime::UNIX_EPOCH.checked_sub(magnitude)
    };
    if representable.is_none() {
        return String::from("?");
    }

    let time_t = epoch_secs as libc::time_t;
    // SAFETY: `libc::tm` is a plain C struct made of integers and, on some
    // platforms, a nullable pointer; the all-zero bit pattern is valid for it.
    let mut tm: libc::tm = unsafe { std::mem::zeroed() };
    // SAFETY: both pointers refer to live, properly aligned locals and
    // `localtime_r` writes only into `tm`.
    let converted = unsafe { libc::localtime_r(&time_t, &mut tm) };
    if converted.is_null() {
        // Conversion failed; `tm` holds no meaningful data.
        return String::from("?");
    }

    let year = tm.tm_year + 1900;
    let mon = tm.tm_mon + 1;
    let day = tm.tm_mday;
    let hour = tm.tm_hour;
    let min = tm.tm_min;
    let sec = tm.tm_sec;
    match style {
        "full-iso" => {
            // The fields above are local time, so the printed offset has to be
            // the local one as well.
            // NOTE(review): `tm_gmtoff` exists on Linux, macOS and the BSDs;
            // confirm it is available on every target this crate builds for.
            let offset_secs = i64::from(tm.tm_gmtoff);
            let sign = if offset_secs < 0 { '-' } else { '+' };
            let offset_mins = offset_secs.unsigned_abs() / 60;
            format!(
                "{:04}-{:02}-{:02} {:02}:{:02}:{:02}.000000000 {}{:02}{:02}",
                year,
                mon,
                day,
                hour,
                min,
                sec,
                sign,
                offset_mins / 60,
                offset_mins % 60
            )
        }
        "iso" => format!("{:04}-{:02}-{:02}", year, mon, day),
        _ => format!("{:04}-{:02}-{:02} {:02}:{:02}", year, mon, day, hour, min),
    }
}
/// Writes one entry to `out` as `SIZE<TAB>[TIME<TAB>]PATH` followed by a NUL
/// byte (`config.null_terminator`) or a newline.
///
/// Entries rejected by `config.threshold` produce no output: a non-negative
/// threshold hides entries smaller than it, a negative threshold hides entries
/// larger than its magnitude. The time column is present only when
/// `config.show_time` is set and the entry carries a modification time.
///
/// The path is written as its raw bytes, so names that are not valid UTF-8
/// come out unchanged instead of having characters replaced. The whole line is
/// assembled first and written with a single call.
///
/// # Errors
/// Returns any error reported by the writer.
pub fn print_entry<W: Write>(out: &mut W, entry: &DuEntry, config: &DuConfig) -> io::Result<()> {
    if let Some(thresh) = config.threshold {
        // Compare as unsigned values so that no cast can wrap around.
        let limit = thresh.unsigned_abs();
        let hidden = if thresh >= 0 {
            entry.size < limit
        } else {
            entry.size > limit
        };
        if hidden {
            return Ok(());
        }
    }

    let mut line = format_size(entry.size, config).into_bytes();
    line.push(b'\t');
    if config.show_time {
        if let Some(mtime) = entry.mtime {
            line.extend_from_slice(format_time(mtime, &config.time_style).as_bytes());
            line.push(b'\t');
        }
    }
    // Called through the trait path so no additional `use` is required.
    line.extend_from_slice(std::os::unix::ffi::OsStrExt::as_bytes(
        entry.path.as_os_str(),
    ));
    line.push(if config.null_terminator { b'\0' } else { b'\n' });
    out.write_all(&line)
}
/// Parses a block-size argument such as `512`, `4K`, `1MiB` or `2MB`.
///
/// The argument is an optional decimal count followed by an optional,
/// case-insensitive unit. A missing count means 1.
///
/// | Unit                      | Multiplier          |
/// |---------------------------|---------------------|
/// | none, `B`                 | 1                   |
/// | `K` `M` `G` `T` `P` `E`   | powers of 1024      |
/// | `KiB` `MiB` … `EiB`       | powers of 1024      |
/// | `KB` `MB` … `EB`          | powers of 1000      |
/// | `KB_SI`                   | 1000 (kept for compatibility) |
///
/// # Errors
/// Returns a message when the input is empty, the count is not a valid `u64`,
/// the unit is unknown, or the resulting size does not fit in a `u64`.
pub fn parse_block_size(s: &str) -> Result<u64, String> {
    const BINARY: u64 = 1024;
    const DECIMAL: u64 = 1000;

    let s = s.trim();
    if s.is_empty() {
        return Err("empty block size".to_string());
    }

    // Split at the first character that is not an ASCII digit.
    let num_end = s
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or_else(|| s.len());
    let (num_str, suffix) = s.split_at(num_end);

    let base_val: u64 = if num_str.is_empty() {
        1
    } else {
        num_str
            .parse()
            .map_err(|_| format!("invalid block size: '{}'", s))?
    };

    let unit = suffix.to_ascii_uppercase();
    let multiplier = match unit.as_str() {
        "" | "B" => 1,
        "KB_SI" => DECIMAL,
        other => {
            // `XiB` and bare `X` are binary units, `XB` is a decimal unit.
            let (letter, radix) = if let Some(prefix) = other.strip_suffix("IB") {
                (prefix, BINARY)
            } else if let Some(prefix) = other.strip_suffix('B') {
                (prefix, DECIMAL)
            } else {
                (other, BINARY)
            };
            let exponent = match letter {
                "K" => 1,
                "M" => 2,
                "G" => 3,
                "T" => 4,
                "P" => 5,
                "E" => 6,
                _ => return Err(format!("invalid suffix in block size: '{}'", s)),
            };
            // 1024^6 and 1000^6 both fit in a u64.
            radix.pow(exponent)
        }
    };

    base_val
        .checked_mul(multiplier)
        .ok_or_else(|| format!("block size too large: '{}'", s))
}
/// Parses a `--threshold` argument: a size in the syntax accepted by
/// `parse_block_size`, optionally preceded by `-`.
///
/// A positive result is a minimum size and a negative result a maximum size
/// (see `print_entry`).
///
/// # Errors
/// Returns a message when the size part is rejected by `parse_block_size`,
/// when its magnitude exceeds `i64::MAX`, or when the argument is a negative
/// zero such as `-0`.
pub fn parse_threshold(s: &str) -> Result<i64, String> {
    let s = s.trim();
    let (negative, size_text) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s),
    };

    let raw = parse_block_size(size_text)?;
    // A plain `as i64` cast would wrap large magnitudes into negative numbers
    // and silently turn a minimum into a maximum.
    if raw > i64::MAX as u64 {
        return Err(format!("--threshold argument '{}' too large", s));
    }
    let magnitude = raw as i64;

    if !negative {
        return Ok(magnitude);
    }
    if magnitude == 0 {
        return Err(format!("invalid --threshold argument '-{}'", size_text));
    }
    Ok(-magnitude)
}
/// Loads exclusion patterns from the file at `path`, one pattern per line.
///
/// Every line is trimmed of surrounding whitespace; lines that are empty after
/// trimming are dropped.
///
/// # Errors
/// Returns the I/O error from opening the file or from the first line that
/// cannot be read.
pub fn read_exclude_file(path: &str) -> io::Result<Vec<String>> {
    let reader = io::BufReader::new(std::fs::File::open(path)?);
    reader
        .lines()
        .filter_map(|line| match line {
            Ok(text) => {
                let pattern = text.trim();
                if pattern.is_empty() {
                    None
                } else {
                    Some(Ok(pattern.to_string()))
                }
            }
            // Keep the failure so that collecting stops at the first error.
            Err(err) => Some(Err(err)),
        })
        .collect()
}
/// Produces the message for `e` without the ` (os error N)` text that the
/// standard library adds to operating-system errors.
///
/// Errors that carry no OS error code are rendered with their normal
/// `Display` output.
fn format_io_error(e: &io::Error) -> String {
    match e.raw_os_error() {
        Some(code) => {
            let os_suffix = format!(" (os error {})", code);
            io::Error::from_raw_os_error(code)
                .to_string()
                .replace(&os_suffix, "")
        }
        None => e.to_string(),
    }
}