1use std::collections::HashSet;
2use std::io::{self, BufRead, Write};
3use std::os::unix::fs::MetadataExt;
4use std::path::{Path, PathBuf};
5use std::time::SystemTime;
6
7pub struct DuConfig {
9 pub all: bool,
11 pub apparent_size: bool,
13 pub block_size: u64,
15 pub human_readable: bool,
17 pub si: bool,
19 pub total: bool,
21 pub max_depth: Option<usize>,
23 pub summarize: bool,
25 pub one_file_system: bool,
27 pub dereference: bool,
29 pub dereference_args: bool,
31 pub separate_dirs: bool,
33 pub count_links: bool,
35 pub null_terminator: bool,
37 pub threshold: Option<i64>,
39 pub show_time: bool,
41 pub time_style: String,
43 pub exclude_patterns: Vec<String>,
45 pub inodes: bool,
47}
48
49impl Default for DuConfig {
50 fn default() -> Self {
51 DuConfig {
52 all: false,
53 apparent_size: false,
54 block_size: 1024,
55 human_readable: false,
56 si: false,
57 total: false,
58 max_depth: None,
59 summarize: false,
60 one_file_system: false,
61 dereference: false,
62 dereference_args: false,
63 separate_dirs: false,
64 count_links: false,
65 null_terminator: false,
66 threshold: None,
67 show_time: false,
68 time_style: "long-iso".to_string(),
69 exclude_patterns: Vec::new(),
70 inodes: false,
71 }
72 }
73}
74
75pub struct DuEntry {
77 pub size: u64,
79 pub path: PathBuf,
81 pub mtime: Option<i64>,
83}
84
85pub fn du_path(path: &Path, config: &DuConfig) -> io::Result<Vec<DuEntry>> {
87 let mut seen_inodes: HashSet<(u64, u64)> = HashSet::new();
88 let mut had_error = false;
89 du_path_with_seen(path, config, &mut seen_inodes, &mut had_error)
90}
91
92pub fn du_path_with_seen(
95 path: &Path,
96 config: &DuConfig,
97 seen_inodes: &mut HashSet<(u64, u64)>,
98 had_error: &mut bool,
99) -> io::Result<Vec<DuEntry>> {
100 let mut entries = Vec::new();
101 du_recursive(path, config, seen_inodes, &mut entries, 0, None, had_error)?;
102 Ok(entries)
103}
104
105fn is_excluded(path: &Path, config: &DuConfig) -> bool {
108 if config.exclude_patterns.is_empty() {
109 return false;
110 }
111 let path_str = path.to_string_lossy();
112 let basename = path
113 .file_name()
114 .map(|n| n.to_string_lossy())
115 .unwrap_or_default();
116 config
117 .exclude_patterns
118 .iter()
119 .any(|pat| glob_match(pat, &basename) || glob_match(pat, &path_str))
120}
121
122fn du_recursive(
124 path: &Path,
125 config: &DuConfig,
126 seen: &mut HashSet<(u64, u64)>,
127 entries: &mut Vec<DuEntry>,
128 depth: usize,
129 root_dev: Option<u64>,
130 had_error: &mut bool,
131) -> io::Result<u64> {
132 if is_excluded(path, config) {
135 return Ok(0);
136 }
137
138 let meta = if config.dereference || (depth == 0 && config.dereference_args) {
140 std::fs::metadata(path)?
141 } else {
142 std::fs::symlink_metadata(path)?
143 };
144
145 if let Some(dev) = root_dev {
147 if meta.dev() != dev && config.one_file_system {
148 return Ok(0);
149 }
150 }
151
152 let ino_key = (meta.dev(), meta.ino());
154 if meta.nlink() > 1 && !config.count_links {
155 if !seen.insert(ino_key) {
156 return Ok(0);
157 }
158 }
159
160 let size = if config.inodes {
161 1
162 } else if config.apparent_size {
163 if meta.is_dir() { 0 } else { meta.len() }
166 } else {
167 meta.blocks() * 512
168 };
169
170 let mtime = meta.mtime();
171
172 if meta.is_dir() {
173 let mut subtree_size: u64 = size;
176 let mut display_size: u64 = size;
178
179 let read_dir = match std::fs::read_dir(path) {
180 Ok(rd) => rd,
181 Err(e) => {
182 eprintln!(
183 "du: cannot read directory '{}': {}",
184 path.display(),
185 format_io_error(&e)
186 );
187 *had_error = true;
188 if should_report_dir(config, depth) {
190 entries.push(DuEntry {
191 size,
192 path: path.to_path_buf(),
193 mtime: if config.show_time { Some(mtime) } else { None },
194 });
195 }
196 return Ok(size);
197 }
198 };
199
200 for entry in read_dir {
201 let entry = match entry {
202 Ok(e) => e,
203 Err(e) => {
204 eprintln!(
205 "du: cannot access entry in '{}': {}",
206 path.display(),
207 format_io_error(&e)
208 );
209 *had_error = true;
210 continue;
211 }
212 };
213 let child_path = entry.path();
214
215 if is_excluded(&child_path, config) {
217 continue;
218 }
219
220 let child_is_dir = entry
224 .file_type()
225 .or_else(|_| std::fs::symlink_metadata(&child_path).map(|m| m.file_type()))
226 .map_or(false, |ft| ft.is_dir());
227
228 let child_size = du_recursive(
229 &child_path,
230 config,
231 seen,
232 entries,
233 depth + 1,
234 Some(root_dev.unwrap_or(meta.dev())),
235 had_error,
236 )?;
237 subtree_size += child_size;
238 if config.separate_dirs && child_is_dir {
239 } else {
241 display_size += child_size;
242 }
243 }
244
245 if should_report_dir(config, depth) {
247 entries.push(DuEntry {
248 size: display_size,
249 path: path.to_path_buf(),
250 mtime: if config.show_time { Some(mtime) } else { None },
251 });
252 }
253
254 Ok(subtree_size)
255 } else {
256 if (depth == 0 || config.all) && within_depth(config, depth) {
259 entries.push(DuEntry {
260 size,
261 path: path.to_path_buf(),
262 mtime: if config.show_time { Some(mtime) } else { None },
263 });
264 }
265 Ok(size)
266 }
267}
268
269fn should_report_dir(config: &DuConfig, depth: usize) -> bool {
271 if config.summarize {
272 return depth == 0;
273 }
274 within_depth(config, depth)
275}
276
277fn within_depth(config: &DuConfig, depth: usize) -> bool {
279 match config.max_depth {
280 Some(max) => depth <= max,
281 None => true,
282 }
283}
284
285pub fn glob_match(pattern: &str, text: &str) -> bool {
288 let pat: Vec<char> = pattern.chars().collect();
289 let txt: Vec<char> = text.chars().collect();
290 glob_match_inner(&pat, &txt)
291}
292
293fn match_bracket_class(pat: &[char], start: usize, ch: char) -> Option<(bool, usize)> {
297 let mut i = start + 1; if i >= pat.len() {
299 return None;
300 }
301
302 let negate = if pat[i] == '^' || pat[i] == '!' {
304 i += 1;
305 true
306 } else {
307 false
308 };
309
310 let mut found = false;
312 let mut first = true;
313 while i < pat.len() {
314 if pat[i] == ']' && !first {
315 let matched = if negate { !found } else { found };
317 return Some((matched, i + 1));
318 }
319 if i + 2 < pat.len() && pat[i + 1] == '-' && pat[i + 2] != ']' {
321 let lo = pat[i];
322 let hi = pat[i + 2];
323 if ch >= lo && ch <= hi {
324 found = true;
325 }
326 i += 3;
327 } else {
328 if pat[i] == ch {
329 found = true;
330 }
331 i += 1;
332 }
333 first = false;
334 }
335
336 None
338}
339
340fn glob_match_inner(pat: &[char], txt: &[char]) -> bool {
341 let mut pi = 0;
342 let mut ti = 0;
343 let mut star_pi = usize::MAX;
344 let mut star_ti = 0;
345
346 while ti < txt.len() {
347 if pi < pat.len() && pat[pi] == '[' {
348 if let Some((matched, end)) = match_bracket_class(pat, pi, txt[ti]) {
350 if matched {
351 pi = end;
352 ti += 1;
353 continue;
354 }
355 }
357 if star_pi != usize::MAX {
359 pi = star_pi + 1;
360 star_ti += 1;
361 ti = star_ti;
362 } else {
363 return false;
364 }
365 } else if pi < pat.len() && (pat[pi] == '?' || pat[pi] == txt[ti]) {
366 pi += 1;
367 ti += 1;
368 } else if pi < pat.len() && pat[pi] == '*' {
369 star_pi = pi;
370 star_ti = ti;
371 pi += 1;
372 } else if star_pi != usize::MAX {
373 pi = star_pi + 1;
374 star_ti += 1;
375 ti = star_ti;
376 } else {
377 return false;
378 }
379 }
380
381 while pi < pat.len() && pat[pi] == '*' {
382 pi += 1;
383 }
384 pi == pat.len()
385}
386
387pub fn format_size(raw_bytes: u64, config: &DuConfig) -> String {
389 if config.human_readable {
390 human_readable(raw_bytes, 1024)
391 } else if config.si {
392 human_readable(raw_bytes, 1000)
393 } else if config.inodes {
394 raw_bytes.to_string()
395 } else {
396 let scaled = (raw_bytes + config.block_size - 1) / config.block_size;
398 scaled.to_string()
399 }
400}
401
402fn human_readable(bytes: u64, base: u64) -> String {
405 let suffixes = if base == 1024 {
406 &["", "K", "M", "G", "T", "P", "E"]
407 } else {
408 &["", "k", "M", "G", "T", "P", "E"]
409 };
410
411 if bytes < base {
412 return format!("{}", bytes);
413 }
414
415 let mut value = bytes as f64;
416 let mut idx = 0;
417 while value >= base as f64 && idx + 1 < suffixes.len() {
418 value /= base as f64;
419 idx += 1;
420 }
421
422 if value >= 10.0 {
423 format!("{:.0}{}", value.ceil(), suffixes[idx])
424 } else {
425 let rounded = (value * 10.0).ceil() / 10.0;
427 if rounded >= 10.0 {
428 format!("{:.0}{}", rounded.ceil(), suffixes[idx])
429 } else {
430 format!("{:.1}{}", rounded, suffixes[idx])
431 }
432 }
433}
434
435pub fn format_time(epoch_secs: i64, style: &str) -> String {
437 let secs = epoch_secs;
439 let st = match SystemTime::UNIX_EPOCH.checked_add(std::time::Duration::from_secs(secs as u64)) {
440 Some(t) => t,
441 None => return String::from("?"),
442 };
443
444 let mut tm: libc::tm = unsafe { std::mem::zeroed() };
446 let time_t = secs as libc::time_t;
447 unsafe {
448 libc::localtime_r(&time_t, &mut tm);
449 }
450 let _ = st;
452
453 let year = tm.tm_year + 1900;
454 let mon = tm.tm_mon + 1;
455 let day = tm.tm_mday;
456 let hour = tm.tm_hour;
457 let min = tm.tm_min;
458 let sec = tm.tm_sec;
459
460 match style {
461 "full-iso" => format!(
462 "{:04}-{:02}-{:02} {:02}:{:02}:{:02}.000000000 +0000",
463 year, mon, day, hour, min, sec
464 ),
465 "iso" => format!("{:04}-{:02}-{:02}", year, mon, day),
466 _ => {
467 format!("{:04}-{:02}-{:02} {:02}:{:02}", year, mon, day, hour, min)
469 }
470 }
471}
472
473pub fn print_entry<W: Write>(out: &mut W, entry: &DuEntry, config: &DuConfig) -> io::Result<()> {
475 if let Some(thresh) = config.threshold {
477 let size_signed = entry.size as i64;
478 if thresh >= 0 && size_signed < thresh {
479 return Ok(());
480 }
481 if thresh < 0 && size_signed > thresh.unsigned_abs() as i64 {
482 return Ok(());
483 }
484 }
485
486 let size_str = format_size(entry.size, config);
487
488 if config.show_time {
489 if let Some(mtime) = entry.mtime {
490 let time_str = format_time(mtime, &config.time_style);
491 write!(out, "{}\t{}\t{}", size_str, time_str, entry.path.display())?;
492 } else {
493 write!(out, "{}\t{}", size_str, entry.path.display())?;
494 }
495 } else {
496 write!(out, "{}\t{}", size_str, entry.path.display())?;
497 }
498
499 if config.null_terminator {
500 out.write_all(b"\0")?;
501 } else {
502 out.write_all(b"\n")?;
503 }
504
505 Ok(())
506}
507
508pub fn parse_block_size(s: &str) -> Result<u64, String> {
511 let s = s.trim();
512 if s.is_empty() {
513 return Err("empty block size".to_string());
514 }
515
516 let mut num_end = 0;
517 for (i, c) in s.char_indices() {
518 if c.is_ascii_digit() {
519 num_end = i + 1;
520 } else {
521 break;
522 }
523 }
524
525 let (num_str, suffix) = s.split_at(num_end);
526 let base_val: u64 = if num_str.is_empty() {
527 1
528 } else {
529 num_str
530 .parse()
531 .map_err(|_| format!("invalid block size: '{}'", s))?
532 };
533
534 let multiplier = match suffix.to_uppercase().as_str() {
535 "" => 1u64,
536 "B" => 1,
537 "K" | "KB" => 1024,
538 "M" | "MB" => 1024 * 1024,
539 "G" | "GB" => 1024 * 1024 * 1024,
540 "T" | "TB" => 1024u64 * 1024 * 1024 * 1024,
541 "P" | "PB" => 1024u64 * 1024 * 1024 * 1024 * 1024,
542 "KB_SI" => 1000,
543 _ => return Err(format!("invalid suffix in block size: '{}'", s)),
544 };
545
546 Ok(base_val * multiplier)
547}
548
549pub fn parse_threshold(s: &str) -> Result<i64, String> {
554 let s = s.trim();
555 let (negative, rest) = if let Some(stripped) = s.strip_prefix('-') {
556 (true, stripped)
557 } else {
558 (false, s)
559 };
560
561 let val = parse_block_size(rest)? as i64;
562 if negative {
563 if val == 0 {
564 return Err(format!("invalid --threshold argument '-{}'", rest));
565 }
566 Ok(-val)
567 } else {
568 Ok(val)
569 }
570}
571
572pub fn read_exclude_file(path: &str) -> io::Result<Vec<String>> {
574 let file = std::fs::File::open(path)?;
575 let reader = io::BufReader::new(file);
576 let mut patterns = Vec::new();
577 for line in reader.lines() {
578 let line = line?;
579 let trimmed = line.trim();
580 if !trimmed.is_empty() {
581 patterns.push(trimmed.to_string());
582 }
583 }
584 Ok(patterns)
585}
586
587fn format_io_error(e: &io::Error) -> String {
589 if let Some(raw) = e.raw_os_error() {
590 let os_err = io::Error::from_raw_os_error(raw);
591 let msg = format!("{}", os_err);
592 msg.replace(&format!(" (os error {})", raw), "")
593 } else {
594 format!("{}", e)
595 }
596}