use std::io::{self, Write};
use anyhow::Result;
use serde::Serialize;
use super::format::format_size;
use super::{
child_relative_path, hardlinks_label, is_zero_u64, precompute_subtree_counts,
scan_root_for_wire, OutputConfig, SubtreeCounts, WIRE_VERSION,
};
use crate::category::Category;
use crate::entry::Entry;
/// Output encodings supported by the "largest entries" report.
///
/// `write` dispatches on this value to choose a renderer.
#[derive(Clone, Copy, Debug)]
pub enum LargestFormat {
    /// Human-readable table, rendered by `write_text`.
    Text,
    /// One pretty-printed JSON document, rendered by `write_json`.
    Json,
    /// Newline-delimited JSON: a meta record followed by one record per
    /// selected entry, rendered by `write_ndjson`.
    Ndjson,
}
/// One candidate line of the report: a borrowed tree node plus its position
/// in the scan.
struct Row<'a> {
/// Node borrowed from the scanned tree.
entry: &'a Entry,
/// Path used for display and as the sort tiebreak. `collect_all` builds it
/// with `child_relative_path` while descending, or uses the entry's own name
/// when the scan root itself is a file.
relative_path: String,
/// Distance from the scan root; the root itself is depth 0.
depth: u32,
}
/// Flattens the tree under `entry` into `out`, visiting parents before their
/// children.
///
/// * `entry` - node currently being visited.
/// * `rel_path` - path of `entry` relative to the scan root (`"."` for the
///   root call); it seeds the paths of the children.
/// * `depth` - 0 for the scan root, incremented by one per level.
/// * `out` - accumulator that receives one `Row` per reported node.
///
/// A directory at depth 0 is not reported (only its descendants are). A
/// non-directory at depth 0 is reported under its own name rather than
/// `rel_path`.
fn collect_all<'a>(entry: &'a Entry, rel_path: String, depth: u32, out: &mut Vec<Row<'a>>) {
    // Decide which path, if any, this node is reported under.
    let own_path = if depth > 0 {
        Some(rel_path.clone())
    } else if entry.is_dir() {
        None
    } else {
        Some(entry.name.clone())
    };

    if let Some(relative_path) = own_path {
        out.push(Row {
            entry,
            relative_path,
            depth,
        });
    }

    // `children()` yields `None` for nodes without a child list; flattening
    // the `Option` turns that case into an empty iteration.
    for child in entry.children().into_iter().flatten() {
        collect_all(
            child,
            child_relative_path(&rel_path, &child.name),
            depth + 1,
            out,
        );
    }
}
/// Report ordering: larger `size` first; rows of equal size are ordered by
/// ascending `relative_path`.
fn compare_rows(a: &Row<'_>, b: &Row<'_>) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    // Operands are swapped on purpose so that bigger sizes sort first.
    match b.entry.size.cmp(&a.entry.size) {
        Ordering::Equal => a.relative_path.cmp(&b.relative_path),
        by_size => by_size,
    }
}
/// Reduces `rows` to its `n` first elements under `compare_rows` and leaves
/// them sorted in that order.
///
/// When there are more than `n` rows, a partial selection moves the `n`
/// winners to the front so that only they need a full sort afterwards.
fn select_largest<'a>(rows: &mut Vec<Row<'a>>, n: usize) {
    let over_budget = n < rows.len();
    if over_budget {
        // After this call every element before index `n` orders at or before
        // the element at index `n`, so the first `n` are the ones to keep.
        rows.select_nth_unstable_by(n, compare_rows);
        rows.truncate(n);
    }

    rows.sort_unstable_by(compare_rows);
}
/// Writes the "largest entries" report for the tree rooted at `entry`.
///
/// * `entry` - root of the scanned tree.
/// * `config` - output settings; its `filter` is applied to the candidates
///   and the rest is handed to the chosen renderer.
/// * `n` - maximum number of entries to report.
/// * `format` - which renderer to use.
/// * `out` - destination for the rendered report.
///
/// The total reported alongside the selection is the number of candidates
/// collected *before* filtering and truncation.
///
/// # Errors
///
/// Propagates any error returned by the selected renderer.
pub fn write(
    entry: &Entry,
    config: &OutputConfig,
    n: usize,
    format: LargestFormat,
    out: &mut impl Write,
) -> Result<()> {
    let mut rows: Vec<Row<'_>> = Vec::new();
    collect_all(entry, String::from("."), 0, &mut rows);

    // Captured before filtering so the report can say "of N total".
    let total_entries = rows.len() as u64;

    let filter = &config.filter;
    if !filter.is_empty() {
        rows.retain(|row| filter.matches(row.entry));
    }

    select_largest(&mut rows, n);

    match format {
        LargestFormat::Text => {
            write_text(&rows, config, n, total_entries, out)?;
            Ok(())
        }
        LargestFormat::Json => write_json(entry, &rows, config, n, total_entries, out),
        LargestFormat::Ndjson => write_ndjson(entry, &rows, config, n, total_entries, out),
    }
}
/// Maximum number of characters (`char`s, not bytes) a path may occupy in the
/// text table; `render_path_for_text` front-truncates anything longer.
const PATH_DISPLAY_CAP: usize = 60;
/// Renders the selection as a human-readable table.
///
/// Output is a header line, a blank line, and then one line per row holding
/// the formatted size, the (possibly truncated) path, the bracketed category
/// label and `dir`/`file`. The path column is padded to the widest rendered
/// path.
///
/// * `rows` - entries to print, already selected and ordered.
/// * `config` - supplies the scan root shown in the header.
/// * `n_requested` - how many entries the caller asked for.
/// * `total_entries` - number of candidates before filtering/truncation.
/// * `out` - destination writer.
///
/// When `rows` is empty a single explanatory note is printed instead of a
/// table. The note distinguishes an empty scan tree, a request for zero
/// entries, and filters that rejected every candidate, so that asking for
/// zero entries is not misreported as a filter mismatch.
///
/// # Errors
///
/// Returns any I/O error raised while writing to `out`.
fn write_text(
    rows: &[Row<'_>],
    config: &OutputConfig,
    n_requested: usize,
    total_entries: u64,
    out: &mut impl Write,
) -> io::Result<()> {
    writeln!(
        out,
        "Largest {} entries in {} (of {} total):",
        rows.len().min(n_requested),
        config.scan_root.display(),
        total_entries,
    )?;
    writeln!(out)?;

    if rows.is_empty() {
        let reason = if total_entries == 0 {
            "(scan tree is empty)"
        } else if n_requested == 0 {
            // Nothing was filtered out; the caller simply asked for no rows.
            "(no entries requested)"
        } else {
            "(no entries match the active filters)"
        };
        return writeln!(out, "{}", reason);
    }

    // Render every path up front so the column width is known before the
    // first table line is written.
    let rendered: Vec<String> = rows.iter().map(render_path_for_text).collect();
    let path_width = rendered
        .iter()
        .map(|path| path.chars().count())
        .max()
        .unwrap_or(0);

    for (row, path) in rows.iter().zip(&rendered) {
        let size = format_size(row.entry.size);
        let cat = format!("[{}]", row.entry.category.label());
        let kind = if row.entry.is_dir() { "dir" } else { "file" };
        writeln!(
            out,
            "{:>10} {:<width$} {:<13} {}",
            size,
            path,
            cat,
            kind,
            width = path_width,
        )?;
    }

    Ok(())
}
/// Produces the path text shown in the table for `row`.
///
/// Directories get a trailing `/`. If the result is longer than
/// `PATH_DISPLAY_CAP` characters, the front is cut and replaced by a single
/// `…` so the tail of the path stays visible and the total length equals the
/// cap.
fn render_path_for_text(row: &Row<'_>) -> String {
    let mut display = row.relative_path.clone();
    if row.entry.is_dir() {
        display.push('/');
    }

    let total = display.chars().count();
    if total <= PATH_DISPLAY_CAP {
        return display;
    }

    // One slot of the budget is reserved for the leading ellipsis.
    let keep = PATH_DISPLAY_CAP.saturating_sub(1);
    let mut clipped = String::from('…');
    clipped.extend(display.chars().skip(total - keep));
    clipped
}
/// Top-level document emitted by `write_json`: run metadata plus the flat
/// list of selected entries.
#[derive(Serialize)]
struct LargestRoot<'a> {
/// Run-level metadata, built by `make_meta`.
meta: Meta<'a>,
/// Selected entries, in the order of the rows passed to `write_json`.
largest: Vec<LargestEntry>,
}
/// Run-level metadata shared by the JSON and NDJSON outputs; populated by
/// `make_meta`.
#[derive(Serialize)]
struct Meta<'a> {
/// Set from `WIRE_VERSION`.
wire_version: u32,
/// Crate version, taken from `CARGO_PKG_VERSION` at compile time.
duvis_version: &'static str,
/// Scan root as produced by `scan_root_for_wire`.
scan_root: String,
/// Label produced by `hardlinks_label` for the configured hardlink policy.
hardlinks: &'a str,
/// Value of `config.counts.scanned()`.
items_scanned: u64,
/// Value of `config.counts.skipped()`.
items_skipped: u64,
/// Number of entries the caller asked for.
largest_requested: usize,
/// Number of candidate entries before filtering and truncation.
total_entries: u64,
}
/// Owned, serializable view of one selected entry, used in the JSON document.
/// Built by `build_entry`.
#[derive(Serialize)]
struct LargestEntry {
/// The entry's own name.
name: String,
/// Path of the entry as recorded in its `Row`.
relative_path: String,
/// Depth of the entry as recorded in its `Row`.
depth: u32,
/// Raw size of the entry (presumably bytes; the unit is defined by `Entry`).
size: u64,
/// `size` formatted by `format_size`.
size_human: String,
/// Whether the entry is a directory.
is_dir: bool,
/// The entry's category.
category: Category,
/// Copied from the entry; the key is omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
modified_days_ago: Option<u64>,
/// File count looked up in the precomputed subtree counts (0 if absent).
file_count: u64,
/// Directory count from the same lookup, minus one when the entry is itself
/// a directory, so the entry does not count itself.
dir_count: u64,
}
/// Assembles the run-level `Meta` record shared by the JSON and NDJSON
/// outputs.
///
/// * `config` - source of the scan root, hardlink policy and scan counters.
/// * `n_requested` - number of entries the caller asked for.
/// * `total_entries` - number of candidates before filtering/truncation.
fn make_meta<'a>(config: &'a OutputConfig<'a>, n_requested: usize, total_entries: u64) -> Meta<'a> {
    let scan_root = scan_root_for_wire(config.scan_root);
    let hardlinks = hardlinks_label(config.hardlinks);
    let items_scanned = config.counts.scanned();
    let items_skipped = config.counts.skipped();

    Meta {
        wire_version: WIRE_VERSION,
        duvis_version: env!("CARGO_PKG_VERSION"),
        scan_root,
        hardlinks,
        items_scanned,
        items_skipped,
        largest_requested: n_requested,
        total_entries,
    }
}
/// Converts a `Row` into the owned `LargestEntry` used by the JSON document.
///
/// * `row` - the selected entry and its position in the scan.
/// * `counts` - precomputed per-node subtree counts, keyed by the node's
///   address; a node missing from the map is treated as `(0, 0)`.
fn build_entry(row: &Row<'_>, counts: &SubtreeCounts) -> LargestEntry {
    let entry = row.entry;
    let is_dir = entry.is_dir();

    let key = entry as *const Entry;
    let (file_count, dirs_including_self) = counts.get(&key).map_or((0, 0), |&pair| pair);

    // The stored directory count includes the node itself when it is a
    // directory; report only the directories below it.
    let dir_count = dirs_including_self.saturating_sub(u64::from(is_dir));

    LargestEntry {
        name: entry.name.clone(),
        relative_path: row.relative_path.clone(),
        depth: row.depth,
        size: entry.size,
        size_human: format_size(entry.size),
        is_dir,
        category: entry.category,
        modified_days_ago: entry.modified_days_ago,
        file_count,
        dir_count,
    }
}
/// Emits the report as one pretty-printed JSON document followed by a
/// newline.
///
/// * `tree_root` - root of the scanned tree, used to precompute subtree
///   counts.
/// * `rows` - entries to report, already selected and ordered.
/// * `config`, `n_requested`, `total_entries` - forwarded to `make_meta`.
/// * `out` - destination writer.
///
/// # Errors
///
/// Returns serialization errors from `serde_json` and I/O errors from `out`.
fn write_json(
    tree_root: &Entry,
    rows: &[Row<'_>],
    config: &OutputConfig,
    n_requested: usize,
    total_entries: u64,
    out: &mut impl Write,
) -> Result<()> {
    let counts = precompute_subtree_counts(tree_root);

    let mut largest: Vec<LargestEntry> = Vec::with_capacity(rows.len());
    for row in rows {
        largest.push(build_entry(row, &counts));
    }

    let document = LargestRoot {
        meta: make_meta(config, n_requested, total_entries),
        largest,
    };

    serde_json::to_writer_pretty(&mut *out, &document)?;
    // The serializer does not end the document with a newline.
    writeln!(out)?;
    Ok(())
}
/// One line of NDJSON output.
///
/// Serialized with an internal `"type"` tag whose value is the snake_case
/// variant name (`"meta"` or `"entry"`); the variant's own fields sit
/// alongside the tag in the same object.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum NdjsonRecord<'a> {
/// Run-level metadata; `write_ndjson` emits it as the first line.
Meta(Meta<'a>),
/// One selected entry.
Entry(NdjsonEntryRecord<'a>),
}
/// Borrowed, serializable view of one selected entry for NDJSON output.
/// Constructed in `write_ndjson`.
#[derive(Serialize)]
struct NdjsonEntryRecord<'a> {
/// The entry's own name.
name: &'a str,
/// Path of the entry as recorded in its `Row`.
relative_path: &'a str,
/// Depth of the entry as recorded in its `Row`.
depth: u32,
/// Raw size of the entry (presumably bytes; the unit is defined by `Entry`).
size: u64,
/// `size` formatted by `format_size`.
size_human: String,
/// Whether the entry is a directory.
is_dir: bool,
/// The entry's category.
category: Category,
/// Copied from the entry; the key is omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
modified_days_ago: Option<u64>,
/// File count looked up in the precomputed subtree counts (0 if absent).
file_count: u64,
/// Directory count from the same lookup, minus one when the entry is itself
/// a directory.
dir_count: u64,
/// Omitted from the output when `is_zero_u64` says so (presumably when the
/// value is 0 — confirm against its definition). `write_ndjson` always sets
/// this to 0.
#[serde(skip_serializing_if = "is_zero_u64")]
truncated_count: u64,
/// Same skipping rule as `truncated_count`; `write_ndjson` always sets this
/// to 0.
#[serde(skip_serializing_if = "is_zero_u64")]
truncated_size: u64,
}
/// Emits the report as newline-delimited JSON: one `meta` record, then one
/// `entry` record per row, each on its own line.
///
/// * `tree_root` - root of the scanned tree, used to precompute subtree
///   counts.
/// * `rows` - entries to report, already selected and ordered.
/// * `config`, `n_requested`, `total_entries` - forwarded to `make_meta`.
/// * `out` - destination writer.
///
/// # Errors
///
/// Returns serialization errors from `serde_json` and I/O errors from `out`.
fn write_ndjson(
    tree_root: &Entry,
    rows: &[Row<'_>],
    config: &OutputConfig,
    n_requested: usize,
    total_entries: u64,
    out: &mut impl Write,
) -> Result<()> {
    let header = NdjsonRecord::Meta(make_meta(config, n_requested, total_entries));
    serde_json::to_writer(&mut *out, &header)?;
    writeln!(out)?;

    let counts = precompute_subtree_counts(tree_root);
    for row in rows {
        let entry = row.entry;
        let is_dir = entry.is_dir();

        // Nodes missing from the map are reported with zero counts.
        let key = entry as *const Entry;
        let (file_count, dirs_including_self) = counts.get(&key).map_or((0, 0), |&pair| pair);
        // A directory is included in its own stored directory count; drop it.
        let dir_count = dirs_including_self.saturating_sub(u64::from(is_dir));

        let line = NdjsonRecord::Entry(NdjsonEntryRecord {
            name: &entry.name,
            relative_path: &row.relative_path,
            depth: row.depth,
            size: entry.size,
            size_human: format_size(entry.size),
            is_dir,
            category: entry.category,
            modified_days_ago: entry.modified_days_ago,
            file_count,
            dir_count,
            truncated_count: 0,
            truncated_size: 0,
        });
        serde_json::to_writer(&mut *out, &line)?;
        writeln!(out)?;
    }

    Ok(())
}
// Unit tests for the "largest entries" report: tree flattening, ordering,
// path truncation, and the three output formats.
#[cfg(test)]
mod tests {
use super::*;
use crate::scanner::{HardlinkPolicy, ScanCounts};
use std::path::PathBuf;
// Builds a directory entry with category `Other` and the given children.
fn dir(name: &str, children: Vec<Entry>) -> Entry {
Entry::dir(name.to_string(), Category::Other, None, children)
}
// Builds a file entry with the given size and category.
fn file(name: &str, size: u64, cat: Category) -> Entry {
Entry::file(name.to_string(), size, cat, None)
}
// Small project tree with three directories and four files, i.e. seven
// descendants below the root.
fn fixture() -> Entry {
dir(
"proj",
vec![
dir("target", vec![file("app", 200_000, Category::Build)]),
dir(
"src",
vec![
file("a.rs", 100, Category::Other),
file("b.rs", 50, Category::Other),
],
),
dir("git", vec![file("HEAD", 10, Category::Vcs)]),
],
)
}
// Assembles an `OutputConfig` with no depth/top limits around the borrowed
// scan root, counters and filter.
fn cfg<'a>(
scan_root: &'a PathBuf,
counts: &'a ScanCounts,
filter: &'a crate::output::filter::Filter,
) -> OutputConfig<'a> {
OutputConfig {
max_depth: None,
top: None,
scan_root,
counts,
hardlinks: HardlinkPolicy::CountOnce,
filter,
}
}
// A directory root is not reported, but every node below it is.
#[test]
fn collect_all_excludes_root_and_visits_every_descendant() {
let tree = fixture();
let mut rows: Vec<Row<'_>> = Vec::new();
collect_all(&tree, ".".to_string(), 0, &mut rows);
let paths: Vec<String> = rows.iter().map(|r| r.relative_path.clone()).collect();
assert_eq!(rows.len(), 7);
assert!(paths.contains(&"target".to_string()));
assert!(paths.contains(&"target/app".to_string()));
assert!(paths.contains(&"src/a.rs".to_string()));
assert!(!paths.contains(&".".to_string()), "root should be excluded");
}
// When the scan root is itself a file it is reported under its own name.
#[test]
fn collect_all_includes_file_scan_root() {
let root = file("solo.bin", 1024, Category::Other);
let mut rows: Vec<Row<'_>> = Vec::new();
collect_all(&root, ".".to_string(), 0, &mut rows);
assert_eq!(rows.len(), 1);
assert_eq!(rows[0].relative_path, "solo.bin");
assert!(!rows[0].entry.is_dir());
}
// Paths longer than the cap lose their front, gain a leading ellipsis and
// keep their tail.
#[test]
fn render_path_for_text_truncates_long_paths_from_the_front() {
let long_name = "a".repeat(80);
let row = Row {
entry: &file(&long_name, 0, Category::Other),
relative_path: long_name.clone(),
depth: 1,
};
let rendered = render_path_for_text(&row);
assert!(
rendered.chars().count() <= PATH_DISPLAY_CAP,
"truncated path exceeded cap: {rendered:?}",
);
assert!(
rendered.starts_with('…'),
"ellipsis should mark front-truncation: {rendered:?}",
);
assert!(
rendered.ends_with("aaaaa"),
"tail not preserved: {rendered:?}"
);
}
// `target` and `target/app` are expected to tie on size (this relies on the
// directory's size equalling that of its only child, as produced by
// `Entry::dir`), so the path tiebreak puts `target` first; `src` follows.
#[test]
fn select_largest_orders_by_size_desc_with_path_tiebreak() {
let tree = fixture();
let mut rows: Vec<Row<'_>> = Vec::new();
collect_all(&tree, ".".to_string(), 0, &mut rows);
select_largest(&mut rows, 3);
let paths: Vec<&str> = rows.iter().map(|r| r.relative_path.as_str()).collect();
assert_eq!(paths, vec!["target", "target/app", "src"]);
}
// The text output carries the header counts, marks directories with a
// trailing slash and pads the path column.
#[test]
fn write_text_renders_aligned_table_with_dir_marker() {
let tree = fixture();
let scan_root = PathBuf::from("/tmp/proj");
let counts = ScanCounts::default();
let filter = crate::output::filter::Filter::default();
let cfg = cfg(&scan_root, &counts, &filter);
let mut buf: Vec<u8> = Vec::new();
write(&tree, &cfg, 3, LargestFormat::Text, &mut buf).unwrap();
let output = String::from_utf8(buf).unwrap();
assert!(output.contains("Largest 3 entries"));
assert!(output.contains("target/"));
assert!(output.contains("target/app "));
assert!(output.contains("of 7 total"));
}
// The JSON output is a `meta` object plus a flat `largest` array, with no
// nested `tree` key.
#[test]
fn write_json_returns_meta_plus_flat_largest_array() {
let tree = fixture();
let scan_root = PathBuf::from("/tmp/proj");
let counts = ScanCounts::default();
let filter = crate::output::filter::Filter::default();
let cfg = cfg(&scan_root, &counts, &filter);
let mut buf: Vec<u8> = Vec::new();
write(&tree, &cfg, 2, LargestFormat::Json, &mut buf).unwrap();
let v: serde_json::Value = serde_json::from_slice(&buf).unwrap();
assert_eq!(v["meta"]["wire_version"], 2);
assert_eq!(v["meta"]["largest_requested"], 2);
assert_eq!(v["meta"]["total_entries"], 7);
assert!(v.get("tree").is_none());
let largest = v["largest"].as_array().unwrap();
assert_eq!(largest.len(), 2);
assert_eq!(largest[0]["relative_path"], "target");
}
// The NDJSON output is one `meta` line followed by one `entry` line per
// selected row.
#[test]
fn write_ndjson_emits_meta_then_one_line_per_result() {
let tree = fixture();
let scan_root = PathBuf::from("/tmp/proj");
let counts = ScanCounts::default();
let filter = crate::output::filter::Filter::default();
let cfg = cfg(&scan_root, &counts, &filter);
let mut buf: Vec<u8> = Vec::new();
write(&tree, &cfg, 3, LargestFormat::Ndjson, &mut buf).unwrap();
let lines: Vec<serde_json::Value> = std::str::from_utf8(&buf)
.unwrap()
.lines()
.filter(|l| !l.trim().is_empty())
.map(|l| serde_json::from_str(l).unwrap())
.collect();
// One meta line plus the three requested entries.
assert_eq!(lines.len(), 4); assert_eq!(lines[0]["type"], "meta");
assert_eq!(lines[0]["largest_requested"], 3);
for line in &lines[1..] {
assert_eq!(line["type"], "entry");
}
}
}