Skip to main content

hexz_cli/cmd/data/
ls.rs

//! List Hexz archives in a directory and render their lineage as a tree.
//!
//! Scans a directory for `.hxz` files, reads the `parent_paths` field from
//! each header, and builds a parent→child graph.  Files whose parent lives
//! outside the scanned directory are shown with an external-parent annotation.
//!
//! # Common Usage
//!
//! ```bash
//! hexz ls ./checkpoints/
//! hexz ls .
//! ```
//!
//! # Output Example
//!
//! ```text
//! ./checkpoints/
//! ├── base.hxz                 12.4 GB  standalone
//! │   ├── epoch1.hxz            1.2 GB  +162 new blocks
//! │   │   └── epoch2.hxz        0.8 GB  +97 new blocks
//! │   └── finetune-v1.hxz       2.1 GB  +389 new blocks
//! └── unrelated.hxz             4.0 GB  standalone
//!
//! 5 archives   20.5 GB on disk
//! ```
26
27use anyhow::{Context, Result};
28use hexz_core::format::header::Header;
29use hexz_core::format::index::MasterIndex;
30use indicatif::HumanBytes;
31use std::collections::{HashMap, HashSet};
32use std::fs::File;
33use std::path::{Path, PathBuf};
34
/// Per-archive metadata needed to render one row of the lineage tree.
#[derive(Debug, Clone)]
struct ArchiveInfo {
    /// Path of the archive file.
    path: PathBuf,
    /// Size of the archive file in bytes.
    file_size: u64,
    /// First declared parent path (if any).
    parent: Option<String>,
    /// Number of data blocks (not parent-refs, not sparse).
    data_blocks: usize,
}
43
44fn read_archive_info(path: &Path) -> Result<ArchiveInfo> {
45    use hexz_core::format::index::IndexPage;
46    use std::io::{Read, Seek, SeekFrom};
47
48    let mut f = File::open(path)?;
49    let file_size = f.metadata()?.len();
50    let header = Header::read_from(&mut f)?;
51    let master = MasterIndex::read_from(&mut f, header.index_offset)?;
52
53    let parent = header.parent_paths.into_iter().next();
54
55    // Count data blocks (non-sparse, non-parent-ref) in primary stream.
56    let mut data_blocks = 0usize;
57    for page_meta in &master.primary_pages {
58        f.seek(SeekFrom::Start(page_meta.offset))?;
59        let mut buf = vec![0u8; page_meta.length as usize];
60        f.read_exact(&mut buf)?;
61        let page: IndexPage = bincode::deserialize(&buf)?;
62        for block in page.blocks {
63            if !block.is_sparse() && !block.is_parent_ref() {
64                data_blocks += 1;
65            }
66        }
67    }
68
69    Ok(ArchiveInfo {
70        path: path.to_path_buf(),
71        file_size,
72        parent,
73        data_blocks,
74    })
75}
76
77/// Print a tree of all `.hxz` archives found in `dir`.
78pub fn run(dir: PathBuf) -> Result<()> {
79    // --- Collect all .hxz files ---
80    let entries: Vec<ArchiveInfo> = std::fs::read_dir(&dir)
81        .with_context(|| format!("Cannot read directory: {}", dir.display()))?
82        .filter_map(|e| e.ok())
83        .filter(|e| e.path().extension().is_some_and(|ext| ext == "hxz"))
84        .map(|e| {
85            let p = e.path();
86            read_archive_info(&p).with_context(|| format!("Failed to read {}", p.display()))
87        })
88        .collect::<Result<Vec<_>>>()?;
89
90    if entries.is_empty() {
91        println!("No .hxz archives found in {}", dir.display());
92        return Ok(());
93    }
94
95    // --- Build name → index map (using just the filename for matching) ---
96    // parent_paths are stored as full paths at write time; match on filename.
97    let name_to_idx: HashMap<String, usize> = entries
98        .iter()
99        .enumerate()
100        .map(|(i, a)| {
101            let name = a
102                .path
103                .file_name()
104                .unwrap_or_default()
105                .to_string_lossy()
106                .into_owned();
107            (name, i)
108        })
109        .collect();
110
111    // For each archive, resolve its parent to a local index (if present).
112    let parent_idx: Vec<Option<usize>> = entries
113        .iter()
114        .map(|a| {
115            a.parent.as_deref().and_then(|p| {
116                // Try matching on the bare filename of the declared parent path.
117                let parent_name = Path::new(p)
118                    .file_name()
119                    .unwrap_or_default()
120                    .to_string_lossy()
121                    .into_owned();
122                name_to_idx.get(&parent_name).copied()
123            })
124        })
125        .collect();
126
127    // Archives whose parent is outside the scanned dir (declared but unresolved).
128    let external_parent: Vec<Option<&str>> = entries
129        .iter()
130        .zip(&parent_idx)
131        .map(|(a, resolved)| {
132            if resolved.is_none() {
133                a.parent.as_deref()
134            } else {
135                None
136            }
137        })
138        .collect();
139
140    // Build children map: parent_idx → Vec<child_idx>
141    let mut children: HashMap<usize, Vec<usize>> = HashMap::new();
142    let mut has_parent: HashSet<usize> = HashSet::new();
143    for (i, p) in parent_idx.iter().enumerate() {
144        if let Some(pi) = p {
145            children.entry(*pi).or_default().push(i);
146            has_parent.insert(i);
147        }
148    }
149
150    // Roots = archives with no resolved local parent (including external-parent ones).
151    let mut roots: Vec<usize> = (0..entries.len())
152        .filter(|i| !has_parent.contains(i))
153        .collect();
154    roots.sort_by_key(|&i| &entries[i].path);
155
156    // --- Render ---
157    let total_size: u64 = entries.iter().map(|a| a.file_size).sum();
158
159    println!();
160    println!("  {}/", dir.display());
161
162    fn print_tree(
163        idx: usize,
164        entries: &[ArchiveInfo],
165        children: &HashMap<usize, Vec<usize>>,
166        external_parent: &[Option<&str>],
167        prefix: &str,
168        is_last: bool,
169    ) {
170        let a = &entries[idx];
171        let connector = if is_last { "└──" } else { "├──" };
172        let name = a.path.file_name().unwrap_or_default().to_string_lossy();
173
174        let annotation = if let Some(ext) = external_parent[idx] {
175            // External parent — show where it comes from
176            let parent_name = Path::new(ext)
177                .file_name()
178                .unwrap_or_default()
179                .to_string_lossy()
180                .into_owned();
181            format!("← {} (external)", parent_name)
182        } else if a.parent.is_none() {
183            "standalone".to_string()
184        } else {
185            format!("+{} new blocks", a.data_blocks)
186        };
187
188        println!(
189            "  {}{} {:<32} {:>10}  {}",
190            prefix,
191            connector,
192            name,
193            HumanBytes(a.file_size).to_string(),
194            annotation,
195        );
196
197        let mut kids = children.get(&idx).cloned().unwrap_or_default();
198        kids.sort_by_key(|&i| &entries[i].path);
199
200        let child_prefix = format!("{}{}   ", prefix, if is_last { " " } else { "│" });
201        for (j, &child) in kids.iter().enumerate() {
202            let last = j == kids.len() - 1;
203            print_tree(
204                child,
205                entries,
206                children,
207                external_parent,
208                &child_prefix,
209                last,
210            );
211        }
212    }
213
214    for (i, &root) in roots.iter().enumerate() {
215        let last = i == roots.len() - 1;
216        print_tree(root, &entries, &children, &external_parent, "", last);
217    }
218
219    println!();
220    println!(
221        "  {} archive{}   {} on disk",
222        entries.len(),
223        if entries.len() == 1 { "" } else { "s" },
224        HumanBytes(total_size),
225    );
226    println!();
227
228    Ok(())
229}