Skip to main content

st/formatters/
hextree.rs

1//! HexTree Formatter - Quantum compression meets readable tree structure
2//!
3//! Combines the token efficiency of quantum format with human/AI readable output.
4//! Uses ULTRA_V2 traversal codes rendered as visible Unicode symbols.
5//!
//! Format (example output for a root directory holding two files; a `TOK:` line of
//! `<hex id>=<name>` pairs follows `EXT:` whenever repeated names earn tokens).
//! Sizes are hexadecimal: `200` is 512 bytes, `4k` is 4 KiB.
//! ```text
//! HEXTREE_V1:
//! KEY: ↓=enter ·=same ↑=exit
//! EXT: @=rs #=py $=js %=md &=cfg
//! ROOT:project
//! ---
//! project↓
//!   @main.rs·4k
//!   %README.md·200
//! ↑F2S4k
//! ---
//! TOTAL:F2D1S4k
//! ```
21
22use super::Formatter;
23use crate::scanner::{FileNode, TreeStats};
24use anyhow::Result;
25use std::collections::HashMap;
26use std::io::Write;
27use std::path::Path;
28
/// HexTree - The readable quantum format.
///
/// Holds the token table that maps frequently repeated names to one-byte IDs,
/// together with the settings that control how that table is built.
#[derive(Debug, Clone)]
pub struct HexTreeFormatter {
    /// Dynamic tokens learned from the tree (name -> token ID).
    tokens: HashMap<String, u8>,
    /// Next available token ID; `new()` starts it at `0x80`.
    next_token: u8,
    /// Minimum occurrences a name needs to earn a token.
    min_occurrences: usize,
}
38
39impl Default for HexTreeFormatter {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl HexTreeFormatter {
46    pub fn new() -> Self {
47        Self {
48            tokens: HashMap::new(),
49            next_token: 0x80,
50            min_occurrences: 2,
51        }
52    }
53
54    /// Learn tokens from analyzing the tree
55    fn learn_tokens(&mut self, nodes: &[FileNode]) {
56        let mut occurrences: HashMap<String, usize> = HashMap::new();
57
58        // Count directory names and common extensions
59        for node in nodes {
60            if let Some(name) = node.path.file_name() {
61                let name_str = name.to_string_lossy().to_string();
62
63                // Count directory names
64                if node.is_dir {
65                    *occurrences.entry(name_str.clone()).or_insert(0) += 1;
66                }
67
68                // Count file stems for common patterns
69                if let Some(stem) = node.path.file_stem() {
70                    let stem_str = stem.to_string_lossy().to_string();
71                    if stem_str == "mod"
72                        || stem_str == "lib"
73                        || stem_str == "main"
74                        || stem_str == "index"
75                        || stem_str == "test"
76                    {
77                        *occurrences.entry(format!("{}.rs", stem_str)).or_insert(0) += 1;
78                        *occurrences.entry(format!("{}.py", stem_str)).or_insert(0) += 1;
79                        *occurrences.entry(format!("{}.js", stem_str)).or_insert(0) += 1;
80                    }
81                }
82            }
83        }
84
85        // Assign tokens to frequently occurring names
86        let mut sorted: Vec<_> = occurrences.into_iter().collect();
87        sorted.sort_by(|a, b| b.1.cmp(&a.1));
88
89        for (name, count) in sorted {
90            if count >= self.min_occurrences && self.next_token < 0xFF {
91                self.tokens.insert(name, self.next_token);
92                self.next_token += 1;
93            }
94            if self.tokens.len() >= 32 {
95                break; // Limit token count for readability
96            }
97        }
98    }
99
100    /// Format a size in compact hex notation
101    fn format_size(size: u64) -> String {
102        if size == 0 {
103            return "0".to_string();
104        }
105
106        // Use suffixes: k=1024, m=1048576, g=1073741824
107        if size >= 1073741824 {
108            format!("{:x}g", size / 1073741824)
109        } else if size >= 1048576 {
110            format!("{:x}m", size / 1048576)
111        } else if size >= 1024 {
112            format!("{:x}k", size / 1024)
113        } else {
114            format!("{:x}", size)
115        }
116    }
117
118    /// Tokenize a name if possible
119    fn tokenize(&self, name: &str) -> String {
120        if let Some(&token) = self.tokens.get(name) {
121            format!("{:X}", token)
122        } else {
123            name.to_string()
124        }
125    }
126
127    /// Get language marker for file extension
128    fn lang_marker(ext: Option<&str>) -> &'static str {
129        match ext {
130            Some("rs") => "@",
131            Some("py") => "#",
132            Some("js" | "jsx" | "ts" | "tsx") => "$",
133            Some("md") => "%",
134            Some("toml" | "yaml" | "yml" | "json") => "&",
135            _ => "",
136        }
137    }
138}
139
140impl Formatter for HexTreeFormatter {
141    fn format(
142        &self,
143        writer: &mut dyn Write,
144        nodes: &[FileNode],
145        stats: &TreeStats,
146        root_path: &Path,
147    ) -> Result<()> {
148        let mut formatter = HexTreeFormatter::new();
149
150        // Learn tokens from the tree
151        formatter.learn_tokens(nodes);
152
153        // Write header
154        writeln!(writer, "HEXTREE_V1:")?;
155        writeln!(writer, "KEY: ↓=enter ·=same ↑=exit")?;
156        writeln!(writer, "EXT: @=rs #=py $=js %=md &=cfg")?;
157
158        // Write token definitions if any
159        if !formatter.tokens.is_empty() {
160            write!(writer, "TOK:")?;
161            let mut token_list: Vec<_> = formatter.tokens.iter().collect();
162            token_list.sort_by_key(|(_, &v)| v);
163            for (name, &id) in token_list.iter().take(16) {
164                write!(writer, " {:X}={}", id, name)?;
165            }
166            writeln!(writer)?;
167        }
168
169        writeln!(writer, "ROOT:{}", root_path.display())?;
170        writeln!(writer, "---")?;
171
172        // Track directory state for summaries
173        struct DirState {
174            depth: usize,
175            file_count: usize,
176            total_size: u64,
177        }
178        let mut dir_stack: Vec<DirState> = vec![];
179        let mut prev_depth = 0;
180
181        for node in nodes {
182            let depth = node.depth;
183
184            // Handle depth changes - close directories
185            while prev_depth > depth {
186                if let Some(state) = dir_stack.pop() {
187                    // Write directory summary on exit
188                    let indent = "  ".repeat(state.depth);
189                    writeln!(
190                        writer,
191                        "{}↑F{}S{}",
192                        indent,
193                        state.file_count,
194                        Self::format_size(state.total_size)
195                    )?;
196                }
197                prev_depth -= 1;
198            }
199
200            let indent = "  ".repeat(depth);
201            let name = node
202                .path
203                .file_name()
204                .map(|n| n.to_string_lossy().to_string())
205                .unwrap_or_else(|| node.path.to_string_lossy().to_string());
206
207            let tokenized = formatter.tokenize(&name);
208
209            if node.is_dir {
210                // Directory - going deeper
211                writeln!(writer, "{}{}↓", indent, tokenized)?;
212                dir_stack.push(DirState {
213                    depth,
214                    file_count: 0,
215                    total_size: 0,
216                });
217                prev_depth = depth + 1;
218            } else {
219                // File - same level
220                let ext = node.path.extension().and_then(|e| e.to_str());
221                let lang = Self::lang_marker(ext);
222                let size = Self::format_size(node.size);
223                writeln!(writer, "{}{}{}·{}", indent, lang, tokenized, size)?;
224
225                // Update parent directory stats
226                if let Some(parent) = dir_stack.last_mut() {
227                    parent.file_count += 1;
228                    parent.total_size += node.size;
229                }
230            }
231        }
232
233        // Close remaining directories
234        while let Some(state) = dir_stack.pop() {
235            let indent = "  ".repeat(state.depth);
236            writeln!(
237                writer,
238                "{}↑F{}S{}",
239                indent,
240                state.file_count,
241                Self::format_size(state.total_size)
242            )?;
243        }
244
245        // Final stats
246        writeln!(writer, "---")?;
247        writeln!(
248            writer,
249            "TOTAL:F{:x}D{:x}S{}",
250            stats.total_files,
251            stats.total_dirs,
252            Self::format_size(stats.total_size)
253        )?;
254
255        Ok(())
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::{FileCategory, FileType, FilesystemType};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Build a `FileNode` fixture: only path, kind, size and depth vary; every other
    /// field is filled with a fixed placeholder value.
    fn make_node(path: &str, is_dir: bool, size: u64, depth: usize) -> FileNode {
        FileNode {
            // Fields under test.
            path: PathBuf::from(path),
            is_dir,
            size,
            depth,
            // Fixed filler for the remaining fields.
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            file_type: FileType::RegularFile,
            category: FileCategory::Unknown,
            search_matches: None,
            filesystem_type: FilesystemType::Unknown,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    /// Smoke test: a small tree renders with the format tag and both node markers.
    #[test]
    fn test_hextree_basic() {
        let tree = vec![
            make_node("project", true, 0, 0),
            make_node("project/src", true, 0, 1),
            make_node("project/src/main.rs", false, 4096, 2),
            make_node("project/src/lib.rs", false, 2048, 2),
        ];
        let totals = TreeStats {
            total_files: 2,
            total_dirs: 2,
            total_size: 6144,
            ..Default::default()
        };

        let mut buffer: Vec<u8> = Vec::new();
        HexTreeFormatter::new()
            .format(&mut buffer, &tree, &totals, Path::new("project"))
            .unwrap();
        let rendered = String::from_utf8(buffer).unwrap();

        assert!(rendered.contains("HEXTREE_V1:"));
        assert!(rendered.contains("↓")); // Directory marker
        assert!(rendered.contains("·")); // File marker
    }
}