//! MEM|8 quantum formatter (st/formatters/quantum.rs).
1use super::{Formatter, StreamingFormatter};
2use crate::scanner::{FileNode, TreeStats};
3use anyhow::Result;
4use std::collections::HashMap;
5use std::io::Write;
6use std::path::Path;
7
/// MEM|8 Quantum Format - The Ultimate Compression
///
/// Each encoded entry begins with a single header byte whose bits flag
/// which optional fields follow (only some bits are used so far):
///
/// 7 6 5 4 3 2 1 0
/// | | | | | | | └─ Size present (always 1 for now)
/// | | | | | | └─── Permissions differ from parent
/// | | | | | └──── Time differs from parent
/// | | | | └───── Owner/Group differ from parent
/// | | | └────── Is directory
/// | | └─────── Is symlink
/// | └──────── Has extended attributes
/// └───────── Reserved for summary
pub struct QuantumFormatter {
    // Context for delta encoding: entries are encoded relative to the
    // most recently seen directory's metadata, so unchanged values cost
    // zero bytes in the output.
    parent_perms: u16,
    parent_uid: u32,
    parent_gid: u32,
    parent_time: u64,

    // Token dictionary mapping common path components and extensions to
    // single bytes in the 0x80..=0x9F range (populated in `new`).
    tokens: HashMap<String, u8>,
}
30
// Bit positions in the per-entry header byte (layout documented on
// `QuantumFormatter`).
const SIZE_BIT: u8 = 0b00000001;
const PERMS_BIT: u8 = 0b00000010;
// const TIME_BIT: u8 = 0b00000100; // Unused for now
// const OWNER_BIT: u8 = 0b00001000; // Unused for now
const DIR_BIT: u8 = 0b00010000;
// const LINK_BIT: u8 = 0b00100000; // Unused for now
// const XATTR_BIT: u8 = 0b01000000; // Unused for now
// const SUMMARY_BIT: u8 = 0b10000000; // Unused for now

// ASCII control codes emitted between entries to encode tree structure:
// SAME = next entry is a sibling, DEEPER = descend into the directory
// just emitted, BACK = pop one level up.
const TRAVERSE_SAME: char = '\x0B'; // Vertical Tab
const TRAVERSE_DEEPER: char = '\x0E'; // Shift Out
const TRAVERSE_BACK: char = '\x0F'; // Shift In
45
46impl Default for QuantumFormatter {
47    fn default() -> Self {
48        Self::new()
49    }
50}
51
52impl QuantumFormatter {
53    pub fn new() -> Self {
54        let mut tokens = HashMap::new();
55
56        // Pre-populate common tokens
57        tokens.insert("node_modules".to_string(), 0x80);
58        tokens.insert(".git".to_string(), 0x81);
59        tokens.insert("src".to_string(), 0x82);
60        tokens.insert("target".to_string(), 0x83);
61        tokens.insert("dist".to_string(), 0x84);
62        tokens.insert(".js".to_string(), 0x90);
63        tokens.insert(".rs".to_string(), 0x91);
64        tokens.insert(".json".to_string(), 0x92);
65        tokens.insert(".md".to_string(), 0x93);
66        tokens.insert("index".to_string(), 0x94);
67        tokens.insert("README".to_string(), 0x95);
68
69        Self {
70            parent_perms: 0o755,
71            parent_uid: 1000,
72            parent_gid: 1000,
73            parent_time: 0,
74            tokens,
75        }
76    }
77
78    /// Encode size using variable-length encoding
79    fn encode_size(size: u64) -> Vec<u8> {
80        match size {
81            0..=255 => vec![0x00, size as u8],
82            256..=65535 => {
83                let bytes = (size as u16).to_le_bytes();
84                vec![0x01, bytes[0], bytes[1]]
85            }
86            65536..=4294967295 => {
87                let bytes = (size as u32).to_le_bytes();
88                vec![0x02, bytes[0], bytes[1], bytes[2], bytes[3]]
89            }
90            _ => {
91                let bytes = size.to_le_bytes();
92                let mut result = vec![0x03];
93                result.extend_from_slice(&bytes);
94                result
95            }
96        }
97    }
98
99    /// Encode permissions as delta from parent
100    fn encode_perms_delta(&self, perms: u32) -> Vec<u8> {
101        let perms16 = (perms & 0o777) as u16;
102        if perms16 == self.parent_perms {
103            vec![]
104        } else {
105            // Just store the different bits
106            let delta = perms16 ^ self.parent_perms;
107            vec![(delta >> 8) as u8, delta as u8]
108        }
109    }
110
111    /// Tokenize filename components
112    fn tokenize_name(&mut self, name: &str) -> Vec<u8> {
113        let mut result = Vec::new();
114
115        // Check for exact token match
116        if let Some(&token) = self.tokens.get(name) {
117            result.push(token);
118            return result;
119        }
120
121        // Check for extension tokens
122        if let Some(dot_pos) = name.rfind('.') {
123            let ext = &name[dot_pos..];
124            if let Some(&token) = self.tokens.get(ext) {
125                result.extend_from_slice(&name.as_bytes()[..dot_pos]);
126                result.push(token);
127                return result;
128            }
129        }
130
131        // Check for prefix tokens
132        for (pattern, &token) in &self.tokens {
133            if name.starts_with(pattern) && pattern.len() > 3 {
134                result.push(token);
135                result.extend_from_slice(&name.as_bytes()[pattern.len()..]);
136                return result;
137            }
138        }
139
140        // No token found, use raw name
141        result.extend_from_slice(name.as_bytes());
142        result
143    }
144
145    fn encode_entry(&mut self, node: &FileNode) -> Vec<u8> {
146        let mut header = 0u8;
147        let mut data = Vec::new();
148
149        // Always include size (for now)
150        header |= SIZE_BIT;
151        let size_bytes = Self::encode_size(node.size);
152        data.extend(size_bytes);
153
154        // Check what differs from parent context
155        if (node.permissions & 0o777) as u16 != self.parent_perms {
156            header |= PERMS_BIT;
157            data.extend(self.encode_perms_delta(node.permissions));
158        }
159
160        // For directories, update context
161        if node.is_dir {
162            header |= DIR_BIT;
163            // Update parent context for children
164            self.parent_perms = (node.permissions & 0o777) as u16;
165            self.parent_uid = node.uid;
166            self.parent_gid = node.gid;
167            self.parent_time = node
168                .modified
169                .duration_since(std::time::UNIX_EPOCH)
170                .unwrap_or_default()
171                .as_secs();
172        }
173
174        // Add header
175        let mut result = vec![header];
176        result.extend(data);
177
178        // Add tokenized name with null terminator
179        let name = node
180            .path
181            .file_name()
182            .unwrap_or(node.path.as_os_str())
183            .to_string_lossy();
184        let tokenized = self.tokenize_name(&name);
185        result.extend(tokenized);
186        result.push(0); // Add null terminator for name
187
188        result
189    }
190}
191
impl Formatter for QuantumFormatter {
    /// Write the complete quantum-encoded tree to `writer`.
    ///
    /// Output layout: a text header (magic line, field key, token table),
    /// then the binary entry stream between BEGIN/END markers. `self` is
    /// not used for encoding state — a fresh formatter is constructed so
    /// repeated calls always start from the same base context.
    ///
    /// Assumes `nodes` is in depth-first order (children directly follow
    /// their parent) — TODO confirm against the scanner's output order.
    fn format(
        &self,
        writer: &mut dyn Write,
        nodes: &[FileNode],
        _stats: &TreeStats,
        _root_path: &Path,
    ) -> Result<()> {
        let mut formatter = QuantumFormatter::new();

        // Write header
        writeln!(writer, "MEM8_QUANTUM_V1:")?;
        writeln!(writer, "KEY:HSSSSS...")?; // Header + variable size
        // NOTE(review): this token table lists only a subset of the
        // tokens registered in `new` — confirm decoders rely on a
        // built-in table rather than parsing this line.
        writeln!(
            writer,
            "TOKENS:80=node_modules,81=.git,82=src,90=.js,91=.rs"
        )?;
        writeln!(writer, "---BEGIN_DATA---")?; // Clear marker for binary data start

        // Process nodes with depth tracking
        let mut current_depth = 0;

        for (i, node) in nodes.iter().enumerate() {
            // Handle depth changes
            if current_depth > node.depth {
                // Going back up one or more levels: one BACK marker per
                // level popped.
                for _ in 0..(current_depth - node.depth) {
                    write!(writer, "{}", TRAVERSE_BACK)?;
                }
                current_depth = node.depth;
            }

            // Encode entry
            let encoded = formatter.encode_entry(node);
            writer.write_all(&encoded)?;

            // Add traversal code. `is_last` means: final node overall, or
            // the next node is shallower. (The `i + 1 < nodes.len()` in
            // the second operand is redundant — the short-circuiting `||`
            // already guarantees it.)
            let is_last =
                i + 1 >= nodes.len() || (i + 1 < nodes.len() && nodes[i + 1].depth < node.depth);

            if node.is_dir && i + 1 < nodes.len() && nodes[i + 1].depth > node.depth {
                // Non-empty directory: descend into it.
                write!(writer, "{}", TRAVERSE_DEEPER)?;
                current_depth = node.depth + 1;
            } else if is_last && node.depth > 0 {
                // Last entry at this level: pop one level up.
                write!(writer, "{}", TRAVERSE_BACK)?;
                current_depth = node.depth - 1;
            } else {
                // A sibling follows at the same depth.
                write!(writer, "{}", TRAVERSE_SAME)?;
            }
        }

        // Close any remaining directories
        while current_depth > 0 {
            write!(writer, "{}", TRAVERSE_BACK)?;
            current_depth -= 1;
        }

        writeln!(writer)?; // Final newline
        writeln!(writer, "---END_DATA---")?; // Clear marker for binary data end

        Ok(())
    }
}
255
impl StreamingFormatter for QuantumFormatter {
    /// Emit the stream header plus the base delta-encoding context,
    /// which matches the defaults set in `QuantumFormatter::new`
    /// (perms 755, uid/gid 1000).
    fn start_stream(&self, writer: &mut dyn Write, _root_path: &Path) -> Result<()> {
        writeln!(writer, "MEM8_QUANTUM_V1_STREAM:")?;
        writeln!(writer, "KEY:HSSSSS...")?;
        writeln!(writer, "BASE_CONTEXT:perms=755,uid=1000,gid=1000")?;
        Ok(())
    }

    /// Encode one node followed by a same-level traversal marker.
    ///
    /// NOTE(review): because `&self` is immutable, a fresh
    /// `QuantumFormatter` is built per call, so the delta context resets
    /// for every node — directory context from earlier calls is lost,
    /// despite the BASE_CONTEXT header implying stateful streaming.
    /// Confirm whether streaming decoders expect this, or whether the
    /// struct should use interior mutability to keep context across calls.
    fn format_node(
        &self,
        writer: &mut dyn Write,
        node: &FileNode,
        _root_path: &Path,
    ) -> Result<()> {
        let mut formatter = QuantumFormatter::new();
        let encoded = formatter.encode_entry(node);
        writer.write_all(&encoded)?;
        write!(writer, "{}", TRAVERSE_SAME)?;
        Ok(())
    }

    /// Emit a compact trailer: file count, dir count, and total size,
    /// all in lowercase hex.
    fn end_stream(
        &self,
        writer: &mut dyn Write,
        stats: &TreeStats,
        _root_path: &Path,
    ) -> Result<()> {
        // Compact summary
        writeln!(writer, "\nQUANTUM_STATS:")?;
        writeln!(
            writer,
            "F:{:x} D:{:x} S:{:x}",
            stats.total_files, stats.total_dirs, stats.total_size
        )?;
        Ok(())
    }
}