#![allow(dead_code)]

use anyhow::Result;
use std::collections::HashMap;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::time::SystemTime;

// Token space layout: 0x0000..=0x00FF is reserved for the built-in tokens
// below; dynamically assigned tokens start at TOKEN_USER_START.
const TOKEN_RESERVED_START: u16 = 0x0000;
const TOKEN_RESERVED_END: u16 = 0x00FF;
const TOKEN_USER_START: u16 = 0x0100;

// Entry kinds.
const TOKEN_DIR: u16 = 0x0001;
const TOKEN_FILE: u16 = 0x0002;
const TOKEN_LINK: u16 = 0x0003;

// Common permission patterns.
const TOKEN_PERM_755: u16 = 0x0010;
const TOKEN_PERM_644: u16 = 0x0011;
const TOKEN_PERM_777: u16 = 0x0012;
const TOKEN_PERM_600: u16 = 0x0013;

// Common file extensions.
const TOKEN_EXT_JS: u16 = 0x0020;
const TOKEN_EXT_RS: u16 = 0x0021;
const TOKEN_EXT_PY: u16 = 0x0022;
const TOKEN_EXT_GO: u16 = 0x0023;
const TOKEN_EXT_MD: u16 = 0x0024;
const TOKEN_EXT_JSON: u16 = 0x0025;
const TOKEN_EXT_YAML: u16 = 0x0026;
const TOKEN_EXT_TXT: u16 = 0x0027;

// Common directory names.
const TOKEN_NODE_MODULES: u16 = 0x0080;
const TOKEN_GIT: u16 = 0x0081;
const TOKEN_SRC: u16 = 0x0082;
const TOKEN_TARGET: u16 = 0x0083;
const TOKEN_BUILD: u16 = 0x0084;
const TOKEN_DIST: u16 = 0x0085;
const TOKEN_DOCS: u16 = 0x0086;
const TOKEN_TESTS: u16 = 0x0087;

// Size-class tokens.
const TOKEN_SIZE_ZERO: u16 = 0x00A0;
const TOKEN_SIZE_TINY: u16 = 0x00A1;
const TOKEN_SIZE_SMALL: u16 = 0x00A2;
const TOKEN_SIZE_MEDIUM: u16 = 0x00A3;
const TOKEN_SIZE_LARGE: u16 = 0x00A4;

// Traversal markers emitted between entries.
const TRAVERSE_SAME: u8 = 0x0B;
const TRAVERSE_DEEPER: u8 = 0x0E;
const TRAVERSE_BACK: u8 = 0x0F;
const TRAVERSE_SUMMARY: u8 = 0x0C;

// Per-entry header flag bits.
const HDR_HAS_SIZE: u8 = 0b00000001;
const HDR_HAS_PERMS: u8 = 0b00000010;
const HDR_HAS_TIME: u8 = 0b00000100;
const HDR_HAS_OWNER: u8 = 0b00001000;
const HDR_IS_DIR: u8 = 0b00010000;
const HDR_IS_LINK: u8 = 0b00100000;
const HDR_HAS_XATTR: u8 = 0b01000000;
const HDR_TOKENIZED: u8 = 0b10000000;
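
// A minimal sketch of how the flag bits compose (illustrative only; it
// mirrors what emit_directory and emit_file below actually do): a directory
// entry carrying a size field and a permission delta would be headed by
//
//     let header = HDR_IS_DIR | HDR_HAS_SIZE | HDR_HAS_PERMS; // 0b0001_0011
//
// HDR_TOKENIZED is defined for the format but never set by this scanner.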

pub struct QuantumScanner<W: Write> {
    writer: W,
    token_map: HashMap<String, u16>,
    #[allow(dead_code)]
    next_dynamic_token: u16,

    // Parent context used to delta-encode child metadata.
    parent_perms: u32,
    #[allow(dead_code)]
    parent_uid: u32,
    #[allow(dead_code)]
    parent_gid: u32,
    #[allow(dead_code)]
    parent_time: SystemTime,

    // Running totals for the trailing summary.
    total_files: u64,
    total_dirs: u64,
    total_size: u64,
}

impl<W: Write> QuantumScanner<W> {
    /// Returns the Unix permission bits (the low nine mode bits).
    #[cfg(unix)]
    fn get_permissions(metadata: &fs::Metadata) -> u32 {
        use std::os::unix::fs::PermissionsExt;
        metadata.permissions().mode() & 0o777
    }

    /// Non-Unix platforms expose no mode bits; fall back to 0o755.
    #[cfg(not(unix))]
    fn get_permissions(_metadata: &fs::Metadata) -> u32 {
        0o755
    }

    pub fn new(writer: W) -> Self {
        let mut token_map = HashMap::new();

        // Pre-seed well-known directory names.
        token_map.insert("node_modules".to_string(), TOKEN_NODE_MODULES);
        token_map.insert(".git".to_string(), TOKEN_GIT);
        token_map.insert("src".to_string(), TOKEN_SRC);
        token_map.insert("target".to_string(), TOKEN_TARGET);
        token_map.insert("build".to_string(), TOKEN_BUILD);
        token_map.insert("dist".to_string(), TOKEN_DIST);
        token_map.insert("docs".to_string(), TOKEN_DOCS);
        token_map.insert("tests".to_string(), TOKEN_TESTS);

        // Pre-seed common file extensions.
        token_map.insert(".js".to_string(), TOKEN_EXT_JS);
        token_map.insert(".rs".to_string(), TOKEN_EXT_RS);
        token_map.insert(".py".to_string(), TOKEN_EXT_PY);
        token_map.insert(".go".to_string(), TOKEN_EXT_GO);
        token_map.insert(".md".to_string(), TOKEN_EXT_MD);
        token_map.insert(".json".to_string(), TOKEN_EXT_JSON);
        token_map.insert(".yaml".to_string(), TOKEN_EXT_YAML);
        token_map.insert(".txt".to_string(), TOKEN_EXT_TXT);

        Self {
            writer,
            token_map,
            next_dynamic_token: TOKEN_USER_START,
            parent_perms: 0o755,
            parent_uid: 1000,
            parent_gid: 1000,
            parent_time: SystemTime::UNIX_EPOCH,
            total_files: 0,
            total_dirs: 0,
            total_size: 0,
        }
    }

    pub fn write_header(&mut self) -> Result<()> {
        writeln!(self.writer, "QUANTUM_NATIVE_V1:")?;
        writeln!(self.writer, "TOKENS:")?;

        // Sort by token value so the header is stable across runs.
        let mut tokens: Vec<_> = self.token_map.iter().collect();
        tokens.sort_by_key(|(_, &token)| token);

        for (name, token) in tokens {
            writeln!(self.writer, "  {:04X}={}", token, name)?;
        }

        writeln!(self.writer, "DATA:")?;
        Ok(())
    }
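
    // For illustration, the text header emitted above looks like:
    //
    //     QUANTUM_NATIVE_V1:
    //     TOKENS:
    //       0020=.js
    //       ...
    //       0080=node_modules
    //     DATA:
    //
    // with token values taken from the constants at the top of this file.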

    pub fn scan(&mut self, path: &Path) -> Result<()> {
        self.write_header()?;
        self.scan_recursive(path, 0)?;
        self.write_summary()?;
        Ok(())
    }

    fn scan_recursive(&mut self, path: &Path, depth: usize) -> Result<()> {
        let metadata = fs::metadata(path)?;

        if metadata.is_dir() {
            self.emit_directory(path, &metadata, depth)?;

            // Children are delta-encoded against this directory's permissions.
            let old_perms = self.parent_perms;
            self.parent_perms = Self::get_permissions(&metadata);

            // Sort entries by name so the output is deterministic.
            let mut entries: Vec<_> = fs::read_dir(path)?.filter_map(|e| e.ok()).collect();
            entries.sort_by_key(|e| e.file_name());

            for (i, entry) in entries.iter().enumerate() {
                let child_path = entry.path();
                self.scan_recursive(&child_path, depth + 1)?;

                // Separate siblings with a same-level marker.
                if i < entries.len() - 1 {
                    self.writer.write_all(&[TRAVERSE_SAME])?;
                }
            }

            self.parent_perms = old_perms;

            // Close this directory's scope (the root needs no back marker).
            if depth > 0 {
                self.writer.write_all(&[TRAVERSE_BACK])?;
            }

            self.total_dirs += 1;
        } else {
            self.emit_file(path, &metadata)?;
            self.total_files += 1;
            self.total_size += metadata.len();
        }

        Ok(())
    }

    fn emit_directory(&mut self, path: &Path, metadata: &fs::Metadata, depth: usize) -> Result<()> {
        let mut header = HDR_IS_DIR;
        let mut data = Vec::new();

        header |= HDR_HAS_SIZE;
        data.extend(&self.encode_size(metadata.len()));

        // Only record permissions when they differ from the parent's,
        // storing the XOR delta in two big-endian bytes.
        let perms = Self::get_permissions(metadata);
        if perms != self.parent_perms {
            header |= HDR_HAS_PERMS;
            let delta = perms ^ self.parent_perms;
            data.push((delta >> 8) as u8);
            data.push(delta as u8);
        }

        self.writer.write_all(&[header])?;
        self.writer.write_all(&data)?;

        self.emit_name(path)?;

        // Only the root emits an explicit descend marker; deeper directories
        // are implied by their position in the stream.
        if depth == 0 {
            self.writer.write_all(&[TRAVERSE_DEEPER])?;
        }

        Ok(())
    }

    fn emit_file(&mut self, path: &Path, metadata: &fs::Metadata) -> Result<()> {
        // Mirrors emit_directory, minus the directory flag and descend marker.
        let mut header = 0u8;
        let mut data = Vec::new();

        header |= HDR_HAS_SIZE;
        data.extend(&self.encode_size(metadata.len()));

        let perms = Self::get_permissions(metadata);
        if perms != self.parent_perms {
            header |= HDR_HAS_PERMS;
            let delta = perms ^ self.parent_perms;
            data.push((delta >> 8) as u8);
            data.push(delta as u8);
        }

        self.writer.write_all(&[header])?;
        self.writer.write_all(&data)?;

        self.emit_name(path)?;

        Ok(())
    }

    fn emit_name(&mut self, path: &Path) -> Result<()> {
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

        // Whole-name tokens (e.g. "src", ".git") collapse to two bytes.
        if let Some(&token) = self.token_map.get(name) {
            self.writer.write_all(&token.to_le_bytes())?;
            return Ok(());
        }

        // Otherwise, tokenize a known extension and emit the stem verbatim.
        if let Some(dot_pos) = name.rfind('.') {
            let ext = &name[dot_pos..];
            if let Some(&token) = self.token_map.get(ext) {
                self.writer.write_all(&name.as_bytes()[..dot_pos])?;
                self.writer.write_all(&token.to_le_bytes())?;
                return Ok(());
            }
        }

        // No token matched: emit the raw name bytes.
        self.writer.write_all(name.as_bytes())?;
        Ok(())
    }
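
    // Name encoding examples (a sketch; byte values follow the constants
    // above): "src" collapses to the two token bytes [0x82, 0x00], while
    // "main.rs" becomes the raw stem plus an extension token:
    //
    //     b"main" ++ [0x21, 0x00]   // TOKEN_EXT_RS, little-endian
    //
    // An untokenized name such as "LICENSE" is written verbatim.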

    fn encode_size(&self, size: u64) -> Vec<u8> {
        match size {
            // Empty files: just the two-byte little-endian ZERO token.
            0 => vec![TOKEN_SIZE_ZERO as u8, (TOKEN_SIZE_ZERO >> 8) as u8],
            // Tiny: exact size in a single trailing byte (1..=255, so the
            // `as u8` cast cannot truncate).
            1..=255 => vec![
                TOKEN_SIZE_TINY as u8,
                (TOKEN_SIZE_TINY >> 8) as u8,
                size as u8,
            ],
            // Small: size in KiB as a little-endian u16 (lossy below 1 KiB
            // granularity).
            256..=102400 => {
                let kb = (size / 1024) as u16;
                vec![
                    TOKEN_SIZE_SMALL as u8,
                    (TOKEN_SIZE_SMALL >> 8) as u8,
                    kb as u8,
                    (kb >> 8) as u8,
                ]
            }
            // Above 100 KiB: a 0x02 tag plus the raw little-endian u32
            // (sizes above u32::MAX are truncated by the cast).
            _ => {
                let bytes = (size as u32).to_le_bytes();
                vec![0x02, bytes[0], bytes[1], bytes[2], bytes[3]]
            }
        }
    }

    fn write_summary(&mut self) -> Result<()> {
        writeln!(self.writer, "\nSUMMARY:")?;
        writeln!(self.writer, "FILES: {}", self.total_files)?;
        writeln!(self.writer, "DIRS: {}", self.total_dirs)?;
        writeln!(self.writer, "SIZE: {}", self.total_size)?;
        Ok(())
    }
}
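
// A minimal usage sketch under test (illustrative, not part of the format):
// scan a directory into an in-memory buffer and spot-check the byte layouts
// produced by encode_size above. Scanning the current directory is a
// test-only convenience.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn scan_writes_header_and_summary() -> Result<()> {
        let mut buf: Vec<u8> = Vec::new();
        let mut scanner = QuantumScanner::new(&mut buf);
        scanner.scan(Path::new("."))?;
        let text = String::from_utf8_lossy(&buf);
        assert!(text.starts_with("QUANTUM_NATIVE_V1:"));
        assert!(text.contains("SUMMARY:"));
        Ok(())
    }

    #[test]
    fn encode_size_layouts() {
        let scanner = QuantumScanner::new(Vec::new());
        // Empty files: just the two-byte ZERO token.
        assert_eq!(scanner.encode_size(0), vec![0xA0, 0x00]);
        // Tiny: token plus the exact byte count.
        assert_eq!(scanner.encode_size(42), vec![0xA1, 0x00, 42]);
        // Small: token plus the size in KiB, little-endian.
        assert_eq!(scanner.encode_size(10 * 1024), vec![0xA2, 0x00, 10, 0]);
        // Large fallback: 0x02 tag plus raw little-endian u32 (200_000 = 0x00030D40).
        assert_eq!(
            scanner.encode_size(200_000),
            vec![0x02, 0x40, 0x0D, 0x03, 0x00]
        );
    }
}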