//! smart-tree 8.0.1
//!
//! Smart Tree - An intelligent, AI-friendly directory visualization tool.
// Quantum Scanner - The native tree walker that speaks in quantum format
// This is where the magic happens - no intermediate representation, just pure quantum output

#![allow(dead_code)] // Many constants and fields are reserved for future use

use anyhow::Result;
use std::collections::HashMap;
use std::fs;
use std::io::Write;
use std::path::Path;
use std::time::SystemTime;

// Token ranges as suggested.
// Values 0x0000-0x00FF are reserved for the built-in vocabulary below;
// dynamically learned tokens (not yet implemented) start at TOKEN_USER_START.
const TOKEN_RESERVED_START: u16 = 0x0000;
const TOKEN_RESERVED_END: u16 = 0x00FF;
const TOKEN_USER_START: u16 = 0x0100;

// Pre-defined tokens for common filesystem terms (entry kinds and the most
// frequent Unix permission modes). Currently reserved for future use.
const TOKEN_DIR: u16 = 0x0001;
const TOKEN_FILE: u16 = 0x0002;
const TOKEN_LINK: u16 = 0x0003;
const TOKEN_PERM_755: u16 = 0x0010;
const TOKEN_PERM_644: u16 = 0x0011;
const TOKEN_PERM_777: u16 = 0x0012;
const TOKEN_PERM_600: u16 = 0x0013;

// Common extensions (0x20-0x7F). The token map keys include the leading dot.
const TOKEN_EXT_JS: u16 = 0x0020;
const TOKEN_EXT_RS: u16 = 0x0021;
const TOKEN_EXT_PY: u16 = 0x0022;
const TOKEN_EXT_GO: u16 = 0x0023;
const TOKEN_EXT_MD: u16 = 0x0024;
const TOKEN_EXT_JSON: u16 = 0x0025;
const TOKEN_EXT_YAML: u16 = 0x0026;
const TOKEN_EXT_TXT: u16 = 0x0027;

// Common directory names (0x80-0xFF) that replace the full name in output.
const TOKEN_NODE_MODULES: u16 = 0x0080;
const TOKEN_GIT: u16 = 0x0081;
const TOKEN_SRC: u16 = 0x0082;
const TOKEN_TARGET: u16 = 0x0083;
const TOKEN_BUILD: u16 = 0x0084;
const TOKEN_DIST: u16 = 0x0085;
const TOKEN_DOCS: u16 = 0x0086;
const TOKEN_TESTS: u16 = 0x0087;

// Size tokens for common ranges (see `encode_size` for payload layouts).
const TOKEN_SIZE_ZERO: u16 = 0x00A0;
const TOKEN_SIZE_TINY: u16 = 0x00A1; // 1-1KB
const TOKEN_SIZE_SMALL: u16 = 0x00A2; // 1KB-100KB
const TOKEN_SIZE_MEDIUM: u16 = 0x00A3; // 100KB-10MB
const TOKEN_SIZE_LARGE: u16 = 0x00A4; // 10MB+

// ASCII control codes for tree traversal, emitted between entries in the
// DATA section to encode the tree shape without repeating paths.
const TRAVERSE_SAME: u8 = 0x0B; // Vertical Tab — next sibling follows
const TRAVERSE_DEEPER: u8 = 0x0E; // Shift Out — descend into children
const TRAVERSE_BACK: u8 = 0x0F; // Shift In — directory finished, pop up
const TRAVERSE_SUMMARY: u8 = 0x0C; // Form Feed — summary section follows

// Header bit flags: each entry starts with one byte whose bits declare which
// optional payload fields follow and what kind of entry this is.
const HDR_HAS_SIZE: u8 = 0b00000001;
const HDR_HAS_PERMS: u8 = 0b00000010;
const HDR_HAS_TIME: u8 = 0b00000100;
const HDR_HAS_OWNER: u8 = 0b00001000;
const HDR_IS_DIR: u8 = 0b00010000;
const HDR_IS_LINK: u8 = 0b00100000;
const HDR_HAS_XATTR: u8 = 0b01000000;
const HDR_TOKENIZED: u8 = 0b10000000;

/// Streaming scanner that walks a directory tree and writes quantum format
/// directly to `W` — no intermediate tree representation is built.
pub struct QuantumScanner<W: Write> {
    /// Destination for the quantum-format byte stream.
    writer: W,
    /// Name/extension -> 16-bit token table used to compress common strings.
    token_map: HashMap<String, u16>,
    /// Next token value to hand out once dynamic tokenization is implemented.
    #[allow(dead_code)]
    next_dynamic_token: u16,

    // Context for delta encoding
    /// Permissions of the directory currently being scanned; children are
    /// encoded as an XOR delta against this value.
    parent_perms: u32,
    /// Reserved: owner uid of the current parent (not yet emitted).
    #[allow(dead_code)]
    parent_uid: u32,
    /// Reserved: owner gid of the current parent (not yet emitted).
    #[allow(dead_code)]
    parent_gid: u32,
    /// Reserved: timestamp of the current parent (not yet emitted).
    #[allow(dead_code)]
    parent_time: SystemTime,

    // Stats tracking
    /// Count of non-directory entries visited.
    total_files: u64,
    /// Count of directories visited.
    total_dirs: u64,
    /// Sum of file sizes in bytes (directories excluded).
    total_size: u64,
}

impl<W: Write> QuantumScanner<W> {
    /// Return the Unix permission bits (lower 9 mode bits) for `metadata`.
    #[cfg(unix)]
    fn get_permissions(metadata: &fs::Metadata) -> u32 {
        use std::os::unix::fs::PermissionsExt;
        metadata.permissions().mode() & 0o777
    }

    /// Non-Unix platforms expose no mode bits; report a conventional default.
    #[cfg(not(unix))]
    fn get_permissions(_metadata: &fs::Metadata) -> u32 {
        0o755 // Default permissions for non-Unix
    }

    /// Create a scanner that emits quantum format to `writer`.
    ///
    /// The token table is pre-seeded with the well-known directory names and
    /// file extensions so common strings compress from the first entry.
    pub fn new(writer: W) -> Self {
        let mut token_map = HashMap::new();

        // Well-known directory names (0x80-0xFF token range).
        token_map.insert("node_modules".to_string(), TOKEN_NODE_MODULES);
        token_map.insert(".git".to_string(), TOKEN_GIT);
        token_map.insert("src".to_string(), TOKEN_SRC);
        token_map.insert("target".to_string(), TOKEN_TARGET);
        token_map.insert("build".to_string(), TOKEN_BUILD);
        token_map.insert("dist".to_string(), TOKEN_DIST);
        token_map.insert("docs".to_string(), TOKEN_DOCS);
        token_map.insert("tests".to_string(), TOKEN_TESTS);

        // Extension tokens (0x20-0x7F token range); keys include the leading
        // dot so they match the suffix that `emit_name` looks up.
        token_map.insert(".js".to_string(), TOKEN_EXT_JS);
        token_map.insert(".rs".to_string(), TOKEN_EXT_RS);
        token_map.insert(".py".to_string(), TOKEN_EXT_PY);
        token_map.insert(".go".to_string(), TOKEN_EXT_GO);
        token_map.insert(".md".to_string(), TOKEN_EXT_MD);
        token_map.insert(".json".to_string(), TOKEN_EXT_JSON);
        token_map.insert(".yaml".to_string(), TOKEN_EXT_YAML);
        token_map.insert(".txt".to_string(), TOKEN_EXT_TXT);

        Self {
            writer,
            token_map,
            next_dynamic_token: TOKEN_USER_START,
            parent_perms: 0o755,
            parent_uid: 1000,
            parent_gid: 1000,
            parent_time: SystemTime::UNIX_EPOCH,
            total_files: 0,
            total_dirs: 0,
            total_size: 0,
        }
    }

    /// Write the quantum format header: magic line, token table, and the
    /// `DATA:` marker that starts the entry stream.
    pub fn write_header(&mut self) -> Result<()> {
        writeln!(self.writer, "QUANTUM_NATIVE_V1:")?;
        writeln!(self.writer, "TOKENS:")?;

        // Sort by token value so the header is byte-stable across runs
        // (HashMap iteration order is not deterministic).
        let mut tokens: Vec<_> = self.token_map.iter().collect();
        tokens.sort_by_key(|(_, &token)| token);

        for (name, token) in tokens {
            writeln!(self.writer, "  {:04X}={}", token, name)?;
        }

        writeln!(self.writer, "DATA:")?;
        Ok(())
    }

    /// Scan `path` and stream the complete quantum document:
    /// header, entries, then the trailing summary.
    pub fn scan(&mut self, path: &Path) -> Result<()> {
        self.write_header()?;
        self.scan_recursive(path, 0)?;
        self.write_summary()?;
        Ok(())
    }

    /// Depth-first walk that emits each entry as it is visited.
    ///
    /// Children are sorted by name for deterministic output; siblings are
    /// separated with `TRAVERSE_SAME` and a finished directory is closed
    /// with `TRAVERSE_BACK` (except at the root).
    ///
    /// NOTE(review): `fs::metadata` follows symlinks, so a symlink cycle
    /// would recurse forever — consider `fs::symlink_metadata` if symlinked
    /// directories should not be traversed.
    fn scan_recursive(&mut self, path: &Path, depth: usize) -> Result<()> {
        let metadata = fs::metadata(path)?;

        // Emit quantum entry
        if metadata.is_dir() {
            self.emit_directory(path, &metadata, depth)?;

            // Children delta-encode their permissions against this directory;
            // save the previous context so it can be restored afterwards.
            let old_perms = self.parent_perms;
            self.parent_perms = Self::get_permissions(&metadata);

            // Unreadable entries are skipped rather than aborting the scan.
            let mut entries: Vec<_> = fs::read_dir(path)?.filter_map(|e| e.ok()).collect();

            // Sort for consistent output
            entries.sort_by_key(|e| e.file_name());

            for (i, entry) in entries.iter().enumerate() {
                let child_path = entry.path();
                self.scan_recursive(&child_path, depth + 1)?;

                // Sibling separator (not emitted after the last child).
                if i < entries.len() - 1 {
                    self.writer.write_all(&[TRAVERSE_SAME])?;
                }
            }

            // Restore parent context
            self.parent_perms = old_perms;

            // Close this directory's scope, except for the root.
            if depth > 0 {
                self.writer.write_all(&[TRAVERSE_BACK])?;
            }

            self.total_dirs += 1;
        } else {
            self.emit_file(path, &metadata)?;
            self.total_files += 1;
            self.total_size += metadata.len();
        }

        Ok(())
    }

    /// Emit one directory entry: header byte, optional payload fields, then
    /// the (possibly tokenized) name.
    fn emit_directory(&mut self, path: &Path, metadata: &fs::Metadata, depth: usize) -> Result<()> {
        let mut header = HDR_IS_DIR;
        let mut data = Vec::new();

        // Size (for directories, this is the entry size reported by the OS).
        header |= HDR_HAS_SIZE;
        data.extend(&self.encode_size(metadata.len()));

        // Permissions only when they differ from the parent, stored as a
        // 16-bit big-endian XOR delta (mode is masked to 9 bits, so it fits).
        let perms = Self::get_permissions(metadata);
        if perms != self.parent_perms {
            header |= HDR_HAS_PERMS;
            let delta = perms ^ self.parent_perms;
            data.push((delta >> 8) as u8);
            data.push(delta as u8);
        }

        // Emit header and data
        self.writer.write_all(&[header])?;
        self.writer.write_all(&data)?;

        // Emit name (tokenized if possible)
        self.emit_name(path)?;

        // NOTE(review): TRAVERSE_DEEPER is only emitted for the root;
        // subdirectories rely on TRAVERSE_BACK alone — confirm the decoder
        // expects this asymmetry.
        if depth == 0 {
            self.writer.write_all(&[TRAVERSE_DEEPER])?;
        }

        Ok(())
    }

    /// Emit one file entry: header byte, optional payload fields, then the
    /// (possibly tokenized) name.
    fn emit_file(&mut self, path: &Path, metadata: &fs::Metadata) -> Result<()> {
        let mut header = 0u8;
        let mut data = Vec::new();

        // Size
        header |= HDR_HAS_SIZE;
        data.extend(&self.encode_size(metadata.len()));

        // Permissions delta, same layout as in `emit_directory`.
        let perms = Self::get_permissions(metadata);
        if perms != self.parent_perms {
            header |= HDR_HAS_PERMS;
            let delta = perms ^ self.parent_perms;
            data.push((delta >> 8) as u8);
            data.push(delta as u8);
        }

        // Emit header and data
        self.writer.write_all(&[header])?;
        self.writer.write_all(&data)?;

        // Emit name
        self.emit_name(path)?;

        Ok(())
    }

    /// Write an entry name, substituting a 16-bit little-endian token for
    /// the whole name, or for a known extension suffix, when possible.
    fn emit_name(&mut self, path: &Path) -> Result<()> {
        // Names that are not valid UTF-8 degrade to empty rather than failing.
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

        // Check for exact token match
        if let Some(&token) = self.token_map.get(name) {
            self.writer.write_all(&token.to_le_bytes())?;
            return Ok(());
        }

        // Check for extension token: raw base name + extension token.
        if let Some(dot_pos) = name.rfind('.') {
            let ext = &name[dot_pos..];
            if let Some(&token) = self.token_map.get(ext) {
                // Write base name + extension token
                self.writer.write_all(&name.as_bytes()[..dot_pos])?;
                self.writer.write_all(&token.to_le_bytes())?;
                return Ok(());
            }
        }

        // No token found - consider adding dynamically for frequently seen patterns
        // For now, just write the raw name
        self.writer.write_all(name.as_bytes())?;
        Ok(())
    }

    /// Encode `size` into a compact variable-length byte sequence.
    ///
    /// Common ranges use a size token (little-endian u16) plus a small
    /// payload; the SMALL range is lossy (rounded down to whole KB).
    /// Larger sizes use a one-byte width marker followed by the
    /// little-endian value.
    fn encode_size(&self, size: u64) -> Vec<u8> {
        match size {
            0 => TOKEN_SIZE_ZERO.to_le_bytes().to_vec(),
            // The payload is a single byte, so the exact range is 1-255.
            // (The previous 1..=1024 range silently truncated 256-1024:
            // e.g. 1024 was emitted as payload byte 0.)
            1..=255 => {
                let tok = TOKEN_SIZE_TINY.to_le_bytes();
                vec![tok[0], tok[1], size as u8]
            }
            // KB-granular (lossy): payload is size/1024 as little-endian u16.
            256..=102400 => {
                let kb = (size / 1024) as u16;
                let tok = TOKEN_SIZE_SMALL.to_le_bytes();
                let kb_bytes = kb.to_le_bytes();
                vec![tok[0], tok[1], kb_bytes[0], kb_bytes[1]]
            }
            // > 100KB: one-byte width marker followed by the exact value.
            // (The former inner 0..=255 / 256..=65535 arms were unreachable
            // here and have been removed.)
            _ => {
                if size <= u32::MAX as u64 {
                    let bytes = (size as u32).to_le_bytes();
                    vec![0x02, bytes[0], bytes[1], bytes[2], bytes[3]]
                } else {
                    // Files over 4 GiB were previously truncated via
                    // `size as u32`; marker 0x03 carries the full 64 bits.
                    let mut out = Vec::with_capacity(9);
                    out.push(0x03);
                    out.extend_from_slice(&size.to_le_bytes());
                    out
                }
            }
        }
    }

    /// Append the human-readable summary section with the totals gathered
    /// during the scan.
    fn write_summary(&mut self) -> Result<()> {
        writeln!(self.writer, "\nSUMMARY:")?;
        writeln!(self.writer, "FILES: {}", self.total_files)?;
        writeln!(self.writer, "DIRS: {}", self.total_dirs)?;
        writeln!(self.writer, "SIZE: {}", self.total_size)?;
        Ok(())
    }
}

// Note: `PermissionsExt` is imported function-locally inside the Unix
// variant of `get_permissions`, so no file-level import is needed.