smart-tree 8.0.1

Smart Tree - An intelligent, AI-friendly directory visualization tool
Documentation
// 🎸 The Cheet's Markdown Quantum Compressor - "Compress it like it's hot!" 🔥
// This module implements the Marqant (.mq) format for quantum-compressed markdown
//
// "Why send a whole README when you can send its soul?" - The Cheet
//
// Features:
// - Smart phrase detection with frequency analysis
// - Huffman-inspired token assignment
// - Optional zlib compression for extra magic
// - Section tagging for semantic navigation
// - Streaming support for large documents

use super::{Formatter, PathDisplayMode};
use crate::scanner::{FileNode, TreeStats};
use anyhow::Result;
use marqant::Marqant as MarqantCore;
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;

/// Marqant formatter - Quantum compression for markdown files
pub struct MarqantFormatter {
    no_emoji: bool,
}

impl MarqantFormatter {
    pub fn new(_path_mode: PathDisplayMode, no_emoji: bool) -> Self {
        Self { no_emoji }
    }

    /// Compress markdown content into marqant format
    pub fn compress_markdown(content: &str) -> Result<String> {
        MarqantCore::compress_markdown(content)
    }

    /// Compress markdown content with optional flags
    pub fn compress_markdown_with_flags(content: &str, flags: Option<&str>) -> Result<String> {
        MarqantCore::compress_markdown_with_flags(content, flags)
    }

    /// Add semantic section tags to markdown content
    #[allow(dead_code)]
    fn add_section_tags(content: &str) -> String {
        let mut result = String::new();
        let mut in_code_block = false;

        for line in content.lines() {
            // Track code blocks to avoid tagging inside them
            if line.trim_start().starts_with("```") {
                in_code_block = !in_code_block;
            }

            // Detect section headers
            if !in_code_block {
                if let Some(stripped) = line.strip_prefix("# ") {
                    let section = stripped.trim();
                    result.push_str(&format!("::section:{}::\n", section));
                } else if let Some(stripped) = line.strip_prefix("## ") {
                    let subsection = stripped.trim();
                    result.push_str(&format!("::section:{}::\n", subsection));
                }
            }

            result.push_str(line);
            result.push('\n');
        }

        result
    }

    /// Tokenize markdown content with smart frequency analysis
    pub fn tokenize_content(content: &str) -> (HashMap<String, String>, String) {
        MarqantCore::tokenize_content(content)
    }

    /// Decompress marqant content back to markdown
    pub fn decompress_marqant(compressed: &str) -> Result<String> {
        MarqantCore::decompress_marqant(compressed)
    }
}

impl Formatter for MarqantFormatter {
    fn format(
        &self,
        writer: &mut dyn Write,
        nodes: &[FileNode],
        stats: &TreeStats,
        root_path: &Path,
    ) -> Result<()> {
        // For directory trees, we'll create a compressed markdown representation
        let mut markdown = String::new();

        // Create header with project name
        let project_name = root_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("Directory");

        markdown.push_str(&format!("# {} Structure\n\n", project_name));

        // Create a tree structure in markdown
        markdown.push_str("## File Tree\n\n");
        markdown.push_str("```\n");

        for node in nodes {
            let indent = "  ".repeat(node.depth);
            let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            let suffix = if node.is_dir { "/" } else { "" };
            let emoji = if !self.no_emoji {
                if node.is_dir {
                    "📁 "
                } else {
                    "📄 "
                }
            } else {
                ""
            };
            markdown.push_str(&format!("{}{}{}{}\n", indent, emoji, name, suffix));
        }

        markdown.push_str("```\n\n");

        // Add statistics
        markdown.push_str("## Statistics\n\n");
        markdown.push_str(&format!("- Total files: {}\n", stats.total_files));
        markdown.push_str(&format!("- Total directories: {}\n", stats.total_dirs));
        markdown.push_str(&format!(
            "- Total size: {:.2} MB\n",
            stats.total_size as f64 / 1_048_576.0
        ));

        // Add file type breakdown if available
        if !stats.file_types.is_empty() {
            markdown.push_str("\n### File Types\n\n");
            let mut types: Vec<_> = stats.file_types.iter().collect();
            types.sort_by(|a, b| b.1.cmp(a.1));

            for (ext, count) in types.iter().take(10) {
                markdown.push_str(&format!("- .{}: {} files\n", ext, count));
            }
        }

        // Compress and write
        let compressed = Self::compress_markdown(&markdown)?;
        writer.write_all(compressed.as_bytes())?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_markdown_compression() {
        let markdown = r#"# Test Document

## Section One

This is a test document. This is a test document.

### Subsection

- Item one
- Item two
- Item three

## Section Two

This is a test document.

```rust
fn main() {
    println!("Hello, world!");
}
```

## Section Three

**Bold text** and *italic text*.
"#;

        let compressed = MarqantFormatter::compress_markdown(markdown).unwrap();

        // Marqant v0.2 uses "MARQANT" header (without _V1 suffix)
        assert!(
            compressed.starts_with("MARQANT"),
            "Compressed data should start with MARQANT header"
        );

        // For documents with limited repetition, compression might not reduce size due to header overhead
        // The important thing is that the format is correct and round-trip works

        // Test round-trip
        let decompressed = MarqantFormatter::decompress_marqant(&compressed).unwrap();
        assert_eq!(decompressed.trim(), markdown.trim());

        // Verify the compression at least includes proper header and structure
        assert!(
            compressed.starts_with("MARQANT"),
            "Should have proper header"
        );
        assert!(compressed.len() > 20, "Should have header and content");
    }

    #[test]
    fn test_token_assignment() {
        // Test with markdown content that definitely triggers tokenization
        // Static tokens need enough repetition to save bytes:
        // For "## " (3 chars), we need count * 3 > count + 3 + 3, so count > 2
        // Using 5+ occurrences to ensure threshold is met
        let markdown_content = "## Section 1\n\n## Section 2\n\n## Section 3\n\n## Section 4\n\n## Section 5\n\nContent here.";
        let (tokens, tokenized) = MarqantFormatter::tokenize_content(markdown_content);

        // The tokenization should either:
        // 1. Create token mappings for repeated patterns, OR
        // 2. Modify the content with static token replacements
        // With 5 occurrences of "## ", this should definitely trigger tokenization
        assert!(
            !tokens.is_empty() || tokenized != markdown_content,
            "Tokenization should create tokens or modify content. Got tokens: {:?}, content modified: {}",
            tokens.keys().collect::<Vec<_>>(),
            tokenized != markdown_content
        );
    }
}

// 🎸 The Cheet says: "Markdown files are like guitar solos -
// sometimes you need to compress them down to the essential riffs!" 🎵