// st/formatters/marqant.rs

1// 🎸 The Cheet's Markdown Quantum Compressor - "Compress it like it's hot!" 🔥
2// This module implements the Marqant (.mq) format for quantum-compressed markdown
3//
4// "Why send a whole README when you can send its soul?" - The Cheet
5//
6// Features:
7// - Smart phrase detection with frequency analysis
8// - Huffman-inspired token assignment
9// - Optional zlib compression for extra magic
10// - Section tagging for semantic navigation
11// - Streaming support for large documents
12
13use super::{Formatter, PathDisplayMode};
14use crate::scanner::{FileNode, TreeStats};
15use anyhow::Result;
16use marqant::Marqant as MarqantCore;
17use std::collections::HashMap;
18use std::io::Write;
19use std::path::Path;
20
21/// Marqant formatter - Quantum compression for markdown files
22pub struct MarqantFormatter {
23    no_emoji: bool,
24}
25
26impl MarqantFormatter {
27    pub fn new(_path_mode: PathDisplayMode, no_emoji: bool) -> Self {
28        Self { no_emoji }
29    }
30
31    /// Compress markdown content into marqant format
32    pub fn compress_markdown(content: &str) -> Result<String> {
33        MarqantCore::compress_markdown(content)
34    }
35
36    /// Compress markdown content with optional flags
37    pub fn compress_markdown_with_flags(content: &str, flags: Option<&str>) -> Result<String> {
38        MarqantCore::compress_markdown_with_flags(content, flags)
39    }
40
41    /// Add semantic section tags to markdown content
42    #[allow(dead_code)]
43    fn add_section_tags(content: &str) -> String {
44        let mut result = String::new();
45        let mut in_code_block = false;
46
47        for line in content.lines() {
48            // Track code blocks to avoid tagging inside them
49            if line.trim_start().starts_with("```") {
50                in_code_block = !in_code_block;
51            }
52
53            // Detect section headers
54            if !in_code_block {
55                if let Some(stripped) = line.strip_prefix("# ") {
56                    let section = stripped.trim();
57                    result.push_str(&format!("::section:{}::\n", section));
58                } else if let Some(stripped) = line.strip_prefix("## ") {
59                    let subsection = stripped.trim();
60                    result.push_str(&format!("::section:{}::\n", subsection));
61                }
62            }
63
64            result.push_str(line);
65            result.push('\n');
66        }
67
68        result
69    }
70
71    /// Tokenize markdown content with smart frequency analysis
72    pub fn tokenize_content(content: &str) -> (HashMap<String, String>, String) {
73        MarqantCore::tokenize_content(content)
74    }
75
76    /// Decompress marqant content back to markdown
77    pub fn decompress_marqant(compressed: &str) -> Result<String> {
78        MarqantCore::decompress_marqant(compressed)
79    }
80}
81
82impl Formatter for MarqantFormatter {
83    fn format(
84        &self,
85        writer: &mut dyn Write,
86        nodes: &[FileNode],
87        stats: &TreeStats,
88        root_path: &Path,
89    ) -> Result<()> {
90        // For directory trees, we'll create a compressed markdown representation
91        let mut markdown = String::new();
92
93        // Create header with project name
94        let project_name = root_path
95            .file_name()
96            .and_then(|n| n.to_str())
97            .unwrap_or("Directory");
98
99        markdown.push_str(&format!("# {} Structure\n\n", project_name));
100
101        // Create a tree structure in markdown
102        markdown.push_str("## File Tree\n\n");
103        markdown.push_str("```\n");
104
105        for node in nodes {
106            let indent = "  ".repeat(node.depth);
107            let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
108            let suffix = if node.is_dir { "/" } else { "" };
109            let emoji = if !self.no_emoji {
110                if node.is_dir {
111                    "📁 "
112                } else {
113                    "📄 "
114                }
115            } else {
116                ""
117            };
118            markdown.push_str(&format!("{}{}{}{}\n", indent, emoji, name, suffix));
119        }
120
121        markdown.push_str("```\n\n");
122
123        // Add statistics
124        markdown.push_str("## Statistics\n\n");
125        markdown.push_str(&format!("- Total files: {}\n", stats.total_files));
126        markdown.push_str(&format!("- Total directories: {}\n", stats.total_dirs));
127        markdown.push_str(&format!(
128            "- Total size: {:.2} MB\n",
129            stats.total_size as f64 / 1_048_576.0
130        ));
131
132        // Add file type breakdown if available
133        if !stats.file_types.is_empty() {
134            markdown.push_str("\n### File Types\n\n");
135            let mut types: Vec<_> = stats.file_types.iter().collect();
136            types.sort_by(|a, b| b.1.cmp(a.1));
137
138            for (ext, count) in types.iter().take(10) {
139                markdown.push_str(&format!("- .{}: {} files\n", ext, count));
140            }
141        }
142
143        // Compress and write
144        let compressed = Self::compress_markdown(&markdown)?;
145        writer.write_all(compressed.as_bytes())?;
146
147        Ok(())
148    }
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Compressing a small document must yield a MARQANT-headed payload that
    /// decompresses back to the original text (modulo surrounding whitespace).
    #[test]
    fn test_markdown_compression() {
        let markdown = r#"# Test Document

## Section One

This is a test document. This is a test document.

### Subsection

- Item one
- Item two
- Item three

## Section Two

This is a test document.

```rust
fn main() {
    println!("Hello, world!");
}
```

## Section Three

**Bold text** and *italic text*.
"#;

        let compressed = MarqantFormatter::compress_markdown(markdown).unwrap();

        // Marqant v0.2 uses "MARQANT" header (without _V1 suffix)
        assert!(
            compressed.starts_with("MARQANT"),
            "Compressed data should start with MARQANT header"
        );

        // For documents with limited repetition, compression might not reduce size
        // due to header overhead, so no size reduction is asserted here; only that
        // the payload carries more than a bare header.
        assert!(compressed.len() > 20, "Should have header and content");

        // Test round-trip
        let decompressed = MarqantFormatter::decompress_marqant(&compressed).unwrap();
        assert_eq!(decompressed.trim(), markdown.trim());
    }

    /// Repeated markdown patterns must either produce token mappings or change
    /// the tokenized text.
    #[test]
    fn test_token_assignment() {
        // Test with markdown content that definitely triggers tokenization
        // Static tokens need enough repetition to save bytes:
        // For "## " (3 chars), we need count * 3 > count + 3 + 3, i.e. count > 3
        // Using 5 occurrences to ensure the threshold is met
        let markdown_content = "## Section 1\n\n## Section 2\n\n## Section 3\n\n## Section 4\n\n## Section 5\n\nContent here.";
        let (tokens, tokenized) = MarqantFormatter::tokenize_content(markdown_content);

        // The tokenization should either:
        // 1. Create token mappings for repeated patterns, OR
        // 2. Modify the content with static token replacements
        // With 5 occurrences of "## ", this should definitely trigger tokenization
        assert!(
            !tokens.is_empty() || tokenized != markdown_content,
            "Tokenization should create tokens or modify content. Got tokens: {:?}, content modified: {}",
            tokens.keys().collect::<Vec<_>>(),
            tokenized != markdown_content
        );
    }
}
228
229// 🎸 The Cheet says: "Markdown files are like guitar solos -
230// sometimes you need to compress them down to the essential riffs!" 🎵