// smart-tree 8.0.1
//
// Smart Tree - An intelligent, AI-friendly directory visualization tool
// Documentation
//! AI Summary formatter - "Compressed intelligence for machines!" - Omni
//! Provides compressed, structured summaries optimized for AI consumption

use super::Formatter;
use crate::content_detector::{ContentDetector, DirectoryType};
use crate::scanner::{FileNode, TreeStats};
use anyhow::Result;
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;

/// Formatter that renders a scan as a compact, line-oriented `SUMMARY_AI_V1` block.
///
/// Derives `Debug` and `Clone` so the formatter can be logged and duplicated
/// like any other plain configuration value.
#[derive(Debug, Clone)]
pub struct SummaryAiFormatter {
    /// Compression flag handed to the constructor.
    ///
    /// The value is stored but not read anywhere in this file, which is why the
    /// `dead_code` lint is silenced for this field.
    #[allow(dead_code)]
    compress: bool,
}

impl SummaryAiFormatter {
    pub fn new(compress: bool) -> Self {
        Self { compress }
    }
}

impl Formatter for SummaryAiFormatter {
    fn format(
        &self,
        writer: &mut dyn Write,
        nodes: &[FileNode],
        stats: &TreeStats,
        root_path: &Path,
    ) -> Result<()> {
        // Detect directory type
        let dir_type = ContentDetector::detect(nodes, root_path);

        // Header
        writeln!(writer, "SUMMARY_AI_V1:")?;
        writeln!(writer, "PATH:{}", root_path.display())?;
        writeln!(
            writer,
            "STATS:F{:x}D{:x}S{:x}",
            stats.total_files, stats.total_dirs, stats.total_size
        )?;

        // Directory type analysis
        match &dir_type {
            DirectoryType::CodeProject {
                language,
                framework,
                has_tests,
                has_docs,
            } => {
                write!(writer, "TYPE:CODE[{:?}", language)?;
                if let Some(fw) = framework {
                    write!(writer, ",{:?}", fw)?;
                }
                writeln!(
                    writer,
                    "]T{}D{}",
                    if *has_tests { "1" } else { "0" },
                    if *has_docs { "1" } else { "0" }
                )?;

                // Key files in compressed format
                write!(writer, "KEY:")?;
                let important = find_key_files(nodes, language);
                for (i, file) in important.iter().enumerate() {
                    if i > 0 {
                        write!(writer, ",")?;
                    }
                    write!(writer, "{}", file)?;
                }
                writeln!(writer)?;

                // File type distribution
                let ext_counts = get_extension_counts(nodes);
                write!(writer, "EXT:")?;
                for (i, (ext, count)) in ext_counts.iter().enumerate() {
                    if i > 0 {
                        write!(writer, ",")?;
                    }
                    write!(writer, "{}:{}", ext, count)?;
                }
                writeln!(writer)?;
            }

            DirectoryType::PhotoCollection {
                image_count,
                date_range,
                cameras,
            } => {
                write!(writer, "TYPE:PHOTO[{}]", image_count)?;
                if let Some((start, end)) = date_range {
                    write!(writer, "DATE[{},{}]", start, end)?;
                }
                if !cameras.is_empty() {
                    write!(writer, "CAM[{}]", cameras.join(","))?;
                }
                writeln!(writer)?;
            }

            DirectoryType::DocumentArchive {
                categories,
                total_docs,
            } => {
                write!(writer, "TYPE:DOCS[{}]", total_docs)?;
                if !categories.is_empty() {
                    write!(writer, "CAT[")?;
                    for (i, (cat, count)) in categories.iter().enumerate() {
                        if i > 0 {
                            write!(writer, ",")?;
                        }
                        write!(writer, "{}:{}", cat, count)?;
                    }
                    write!(writer, "]")?;
                }
                writeln!(writer)?;
            }

            DirectoryType::MediaLibrary {
                video_count,
                audio_count,
                total_duration,
                quality,
            } => {
                write!(writer, "TYPE:MEDIA[V{},A{}]", video_count, audio_count)?;
                if let Some(duration) = total_duration {
                    write!(writer, "DUR[{}]", duration)?;
                }
                if !quality.is_empty() {
                    write!(writer, "Q[{}]", quality.join(","))?;
                }
                writeln!(writer)?;
            }

            DirectoryType::DataScience {
                notebooks,
                datasets,
                languages,
            } => {
                write!(writer, "TYPE:DATA[N{},D{}]", notebooks, datasets)?;
                if !languages.is_empty() {
                    write!(writer, "LANG[{}]", languages.join(","))?;
                }
                writeln!(writer)?;
            }

            DirectoryType::MixedContent {
                dominant_type,
                file_types,
                total_files,
            } => {
                write!(writer, "TYPE:MIXED[{}]", total_files)?;
                if let Some(dominant) = dominant_type {
                    write!(writer, "DOM[{}]", dominant)?;
                }
                writeln!(writer)?;

                // Top 5 file types
                let mut types: Vec<_> = file_types.iter().collect();
                types.sort_by(|a, b| b.1.cmp(a.1));
                write!(writer, "TOP:")?;
                for (i, (ext, count)) in types.iter().take(5).enumerate() {
                    if i > 0 {
                        write!(writer, ",")?;
                    }
                    write!(writer, "{}:{}", ext, count)?;
                }
                writeln!(writer)?;
            }
        }

        // Structure summary - top-level directories
        let mut dir_sizes: HashMap<String, (usize, u64)> = HashMap::new();
        for node in nodes {
            if let Ok(relative) = node.path.strip_prefix(root_path) {
                if let Some(first_component) = relative.components().next() {
                    if let Some(name) = first_component.as_os_str().to_str() {
                        let entry = dir_sizes.entry(name.to_string()).or_insert((0, 0));
                        entry.0 += 1;
                        if !node.is_dir {
                            entry.1 += node.size;
                        }
                    }
                }
            }
        }

        write!(writer, "DIRS:")?;
        let mut dirs: Vec<_> = dir_sizes.iter().collect();
        dirs.sort_by(|a, b| b.1 .1.cmp(&a.1 .1)); // Sort by size
        for (i, (name, (count, size))) in dirs.iter().take(10).enumerate() {
            if i > 0 {
                write!(writer, ",")?;
            }
            write!(writer, "{}[{},{:x}]", name, count, size)?;
        }
        writeln!(writer)?;

        // Largest files
        let mut files: Vec<_> = nodes.iter().filter(|n| !n.is_dir).collect();
        files.sort_by(|a, b| b.size.cmp(&a.size));

        write!(writer, "LARGE:")?;
        for (i, file) in files.iter().take(5).enumerate() {
            if i > 0 {
                write!(writer, ",")?;
            }
            let name = file
                .path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("?");
            write!(writer, "{}:{:x}", name, file.size)?;
        }
        writeln!(writer)?;

        // Footer
        writeln!(writer, "END_SUMMARY_AI")?;

        Ok(())
    }
}

/// Returns the basenames of well-known manifest and entry-point files for `language`.
///
/// Only non-directory nodes are considered and only the file name is compared,
/// not the full path. Each matching name is returned at most once, in the order
/// it first appears in `nodes`; a project with many `__init__.py` or `main.rs`
/// files therefore contributes a single entry per name. Languages without a
/// list below yield an empty vector.
fn find_key_files(nodes: &[FileNode], language: &crate::content_detector::Language) -> Vec<String> {
    use crate::content_detector::Language;

    let important_names: &[&str] = match language {
        Language::Rust => &["Cargo.toml", "main.rs", "lib.rs"],
        Language::Python => &["requirements.txt", "setup.py", "main.py", "__init__.py"],
        Language::JavaScript | Language::TypeScript => &["package.json", "index.js", "index.ts"],
        Language::Go => &["go.mod", "main.go"],
        Language::Java => &["pom.xml", "build.gradle", "Main.java"],
        // Every other language has no curated list.
        _ => &[],
    };

    let mut key_files: Vec<String> = Vec::new();
    for node in nodes.iter().filter(|node| !node.is_dir) {
        // Names that are missing or not valid UTF-8 can never match the list.
        if let Some(name) = node.path.file_name().and_then(|n| n.to_str()) {
            let wanted = important_names.contains(&name);
            // The result holds at most a handful of names, so a linear scan is fine.
            let already_listed = key_files.iter().any(|seen| seen.as_str() == name);
            if wanted && !already_listed {
                key_files.push(name.to_owned());
            }
        }
    }

    key_files
}

/// Counts files per extension and returns the ten most frequent as `(extension, count)`.
///
/// Directories, files without an extension and extensions that are not valid
/// UTF-8 are ignored. The result is sorted by descending count; extensions with
/// the same count are ordered alphabetically, so both the order and the cut-off
/// at ten entries are reproducible between runs.
fn get_extension_counts(nodes: &[FileNode]) -> Vec<(String, usize)> {
    // Borrow the extensions while counting; only the survivors are allocated.
    let mut tally: HashMap<&str, usize> = HashMap::new();
    let extensions = nodes
        .iter()
        .filter(|node| !node.is_dir)
        .filter_map(|node| node.path.extension().and_then(|ext| ext.to_str()));
    for ext in extensions {
        *tally.entry(ext).or_insert(0) += 1;
    }

    let mut ranked: Vec<(&str, usize)> = tally.into_iter().collect();
    // Keys are unique, so count-then-name is a total order and an unstable sort is safe.
    ranked.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

    ranked
        .into_iter()
        .take(10) // Top 10 extensions
        .map(|(ext, count)| (ext.to_owned(), count))
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::FileNode;
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Smoke test: formats a two-file Rust project and checks that the header,
    /// the detected type, the key-file line and the footer are all present.
    #[test]
    fn test_ai_summary_formatter() {
        use crate::scanner::{FileCategory, FileType, FilesystemType};

        // Builds a plain regular-file node; only the path, size, depth and
        // category differ between the fixtures below.
        let regular_file = |path: &str, size, depth, category| FileNode {
            path: PathBuf::from(path),
            is_dir: false,
            size,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: std::time::SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth,
            file_type: FileType::RegularFile,
            category,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        };

        let nodes = vec![
            regular_file("/test/src/main.rs", 1000, 2, FileCategory::Rust),
            regular_file("/test/Cargo.toml", 500, 1, FileCategory::Toml),
        ];

        let stats = TreeStats {
            total_files: 2,
            total_dirs: 1,
            total_size: 1500,
            file_types: HashMap::new(),
            largest_files: vec![],
            newest_files: vec![],
            oldest_files: vec![],
        };

        let formatter = SummaryAiFormatter::new(false);
        let root = PathBuf::from("/test");
        let mut output = Vec::new();
        let result = formatter.format(&mut output, &nodes, &stats, &root);
        assert!(result.is_ok());

        let output_str = String::from_utf8(output).unwrap();

        // Check format markers
        assert!(output_str.starts_with("SUMMARY_AI_V1:"));
        for marker in &["TYPE:CODE[Rust]", "KEY:", "END_SUMMARY_AI"] {
            assert!(output_str.contains(*marker), "missing marker {}", marker);
        }
    }
}