Skip to main content

st/formatters/
summary_ai.rs

//! AI Summary formatter - "Compressed intelligence for machines!" - Omni
//! Provides compressed, structured summaries optimized for AI consumption
3
4use super::Formatter;
5use crate::content_detector::{ContentDetector, DirectoryType};
6use crate::scanner::{FileNode, TreeStats};
7use anyhow::Result;
8use std::collections::HashMap;
9use std::io::Write;
10use std::path::Path;
11
/// Formatter that emits the compact `SUMMARY_AI_V1` text summary of a scanned tree.
#[derive(Debug, Clone)]
pub struct SummaryAiFormatter {
    // The flag is accepted by `new` and stored, but nothing in this file reads
    // it, so the lint is silenced rather than dropping the constructor argument.
    #[allow(dead_code)]
    compress: bool,
}

impl SummaryAiFormatter {
    /// Creates a formatter, storing the requested `compress` flag.
    pub fn new(compress: bool) -> Self {
        Self { compress }
    }
}
22
23impl Formatter for SummaryAiFormatter {
24    fn format(
25        &self,
26        writer: &mut dyn Write,
27        nodes: &[FileNode],
28        stats: &TreeStats,
29        root_path: &Path,
30    ) -> Result<()> {
31        // Detect directory type
32        let dir_type = ContentDetector::detect(nodes, root_path);
33
34        // Header
35        writeln!(writer, "SUMMARY_AI_V1:")?;
36        writeln!(writer, "PATH:{}", root_path.display())?;
37        writeln!(
38            writer,
39            "STATS:F{:x}D{:x}S{:x}",
40            stats.total_files, stats.total_dirs, stats.total_size
41        )?;
42
43        // Directory type analysis
44        match &dir_type {
45            DirectoryType::CodeProject {
46                language,
47                framework,
48                has_tests,
49                has_docs,
50            } => {
51                write!(writer, "TYPE:CODE[{:?}", language)?;
52                if let Some(fw) = framework {
53                    write!(writer, ",{:?}", fw)?;
54                }
55                writeln!(
56                    writer,
57                    "]T{}D{}",
58                    if *has_tests { "1" } else { "0" },
59                    if *has_docs { "1" } else { "0" }
60                )?;
61
62                // Key files in compressed format
63                write!(writer, "KEY:")?;
64                let important = find_key_files(nodes, language);
65                for (i, file) in important.iter().enumerate() {
66                    if i > 0 {
67                        write!(writer, ",")?;
68                    }
69                    write!(writer, "{}", file)?;
70                }
71                writeln!(writer)?;
72
73                // File type distribution
74                let ext_counts = get_extension_counts(nodes);
75                write!(writer, "EXT:")?;
76                for (i, (ext, count)) in ext_counts.iter().enumerate() {
77                    if i > 0 {
78                        write!(writer, ",")?;
79                    }
80                    write!(writer, "{}:{}", ext, count)?;
81                }
82                writeln!(writer)?;
83            }
84
85            DirectoryType::PhotoCollection {
86                image_count,
87                date_range,
88                cameras,
89            } => {
90                write!(writer, "TYPE:PHOTO[{}]", image_count)?;
91                if let Some((start, end)) = date_range {
92                    write!(writer, "DATE[{},{}]", start, end)?;
93                }
94                if !cameras.is_empty() {
95                    write!(writer, "CAM[{}]", cameras.join(","))?;
96                }
97                writeln!(writer)?;
98            }
99
100            DirectoryType::DocumentArchive {
101                categories,
102                total_docs,
103            } => {
104                write!(writer, "TYPE:DOCS[{}]", total_docs)?;
105                if !categories.is_empty() {
106                    write!(writer, "CAT[")?;
107                    for (i, (cat, count)) in categories.iter().enumerate() {
108                        if i > 0 {
109                            write!(writer, ",")?;
110                        }
111                        write!(writer, "{}:{}", cat, count)?;
112                    }
113                    write!(writer, "]")?;
114                }
115                writeln!(writer)?;
116            }
117
118            DirectoryType::MediaLibrary {
119                video_count,
120                audio_count,
121                total_duration,
122                quality,
123            } => {
124                write!(writer, "TYPE:MEDIA[V{},A{}]", video_count, audio_count)?;
125                if let Some(duration) = total_duration {
126                    write!(writer, "DUR[{}]", duration)?;
127                }
128                if !quality.is_empty() {
129                    write!(writer, "Q[{}]", quality.join(","))?;
130                }
131                writeln!(writer)?;
132            }
133
134            DirectoryType::DataScience {
135                notebooks,
136                datasets,
137                languages,
138            } => {
139                write!(writer, "TYPE:DATA[N{},D{}]", notebooks, datasets)?;
140                if !languages.is_empty() {
141                    write!(writer, "LANG[{}]", languages.join(","))?;
142                }
143                writeln!(writer)?;
144            }
145
146            DirectoryType::MixedContent {
147                dominant_type,
148                file_types,
149                total_files,
150            } => {
151                write!(writer, "TYPE:MIXED[{}]", total_files)?;
152                if let Some(dominant) = dominant_type {
153                    write!(writer, "DOM[{}]", dominant)?;
154                }
155                writeln!(writer)?;
156
157                // Top 5 file types
158                let mut types: Vec<_> = file_types.iter().collect();
159                types.sort_by(|a, b| b.1.cmp(a.1));
160                write!(writer, "TOP:")?;
161                for (i, (ext, count)) in types.iter().take(5).enumerate() {
162                    if i > 0 {
163                        write!(writer, ",")?;
164                    }
165                    write!(writer, "{}:{}", ext, count)?;
166                }
167                writeln!(writer)?;
168            }
169        }
170
171        // Structure summary - top-level directories
172        let mut dir_sizes: HashMap<String, (usize, u64)> = HashMap::new();
173        for node in nodes {
174            if let Ok(relative) = node.path.strip_prefix(root_path) {
175                if let Some(first_component) = relative.components().next() {
176                    if let Some(name) = first_component.as_os_str().to_str() {
177                        let entry = dir_sizes.entry(name.to_string()).or_insert((0, 0));
178                        entry.0 += 1;
179                        if !node.is_dir {
180                            entry.1 += node.size;
181                        }
182                    }
183                }
184            }
185        }
186
187        write!(writer, "DIRS:")?;
188        let mut dirs: Vec<_> = dir_sizes.iter().collect();
189        dirs.sort_by(|a, b| b.1 .1.cmp(&a.1 .1)); // Sort by size
190        for (i, (name, (count, size))) in dirs.iter().take(10).enumerate() {
191            if i > 0 {
192                write!(writer, ",")?;
193            }
194            write!(writer, "{}[{},{:x}]", name, count, size)?;
195        }
196        writeln!(writer)?;
197
198        // Largest files
199        let mut files: Vec<_> = nodes.iter().filter(|n| !n.is_dir).collect();
200        files.sort_by(|a, b| b.size.cmp(&a.size));
201
202        write!(writer, "LARGE:")?;
203        for (i, file) in files.iter().take(5).enumerate() {
204            if i > 0 {
205                write!(writer, ",")?;
206            }
207            let name = file
208                .path
209                .file_name()
210                .and_then(|n| n.to_str())
211                .unwrap_or("?");
212            write!(writer, "{}:{:x}", name, file.size)?;
213        }
214        writeln!(writer)?;
215
216        // Footer
217        writeln!(writer, "END_SUMMARY_AI")?;
218
219        Ok(())
220    }
221}
222
223fn find_key_files(nodes: &[FileNode], language: &crate::content_detector::Language) -> Vec<String> {
224    use crate::content_detector::Language;
225
226    let mut key_files = Vec::new();
227    let important_names = match language {
228        Language::Rust => vec!["Cargo.toml", "main.rs", "lib.rs"],
229        Language::Python => vec!["requirements.txt", "setup.py", "main.py", "__init__.py"],
230        Language::JavaScript | Language::TypeScript => vec!["package.json", "index.js", "index.ts"],
231        Language::Go => vec!["go.mod", "main.go"],
232        Language::Java => vec!["pom.xml", "build.gradle", "Main.java"],
233        _ => vec![],
234    };
235
236    for node in nodes {
237        if node.is_dir {
238            continue;
239        }
240
241        let name = node.path.file_name().and_then(|n| n.to_str()).unwrap_or("");
242
243        if important_names.contains(&name) {
244            key_files.push(name.to_string());
245        }
246    }
247
248    key_files
249}
250
251fn get_extension_counts(nodes: &[FileNode]) -> Vec<(String, usize)> {
252    let mut ext_counts: HashMap<String, usize> = HashMap::new();
253
254    for node in nodes {
255        if !node.is_dir {
256            if let Some(ext) = node.path.extension().and_then(|e| e.to_str()) {
257                *ext_counts.entry(ext.to_string()).or_insert(0) += 1;
258            }
259        }
260    }
261
262    let mut counts: Vec<_> = ext_counts.into_iter().collect();
263    counts.sort_by(|a, b| b.1.cmp(&a.1));
264    counts.truncate(10); // Top 10 extensions
265    counts
266}
267
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::FileNode;
    use std::collections::HashMap;
    use std::path::PathBuf;

    /// Formats a two-file Rust project rooted at `/test` and checks that the
    /// expected section markers are present in the output.
    #[test]
    fn test_ai_summary_formatter() {
        use crate::scanner::{FileCategory, FileType, FilesystemType};

        // Field values shared by both fixture files; each node below overrides
        // only its path, size, depth and category.
        let regular_file = || FileNode {
            path: PathBuf::new(),
            is_dir: false,
            size: 0,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: std::time::SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth: 0,
            file_type: FileType::RegularFile,
            category: FileCategory::Rust,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        };

        let main_rs = FileNode {
            path: PathBuf::from("/test/src/main.rs"),
            size: 1000,
            depth: 2,
            category: FileCategory::Rust,
            ..regular_file()
        };
        let cargo_toml = FileNode {
            path: PathBuf::from("/test/Cargo.toml"),
            size: 500,
            depth: 1,
            category: FileCategory::Toml,
            ..regular_file()
        };
        let nodes = vec![main_rs, cargo_toml];

        let stats = TreeStats {
            total_files: 2,
            total_dirs: 1,
            total_size: 1500,
            file_types: HashMap::new(),
            largest_files: vec![],
            newest_files: vec![],
            oldest_files: vec![],
        };

        let formatter = SummaryAiFormatter::new(false);
        let root = PathBuf::from("/test");
        let mut output = Vec::new();
        let result = formatter.format(&mut output, &nodes, &stats, &root);
        assert!(result.is_ok());

        let output_str = String::from_utf8(output).unwrap();

        // Check format markers
        assert!(output_str.starts_with("SUMMARY_AI_V1:"));
        for marker in ["TYPE:CODE[Rust]", "KEY:", "END_SUMMARY_AI"] {
            assert!(output_str.contains(marker));
        }
    }
}