Skip to main content

matrixcode_core/
overview.rs

1//! Project overview generation and caching.
2//!
3//! The `/init` command generates a project overview file using AI analysis.
4//! The overview captures the project architecture, key patterns, and development guidance.
5//!
6//! The overview file is stored at `MATRIX.md` in the project root.
7
8use crate::prompt::{OverviewContext, build_overview_prompt};
9use crate::providers::{ChatRequest, Message, MessageContent, Provider, Role};
10use crate::truncate::find_boundary;
11use anyhow::{Context, Result};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15// =============================================================================
16// Configuration Constants
17// =============================================================================
18
19/// Filename of the cached project overview; the file is created directly in the project root.
20pub const OVERVIEW_FILENAME: &str = "MATRIX.md";
21/// Directory name for matrixcode metadata.
22pub const MATRIXCODE_DIR: &str = ".matrix";
23
24// --- Token and content limits ---
25
26/// Maximum output tokens requested from the provider for overview generation.
27const MAX_OUTPUT_TOKENS: u32 = 8192;
28
29/// Maximum length, in bytes (it is compared against `str::len`), of each config file's content before truncation.
30const CONFIG_FILE_MAX_CHARS: usize = 2000;
31
32/// Maximum length, in bytes, of the README excerpt before truncation.
33const README_MAX_CHARS: usize = 1000;
34
35/// Maximum length, in bytes, of each key source file's content before truncation.
36const SOURCE_FILE_MAX_CHARS: usize = 3000;
37
38/// Maximum length, in bytes, of each Rust `mod.rs` file's content before truncation.
39const MODULE_FILE_MAX_CHARS: usize = 2000;
40
41// --- Directory structure limits ---
42
43/// Deepest directory level that is listed (the project root is level 0); deeper levels are rendered as `...`.
44const DIRECTORY_MAX_DEPTH: usize = 3;
45
46/// Maximum number of entries (directories and files combined) shown at the root level.
47const DIRECTORY_ROOT_MAX_ITEMS: usize = 15;
48
49/// Maximum number of entries (directories and files combined) shown at each non-root level.
50const DIRECTORY_OTHER_MAX_ITEMS: usize = 10;
51
52// --- Common file names ---
53
54/// Fallback project name used when the root directory name is missing or not valid UTF-8.
55const DEFAULT_PROJECT_NAME: &str = "project";
56
57/// README filename looked up in the project root.
58const README_FILENAME: &str = "README.md";
59
60/// Source directory name for many project types.
61pub const SRC_DIR: &str = "src";
62
63/// Rust module file name.
64const RUST_MOD_FILE: &str = "mod.rs";
65
66/// Rust library entry file.
67const RUST_LIB_FILE: &str = "lib.rs";
68
69// --- Project type configuration ---
70
/// Describes how one kind of project is recognised and which of its files
/// are worth reading for analysis.
///
/// Derives `Debug` and `Clone` like the other public type in this module,
/// so entries can be logged and copied by callers.
#[derive(Debug, Clone)]
pub struct ProjectTypeConfig {
    /// Human-readable type name.
    pub type_name: &'static str,
    /// Marker files, relative to the project root; the presence of any one
    /// of them identifies this project type (checked in order).
    pub detect_files: &'static [&'static str],
    /// Key source file paths relative to project root.
    pub key_source_files: &'static [&'static str],
}

/// All supported project type configurations.
///
/// Order is significant: detection walks this table from the top and the
/// first entry with a marker file present wins. For example, a Rust project
/// that also ships a `Makefile` is reported as "Rust".
pub const PROJECT_TYPE_CONFIGS: &[ProjectTypeConfig] = &[
    ProjectTypeConfig {
        type_name: "Rust",
        detect_files: &["Cargo.toml"],
        key_source_files: &["src/main.rs", "src/agent.rs"],
    },
    ProjectTypeConfig {
        type_name: "Go",
        detect_files: &["go.mod"],
        key_source_files: &["main.go", "cmd/main.go"],
    },
    ProjectTypeConfig {
        type_name: "Node.js/TypeScript",
        detect_files: &["package.json"],
        key_source_files: &[
            "src/index.ts",
            "src/index.js",
            "src/main.ts",
            "src/main.js",
            "src/app.ts",
            "src/app.js",
        ],
    },
    ProjectTypeConfig {
        type_name: "Python",
        detect_files: &["pyproject.toml", "requirements.txt"],
        key_source_files: &["main.py", "app.py", "__init__.py"],
    },
    ProjectTypeConfig {
        type_name: "Java (Maven)",
        detect_files: &["pom.xml"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "Java (Gradle)",
        detect_files: &["build.gradle"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "C/C++ (Make)",
        detect_files: &["Makefile"],
        key_source_files: &[],
    },
];

/// Type name reported when no entry of `PROJECT_TYPE_CONFIGS` matches.
const PROJECT_TYPE_UNKNOWN: &str = "Unknown";
129
130// --- Configuration file names probed in the project root; each one found is read and truncated ---
131
132const CONFIG_FILENAMES: &[&str] = &[
133    "Cargo.toml",
134    "package.json",
135    "go.mod",
136    "pyproject.toml",
137    "requirements.txt",
138    "pom.xml",
139    "build.gradle",
140    "Makefile",
141    "docker-compose.yml",
142    "Dockerfile",
143    "tsconfig.json",
144    "vite.config.ts",
145    "vite.config.js",
146    "next.config.js",
147    "nuxt.config.ts",
148    "tailwind.config.js",
149    "tailwind.config.ts",
150    ".env.example",
151];
152
153/// Project overview containing the generated summary.
154#[derive(Debug, Clone)]
155pub struct ProjectOverview {
156    /// The rendered markdown content.
157    pub content: String,
158    /// Path to the overview file (for cache invalidation info).
159    pub path: PathBuf,
160}
161
162impl ProjectOverview {
163    /// Load the overview from the project root if it exists.
164    /// Returns `None` if the file doesn't exist.
165    pub fn load(project_root: &Path) -> Result<Option<Self>> {
166        let path = overview_path(project_root);
167        if !path.exists() {
168            return Ok(None);
169        }
170        let content = fs::read_to_string(&path)
171            .with_context(|| format!("reading overview file {}", path.display()))?;
172        Ok(Some(Self { content, path }))
173    }
174
175    /// Generate and save a new overview using AI analysis.
176    /// This method collects project files and sends them to the AI for analysis.
177    pub async fn generate_with_ai(project_root: &Path, provider: &dyn Provider) -> Result<Self> {
178        let project_name = project_root
179            .file_name()
180            .and_then(|n| n.to_str())
181            .unwrap_or(DEFAULT_PROJECT_NAME);
182
183        // Collect project context
184        let context = collect_project_context(project_root)?;
185
186        // Build the AI prompt
187        let prompt = build_overview_prompt(&OverviewContext {
188            project_name: project_name.to_string(),
189            project_type: context.project_type.to_string(),
190            directory_structure: context.directory_structure.clone(),
191            config_files: context.config_files.clone(),
192            readme: context.readme.clone(),
193            source_files: context.source_files.clone(),
194        });
195
196        // Call AI API
197        let request = ChatRequest {
198            messages: vec![Message {
199                role: Role::User,
200                content: MessageContent::Text(prompt),
201            }],
202            tools: vec![],
203            system: None,
204            think: false,
205            max_tokens: MAX_OUTPUT_TOKENS,
206            server_tools: vec![],
207            enable_caching: false, // No caching for overview generation
208        };
209
210        let response = provider
211            .chat(request)
212            .await
213            .with_context(|| "calling AI for overview generation")?;
214
215        // Extract content from response
216        let content = extract_response_content(&response);
217
218        // Save to file
219        let path = overview_path(project_root);
220        fs::write(&path, &content)
221            .with_context(|| format!("writing overview file {}", path.display()))?;
222
223        Ok(Self { content, path })
224    }
225
226    /// Delete the overview file if it exists.
227    pub fn clear(project_root: &Path) -> Result<()> {
228        let path = overview_path(project_root);
229        if path.exists() {
230            fs::remove_file(&path)
231                .with_context(|| format!("removing overview file {}", path.display()))?;
232        }
233        Ok(())
234    }
235
236    /// Check if an overview exists for the project.
237    pub fn exists(project_root: &Path) -> bool {
238        overview_path(project_root).exists()
239    }
240
241    /// Get the path to the overview file.
242    pub fn path(project_root: &Path) -> PathBuf {
243        overview_path(project_root)
244    }
245}
246
247/// Get the path to the overview file (directly in project root).
248fn overview_path(project_root: &Path) -> PathBuf {
249    project_root.join(OVERVIEW_FILENAME)
250}
251
/// Patterns to ignore when scanning the project.
///
/// Entries without `*` are exact file or directory names. Entries containing
/// `*` are wildcard patterns in which each `*` matches any run of characters
/// (including none).
const IGNORE_PATTERNS: &[&str] = &[
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Dependencies
    "node_modules",
    "vendor",
    // Build outputs
    "target",
    "target-test",
    "build",
    "dist",
    "out",
    "bin",
    "obj",
    ".cargo",
    // IDE and editor
    ".idea",
    ".vscode",
    ".vs",
    ".claude",
    ".matrix",
    // Cache and temp
    ".cache",
    "__pycache__",
    "*.pyc",
    ".DS_Store",
    "Thumbs.db",
    // Lock files (usually large and not informative)
    "Cargo.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    // Generated files
    "*.generated.*",
    "swagger.json",
    "swagger.yaml",
];

/// Check if a path component should be ignored.
///
/// `name` is a single file or directory name (not a full path). It is ignored
/// when it equals a plain entry of `IGNORE_PATTERNS` or matches one of the
/// wildcard entries, e.g. `cache.pyc` for `*.pyc` or `api.generated.ts` for
/// `*.generated.*`.
pub fn should_ignore(name: &str) -> bool {
    IGNORE_PATTERNS.iter().any(|pattern| {
        if pattern.contains('*') {
            matches_wildcard(pattern, name)
        } else {
            *pattern == name
        }
    })
}

/// Match `name` against `pattern`, where every `*` stands for any (possibly
/// empty) run of characters and all other characters must match literally.
fn matches_wildcard(pattern: &str, name: &str) -> bool {
    let mut segments = pattern.split('*');

    // Text before the first `*` must be a literal prefix of the name.
    let prefix = segments.next().unwrap_or("");
    let mut rest = match name.strip_prefix(prefix) {
        Some(rest) => rest,
        None => return false,
    };

    // Text after the last `*` must be a literal suffix of what remains.
    let mut middle: Vec<&str> = segments.collect();
    let suffix = match middle.pop() {
        Some(suffix) => suffix,
        // No `*` in the pattern at all: only an exact match counts.
        None => return rest.is_empty(),
    };

    // Literal pieces between two `*` are matched leftmost-first, which leaves
    // the longest possible tail for the pieces that follow.
    for segment in middle {
        match rest.find(segment) {
            Some(at) => rest = &rest[at + segment.len()..],
            None => return false,
        }
    }

    rest.ends_with(suffix)
}
308
309/// Project context collected for AI analysis.
310struct ProjectContext {
311    /// Configuration file contents (Cargo.toml, package.json, etc.)
312    config_files: Vec<(String, String)>,
313    /// README content (first part)
314    readme: Option<String>,
315    /// Directory structure summary
316    directory_structure: String,
317    /// Key source files content (limited)
318    source_files: Vec<(String, String)>,
319    /// Project type detected
320    project_type: &'static str,
321}
322
323/// Collect project context for AI analysis.
324fn collect_project_context(project_root: &Path) -> Result<ProjectContext> {
325    // Detect project type
326    let project_type = detect_project_type(project_root);
327
328    // Collect config files
329    let config_files = collect_config_files(project_root)?;
330
331    // Get README
332    let readme = read_readme(project_root)?;
333
334    // Build directory structure
335    let directory_structure = build_directory_structure(project_root)?;
336
337    // Collect key source files
338    let source_files = collect_key_source_files(project_root, project_type)?;
339
340    Ok(ProjectContext {
341        config_files,
342        readme,
343        directory_structure,
344        source_files,
345        project_type,
346    })
347}
348
349/// Detect project type from configuration files.
350pub fn detect_project_type(project_root: &Path) -> &'static str {
351    for config in PROJECT_TYPE_CONFIGS {
352        for detect_file in config.detect_files {
353            if project_root.join(detect_file).exists() {
354                return config.type_name;
355            }
356        }
357    }
358    PROJECT_TYPE_UNKNOWN
359}
360
361/// Collect configuration files content.
362fn collect_config_files(project_root: &Path) -> Result<Vec<(String, String)>> {
363    let mut files = Vec::new();
364    for filename in CONFIG_FILENAMES {
365        let path = project_root.join(filename);
366        if path.exists() {
367            let content =
368                fs::read_to_string(&path).with_context(|| format!("reading {}", filename))?;
369            let truncated = truncate_content(&content, CONFIG_FILE_MAX_CHARS);
370            files.push((filename.to_string(), truncated));
371        }
372    }
373
374    Ok(files)
375}
376
377/// Read README.md (first part).
378fn read_readme(project_root: &Path) -> Result<Option<String>> {
379    let readme_path = project_root.join(README_FILENAME);
380    if !readme_path.exists() {
381        return Ok(None);
382    }
383
384    let content =
385        fs::read_to_string(&readme_path).with_context(|| format!("reading {}", README_FILENAME))?;
386
387    Ok(Some(truncate_content(&content, README_MAX_CHARS)))
388}
389
390/// Build directory structure string.
391fn build_directory_structure(project_root: &Path) -> Result<String> {
392    let mut result = String::new();
393    result.push_str(&format!(
394        "{}/\n",
395        project_root
396            .file_name()
397            .and_then(|n| n.to_str())
398            .unwrap_or(DEFAULT_PROJECT_NAME)
399    ));
400
401    build_tree_recursive(project_root, 0, DIRECTORY_MAX_DEPTH, &mut result)?;
402
403    Ok(result)
404}
405
406/// Build directory tree recursively.
407fn build_tree_recursive(
408    dir: &Path,
409    depth: usize,
410    max_depth: usize,
411    result: &mut String,
412) -> Result<()> {
413    if depth > max_depth {
414        result.push_str(&format!("{}  ...\n", "  ".repeat(depth)));
415        return Ok(());
416    }
417
418    let entries = match fs::read_dir(dir) {
419        Ok(e) => e,
420        Err(_) => return Ok(()),
421    };
422
423    let mut dirs: Vec<String> = Vec::new();
424    let mut files: Vec<String> = Vec::new();
425
426    for entry in entries.flatten() {
427        let name = entry.file_name().to_string_lossy().into_owned();
428        if should_ignore(&name) {
429            continue;
430        }
431        if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
432            dirs.push(name);
433        } else {
434            files.push(name);
435        }
436    }
437
438    dirs.sort();
439    files.sort();
440
441    let indent = "  ".repeat(depth);
442    let max_items = if depth == 0 {
443        DIRECTORY_ROOT_MAX_ITEMS
444    } else {
445        DIRECTORY_OTHER_MAX_ITEMS
446    };
447
448    let mut count = 0;
449    for d in &dirs {
450        if count >= max_items {
451            result.push_str(&format!(
452                "{}  ... ({} more dirs)\n",
453                indent,
454                dirs.len() - count
455            ));
456            break;
457        }
458        result.push_str(&format!("{}  {}/\n", indent, d));
459        build_tree_recursive(&dir.join(d), depth + 1, max_depth, result)?;
460        count += 1;
461    }
462
463    for f in files.iter().take(max_items - count) {
464        result.push_str(&format!("{}  {}\n", indent, f));
465    }
466
467    if files.len() > max_items - count {
468        result.push_str(&format!(
469            "{}  ... ({} more files)\n",
470            indent,
471            files.len() - (max_items - count)
472        ));
473    }
474
475    Ok(())
476}
477
478/// Collect key source files for analysis.
479fn collect_key_source_files(
480    project_root: &Path,
481    project_type: &str,
482) -> Result<Vec<(String, String)>> {
483    let mut files = Vec::new();
484
485    // Find the matching project type config
486    let config = PROJECT_TYPE_CONFIGS
487        .iter()
488        .find(|c| c.type_name == project_type);
489
490    // Collect key source files from config
491    if let Some(config) = config {
492        for path_str in config.key_source_files {
493            let path = project_root.join(path_str);
494            if path.exists() {
495                let content = fs::read_to_string(&path).ok();
496                if let Some(content) = content {
497                    files.push((
498                        path_str.to_string(),
499                        truncate_content(&content, SOURCE_FILE_MAX_CHARS),
500                    ));
501                }
502            }
503        }
504    }
505
506    // Special handling for Rust: collect lib.rs and module files
507    if project_type == "Rust" {
508        // Collect lib.rs
509        let lib_path = project_root.join(SRC_DIR).join(RUST_LIB_FILE);
510        if lib_path.exists() {
511            let lib_relative = format!("{}/{}", SRC_DIR, RUST_LIB_FILE);
512            let content = fs::read_to_string(&lib_path).ok();
513            if let Some(content) = content {
514                files.push((
515                    lib_relative,
516                    truncate_content(&content, SOURCE_FILE_MAX_CHARS),
517                ));
518            }
519
520            // Collect module files (mod.rs in subdirectories)
521            let src_path = project_root.join(SRC_DIR);
522            if src_path.exists() {
523                for entry in fs::read_dir(&src_path)?.flatten() {
524                    let name = entry.file_name().to_string_lossy().into_owned();
525                    if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
526                        && !should_ignore(&name)
527                    {
528                        let mod_path = src_path.join(&name).join(RUST_MOD_FILE);
529                        if mod_path.exists() {
530                            let content = fs::read_to_string(&mod_path).ok();
531                            if let Some(content) = content {
532                                let mod_relative =
533                                    format!("{}/{}/{}", SRC_DIR, name, RUST_MOD_FILE);
534                                files.push((
535                                    mod_relative,
536                                    truncate_content(&content, MODULE_FILE_MAX_CHARS),
537                                ));
538                            }
539                        }
540                    }
541                }
542            }
543        }
544    }
545
546    Ok(files)
547}
548
549/// Truncate content to a maximum length, respecting char boundaries.
550pub fn truncate_content(content: &str, max_len: usize) -> String {
551    if content.len() <= max_len {
552        content.to_string()
553    } else {
554        let end = find_boundary(content, max_len);
555        let mut truncated = content[..end].to_string();
556        truncated.push_str("\n... (truncated)");
557        truncated
558    }
559}
560
561/// Extract content from AI response.
562fn extract_response_content(response: &crate::providers::ChatResponse) -> String {
563    let mut content = String::new();
564    for block in &response.content {
565        if let crate::providers::ContentBlock::Text { text } = block {
566            content.push_str(text);
567        }
568    }
569    content
570}
571
572#[cfg(test)]
573mod tests {
574    use super::*;
575
576    #[test]
577    fn truncate_content_respects_char_boundary() {
578        // Chinese text: the CJK characters are multibyte in UTF-8
579        let text = "这是一个包含中文字符的测试文本,用于验证截断功能是否正确处理字符边界问题。";
580
581        // Byte offset 50 falls inside a multibyte character, so a naive slice would panic
582        let truncated = truncate_content(text, 50);
583
584        // Reaching this point means no panic; the result must carry the truncation marker
585        assert!(truncated.contains("... (truncated)"));
586        // A `String` is always valid UTF-8, so there is nothing further to validate
587    }
588
589    #[test]
590    fn truncate_content_preserves_short_text() {
591        let short = "hello world";
592        let result = truncate_content(short, 100);
593        assert_eq!(result, short);
594    }
595
596    #[test]
597    fn truncate_content_exact_boundary() {
598        // ASCII text - every byte is a char boundary, so the cut lands exactly at the limit
599        let text = "abcdefghijklmnopqrstuvwxyz";
600        let truncated = truncate_content(text, 10);
601        assert_eq!(truncated, "abcdefghij\n... (truncated)");
602    }
603
604    #[test]
605    fn truncate_content_multibyte_edge() {
606        // The limit coincides with the end of a multibyte character
607        let text = "你好世界hello";
608        let truncated = truncate_content(text, 12); // "你好世界" = 12 bytes
609        assert!(truncated.starts_with("你好世界"));
610    }
611}