Skip to main content

matrixcode_core/
overview.rs

1//! Project overview generation and caching.
2//!
3//! The `/init` command generates a project overview file using AI analysis.
4//! The overview captures the project architecture, key patterns, and development guidance.
5//!
6//! The overview file is stored at `MATRIX.md` in the project root.
7
8use crate::prompt::{OverviewContext, build_overview_prompt};
9use crate::providers::{ChatRequest, Message, MessageContent, Provider, Role};
10use crate::truncate::find_boundary;
11use anyhow::{Context, Result};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15// =============================================================================
16// Configuration Constants
17// =============================================================================
18
/// Default filename for the cached project overview.
///
/// `overview_path` joins this name directly onto the project root.
pub const OVERVIEW_FILENAME: &str = "MATRIX.md";
/// Directory name for matrixcode metadata.
pub const MATRIXCODE_DIR: &str = ".matrix";

// --- Token and content limits ---
//
// The `*_MAX_CHARS` limits below are handed to `truncate_content`, which
// compares them against `str::len()`. They are therefore byte budgets rather
// than character counts (multibyte text is cut earlier than the name suggests).

/// Maximum output tokens for AI overview generation.
const MAX_OUTPUT_TOKENS: u32 = 8192;

/// Maximum length (in bytes) of each config file's content.
const CONFIG_FILE_MAX_CHARS: usize = 2000;

/// Maximum length (in bytes) of the README content.
const README_MAX_CHARS: usize = 1000;

/// Maximum length (in bytes) of each key source file's content.
const SOURCE_FILE_MAX_CHARS: usize = 3000;

/// Maximum length (in bytes) of each module file's content.
const MODULE_FILE_MAX_CHARS: usize = 2000;

// --- Directory structure limits ---

/// Maximum depth for directory tree traversal.
///
/// The project root is depth 0; anything deeper than this value is rendered
/// as a single `...` placeholder line.
const DIRECTORY_MAX_DEPTH: usize = 3;

/// Maximum items to show at root level.
const DIRECTORY_ROOT_MAX_ITEMS: usize = 15;

/// Maximum items to show at non-root levels.
const DIRECTORY_OTHER_MAX_ITEMS: usize = 10;

// --- Common file names ---

/// Default project name when root directory name cannot be determined.
const DEFAULT_PROJECT_NAME: &str = "project";

/// README filename to look for.
const README_FILENAME: &str = "README.md";

/// Source directory name for many project types.
pub const SRC_DIR: &str = "src";

/// Rust module file name.
const RUST_MOD_FILE: &str = "mod.rs";

/// Rust library entry file.
const RUST_LIB_FILE: &str = "lib.rs";
68
69// --- Project type configuration ---
70
/// Configuration for a project type, including detection and key files.
///
/// Every field is a `'static` reference, so values are cheap to copy and can
/// be stored in a `const` table. `Debug` is derived so the configuration can
/// be logged or printed in assertions.
#[derive(Debug, Clone, Copy)]
pub struct ProjectTypeConfig {
    /// Human-readable type name.
    pub type_name: &'static str,
    /// Files whose presence indicates this project type (checked in order).
    pub detect_files: &'static [&'static str],
    /// Key source file paths relative to project root.
    pub key_source_files: &'static [&'static str],
}
80
/// All supported project type configurations.
///
/// Order matters: `detect_project_type` walks this table from top to bottom
/// and returns the first entry for which any of its `detect_files` exists in
/// the project root, so earlier entries take precedence over later ones.
pub const PROJECT_TYPE_CONFIGS: &[ProjectTypeConfig] = &[
    ProjectTypeConfig {
        type_name: "Rust",
        detect_files: &["Cargo.toml"],
        // NOTE(review): `src/agent.rs` looks specific to one project rather
        // than to Rust crates in general — confirm it is intentional.
        key_source_files: &["src/main.rs", "src/agent.rs"],
    },
    ProjectTypeConfig {
        type_name: "Go",
        detect_files: &["go.mod"],
        key_source_files: &["main.go", "cmd/main.go"],
    },
    ProjectTypeConfig {
        type_name: "Node.js/TypeScript",
        detect_files: &["package.json"],
        key_source_files: &[
            "src/index.ts",
            "src/index.js",
            "src/main.ts",
            "src/main.js",
            "src/app.ts",
            "src/app.js",
        ],
    },
    ProjectTypeConfig {
        type_name: "Python",
        detect_files: &["pyproject.toml", "requirements.txt"],
        key_source_files: &["main.py", "app.py", "__init__.py"],
    },
    // The remaining types are detected but contribute no key source files.
    ProjectTypeConfig {
        type_name: "Java (Maven)",
        detect_files: &["pom.xml"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "Java (Gradle)",
        detect_files: &["build.gradle"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "C/C++ (Make)",
        detect_files: &["Makefile"],
        key_source_files: &[],
    },
];

/// Project type name returned when no entry of the table matches.
const PROJECT_TYPE_UNKNOWN: &str = "Unknown";
129
// --- Configuration file names to scan ---

/// File names looked up directly in the project root by
/// `collect_config_files`. Each file that exists is read and its content
/// (truncated to `CONFIG_FILE_MAX_CHARS`) is included in the collected
/// project context, in the order listed here.
const CONFIG_FILENAMES: &[&str] = &[
    "Cargo.toml",
    "package.json",
    "go.mod",
    "pyproject.toml",
    "requirements.txt",
    "pom.xml",
    "build.gradle",
    "Makefile",
    "docker-compose.yml",
    "Dockerfile",
    "tsconfig.json",
    "vite.config.ts",
    "vite.config.js",
    "next.config.js",
    "nuxt.config.ts",
    "tailwind.config.js",
    "tailwind.config.ts",
    ".env.example",
];
152
153/// Project overview containing the generated summary.
154#[derive(Debug, Clone)]
155pub struct ProjectOverview {
156    /// The rendered markdown content.
157    pub content: String,
158    /// Path to the overview file (for cache invalidation info).
159    pub path: PathBuf,
160}
161
162impl ProjectOverview {
163    /// Load the overview from the project root if it exists.
164    /// Returns `None` if the file doesn't exist.
165    pub fn load(project_root: &Path) -> Result<Option<Self>> {
166        let path = overview_path(project_root);
167        if !path.exists() {
168            return Ok(None);
169        }
170        let content = fs::read_to_string(&path)
171            .with_context(|| format!("reading overview file {}", path.display()))?;
172
173        // Limit to 200 lines to prevent excessively long content
174        let limited_content = content
175            .lines()
176            .take(200)
177            .collect::<Vec<_>>()
178            .join("\n");
179
180        Ok(Some(Self { content: limited_content, path }))
181    }
182
183    /// Generate and save a new overview using AI analysis.
184    /// This method collects project files and sends them to the AI for analysis.
185    pub async fn generate_with_ai(project_root: &Path, provider: &dyn Provider) -> Result<Self> {
186        let project_name = project_root
187            .file_name()
188            .and_then(|n| n.to_str())
189            .unwrap_or(DEFAULT_PROJECT_NAME);
190
191        // Collect project context
192        let context = collect_project_context(project_root)?;
193
194        // Build the AI prompt
195        let prompt = build_overview_prompt(&OverviewContext {
196            project_name: project_name.to_string(),
197            project_type: context.project_type.to_string(),
198            directory_structure: context.directory_structure.clone(),
199            config_files: context.config_files.clone(),
200            readme: context.readme.clone(),
201            source_files: context.source_files.clone(),
202        });
203
204        // Call AI API
205        let request = ChatRequest {
206            messages: vec![Message {
207                role: Role::User,
208                content: MessageContent::Text(prompt),
209            }],
210            tools: vec![],
211            system: None,
212            think: false,
213            max_tokens: MAX_OUTPUT_TOKENS,
214            server_tools: vec![],
215            enable_caching: false, // No caching for overview generation
216        };
217
218        let model_name = provider.model_name();
219        log::info!(
220            "Overview generation: sending request to AI (model: {}, max_tokens: {})",
221            model_name,
222            MAX_OUTPUT_TOKENS
223        );
224
225        // Use streaming API (same as Agent) for better compatibility with DashScope
226        let mut rx = provider
227            .chat_stream(request)
228            .await
229            .map_err(|e| {
230                log::error!("Overview generation failed to start stream: {}", e);
231                e
232            })
233            .with_context(|| format!(
234                "starting AI stream for overview generation (model: {})",
235                model_name
236            ))?;
237
238        // Collect streaming response
239        let mut content = String::new();
240        let mut input_tokens: u32;
241        let mut output_tokens: u32;
242
243        while let Some(event) = rx.recv().await {
244            match event {
245                crate::providers::StreamEvent::FirstByte => {
246                    log::debug!("Overview generation: received first byte");
247                }
248                crate::providers::StreamEvent::TextDelta { 0: delta } => {
249                    content.push_str(&delta);
250                }
251                crate::providers::StreamEvent::ThinkingDelta { 0: thinking } => {
252                    log::debug!("Overview thinking chunk: {} chars", thinking.len());
253                }
254                crate::providers::StreamEvent::ToolUseStart { id, name } => {
255                    log::debug!("Overview tool use start: {} ({})", name, id);
256                }
257                crate::providers::StreamEvent::ToolInputDelta { bytes_so_far } => {
258                    log::debug!("Overview tool input delta: {} bytes", bytes_so_far);
259                }
260                crate::providers::StreamEvent::Usage { output_tokens: _ } => {
261                    // Usage events are sent during streaming, but we only need the final
262                    // token counts from the Done event
263                }
264                crate::providers::StreamEvent::Done(response) => {
265                    input_tokens = response.usage.input_tokens;
266                    output_tokens = response.usage.output_tokens;
267                    log::info!(
268                        "Overview generation complete: input_tokens={}, output_tokens={}",
269                        input_tokens,
270                        output_tokens
271                    );
272                    // Use final content from response if our accumulated content is empty
273                    if content.is_empty() {
274                        for block in &response.content {
275                            if let crate::providers::ContentBlock::Text { text } = block {
276                                content.push_str(text);
277                            }
278                        }
279                    }
280                }
281                crate::providers::StreamEvent::Error { 0: msg } => {
282                    log::error!("Overview stream error: {}", msg);
283                    return Err(anyhow::anyhow!("Stream error: {}", msg));
284                }
285            }
286        }
287
288        if content.is_empty() {
289            return Err(anyhow::anyhow!("Overview generation returned empty content"));
290        }
291
292        // Save to file
293        let path = overview_path(project_root);
294        fs::write(&path, &content)
295            .with_context(|| format!("writing overview file {}", path.display()))?;
296
297        Ok(Self { content, path })
298    }
299
300    /// Delete the overview file if it exists.
301    pub fn clear(project_root: &Path) -> Result<()> {
302        let path = overview_path(project_root);
303        if path.exists() {
304            fs::remove_file(&path)
305                .with_context(|| format!("removing overview file {}", path.display()))?;
306        }
307        Ok(())
308    }
309
310    /// Check if an overview exists for the project.
311    pub fn exists(project_root: &Path) -> bool {
312        overview_path(project_root).exists()
313    }
314
315    /// Get the path to the overview file.
316    pub fn path(project_root: &Path) -> PathBuf {
317        overview_path(project_root)
318    }
319}
320
321/// Get the path to the overview file (directly in project root).
322fn overview_path(project_root: &Path) -> PathBuf {
323    project_root.join(OVERVIEW_FILENAME)
324}
325
/// Patterns to ignore when scanning the project.
///
/// An entry without `*` must equal the file or directory name exactly. An
/// entry containing `*` is a wildcard pattern in which every `*` matches any
/// run of characters (including none), e.g. `*.pyc` or `*.generated.*`.
const IGNORE_PATTERNS: &[&str] = &[
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Dependencies
    "node_modules",
    "vendor",
    // Build outputs
    "target",
    "target-test",
    "build",
    "dist",
    "out",
    "bin",
    "obj",
    ".cargo",
    // IDE and editor
    ".idea",
    ".vscode",
    ".vs",
    ".claude",
    ".matrix",
    // Cache and temp
    ".cache",
    "__pycache__",
    "*.pyc",
    ".DS_Store",
    "Thumbs.db",
    // Lock files (usually large and not informative)
    "Cargo.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    // Generated files
    "*.generated.*",
    "swagger.json",
    "swagger.yaml",
];

/// Check if a path component should be ignored.
///
/// `name` is a single file or directory name (not a full path). Returns
/// `true` when it equals one of the literal `IGNORE_PATTERNS` entries or
/// matches one of the wildcard entries.
pub fn should_ignore(name: &str) -> bool {
    IGNORE_PATTERNS.iter().any(|pattern| {
        if pattern.contains('*') {
            matches_wildcard(pattern, name)
        } else {
            *pattern == name
        }
    })
}

/// Match `name` against `pattern`, where each `*` in the pattern stands for
/// any (possibly empty) run of characters and all other text is literal.
fn matches_wildcard(pattern: &str, name: &str) -> bool {
    let mut segments = pattern.split('*');

    // `split` always yields at least one item: the literal text before the
    // first `*`, which the name has to start with.
    let prefix = segments.next().unwrap_or("");
    let mut rest = match name.strip_prefix(prefix) {
        Some(rest) => rest,
        None => return false,
    };

    let tail: Vec<&str> = segments.collect();
    let (suffix, middle) = match tail.split_last() {
        Some(parts) => parts,
        // No `*` at all: the whole pattern must equal the name.
        None => return rest.is_empty(),
    };

    // Literal pieces between two `*` must appear in order; taking the
    // leftmost occurrence leaves as much text as possible for later pieces.
    for segment in middle {
        match rest.find(*segment) {
            Some(at) => rest = &rest[at + segment.len()..],
            None => return false,
        }
    }

    // Whatever follows the last `*` has to close the name.
    rest.ends_with(*suffix)
}
382
/// Project context collected for AI analysis.
///
/// Built by `collect_project_context` and consumed when the overview prompt
/// is assembled.
struct ProjectContext {
    /// Configuration file contents (Cargo.toml, package.json, etc.) as
    /// `(file name, truncated content)` pairs.
    config_files: Vec<(String, String)>,
    /// README content (first part); `None` when the project has no README.
    readme: Option<String>,
    /// Directory structure summary
    directory_structure: String,
    /// Key source files content (limited) as
    /// `(relative path, truncated content)` pairs.
    source_files: Vec<(String, String)>,
    /// Project type detected
    project_type: &'static str,
}
396
397/// Collect project context for AI analysis.
398fn collect_project_context(project_root: &Path) -> Result<ProjectContext> {
399    // Detect project type
400    let project_type = detect_project_type(project_root);
401
402    // Collect config files
403    let config_files = collect_config_files(project_root)?;
404
405    // Get README
406    let readme = read_readme(project_root)?;
407
408    // Build directory structure
409    let directory_structure = build_directory_structure(project_root)?;
410
411    // Collect key source files
412    let source_files = collect_key_source_files(project_root, project_type)?;
413
414    Ok(ProjectContext {
415        config_files,
416        readme,
417        directory_structure,
418        source_files,
419        project_type,
420    })
421}
422
423/// Detect project type from configuration files.
424pub fn detect_project_type(project_root: &Path) -> &'static str {
425    for config in PROJECT_TYPE_CONFIGS {
426        for detect_file in config.detect_files {
427            if project_root.join(detect_file).exists() {
428                return config.type_name;
429            }
430        }
431    }
432    PROJECT_TYPE_UNKNOWN
433}
434
435/// Collect configuration files content.
436fn collect_config_files(project_root: &Path) -> Result<Vec<(String, String)>> {
437    let mut files = Vec::new();
438    for filename in CONFIG_FILENAMES {
439        let path = project_root.join(filename);
440        if path.exists() {
441            let content =
442                fs::read_to_string(&path).with_context(|| format!("reading {}", filename))?;
443            let truncated = truncate_content(&content, CONFIG_FILE_MAX_CHARS);
444            files.push((filename.to_string(), truncated));
445        }
446    }
447
448    Ok(files)
449}
450
451/// Read README.md (first part).
452fn read_readme(project_root: &Path) -> Result<Option<String>> {
453    let readme_path = project_root.join(README_FILENAME);
454    if !readme_path.exists() {
455        return Ok(None);
456    }
457
458    let content =
459        fs::read_to_string(&readme_path).with_context(|| format!("reading {}", README_FILENAME))?;
460
461    Ok(Some(truncate_content(&content, README_MAX_CHARS)))
462}
463
464/// Build directory structure string.
465fn build_directory_structure(project_root: &Path) -> Result<String> {
466    let mut result = String::new();
467    result.push_str(&format!(
468        "{}/\n",
469        project_root
470            .file_name()
471            .and_then(|n| n.to_str())
472            .unwrap_or(DEFAULT_PROJECT_NAME)
473    ));
474
475    build_tree_recursive(project_root, 0, DIRECTORY_MAX_DEPTH, &mut result)?;
476
477    Ok(result)
478}
479
480/// Build directory tree recursively.
481fn build_tree_recursive(
482    dir: &Path,
483    depth: usize,
484    max_depth: usize,
485    result: &mut String,
486) -> Result<()> {
487    if depth > max_depth {
488        result.push_str(&format!("{}  ...\n", "  ".repeat(depth)));
489        return Ok(());
490    }
491
492    let entries = match fs::read_dir(dir) {
493        Ok(e) => e,
494        Err(_) => return Ok(()),
495    };
496
497    let mut dirs: Vec<String> = Vec::new();
498    let mut files: Vec<String> = Vec::new();
499
500    for entry in entries.flatten() {
501        let name = entry.file_name().to_string_lossy().into_owned();
502        if should_ignore(&name) {
503            continue;
504        }
505        if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
506            dirs.push(name);
507        } else {
508            files.push(name);
509        }
510    }
511
512    dirs.sort();
513    files.sort();
514
515    let indent = "  ".repeat(depth);
516    let max_items = if depth == 0 {
517        DIRECTORY_ROOT_MAX_ITEMS
518    } else {
519        DIRECTORY_OTHER_MAX_ITEMS
520    };
521
522    let mut count = 0;
523    for d in &dirs {
524        if count >= max_items {
525            result.push_str(&format!(
526                "{}  ... ({} more dirs)\n",
527                indent,
528                dirs.len() - count
529            ));
530            break;
531        }
532        result.push_str(&format!("{}  {}/\n", indent, d));
533        build_tree_recursive(&dir.join(d), depth + 1, max_depth, result)?;
534        count += 1;
535    }
536
537    for f in files.iter().take(max_items - count) {
538        result.push_str(&format!("{}  {}\n", indent, f));
539    }
540
541    if files.len() > max_items - count {
542        result.push_str(&format!(
543            "{}  ... ({} more files)\n",
544            indent,
545            files.len() - (max_items - count)
546        ));
547    }
548
549    Ok(())
550}
551
552/// Collect key source files for analysis.
553fn collect_key_source_files(
554    project_root: &Path,
555    project_type: &str,
556) -> Result<Vec<(String, String)>> {
557    let mut files = Vec::new();
558
559    // Find the matching project type config
560    let config = PROJECT_TYPE_CONFIGS
561        .iter()
562        .find(|c| c.type_name == project_type);
563
564    // Collect key source files from config
565    if let Some(config) = config {
566        for path_str in config.key_source_files {
567            let path = project_root.join(path_str);
568            if path.exists() {
569                let content = fs::read_to_string(&path).ok();
570                if let Some(content) = content {
571                    files.push((
572                        path_str.to_string(),
573                        truncate_content(&content, SOURCE_FILE_MAX_CHARS),
574                    ));
575                }
576            }
577        }
578    }
579
580    // Special handling for Rust: collect lib.rs and module files
581    if project_type == "Rust" {
582        // Collect lib.rs
583        let lib_path = project_root.join(SRC_DIR).join(RUST_LIB_FILE);
584        if lib_path.exists() {
585            let lib_relative = format!("{}/{}", SRC_DIR, RUST_LIB_FILE);
586            let content = fs::read_to_string(&lib_path).ok();
587            if let Some(content) = content {
588                files.push((
589                    lib_relative,
590                    truncate_content(&content, SOURCE_FILE_MAX_CHARS),
591                ));
592            }
593
594            // Collect module files (mod.rs in subdirectories)
595            let src_path = project_root.join(SRC_DIR);
596            if src_path.exists() {
597                for entry in fs::read_dir(&src_path)?.flatten() {
598                    let name = entry.file_name().to_string_lossy().into_owned();
599                    if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
600                        && !should_ignore(&name)
601                    {
602                        let mod_path = src_path.join(&name).join(RUST_MOD_FILE);
603                        if mod_path.exists() {
604                            let content = fs::read_to_string(&mod_path).ok();
605                            if let Some(content) = content {
606                                let mod_relative =
607                                    format!("{}/{}/{}", SRC_DIR, name, RUST_MOD_FILE);
608                                files.push((
609                                    mod_relative,
610                                    truncate_content(&content, MODULE_FILE_MAX_CHARS),
611                                ));
612                            }
613                        }
614                    }
615                }
616            }
617        }
618    }
619
620    Ok(files)
621}
622
623/// Truncate content to a maximum length, respecting char boundaries.
624pub fn truncate_content(content: &str, max_len: usize) -> String {
625    if content.len() <= max_len {
626        content.to_string()
627    } else {
628        let end = find_boundary(content, max_len);
629        let mut truncated = content[..end].to_string();
630        truncated.push_str("\n... (truncated)");
631        truncated
632    }
633}
634
635/// Extract content from AI response.
636#[allow(dead_code)]
637fn extract_response_content(response: &crate::providers::ChatResponse) -> String {
638    let mut content = String::new();
639    for block in &response.content {
640        if let crate::providers::ContentBlock::Text { text } = block {
641            content.push_str(text);
642        }
643    }
644    content
645}
646
#[cfg(test)]
mod tests {
    use super::*;

    /// Cutting in the middle of multibyte text must not panic and must add
    /// the truncation marker.
    #[test]
    fn truncate_content_respects_char_boundary() {
        // Chinese text: every character occupies several bytes, so a byte
        // limit of 50 is likely to land inside one of them.
        let multibyte = "这是一个包含中文字符的测试文本,用于验证截断功能是否正确处理字符边界问题。";

        let shortened = truncate_content(multibyte, 50);

        // A `String` is always valid UTF-8, so only the marker needs checking.
        assert!(shortened.contains("... (truncated)"));
    }

    /// Input shorter than the limit comes back untouched.
    #[test]
    fn truncate_content_preserves_short_text() {
        let input = "hello world";
        assert_eq!(truncate_content(input, 100), input);
    }

    /// With ASCII input every byte is a char boundary, so the cut is exact.
    #[test]
    fn truncate_content_exact_boundary() {
        let alphabet = "abcdefghijklmnopqrstuvwxyz";
        let shortened = truncate_content(alphabet, 10);
        assert_eq!(shortened, "abcdefghij\n... (truncated)");
    }

    /// A limit that coincides with the end of a multibyte character keeps
    /// that character intact.
    #[test]
    fn truncate_content_multibyte_edge() {
        let mixed = "你好世界hello";
        // "你好世界" = 12 bytes
        let shortened = truncate_content(mixed, 12);
        assert!(shortened.starts_with("你好世界"));
    }
}