Skip to main content

matrixcode_core/
overview.rs

1//! Project overview generation and caching.
2//!
3//! The `/init` command generates a project overview file using AI analysis.
4//! The overview captures the project architecture, key patterns, and development guidance.
5//!
6//! The overview file is stored at `MATRIX.md` in the project root.
7
8use crate::prompt::{OverviewContext, build_overview_prompt};
9use crate::providers::{ChatRequest, Message, MessageContent, Provider, Role};
10use crate::truncate::find_boundary;
11use anyhow::{Context, Result};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15// =============================================================================
16// Configuration Constants
17// =============================================================================
18
/// File name of the cached project overview, created directly in the project root.
pub const OVERVIEW_FILENAME: &str = "MATRIX.md";
/// Name of the directory that holds matrixcode metadata.
pub const MATRIXCODE_DIR: &str = ".matrix";

// --- Token and content limits ---

/// Output-token budget (`max_tokens`) requested when generating an overview.
const MAX_OUTPUT_TOKENS: u32 = 8192;

/// Size limit for each configuration file included in the prompt.
///
/// The limit is handed to `truncate_content`, which compares it against
/// `str::len()`, so the unit is UTF-8 bytes rather than characters.
const CONFIG_FILE_MAX_CHARS: usize = 2000;

/// Size limit for the README excerpt (same byte-based unit as above).
const README_MAX_CHARS: usize = 1000;

/// Size limit for each key source file (same byte-based unit as above).
const SOURCE_FILE_MAX_CHARS: usize = 3000;

/// Size limit for each Rust `mod.rs` file (same byte-based unit as above).
const MODULE_FILE_MAX_CHARS: usize = 2000;

// --- Directory structure limits ---

/// Deepest level listed in the directory tree; the project root is level 0 and
/// anything below this level is collapsed to `...`.
const DIRECTORY_MAX_DEPTH: usize = 3;

/// Number of entries listed for the project root before the rest is summarised.
const DIRECTORY_ROOT_MAX_ITEMS: usize = 15;

/// Number of entries listed for every directory below the root.
const DIRECTORY_OTHER_MAX_ITEMS: usize = 10;

// --- Common file names ---

/// Fallback project name used when the root path has no usable final component.
const DEFAULT_PROJECT_NAME: &str = "project";

/// README file looked up in the project root.
const README_FILENAME: &str = "README.md";

/// Conventional source directory name.
pub const SRC_DIR: &str = "src";

/// File name of a Rust directory module.
const RUST_MOD_FILE: &str = "mod.rs";

/// File name of a Rust library crate root.
const RUST_LIB_FILE: &str = "lib.rs";
68
69// --- Project type configuration ---
70
/// Describes one supported project type: how to recognise it and which source
/// files are worth sending to the AI.
///
/// All fields are `'static` borrowed data, so the type is cheap to copy and can
/// live in a `const` table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ProjectTypeConfig {
    /// Human-readable type name.
    pub type_name: &'static str,
    /// Files whose presence in the project root indicates this project type
    /// (checked in order).
    pub detect_files: &'static [&'static str],
    /// Key source file paths relative to the project root.
    pub key_source_files: &'static [&'static str],
}

/// All supported project type configurations.
///
/// Entries are tried in the order listed and the first entry with a matching
/// detection file wins, so more specific project types must come first.
pub const PROJECT_TYPE_CONFIGS: &[ProjectTypeConfig] = &[
    ProjectTypeConfig {
        type_name: "Rust",
        detect_files: &["Cargo.toml"],
        key_source_files: &["src/main.rs", "src/agent.rs"],
    },
    ProjectTypeConfig {
        type_name: "Go",
        detect_files: &["go.mod"],
        key_source_files: &["main.go", "cmd/main.go"],
    },
    ProjectTypeConfig {
        type_name: "Node.js/TypeScript",
        detect_files: &["package.json"],
        key_source_files: &[
            "src/index.ts",
            "src/index.js",
            "src/main.ts",
            "src/main.js",
            "src/app.ts",
            "src/app.js",
        ],
    },
    ProjectTypeConfig {
        type_name: "Python",
        detect_files: &["pyproject.toml", "requirements.txt"],
        key_source_files: &["main.py", "app.py", "__init__.py"],
    },
    ProjectTypeConfig {
        type_name: "Java (Maven)",
        detect_files: &["pom.xml"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "Java (Gradle)",
        detect_files: &["build.gradle"],
        key_source_files: &[],
    },
    ProjectTypeConfig {
        type_name: "C/C++ (Make)",
        detect_files: &["Makefile"],
        key_source_files: &[],
    },
];

/// Type name reported when no entry of `PROJECT_TYPE_CONFIGS` matches.
const PROJECT_TYPE_UNKNOWN: &str = "Unknown";
129
130// --- Configuration file names to scan ---
131
/// Configuration files looked up in the project root, in the order their
/// contents are added to the prompt.
const CONFIG_FILENAMES: &[&str] = &[
    // Build manifests and dependency lists
    "Cargo.toml",
    "package.json",
    "go.mod",
    "pyproject.toml",
    "requirements.txt",
    "pom.xml",
    "build.gradle",
    "Makefile",
    // Containers
    "docker-compose.yml",
    "Dockerfile",
    // JavaScript / TypeScript tooling
    "tsconfig.json",
    "vite.config.ts",
    "vite.config.js",
    "next.config.js",
    "nuxt.config.ts",
    "tailwind.config.js",
    "tailwind.config.ts",
    // Environment template
    ".env.example",
];
152
153/// Project overview containing the generated summary.
154#[derive(Debug, Clone)]
155pub struct ProjectOverview {
156    /// The rendered markdown content.
157    pub content: String,
158    /// Path to the overview file (for cache invalidation info).
159    pub path: PathBuf,
160}
161
162impl ProjectOverview {
163    /// Load the overview from the project root if it exists.
164    /// Returns `None` if the file doesn't exist.
165    pub fn load(project_root: &Path) -> Result<Option<Self>> {
166        let path = overview_path(project_root);
167        if !path.exists() {
168            return Ok(None);
169        }
170        let content = fs::read_to_string(&path)
171            .with_context(|| format!("reading overview file {}", path.display()))?;
172
173        // Limit to 200 lines to prevent excessively long content
174        let limited_content = content.lines().take(200).collect::<Vec<_>>().join("\n");
175
176        Ok(Some(Self {
177            content: limited_content,
178            path,
179        }))
180    }
181
182    /// Generate and save a new overview using AI analysis.
183    /// This method collects project files and sends them to the AI for analysis.
184    pub async fn generate_with_ai(project_root: &Path, provider: &dyn Provider) -> Result<Self> {
185        let project_name = project_root
186            .file_name()
187            .and_then(|n| n.to_str())
188            .unwrap_or(DEFAULT_PROJECT_NAME);
189
190        // Collect project context
191        let context = collect_project_context(project_root)?;
192
193        // Build the AI prompt
194        let prompt = build_overview_prompt(&OverviewContext {
195            project_name: project_name.to_string(),
196            project_type: context.project_type.to_string(),
197            directory_structure: context.directory_structure.clone(),
198            config_files: context.config_files.clone(),
199            readme: context.readme.clone(),
200            source_files: context.source_files.clone(),
201        });
202
203        // Call AI API
204        let request = ChatRequest {
205            messages: vec![Message {
206                role: Role::User,
207                content: MessageContent::Text(prompt),
208            }],
209            tools: vec![],
210            system: None,
211            think: false,
212            max_tokens: MAX_OUTPUT_TOKENS,
213            server_tools: vec![],
214            enable_caching: false, // No caching for overview generation
215        };
216
217        let model_name = provider.model_name();
218        log::info!(
219            "Overview generation: sending request to AI (model: {}, max_tokens: {})",
220            model_name,
221            MAX_OUTPUT_TOKENS
222        );
223
224        // Use streaming API (same as Agent) for better compatibility with DashScope
225        let mut rx = provider
226            .chat_stream(request)
227            .await
228            .map_err(|e| {
229                log::error!("Overview generation failed to start stream: {}", e);
230                e
231            })
232            .with_context(|| {
233                format!(
234                    "starting AI stream for overview generation (model: {})",
235                    model_name
236                )
237            })?;
238
239        // Collect streaming response
240        let mut content = String::new();
241        let mut input_tokens: u32;
242        let mut output_tokens: u32;
243
244        while let Some(event) = rx.recv().await {
245            match event {
246                crate::providers::StreamEvent::FirstByte => {
247                    log::debug!("Overview generation: received first byte");
248                }
249                crate::providers::StreamEvent::TextDelta { 0: delta } => {
250                    content.push_str(&delta);
251                }
252                crate::providers::StreamEvent::ThinkingDelta { 0: thinking } => {
253                    log::debug!("Overview thinking chunk: {} chars", thinking.len());
254                }
255                crate::providers::StreamEvent::ToolUseStart { id, name } => {
256                    log::debug!("Overview tool use start: {} ({})", name, id);
257                }
258                crate::providers::StreamEvent::ToolInputDelta { bytes_so_far } => {
259                    log::debug!("Overview tool input delta: {} bytes", bytes_so_far);
260                }
261                crate::providers::StreamEvent::ToolInputComplete { .. } => {
262                    // Overview generation does not execute tools; ignore complete tool inputs.
263                }
264                crate::providers::StreamEvent::Usage { output_tokens: _ } => {
265                    // Usage events are sent during streaming, but we only need the final
266                    // token counts from the Done event
267                }
268                crate::providers::StreamEvent::Done(response) => {
269                    input_tokens = response.usage.input_tokens;
270                    output_tokens = response.usage.output_tokens;
271                    log::info!(
272                        "Overview generation complete: input_tokens={}, output_tokens={}",
273                        input_tokens,
274                        output_tokens
275                    );
276                    // Use final content from response if our accumulated content is empty
277                    if content.is_empty() {
278                        for block in &response.content {
279                            if let crate::providers::ContentBlock::Text { text } = block {
280                                content.push_str(text);
281                            }
282                        }
283                    }
284                }
285                crate::providers::StreamEvent::Error { 0: msg } => {
286                    log::error!("Overview stream error: {}", msg);
287                    return Err(anyhow::anyhow!("Stream error: {}", msg));
288                }
289            }
290        }
291
292        if content.is_empty() {
293            return Err(anyhow::anyhow!(
294                "Overview generation returned empty content"
295            ));
296        }
297
298        // Save to file
299        let path = overview_path(project_root);
300        fs::write(&path, &content)
301            .with_context(|| format!("writing overview file {}", path.display()))?;
302
303        Ok(Self { content, path })
304    }
305
306    /// Delete the overview file if it exists.
307    pub fn clear(project_root: &Path) -> Result<()> {
308        let path = overview_path(project_root);
309        if path.exists() {
310            fs::remove_file(&path)
311                .with_context(|| format!("removing overview file {}", path.display()))?;
312        }
313        Ok(())
314    }
315
316    /// Check if an overview exists for the project.
317    pub fn exists(project_root: &Path) -> bool {
318        overview_path(project_root).exists()
319    }
320
321    /// Get the path to the overview file.
322    pub fn path(project_root: &Path) -> PathBuf {
323        overview_path(project_root)
324    }
325}
326
327/// Get the path to the overview file (directly in project root).
328fn overview_path(project_root: &Path) -> PathBuf {
329    project_root.join(OVERVIEW_FILENAME)
330}
331
/// Names to skip when scanning the project.
///
/// An entry without `*` must equal the file or directory name exactly. An entry
/// containing `*` is a glob in which each `*` matches any run of characters
/// (including none).
const IGNORE_PATTERNS: &[&str] = &[
    // Version control
    ".git",
    ".svn",
    ".hg",
    // Dependencies
    "node_modules",
    "vendor",
    // Build outputs
    "target",
    "target-test",
    "build",
    "dist",
    "out",
    "bin",
    "obj",
    ".cargo",
    // IDE and editor
    ".idea",
    ".vscode",
    ".vs",
    ".claude",
    ".matrix",
    // Cache and temp
    ".cache",
    "__pycache__",
    "*.pyc",
    ".DS_Store",
    "Thumbs.db",
    // Lock files (usually large and not informative)
    "Cargo.lock",
    "package-lock.json",
    "yarn.lock",
    "pnpm-lock.yaml",
    // Generated files
    "*.generated.*",
    "swagger.json",
    "swagger.yaml",
];

/// Check if a path component should be ignored.
///
/// `name` is a single file or directory name (not a path). Returns `true` when
/// it equals a literal entry of `IGNORE_PATTERNS` or matches one of its glob
/// entries, e.g. `foo.pyc` for `*.pyc` or `api.generated.ts` for
/// `*.generated.*`.
pub fn should_ignore(name: &str) -> bool {
    IGNORE_PATTERNS.iter().any(|pattern| {
        if pattern.contains('*') {
            matches_glob(pattern, name)
        } else {
            *pattern == name
        }
    })
}

/// Match `name` against a glob whose only metacharacter is `*`.
///
/// The pattern is split into its literal pieces: the first piece must be a
/// prefix of `name`, the last piece a suffix of what remains, and every piece in
/// between must occur in order. Taking the leftmost occurrence of each middle
/// piece is sufficient because `*` can absorb any text that follows it.
fn matches_glob(pattern: &str, name: &str) -> bool {
    let mut literals = pattern.split('*');

    // `split` always yields at least one piece (possibly empty).
    let head = literals.next().unwrap_or("");
    let mut rest = match name.strip_prefix(head) {
        Some(rest) => rest,
        None => return false,
    };

    let mut tail_parts: Vec<&str> = literals.collect();
    let last = match tail_parts.pop() {
        Some(last) => last,
        // No `*` in the pattern at all: only an exact match is accepted.
        None => return rest.is_empty(),
    };

    for part in tail_parts {
        match rest.find(part) {
            Some(at) => rest = &rest[at + part.len()..],
            None => return false,
        }
    }

    rest.ends_with(last)
}
388
389/// Project context collected for AI analysis.
390struct ProjectContext {
391    /// Configuration file contents (Cargo.toml, package.json, etc.)
392    config_files: Vec<(String, String)>,
393    /// README content (first part)
394    readme: Option<String>,
395    /// Directory structure summary
396    directory_structure: String,
397    /// Key source files content (limited)
398    source_files: Vec<(String, String)>,
399    /// Project type detected
400    project_type: &'static str,
401}
402
403/// Collect project context for AI analysis.
404fn collect_project_context(project_root: &Path) -> Result<ProjectContext> {
405    // Detect project type
406    let project_type = detect_project_type(project_root);
407
408    // Collect config files
409    let config_files = collect_config_files(project_root)?;
410
411    // Get README
412    let readme = read_readme(project_root)?;
413
414    // Build directory structure
415    let directory_structure = build_directory_structure(project_root)?;
416
417    // Collect key source files
418    let source_files = collect_key_source_files(project_root, project_type)?;
419
420    Ok(ProjectContext {
421        config_files,
422        readme,
423        directory_structure,
424        source_files,
425        project_type,
426    })
427}
428
429/// Detect project type from configuration files.
430pub fn detect_project_type(project_root: &Path) -> &'static str {
431    for config in PROJECT_TYPE_CONFIGS {
432        for detect_file in config.detect_files {
433            if project_root.join(detect_file).exists() {
434                return config.type_name;
435            }
436        }
437    }
438    PROJECT_TYPE_UNKNOWN
439}
440
441/// Collect configuration files content.
442fn collect_config_files(project_root: &Path) -> Result<Vec<(String, String)>> {
443    let mut files = Vec::new();
444    for filename in CONFIG_FILENAMES {
445        let path = project_root.join(filename);
446        if path.exists() {
447            let content =
448                fs::read_to_string(&path).with_context(|| format!("reading {}", filename))?;
449            let truncated = truncate_content(&content, CONFIG_FILE_MAX_CHARS);
450            files.push((filename.to_string(), truncated));
451        }
452    }
453
454    Ok(files)
455}
456
457/// Read README.md (first part).
458fn read_readme(project_root: &Path) -> Result<Option<String>> {
459    let readme_path = project_root.join(README_FILENAME);
460    if !readme_path.exists() {
461        return Ok(None);
462    }
463
464    let content =
465        fs::read_to_string(&readme_path).with_context(|| format!("reading {}", README_FILENAME))?;
466
467    Ok(Some(truncate_content(&content, README_MAX_CHARS)))
468}
469
470/// Build directory structure string.
471fn build_directory_structure(project_root: &Path) -> Result<String> {
472    let mut result = String::new();
473    result.push_str(&format!(
474        "{}/\n",
475        project_root
476            .file_name()
477            .and_then(|n| n.to_str())
478            .unwrap_or(DEFAULT_PROJECT_NAME)
479    ));
480
481    build_tree_recursive(project_root, 0, DIRECTORY_MAX_DEPTH, &mut result)?;
482
483    Ok(result)
484}
485
486/// Build directory tree recursively.
487fn build_tree_recursive(
488    dir: &Path,
489    depth: usize,
490    max_depth: usize,
491    result: &mut String,
492) -> Result<()> {
493    if depth > max_depth {
494        result.push_str(&format!("{}  ...\n", "  ".repeat(depth)));
495        return Ok(());
496    }
497
498    let entries = match fs::read_dir(dir) {
499        Ok(e) => e,
500        Err(_) => return Ok(()),
501    };
502
503    let mut dirs: Vec<String> = Vec::new();
504    let mut files: Vec<String> = Vec::new();
505
506    for entry in entries.flatten() {
507        let name = entry.file_name().to_string_lossy().into_owned();
508        if should_ignore(&name) {
509            continue;
510        }
511        if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
512            dirs.push(name);
513        } else {
514            files.push(name);
515        }
516    }
517
518    dirs.sort();
519    files.sort();
520
521    let indent = "  ".repeat(depth);
522    let max_items = if depth == 0 {
523        DIRECTORY_ROOT_MAX_ITEMS
524    } else {
525        DIRECTORY_OTHER_MAX_ITEMS
526    };
527
528    let mut count = 0;
529    for d in &dirs {
530        if count >= max_items {
531            result.push_str(&format!(
532                "{}  ... ({} more dirs)\n",
533                indent,
534                dirs.len() - count
535            ));
536            break;
537        }
538        result.push_str(&format!("{}  {}/\n", indent, d));
539        build_tree_recursive(&dir.join(d), depth + 1, max_depth, result)?;
540        count += 1;
541    }
542
543    for f in files.iter().take(max_items - count) {
544        result.push_str(&format!("{}  {}\n", indent, f));
545    }
546
547    if files.len() > max_items - count {
548        result.push_str(&format!(
549            "{}  ... ({} more files)\n",
550            indent,
551            files.len() - (max_items - count)
552        ));
553    }
554
555    Ok(())
556}
557
558/// Collect key source files for analysis.
559fn collect_key_source_files(
560    project_root: &Path,
561    project_type: &str,
562) -> Result<Vec<(String, String)>> {
563    let mut files = Vec::new();
564
565    // Find the matching project type config
566    let config = PROJECT_TYPE_CONFIGS
567        .iter()
568        .find(|c| c.type_name == project_type);
569
570    // Collect key source files from config
571    if let Some(config) = config {
572        for path_str in config.key_source_files {
573            let path = project_root.join(path_str);
574            if path.exists() {
575                let content = fs::read_to_string(&path).ok();
576                if let Some(content) = content {
577                    files.push((
578                        path_str.to_string(),
579                        truncate_content(&content, SOURCE_FILE_MAX_CHARS),
580                    ));
581                }
582            }
583        }
584    }
585
586    // Special handling for Rust: collect lib.rs and module files
587    if project_type == "Rust" {
588        // Collect lib.rs
589        let lib_path = project_root.join(SRC_DIR).join(RUST_LIB_FILE);
590        if lib_path.exists() {
591            let lib_relative = format!("{}/{}", SRC_DIR, RUST_LIB_FILE);
592            let content = fs::read_to_string(&lib_path).ok();
593            if let Some(content) = content {
594                files.push((
595                    lib_relative,
596                    truncate_content(&content, SOURCE_FILE_MAX_CHARS),
597                ));
598            }
599
600            // Collect module files (mod.rs in subdirectories)
601            let src_path = project_root.join(SRC_DIR);
602            if src_path.exists() {
603                for entry in fs::read_dir(&src_path)?.flatten() {
604                    let name = entry.file_name().to_string_lossy().into_owned();
605                    if entry.file_type().map(|t| t.is_dir()).unwrap_or(false)
606                        && !should_ignore(&name)
607                    {
608                        let mod_path = src_path.join(&name).join(RUST_MOD_FILE);
609                        if mod_path.exists() {
610                            let content = fs::read_to_string(&mod_path).ok();
611                            if let Some(content) = content {
612                                let mod_relative =
613                                    format!("{}/{}/{}", SRC_DIR, name, RUST_MOD_FILE);
614                                files.push((
615                                    mod_relative,
616                                    truncate_content(&content, MODULE_FILE_MAX_CHARS),
617                                ));
618                            }
619                        }
620                    }
621                }
622            }
623        }
624    }
625
626    Ok(files)
627}
628
629/// Truncate content to a maximum length, respecting char boundaries.
630pub fn truncate_content(content: &str, max_len: usize) -> String {
631    if content.len() <= max_len {
632        content.to_string()
633    } else {
634        let end = find_boundary(content, max_len);
635        let mut truncated = content[..end].to_string();
636        truncated.push_str("\n... (truncated)");
637        truncated
638    }
639}
640
641/// Extract content from AI response.
642#[allow(dead_code)]
643fn extract_response_content(response: &crate::providers::ChatResponse) -> String {
644    let mut content = String::new();
645    for block in &response.content {
646        if let crate::providers::ContentBlock::Text { text } = block {
647            content.push_str(text);
648        }
649    }
650    content
651}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// A byte limit that falls inside a multibyte character must not panic and
    /// must still produce the truncation marker.
    #[test]
    fn truncate_content_respects_char_boundary() {
        let chinese = "这是一个包含中文字符的测试文本,用于验证截断功能是否正确处理字符边界问题。";

        let shortened = truncate_content(chinese, 50);

        assert!(shortened.contains("... (truncated)"));
    }

    /// Input within the limit is returned unchanged.
    #[test]
    fn truncate_content_preserves_short_text() {
        let input = "hello world";
        assert_eq!(truncate_content(input, 100), input);
    }

    /// In ASCII text every byte offset is a char boundary, so the cut is exact.
    #[test]
    fn truncate_content_exact_boundary() {
        let alphabet = "abcdefghijklmnopqrstuvwxyz";
        let shortened = truncate_content(alphabet, 10);
        assert_eq!(shortened, "abcdefghij\n... (truncated)");
    }

    /// A limit that lands exactly after a multibyte character keeps that character.
    #[test]
    fn truncate_content_multibyte_edge() {
        let mixed = "你好世界hello";
        // "你好世界" occupies 12 bytes.
        let shortened = truncate_content(mixed, 12);
        assert!(shortened.starts_with("你好世界"));
    }
}