acp/annotate/
analyzer.rs

1//! @acp:module "Annotation Analyzer"
2//! @acp:summary "Analyzes code to identify annotation gaps and existing coverage"
3//! @acp:domain cli
4//! @acp:layer service
5//! @acp:stability experimental
6//!
7//! # Annotation Analyzer
8//!
9//! Provides functionality for analyzing source files to:
10//! - Detect existing ACP annotations
11//! - Identify symbols lacking annotations
12//! - Calculate annotation coverage metrics
13//! - Extract doc comments for potential conversion
14
15use std::collections::{HashMap, HashSet};
16use std::path::{Path, PathBuf};
17
18use regex::Regex;
19use walkdir::WalkDir;
20
21use crate::ast::{AstParser, ExtractedSymbol, SymbolKind, Visibility};
22use crate::config::Config;
23use crate::error::Result;
24
25use super::{AnalysisResult, AnnotateLevel, AnnotationGap, AnnotationType, ExistingAnnotation};
26
27/// @acp:summary "Analyzes source files for ACP annotation coverage"
28/// @acp:lock normal
29pub struct Analyzer {
30    /// Configuration for analysis
31    config: Config,
32
33    /// AST parser for symbol extraction
34    ast_parser: AstParser,
35
36    /// Regex for detecting @acp: annotations
37    annotation_pattern: Regex,
38
39    /// Annotation level for gap detection
40    level: AnnotateLevel,
41}
42
43impl Analyzer {
44    /// @acp:summary "Creates a new analyzer with the given configuration"
45    pub fn new(config: &Config) -> Result<Self> {
46        let annotation_pattern =
47            Regex::new(r"@acp:([a-z][a-z0-9-]*)(?:\s+(.+))?$").expect("Invalid annotation regex");
48
49        Ok(Self {
50            config: config.clone(),
51            ast_parser: AstParser::new()?,
52            annotation_pattern,
53            level: AnnotateLevel::Standard,
54        })
55    }
56
57    /// @acp:summary "Sets the annotation level for gap detection"
58    pub fn with_level(mut self, level: AnnotateLevel) -> Self {
59        self.level = level;
60        self
61    }
62
63    /// @acp:summary "Discovers files to analyze based on configuration"
64    ///
65    /// Walks the directory tree and filters files based on include/exclude
66    /// patterns from the configuration.
67    pub fn discover_files(&self, root: &Path, filter: Option<&str>) -> Result<Vec<PathBuf>> {
68        let mut files = Vec::new();
69
70        for entry in WalkDir::new(root)
71            .follow_links(true)
72            .into_iter()
73            .filter_map(|e| e.ok())
74        {
75            let path = entry.path();
76
77            // Skip directories
78            if path.is_dir() {
79                continue;
80            }
81
82            // Check if file matches include patterns
83            let path_str = path.to_string_lossy();
84            let matches_include = self.config.include.iter().any(|pattern| {
85                glob::Pattern::new(pattern)
86                    .map(|p| p.matches(&path_str))
87                    .unwrap_or(false)
88            });
89
90            if !matches_include {
91                continue;
92            }
93
94            // Check if file matches exclude patterns
95            let matches_exclude = self.config.exclude.iter().any(|pattern| {
96                glob::Pattern::new(pattern)
97                    .map(|p| p.matches(&path_str))
98                    .unwrap_or(false)
99            });
100
101            if matches_exclude {
102                continue;
103            }
104
105            // Apply optional filter
106            if let Some(filter_pattern) = filter {
107                if let Ok(pattern) = glob::Pattern::new(filter_pattern) {
108                    if !pattern.matches(&path_str) {
109                        continue;
110                    }
111                }
112            }
113
114            files.push(path.to_path_buf());
115        }
116
117        Ok(files)
118    }
119
120    /// @acp:summary "Analyzes a single file for annotation coverage"
121    ///
122    /// Parses the file, extracts symbols and existing annotations,
123    /// and identifies gaps where annotations are missing.
124    pub fn analyze_file(&self, file_path: &Path) -> Result<AnalysisResult> {
125        let content = std::fs::read_to_string(file_path)?;
126        let path_str = file_path.to_string_lossy().to_string();
127
128        // Detect language from extension
129        let language = self.detect_language(file_path);
130
131        let mut result = AnalysisResult::new(&path_str, &language);
132
133        // Extract existing annotations from comments
134        result.existing_annotations = self.extract_existing_annotations(&content, &path_str);
135
136        // Parse AST and extract symbols
137        if let Ok(symbols) = self.ast_parser.parse_file(file_path, &content) {
138            // Associate annotations with their correct symbol targets
139            self.associate_annotations_with_symbols(&mut result.existing_annotations, &symbols);
140
141            // Build map of annotated targets -> annotation types they have
142            let annotated_types: HashMap<String, HashSet<AnnotationType>> = {
143                let mut map: HashMap<String, HashSet<AnnotationType>> = HashMap::new();
144                for ann in &result.existing_annotations {
145                    map.entry(ann.target.clone())
146                        .or_default()
147                        .insert(ann.annotation_type);
148                }
149                map
150            };
151
152            // Find gaps (symbols with missing annotation types)
153            for symbol in &symbols {
154                if self.should_annotate_symbol(symbol) {
155                    let target = symbol.qualified_name.as_ref().unwrap_or(&symbol.name);
156
157                    // Get existing annotation types for this target
158                    let existing_types = annotated_types.get(target).cloned().unwrap_or_default();
159
160                    // Determine which annotation types are missing
161                    let missing = self.get_missing_annotation_types(symbol, &existing_types);
162
163                    if !missing.is_empty() {
164                        // Use definition_start_line (before decorators/attributes) for insertion
165                        let insertion_line =
166                            symbol.definition_start_line.unwrap_or(symbol.start_line);
167
168                        let mut gap = AnnotationGap::new(target, symbol.start_line)
169                            .with_insertion_line(insertion_line)
170                            .with_symbol_kind(symbol.kind)
171                            .with_visibility(symbol.visibility);
172
173                        if symbol.exported {
174                            gap = gap.exported();
175                        }
176
177                        // Set doc comment with calculated line range
178                        if let Some(doc) = &symbol.doc_comment {
179                            // Try to find actual doc comment boundaries in source
180                            if let Some((start, end)) =
181                                self.find_doc_comment_range(&content, symbol.start_line)
182                            {
183                                gap = gap.with_doc_comment_range(doc, start, end);
184                            } else {
185                                // Fallback to calculated range
186                                let doc_line_count = doc.lines().count();
187                                if doc_line_count > 0 && symbol.start_line > doc_line_count {
188                                    let doc_end = symbol.start_line - 1;
189                                    let doc_start = doc_end.saturating_sub(doc_line_count - 1);
190                                    gap = gap.with_doc_comment_range(doc, doc_start, doc_end);
191                                } else {
192                                    gap = gap.with_doc_comment(doc);
193                                }
194                            }
195                        }
196
197                        gap.missing = missing;
198                        result.gaps.push(gap);
199                    }
200                }
201            }
202
203            // Check for file-level annotation gap
204            let file_existing_types = annotated_types.get(&path_str).cloned().unwrap_or_default();
205            let mut file_missing = Vec::new();
206
207            if !file_existing_types.contains(&AnnotationType::Module) {
208                file_missing.push(AnnotationType::Module);
209            }
210            if self.level.includes(AnnotationType::Summary)
211                && !file_existing_types.contains(&AnnotationType::Summary)
212            {
213                file_missing.push(AnnotationType::Summary);
214            }
215            if self.level.includes(AnnotationType::Domain)
216                && !file_existing_types.contains(&AnnotationType::Domain)
217            {
218                file_missing.push(AnnotationType::Domain);
219            }
220
221            if !file_missing.is_empty() {
222                let mut file_gap = AnnotationGap::new(&path_str, 1);
223                file_gap.missing = file_missing;
224                result.gaps.push(file_gap);
225            }
226        }
227
228        // Calculate coverage
229        result.calculate_coverage();
230
231        Ok(result)
232    }
233
234    /// @acp:summary "Detects the programming language from file extension"
235    fn detect_language(&self, path: &Path) -> String {
236        path.extension()
237            .and_then(|ext| ext.to_str())
238            .map(|ext| match ext {
239                "ts" | "tsx" => "typescript",
240                "js" | "jsx" | "mjs" | "cjs" => "javascript",
241                "py" | "pyi" => "python",
242                "rs" => "rust",
243                "go" => "go",
244                "java" => "java",
245                _ => "unknown",
246            })
247            .unwrap_or("unknown")
248            .to_string()
249    }
250
251    /// @acp:summary "Extracts existing @acp: annotations from file content"
252    fn extract_existing_annotations(
253        &self,
254        content: &str,
255        file_path: &str,
256    ) -> Vec<ExistingAnnotation> {
257        let mut annotations = Vec::new();
258        let current_target = file_path.to_string();
259
260        for (line_num, line) in content.lines().enumerate() {
261            let line_number = line_num + 1; // Convert to 1-indexed
262
263            // Check for @acp: annotation
264            if let Some(caps) = self.annotation_pattern.captures(line) {
265                let namespace = caps.get(1).map(|m| m.as_str()).unwrap_or("");
266                let value = caps.get(2).map(|m| m.as_str().trim()).unwrap_or("");
267
268                if let Some(annotation_type) = self.parse_annotation_type(namespace) {
269                    annotations.push(ExistingAnnotation {
270                        target: current_target.clone(),
271                        annotation_type,
272                        value: value.trim_matches('"').to_string(),
273                        line: line_number,
274                    });
275                }
276            }
277        }
278
279        annotations
280    }
281
282    /// @acp:summary "Associates annotations with their correct symbol targets"
283    ///
284    /// For each annotation, finds the symbol that immediately follows it
285    /// (within a reasonable line distance) and updates the annotation's target.
286    fn associate_annotations_with_symbols(
287        &self,
288        annotations: &mut [ExistingAnnotation],
289        symbols: &[ExtractedSymbol],
290    ) {
291        // Sort symbols by start line for efficient lookup
292        let mut sorted_symbols: Vec<&ExtractedSymbol> = symbols.iter().collect();
293        sorted_symbols.sort_by_key(|s| s.start_line);
294
295        for annotation in annotations.iter_mut() {
296            // Find the symbol that starts closest after this annotation
297            // (annotations appear in doc comments just before the symbol)
298            let annotation_line = annotation.line;
299
300            // Look for a symbol that starts within 20 lines after the annotation
301            // (doc comments can be multi-line)
302            let max_distance = 20;
303
304            if let Some(symbol) = sorted_symbols.iter().find(|s| {
305                s.start_line > annotation_line && s.start_line <= annotation_line + max_distance
306            }) {
307                // Update the target to the symbol's qualified name
308                annotation.target = symbol
309                    .qualified_name
310                    .clone()
311                    .unwrap_or_else(|| symbol.name.clone());
312            }
313            // If no symbol found, the annotation stays associated with the file path
314            // (module-level annotation)
315        }
316    }
317
318    /// @acp:summary "Parses an annotation namespace into an AnnotationType"
319    fn parse_annotation_type(&self, namespace: &str) -> Option<AnnotationType> {
320        match namespace {
321            "module" => Some(AnnotationType::Module),
322            "summary" => Some(AnnotationType::Summary),
323            "domain" => Some(AnnotationType::Domain),
324            "layer" => Some(AnnotationType::Layer),
325            "lock" => Some(AnnotationType::Lock),
326            "stability" => Some(AnnotationType::Stability),
327            "deprecated" => Some(AnnotationType::Deprecated),
328            "ai-hint" => Some(AnnotationType::AiHint),
329            "ref" => Some(AnnotationType::Ref),
330            "hack" => Some(AnnotationType::Hack),
331            "lock-reason" => Some(AnnotationType::LockReason),
332            _ => None,
333        }
334    }
335
336    /// @acp:summary "Determines if a symbol should be annotated"
337    fn should_annotate_symbol(&self, symbol: &ExtractedSymbol) -> bool {
338        // Skip private symbols unless they're important
339        match symbol.visibility {
340            Visibility::Private => false,
341            Visibility::Protected | Visibility::Internal | Visibility::Crate => {
342                // Include protected/internal if they're "important" kinds
343                matches!(
344                    symbol.kind,
345                    SymbolKind::Class
346                        | SymbolKind::Struct
347                        | SymbolKind::Interface
348                        | SymbolKind::Trait
349                )
350            }
351            Visibility::Public => true,
352        }
353    }
354
355    /// @acp:summary "Determines which annotation types are missing for a symbol"
356    fn get_missing_annotation_types(
357        &self,
358        symbol: &ExtractedSymbol,
359        existing_types: &HashSet<AnnotationType>,
360    ) -> Vec<AnnotationType> {
361        let mut missing = Vec::new();
362
363        // Check each annotation type at current level
364        for annotation_type in self.level.included_types() {
365            // Skip file-level only annotations for symbols
366            if matches!(annotation_type, AnnotationType::Module) {
367                continue;
368            }
369
370            // Check if this specific annotation type already exists
371            if !existing_types.contains(&annotation_type) {
372                missing.push(annotation_type);
373            }
374        }
375
376        // @acp:summary is always recommended for exported symbols
377        if symbol.exported
378            && !existing_types.contains(&AnnotationType::Summary)
379            && !missing.contains(&AnnotationType::Summary)
380        {
381            missing.insert(0, AnnotationType::Summary);
382        }
383
384        missing
385    }
386
387    /// @acp:summary "Finds the actual doc comment range by parsing source"
388    ///
389    /// Searches backward from the symbol line to find the JSDoc/doc comment
390    /// block boundaries (/** ... */). Returns (start_line, end_line) 1-indexed.
391    fn find_doc_comment_range(&self, content: &str, symbol_line: usize) -> Option<(usize, usize)> {
392        let lines: Vec<&str> = content.lines().collect();
393
394        // symbol_line is 1-indexed, convert to 0-indexed for array access
395        if symbol_line == 0 || symbol_line > lines.len() {
396            return None;
397        }
398
399        let mut end_line = None;
400        let mut start_line = None;
401
402        // Search backward from symbol (excluding the symbol line itself)
403        for i in (0..symbol_line.saturating_sub(1)).rev() {
404            let line = lines.get(i).map(|s| s.trim()).unwrap_or("");
405
406            // Found end of doc comment
407            if line.ends_with("*/") && end_line.is_none() {
408                end_line = Some(i + 1); // Convert back to 1-indexed
409            }
410
411            // Found start of doc comment
412            if line.starts_with("/**") || line == "/**" {
413                start_line = Some(i + 1); // Convert back to 1-indexed
414                break;
415            }
416
417            // If we haven't found end_line yet and hit non-comment/non-whitespace, stop
418            if end_line.is_none() {
419                // Allow: empty lines, decorator lines (@...), single-line comments
420                if !line.is_empty()
421                    && !line.starts_with("//")
422                    && !line.starts_with("@")
423                    && !line.starts_with("*")
424                {
425                    break;
426                }
427            }
428        }
429
430        match (start_line, end_line) {
431            (Some(s), Some(e)) if s <= e => Some((s, e)),
432            _ => None,
433        }
434    }
435
436    /// @acp:summary "Checks if a cache exists and has been initialized"
437    pub fn has_existing_cache(&self, root: &Path) -> bool {
438        let cache_path = root.join(".acp").join("acp.cache.json");
439        cache_path.exists()
440    }
441
442    /// @acp:summary "Calculates total coverage across multiple analysis results"
443    pub fn calculate_total_coverage(results: &[AnalysisResult]) -> f32 {
444        if results.is_empty() {
445            return 100.0;
446        }
447
448        let total_annotated: usize = results.iter().map(|r| r.existing_annotations.len()).sum();
449        let total_gaps: usize = results.iter().map(|r| r.gaps.len()).sum();
450        let total = total_annotated + total_gaps;
451
452        if total == 0 {
453            100.0
454        } else {
455            (total_annotated as f32 / total as f32) * 100.0
456        }
457    }
458}
459
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an analyzer from the default configuration.
    fn default_analyzer() -> Analyzer {
        let config = Config::default();
        Analyzer::new(&config).unwrap()
    }

    #[test]
    fn test_detect_language() {
        let analyzer = default_analyzer();

        let cases = [
            ("test.ts", "typescript"),
            ("test.py", "python"),
            ("test.rs", "rust"),
            ("test.txt", "unknown"),
        ];
        for (file_name, expected) in cases.iter() {
            assert_eq!(analyzer.detect_language(Path::new(file_name)), *expected);
        }
    }

    #[test]
    fn test_parse_annotation_type() {
        let analyzer = default_analyzer();

        assert_eq!(
            analyzer.parse_annotation_type("summary"),
            Some(AnnotationType::Summary)
        );
        assert_eq!(
            analyzer.parse_annotation_type("domain"),
            Some(AnnotationType::Domain)
        );
        assert_eq!(analyzer.parse_annotation_type("unknown"), None);
    }

    #[test]
    fn test_calculate_total_coverage() {
        // One annotation in the first file, one gap in the second: 50 %.
        let mut annotated = AnalysisResult::new("file1.ts", "typescript");
        annotated.existing_annotations.push(ExistingAnnotation {
            target: "file1.ts".to_string(),
            annotation_type: AnnotationType::Module,
            value: "Test".to_string(),
            line: 1,
        });

        let mut with_gap = AnalysisResult::new("file2.ts", "typescript");
        with_gap.gaps.push(AnnotationGap::new("MyClass", 10));

        let coverage = Analyzer::calculate_total_coverage(&[annotated, with_gap]);
        assert!((coverage - 50.0).abs() < 0.01);
    }

    #[test]
    fn test_doc_comment_range() {
        // Exercises the `with_doc_comment_range` builder method.
        let gap = AnnotationGap::new("MyClass", 10).with_doc_comment_range(
            "/// This is a doc comment\n/// Second line",
            8,
            9,
        );

        assert_eq!(gap.doc_comment_range, Some((8, 9)));
        assert!(gap.doc_comment.is_some());
        assert!(gap.doc_comment.unwrap().contains("This is a doc comment"));
    }

    #[test]
    fn test_associate_annotations_with_symbols() {
        use crate::ast::SymbolKind;

        let analyzer = default_analyzer();

        // Every annotation starts out attached to the file.
        let on_file = |annotation_type, value: &str, line| ExistingAnnotation {
            target: "file.rs".to_string(),
            annotation_type,
            value: value.to_string(),
            line,
        };

        let mut annotations = vec![
            // Lines 28 and 29 sit right above the symbol at line 30.
            on_file(AnnotationType::Summary, "MyStruct summary", 28),
            on_file(AnnotationType::Domain, "core", 29),
            // Line 1 is more than 20 lines away from any symbol.
            on_file(AnnotationType::Module, "FileModule", 1),
        ];

        let symbols = vec![ExtractedSymbol {
            name: "MyStruct".to_string(),
            qualified_name: Some("module::MyStruct".to_string()),
            kind: SymbolKind::Struct,
            visibility: Visibility::Public,
            start_line: 30,
            end_line: 50,
            start_col: 0,
            end_col: 0,
            signature: None,
            doc_comment: None,
            parent: None,
            type_info: None,
            parameters: vec![],
            return_type: None,
            exported: true,
            is_async: false,
            is_static: false,
            generics: vec![],
            definition_start_line: Some(30),
        }];

        analyzer.associate_annotations_with_symbols(&mut annotations, &symbols);

        // The two nearby annotations move to the struct; the module
        // annotation at line 1 stays on the file.
        let targets: Vec<&str> = annotations.iter().map(|a| a.target.as_str()).collect();
        assert_eq!(targets, ["module::MyStruct", "module::MyStruct", "file.rs"]);
    }
}