acp/index/
indexer.rs

1//! @acp:module "Indexer"
2//! @acp:summary "Codebase indexing and cache generation (schema-compliant, RFC-003 provenance, RFC-006 bridging)"
3//! @acp:domain cli
4//! @acp:layer service
5//!
6//! Walks the codebase and builds the cache/vars files.
7//! Uses tree-sitter AST parsing for symbol extraction and git2 for metadata.
8//! Supports RFC-0003 annotation provenance tracking.
9//! Supports RFC-0006 documentation system bridging.
10
11use std::collections::HashMap;
12use std::fs;
13use std::path::Path;
14use std::sync::Arc;
15
16use chrono::{DateTime, Utc};
17use glob::Pattern;
18use rayon::prelude::*;
19use walkdir::WalkDir;
20
21use crate::annotate::converters::{
22    DocStandardParser, DocstringParser, GodocParser, JavadocParser, JsDocParser,
23    ParsedDocumentation, RustdocParser,
24};
25use crate::ast::{AstParser, ExtractedSymbol, SymbolKind, Visibility as AstVisibility};
26use crate::bridge::merger::AcpAnnotations;
27use crate::bridge::{BridgeConfig, BridgeMerger, FormatDetector};
28use crate::cache::{
29    AnnotationProvenance, BridgeMetadata, BridgeSource, BridgeStats, BridgeSummary, Cache,
30    CacheBuilder, DomainEntry, Language, LowConfidenceEntry, ProvenanceStats, SourceFormat,
31    SymbolEntry, SymbolType, Visibility,
32};
33use crate::config::Config;
34use crate::constraints::{
35    ConstraintIndex, Constraints, HackMarker, HackType, LockLevel, MutationConstraint,
36};
37use crate::error::Result;
38use crate::git::{BlameInfo, FileHistory, GitFileInfo, GitRepository, GitSymbolInfo};
39use crate::parse::{AnnotationWithProvenance, Parser, SourceOrigin};
40use crate::vars::{VarEntry, VarsFile};
41
42/// @acp:summary "Codebase indexer with parallel file processing"
43/// Uses tree-sitter AST parsing for accurate symbol extraction and git2 for metadata.
44/// Supports RFC-0006 documentation bridging.
45pub struct Indexer {
46    config: Config,
47    parser: Arc<Parser>,
48    ast_parser: Arc<AstParser>,
49    /// RFC-0006: Format detector for native documentation
50    format_detector: Arc<FormatDetector>,
51    /// RFC-0006: Merger for native docs with ACP annotations
52    bridge_merger: Arc<BridgeMerger>,
53}
54
55impl Indexer {
56    pub fn new(config: Config) -> Result<Self> {
57        // RFC-0006: Initialize bridge components
58        let format_detector = FormatDetector::new(&config.bridge);
59        let bridge_merger = BridgeMerger::new(&config.bridge);
60
61        Ok(Self {
62            config,
63            parser: Arc::new(Parser::new()),
64            ast_parser: Arc::new(AstParser::new()?),
65            format_detector: Arc::new(format_detector),
66            bridge_merger: Arc::new(bridge_merger),
67        })
68    }
69
70    /// @acp:summary "Index the codebase and generate cache"
71    /// @acp:ai-careful "This processes many files in parallel"
72    pub async fn index<P: AsRef<Path>>(&self, root: P) -> Result<Cache> {
73        let root = root.as_ref();
74        let project_name = root
75            .file_name()
76            .map(|n| n.to_string_lossy().to_string())
77            .unwrap_or_else(|| "project".to_string());
78
79        let mut builder = CacheBuilder::new(&project_name, &root.to_string_lossy());
80
81        // Try to open git repository for metadata
82        let git_repo = GitRepository::open(root).ok();
83
84        // Set git commit if available
85        if let Some(ref repo) = git_repo {
86            if let Ok(commit) = repo.head_commit() {
87                builder = builder.set_git_commit(commit);
88            }
89        }
90
91        // Find all matching files
92        let files = self.find_files(root)?;
93
94        // Add source_files with modification times
95        for file_path in &files {
96            if let Ok(metadata) = fs::metadata(file_path) {
97                if let Ok(modified) = metadata.modified() {
98                    let modified_dt: DateTime<Utc> = modified.into();
99                    let relative_path = Path::new(file_path)
100                        .strip_prefix(root)
101                        .map(|p| p.to_string_lossy().to_string())
102                        .unwrap_or_else(|_| file_path.clone());
103                    builder = builder.add_source_file(relative_path, modified_dt);
104                }
105            }
106        }
107
108        // Parse files in parallel using rayon
109        // Uses annotation parser as primary for metadata, AST parser for accurate symbols
110        let ast_parser = Arc::clone(&self.ast_parser);
111        let annotation_parser = Arc::clone(&self.parser);
112        let root_path = root.to_path_buf();
113
114        // RFC-0003: Get review threshold from config
115        let review_threshold = self.config.annotate.provenance.review_threshold;
116
117        // RFC-0006: Clone bridge components for parallel access
118        let format_detector = Arc::clone(&self.format_detector);
119        let bridge_merger = Arc::clone(&self.bridge_merger);
120        let bridge_enabled = self.config.bridge.enabled;
121
122        let mut results: Vec<_> = files
123            .par_iter()
124            .filter_map(|path| {
125                // Parse with annotation parser (metadata, domains, etc.)
126                let mut parse_result = annotation_parser.parse(path).ok()?;
127
128                // Try AST parsing for accurate symbol extraction
129                if let Ok(source) = std::fs::read_to_string(path) {
130                    // RFC-0003: Parse annotations with provenance support
131                    let annotations_with_prov =
132                        annotation_parser.parse_annotations_with_provenance(&source);
133                    let file_provenance =
134                        extract_provenance(&annotations_with_prov, review_threshold);
135
136                    // Add provenance to file entry
137                    parse_result.file.annotations = file_provenance;
138
139                    // RFC-0006: Detect documentation format and populate bridge metadata
140                    if bridge_enabled {
141                        let language = language_name_from_enum(parse_result.file.language);
142                        let detected_format = format_detector.detect(&source, language);
143
144                        // Initialize bridge metadata
145                        parse_result.file.bridge = BridgeMetadata {
146                            enabled: true,
147                            detected_format,
148                            converted_count: 0,
149                            merged_count: 0,
150                            explicit_count: 0,
151                        };
152
153                        // Count explicit ACP annotations
154                        let explicit_count = parse_result
155                            .file
156                            .annotations
157                            .values()
158                            .filter(|p| matches!(p.source, SourceOrigin::Explicit))
159                            .count() as u64;
160                        parse_result.file.bridge.explicit_count = explicit_count;
161
162                        // Count converted annotations (from provenance tracking)
163                        let converted_count = parse_result
164                            .file
165                            .annotations
166                            .values()
167                            .filter(|p| matches!(p.source, SourceOrigin::Converted))
168                            .count() as u64;
169                        parse_result.file.bridge.converted_count = converted_count;
170                    }
171
172                    if let Ok(ast_symbols) = ast_parser.parse_file(Path::new(path), &source) {
173                        // Convert AST symbols to cache symbols and merge
174                        let relative_path = Path::new(path)
175                            .strip_prefix(&root_path)
176                            .map(|p| p.to_string_lossy().to_string())
177                            .unwrap_or_else(|_| path.clone());
178
179                        let converted = convert_ast_symbols(&ast_symbols, &relative_path);
180
181                        // Merge: prefer AST symbols but keep annotation metadata
182                        if !converted.is_empty() {
183                            // Keep summaries from annotation parser
184                            let annotation_summaries: HashMap<_, _> = parse_result
185                                .symbols
186                                .iter()
187                                .filter_map(|s| {
188                                    s.summary.as_ref().map(|sum| (s.name.clone(), sum.clone()))
189                                })
190                                .collect();
191
192                            parse_result.symbols = converted;
193
194                            // Restore summaries from annotations
195                            for symbol in &mut parse_result.symbols {
196                                if symbol.summary.is_none() {
197                                    if let Some(sum) = annotation_summaries.get(&symbol.name) {
198                                        symbol.summary = Some(sum.clone());
199                                    }
200                                }
201                            }
202
203                            // RFC-0006: Apply bridge merging for symbols with doc comments
204                            if bridge_enabled {
205                                if let Some(ref detected_format) =
206                                    parse_result.file.bridge.detected_format
207                                {
208                                    // Build map of AST symbols by name for doc_comment lookup
209                                    let ast_doc_comments: HashMap<_, _> = ast_symbols
210                                        .iter()
211                                        .filter_map(|s| {
212                                            s.doc_comment
213                                                .as_ref()
214                                                .map(|doc| (s.name.clone(), doc.clone()))
215                                        })
216                                        .collect();
217
218                                    let mut merged_count = 0u64;
219                                    for symbol in &mut parse_result.symbols {
220                                        if let Some(doc_comment) =
221                                            ast_doc_comments.get(&symbol.name)
222                                        {
223                                            // Parse native documentation
224                                            let native_docs =
225                                                parse_native_docs(doc_comment, detected_format);
226
227                                            // Extract ACP annotations from doc comment
228                                            let acp_annotations = extract_acp_annotations(
229                                                doc_comment,
230                                                &annotation_parser,
231                                            );
232
233                                            // Merge using bridge merger
234                                            let bridge_result = bridge_merger.merge(
235                                                native_docs.as_ref(),
236                                                *detected_format,
237                                                &acp_annotations,
238                                            );
239
240                                            // Update symbol with merged data
241                                            if bridge_result.summary.is_some() {
242                                                symbol.summary = bridge_result.summary;
243                                            }
244                                            if bridge_result.directive.is_some() {
245                                                symbol.purpose = bridge_result.directive;
246                                            }
247
248                                            // Track merged count
249                                            if matches!(bridge_result.source, BridgeSource::Merged)
250                                            {
251                                                merged_count += 1;
252                                            }
253                                        }
254                                    }
255                                    parse_result.file.bridge.merged_count = merged_count;
256                                }
257                            }
258                        }
259
260                        // Extract calls from AST
261                        if let Ok(calls) = ast_parser.parse_calls(Path::new(path), &source) {
262                            for call in calls {
263                                if !call.caller.is_empty() {
264                                    parse_result
265                                        .calls
266                                        .push((call.caller.clone(), vec![call.callee.clone()]));
267                                }
268                            }
269                        }
270                    }
271                }
272
273                Some(parse_result)
274            })
275            .collect();
276
277        // Add git metadata sequentially (git2::Repository is not Sync)
278        if let Some(ref repo) = git_repo {
279            for parse_result in &mut results {
280                let file_path = &parse_result.file.path;
281                // Strip "./" prefix if present - git expects paths like "src/lib.rs" not "./src/lib.rs"
282                let clean_path = file_path.strip_prefix("./").unwrap_or(file_path);
283                let relative_path = Path::new(clean_path);
284
285                // Add git metadata for the file (only if we have valid git history)
286                if let Ok(history) = FileHistory::for_file(repo, relative_path, 100) {
287                    if let Some(latest) = history.latest() {
288                        // Only set git info if we have actual commit data
289                        parse_result.file.git = Some(GitFileInfo {
290                            last_commit: latest.commit.clone(),
291                            last_author: latest.author.clone(),
292                            last_modified: latest.timestamp,
293                            commit_count: history.commit_count(),
294                            contributors: history.contributors(),
295                        });
296                    }
297                }
298
299                // Add git metadata for symbols using blame
300                if let Ok(blame) = BlameInfo::for_file(repo, relative_path) {
301                    for symbol in &mut parse_result.symbols {
302                        if let Some(line_blame) =
303                            blame.last_modified(symbol.lines[0], symbol.lines[1])
304                        {
305                            let age_days =
306                                (Utc::now() - line_blame.timestamp).num_days().max(0) as u32;
307                            symbol.git = Some(GitSymbolInfo {
308                                last_commit: line_blame.commit.clone(),
309                                last_author: line_blame.author.clone(),
310                                code_age_days: age_days,
311                            });
312                        }
313                    }
314                }
315            }
316        }
317
318        // Build cache from results
319        let mut domains: std::collections::HashMap<String, Vec<String>> =
320            std::collections::HashMap::new();
321        let mut constraint_index = ConstraintIndex::default();
322
323        for result in &results {
324            // Add file
325            builder = builder.add_file(result.file.clone());
326
327            // Add symbols
328            for symbol in &result.symbols {
329                builder = builder.add_symbol(symbol.clone());
330            }
331
332            // Add call edges
333            for (from, to) in &result.calls {
334                builder = builder.add_call_edge(from, to.clone());
335            }
336
337            // Track domains
338            for domain in &result.file.domains {
339                domains
340                    .entry(domain.clone())
341                    .or_default()
342                    .push(result.file.path.clone());
343            }
344
345            // Build constraints from parse result (RFC-001 compliant)
346            if result.lock_level.is_some() || !result.ai_hints.is_empty() {
347                let lock_level = result
348                    .lock_level
349                    .as_ref()
350                    .map(|l| match l.to_lowercase().as_str() {
351                        "frozen" => LockLevel::Frozen,
352                        "restricted" => LockLevel::Restricted,
353                        "approval-required" => LockLevel::ApprovalRequired,
354                        "tests-required" => LockLevel::TestsRequired,
355                        "docs-required" => LockLevel::DocsRequired,
356                        "experimental" => LockLevel::Experimental,
357                        _ => LockLevel::Normal,
358                    })
359                    .unwrap_or(LockLevel::Normal);
360
361                let constraints = Constraints {
362                    mutation: Some(MutationConstraint {
363                        level: lock_level,
364                        reason: None,
365                        contact: None,
366                        requires_approval: matches!(lock_level, LockLevel::ApprovalRequired),
367                        requires_tests: matches!(lock_level, LockLevel::TestsRequired),
368                        requires_docs: matches!(lock_level, LockLevel::DocsRequired),
369                        max_lines_changed: None,
370                        allowed_operations: None,
371                        forbidden_operations: None,
372                    }),
373                    // RFC-001: Include directive from lock annotation
374                    directive: result.lock_directive.clone(),
375                    auto_generated: result.lock_directive.is_none(),
376                    ..Default::default()
377                };
378                constraint_index
379                    .by_file
380                    .insert(result.file.path.clone(), constraints);
381
382                // Track by lock level
383                let level_str = format!("{:?}", lock_level).to_lowercase();
384                constraint_index
385                    .by_lock_level
386                    .entry(level_str)
387                    .or_default()
388                    .push(result.file.path.clone());
389            }
390
391            // Build hack markers
392            for hack in &result.hacks {
393                let hack_marker = HackMarker {
394                    id: format!("{}:{}", result.file.path, hack.line),
395                    hack_type: HackType::Workaround,
396                    file: result.file.path.clone(),
397                    line: Some(hack.line),
398                    created_at: Utc::now(),
399                    author: None,
400                    reason: hack
401                        .reason
402                        .clone()
403                        .unwrap_or_else(|| "Temporary hack".to_string()),
404                    ticket: hack.ticket.clone(),
405                    expires: hack.expires.as_ref().and_then(|e| {
406                        chrono::NaiveDate::parse_from_str(e, "%Y-%m-%d")
407                            .ok()
408                            .map(|d| d.and_hms_opt(0, 0, 0).unwrap())
409                            .map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
410                    }),
411                    original_code: None,
412                    revert_instructions: None,
413                };
414                constraint_index.hacks.push(hack_marker);
415            }
416        }
417
418        // Add domains to cache
419        for (name, files) in domains {
420            builder = builder.add_domain(DomainEntry {
421                name: name.clone(),
422                files: files.clone(),
423                symbols: vec![],
424                description: None,
425            });
426        }
427
428        // Add constraints if any were found
429        if !constraint_index.by_file.is_empty() || !constraint_index.hacks.is_empty() {
430            builder = builder.set_constraints(constraint_index);
431        }
432
433        // Build the cache
434        let mut cache = builder.build();
435
436        // RFC-0015: Compute reverse import graph (imported_by)
437        compute_import_graph(&mut cache);
438
439        // RFC-0003: Compute provenance statistics
440        let low_conf_threshold = 0.5; // TODO: Read from config when available
441        cache.provenance = compute_provenance_stats(&cache, low_conf_threshold);
442
443        // RFC-0006: Compute bridge statistics
444        cache.bridge = compute_bridge_stats(&cache, &self.config.bridge);
445
446        Ok(cache)
447    }
448
449    /// @acp:summary "Find all files matching include/exclude patterns"
450    fn find_files<P: AsRef<Path>>(&self, root: P) -> Result<Vec<String>> {
451        let root = root.as_ref();
452        let include_patterns: Vec<_> = self
453            .config
454            .include
455            .iter()
456            .filter_map(|p| Pattern::new(p).ok())
457            .collect();
458
459        let exclude_patterns: Vec<_> = self
460            .config
461            .exclude
462            .iter()
463            .filter_map(|p| Pattern::new(p).ok())
464            .collect();
465
466        let files: Vec<String> = WalkDir::new(root)
467            .into_iter()
468            .filter_map(|e| e.ok())
469            .filter(|e| e.file_type().is_file())
470            .filter_map(|e| {
471                // Get path relative to root for pattern matching
472                let full_path = e.path().to_string_lossy().to_string();
473                let relative_path = e
474                    .path()
475                    .strip_prefix(root)
476                    .map(|p| p.to_string_lossy().to_string())
477                    .unwrap_or_else(|_| full_path.clone());
478
479                // Must match at least one include pattern
480                let match_opts = glob::MatchOptions {
481                    case_sensitive: true,
482                    require_literal_separator: false,
483                    require_literal_leading_dot: false,
484                };
485                let included = include_patterns.is_empty()
486                    || include_patterns
487                        .iter()
488                        .any(|p| p.matches_with(&relative_path, match_opts));
489                // Must not match any exclude pattern
490                let excluded = exclude_patterns
491                    .iter()
492                    .any(|p| p.matches_with(&relative_path, match_opts));
493
494                if included && !excluded {
495                    Some(full_path)
496                } else {
497                    None
498                }
499            })
500            .collect();
501
502        Ok(files)
503    }
504
505    /// @acp:summary "Generate vars file from cache (schema-compliant)"
506    pub fn generate_vars(&self, cache: &Cache) -> VarsFile {
507        let mut vars_file = VarsFile::new();
508
509        // Build a map of symbol names to var names for ref resolution
510        let mut symbol_to_var: std::collections::HashMap<String, String> =
511            std::collections::HashMap::new();
512        for (name, symbol) in &cache.symbols {
513            if symbol.exported {
514                let var_name = format!("SYM_{}", name.to_uppercase().replace('.', "_"));
515                symbol_to_var.insert(name.clone(), var_name);
516            }
517        }
518
519        // Generate symbol vars with refs from call graph
520        for (name, symbol) in &cache.symbols {
521            if symbol.exported {
522                let var_name = format!("SYM_{}", name.to_uppercase().replace('.', "_"));
523
524                // Build refs from symbols this one calls
525                let refs: Vec<String> = symbol
526                    .calls
527                    .iter()
528                    .filter_map(|callee| symbol_to_var.get(callee).cloned())
529                    .collect();
530
531                let entry = VarEntry {
532                    var_type: crate::vars::VarType::Symbol,
533                    value: symbol.qualified_name.clone(),
534                    description: symbol.summary.clone(),
535                    refs,
536                    source: Some(symbol.file.clone()),
537                    lines: Some(symbol.lines),
538                };
539
540                vars_file.add_variable(var_name, entry);
541            }
542        }
543
544        // Generate domain vars
545        for (name, domain) in &cache.domains {
546            let var_name = format!("DOM_{}", name.to_uppercase().replace('-', "_"));
547            vars_file.add_variable(
548                var_name,
549                VarEntry::domain(
550                    name.clone(),
551                    Some(format!("Domain: {} ({} files)", name, domain.files.len())),
552                ),
553            );
554        }
555
556        // Generate file vars for important files
557        for (path, file) in &cache.files {
558            // Only generate vars for files with modules or summaries
559            if file.module.is_some() || file.summary.is_some() {
560                let var_name = format!("FILE_{}", path.replace(['/', '.'], "_").to_uppercase());
561                vars_file.add_variable(
562                    var_name,
563                    VarEntry::file(
564                        path.clone(),
565                        file.summary.clone().or_else(|| file.module.clone()),
566                    ),
567                );
568            }
569        }
570
571        // Generate layer vars from unique layers
572        let mut layers: std::collections::HashSet<String> = std::collections::HashSet::new();
573        for file in cache.files.values() {
574            if let Some(layer) = &file.layer {
575                layers.insert(layer.clone());
576            }
577        }
578        for layer in layers {
579            let var_name = format!("LAYER_{}", layer.to_uppercase().replace('-', "_"));
580            let file_count = cache
581                .files
582                .values()
583                .filter(|f| f.layer.as_ref() == Some(&layer))
584                .count();
585            vars_file.add_variable(
586                var_name,
587                VarEntry::layer(
588                    layer.clone(),
589                    Some(format!("Layer: {} ({} files)", layer, file_count)),
590                ),
591            );
592        }
593
594        vars_file
595    }
596}
597
598/// Detect language from file extension
599pub fn detect_language(path: &str) -> Option<Language> {
600    let path = Path::new(path);
601    let ext = path.extension()?.to_str()?;
602
603    match ext.to_lowercase().as_str() {
604        "ts" | "tsx" => Some(Language::Typescript),
605        "js" | "jsx" | "mjs" | "cjs" => Some(Language::Javascript),
606        "py" | "pyw" => Some(Language::Python),
607        "rs" => Some(Language::Rust),
608        "go" => Some(Language::Go),
609        "java" => Some(Language::Java),
610        "cs" => Some(Language::CSharp),
611        "cpp" | "cxx" | "cc" | "hpp" | "hxx" => Some(Language::Cpp),
612        "c" | "h" => Some(Language::C),
613        "rb" => Some(Language::Ruby),
614        "php" => Some(Language::Php),
615        "swift" => Some(Language::Swift),
616        "kt" | "kts" => Some(Language::Kotlin),
617        _ => None,
618    }
619}
620
621/// Convert AST-extracted symbols to cache SymbolEntry format
622fn convert_ast_symbols(ast_symbols: &[ExtractedSymbol], file_path: &str) -> Vec<SymbolEntry> {
623    ast_symbols
624        .iter()
625        .map(|sym| {
626            let symbol_type = match sym.kind {
627                SymbolKind::Function => SymbolType::Function,
628                SymbolKind::Method => SymbolType::Method,
629                SymbolKind::Class => SymbolType::Class,
630                SymbolKind::Struct => SymbolType::Struct,
631                SymbolKind::Interface => SymbolType::Interface,
632                SymbolKind::Trait => SymbolType::Trait,
633                SymbolKind::Enum => SymbolType::Enum,
634                SymbolKind::EnumVariant => SymbolType::Enum,
635                SymbolKind::Constant => SymbolType::Const,
636                SymbolKind::Variable => SymbolType::Const,
637                SymbolKind::TypeAlias => SymbolType::Type,
638                SymbolKind::Module => SymbolType::Function, // No direct mapping
639                SymbolKind::Namespace => SymbolType::Function, // No direct mapping
640                SymbolKind::Property => SymbolType::Function, // No direct mapping
641                SymbolKind::Field => SymbolType::Function,  // No direct mapping
642                SymbolKind::Impl => SymbolType::Class,      // Map impl to class
643            };
644
645            let visibility = match sym.visibility {
646                AstVisibility::Public => Visibility::Public,
647                AstVisibility::Private => Visibility::Private,
648                AstVisibility::Protected => Visibility::Protected,
649                AstVisibility::Internal | AstVisibility::Crate => Visibility::Private,
650            };
651
652            let qualified_name = sym
653                .qualified_name
654                .clone()
655                .unwrap_or_else(|| format!("{}:{}", file_path, sym.name));
656
657            SymbolEntry {
658                name: sym.name.clone(),
659                qualified_name,
660                symbol_type,
661                file: file_path.to_string(),
662                lines: [sym.start_line, sym.end_line],
663                exported: matches!(sym.visibility, AstVisibility::Public),
664                signature: sym.signature.clone(),
665                summary: sym.doc_comment.clone(),
666                purpose: None, // RFC-001: Populated from @acp:fn/@acp:class annotations
667                constraints: None, // RFC-001: Populated from symbol-level constraints
668                async_fn: sym.is_async,
669                visibility,
670                calls: vec![],               // Populated separately from call graph
671                called_by: vec![],           // Populated by graph builder
672                git: None,                   // Populated after symbol creation
673                annotations: HashMap::new(), // RFC-0003: Populated during indexing
674                // RFC-0009: Extended annotation types
675                behavioral: None,
676                lifecycle: None,
677                documentation: None,
678                performance: None,
679                // RFC-0008: Type annotation info
680                type_info: None,
681            }
682        })
683        .collect()
684}
685
686// ============================================================================
687// RFC-0003: Annotation Provenance Functions
688// ============================================================================
689
690/// Extract provenance data from parsed annotations (RFC-0003)
691///
692/// Converts AnnotationWithProvenance to AnnotationProvenance entries
693/// suitable for storage in the cache.
694fn extract_provenance(
695    annotations: &[AnnotationWithProvenance],
696    review_threshold: f64,
697) -> HashMap<String, AnnotationProvenance> {
698    let mut result = HashMap::new();
699
700    for ann in annotations {
701        // Skip provenance-only annotations (source, source-confidence, etc.)
702        if ann.annotation.name.starts_with("source") {
703            continue;
704        }
705
706        let key = format!("@acp:{}", ann.annotation.name);
707
708        let prov = if let Some(ref marker) = ann.provenance {
709            let needs_review = marker.confidence.is_some_and(|c| c < review_threshold);
710
711            AnnotationProvenance {
712                value: ann.annotation.value.clone().unwrap_or_default(),
713                source: marker.source,
714                confidence: marker.confidence,
715                needs_review,
716                reviewed: marker.reviewed.unwrap_or(false),
717                reviewed_at: None,
718                generated_at: Some(Utc::now().to_rfc3339()),
719                generation_id: marker.generation_id.clone(),
720            }
721        } else {
722            // No provenance markers = explicit annotation (human-written)
723            AnnotationProvenance {
724                value: ann.annotation.value.clone().unwrap_or_default(),
725                source: SourceOrigin::Explicit,
726                confidence: None,
727                needs_review: false,
728                reviewed: true, // Explicit annotations are considered reviewed
729                reviewed_at: None,
730                generated_at: None,
731                generation_id: None,
732            }
733        };
734
735        result.insert(key, prov);
736    }
737
738    result
739}
740
741/// Compute aggregate provenance statistics from cache (RFC-0003)
742///
743/// Aggregates provenance data from all files and symbols to produce
744/// summary statistics for the cache.
745fn compute_provenance_stats(cache: &Cache, low_conf_threshold: f64) -> ProvenanceStats {
746    let mut stats = ProvenanceStats::default();
747    let mut confidence_sums: HashMap<String, (f64, u64)> = HashMap::new();
748
749    // Process file annotations
750    for (path, file) in &cache.files {
751        for (key, prov) in &file.annotations {
752            update_provenance_stats(
753                &mut stats,
754                &mut confidence_sums,
755                key,
756                prov,
757                path,
758                low_conf_threshold,
759            );
760        }
761    }
762
763    // Process symbol annotations
764    for symbol in cache.symbols.values() {
765        for (key, prov) in &symbol.annotations {
766            let target = format!("{}:{}", symbol.file, symbol.name);
767            update_provenance_stats(
768                &mut stats,
769                &mut confidence_sums,
770                key,
771                prov,
772                &target,
773                low_conf_threshold,
774            );
775        }
776    }
777
778    // Calculate average confidence per source type
779    for (source, (sum, count)) in confidence_sums {
780        if count > 0 {
781            stats
782                .summary
783                .average_confidence
784                .insert(source, sum / count as f64);
785        }
786    }
787
788    // Sort low confidence entries by confidence (ascending)
789    stats.low_confidence.sort_by(|a, b| {
790        a.confidence
791            .partial_cmp(&b.confidence)
792            .unwrap_or(std::cmp::Ordering::Equal)
793    });
794
795    stats
796}
797
798/// Update provenance statistics with a single annotation's data
799fn update_provenance_stats(
800    stats: &mut ProvenanceStats,
801    confidence_sums: &mut HashMap<String, (f64, u64)>,
802    key: &str,
803    prov: &AnnotationProvenance,
804    target: &str,
805    low_conf_threshold: f64,
806) {
807    stats.summary.total += 1;
808
809    // Count by source type
810    match prov.source {
811        SourceOrigin::Explicit => stats.summary.by_source.explicit += 1,
812        SourceOrigin::Converted => stats.summary.by_source.converted += 1,
813        SourceOrigin::Heuristic => stats.summary.by_source.heuristic += 1,
814        SourceOrigin::Refined => stats.summary.by_source.refined += 1,
815        SourceOrigin::Inferred => stats.summary.by_source.inferred += 1,
816    }
817
818    // Count review status
819    if prov.needs_review {
820        stats.summary.needs_review += 1;
821    }
822    if prov.reviewed {
823        stats.summary.reviewed += 1;
824    }
825
826    // Track confidence for averaging
827    if let Some(conf) = prov.confidence {
828        let source_key = prov.source.as_str().to_string();
829        let entry = confidence_sums.entry(source_key).or_insert((0.0, 0));
830        entry.0 += conf;
831        entry.1 += 1;
832
833        // Track low confidence annotations
834        if conf < low_conf_threshold {
835            stats.low_confidence.push(LowConfidenceEntry {
836                target: target.to_string(),
837                annotation: key.to_string(),
838                confidence: conf,
839                value: prov.value.clone(),
840            });
841        }
842    }
843}
844
845// ============================================================================
846// RFC-0006: Documentation Bridging Functions
847// ============================================================================
848
849/// Convert Language enum to string for FormatDetector
850fn language_name_from_enum(lang: Language) -> &'static str {
851    match lang {
852        Language::Typescript => "typescript",
853        Language::Javascript => "javascript",
854        Language::Python => "python",
855        Language::Rust => "rust",
856        Language::Go => "go",
857        Language::Java => "java",
858        Language::CSharp => "csharp",
859        Language::Cpp => "cpp",
860        Language::C => "c",
861        Language::Ruby => "ruby",
862        Language::Php => "php",
863        Language::Swift => "swift",
864        Language::Kotlin => "kotlin",
865    }
866}
867
868/// Compute aggregate bridge statistics from cache (RFC-0006)
869///
870/// Aggregates bridging data from all files to produce summary statistics.
871fn compute_bridge_stats(cache: &Cache, config: &BridgeConfig) -> BridgeStats {
872    let mut stats = BridgeStats {
873        enabled: config.enabled,
874        precedence: config.precedence.to_string(),
875        summary: BridgeSummary::default(),
876        by_format: HashMap::new(),
877    };
878
879    if !config.enabled {
880        return stats;
881    }
882
883    // Aggregate from file bridge metadata
884    for file in cache.files.values() {
885        if !file.bridge.enabled {
886            continue;
887        }
888
889        stats.summary.explicit_count += file.bridge.explicit_count;
890        stats.summary.converted_count += file.bridge.converted_count;
891        stats.summary.merged_count += file.bridge.merged_count;
892
893        // Track by detected format
894        if let Some(format) = &file.bridge.detected_format {
895            let format_key = format_to_string(format);
896            let format_count = file.bridge.converted_count + file.bridge.merged_count;
897            if format_count > 0 {
898                *stats.by_format.entry(format_key).or_insert(0) += format_count;
899            }
900        }
901    }
902
903    stats.summary.total_annotations =
904        stats.summary.explicit_count + stats.summary.converted_count + stats.summary.merged_count;
905
906    stats
907}
908
909/// Convert SourceFormat to string key for by_format map
910fn format_to_string(format: &SourceFormat) -> String {
911    match format {
912        SourceFormat::Acp => "acp".to_string(),
913        SourceFormat::Jsdoc => "jsdoc".to_string(),
914        SourceFormat::DocstringGoogle => "docstring:google".to_string(),
915        SourceFormat::DocstringNumpy => "docstring:numpy".to_string(),
916        SourceFormat::DocstringSphinx => "docstring:sphinx".to_string(),
917        SourceFormat::Rustdoc => "rustdoc".to_string(),
918        SourceFormat::Javadoc => "javadoc".to_string(),
919        SourceFormat::Godoc => "godoc".to_string(),
920        SourceFormat::TypeHint => "type_hint".to_string(),
921    }
922}
923
924/// Parse native documentation from a doc comment based on detected format
925fn parse_native_docs(doc_comment: &str, format: &SourceFormat) -> Option<ParsedDocumentation> {
926    let parsed = match format {
927        SourceFormat::Jsdoc => JsDocParser::new().parse(doc_comment),
928        SourceFormat::DocstringGoogle
929        | SourceFormat::DocstringNumpy
930        | SourceFormat::DocstringSphinx => DocstringParser::new().parse(doc_comment),
931        SourceFormat::Rustdoc => RustdocParser::new().parse(doc_comment),
932        SourceFormat::Javadoc => JavadocParser::new().parse(doc_comment),
933        SourceFormat::Godoc => GodocParser::new().parse(doc_comment),
934        SourceFormat::Acp | SourceFormat::TypeHint => return None,
935    };
936
937    if parsed.is_empty() {
938        None
939    } else {
940        Some(parsed)
941    }
942}
943
944/// Extract ACP annotations from a doc comment and convert to AcpAnnotations
945fn extract_acp_annotations(doc_comment: &str, parser: &Parser) -> AcpAnnotations {
946    let annotations = parser.parse_annotations(doc_comment);
947
948    let mut result = AcpAnnotations::default();
949
950    for ann in annotations {
951        match ann.name.as_str() {
952            "summary" => {
953                if let Some(ref value) = ann.value {
954                    result.summary = Some(value.clone());
955                }
956            }
957            "fn" | "method" => {
958                // @acp:fn "summary" - directive
959                // The parser already extracts value and directive separately
960                if let Some(ref value) = ann.value {
961                    // Value might be the summary in quotes
962                    if let Some((summary, _)) = parse_fn_annotation(value) {
963                        if result.summary.is_none() {
964                            result.summary = Some(summary);
965                        }
966                    }
967                }
968                // Directive is already parsed by the Parser
969                if let Some(ref directive) = ann.directive {
970                    result.directive = Some(directive.clone());
971                }
972            }
973            "param" => {
974                // @acp:param {type} name - directive
975                // Extract name from value, directive is already parsed
976                if let Some(ref value) = ann.value {
977                    if let Some((name, _)) = parse_param_annotation(value) {
978                        let directive = ann.directive.clone().unwrap_or_default();
979                        result.params.push((name, directive));
980                    }
981                }
982            }
983            "returns" => {
984                // @acp:returns {type} - directive
985                // Directive is already parsed by the Parser
986                if let Some(ref directive) = ann.directive {
987                    result.returns = Some(directive.clone());
988                } else if let Some(ref value) = ann.value {
989                    // Fallback: try to extract directive from value
990                    if let Some(directive) = parse_returns_annotation(value) {
991                        result.returns = Some(directive);
992                    }
993                }
994            }
995            "throws" => {
996                // @acp:throws {exception} - directive
997                if let Some(ref value) = ann.value {
998                    // Extract exception type from value
999                    let exception = if value.starts_with('{') {
1000                        if let Some(close) = value.find('}') {
1001                            value[1..close].to_string()
1002                        } else {
1003                            value.clone()
1004                        }
1005                    } else {
1006                        value.split_whitespace().next().unwrap_or(value).to_string()
1007                    };
1008                    let directive = ann.directive.clone().unwrap_or_default();
1009                    result.throws.push((exception, directive));
1010                }
1011            }
1012            _ => {}
1013        }
1014    }
1015
1016    result
1017}
1018
/// Split an `@acp:fn` value of the form `"summary text" - directive text`
/// into `(summary, directive)`.
///
/// Returns `None` unless the value starts with a double quote, contains a
/// closing double quote, and has a non-empty directive after it. Leading
/// dashes and surrounding whitespace are stripped from the directive.
fn parse_fn_annotation(value: &str) -> Option<(String, String)> {
    let (summary, tail) = value.strip_prefix('"')?.split_once('"')?;
    let directive = tail.trim().trim_start_matches('-').trim();

    if directive.is_empty() {
        return None;
    }
    Some((summary.to_string(), directive.to_string()))
}
1035
/// Split an `@acp:param` value into `(name, directive)`.
///
/// Accepted shapes: `{type} name - directive`, `name - directive`, and the
/// optional forms `[name]` / `[name=default]`, with or without a leading
/// `{type}`. A leading `{type}` is only skipped when its brace is closed.
///
/// Returns `None` when an opening `[` is never closed or when the resulting
/// name is empty. The directive may be empty.
fn parse_param_annotation(value: &str) -> Option<(String, String)> {
    let trimmed = value.trim();

    // Drop a leading `{type}`; an unclosed brace leaves the text untouched.
    let after_type = trimmed
        .strip_prefix('{')
        .and_then(|inner| inner.split_once('}'))
        .map_or(trimmed, |(_, tail)| tail)
        .trim();

    let (name, tail) = match after_type.strip_prefix('[') {
        // Optional parameter: `[name]` or `[name=default]`.
        Some(bracketed) => {
            let (inner, tail) = bracketed.split_once(']')?;
            let name = inner.split('=').next().unwrap_or(inner).trim();
            (name, tail)
        }
        // Regular parameter: the name runs up to the first whitespace.
        None => after_type
            .split_once(char::is_whitespace)
            .unwrap_or((after_type, "")),
    };

    if name.is_empty() {
        return None;
    }

    // The directive is whatever follows the name, minus the separating dash.
    let directive = tail.trim().trim_start_matches('-').trim();
    Some((name.to_string(), directive.to_string()))
}
1082
/// Extract the directive text from an `@acp:returns` value.
///
/// Accepted shapes: `{type} - directive`, `- directive`, or `directive`.
/// A leading `{type}` is only skipped when its brace is closed. Returns
/// `None` when nothing is left after stripping the type, leading dashes and
/// surrounding whitespace.
fn parse_returns_annotation(value: &str) -> Option<String> {
    let trimmed = value.trim();

    // Drop a leading `{type}`; an unclosed brace leaves the text untouched.
    let after_type = trimmed
        .strip_prefix('{')
        .and_then(|inner| inner.split_once('}'))
        .map_or(trimmed, |(_, tail)| tail);

    let directive = after_type.trim().trim_start_matches('-').trim();
    (!directive.is_empty()).then(|| directive.to_string())
}
1107
1108// ============================================================================
1109// RFC-0015: Import Graph Computation
1110// ============================================================================
1111
1112/// Compute reverse import graph for all files (RFC-0015)
1113///
1114/// For each file's imports, resolve the import path to a file in the cache
1115/// and add the importing file to that target's `imported_by` list.
1116fn compute_import_graph(cache: &mut Cache) {
1117    use std::path::Path;
1118
1119    // Collect all import relationships first (avoid borrow issues)
1120    let mut import_edges: Vec<(String, String)> = Vec::new();
1121
1122    // Get all file paths for lookup
1123    let file_paths: std::collections::HashSet<_> = cache.files.keys().cloned().collect();
1124
1125    for (importer_path, file) in &cache.files {
1126        let importer_dir = Path::new(importer_path)
1127            .parent()
1128            .map(|p| p.to_string_lossy().to_string())
1129            .unwrap_or_default();
1130
1131        for import_source in &file.imports {
1132            // Try to resolve the import to a file in the cache
1133            if let Some(resolved) = resolve_import_path(import_source, &importer_dir, &file_paths) {
1134                import_edges.push((importer_path.clone(), resolved));
1135            }
1136        }
1137    }
1138
1139    // Apply the reverse edges
1140    for (importer, imported) in import_edges {
1141        if let Some(file) = cache.files.get_mut(&imported) {
1142            if !file.imported_by.contains(&importer) {
1143                file.imported_by.push(importer);
1144            }
1145        }
1146    }
1147
1148    // Sort imported_by lists for consistent output
1149    for file in cache.files.values_mut() {
1150        file.imported_by.sort();
1151    }
1152}
1153
1154/// Resolve an import path to a file path in the cache
1155///
1156/// Handles:
1157/// - Relative imports: `./utils`, `../lib/helper`
1158/// - Index file resolution: `./utils` -> `./utils/index.ts`
1159/// - Extension resolution: `./utils` -> `./utils.ts`
1160fn resolve_import_path(
1161    import_source: &str,
1162    importer_dir: &str,
1163    file_paths: &std::collections::HashSet<String>,
1164) -> Option<String> {
1165    // Skip external packages (no path prefix)
1166    if !import_source.starts_with('.') && !import_source.starts_with('/') {
1167        return None;
1168    }
1169
1170    // Normalize the import path
1171    let normalized = if import_source.starts_with('.') {
1172        // Relative import - resolve against importer's directory
1173        let combined = if importer_dir.is_empty() {
1174            import_source.to_string()
1175        } else {
1176            format!("{}/{}", importer_dir, import_source)
1177        };
1178        crate::cache::normalize_path(&combined)
1179    } else {
1180        // Absolute import (starts with /)
1181        crate::cache::normalize_path(import_source)
1182    };
1183
1184    // Common extensions to try
1185    let extensions = [".ts", ".tsx", ".js", ".jsx", ".mjs", ".rs", ".py", ".go"];
1186
1187    // Try exact match first
1188    let with_prefix = format!("./{}", normalized);
1189    if file_paths.contains(&normalized) {
1190        return Some(normalized);
1191    }
1192    if file_paths.contains(&with_prefix) {
1193        return Some(with_prefix);
1194    }
1195
1196    // Try with various extensions
1197    for ext in &extensions {
1198        let with_ext = format!("{}{}", normalized, ext);
1199        let with_prefix_ext = format!("./{}{}", normalized, ext);
1200
1201        if file_paths.contains(&with_ext) {
1202            return Some(with_ext);
1203        }
1204        if file_paths.contains(&with_prefix_ext) {
1205            return Some(with_prefix_ext);
1206        }
1207    }
1208
1209    // Try index file resolution
1210    for ext in &extensions {
1211        let index_path = format!("{}/index{}", normalized, ext);
1212        let with_prefix_index = format!("./{}/index{}", normalized, ext);
1213
1214        if file_paths.contains(&index_path) {
1215            return Some(index_path);
1216        }
1217        if file_paths.contains(&with_prefix_index) {
1218            return Some(with_prefix_index);
1219        }
1220    }
1221
1222    None
1223}