acp/index/
indexer.rs

1//! @acp:module "Indexer"
2//! @acp:summary "Codebase indexing and cache generation (schema-compliant, RFC-003 provenance, RFC-006 bridging)"
3//! @acp:domain cli
4//! @acp:layer service
5//!
6//! Walks the codebase and builds the cache/vars files.
7//! Uses tree-sitter AST parsing for symbol extraction and git2 for metadata.
8//! Supports RFC-0003 annotation provenance tracking.
9//! Supports RFC-0006 documentation system bridging.
10
11use std::collections::HashMap;
12use std::fs;
13use std::path::Path;
14use std::sync::Arc;
15
16use chrono::{DateTime, Utc};
17use glob::Pattern;
18use rayon::prelude::*;
19use walkdir::WalkDir;
20
21use crate::annotate::converters::{
22    DocStandardParser, DocstringParser, GodocParser, JavadocParser, JsDocParser,
23    ParsedDocumentation, RustdocParser,
24};
25use crate::ast::{AstParser, ExtractedSymbol, SymbolKind, Visibility as AstVisibility};
26use crate::bridge::merger::AcpAnnotations;
27use crate::bridge::{BridgeConfig, BridgeMerger, FormatDetector};
28use crate::cache::{
29    AnnotationProvenance, BridgeMetadata, BridgeSource, BridgeStats, BridgeSummary, Cache,
30    CacheBuilder, DomainEntry, Language, LowConfidenceEntry, ProvenanceStats, SourceFormat,
31    SymbolEntry, SymbolType, Visibility,
32};
33use crate::config::Config;
34use crate::constraints::{
35    ConstraintIndex, Constraints, HackMarker, HackType, LockLevel, MutationConstraint,
36};
37use crate::error::Result;
38use crate::git::{BlameInfo, FileHistory, GitFileInfo, GitRepository, GitSymbolInfo};
39use crate::parse::{AnnotationWithProvenance, Parser, SourceOrigin};
40use crate::vars::{VarEntry, VarsFile};
41
/// @acp:summary "Codebase indexer with parallel file processing"
/// Uses tree-sitter AST parsing for accurate symbol extraction and git2 for metadata.
/// Supports RFC-0006 documentation bridging.
///
/// The parsers and bridge components are stored behind `Arc` so that `index` can
/// clone cheap handles to them for use inside its parallel file-processing closure.
pub struct Indexer {
    /// Project configuration; supplies the include/exclude globs, the bridge
    /// settings and the provenance review threshold read during indexing.
    config: Config,
    /// Annotation parser used for file-level metadata and provenance extraction.
    parser: Arc<Parser>,
    /// AST parser used for symbol and call extraction.
    ast_parser: Arc<AstParser>,
    /// RFC-0006: Format detector for native documentation
    format_detector: Arc<FormatDetector>,
    /// RFC-0006: Merger for native docs with ACP annotations
    bridge_merger: Arc<BridgeMerger>,
}
54
55impl Indexer {
56    pub fn new(config: Config) -> Result<Self> {
57        // RFC-0006: Initialize bridge components
58        let format_detector = FormatDetector::new(&config.bridge);
59        let bridge_merger = BridgeMerger::new(&config.bridge);
60
61        Ok(Self {
62            config,
63            parser: Arc::new(Parser::new()),
64            ast_parser: Arc::new(AstParser::new()?),
65            format_detector: Arc::new(format_detector),
66            bridge_merger: Arc::new(bridge_merger),
67        })
68    }
69
70    /// @acp:summary "Index the codebase and generate cache"
71    /// @acp:ai-careful "This processes many files in parallel"
72    pub async fn index<P: AsRef<Path>>(&self, root: P) -> Result<Cache> {
73        let root = root.as_ref();
74        let project_name = root
75            .file_name()
76            .map(|n| n.to_string_lossy().to_string())
77            .unwrap_or_else(|| "project".to_string());
78
79        let mut builder = CacheBuilder::new(&project_name, &root.to_string_lossy());
80
81        // Try to open git repository for metadata
82        let git_repo = GitRepository::open(root).ok();
83
84        // Set git commit if available
85        if let Some(ref repo) = git_repo {
86            if let Ok(commit) = repo.head_commit() {
87                builder = builder.set_git_commit(commit);
88            }
89        }
90
91        // Find all matching files
92        let files = self.find_files(root)?;
93
94        // Add source_files with modification times
95        for file_path in &files {
96            if let Ok(metadata) = fs::metadata(file_path) {
97                if let Ok(modified) = metadata.modified() {
98                    let modified_dt: DateTime<Utc> = modified.into();
99                    let relative_path = Path::new(file_path)
100                        .strip_prefix(root)
101                        .map(|p| p.to_string_lossy().to_string())
102                        .unwrap_or_else(|_| file_path.clone());
103                    builder = builder.add_source_file(relative_path, modified_dt);
104                }
105            }
106        }
107
108        // Parse files in parallel using rayon
109        // Uses annotation parser as primary for metadata, AST parser for accurate symbols
110        let ast_parser = Arc::clone(&self.ast_parser);
111        let annotation_parser = Arc::clone(&self.parser);
112        let root_path = root.to_path_buf();
113
114        // RFC-0003: Get review threshold from config
115        let review_threshold = self.config.annotate.provenance.review_threshold;
116
117        // RFC-0006: Clone bridge components for parallel access
118        let format_detector = Arc::clone(&self.format_detector);
119        let bridge_merger = Arc::clone(&self.bridge_merger);
120        let bridge_enabled = self.config.bridge.enabled;
121
122        let mut results: Vec<_> = files
123            .par_iter()
124            .filter_map(|path| {
125                // Parse with annotation parser (metadata, domains, etc.)
126                let mut parse_result = annotation_parser.parse(path).ok()?;
127
128                // Try AST parsing for accurate symbol extraction
129                if let Ok(source) = std::fs::read_to_string(path) {
130                    // RFC-0003: Parse annotations with provenance support
131                    let annotations_with_prov =
132                        annotation_parser.parse_annotations_with_provenance(&source);
133                    let file_provenance =
134                        extract_provenance(&annotations_with_prov, review_threshold);
135
136                    // Add provenance to file entry
137                    parse_result.file.annotations = file_provenance;
138
139                    // RFC-0006: Detect documentation format and populate bridge metadata
140                    if bridge_enabled {
141                        let language = language_name_from_enum(parse_result.file.language);
142                        let detected_format = format_detector.detect(&source, language);
143
144                        // Initialize bridge metadata
145                        parse_result.file.bridge = BridgeMetadata {
146                            enabled: true,
147                            detected_format,
148                            converted_count: 0,
149                            merged_count: 0,
150                            explicit_count: 0,
151                        };
152
153                        // Count explicit ACP annotations
154                        let explicit_count = parse_result
155                            .file
156                            .annotations
157                            .values()
158                            .filter(|p| matches!(p.source, SourceOrigin::Explicit))
159                            .count() as u64;
160                        parse_result.file.bridge.explicit_count = explicit_count;
161
162                        // Count converted annotations (from provenance tracking)
163                        let converted_count = parse_result
164                            .file
165                            .annotations
166                            .values()
167                            .filter(|p| matches!(p.source, SourceOrigin::Converted))
168                            .count() as u64;
169                        parse_result.file.bridge.converted_count = converted_count;
170                    }
171
172                    if let Ok(ast_symbols) = ast_parser.parse_file(Path::new(path), &source) {
173                        // Convert AST symbols to cache symbols and merge
174                        let relative_path = Path::new(path)
175                            .strip_prefix(&root_path)
176                            .map(|p| p.to_string_lossy().to_string())
177                            .unwrap_or_else(|_| path.clone());
178
179                        let converted = convert_ast_symbols(&ast_symbols, &relative_path);
180
181                        // Merge: prefer AST symbols but keep annotation metadata
182                        if !converted.is_empty() {
183                            // Keep summaries from annotation parser
184                            let annotation_summaries: HashMap<_, _> = parse_result
185                                .symbols
186                                .iter()
187                                .filter_map(|s| {
188                                    s.summary.as_ref().map(|sum| (s.name.clone(), sum.clone()))
189                                })
190                                .collect();
191
192                            parse_result.symbols = converted;
193
194                            // Restore summaries from annotations
195                            for symbol in &mut parse_result.symbols {
196                                if symbol.summary.is_none() {
197                                    if let Some(sum) = annotation_summaries.get(&symbol.name) {
198                                        symbol.summary = Some(sum.clone());
199                                    }
200                                }
201                            }
202
203                            // RFC-0006: Apply bridge merging for symbols with doc comments
204                            if bridge_enabled {
205                                if let Some(ref detected_format) =
206                                    parse_result.file.bridge.detected_format
207                                {
208                                    // Build map of AST symbols by name for doc_comment lookup
209                                    let ast_doc_comments: HashMap<_, _> = ast_symbols
210                                        .iter()
211                                        .filter_map(|s| {
212                                            s.doc_comment
213                                                .as_ref()
214                                                .map(|doc| (s.name.clone(), doc.clone()))
215                                        })
216                                        .collect();
217
218                                    let mut merged_count = 0u64;
219                                    for symbol in &mut parse_result.symbols {
220                                        if let Some(doc_comment) =
221                                            ast_doc_comments.get(&symbol.name)
222                                        {
223                                            // Parse native documentation
224                                            let native_docs =
225                                                parse_native_docs(doc_comment, detected_format);
226
227                                            // Extract ACP annotations from doc comment
228                                            let acp_annotations = extract_acp_annotations(
229                                                doc_comment,
230                                                &annotation_parser,
231                                            );
232
233                                            // Merge using bridge merger
234                                            let bridge_result = bridge_merger.merge(
235                                                native_docs.as_ref(),
236                                                *detected_format,
237                                                &acp_annotations,
238                                            );
239
240                                            // Update symbol with merged data
241                                            if bridge_result.summary.is_some() {
242                                                symbol.summary = bridge_result.summary;
243                                            }
244                                            if bridge_result.directive.is_some() {
245                                                symbol.purpose = bridge_result.directive;
246                                            }
247
248                                            // Track merged count
249                                            if matches!(bridge_result.source, BridgeSource::Merged)
250                                            {
251                                                merged_count += 1;
252                                            }
253                                        }
254                                    }
255                                    parse_result.file.bridge.merged_count = merged_count;
256                                }
257                            }
258                        }
259
260                        // Extract calls from AST
261                        if let Ok(calls) = ast_parser.parse_calls(Path::new(path), &source) {
262                            for call in calls {
263                                if !call.caller.is_empty() {
264                                    parse_result
265                                        .calls
266                                        .push((call.caller.clone(), vec![call.callee.clone()]));
267                                }
268                            }
269                        }
270                    }
271                }
272
273                Some(parse_result)
274            })
275            .collect();
276
277        // Add git metadata sequentially (git2::Repository is not Sync)
278        if let Some(ref repo) = git_repo {
279            for parse_result in &mut results {
280                let file_path = &parse_result.file.path;
281                // Strip "./" prefix if present - git expects paths like "src/lib.rs" not "./src/lib.rs"
282                let clean_path = file_path.strip_prefix("./").unwrap_or(file_path);
283                let relative_path = Path::new(clean_path);
284
285                // Add git metadata for the file (only if we have valid git history)
286                if let Ok(history) = FileHistory::for_file(repo, relative_path, 100) {
287                    if let Some(latest) = history.latest() {
288                        // Only set git info if we have actual commit data
289                        parse_result.file.git = Some(GitFileInfo {
290                            last_commit: latest.commit.clone(),
291                            last_author: latest.author.clone(),
292                            last_modified: latest.timestamp,
293                            commit_count: history.commit_count(),
294                            contributors: history.contributors(),
295                        });
296                    }
297                }
298
299                // Add git metadata for symbols using blame
300                if let Ok(blame) = BlameInfo::for_file(repo, relative_path) {
301                    for symbol in &mut parse_result.symbols {
302                        if let Some(line_blame) =
303                            blame.last_modified(symbol.lines[0], symbol.lines[1])
304                        {
305                            let age_days =
306                                (Utc::now() - line_blame.timestamp).num_days().max(0) as u32;
307                            symbol.git = Some(GitSymbolInfo {
308                                last_commit: line_blame.commit.clone(),
309                                last_author: line_blame.author.clone(),
310                                code_age_days: age_days,
311                            });
312                        }
313                    }
314                }
315            }
316        }
317
318        // Build cache from results
319        let mut domains: std::collections::HashMap<String, Vec<String>> =
320            std::collections::HashMap::new();
321        let mut constraint_index = ConstraintIndex::default();
322
323        for result in &results {
324            // Add file
325            builder = builder.add_file(result.file.clone());
326
327            // Add symbols
328            for symbol in &result.symbols {
329                builder = builder.add_symbol(symbol.clone());
330            }
331
332            // Add call edges
333            for (from, to) in &result.calls {
334                builder = builder.add_call_edge(from, to.clone());
335            }
336
337            // Track domains
338            for domain in &result.file.domains {
339                domains
340                    .entry(domain.clone())
341                    .or_default()
342                    .push(result.file.path.clone());
343            }
344
345            // Build constraints from parse result (RFC-001 compliant)
346            if result.lock_level.is_some() || !result.ai_hints.is_empty() {
347                let lock_level = result
348                    .lock_level
349                    .as_ref()
350                    .map(|l| match l.to_lowercase().as_str() {
351                        "frozen" => LockLevel::Frozen,
352                        "restricted" => LockLevel::Restricted,
353                        "approval-required" => LockLevel::ApprovalRequired,
354                        "tests-required" => LockLevel::TestsRequired,
355                        "docs-required" => LockLevel::DocsRequired,
356                        "experimental" => LockLevel::Experimental,
357                        _ => LockLevel::Normal,
358                    })
359                    .unwrap_or(LockLevel::Normal);
360
361                let constraints = Constraints {
362                    mutation: Some(MutationConstraint {
363                        level: lock_level,
364                        reason: None,
365                        contact: None,
366                        requires_approval: matches!(lock_level, LockLevel::ApprovalRequired),
367                        requires_tests: matches!(lock_level, LockLevel::TestsRequired),
368                        requires_docs: matches!(lock_level, LockLevel::DocsRequired),
369                        max_lines_changed: None,
370                        allowed_operations: None,
371                        forbidden_operations: None,
372                    }),
373                    // RFC-001: Include directive from lock annotation
374                    directive: result.lock_directive.clone(),
375                    auto_generated: result.lock_directive.is_none(),
376                    ..Default::default()
377                };
378                constraint_index
379                    .by_file
380                    .insert(result.file.path.clone(), constraints);
381
382                // Track by lock level
383                let level_str = format!("{:?}", lock_level).to_lowercase();
384                constraint_index
385                    .by_lock_level
386                    .entry(level_str)
387                    .or_default()
388                    .push(result.file.path.clone());
389            }
390
391            // Build hack markers
392            for hack in &result.hacks {
393                let hack_marker = HackMarker {
394                    id: format!("{}:{}", result.file.path, hack.line),
395                    hack_type: HackType::Workaround,
396                    file: result.file.path.clone(),
397                    line: Some(hack.line),
398                    created_at: Utc::now(),
399                    author: None,
400                    reason: hack
401                        .reason
402                        .clone()
403                        .unwrap_or_else(|| "Temporary hack".to_string()),
404                    ticket: hack.ticket.clone(),
405                    expires: hack.expires.as_ref().and_then(|e| {
406                        chrono::NaiveDate::parse_from_str(e, "%Y-%m-%d")
407                            .ok()
408                            .map(|d| d.and_hms_opt(0, 0, 0).unwrap())
409                            .map(|dt| DateTime::<Utc>::from_naive_utc_and_offset(dt, Utc))
410                    }),
411                    original_code: None,
412                    revert_instructions: None,
413                };
414                constraint_index.hacks.push(hack_marker);
415            }
416        }
417
418        // Add domains to cache
419        for (name, files) in domains {
420            builder = builder.add_domain(DomainEntry {
421                name: name.clone(),
422                files: files.clone(),
423                symbols: vec![],
424                description: None,
425            });
426        }
427
428        // Add constraints if any were found
429        if !constraint_index.by_file.is_empty() || !constraint_index.hacks.is_empty() {
430            builder = builder.set_constraints(constraint_index);
431        }
432
433        // Build the cache
434        let mut cache = builder.build();
435
436        // RFC-0003: Compute provenance statistics
437        let low_conf_threshold = 0.5; // TODO: Read from config when available
438        cache.provenance = compute_provenance_stats(&cache, low_conf_threshold);
439
440        // RFC-0006: Compute bridge statistics
441        cache.bridge = compute_bridge_stats(&cache, &self.config.bridge);
442
443        Ok(cache)
444    }
445
446    /// @acp:summary "Find all files matching include/exclude patterns"
447    fn find_files<P: AsRef<Path>>(&self, root: P) -> Result<Vec<String>> {
448        let root = root.as_ref();
449        let include_patterns: Vec<_> = self
450            .config
451            .include
452            .iter()
453            .filter_map(|p| Pattern::new(p).ok())
454            .collect();
455
456        let exclude_patterns: Vec<_> = self
457            .config
458            .exclude
459            .iter()
460            .filter_map(|p| Pattern::new(p).ok())
461            .collect();
462
463        let files: Vec<String> = WalkDir::new(root)
464            .into_iter()
465            .filter_map(|e| e.ok())
466            .filter(|e| e.file_type().is_file())
467            .filter_map(|e| {
468                // Get path relative to root for pattern matching
469                let full_path = e.path().to_string_lossy().to_string();
470                let relative_path = e
471                    .path()
472                    .strip_prefix(root)
473                    .map(|p| p.to_string_lossy().to_string())
474                    .unwrap_or_else(|_| full_path.clone());
475
476                // Must match at least one include pattern
477                let match_opts = glob::MatchOptions {
478                    case_sensitive: true,
479                    require_literal_separator: false,
480                    require_literal_leading_dot: false,
481                };
482                let included = include_patterns.is_empty()
483                    || include_patterns
484                        .iter()
485                        .any(|p| p.matches_with(&relative_path, match_opts));
486                // Must not match any exclude pattern
487                let excluded = exclude_patterns
488                    .iter()
489                    .any(|p| p.matches_with(&relative_path, match_opts));
490
491                if included && !excluded {
492                    Some(full_path)
493                } else {
494                    None
495                }
496            })
497            .collect();
498
499        Ok(files)
500    }
501
502    /// @acp:summary "Generate vars file from cache (schema-compliant)"
503    pub fn generate_vars(&self, cache: &Cache) -> VarsFile {
504        let mut vars_file = VarsFile::new();
505
506        // Build a map of symbol names to var names for ref resolution
507        let mut symbol_to_var: std::collections::HashMap<String, String> =
508            std::collections::HashMap::new();
509        for (name, symbol) in &cache.symbols {
510            if symbol.exported {
511                let var_name = format!("SYM_{}", name.to_uppercase().replace('.', "_"));
512                symbol_to_var.insert(name.clone(), var_name);
513            }
514        }
515
516        // Generate symbol vars with refs from call graph
517        for (name, symbol) in &cache.symbols {
518            if symbol.exported {
519                let var_name = format!("SYM_{}", name.to_uppercase().replace('.', "_"));
520
521                // Build refs from symbols this one calls
522                let refs: Vec<String> = symbol
523                    .calls
524                    .iter()
525                    .filter_map(|callee| symbol_to_var.get(callee).cloned())
526                    .collect();
527
528                let entry = VarEntry {
529                    var_type: crate::vars::VarType::Symbol,
530                    value: symbol.qualified_name.clone(),
531                    description: symbol.summary.clone(),
532                    refs,
533                    source: Some(symbol.file.clone()),
534                    lines: Some(symbol.lines),
535                };
536
537                vars_file.add_variable(var_name, entry);
538            }
539        }
540
541        // Generate domain vars
542        for (name, domain) in &cache.domains {
543            let var_name = format!("DOM_{}", name.to_uppercase().replace('-', "_"));
544            vars_file.add_variable(
545                var_name,
546                VarEntry::domain(
547                    name.clone(),
548                    Some(format!("Domain: {} ({} files)", name, domain.files.len())),
549                ),
550            );
551        }
552
553        // Generate file vars for important files
554        for (path, file) in &cache.files {
555            // Only generate vars for files with modules or summaries
556            if file.module.is_some() || file.summary.is_some() {
557                let var_name = format!("FILE_{}", path.replace(['/', '.'], "_").to_uppercase());
558                vars_file.add_variable(
559                    var_name,
560                    VarEntry::file(
561                        path.clone(),
562                        file.summary.clone().or_else(|| file.module.clone()),
563                    ),
564                );
565            }
566        }
567
568        // Generate layer vars from unique layers
569        let mut layers: std::collections::HashSet<String> = std::collections::HashSet::new();
570        for file in cache.files.values() {
571            if let Some(layer) = &file.layer {
572                layers.insert(layer.clone());
573            }
574        }
575        for layer in layers {
576            let var_name = format!("LAYER_{}", layer.to_uppercase().replace('-', "_"));
577            let file_count = cache
578                .files
579                .values()
580                .filter(|f| f.layer.as_ref() == Some(&layer))
581                .count();
582            vars_file.add_variable(
583                var_name,
584                VarEntry::layer(
585                    layer.clone(),
586                    Some(format!("Layer: {} ({} files)", layer, file_count)),
587                ),
588            );
589        }
590
591        vars_file
592    }
593}
594
595/// Detect language from file extension
596pub fn detect_language(path: &str) -> Option<Language> {
597    let path = Path::new(path);
598    let ext = path.extension()?.to_str()?;
599
600    match ext.to_lowercase().as_str() {
601        "ts" | "tsx" => Some(Language::Typescript),
602        "js" | "jsx" | "mjs" | "cjs" => Some(Language::Javascript),
603        "py" | "pyw" => Some(Language::Python),
604        "rs" => Some(Language::Rust),
605        "go" => Some(Language::Go),
606        "java" => Some(Language::Java),
607        "cs" => Some(Language::CSharp),
608        "cpp" | "cxx" | "cc" | "hpp" | "hxx" => Some(Language::Cpp),
609        "c" | "h" => Some(Language::C),
610        "rb" => Some(Language::Ruby),
611        "php" => Some(Language::Php),
612        "swift" => Some(Language::Swift),
613        "kt" | "kts" => Some(Language::Kotlin),
614        _ => None,
615    }
616}
617
618/// Convert AST-extracted symbols to cache SymbolEntry format
619fn convert_ast_symbols(ast_symbols: &[ExtractedSymbol], file_path: &str) -> Vec<SymbolEntry> {
620    ast_symbols
621        .iter()
622        .map(|sym| {
623            let symbol_type = match sym.kind {
624                SymbolKind::Function => SymbolType::Function,
625                SymbolKind::Method => SymbolType::Method,
626                SymbolKind::Class => SymbolType::Class,
627                SymbolKind::Struct => SymbolType::Struct,
628                SymbolKind::Interface => SymbolType::Interface,
629                SymbolKind::Trait => SymbolType::Trait,
630                SymbolKind::Enum => SymbolType::Enum,
631                SymbolKind::EnumVariant => SymbolType::Enum,
632                SymbolKind::Constant => SymbolType::Const,
633                SymbolKind::Variable => SymbolType::Const,
634                SymbolKind::TypeAlias => SymbolType::Type,
635                SymbolKind::Module => SymbolType::Function, // No direct mapping
636                SymbolKind::Namespace => SymbolType::Function, // No direct mapping
637                SymbolKind::Property => SymbolType::Function, // No direct mapping
638                SymbolKind::Field => SymbolType::Function,  // No direct mapping
639                SymbolKind::Impl => SymbolType::Class,      // Map impl to class
640            };
641
642            let visibility = match sym.visibility {
643                AstVisibility::Public => Visibility::Public,
644                AstVisibility::Private => Visibility::Private,
645                AstVisibility::Protected => Visibility::Protected,
646                AstVisibility::Internal | AstVisibility::Crate => Visibility::Private,
647            };
648
649            let qualified_name = sym
650                .qualified_name
651                .clone()
652                .unwrap_or_else(|| format!("{}:{}", file_path, sym.name));
653
654            SymbolEntry {
655                name: sym.name.clone(),
656                qualified_name,
657                symbol_type,
658                file: file_path.to_string(),
659                lines: [sym.start_line, sym.end_line],
660                exported: matches!(sym.visibility, AstVisibility::Public),
661                signature: sym.signature.clone(),
662                summary: sym.doc_comment.clone(),
663                purpose: None, // RFC-001: Populated from @acp:fn/@acp:class annotations
664                constraints: None, // RFC-001: Populated from symbol-level constraints
665                async_fn: sym.is_async,
666                visibility,
667                calls: vec![],               // Populated separately from call graph
668                called_by: vec![],           // Populated by graph builder
669                git: None,                   // Populated after symbol creation
670                annotations: HashMap::new(), // RFC-0003: Populated during indexing
671                // RFC-0009: Extended annotation types
672                behavioral: None,
673                lifecycle: None,
674                documentation: None,
675                performance: None,
676                // RFC-0008: Type annotation info
677                type_info: None,
678            }
679        })
680        .collect()
681}
682
683// ============================================================================
684// RFC-0003: Annotation Provenance Functions
685// ============================================================================
686
687/// Extract provenance data from parsed annotations (RFC-0003)
688///
689/// Converts AnnotationWithProvenance to AnnotationProvenance entries
690/// suitable for storage in the cache.
691fn extract_provenance(
692    annotations: &[AnnotationWithProvenance],
693    review_threshold: f64,
694) -> HashMap<String, AnnotationProvenance> {
695    let mut result = HashMap::new();
696
697    for ann in annotations {
698        // Skip provenance-only annotations (source, source-confidence, etc.)
699        if ann.annotation.name.starts_with("source") {
700            continue;
701        }
702
703        let key = format!("@acp:{}", ann.annotation.name);
704
705        let prov = if let Some(ref marker) = ann.provenance {
706            let needs_review = marker.confidence.is_some_and(|c| c < review_threshold);
707
708            AnnotationProvenance {
709                value: ann.annotation.value.clone().unwrap_or_default(),
710                source: marker.source,
711                confidence: marker.confidence,
712                needs_review,
713                reviewed: marker.reviewed.unwrap_or(false),
714                reviewed_at: None,
715                generated_at: Some(Utc::now().to_rfc3339()),
716                generation_id: marker.generation_id.clone(),
717            }
718        } else {
719            // No provenance markers = explicit annotation (human-written)
720            AnnotationProvenance {
721                value: ann.annotation.value.clone().unwrap_or_default(),
722                source: SourceOrigin::Explicit,
723                confidence: None,
724                needs_review: false,
725                reviewed: true, // Explicit annotations are considered reviewed
726                reviewed_at: None,
727                generated_at: None,
728                generation_id: None,
729            }
730        };
731
732        result.insert(key, prov);
733    }
734
735    result
736}
737
738/// Compute aggregate provenance statistics from cache (RFC-0003)
739///
740/// Aggregates provenance data from all files and symbols to produce
741/// summary statistics for the cache.
742fn compute_provenance_stats(cache: &Cache, low_conf_threshold: f64) -> ProvenanceStats {
743    let mut stats = ProvenanceStats::default();
744    let mut confidence_sums: HashMap<String, (f64, u64)> = HashMap::new();
745
746    // Process file annotations
747    for (path, file) in &cache.files {
748        for (key, prov) in &file.annotations {
749            update_provenance_stats(
750                &mut stats,
751                &mut confidence_sums,
752                key,
753                prov,
754                path,
755                low_conf_threshold,
756            );
757        }
758    }
759
760    // Process symbol annotations
761    for symbol in cache.symbols.values() {
762        for (key, prov) in &symbol.annotations {
763            let target = format!("{}:{}", symbol.file, symbol.name);
764            update_provenance_stats(
765                &mut stats,
766                &mut confidence_sums,
767                key,
768                prov,
769                &target,
770                low_conf_threshold,
771            );
772        }
773    }
774
775    // Calculate average confidence per source type
776    for (source, (sum, count)) in confidence_sums {
777        if count > 0 {
778            stats
779                .summary
780                .average_confidence
781                .insert(source, sum / count as f64);
782        }
783    }
784
785    // Sort low confidence entries by confidence (ascending)
786    stats.low_confidence.sort_by(|a, b| {
787        a.confidence
788            .partial_cmp(&b.confidence)
789            .unwrap_or(std::cmp::Ordering::Equal)
790    });
791
792    stats
793}
794
795/// Update provenance statistics with a single annotation's data
796fn update_provenance_stats(
797    stats: &mut ProvenanceStats,
798    confidence_sums: &mut HashMap<String, (f64, u64)>,
799    key: &str,
800    prov: &AnnotationProvenance,
801    target: &str,
802    low_conf_threshold: f64,
803) {
804    stats.summary.total += 1;
805
806    // Count by source type
807    match prov.source {
808        SourceOrigin::Explicit => stats.summary.by_source.explicit += 1,
809        SourceOrigin::Converted => stats.summary.by_source.converted += 1,
810        SourceOrigin::Heuristic => stats.summary.by_source.heuristic += 1,
811        SourceOrigin::Refined => stats.summary.by_source.refined += 1,
812        SourceOrigin::Inferred => stats.summary.by_source.inferred += 1,
813    }
814
815    // Count review status
816    if prov.needs_review {
817        stats.summary.needs_review += 1;
818    }
819    if prov.reviewed {
820        stats.summary.reviewed += 1;
821    }
822
823    // Track confidence for averaging
824    if let Some(conf) = prov.confidence {
825        let source_key = prov.source.as_str().to_string();
826        let entry = confidence_sums.entry(source_key).or_insert((0.0, 0));
827        entry.0 += conf;
828        entry.1 += 1;
829
830        // Track low confidence annotations
831        if conf < low_conf_threshold {
832            stats.low_confidence.push(LowConfidenceEntry {
833                target: target.to_string(),
834                annotation: key.to_string(),
835                confidence: conf,
836                value: prov.value.clone(),
837            });
838        }
839    }
840}
841
842// ============================================================================
843// RFC-0006: Documentation Bridging Functions
844// ============================================================================
845
846/// Convert Language enum to string for FormatDetector
847fn language_name_from_enum(lang: Language) -> &'static str {
848    match lang {
849        Language::Typescript => "typescript",
850        Language::Javascript => "javascript",
851        Language::Python => "python",
852        Language::Rust => "rust",
853        Language::Go => "go",
854        Language::Java => "java",
855        Language::CSharp => "csharp",
856        Language::Cpp => "cpp",
857        Language::C => "c",
858        Language::Ruby => "ruby",
859        Language::Php => "php",
860        Language::Swift => "swift",
861        Language::Kotlin => "kotlin",
862    }
863}
864
865/// Compute aggregate bridge statistics from cache (RFC-0006)
866///
867/// Aggregates bridging data from all files to produce summary statistics.
868fn compute_bridge_stats(cache: &Cache, config: &BridgeConfig) -> BridgeStats {
869    let mut stats = BridgeStats {
870        enabled: config.enabled,
871        precedence: config.precedence.to_string(),
872        summary: BridgeSummary::default(),
873        by_format: HashMap::new(),
874    };
875
876    if !config.enabled {
877        return stats;
878    }
879
880    // Aggregate from file bridge metadata
881    for file in cache.files.values() {
882        if !file.bridge.enabled {
883            continue;
884        }
885
886        stats.summary.explicit_count += file.bridge.explicit_count;
887        stats.summary.converted_count += file.bridge.converted_count;
888        stats.summary.merged_count += file.bridge.merged_count;
889
890        // Track by detected format
891        if let Some(format) = &file.bridge.detected_format {
892            let format_key = format_to_string(format);
893            let format_count = file.bridge.converted_count + file.bridge.merged_count;
894            if format_count > 0 {
895                *stats.by_format.entry(format_key).or_insert(0) += format_count;
896            }
897        }
898    }
899
900    stats.summary.total_annotations =
901        stats.summary.explicit_count + stats.summary.converted_count + stats.summary.merged_count;
902
903    stats
904}
905
906/// Convert SourceFormat to string key for by_format map
907fn format_to_string(format: &SourceFormat) -> String {
908    match format {
909        SourceFormat::Acp => "acp".to_string(),
910        SourceFormat::Jsdoc => "jsdoc".to_string(),
911        SourceFormat::DocstringGoogle => "docstring:google".to_string(),
912        SourceFormat::DocstringNumpy => "docstring:numpy".to_string(),
913        SourceFormat::DocstringSphinx => "docstring:sphinx".to_string(),
914        SourceFormat::Rustdoc => "rustdoc".to_string(),
915        SourceFormat::Javadoc => "javadoc".to_string(),
916        SourceFormat::Godoc => "godoc".to_string(),
917        SourceFormat::TypeHint => "type_hint".to_string(),
918    }
919}
920
921/// Parse native documentation from a doc comment based on detected format
922fn parse_native_docs(doc_comment: &str, format: &SourceFormat) -> Option<ParsedDocumentation> {
923    let parsed = match format {
924        SourceFormat::Jsdoc => JsDocParser::new().parse(doc_comment),
925        SourceFormat::DocstringGoogle
926        | SourceFormat::DocstringNumpy
927        | SourceFormat::DocstringSphinx => DocstringParser::new().parse(doc_comment),
928        SourceFormat::Rustdoc => RustdocParser::new().parse(doc_comment),
929        SourceFormat::Javadoc => JavadocParser::new().parse(doc_comment),
930        SourceFormat::Godoc => GodocParser::new().parse(doc_comment),
931        SourceFormat::Acp | SourceFormat::TypeHint => return None,
932    };
933
934    if parsed.is_empty() {
935        None
936    } else {
937        Some(parsed)
938    }
939}
940
941/// Extract ACP annotations from a doc comment and convert to AcpAnnotations
942fn extract_acp_annotations(doc_comment: &str, parser: &Parser) -> AcpAnnotations {
943    let annotations = parser.parse_annotations(doc_comment);
944
945    let mut result = AcpAnnotations::default();
946
947    for ann in annotations {
948        match ann.name.as_str() {
949            "summary" => {
950                if let Some(ref value) = ann.value {
951                    result.summary = Some(value.clone());
952                }
953            }
954            "fn" | "method" => {
955                // @acp:fn "summary" - directive
956                // The parser already extracts value and directive separately
957                if let Some(ref value) = ann.value {
958                    // Value might be the summary in quotes
959                    if let Some((summary, _)) = parse_fn_annotation(value) {
960                        if result.summary.is_none() {
961                            result.summary = Some(summary);
962                        }
963                    }
964                }
965                // Directive is already parsed by the Parser
966                if let Some(ref directive) = ann.directive {
967                    result.directive = Some(directive.clone());
968                }
969            }
970            "param" => {
971                // @acp:param {type} name - directive
972                // Extract name from value, directive is already parsed
973                if let Some(ref value) = ann.value {
974                    if let Some((name, _)) = parse_param_annotation(value) {
975                        let directive = ann.directive.clone().unwrap_or_default();
976                        result.params.push((name, directive));
977                    }
978                }
979            }
980            "returns" => {
981                // @acp:returns {type} - directive
982                // Directive is already parsed by the Parser
983                if let Some(ref directive) = ann.directive {
984                    result.returns = Some(directive.clone());
985                } else if let Some(ref value) = ann.value {
986                    // Fallback: try to extract directive from value
987                    if let Some(directive) = parse_returns_annotation(value) {
988                        result.returns = Some(directive);
989                    }
990                }
991            }
992            "throws" => {
993                // @acp:throws {exception} - directive
994                if let Some(ref value) = ann.value {
995                    // Extract exception type from value
996                    let exception = if value.starts_with('{') {
997                        if let Some(close) = value.find('}') {
998                            value[1..close].to_string()
999                        } else {
1000                            value.clone()
1001                        }
1002                    } else {
1003                        value.split_whitespace().next().unwrap_or(value).to_string()
1004                    };
1005                    let directive = ann.directive.clone().unwrap_or_default();
1006                    result.throws.push((exception, directive));
1007                }
1008            }
1009            _ => {}
1010        }
1011    }
1012
1013    result
1014}
1015
/// Split an `@acp:fn` value of the form `"summary text" - directive text`
/// into `(summary, directive)`.
///
/// Returns `None` unless the value starts with a double quote, contains a
/// closing quote, and has a non-empty directive after it. Whitespace and any
/// leading dashes are stripped from the directive. A value without a quoted
/// summary (a bare directive) is not accepted.
fn parse_fn_annotation(value: &str) -> Option<(String, String)> {
    let after_open_quote = value.strip_prefix('"')?;
    let (summary, tail) = after_open_quote.split_once('"')?;

    let directive = tail.trim().trim_start_matches('-').trim();

    (!directive.is_empty()).then(|| (summary.to_string(), directive.to_string()))
}
1032
/// Split an `@acp:param` value into `(name, directive)`.
///
/// Accepted shapes (surrounding whitespace is ignored):
/// * `{type} name - directive`
/// * `name - directive`
/// * `[name] - directive` or `[name=default] - directive` (optional param;
///   the default is dropped)
///
/// A leading `{type}` is skipped only when its closing brace is present.
/// The directive has whitespace and leading dashes stripped and may be
/// empty. Returns `None` when no parameter name can be extracted (empty
/// input, a bare `{type}`, or an unclosed `[`).
fn parse_param_annotation(value: &str) -> Option<(String, String)> {
    let value = value.trim();

    // Drop a leading `{type}`; an unclosed brace leaves the value untouched.
    let rest = value
        .strip_prefix('{')
        .and_then(|_| value.split_once('}'))
        .map_or(value, |(_, after_type)| after_type)
        .trim();

    let (name, after_name) = match rest.strip_prefix('[') {
        // Optional param: `[name]` or `[name=default]`.
        Some(bracketed) => {
            let (inner, after) = bracketed.split_once(']')?;
            (inner.split('=').next().unwrap_or(inner).trim(), after)
        }
        // Regular param: the name runs up to the first whitespace character.
        // `split_once` avoids the intermediate `Vec` and the emptiness check
        // that could never fire.
        None => rest.split_once(char::is_whitespace).unwrap_or((rest, "")),
    };

    if name.is_empty() {
        return None;
    }

    // Whatever follows the name, minus the separating dash, is the directive.
    let directive = after_name.trim().trim_start_matches('-').trim();

    Some((name.to_string(), directive.to_string()))
}
1079
/// Extract the directive text from an `@acp:returns` value.
///
/// Accepted shapes: `{type} - directive`, `- directive`, or `directive`.
/// A leading `{type}` is skipped only when its closing brace is present;
/// whitespace and leading dashes are then stripped. Returns `None` when
/// nothing is left.
fn parse_returns_annotation(value: &str) -> Option<String> {
    let trimmed = value.trim();

    // Step past the `{type}` prefix when it is well-formed.
    let after_type = match trimmed.find('}') {
        Some(close) if trimmed.starts_with('{') => &trimmed[close + 1..],
        _ => trimmed,
    };

    Some(after_type.trim().trim_start_matches('-').trim())
        .filter(|directive| !directive.is_empty())
        .map(String::from)
}
1103}