organizational_intelligence_plugin/
rag_localization.rs

1//! RAG-Enhanced Fault Localization
2//!
3//! This module integrates trueno-rag with SBFL fault localization to provide:
4//! - Semantic search over historical bugs
5//! - Similar code pattern retrieval
6//! - Fix suggestion generation
7//! - Contextual explanations
8//!
9//! Toyota Way Alignment:
10//! - Genchi Genbutsu: Retrieve actual historical bugs, not hypothetical patterns
11//! - Kaizen: Bug knowledge base improves continuously from each fix
12//! - Jidoka: Human-readable explanations with context
13//! - Muda: Only query RAG for top-N suspicious statements (avoid waste)
14//! - Muri: Configurable retrieval limits prevent information overload
15
16use crate::tarantula::{
17    FaultLocalizationResult, SbflFormula, SbflLocalizer, StatementCoverage, SuspiciousnessRanking,
18};
19use serde::{Deserialize, Serialize};
20use std::collections::HashMap;
21use thiserror::Error;
22use trueno_rag::{
23    chunk::{Chunk, ChunkId},
24    index::{BM25Index, SparseIndex},
25    DocumentId,
26};
27
/// Errors that can occur during RAG-enhanced fault localization.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. The `Io` and `Serialization` variants carry `#[from]`, so `?`
/// converts `std::io::Error` and `serde_json::Error` values automatically.
#[derive(Debug, Error)]
pub enum RagLocalizationError {
    /// Building the RAG pipeline failed; the payload is a free-form message.
    #[error("Failed to build RAG pipeline: {0}")]
    PipelineBuild(String),

    /// Indexing a document failed; the payload is a free-form message.
    #[error("Failed to index document: {0}")]
    IndexError(String),

    /// Querying the RAG pipeline failed; the payload is a free-form message.
    #[error("Failed to query RAG pipeline: {0}")]
    QueryError(String),

    /// Underlying filesystem / IO failure (e.g. reading or writing a YAML file).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// (De)serialization failure. YAML failures raised in this module are
    /// also reported through this variant, wrapped in a `serde_json::Error`.
    #[error("Serialization error: {0}")]
    Serialization(#[from] serde_json::Error),
}
46
/// Result type for RAG localization operations: a `std::result::Result`
/// whose error type is fixed to [`RagLocalizationError`].
pub type Result<T> = std::result::Result<T, RagLocalizationError>;
49
/// Defect category for bug classification.
///
/// A fieldless `Copy` enum that is comparable, hashable and serde
/// (de)serializable. `DefectCategory::all` lists the variants and
/// `DefectCategory::display_name` supplies the human-readable label used by
/// the `Display` impl.
///
/// NOTE: `all()` is a hand-maintained list; when a variant is added here it
/// must be added there too (the `display_name` match is exhaustive, so the
/// compiler flags that one).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DefectCategory {
    MemorySafety,
    Concurrency,
    TypeErrors,
    Performance,
    Security,
    Configuration,
    ApiMisuse,
    IntegrationFailure,
    DocumentationGap,
    TestingGap,
    OperatorPrecedence,
    TypeAnnotationGap,
    StdlibMapping,
    AstTransform,
    ComprehensionBug,
    IteratorChain,
    OwnershipBorrow,
    TraitBounds,
}
72
73impl DefectCategory {
74    /// Get all defect categories
75    pub fn all() -> &'static [DefectCategory] {
76        &[
77            DefectCategory::MemorySafety,
78            DefectCategory::Concurrency,
79            DefectCategory::TypeErrors,
80            DefectCategory::Performance,
81            DefectCategory::Security,
82            DefectCategory::Configuration,
83            DefectCategory::ApiMisuse,
84            DefectCategory::IntegrationFailure,
85            DefectCategory::DocumentationGap,
86            DefectCategory::TestingGap,
87            DefectCategory::OperatorPrecedence,
88            DefectCategory::TypeAnnotationGap,
89            DefectCategory::StdlibMapping,
90            DefectCategory::AstTransform,
91            DefectCategory::ComprehensionBug,
92            DefectCategory::IteratorChain,
93            DefectCategory::OwnershipBorrow,
94            DefectCategory::TraitBounds,
95        ]
96    }
97
98    /// Get display name
99    pub fn display_name(&self) -> &'static str {
100        match self {
101            DefectCategory::MemorySafety => "Memory Safety",
102            DefectCategory::Concurrency => "Concurrency",
103            DefectCategory::TypeErrors => "Type Errors",
104            DefectCategory::Performance => "Performance",
105            DefectCategory::Security => "Security",
106            DefectCategory::Configuration => "Configuration",
107            DefectCategory::ApiMisuse => "API Misuse",
108            DefectCategory::IntegrationFailure => "Integration Failure",
109            DefectCategory::DocumentationGap => "Documentation Gap",
110            DefectCategory::TestingGap => "Testing Gap",
111            DefectCategory::OperatorPrecedence => "Operator Precedence",
112            DefectCategory::TypeAnnotationGap => "Type Annotation Gap",
113            DefectCategory::StdlibMapping => "Stdlib Mapping",
114            DefectCategory::AstTransform => "AST Transform",
115            DefectCategory::ComprehensionBug => "Comprehension Bug",
116            DefectCategory::IteratorChain => "Iterator Chain",
117            DefectCategory::OwnershipBorrow => "Ownership/Borrow",
118            DefectCategory::TraitBounds => "Trait Bounds",
119        }
120    }
121}
122
123impl std::fmt::Display for DefectCategory {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        write!(f, "{}", self.display_name())
126    }
127}
128
/// Bug document for RAG indexing.
///
/// One historical bug plus the metadata needed to retrieve and explain it.
/// `to_rag_chunk` feeds `title`, `description`, `symptoms`, `root_cause` and
/// `fix_pattern` into the indexed text; the other fields are carried along as
/// metadata only.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BugDocument {
    /// Unique bug identifier (e.g., commit hash or issue number)
    pub id: String,
    /// Bug title/summary
    pub title: String,
    /// Full bug description
    pub description: String,
    /// Commit that fixed the bug
    pub fix_commit: String,
    /// The actual code change (diff)
    pub fix_diff: String,
    /// Files affected by the bug
    pub affected_files: Vec<String>,
    /// Defect category
    pub category: DefectCategory,
    /// Severity level (1-5, 5 being most severe).
    ///
    /// `BugDocument::new` defaults this to 3 and `with_severity` clamps to
    /// 1..=5; the field itself is a plain public `u8`, so values set directly
    /// or deserialized are not range-checked.
    pub severity: u8,
    /// Symptoms that indicate this bug
    pub symptoms: Vec<String>,
    /// Root cause description
    pub root_cause: String,
    /// Fix pattern description
    pub fix_pattern: String,
}
155
156impl BugDocument {
157    /// Create a new bug document
158    pub fn new(id: impl Into<String>, title: impl Into<String>, category: DefectCategory) -> Self {
159        Self {
160            id: id.into(),
161            title: title.into(),
162            description: String::new(),
163            fix_commit: String::new(),
164            fix_diff: String::new(),
165            affected_files: Vec::new(),
166            category,
167            severity: 3,
168            symptoms: Vec::new(),
169            root_cause: String::new(),
170            fix_pattern: String::new(),
171        }
172    }
173
174    /// Set description
175    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
176        self.description = desc.into();
177        self
178    }
179
180    /// Set fix commit
181    pub fn with_fix_commit(mut self, commit: impl Into<String>) -> Self {
182        self.fix_commit = commit.into();
183        self
184    }
185
186    /// Set fix diff
187    pub fn with_fix_diff(mut self, diff: impl Into<String>) -> Self {
188        self.fix_diff = diff.into();
189        self
190    }
191
192    /// Add affected file
193    pub fn with_affected_file(mut self, file: impl Into<String>) -> Self {
194        self.affected_files.push(file.into());
195        self
196    }
197
198    /// Set severity
199    pub fn with_severity(mut self, severity: u8) -> Self {
200        self.severity = severity.clamp(1, 5);
201        self
202    }
203
204    /// Add symptom
205    pub fn with_symptom(mut self, symptom: impl Into<String>) -> Self {
206        self.symptoms.push(symptom.into());
207        self
208    }
209
210    /// Set root cause
211    pub fn with_root_cause(mut self, cause: impl Into<String>) -> Self {
212        self.root_cause = cause.into();
213        self
214    }
215
216    /// Set fix pattern
217    pub fn with_fix_pattern(mut self, pattern: impl Into<String>) -> Self {
218        self.fix_pattern = pattern.into();
219        self
220    }
221
222    /// Convert to trueno-rag Chunk for indexing
223    pub fn to_rag_chunk(&self) -> Chunk {
224        let content = format!(
225            "{}\n\n{}\n\nSymptoms:\n{}\n\nRoot Cause:\n{}\n\nFix Pattern:\n{}",
226            self.title,
227            self.description,
228            self.symptoms.join("\n- "),
229            self.root_cause,
230            self.fix_pattern
231        );
232
233        Chunk::new(DocumentId::new(), content, 0, self.description.len().max(1))
234    }
235}
236
/// Similar bug retrieved from RAG.
///
/// Built by `BugKnowledgeBase::search`, which copies the identifying fields
/// out of the matching `BugDocument`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarBug {
    /// Bug ID
    pub id: String,
    /// Similarity score (0.0 to 1.0). `search` divides each raw index score by
    /// the highest score in the same result set and clamps the quotient, so
    /// the value is relative to that query's best hit.
    pub similarity: f32,
    /// Defect category
    pub category: DefectCategory,
    /// Bug summary (the matching bug's `title`)
    pub summary: String,
    /// Fix commit hash (empty if the source bug has none recorded)
    pub fix_commit: String,
}
251
/// Suggested fix from RAG retrieval.
///
/// Built by `BugKnowledgeBase::get_fix_patterns` from a stored `BugDocument`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SuggestedFix {
    /// Fix pattern name (`get_fix_patterns` fills this with
    /// "Fix pattern for <category>")
    pub pattern: String,
    /// Confidence score (0.0 to 1.0); `get_fix_patterns` assigns a fixed 0.7
    pub confidence: f32,
    /// Example code showing the fix.
    /// NOTE(review): `get_fix_patterns` stores the bug's `fix_pattern`
    /// description here, not its `fix_diff` — confirm which is intended.
    pub example: String,
    /// Source bug ID this pattern came from
    pub source_bug_id: String,
}
264
/// RAG-enhanced ranking with additional context.
///
/// One entry per statement ranked by SBFL, produced by
/// `RagFaultLocalizer::localize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RagEnhancedRanking {
    /// Original SBFL ranking (cloned). `localize` overwrites its `rank` with
    /// the 1-based position after re-sorting by `combined_score`.
    pub sbfl_ranking: SuspiciousnessRanking,
    /// Similar historical bugs
    pub similar_bugs: Vec<SimilarBug>,
    /// Suggested fixes
    pub suggested_fixes: Vec<SuggestedFix>,
    /// Contextual explanation (empty when explanations are disabled in the
    /// configuration)
    pub context_explanation: String,
    /// Combined score (SBFL + RAG), as returned by
    /// `LocalizationFusion::combine`
    pub combined_score: f32,
}
279
/// Result of RAG-enhanced fault localization.
///
/// Returned by `RagFaultLocalizer::localize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RagEnhancedResult {
    /// Enhanced rankings with RAG context, sorted by `combined_score` in
    /// descending order
    pub rankings: Vec<RagEnhancedRanking>,
    /// Original SBFL result, as returned by the SBFL localizer
    pub sbfl_result: FaultLocalizationResult,
    /// Fusion strategy used (the value of `LocalizationFusion::name`)
    pub fusion_strategy: String,
    /// Number of bugs in knowledge base at the time of the run
    pub knowledge_base_size: usize,
}
292
/// Bug knowledge base for RAG retrieval.
///
/// Keeps the bug documents alongside a BM25 index over their text. All three
/// fields are updated together by `add_bug`.
#[derive(Debug)]
pub struct BugKnowledgeBase {
    /// Indexed bug documents, in insertion order
    bugs: Vec<BugDocument>,
    /// BM25 index for text search
    bm25_index: BM25Index,
    /// Chunk ID to bug ID mapping, used to turn index hits back into bugs
    chunk_to_bug: HashMap<ChunkId, String>,
}
303
304impl BugKnowledgeBase {
305    /// Create a new empty knowledge base
306    pub fn new() -> Self {
307        Self {
308            bugs: Vec::new(),
309            bm25_index: BM25Index::new(),
310            chunk_to_bug: HashMap::new(),
311        }
312    }
313
314    /// Add a bug to the knowledge base
315    pub fn add_bug(&mut self, bug: BugDocument) {
316        let chunk = bug.to_rag_chunk();
317        let chunk_id = chunk.id;
318
319        // Index the chunk content using SparseIndex trait
320        self.bm25_index.add(&chunk);
321        self.chunk_to_bug.insert(chunk_id, bug.id.clone());
322        self.bugs.push(bug);
323    }
324
325    /// Get the number of bugs in the knowledge base
326    pub fn len(&self) -> usize {
327        self.bugs.len()
328    }
329
330    /// Check if the knowledge base is empty
331    pub fn is_empty(&self) -> bool {
332        self.bugs.is_empty()
333    }
334
335    /// Search for similar bugs using BM25
336    pub fn search(&self, query: &str, top_k: usize) -> Vec<SimilarBug> {
337        let results: Vec<(ChunkId, f32)> = self.bm25_index.search(query, top_k);
338
339        // Normalize scores to 0-1 range
340        let max_score = results.iter().map(|(_, s)| *s).fold(0.0_f32, f32::max);
341        let normalizer = if max_score > 0.0 { max_score } else { 1.0 };
342
343        results
344            .into_iter()
345            .filter_map(|(chunk_id, score): (ChunkId, f32)| {
346                // Look up bug ID from chunk mapping
347                let bug_id = self.chunk_to_bug.get(&chunk_id)?;
348                let bug = self.bugs.iter().find(|b| &b.id == bug_id)?;
349
350                Some(SimilarBug {
351                    id: bug.id.clone(),
352                    similarity: (score / normalizer).clamp(0.0, 1.0),
353                    category: bug.category,
354                    summary: bug.title.clone(),
355                    fix_commit: bug.fix_commit.clone(),
356                })
357            })
358            .collect()
359    }
360
361    /// Get fix patterns for similar bugs
362    pub fn get_fix_patterns(&self, bug_ids: &[String]) -> Vec<SuggestedFix> {
363        bug_ids
364            .iter()
365            .filter_map(|id| {
366                self.bugs
367                    .iter()
368                    .find(|b| &b.id == id)
369                    .map(|bug| SuggestedFix {
370                        pattern: format!("Fix pattern for {}", bug.category),
371                        confidence: 0.7,
372                        example: bug.fix_pattern.clone(),
373                        source_bug_id: bug.id.clone(),
374                    })
375            })
376            .collect()
377    }
378
379    /// Get bug by ID
380    pub fn get_bug(&self, id: &str) -> Option<&BugDocument> {
381        self.bugs.iter().find(|b| b.id == id)
382    }
383
384    /// Get all bugs in a category
385    pub fn get_by_category(&self, category: DefectCategory) -> Vec<&BugDocument> {
386        self.bugs
387            .iter()
388            .filter(|b| b.category == category)
389            .collect()
390    }
391
392    /// Import bugs from YAML file
393    pub fn import_from_yaml(path: &std::path::Path) -> Result<Self> {
394        let content = std::fs::read_to_string(path)?;
395        let bugs: Vec<BugDocument> = serde_yaml::from_str(&content).map_err(|e| {
396            RagLocalizationError::Serialization(serde_json::Error::io(std::io::Error::new(
397                std::io::ErrorKind::InvalidData,
398                e.to_string(),
399            )))
400        })?;
401
402        let mut kb = Self::new();
403        for bug in bugs {
404            kb.add_bug(bug);
405        }
406        Ok(kb)
407    }
408
409    /// Export bugs to YAML file
410    pub fn export_to_yaml(&self, path: &std::path::Path) -> Result<()> {
411        let content = serde_yaml::to_string(&self.bugs).map_err(|e| {
412            RagLocalizationError::Serialization(serde_json::Error::io(std::io::Error::new(
413                std::io::ErrorKind::InvalidData,
414                e.to_string(),
415            )))
416        })?;
417        std::fs::write(path, content)?;
418        Ok(())
419    }
420}
421
422impl Default for BugKnowledgeBase {
423    fn default() -> Self {
424        Self::new()
425    }
426}
427
/// Strategy for merging an SBFL score with a RAG score into one number.
#[derive(Debug, Clone, Copy)]
pub enum LocalizationFusion {
    /// Reciprocal Rank Fusion (recommended); `k` is added to each rank before
    /// taking the reciprocal
    RRF { k: f32 },
    /// Linear weighted combination; the RAG weight is `1.0 - sbfl_weight`
    Linear { sbfl_weight: f32 },
    /// Distribution-based score fusion (implemented here as a plain average)
    DBSF,
    /// Use only SBFL, RAG for context only
    SbflOnly,
}

impl Default for LocalizationFusion {
    /// Defaults to reciprocal rank fusion with `k = 60.0`.
    fn default() -> Self {
        Self::RRF { k: 60.0 }
    }
}

impl LocalizationFusion {
    /// Combine SBFL and RAG evidence into a single score.
    ///
    /// `RRF` looks only at the two ranks, `Linear` and `DBSF` look only at the
    /// two scores, and `SbflOnly` returns `sbfl_score` untouched. Ranks are
    /// used exactly as passed in; no offset is applied.
    pub fn combine(
        &self,
        sbfl_score: f32,
        rag_score: f32,
        sbfl_rank: usize,
        rag_rank: usize,
    ) -> f32 {
        match *self {
            Self::SbflOnly => sbfl_score,
            // Simplified DBSF: arithmetic mean of the two scores.
            Self::DBSF => (sbfl_score + rag_score) / 2.0,
            Self::Linear { sbfl_weight } => {
                let rag_weight = 1.0 - sbfl_weight;
                sbfl_score * sbfl_weight + rag_score * rag_weight
            }
            Self::RRF { k } => {
                // Each source contributes 1 / (k + rank).
                let reciprocal = |rank: usize| 1.0 / (k + rank as f32);
                reciprocal(sbfl_rank) + reciprocal(rag_rank)
            }
        }
    }

    /// Short label for the strategy, used in reports.
    pub fn name(&self) -> &'static str {
        match *self {
            Self::SbflOnly => "SBFL Only",
            Self::DBSF => "DBSF",
            Self::Linear { .. } => "Linear",
            Self::RRF { .. } => "RRF",
        }
    }
}
486
/// Configuration for RAG-enhanced fault localization.
///
/// Build one with `RagLocalizationConfig::new()` / `Default::default()` and
/// adjust it through the `with_*` methods, or set the public fields directly.
#[derive(Debug, Clone)]
pub struct RagLocalizationConfig {
    /// SBFL formula to use (default: `SbflFormula::Ochiai`)
    pub sbfl_formula: SbflFormula,
    /// Number of top statements to enhance with RAG (default: 10); forwarded
    /// to the SBFL localizer's `with_top_n`
    pub top_n: usize,
    /// Number of similar bugs to retrieve (default: 5)
    pub similar_bugs_k: usize,
    /// Number of fix suggestions to retrieve (default: 3)
    pub fix_suggestions_k: usize,
    /// Fusion strategy (default: `LocalizationFusion::default()`)
    pub fusion: LocalizationFusion,
    /// Include detailed explanations (default: true)
    pub include_explanations: bool,
}
503
504impl Default for RagLocalizationConfig {
505    fn default() -> Self {
506        Self {
507            sbfl_formula: SbflFormula::Ochiai,
508            top_n: 10,
509            similar_bugs_k: 5,
510            fix_suggestions_k: 3,
511            fusion: LocalizationFusion::default(),
512            include_explanations: true,
513        }
514    }
515}
516
517impl RagLocalizationConfig {
518    /// Create new config with defaults
519    pub fn new() -> Self {
520        Self::default()
521    }
522
523    /// Set SBFL formula
524    pub fn with_formula(mut self, formula: SbflFormula) -> Self {
525        self.sbfl_formula = formula;
526        self
527    }
528
529    /// Set top-N statements to enhance
530    pub fn with_top_n(mut self, n: usize) -> Self {
531        self.top_n = n;
532        self
533    }
534
535    /// Set number of similar bugs to retrieve
536    pub fn with_similar_bugs(mut self, k: usize) -> Self {
537        self.similar_bugs_k = k;
538        self
539    }
540
541    /// Set number of fix suggestions
542    pub fn with_fix_suggestions(mut self, k: usize) -> Self {
543        self.fix_suggestions_k = k;
544        self
545    }
546
547    /// Set fusion strategy
548    pub fn with_fusion(mut self, fusion: LocalizationFusion) -> Self {
549        self.fusion = fusion;
550        self
551    }
552
553    /// Enable/disable explanations
554    pub fn with_explanations(mut self, include: bool) -> Self {
555        self.include_explanations = include;
556        self
557    }
558}
559
/// RAG-enhanced fault localizer.
///
/// Runs SBFL first, then augments each ranked statement with similar bugs
/// and fix suggestions looked up in the knowledge base.
pub struct RagFaultLocalizer {
    /// SBFL localizer, configured from `config` when the localizer is built
    sbfl: SbflLocalizer,
    /// Bug knowledge base queried for every ranked statement
    knowledge_base: BugKnowledgeBase,
    /// Configuration
    config: RagLocalizationConfig,
}
569
570impl RagFaultLocalizer {
571    /// Create a new RAG fault localizer
572    pub fn new(knowledge_base: BugKnowledgeBase, config: RagLocalizationConfig) -> Self {
573        let sbfl = SbflLocalizer::new()
574            .with_formula(config.sbfl_formula)
575            .with_top_n(config.top_n)
576            .with_explanations(config.include_explanations);
577
578        Self {
579            sbfl,
580            knowledge_base,
581            config,
582        }
583    }
584
585    /// Create with default configuration
586    pub fn with_knowledge_base(knowledge_base: BugKnowledgeBase) -> Self {
587        Self::new(knowledge_base, RagLocalizationConfig::default())
588    }
589
590    /// Localize faults with RAG enhancement
591    pub fn localize(
592        &self,
593        coverage: &[StatementCoverage],
594        total_passed: usize,
595        total_failed: usize,
596    ) -> RagEnhancedResult {
597        tracing::info!(
598            "Running RAG-enhanced fault localization on {} statements",
599            coverage.len()
600        );
601
602        // Step 1: Run SBFL localization
603        let sbfl_result = self.sbfl.localize(coverage, total_passed, total_failed);
604
605        // Step 2: Enhance top-N rankings with RAG
606        let mut enhanced_rankings = Vec::new();
607
608        for (sbfl_rank, ranking) in sbfl_result.rankings.iter().enumerate() {
609            // Build query from statement context
610            let query = self.build_query(ranking);
611
612            // Search for similar bugs
613            let similar_bugs = self
614                .knowledge_base
615                .search(&query, self.config.similar_bugs_k);
616
617            // Get fix patterns from similar bugs
618            let bug_ids: Vec<String> = similar_bugs.iter().map(|b| b.id.clone()).collect();
619            let suggested_fixes = self.knowledge_base.get_fix_patterns(&bug_ids);
620
621            // Calculate RAG-based score (average similarity of top bugs)
622            let rag_score = if similar_bugs.is_empty() {
623                0.0
624            } else {
625                similar_bugs.iter().map(|b| b.similarity).sum::<f32>() / similar_bugs.len() as f32
626            };
627
628            // Combine scores using fusion strategy
629            let rag_rank = if rag_score > 0.0 {
630                sbfl_rank
631            } else {
632                sbfl_rank + 100
633            };
634            let combined_score =
635                self.config
636                    .fusion
637                    .combine(ranking.suspiciousness, rag_score, sbfl_rank, rag_rank);
638
639            // Generate contextual explanation
640            let context_explanation = if self.config.include_explanations {
641                self.generate_explanation(ranking, &similar_bugs)
642            } else {
643                String::new()
644            };
645
646            enhanced_rankings.push(RagEnhancedRanking {
647                sbfl_ranking: ranking.clone(),
648                similar_bugs,
649                suggested_fixes,
650                context_explanation,
651                combined_score,
652            });
653        }
654
655        // Re-sort by combined score
656        enhanced_rankings.sort_by(|a, b| {
657            b.combined_score
658                .partial_cmp(&a.combined_score)
659                .unwrap_or(std::cmp::Ordering::Equal)
660        });
661
662        // Update ranks
663        for (i, ranking) in enhanced_rankings.iter_mut().enumerate() {
664            ranking.sbfl_ranking.rank = i + 1;
665        }
666
667        RagEnhancedResult {
668            rankings: enhanced_rankings,
669            sbfl_result,
670            fusion_strategy: self.config.fusion.name().to_string(),
671            knowledge_base_size: self.knowledge_base.len(),
672        }
673    }
674
675    /// Build a search query from a suspicious ranking
676    fn build_query(&self, ranking: &SuspiciousnessRanking) -> String {
677        // Use file name and explanation to build query
678        let file_name = ranking
679            .statement
680            .file
681            .file_name()
682            .and_then(|n| n.to_str())
683            .unwrap_or("unknown");
684
685        format!(
686            "{} line {} {}",
687            file_name, ranking.statement.line, ranking.explanation
688        )
689    }
690
691    /// Generate contextual explanation
692    fn generate_explanation(
693        &self,
694        ranking: &SuspiciousnessRanking,
695        similar_bugs: &[SimilarBug],
696    ) -> String {
697        if similar_bugs.is_empty() {
698            return format!(
699                "Statement at {}:{} has suspiciousness score {:.3}. No similar historical bugs found in knowledge base.",
700                ranking.statement.file.display(),
701                ranking.statement.line,
702                ranking.suspiciousness
703            );
704        }
705
706        let top_bug = &similar_bugs[0];
707        let bug_count = similar_bugs.len();
708
709        format!(
710            "This pattern matches historical bug \"{}\" ({}) with {:.0}% similarity. \
711             {} similar bugs found in knowledge base. \
712             Most common category: {}.",
713            top_bug.id,
714            top_bug.summary,
715            top_bug.similarity * 100.0,
716            bug_count,
717            top_bug.category
718        )
719    }
720
721    /// Get the knowledge base
722    pub fn knowledge_base(&self) -> &BugKnowledgeBase {
723        &self.knowledge_base
724    }
725
726    /// Get mutable knowledge base
727    pub fn knowledge_base_mut(&mut self) -> &mut BugKnowledgeBase {
728        &mut self.knowledge_base
729    }
730}
731
/// Builder for creating RAG-enhanced fault localizer.
///
/// Collects an optional knowledge base and a configuration, then hands both
/// to `RagFaultLocalizer::new` in `build`.
pub struct RagFaultLocalizerBuilder {
    /// Knowledge base to use; when left as `None`, `build` falls back to
    /// `BugKnowledgeBase::default()`
    knowledge_base: Option<BugKnowledgeBase>,
    /// Configuration being assembled, starting from the defaults
    config: RagLocalizationConfig,
}
737
738impl RagFaultLocalizerBuilder {
739    /// Create a new builder
740    pub fn new() -> Self {
741        Self {
742            knowledge_base: None,
743            config: RagLocalizationConfig::default(),
744        }
745    }
746
747    /// Set the knowledge base
748    pub fn knowledge_base(mut self, kb: BugKnowledgeBase) -> Self {
749        self.knowledge_base = Some(kb);
750        self
751    }
752
753    /// Set SBFL formula
754    pub fn formula(mut self, formula: SbflFormula) -> Self {
755        self.config.sbfl_formula = formula;
756        self
757    }
758
759    /// Set top-N statements
760    pub fn top_n(mut self, n: usize) -> Self {
761        self.config.top_n = n;
762        self
763    }
764
765    /// Set similar bugs count
766    pub fn similar_bugs(mut self, k: usize) -> Self {
767        self.config.similar_bugs_k = k;
768        self
769    }
770
771    /// Set fix suggestions count
772    pub fn fix_suggestions(mut self, k: usize) -> Self {
773        self.config.fix_suggestions_k = k;
774        self
775    }
776
777    /// Set fusion strategy
778    pub fn fusion(mut self, fusion: LocalizationFusion) -> Self {
779        self.config.fusion = fusion;
780        self
781    }
782
783    /// Enable explanations
784    pub fn with_explanations(mut self) -> Self {
785        self.config.include_explanations = true;
786        self
787    }
788
789    /// Build the localizer
790    pub fn build(self) -> RagFaultLocalizer {
791        RagFaultLocalizer::new(self.knowledge_base.unwrap_or_default(), self.config)
792    }
793}
794
795impl Default for RagFaultLocalizerBuilder {
796    fn default() -> Self {
797        Self::new()
798    }
799}
800
/// Integration helper for generating reports.
///
/// A unit struct used only as a namespace: its YAML, JSON and terminal
/// renderers are associated functions that take a `RagEnhancedResult`.
pub struct RagReportGenerator;
803
804impl RagReportGenerator {
805    /// Generate YAML report
806    pub fn to_yaml(result: &RagEnhancedResult) -> Result<String> {
807        serde_yaml::to_string(result).map_err(|e| {
808            RagLocalizationError::Serialization(serde_json::Error::io(std::io::Error::new(
809                std::io::ErrorKind::InvalidData,
810                e.to_string(),
811            )))
812        })
813    }
814
815    /// Generate JSON report
816    pub fn to_json(result: &RagEnhancedResult) -> Result<String> {
817        serde_json::to_string_pretty(result).map_err(RagLocalizationError::Serialization)
818    }
819
820    /// Generate terminal report
821    pub fn to_terminal(result: &RagEnhancedResult) -> String {
822        let mut output = String::new();
823
824        output.push_str("╔══════════════════════════════════════════════════════════════╗\n");
825        output.push_str("║        RAG-ENHANCED FAULT LOCALIZATION REPORT                ║\n");
826        output.push_str("╠══════════════════════════════════════════════════════════════╣\n");
827        output.push_str(&format!(
828            "║ SBFL Formula: {:?}                                           \n",
829            result.sbfl_result.formula_used
830        ));
831        output.push_str(&format!(
832            "║ Fusion Strategy: {}                                          \n",
833            result.fusion_strategy
834        ));
835        output.push_str(&format!(
836            "║ Knowledge Base: {} bugs                                      \n",
837            result.knowledge_base_size
838        ));
839        output.push_str(&format!(
840            "║ Tests: {} passed, {} failed                                  \n",
841            result.sbfl_result.total_passed_tests, result.sbfl_result.total_failed_tests
842        ));
843        output.push_str("╠══════════════════════════════════════════════════════════════╣\n");
844        output.push_str("║  TOP SUSPICIOUS STATEMENTS (RAG-Enhanced)                    ║\n");
845        output.push_str("╠══════════════════════════════════════════════════════════════╣\n");
846
847        for ranking in result.rankings.iter().take(10) {
848            let file = ranking
849                .sbfl_ranking
850                .statement
851                .file
852                .file_name()
853                .and_then(|n| n.to_str())
854                .unwrap_or("unknown");
855            let line = ranking.sbfl_ranking.statement.line;
856            let score = ranking.combined_score;
857
858            // Score bar visualization
859            let bar_len = (score * 20.0).min(20.0) as usize;
860            let bar = "█".repeat(bar_len) + &"░".repeat(20 - bar_len);
861
862            output.push_str(&format!(
863                "║  #{:<2} {}:{:<6} {} {:.2}   ║\n",
864                ranking.sbfl_ranking.rank, file, line, bar, score
865            ));
866
867            // Show similar bugs if any
868            if !ranking.similar_bugs.is_empty() {
869                let top_bug = &ranking.similar_bugs[0];
870                output.push_str(&format!(
871                    "║      → Similar: {} ({:.0}%)                      ║\n",
872                    top_bug.summary,
873                    top_bug.similarity * 100.0
874                ));
875            }
876        }
877
878        output.push_str("╚══════════════════════════════════════════════════════════════╝\n");
879
880        output
881    }
882}
883
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tarantula::StatementId;

    /// Coverage fixture shared by several tests: a single statement located at
    /// `src/test.rs:10`, built with the counts `5` and `8`.
    fn single_statement_coverage() -> Vec<StatementCoverage> {
        let location = StatementId::new("src/test.rs", 10);
        vec![StatementCoverage::new(location, 5, 8)]
    }

    // ---------------------------------------------------------------
    // BugDocument
    // ---------------------------------------------------------------

    /// A document built with `new` exposes the id, title and category it was
    /// given; severity is expected to be 3 when it was never set explicitly.
    #[test]
    fn test_bug_document_creation() {
        let category = DefectCategory::MemorySafety;
        let doc = BugDocument::new("bug-001", "Null pointer dereference", category);

        assert_eq!(doc.severity, 3);
        assert_eq!(doc.category, DefectCategory::MemorySafety);
        assert_eq!(doc.title, "Null pointer dereference");
        assert_eq!(doc.id, "bug-001");
    }

    /// Each `with_*` builder call is reflected in the matching field.
    #[test]
    fn test_bug_document_builder() {
        let base = BugDocument::new("bug-002", "Race condition", DefectCategory::Concurrency);
        let doc = base
            .with_description("Thread safety issue in handler")
            .with_fix_commit("abc123")
            .with_affected_file("src/handler.rs")
            .with_severity(5)
            .with_symptom("Random test failures")
            .with_root_cause("Missing mutex lock")
            .with_fix_pattern("Add Arc<Mutex<T>> wrapper");

        // Scalar / text fields.
        assert_eq!(doc.description, "Thread safety issue in handler");
        assert_eq!(doc.fix_commit, "abc123");
        assert_eq!(doc.severity, 5);
        assert_eq!(doc.root_cause, "Missing mutex lock");
        assert_eq!(doc.fix_pattern, "Add Arc<Mutex<T>> wrapper");

        // List fields: one entry each, because each builder was called once.
        assert_eq!(doc.affected_files, vec!["src/handler.rs"]);
        assert_eq!(doc.symptoms, vec!["Random test failures"]);
    }

    /// The chunk produced for RAG indexing contains both the title and the
    /// description text.
    #[test]
    fn test_bug_document_to_rag_chunk() {
        let chunk = BugDocument::new("bug-003", "Buffer overflow", DefectCategory::MemorySafety)
            .with_description("Stack buffer overflow in parser")
            .with_affected_file("src/parser.rs")
            .to_rag_chunk();

        for fragment in ["Buffer overflow", "Stack buffer overflow"] {
            assert!(chunk.content.contains(fragment));
        }
    }

    // ---------------------------------------------------------------
    // DefectCategory
    // ---------------------------------------------------------------

    /// `DefectCategory::all()` is expected to list 18 categories.
    #[test]
    fn test_defect_category_all() {
        assert_eq!(DefectCategory::all().len(), 18);
    }

    /// Spot-check the human-readable names of two categories.
    #[test]
    fn test_defect_category_display() {
        let expectations = [
            (DefectCategory::MemorySafety, "Memory Safety"),
            (DefectCategory::OwnershipBorrow, "Ownership/Borrow"),
        ];

        for (category, label) in expectations {
            assert_eq!(category.display_name(), label);
        }
    }

    // ---------------------------------------------------------------
    // BugKnowledgeBase
    // ---------------------------------------------------------------

    /// A new knowledge base holds no bugs.
    #[test]
    fn test_knowledge_base_new() {
        let store = BugKnowledgeBase::new();

        assert_eq!(store.len(), 0);
        assert!(store.is_empty());
    }

    /// Adding one bug makes the knowledge base non-empty with length 1.
    #[test]
    fn test_knowledge_base_add_bug() {
        let mut store = BugKnowledgeBase::new();
        store.add_bug(BugDocument::new("bug-001", "Test bug", DefectCategory::TypeErrors));

        assert_eq!(store.len(), 1);
        assert!(!store.is_empty());
    }

    /// Lookup by id returns the stored bug for a known id and nothing for an
    /// unknown one.
    #[test]
    fn test_knowledge_base_get_bug() {
        let mut store = BugKnowledgeBase::new();
        store.add_bug(BugDocument::new("bug-001", "Test bug", DefectCategory::TypeErrors));

        let hit = store.get_bug("bug-001");
        assert!(hit.is_some());
        assert_eq!(hit.unwrap().title, "Test bug");

        let miss = store.get_bug("nonexistent");
        assert!(miss.is_none());
    }

    /// Filtering by category returns only the bugs filed under that category.
    #[test]
    fn test_knowledge_base_get_by_category() {
        // Two memory-safety bugs and one concurrency bug.
        let seeds = [
            ("bug-001", "Bug 1", DefectCategory::MemorySafety),
            ("bug-002", "Bug 2", DefectCategory::Concurrency),
            ("bug-003", "Bug 3", DefectCategory::MemorySafety),
        ];

        let mut store = BugKnowledgeBase::new();
        for (id, title, category) in seeds {
            store.add_bug(BugDocument::new(id, title, category));
        }

        assert_eq!(store.get_by_category(DefectCategory::MemorySafety).len(), 2);
        assert_eq!(store.get_by_category(DefectCategory::Concurrency).len(), 1);
    }

    // ---------------------------------------------------------------
    // LocalizationFusion
    // ---------------------------------------------------------------

    /// RRF with `k = 60` and ranks 0 and 1 is expected to yield
    /// `1/(60+0) + 1/(60+1)`.
    #[test]
    fn test_fusion_rrf() {
        let strategy = LocalizationFusion::RRF { k: 60.0 };
        let combined = strategy.combine(0.9, 0.8, 0, 1);

        let expected = 1.0 / 60.0 + 1.0 / 61.0;
        let delta = (combined - expected).abs();
        assert!(delta < 0.001);
    }

    /// Linear fusion with an SBFL weight of 0.7 is expected to yield
    /// `1.0 * 0.7 + 0.5 * 0.3 = 0.85`.
    #[test]
    fn test_fusion_linear() {
        let strategy = LocalizationFusion::Linear { sbfl_weight: 0.7 };
        let combined = strategy.combine(1.0, 0.5, 0, 0);

        let delta = (combined - 0.85).abs();
        assert!(delta < 0.001);
    }

    /// DBSF is expected to yield the mean of the two scores:
    /// `(0.8 + 0.6) / 2 = 0.7`.
    #[test]
    fn test_fusion_dbsf() {
        let strategy = LocalizationFusion::DBSF;
        let combined = strategy.combine(0.8, 0.6, 0, 0);

        let delta = (combined - 0.7).abs();
        assert!(delta < 0.001);
    }

    /// `SbflOnly` is expected to pass the SBFL score (first argument) through.
    #[test]
    fn test_fusion_sbfl_only() {
        let strategy = LocalizationFusion::SbflOnly;
        let combined = strategy.combine(0.9, 0.5, 0, 0);

        let delta = (combined - 0.9).abs();
        assert!(delta < 0.001);
    }

    // ---------------------------------------------------------------
    // RagLocalizationConfig
    // ---------------------------------------------------------------

    /// Default configuration: top 10 statements, 5 similar bugs, 3 fix
    /// suggestions, explanations enabled.
    #[test]
    fn test_config_defaults() {
        let defaults = RagLocalizationConfig::default();

        assert!(defaults.include_explanations);
        assert_eq!(
            (defaults.top_n, defaults.similar_bugs_k, defaults.fix_suggestions_k),
            (10, 5, 3)
        );
    }

    /// Values supplied through the config builder end up in the config fields.
    #[test]
    fn test_config_builder() {
        let fusion = LocalizationFusion::Linear { sbfl_weight: 0.8 };
        let config = RagLocalizationConfig::new()
            .with_formula(SbflFormula::Tarantula)
            .with_top_n(20)
            .with_similar_bugs(10)
            .with_fix_suggestions(5)
            .with_fusion(fusion)
            .with_explanations(false);

        assert!(!config.include_explanations);
        assert_eq!(config.fix_suggestions_k, 5);
        assert_eq!(config.similar_bugs_k, 10);
        assert_eq!(config.top_n, 20);
        assert!(matches!(config.sbfl_formula, SbflFormula::Tarantula));
    }

    // ---------------------------------------------------------------
    // RagFaultLocalizer
    // ---------------------------------------------------------------

    /// A localizer built from an empty knowledge base exposes it as empty.
    #[test]
    fn test_rag_localizer_creation() {
        let localizer = RagFaultLocalizer::with_knowledge_base(BugKnowledgeBase::new());

        assert!(localizer.knowledge_base().is_empty());
    }

    /// The builder hands the supplied knowledge base to the localizer.
    #[test]
    fn test_rag_localizer_builder() {
        let mut store = BugKnowledgeBase::new();
        store.add_bug(BugDocument::new("bug-001", "Test", DefectCategory::TypeErrors));

        let builder = RagFaultLocalizerBuilder::new()
            .knowledge_base(store)
            .formula(SbflFormula::Ochiai)
            .top_n(5)
            .similar_bugs(3)
            .fusion(LocalizationFusion::RRF { k: 60.0 })
            .with_explanations();
        let localizer = builder.build();

        assert_eq!(localizer.knowledge_base().len(), 1);
    }

    /// Localizing with a one-bug knowledge base yields rankings and reports
    /// the knowledge-base size and the "RRF" fusion strategy.
    #[test]
    fn test_rag_localizer_localize() {
        let known_bug = BugDocument::new(
            "bug-001",
            "Null pointer in parser",
            DefectCategory::MemorySafety,
        )
        .with_description("Parser crashes on null input")
        .with_fix_pattern("Add null check");

        let mut store = BugKnowledgeBase::new();
        store.add_bug(known_bug);
        let localizer = RagFaultLocalizer::with_knowledge_base(store);

        // Two statements in the same file with different coverage counts.
        let coverage = vec![
            StatementCoverage::new(StatementId::new("src/parser.rs", 10), 5, 8),
            StatementCoverage::new(StatementId::new("src/parser.rs", 20), 90, 2),
        ];

        let outcome = localizer.localize(&coverage, 100, 10);

        assert_eq!(outcome.fusion_strategy, "RRF");
        assert_eq!(outcome.knowledge_base_size, 1);
        assert!(!outcome.rankings.is_empty());
    }

    /// With an empty knowledge base, rankings are still produced but the top
    /// ranking carries no similar bugs.
    #[test]
    fn test_rag_localizer_empty_kb() {
        let localizer = RagFaultLocalizer::with_knowledge_base(BugKnowledgeBase::new());
        let coverage = single_statement_coverage();

        let outcome = localizer.localize(&coverage, 100, 10);

        assert_eq!(outcome.knowledge_base_size, 0);
        assert!(!outcome.rankings.is_empty());
        assert!(outcome.rankings[0].similar_bugs.is_empty());
    }

    // ---------------------------------------------------------------
    // RagReportGenerator
    // ---------------------------------------------------------------

    /// The terminal report contains its banner text and the SBFL formula label.
    #[test]
    fn test_report_generator_terminal() {
        let mut store = BugKnowledgeBase::new();
        store.add_bug(BugDocument::new("bug-001", "Test bug", DefectCategory::TypeErrors));
        let localizer = RagFaultLocalizer::with_knowledge_base(store);

        let coverage = single_statement_coverage();
        let outcome = localizer.localize(&coverage, 100, 10);
        let rendered = RagReportGenerator::to_terminal(&outcome);

        for marker in ["RAG-ENHANCED", "SBFL Formula"] {
            assert!(rendered.contains(marker));
        }
    }

    /// The JSON report mentions the `rankings` and `fusion_strategy` keys.
    #[test]
    fn test_report_generator_json() {
        let localizer = RagFaultLocalizer::with_knowledge_base(BugKnowledgeBase::new());

        let coverage = single_statement_coverage();
        let outcome = localizer.localize(&coverage, 100, 10);
        let rendered = RagReportGenerator::to_json(&outcome).unwrap();

        for key in ["rankings", "fusion_strategy"] {
            assert!(rendered.contains(key));
        }
    }

    // ---------------------------------------------------------------
    // SimilarBug
    // ---------------------------------------------------------------

    /// A serialized `SimilarBug` contains its id and similarity value.
    #[test]
    fn test_similar_bug_serialization() {
        let similar = SimilarBug {
            id: String::from("bug-001"),
            similarity: 0.85,
            category: DefectCategory::MemorySafety,
            summary: String::from("Null pointer"),
            fix_commit: String::from("abc123"),
        };

        let encoded = serde_json::to_string(&similar).unwrap();

        for needle in ["bug-001", "0.85"] {
            assert!(encoded.contains(needle));
        }
    }

    // ---------------------------------------------------------------
    // SuggestedFix
    // ---------------------------------------------------------------

    /// A serialized `SuggestedFix` contains its pattern and confidence value.
    #[test]
    fn test_suggested_fix_serialization() {
        let suggestion = SuggestedFix {
            pattern: String::from("Add null check"),
            confidence: 0.9,
            example: String::from("if x.is_some() { ... }"),
            source_bug_id: String::from("bug-001"),
        };

        let encoded = serde_json::to_string(&suggestion).unwrap();

        for needle in ["Add null check", "0.9"] {
            assert!(encoded.contains(needle));
        }
    }
}