Skip to main content

synwire_agent/sbfl/
ochiai.rs

1//! Ochiai coefficient for SBFL ranking.
2
3use std::collections::HashMap;
4
/// Coverage data for a single line in a source file.
///
/// Field naming follows standard SBFL notation:
/// `ef` / `ep` = failing / passing tests that **execute** the line;
/// `nf` / `np` = failing / passing tests that do **not** execute the line.
///
/// The type is `#[non_exhaustive]`, so code outside the defining crate cannot
/// use a struct literal and should build values with [`CoverageRecord::new`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct CoverageRecord {
    /// File path.
    pub file: String,
    /// Line number.
    pub line: u32,
    /// Number of failing tests that cover this line.
    pub ef: u32,
    /// Number of passing tests that cover this line.
    pub ep: u32,
    /// Number of failing tests that do NOT cover this line.
    pub nf: u32,
    /// Number of passing tests that do NOT cover this line.
    pub np: u32,
}

impl CoverageRecord {
    /// Create a new coverage record for a single source line.
    ///
    /// The counters are taken in the order `ef`, `ep`, `nf`, `np` (executed
    /// counts first, then not-executed counts) and are stored unchanged; no
    /// validation is performed on them.
    #[must_use]
    pub const fn new(file: String, line: u32, ef: u32, ep: u32, nf: u32, np: u32) -> Self {
        Self {
            file,
            line,
            ef,
            ep,
            nf,
            np,
        }
    }
}
39
/// Ochiai suspiciousness score used for SBFL ranking.
///
/// A larger value means the element is a more likely fault location.
///
/// The score is `ef / sqrt((ef + nf) * (ef + ep))`, evaluated in `f32`.
/// When `ef` is zero the result is `0.0` and the formula is not evaluated,
/// so the denominator can never be zero.
///
/// # Examples
///
/// ```
/// use synwire_agent::sbfl::ochiai;
/// assert_eq!(ochiai(0, 0, 5), 0.0);
/// let score = ochiai(10, 0, 0);
/// assert!(score > 0.9, "expected high score, got {score}");
/// ```
#[allow(clippy::cast_precision_loss, clippy::similar_names)]
pub fn ochiai(ef: u32, nf: u32, ep: u32) -> f32 {
    match ef {
        // No failing test executes the element: it cannot be implicated.
        0 => 0.0,
        _ => {
            let executed_failing = ef as f32;
            // Every failing test, whether or not it executes the element.
            let all_failing = executed_failing + nf as f32;
            // Every test (failing or passing) that executes the element.
            let all_executing = executed_failing + ep as f32;
            executed_failing / (all_failing * all_executing).sqrt()
        }
    }
}
64
65/// Ranks source files by their maximum Ochiai score.
66pub struct SbflRanker {
67    records: Vec<CoverageRecord>,
68}
69
70impl SbflRanker {
71    /// Create a ranker from coverage records.
72    pub const fn new(records: Vec<CoverageRecord>) -> Self {
73        Self { records }
74    }
75
76    /// Rank files by highest Ochiai score across all their lines.
77    ///
78    /// Returns `(file_path, max_ochiai_score)` sorted by score descending.
79    pub fn rank_files(&self) -> Vec<(String, f32)> {
80        let mut file_scores: HashMap<String, f32> = HashMap::new();
81        for record in &self.records {
82            let score = ochiai(record.ef, record.nf, record.ep);
83            let _ = file_scores
84                .entry(record.file.clone())
85                .and_modify(|s| {
86                    if score > *s {
87                        *s = score;
88                    }
89                })
90                .or_insert(score);
91        }
92        let mut ranked: Vec<_> = file_scores.into_iter().collect();
93        ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
94        ranked
95    }
96}
97
/// Fuse SBFL file rankings with semantic search results.
///
/// Takes SBFL file scores and semantic similarity scores, returns combined ranking.
/// `sbfl_weight` controls the relative contribution of SBFL scores; semantic
/// weight is `1.0 - sbfl_weight`. The weight is used as given (it is not
/// clamped to `0.0..=1.0`).
///
/// A file's combined score is the sum of its weighted contributions from both
/// inputs; a file that appears several times in one input contributes each
/// time. Files present in only one input are still included.
///
/// The result is sorted by combined score descending. Ties are broken by file
/// path ascending, and entries whose combined score is NaN are placed last, so
/// the output order is deterministic.
#[must_use]
pub fn fuse_sbfl_semantic(
    sbfl: &[(String, f32)],
    semantic: &[(String, f32)],
    sbfl_weight: f32,
) -> Vec<(String, f32)> {
    let semantic_weight = 1.0 - sbfl_weight;

    // Key on borrowed paths so each distinct file is allocated only once,
    // when the final result is built.
    let mut combined: HashMap<&str, f32> = HashMap::new();
    for (scores, weight) in [(sbfl, sbfl_weight), (semantic, semantic_weight)] {
        for (file, score) in scores {
            let contribution = weight * score;
            let _ = combined
                .entry(file.as_str())
                .and_modify(|total| *total += contribution)
                .or_insert(contribution);
        }
    }

    let mut result: Vec<(String, f32)> = combined
        .into_iter()
        .map(|(file, score)| (file.to_owned(), score))
        .collect();
    // Input scores are caller-supplied and may be NaN, so the comparator must
    // be a total order: non-NaN before NaN, then score descending, then path.
    result.sort_unstable_by(|a, b| {
        a.1.is_nan()
            .cmp(&b.1.is_nan())
            .then_with(|| b.1.total_cmp(&a.1))
            .then_with(|| a.0.cmp(&b.0))
    });
    result
}
129
#[cfg(test)]
#[allow(clippy::similar_names, clippy::float_cmp)]
mod tests {
    use super::*;

    #[test]
    fn ochiai_zero_when_no_failures() {
        // No failing test executes the line, so the score is exactly zero.
        let score = ochiai(0, 0, 5);
        assert_eq!(score, 0.0);
    }

    #[test]
    fn ochiai_high_score_for_fault_concentrated() {
        // Arguments are (ef, nf, ep): ten failing tests execute the line,
        // no failing test misses it, and no passing test executes it.
        let score = ochiai(10, 0, 0);
        assert!(score > 0.9, "expected > 0.9, got {score}");
    }

    #[test]
    fn sbfl_ranks_correctly() {
        // Constructor argument order is (file, line, ef, ep, nf, np).
        let buggy = CoverageRecord::new("buggy.rs".to_owned(), 42, 8, 2, 0, 0);
        let clean = CoverageRecord::new("clean.rs".to_owned(), 10, 0, 5, 0, 5);

        let ranked = SbflRanker::new(vec![buggy, clean]).rank_files();

        assert_eq!(ranked[0].0, "buggy.rs");
    }
}
171}