Skip to main content

bvr/analysis/
correlation.rs

1use std::collections::BTreeMap;
2use std::fs::{File, OpenOptions};
3use std::io::{BufRead, BufReader, Write};
4use std::path::{Path, PathBuf};
5
6use chrono::Utc;
7use serde::{Deserialize, Serialize};
8
9use super::git_history::HistoryCommitCompat;
10
11// ---------------------------------------------------------------------------
12// Types
13// ---------------------------------------------------------------------------
14
/// Verdict recorded for a proposed commit-to-bead correlation.
///
/// With `rename_all = "snake_case"` the variants serialize as `confirm`,
/// `reject` and `ignore`.
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
16#[serde(rename_all = "snake_case")]
17pub enum FeedbackType {
    /// Counted as `confirmed` by `FeedbackStore::stats`.
18    Confirm,
    /// Counted as `rejected` by `FeedbackStore::stats`.
19    Reject,
    /// Counted as `ignored` by `FeedbackStore::stats`; left out of the accuracy rate.
20    Ignore,
21}
22
23impl std::fmt::Display for FeedbackType {
24    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
25        match self {
26            Self::Confirm => write!(f, "confirm"),
27            Self::Reject => write!(f, "reject"),
28            Self::Ignore => write!(f, "ignore"),
29        }
30    }
31}
32
/// One feedback record for a commit/bead correlation.
///
/// `FeedbackStore` writes each record as one JSON line and keys its cache by
/// `commit_sha` plus `bead_id`.
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct CorrelationFeedback {
    /// SHA of the commit the feedback refers to.
35    pub commit_sha: String,
    /// ID of the bead the commit was correlated with.
36    pub bead_id: String,
    /// When the feedback was given. `FeedbackStore::confirm` / `reject` store
    /// the current UTC time in RFC 3339 form; the type itself accepts any string.
37    pub feedback_at: String,
    /// Who gave the feedback (the `by` argument of `confirm` / `reject`).
38    pub feedback_by: String,
    /// The verdict; serialized under the key `type`.
39    #[serde(rename = "type")]
40    pub feedback_type: FeedbackType,
    /// Free-form justification supplied by the caller; may be empty.
41    pub reason: String,
    /// Confidence supplied by the caller when the feedback was recorded;
    /// averaged per verdict by `FeedbackStore::stats`.
42    pub original_conf: f64,
43}
44
/// Aggregate counts and averages produced by `FeedbackStore::stats`.
45#[derive(Debug, Clone, Serialize)]
46pub struct FeedbackStats {
    /// `confirmed + rejected + ignored`.
47    pub total_feedback: usize,
    /// Number of cached entries with `FeedbackType::Confirm`.
48    pub confirmed: usize,
    /// Number of cached entries with `FeedbackType::Reject`.
49    pub rejected: usize,
    /// Number of cached entries with `FeedbackType::Ignore`.
50    pub ignored: usize,
    /// `confirmed / (confirmed + rejected)`, or `0.0` when both are zero.
51    pub accuracy_rate: f64,
    /// Mean `original_conf` of confirmed entries, or `0.0` when there are none.
52    pub avg_confirm_conf: f64,
    /// Mean `original_conf` of rejected entries, or `0.0` when there are none.
53    pub avg_reject_conf: f64,
54}
55
56// ---------------------------------------------------------------------------
57// Signal and Explanation types
58// ---------------------------------------------------------------------------
59
/// One piece of evidence contributing to a correlation explanation.
60#[derive(Debug, Clone, Serialize)]
61pub struct CorrelationSignal {
    /// Signal identifier, serialized under the key `type`. `build_explanation`
    /// emits `co_commit`, `message_match`, `timing`, `author_match`, `unknown`
    /// and `file_overlap`.
62    #[serde(rename = "type")]
63    pub signal_type: String,
    /// Weight of this signal; `build_explanation` sums the weights into
    /// `CorrelationExplanation::total_weight`.
64    pub weight: u32,
    /// Human-readable description of the evidence.
65    pub detail: String,
66}
67
/// Serializable explanation of why a commit was correlated with a bead, as
/// assembled by `build_explanation`.
68#[derive(Debug, Clone, Serialize)]
69pub struct CorrelationExplanation {
    /// SHA copied from the commit.
70    pub commit_sha: String,
    /// Bead ID the explanation was requested for.
71    pub bead_id: String,
    /// The commit's confidence value, copied unchanged.
72    pub confidence: f64,
    /// `confidence * 100`, clamped to `0..=100` and formatted with zero decimals.
73    pub confidence_pct: u32,
    /// Label returned by `confidence_level` for `confidence`.
74    pub level: String,
    /// The commit's correlation method string, copied unchanged.
75    pub method: String,
    /// Evidence entries, method-specific signals first.
76    pub signals: Vec<CorrelationSignal>,
    /// Sum of the `weight` of every entry in `signals`.
77    pub total_weight: u32,
    /// One-line summary naming the method, percentage and signal count.
78    pub summary: String,
    /// Suggested next step, or a note about feedback that already exists.
79    pub recommendation: String,
80}
81
82// ---------------------------------------------------------------------------
83// FeedbackStore
84// ---------------------------------------------------------------------------
85
/// Feedback store backed by a JSONL file, with the entries mirrored in memory.
86pub struct FeedbackStore {
    /// Location of the JSONL file that `save` appends to.
87    path: PathBuf,
    /// Entries keyed by `cache_key(commit_sha, bead_id)`; inserting an entry
    /// for an existing key replaces the earlier one.
88    cache: BTreeMap<String, CorrelationFeedback>,
89}
90
/// Builds the cache key for a commit/bead pair: `<commit_sha>:<bead_id>`.
fn cache_key(commit_sha: &str, bead_id: &str) -> String {
    [commit_sha, bead_id].join(":")
}
94
95impl FeedbackStore {
96    /// Open (or create) a feedback store backed by the given JSONL file.
97    pub fn open(path: &Path) -> crate::Result<Self> {
98        let mut cache = BTreeMap::new();
99
100        if path.exists() {
101            let file = File::open(path)?;
102            let reader = BufReader::new(file);
103            for line in reader.lines() {
104                let line = line?;
105                let line = line.trim();
106                if line.is_empty() {
107                    continue;
108                }
109                match serde_json::from_str::<CorrelationFeedback>(line) {
110                    Ok(entry) => {
111                        let key = cache_key(&entry.commit_sha, &entry.bead_id);
112                        cache.insert(key, entry);
113                    }
114                    Err(err) => {
115                        tracing::warn!(
116                            "skipping malformed feedback line in {}: {err}",
117                            path.display()
118                        );
119                    }
120                }
121            }
122        }
123
124        Ok(Self {
125            path: path.to_path_buf(),
126            cache,
127        })
128    }
129
130    /// Record feedback, appending to the JSONL file and updating the cache.
131    pub fn save(&mut self, feedback: &CorrelationFeedback) -> crate::Result<()> {
132        if let Some(parent) = self.path.parent() {
133            std::fs::create_dir_all(parent)?;
134        }
135
136        let mut file = OpenOptions::new()
137            .create(true)
138            .append(true)
139            .open(&self.path)?;
140
141        let json = serde_json::to_string(feedback)?;
142        writeln!(file, "{json}")?;
143
144        let key = cache_key(&feedback.commit_sha, &feedback.bead_id);
145        self.cache.insert(key, feedback.clone());
146        Ok(())
147    }
148
149    /// Convenience: record a confirmation.
150    pub fn confirm(
151        &mut self,
152        commit_sha: &str,
153        bead_id: &str,
154        by: &str,
155        original_conf: f64,
156        reason: &str,
157    ) -> crate::Result<CorrelationFeedback> {
158        let feedback = CorrelationFeedback {
159            commit_sha: commit_sha.to_string(),
160            bead_id: bead_id.to_string(),
161            feedback_at: Utc::now().to_rfc3339(),
162            feedback_by: by.to_string(),
163            feedback_type: FeedbackType::Confirm,
164            reason: reason.to_string(),
165            original_conf,
166        };
167        self.save(&feedback)?;
168        Ok(feedback)
169    }
170
171    /// Convenience: record a rejection.
172    pub fn reject(
173        &mut self,
174        commit_sha: &str,
175        bead_id: &str,
176        by: &str,
177        original_conf: f64,
178        reason: &str,
179    ) -> crate::Result<CorrelationFeedback> {
180        let feedback = CorrelationFeedback {
181            commit_sha: commit_sha.to_string(),
182            bead_id: bead_id.to_string(),
183            feedback_at: Utc::now().to_rfc3339(),
184            feedback_by: by.to_string(),
185            feedback_type: FeedbackType::Reject,
186            reason: reason.to_string(),
187            original_conf,
188        };
189        self.save(&feedback)?;
190        Ok(feedback)
191    }
192
193    /// Look up existing feedback for a commit+bead pair.
194    #[must_use]
195    pub fn get(&self, commit_sha: &str, bead_id: &str) -> Option<&CorrelationFeedback> {
196        self.cache.get(&cache_key(commit_sha, bead_id))
197    }
198
199    /// Check if feedback exists for a commit+bead pair.
200    #[must_use]
201    pub fn has_feedback(&self, commit_sha: &str, bead_id: &str) -> bool {
202        self.cache.contains_key(&cache_key(commit_sha, bead_id))
203    }
204
205    /// All feedback entries for a specific bead.
206    #[must_use]
207    pub fn get_by_bead(&self, bead_id: &str) -> Vec<&CorrelationFeedback> {
208        self.cache
209            .values()
210            .filter(|entry| entry.bead_id == bead_id)
211            .collect()
212    }
213
214    /// Compute aggregate statistics.
215    #[must_use]
216    pub fn stats(&self) -> FeedbackStats {
217        let mut confirmed = 0usize;
218        let mut rejected = 0usize;
219        let mut ignored = 0usize;
220        let mut confirm_conf_sum = 0.0_f64;
221        let mut reject_conf_sum = 0.0_f64;
222
223        for entry in self.cache.values() {
224            match entry.feedback_type {
225                FeedbackType::Confirm => {
226                    confirmed += 1;
227                    confirm_conf_sum += entry.original_conf;
228                }
229                FeedbackType::Reject => {
230                    rejected += 1;
231                    reject_conf_sum += entry.original_conf;
232                }
233                FeedbackType::Ignore => {
234                    ignored += 1;
235                }
236            }
237        }
238
239        let total = confirmed + rejected + ignored;
240        let accuracy_rate = if confirmed + rejected > 0 {
241            confirmed as f64 / (confirmed + rejected) as f64
242        } else {
243            0.0
244        };
245        let avg_confirm_conf = if confirmed > 0 {
246            confirm_conf_sum / confirmed as f64
247        } else {
248            0.0
249        };
250        let avg_reject_conf = if rejected > 0 {
251            reject_conf_sum / rejected as f64
252        } else {
253            0.0
254        };
255
256        FeedbackStats {
257            total_feedback: total,
258            confirmed,
259            rejected,
260            ignored,
261            accuracy_rate,
262            avg_confirm_conf,
263            avg_reject_conf,
264        }
265    }
266}
267
268// ---------------------------------------------------------------------------
269// Explanation builder
270// ---------------------------------------------------------------------------
271
/// Translate a confidence score into its descriptive level.
///
/// Thresholds are inclusive lower bounds: `0.9` is "very high", `0.75` "high",
/// `0.5` "moderate" and `0.3` "low". Anything that reaches none of them
/// (including NaN, which fails every comparison) is "very low".
#[must_use]
pub fn confidence_level(confidence: f64) -> &'static str {
    // Ordered from the highest floor down; the first floor reached wins.
    const FLOORS: [(f64, &str); 4] = [
        (0.9, "very high"),
        (0.75, "high"),
        (0.5, "moderate"),
        (0.3, "low"),
    ];

    FLOORS
        .iter()
        .find(|(floor, _)| confidence >= *floor)
        .map_or("very low", |&(_, label)| label)
}
287
288/// Build a detailed explanation for a commit-bead correlation.
289#[must_use]
290pub fn build_explanation(
291    commit: &HistoryCommitCompat,
292    bead_id: &str,
293    existing_feedback: Option<&CorrelationFeedback>,
294) -> CorrelationExplanation {
295    let mut signals = Vec::new();
296
297    // Primary signal based on correlation method
298    match commit.method.as_str() {
299        "co_committed" => {
300            signals.push(CorrelationSignal {
301                signal_type: "co_commit".to_string(),
302                weight: 50,
303                detail: "Commit modified both code and beads file together".to_string(),
304            });
305        }
306        "explicit_id" => {
307            signals.push(CorrelationSignal {
308                signal_type: "message_match".to_string(),
309                weight: 40,
310                detail: format!("Commit message references bead ID '{bead_id}'"),
311            });
312        }
313        "temporal_author" => {
314            signals.push(CorrelationSignal {
315                signal_type: "timing".to_string(),
316                weight: 25,
317                detail: "Commit within bead's active time window".to_string(),
318            });
319            signals.push(CorrelationSignal {
320                signal_type: "author_match".to_string(),
321                weight: 15,
322                detail: format!("Same author: {}", commit.author),
323            });
324        }
325        _ => {
326            signals.push(CorrelationSignal {
327                signal_type: "unknown".to_string(),
328                weight: 10,
329                detail: format!("Correlation method: {}", commit.method),
330            });
331        }
332    }
333
334    // File overlap signal
335    let file_count = commit.files.len();
336    if file_count > 0 {
337        let file_count_u32 = u32::try_from(file_count).unwrap_or(u32::MAX);
338        let weight = file_count_u32.saturating_mul(5).min(15);
339        signals.push(CorrelationSignal {
340            signal_type: "file_overlap".to_string(),
341            weight,
342            detail: format!("{file_count} file(s) modified in this commit"),
343        });
344    }
345
346    let total_weight: u32 = signals.iter().map(|s| s.weight).sum();
347    let level = confidence_level(commit.confidence);
348    let confidence_pct = format!("{:.0}", (commit.confidence * 100.0).clamp(0.0, 100.0))
349        .parse::<u32>()
350        .unwrap_or(0);
351
352    let summary = format!(
353        "{} with bead update ({confidence_pct}% confidence, {} signal{})",
354        match commit.method.as_str() {
355            "co_committed" => "Co-committed",
356            "explicit_id" => "Explicit ID reference",
357            "temporal_author" => "Temporal+author match",
358            _ => "Unknown method",
359        },
360        signals.len(),
361        if signals.len() == 1 { "" } else { "s" }
362    );
363
364    let recommendation = existing_feedback.map_or_else(
365        || {
366            if commit.confidence >= 0.75 {
367                "High confidence - likely correct, no action needed".to_string()
368            } else if commit.confidence >= 0.5 {
369                "Moderate confidence - review recommended".to_string()
370            } else {
371                "Low confidence - manual verification suggested".to_string()
372            }
373        },
374        |fb| {
375            format!(
376                "Already {} by {} at {}",
377                fb.feedback_type, fb.feedback_by, fb.feedback_at
378            )
379        },
380    );
381
382    CorrelationExplanation {
383        commit_sha: commit.sha.clone(),
384        bead_id: bead_id.to_string(),
385        confidence: commit.confidence,
386        confidence_pct,
387        level: level.to_string(),
388        method: commit.method.clone(),
389        signals,
390        total_weight,
391        summary,
392        recommendation,
393    }
394}
395
396/// Parse a `SHA:beadID` argument into (`commit_sha`, `bead_id`).
397pub fn parse_correlation_arg(arg: &str) -> crate::Result<(String, String)> {
398    let parts: Vec<&str> = arg.splitn(2, ':').collect();
399    if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
400        return Err(crate::BvrError::InvalidArgument(format!(
401            "Expected format SHA:beadID, got '{arg}'"
402        )));
403    }
404    Ok((parts[0].to_string(), parts[1].to_string()))
405}
406
/// Location of the feedback file under a repository root:
/// `<repo_root>/.beads/correlation_feedback.jsonl`.
#[must_use]
pub fn default_feedback_path(repo_root: &Path) -> PathBuf {
    let mut feedback_path = repo_root.to_path_buf();
    feedback_path.push(".beads");
    feedback_path.push("correlation_feedback.jsonl");
    feedback_path
}
412
413// ---------------------------------------------------------------------------
414// Robot output structs
415// ---------------------------------------------------------------------------
416
/// Serializable payload pairing a robot envelope with one correlation explanation.
417#[derive(Debug, Serialize)]
418pub struct RobotExplainOutput {
    /// Flattened: the envelope's fields serialize at the top level of this object.
419    #[serde(flatten)]
420    pub envelope: crate::robot::RobotEnvelope,
    /// Serialized under the `explanation` key.
421    pub explanation: CorrelationExplanation,
422}
423
/// Serializable result of a correlation feedback action.
///
/// NOTE(review): nothing in this file constructs this type; the field notes
/// below are inferred from the names and should be confirmed against the caller.
424#[derive(Debug, Serialize)]
425pub struct RobotCorrelationActionOutput {
    /// Outcome label chosen by the caller.
426    pub status: String,
    /// Presumably the commit SHA the action applied to.
427    pub commit: String,
    /// Presumably the bead ID the action applied to.
428    pub bead: String,
    /// Presumably who performed the action.
429    pub by: String,
    /// Presumably the reason supplied with the action.
430    pub reason: String,
    /// Presumably the confidence before feedback (cf. `CorrelationFeedback::original_conf`).
431    pub orig_conf: f64,
432}
433
/// Serializable payload combining a robot envelope with feedback statistics.
///
/// Both fields are flattened, so the envelope's fields and the
/// `FeedbackStats` fields all serialize at the top level of one object.
434#[derive(Debug, Serialize)]
435pub struct RobotCorrelationStatsOutput {
436    #[serde(flatten)]
437    pub envelope: crate::robot::RobotEnvelope,
438    #[serde(flatten)]
439    pub stats: FeedbackStats,
440}
441
442// ---------------------------------------------------------------------------
443// Tests
444// ---------------------------------------------------------------------------
445
// Unit tests for the feedback store, the explanation builder and argument parsing.
446#[cfg(test)]
447mod tests {
448    use super::*;
449    use crate::analysis::git_history::{HistoryCommitCompat, HistoryFileChangeCompat};
450    use tempfile::TempDir;
451
    /// Builds a commit fixture that touches one file and carries the given
    /// correlation `method` and `confidence`.
452    fn make_commit(method: &str, confidence: f64) -> HistoryCommitCompat {
453        HistoryCommitCompat {
454            sha: "abc123def456".to_string(),
455            short_sha: "abc123d".to_string(),
456            message: "feat(bd-test): implement feature".to_string(),
457            author: "TestUser".to_string(),
458            author_email: "test@example.com".to_string(),
459            timestamp: "2026-01-15T10:00:00Z".to_string(),
460            files: vec![HistoryFileChangeCompat {
461                path: "src/main.rs".to_string(),
462                action: "M".to_string(),
463                insertions: 10,
464                deletions: 2,
465            }],
466            method: method.to_string(),
467            confidence,
468            reason: "test reason".to_string(),
469            field_changes: vec![],
470            bead_diff_lines: vec![],
471        }
472    }
473
    // Entries written through one store instance are visible after reopening the file.
474    #[test]
475    fn feedback_store_roundtrip() {
476        let dir = TempDir::new().unwrap();
477        let path = dir.path().join("feedback.jsonl");
478
479        {
480            let mut store = FeedbackStore::open(&path).unwrap();
481            assert_eq!(store.stats().total_feedback, 0);
482
483            store
484                .confirm("sha1", "bd-1", "agent-a", 0.9, "looks good")
485                .unwrap();
486            store
487                .reject("sha2", "bd-2", "agent-b", 0.3, "false positive")
488                .unwrap();
489
490            assert!(store.has_feedback("sha1", "bd-1"));
491            assert!(!store.has_feedback("sha1", "bd-2"));
492
493            let stats = store.stats();
494            assert_eq!(stats.total_feedback, 2);
495            assert_eq!(stats.confirmed, 1);
496            assert_eq!(stats.rejected, 1);
497        }
498
499        // Reopen store and verify persistence
500        {
501            let store = FeedbackStore::open(&path).unwrap();
502            assert_eq!(store.stats().total_feedback, 2);
503            assert!(store.has_feedback("sha1", "bd-1"));
504
505            let fb = store.get("sha1", "bd-1").unwrap();
506            assert_eq!(fb.feedback_type, FeedbackType::Confirm);
507            assert_eq!(fb.feedback_by, "agent-a");
508        }
509    }
510
    // `get_by_bead` returns only the entries whose bead ID matches.
511    #[test]
512    fn feedback_store_get_by_bead() {
513        let dir = TempDir::new().unwrap();
514        let path = dir.path().join("feedback.jsonl");
515
516        let mut store = FeedbackStore::open(&path).unwrap();
517        store.confirm("sha1", "bd-1", "agent", 0.8, "").unwrap();
518        store.confirm("sha2", "bd-1", "agent", 0.7, "").unwrap();
519        store.confirm("sha3", "bd-2", "agent", 0.9, "").unwrap();
520
521        let entries = store.get_by_bead("bd-1");
522        assert_eq!(entries.len(), 2);
523    }
524
    // Eight confirmations and two rejections give an accuracy rate of 0.8.
525    #[test]
526    fn feedback_stats_accuracy() {
527        let dir = TempDir::new().unwrap();
528        let path = dir.path().join("feedback.jsonl");
529
530        let mut store = FeedbackStore::open(&path).unwrap();
531        for i in 0..8 {
532            store
533                .confirm(&format!("sha-c{i}"), "bd-1", "agent", 0.8, "")
534                .unwrap();
535        }
536        for i in 0..2 {
537            store
538                .reject(&format!("sha-r{i}"), "bd-1", "agent", 0.3, "")
539                .unwrap();
540        }
541
542        let stats = store.stats();
543        assert_eq!(stats.confirmed, 8);
544        assert_eq!(stats.rejected, 2);
545        assert!((stats.accuracy_rate - 0.8).abs() < 0.001);
546        assert!((stats.avg_confirm_conf - 0.8).abs() < 0.001);
547        assert!((stats.avg_reject_conf - 0.3).abs() < 0.001);
548    }
549
550    #[test]
551    fn explanation_co_committed() {
552        let commit = make_commit("co_committed", 0.95);
553        let explanation = build_explanation(&commit, "bd-test", None);
554
555        assert_eq!(explanation.level, "very high");
556        assert_eq!(explanation.confidence_pct, 95);
557        assert!(
558            explanation
559                .signals
560                .iter()
561                .any(|s| s.signal_type == "co_commit")
562        );
563        assert!(explanation.recommendation.contains("likely correct"));
564    }
565
566    #[test]
567    fn explanation_explicit_id() {
568        let commit = make_commit("explicit_id", 0.75);
569        let explanation = build_explanation(&commit, "bd-test", None);
570
571        assert_eq!(explanation.level, "high");
572        assert!(
573            explanation
574                .signals
575                .iter()
576                .any(|s| s.signal_type == "message_match")
577        );
578    }
579
580    #[test]
581    fn explanation_low_confidence() {
582        let commit = make_commit("temporal_author", 0.25);
583        let explanation = build_explanation(&commit, "bd-test", None);
584
585        assert_eq!(explanation.level, "very low");
586        assert!(explanation.recommendation.contains("manual verification"));
587    }
588
    // When feedback already exists, the recommendation reports it instead of
    // a confidence-based suggestion.
589    #[test]
590    fn explanation_with_existing_feedback() {
591        let commit = make_commit("co_committed", 0.95);
592        let fb = CorrelationFeedback {
593            commit_sha: "abc123def456".to_string(),
594            bead_id: "bd-test".to_string(),
595            feedback_at: "2026-01-15T12:00:00Z".to_string(),
596            feedback_by: "agent-x".to_string(),
597            feedback_type: FeedbackType::Confirm,
598            reason: "verified".to_string(),
599            original_conf: 0.95,
600        };
601        let explanation = build_explanation(&commit, "bd-test", Some(&fb));
602
603        assert!(explanation.recommendation.contains("Already confirm"));
604        assert!(explanation.recommendation.contains("agent-x"));
605    }
606
607    #[test]
608    fn parse_correlation_arg_valid() {
609        let (sha, bead) = parse_correlation_arg("abc123:bd-test").unwrap();
610        assert_eq!(sha, "abc123");
611        assert_eq!(bead, "bd-test");
612    }
613
    // A missing colon or an empty side is rejected.
614    #[test]
615    fn parse_correlation_arg_invalid() {
616        assert!(parse_correlation_arg("no-colon").is_err());
617        assert!(parse_correlation_arg(":bd-test").is_err());
618        assert!(parse_correlation_arg("sha:").is_err());
619    }
620
    // Each threshold is checked at its floor and just below it.
621    #[test]
622    fn confidence_level_boundaries() {
623        assert_eq!(confidence_level(0.95), "very high");
624        assert_eq!(confidence_level(0.90), "very high");
625        assert_eq!(confidence_level(0.89), "high");
626        assert_eq!(confidence_level(0.75), "high");
627        assert_eq!(confidence_level(0.74), "moderate");
628        assert_eq!(confidence_level(0.50), "moderate");
629        assert_eq!(confidence_level(0.49), "low");
630        assert_eq!(confidence_level(0.30), "low");
631        assert_eq!(confidence_level(0.29), "very low");
632        assert_eq!(confidence_level(0.0), "very low");
633    }
634
    // Opening a path that does not exist yields an empty store rather than an error.
635    #[test]
636    fn empty_feedback_store_stats() {
637        let dir = TempDir::new().unwrap();
638        let path = dir.path().join("nonexistent.jsonl");
639        let store = FeedbackStore::open(&path).unwrap();
640        let stats = store.stats();
641
642        assert_eq!(stats.total_feedback, 0);
643        assert!((stats.accuracy_rate - 0.0).abs() < f64::EPSILON);
644    }
645
646    #[test]
647    fn ignore_feedback_counted_in_stats() {
648        let dir = TempDir::new().unwrap();
649        let path = dir.path().join("feedback.jsonl");
650        let mut store = FeedbackStore::open(&path).unwrap();
651
652        store.confirm("sha1", "bd-1", "agent", 0.8, "good").unwrap();
653        store.reject("sha2", "bd-2", "agent", 0.3, "bad").unwrap();
654        // Manually save an Ignore entry since there's no convenience method.
655        let ignore_fb = CorrelationFeedback {
656            commit_sha: "sha3".to_string(),
657            bead_id: "bd-3".to_string(),
658            feedback_at: chrono::Utc::now().to_rfc3339(),
659            feedback_by: "agent".to_string(),
660            feedback_type: FeedbackType::Ignore,
661            reason: "not relevant".to_string(),
662            original_conf: 0.5,
663        };
664        store.save(&ignore_fb).unwrap();
665
666        let stats = store.stats();
667        assert_eq!(stats.total_feedback, 3);
668        assert_eq!(stats.confirmed, 1);
669        assert_eq!(stats.rejected, 1);
670        assert_eq!(stats.ignored, 1);
671        // accuracy_rate only considers confirmed / (confirmed + rejected)
672        assert!((stats.accuracy_rate - 0.5).abs() < f64::EPSILON);
673    }
674
    // With no confirmations or rejections the accuracy rate falls back to 0.0.
675    #[test]
676    fn all_ignored_yields_zero_accuracy() {
677        let dir = TempDir::new().unwrap();
678        let path = dir.path().join("feedback.jsonl");
679        let mut store = FeedbackStore::open(&path).unwrap();
680
681        let fb = CorrelationFeedback {
682            commit_sha: "sha1".to_string(),
683            bead_id: "bd-1".to_string(),
684            feedback_at: chrono::Utc::now().to_rfc3339(),
685            feedback_by: "agent".to_string(),
686            feedback_type: FeedbackType::Ignore,
687            reason: String::new(),
688            original_conf: 0.5,
689        };
690        store.save(&fb).unwrap();
691
692        let stats = store.stats();
693        assert_eq!(stats.total_feedback, 1);
694        assert_eq!(stats.ignored, 1);
695        assert!((stats.accuracy_rate - 0.0).abs() < f64::EPSILON);
696    }
697}