1use anyhow::Result;
23use serde::{Deserialize, Serialize};
24
25use super::record::{
26 Category, ConfidenceScore, Priority, QualityScore, Record, RecordLifecycle, RecordSource,
27 RecordVersion, StalenessScore,
28};
29use super::session::now_secs;
30use super::Store;
31use crate::health::enrichment::EnrichmentDepth;
32
/// Key prefix shared by every extraction analytics record in the store.
///
/// [`key_for`] prepends it to a gotcha slug and [`compute_stats`] scans by it.
pub const EXTRACTION_PREFIX: &str = "analytics:extraction:";

/// Tag that marks a gotcha as enriched; extraction records are only written
/// when this tag is present.
pub const ENRICHED_TAG: &str = "enriched";

/// Prefix of the tag carrying the enrichment depth
/// (`depth:fast`, `depth:standard`, `depth:deep`).
pub const DEPTH_TAG_PREFIX: &str = "depth:";

/// Prefix of the tag carrying the signal source
/// (`signal-source:ast`, `signal-source:llm`).
pub const SIGNAL_SOURCE_TAG_PREFIX: &str = "signal-source:";

/// Tag that switches `with_negative_exemplars` on in the parsed configuration.
pub const NEG_EXEMPLAR_TAG: &str = "with-neg-exemplars";
51
52#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
54#[serde(rename_all = "snake_case")]
55pub enum ExtractionOutcome {
56 Pending,
58 Confirmed,
60 Tombstoned,
62}
63
64#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
69#[serde(rename_all = "snake_case")]
70pub enum SignalSource {
71 Ast,
74 Llm,
76}
77
78impl SignalSource {
79 pub fn as_str(self) -> &'static str {
80 match self {
81 SignalSource::Ast => "ast",
82 SignalSource::Llm => "llm",
83 }
84 }
85}
86
87#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
93pub struct ExtractionConfig {
94 pub signal_source: SignalSource,
95 pub with_negative_exemplars: bool,
96}
97
98impl Default for ExtractionConfig {
99 fn default() -> Self {
100 Self {
101 signal_source: SignalSource::Llm,
102 with_negative_exemplars: false,
103 }
104 }
105}
106
107impl ExtractionConfig {
108 pub fn label(&self) -> String {
112 format!(
113 "{}+{}",
114 self.signal_source.as_str(),
115 if self.with_negative_exemplars {
116 "neg"
117 } else {
118 "no_neg"
119 }
120 )
121 }
122}
123
124#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
128pub struct ExtractionRecord {
129 pub gotcha_key: String,
130 pub depth: Option<EnrichmentDepth>,
133 pub file_path: String,
136 pub created_at: u64,
137 pub outcome: ExtractionOutcome,
138 pub outcome_at: Option<u64>,
140 #[serde(default)]
144 pub config: ExtractionConfig,
145}
146
147impl ExtractionRecord {
148 pub fn days_to_outcome(&self) -> Option<i64> {
150 self.outcome_at.map(|t| {
151 let delta = t.saturating_sub(self.created_at);
152 (delta / 86_400) as i64
153 })
154 }
155}
156
157pub fn key_for(gotcha_key: &str) -> String {
159 let slug = gotcha_key.strip_prefix("gotcha:").unwrap_or(gotcha_key);
160 format!("{EXTRACTION_PREFIX}{slug}")
161}
162
163#[derive(Debug, Clone, Copy, PartialEq)]
165pub struct TagClassification {
166 pub is_enriched: bool,
167 pub depth: Option<EnrichmentDepth>,
168 pub config: ExtractionConfig,
169}
170
171pub fn classify_tags(tags: &[String]) -> TagClassification {
179 let mut is_enriched = false;
180 let mut depth = None;
181 let mut config = ExtractionConfig::default();
182 for tag in tags {
183 if tag == ENRICHED_TAG {
184 is_enriched = true;
185 } else if tag == NEG_EXEMPLAR_TAG {
186 config.with_negative_exemplars = true;
187 } else if let Some(rest) = tag.strip_prefix(DEPTH_TAG_PREFIX) {
188 depth = match rest {
189 "fast" => Some(EnrichmentDepth::Fast),
190 "standard" => Some(EnrichmentDepth::Standard),
191 "deep" => Some(EnrichmentDepth::Deep),
192 _ => None,
193 };
194 } else if let Some(rest) = tag.strip_prefix(SIGNAL_SOURCE_TAG_PREFIX) {
195 config.signal_source = match rest {
196 "ast" => SignalSource::Ast,
197 "llm" => SignalSource::Llm,
198 _ => config.signal_source,
199 };
200 }
201 }
202 TagClassification {
203 is_enriched,
204 depth,
205 config,
206 }
207}
208
209pub async fn write_on_extraction(
216 store: &Store,
217 gotcha_key: &str,
218 tags: &[String],
219 affected_files: &[String],
220) -> Result<bool> {
221 let TagClassification {
222 is_enriched,
223 depth,
224 config,
225 } = classify_tags(tags);
226 if !is_enriched {
227 return Ok(false);
228 }
229 let file_path = affected_files.first().cloned().unwrap_or_default();
230 let ts = now_secs();
231 let extraction = ExtractionRecord {
232 gotcha_key: gotcha_key.to_string(),
233 depth,
234 file_path,
235 created_at: ts,
236 outcome: ExtractionOutcome::Pending,
237 outcome_at: None,
238 config,
239 };
240 let key = key_for(gotcha_key);
241 let record = analytics_record(&key, &extraction, ts);
242 match store.put(&key, &record).await {
243 Ok(()) => Ok(true),
244 Err(e) => {
245 tracing::warn!("extraction: write failed for {gotcha_key}: {e}");
246 Ok(false)
247 }
248 }
249}
250
251pub async fn mark_outcome(
257 store: &Store,
258 gotcha_key: &str,
259 outcome: ExtractionOutcome,
260) -> Result<bool> {
261 let key = key_for(gotcha_key);
262 let Some(existing) = store.get(&key).await? else {
263 return Ok(false);
264 };
265 let Some(payload) = existing.payload.clone() else {
266 return Ok(false);
267 };
268 let Ok(mut extraction) = serde_json::from_value::<ExtractionRecord>(payload) else {
269 tracing::warn!("extraction: payload deserialize failed for {gotcha_key}");
270 return Ok(false);
271 };
272 if extraction.outcome == outcome {
275 return Ok(false);
276 }
277 extraction.outcome = outcome;
278 extraction.outcome_at = Some(now_secs());
279 let record = analytics_record(&key, &extraction, extraction.created_at);
280 match store.put(&key, &record).await {
281 Ok(()) => Ok(true),
282 Err(e) => {
283 tracing::warn!("extraction: outcome write failed for {gotcha_key}: {e}");
284 Ok(false)
285 }
286 }
287}
288
289#[derive(Debug, Clone, Default, Serialize, Deserialize)]
291pub struct ExtractionStats {
292 pub total: u64,
293 pub confirmed: u64,
294 pub tombstoned: u64,
295 pub pending: u64,
296 pub expired: u64,
299 pub per_tier: PerTierStats,
300 #[serde(default)]
305 pub per_config: std::collections::BTreeMap<String, TierStats>,
306}
307
308#[derive(Debug, Clone, Default, Serialize, Deserialize)]
309pub struct PerTierStats {
310 pub fast: TierStats,
311 pub standard: TierStats,
312 pub deep: TierStats,
313 pub unknown: TierStats,
315}
316
317#[derive(Debug, Clone, Default, Serialize, Deserialize)]
318pub struct TierStats {
319 pub total: u64,
320 pub confirmed: u64,
321 pub tombstoned: u64,
322 pub pending: u64,
323}
324
325impl TierStats {
326 pub fn confirmed_rate(&self) -> Option<f64> {
328 if self.total == 0 {
329 None
330 } else {
331 Some(self.confirmed as f64 / self.total as f64)
332 }
333 }
334}
335
336pub async fn compute_stats(store: &Store, since_secs: u64) -> Result<ExtractionStats> {
344 let records = store
345 .scan_prefix(EXTRACTION_PREFIX)
346 .await
347 .unwrap_or_default();
348 let extractions: Vec<ExtractionRecord> = records
349 .into_iter()
350 .filter_map(|r| r.payload.and_then(|p| serde_json::from_value(p).ok()))
351 .collect();
352 Ok(aggregate_stats(&extractions, since_secs, now_secs()))
353}
354
355pub fn aggregate_stats(
363 extractions: &[ExtractionRecord],
364 since_secs: u64,
365 now: u64,
366) -> ExtractionStats {
367 let expiry_cutoff = now.saturating_sub(90 * 86_400);
368
369 let mut stats = ExtractionStats::default();
370 for e in extractions {
371 if e.created_at < since_secs {
372 continue;
373 }
374 stats.total += 1;
375 let tier_stats: &mut TierStats = match e.depth {
376 Some(EnrichmentDepth::Fast) => &mut stats.per_tier.fast,
377 Some(EnrichmentDepth::Standard) => &mut stats.per_tier.standard,
378 Some(EnrichmentDepth::Deep) => &mut stats.per_tier.deep,
379 None => &mut stats.per_tier.unknown,
380 };
381 tier_stats.total += 1;
382 let config_label = e.config.label();
385 let config_stats: &mut TierStats = stats.per_config.entry(config_label).or_default();
386 config_stats.total += 1;
387
388 match e.outcome {
389 ExtractionOutcome::Confirmed => {
390 stats.confirmed += 1;
391 tier_stats.confirmed += 1;
392 config_stats.confirmed += 1;
393 }
394 ExtractionOutcome::Tombstoned => {
395 stats.tombstoned += 1;
396 tier_stats.tombstoned += 1;
397 config_stats.tombstoned += 1;
398 }
399 ExtractionOutcome::Pending => {
400 if e.created_at < expiry_cutoff {
401 stats.expired += 1;
402 } else {
403 stats.pending += 1;
404 tier_stats.pending += 1;
405 config_stats.pending += 1;
406 }
407 }
408 }
409 }
410 stats
411}
412
413fn analytics_record(key: &str, payload: &ExtractionRecord, created_at: u64) -> Record {
414 let value = format!(
415 "{:?} ({})",
416 payload.outcome,
417 payload.depth.map(|d| d.as_str()).unwrap_or("unknown")
418 );
419 Record {
420 key: key.to_string(),
421 value,
422 payload: serde_json::to_value(payload).ok(),
423 category: Category::Analytics,
424 priority: Priority::Normal,
425 tags: vec![],
426 created_at,
427 updated_at: now_secs(),
428 ref_url: None,
429 staleness: StalenessScore::fresh(),
430 lifecycle: RecordLifecycle::Active,
431 version: RecordVersion {
432 device_id: crate::store::stable_device_id(),
433 logical_clock: 1,
434 wall_clock: now_secs(),
435 },
436 quality: QualityScore::layer0_default(),
437 access_count: 0,
438 last_accessed: 0,
439 source: RecordSource::StaticAnalysis,
440 confidence: ConfidenceScore::for_new_record(&RecordSource::StaticAnalysis),
441 gap_analysis_score: 0.0,
442 }
443}
444
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opens a store in a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned with the store instead of being leaked,
    /// so the directory is removed when the test finishes. Callers must keep
    /// the guard bound for as long as they use the store.
    async fn fresh_store() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().to_path_buf();
        let store = Store::open(&path).await.unwrap();
        (dir, store)
    }

    /// Converts string literals into the owned tag/file lists the API expects.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| s.to_string()).collect()
    }

    /// Loads and deserializes the extraction payload stored under `key`.
    async fn load_extraction(store: &Store, key: &str) -> ExtractionRecord {
        let rec = store.get(key).await.unwrap().expect("record present");
        serde_json::from_value(rec.payload.expect("payload")).unwrap()
    }

    #[test]
    fn classify_tags_detects_enriched_and_depth() {
        let c = classify_tags(&strings(&["enriched", "depth:deep"]));
        assert!(c.is_enriched);
        assert_eq!(c.depth, Some(EnrichmentDepth::Deep));
        // Without config tags the defaults apply.
        assert_eq!(c.config.signal_source, SignalSource::Llm);
        assert!(!c.config.with_negative_exemplars);
    }

    #[test]
    fn classify_tags_no_enriched_is_skipped() {
        let c = classify_tags(&strings(&["test", "depth:fast"]));
        assert!(!c.is_enriched);
        assert_eq!(c.depth, Some(EnrichmentDepth::Fast));
    }

    #[test]
    fn classify_tags_unknown_depth_value_yields_none() {
        let c = classify_tags(&strings(&["enriched", "depth:bogus"]));
        assert!(c.is_enriched);
        assert!(c.depth.is_none());
    }

    #[test]
    fn classify_tags_no_depth_tag_yields_none() {
        let c = classify_tags(&strings(&["enriched", "other"]));
        assert!(c.is_enriched);
        assert!(c.depth.is_none());
    }

    #[test]
    fn classify_tags_picks_up_signal_source_ast_and_neg_exemplars() {
        let c = classify_tags(&strings(&[
            "enriched",
            "depth:deep",
            "signal-source:ast",
            "with-neg-exemplars",
        ]));
        assert!(c.is_enriched);
        assert_eq!(c.depth, Some(EnrichmentDepth::Deep));
        assert_eq!(c.config.signal_source, SignalSource::Ast);
        assert!(c.config.with_negative_exemplars);
    }

    #[test]
    fn classify_tags_invalid_signal_source_keeps_default() {
        let c = classify_tags(&strings(&["enriched", "signal-source:bogus"]));
        assert_eq!(c.config.signal_source, SignalSource::Llm);
    }

    #[test]
    fn extraction_config_label_stable_for_all_combos() {
        let combos = [
            (SignalSource::Llm, false, "llm+no_neg"),
            (SignalSource::Llm, true, "llm+neg"),
            (SignalSource::Ast, false, "ast+no_neg"),
            (SignalSource::Ast, true, "ast+neg"),
        ];
        for (src, neg, expected) in combos {
            let cfg = ExtractionConfig {
                signal_source: src,
                with_negative_exemplars: neg,
            };
            assert_eq!(cfg.label(), expected, "{cfg:?}");
        }
    }

    #[test]
    fn key_for_strips_gotcha_prefix() {
        assert_eq!(key_for("gotcha:foo"), "analytics:extraction:foo");
        assert_eq!(key_for("gotcha:foo:bar"), "analytics:extraction:foo:bar");
        assert_eq!(key_for("foo"), "analytics:extraction:foo");
    }

    #[tokio::test]
    async fn write_on_extraction_skips_when_not_enriched() {
        let (_dir, store) = fresh_store().await;
        let written = write_on_extraction(
            &store,
            "gotcha:manual-add",
            &strings(&["test"]),
            &strings(&["src/foo.rs"]),
        )
        .await
        .unwrap();
        assert!(!written);
        assert!(store
            .get("analytics:extraction:manual-add")
            .await
            .unwrap()
            .is_none());
    }

    #[tokio::test]
    async fn write_on_extraction_writes_pending_with_depth() {
        let (_dir, store) = fresh_store().await;
        let written = write_on_extraction(
            &store,
            "gotcha:r1",
            &strings(&["enriched", "depth:deep"]),
            &strings(&["src/cli/repair.rs"]),
        )
        .await
        .unwrap();
        assert!(written);

        let extraction = load_extraction(&store, "analytics:extraction:r1").await;
        assert_eq!(extraction.gotcha_key, "gotcha:r1");
        assert_eq!(extraction.depth, Some(EnrichmentDepth::Deep));
        assert_eq!(extraction.file_path, "src/cli/repair.rs");
        assert_eq!(extraction.outcome, ExtractionOutcome::Pending);
        assert!(extraction.outcome_at.is_none());
    }

    #[tokio::test]
    async fn mark_outcome_flips_pending_to_confirmed() {
        let (_dir, store) = fresh_store().await;
        write_on_extraction(
            &store,
            "gotcha:r2",
            &strings(&["enriched", "depth:fast"]),
            &strings(&["src/foo.rs"]),
        )
        .await
        .unwrap();

        let updated = mark_outcome(&store, "gotcha:r2", ExtractionOutcome::Confirmed)
            .await
            .unwrap();
        assert!(updated);

        let extraction = load_extraction(&store, "analytics:extraction:r2").await;
        assert_eq!(extraction.outcome, ExtractionOutcome::Confirmed);
        assert!(extraction.outcome_at.is_some());
    }

    #[tokio::test]
    async fn mark_outcome_is_idempotent() {
        let (_dir, store) = fresh_store().await;
        write_on_extraction(
            &store,
            "gotcha:r3",
            &strings(&["enriched"]),
            &strings(&["src/x.rs"]),
        )
        .await
        .unwrap();
        mark_outcome(&store, "gotcha:r3", ExtractionOutcome::Tombstoned)
            .await
            .unwrap();
        let updated = mark_outcome(&store, "gotcha:r3", ExtractionOutcome::Tombstoned)
            .await
            .unwrap();
        assert!(
            !updated,
            "second mark_outcome with same outcome must be no-op"
        );
    }

    #[tokio::test]
    async fn mark_outcome_missing_record_returns_false() {
        let (_dir, store) = fresh_store().await;
        let updated = mark_outcome(&store, "gotcha:nonexistent", ExtractionOutcome::Confirmed)
            .await
            .unwrap();
        assert!(!updated);
    }

    #[tokio::test]
    async fn compute_stats_per_tier_breakdown() {
        let (_dir, store) = fresh_store().await;

        // Two fast (one confirmed, one tombstoned), one standard, one deep.
        let cases = [
            ("gotcha:f1", "fast", ExtractionOutcome::Confirmed),
            ("gotcha:f2", "fast", ExtractionOutcome::Tombstoned),
            ("gotcha:s1", "standard", ExtractionOutcome::Confirmed),
            ("gotcha:d1", "deep", ExtractionOutcome::Confirmed),
        ];
        for (gk, depth, outcome) in &cases {
            write_on_extraction(
                &store,
                gk,
                &["enriched".into(), format!("depth:{depth}")],
                &strings(&["src/x.rs"]),
            )
            .await
            .unwrap();
            mark_outcome(&store, gk, *outcome).await.unwrap();
        }

        let stats = compute_stats(&store, 0).await.unwrap();
        assert_eq!(stats.total, 4);
        assert_eq!(stats.confirmed, 3);
        assert_eq!(stats.tombstoned, 1);
        assert_eq!(stats.per_tier.fast.total, 2);
        assert_eq!(stats.per_tier.fast.confirmed, 1);
        assert_eq!(stats.per_tier.fast.tombstoned, 1);
        assert_eq!(stats.per_tier.standard.total, 1);
        assert_eq!(stats.per_tier.standard.confirmed, 1);
        assert_eq!(stats.per_tier.deep.total, 1);
        assert_eq!(stats.per_tier.deep.confirmed, 1);

        assert_eq!(stats.per_tier.fast.confirmed_rate(), Some(0.5));
        assert_eq!(stats.per_tier.standard.confirmed_rate(), Some(1.0));
        // An empty tier has no rate at all.
        assert_eq!(stats.per_tier.unknown.confirmed_rate(), None);
    }

    #[tokio::test]
    async fn compute_stats_respects_since_secs() {
        let (_dir, store) = fresh_store().await;
        write_on_extraction(
            &store,
            "gotcha:r",
            &strings(&["enriched"]),
            &strings(&["src/x.rs"]),
        )
        .await
        .unwrap();
        // A lower bound in the far future excludes every record.
        let stats = compute_stats(&store, u64::MAX).await.unwrap();
        assert_eq!(stats.total, 0);
    }

    #[test]
    fn days_to_outcome_computed_from_timestamps() {
        let extraction = ExtractionRecord {
            gotcha_key: "gotcha:t".into(),
            depth: None,
            file_path: String::new(),
            created_at: 1_000_000,
            outcome: ExtractionOutcome::Confirmed,
            outcome_at: Some(1_000_000 + 2 * 86_400),
            config: ExtractionConfig::default(),
        };
        assert_eq!(extraction.days_to_outcome(), Some(2));

        let pending = ExtractionRecord {
            gotcha_key: "gotcha:p".into(),
            depth: None,
            file_path: String::new(),
            created_at: 1_000_000,
            outcome: ExtractionOutcome::Pending,
            outcome_at: None,
            config: ExtractionConfig::default(),
        };
        assert_eq!(pending.days_to_outcome(), None);
    }

    #[tokio::test]
    async fn per_config_breakdown_aggregates_correctly() {
        let (_dir, store) = fresh_store().await;

        // One record per configuration label; "gotcha:d" relies on the defaults.
        let cases = [
            (
                "gotcha:a",
                vec!["enriched", "signal-source:ast", "with-neg-exemplars"],
                ExtractionOutcome::Confirmed,
            ),
            (
                "gotcha:b",
                vec!["enriched", "signal-source:ast"],
                ExtractionOutcome::Confirmed,
            ),
            (
                "gotcha:c",
                vec!["enriched", "signal-source:llm", "with-neg-exemplars"],
                ExtractionOutcome::Tombstoned,
            ),
            ("gotcha:d", vec!["enriched"], ExtractionOutcome::Confirmed),
        ];
        for (key, tags, outcome) in &cases {
            let owned = strings(tags);
            write_on_extraction(&store, key, &owned, &strings(&["src/x.rs"]))
                .await
                .unwrap();
            mark_outcome(&store, key, *outcome).await.unwrap();
        }

        let stats = compute_stats(&store, 0).await.unwrap();
        assert_eq!(stats.total, 4);
        assert_eq!(stats.confirmed, 3);
        assert_eq!(stats.tombstoned, 1);

        assert_eq!(stats.per_config.get("ast+neg").unwrap().total, 1);
        assert_eq!(stats.per_config.get("ast+no_neg").unwrap().total, 1);
        assert_eq!(stats.per_config.get("llm+neg").unwrap().total, 1);
        assert_eq!(stats.per_config.get("llm+no_neg").unwrap().total, 1);

        assert_eq!(stats.per_config.get("ast+neg").unwrap().confirmed, 1);
        assert_eq!(stats.per_config.get("ast+no_neg").unwrap().confirmed, 1);
        assert_eq!(stats.per_config.get("llm+neg").unwrap().tombstoned, 1);
        assert_eq!(stats.per_config.get("llm+no_neg").unwrap().confirmed, 1);
    }
}