Skip to main content

tailtriage_analyzer/
evidence.rs

1use serde::Serialize;
2use tailtriage_core::Run;
3
4use super::LOW_COMPLETED_REQUEST_THRESHOLD;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
7#[serde(rename_all = "snake_case")]
8/// Overall evidence-quality level for this capture.
9pub enum EvidenceQualityLevel {
10    /// Evidence coverage is sufficient for a strong triage interpretation.
11    Strong,
12    /// Evidence coverage has important limitations.
13    Partial,
14    /// Evidence coverage is too sparse/truncated for stable interpretation.
15    Weak,
16}
17
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
19#[serde(rename_all = "snake_case")]
20/// Coverage status for one signal family.
21pub enum SignalCoverageStatus {
22    /// Signal family has usable data.
23    Present,
24    /// Signal family is absent.
25    Missing,
26    /// Signal family exists but has limited interpretability.
27    Partial,
28    /// Signal family had capture drops due to truncation.
29    Truncated,
30}
31
32#[derive(Debug, Clone, PartialEq, Serialize)]
33/// Structured capture-coverage and interpretation-quality summary.
34pub struct EvidenceQuality {
35    /// Number of completed request events captured.
36    pub request_count: usize,
37    /// Number of queue events captured.
38    pub queue_event_count: usize,
39    /// Number of stage events captured.
40    pub stage_event_count: usize,
41    /// Number of runtime snapshots captured.
42    pub runtime_snapshot_count: usize,
43    /// Number of in-flight snapshots captured.
44    pub inflight_snapshot_count: usize,
45    /// Coverage status for request events.
46    pub requests: SignalCoverageStatus,
47    /// Coverage status for queue events.
48    pub queues: SignalCoverageStatus,
49    /// Coverage status for stage events.
50    pub stages: SignalCoverageStatus,
51    /// Coverage status for runtime snapshots.
52    pub runtime_snapshots: SignalCoverageStatus,
53    /// Coverage status for in-flight snapshots.
54    pub inflight_snapshots: SignalCoverageStatus,
55    /// Whether any capture truncation limit was hit.
56    pub truncated: bool,
57    /// Number of dropped request events.
58    pub dropped_requests: u64,
59    /// Number of dropped stage events.
60    pub dropped_stages: u64,
61    /// Number of dropped queue events.
62    pub dropped_queues: u64,
63    /// Number of dropped in-flight snapshots.
64    pub dropped_inflight_snapshots: u64,
65    /// Number of dropped runtime snapshots.
66    pub dropped_runtime_snapshots: u64,
67    /// Overall quality level for this report's evidence coverage.
68    pub quality: EvidenceQualityLevel,
69    /// Interpretation limitations inferred from coverage/truncation.
70    pub limitations: Vec<String>,
71}
72
73pub(super) fn evidence_quality(run: &Run) -> EvidenceQuality {
74    let requests = request_status(run);
75    let queues = family_status(run.queues.is_empty(), run.truncation.dropped_queues);
76    let stages = family_status(run.stages.is_empty(), run.truncation.dropped_stages);
77    let runtime_snapshots = runtime_status(run);
78    let inflight_snapshots = family_status(
79        run.inflight.is_empty(),
80        run.truncation.dropped_inflight_snapshots,
81    );
82    let limitations = evidence_limitations(run, queues, stages, runtime_snapshots);
83    let non_request_truncated = matches!(queues, SignalCoverageStatus::Truncated)
84        || matches!(stages, SignalCoverageStatus::Truncated)
85        || matches!(runtime_snapshots, SignalCoverageStatus::Truncated)
86        || matches!(inflight_snapshots, SignalCoverageStatus::Truncated);
87    let explanatory_present =
88        !run.queues.is_empty() || !run.stages.is_empty() || !run.runtime_snapshots.is_empty();
89    let quality = if run.requests.is_empty()
90        || run.requests.len() < LOW_COMPLETED_REQUEST_THRESHOLD
91        || run.truncation.dropped_requests > 0
92        || !explanatory_present
93    {
94        EvidenceQualityLevel::Weak
95    } else if non_request_truncated
96        || (run.queues.is_empty() && run.stages.is_empty())
97        || runtime_snapshots == SignalCoverageStatus::Partial
98    {
99        EvidenceQualityLevel::Partial
100    } else {
101        EvidenceQualityLevel::Strong
102    };
103
104    EvidenceQuality {
105        request_count: run.requests.len(),
106        queue_event_count: run.queues.len(),
107        stage_event_count: run.stages.len(),
108        runtime_snapshot_count: run.runtime_snapshots.len(),
109        inflight_snapshot_count: run.inflight.len(),
110        requests,
111        queues,
112        stages,
113        runtime_snapshots,
114        inflight_snapshots,
115        truncated: run.truncation.is_truncated() || run.truncation.limits_hit,
116        dropped_requests: run.truncation.dropped_requests,
117        dropped_stages: run.truncation.dropped_stages,
118        dropped_queues: run.truncation.dropped_queues,
119        dropped_inflight_snapshots: run.truncation.dropped_inflight_snapshots,
120        dropped_runtime_snapshots: run.truncation.dropped_runtime_snapshots,
121        quality,
122        limitations,
123    }
124}
125
126fn request_status(run: &Run) -> SignalCoverageStatus {
127    if run.requests.is_empty() {
128        SignalCoverageStatus::Missing
129    } else if run.truncation.dropped_requests > 0 {
130        SignalCoverageStatus::Truncated
131    } else if run.requests.len() < LOW_COMPLETED_REQUEST_THRESHOLD {
132        SignalCoverageStatus::Partial
133    } else {
134        SignalCoverageStatus::Present
135    }
136}
137
138fn family_status(is_empty: bool, dropped: u64) -> SignalCoverageStatus {
139    if dropped > 0 {
140        SignalCoverageStatus::Truncated
141    } else if is_empty {
142        SignalCoverageStatus::Missing
143    } else {
144        SignalCoverageStatus::Present
145    }
146}
147
148fn runtime_status(run: &Run) -> SignalCoverageStatus {
149    if run.truncation.dropped_runtime_snapshots > 0 {
150        SignalCoverageStatus::Truncated
151    } else if run.runtime_snapshots.is_empty() {
152        SignalCoverageStatus::Missing
153    } else if run
154        .runtime_snapshots
155        .iter()
156        .all(|s| s.blocking_queue_depth.is_none())
157        || run
158            .runtime_snapshots
159            .iter()
160            .all(|s| s.local_queue_depth.is_none())
161        || run
162            .runtime_snapshots
163            .iter()
164            .all(|s| s.global_queue_depth.is_none())
165    {
166        SignalCoverageStatus::Partial
167    } else {
168        SignalCoverageStatus::Present
169    }
170}
171
172fn evidence_limitations(
173    run: &Run,
174    queues: SignalCoverageStatus,
175    stages: SignalCoverageStatus,
176    runtime_snapshots: SignalCoverageStatus,
177) -> Vec<String> {
178    let mut limitations = Vec::new();
179    if run.requests.len() < LOW_COMPLETED_REQUEST_THRESHOLD {
180        limitations
181            .push("Low completed-request count can make suspect ranking unstable.".to_string());
182    }
183    if matches!(
184        queues,
185        SignalCoverageStatus::Missing | SignalCoverageStatus::Truncated
186    ) && matches!(
187        stages,
188        SignalCoverageStatus::Missing | SignalCoverageStatus::Truncated
189    ) {
190        limitations.push("Queue and stage instrumentation are both unavailable, limiting application vs downstream interpretation.".to_string());
191    }
192    if run.runtime_snapshots.is_empty() {
193        limitations.push("Runtime snapshots are missing, limiting executor and blocking-pressure interpretation.".to_string());
194    } else if runtime_snapshots == SignalCoverageStatus::Partial {
195        limitations.push("Runtime snapshots have missing queue-depth fields, limiting executor vs blocking differentiation.".to_string());
196    }
197    if run.truncation.is_truncated() || run.truncation.limits_hit {
198        limitations.push(
199            "Capture truncation dropped evidence and can reduce diagnosis completeness."
200                .to_string(),
201        );
202    }
203    limitations
204}
205
206pub(super) fn truncation_warnings(run: &Run) -> Vec<String> {
207    let mut warnings = Vec::new();
208    if run.truncation.limits_hit || run.truncation.is_truncated() {
209        warnings.push("Capture limits were hit during this run; dropped evidence can reduce diagnosis completeness and confidence.".to_string());
210    }
211    if run.truncation.dropped_requests > 0 {
212        warnings.push(format!("Capture truncated requests: dropped {} request events after reaching the configured max_requests limit. This dropped evidence can reduce diagnosis completeness and confidence.", run.truncation.dropped_requests));
213    }
214    if run.truncation.dropped_stages > 0 {
215        warnings.push(format!("Capture truncated stages: dropped {} stage events after reaching the configured max_stages limit. This dropped evidence can reduce diagnosis completeness and confidence.", run.truncation.dropped_stages));
216    }
217    if run.truncation.dropped_queues > 0 {
218        warnings.push(format!("Capture truncated queues: dropped {} queue events after reaching the configured max_queues limit. This dropped evidence can reduce diagnosis completeness and confidence.", run.truncation.dropped_queues));
219    }
220    if run.truncation.dropped_inflight_snapshots > 0 {
221        warnings.push(format!("Capture truncated in-flight snapshots: dropped {} entries after reaching max_inflight_snapshots. This dropped evidence can reduce diagnosis completeness and confidence.", run.truncation.dropped_inflight_snapshots));
222    }
223    if run.truncation.dropped_runtime_snapshots > 0 {
224        warnings.push(format!("Capture truncated runtime snapshots: dropped {} entries after reaching max_runtime_snapshots. This dropped evidence can reduce diagnosis completeness and confidence.", run.truncation.dropped_runtime_snapshots));
225    }
226    warnings
227}