// pi/flake_classifier.rs

// Conformance flake classifier (bd-k5q5.5.4)
//
// Classifies test failures as deterministic or transient based on
// known flake patterns.  Used by CI retry logic and triage tooling.

use serde::{Deserialize, Serialize};

8/// Category of a recognized transient failure.
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum FlakeCategory {
12    /// TS oracle process timed out.
13    OracleTimeout,
14    /// OS-level resource exhaustion (OOM, file descriptors).
15    ResourceExhaustion,
16    /// Filesystem lock or busy error.
17    FsContention,
18    /// TCP port already in use.
19    PortConflict,
20    /// Temp directory disappeared mid-test.
21    TmpdirRace,
22    /// QuickJS runtime ran out of memory.
23    JsGcPressure,
24}
25
26impl FlakeCategory {
27    /// All known flake categories.
28    #[must_use]
29    pub const fn all() -> &'static [Self] {
30        &[
31            Self::OracleTimeout,
32            Self::ResourceExhaustion,
33            Self::FsContention,
34            Self::PortConflict,
35            Self::TmpdirRace,
36            Self::JsGcPressure,
37        ]
38    }
39
40    /// Human-readable label.
41    #[must_use]
42    pub const fn label(self) -> &'static str {
43        match self {
44            Self::OracleTimeout => "TS oracle timeout",
45            Self::ResourceExhaustion => "resource exhaustion",
46            Self::FsContention => "filesystem contention",
47            Self::PortConflict => "port conflict",
48            Self::TmpdirRace => "temp directory race",
49            Self::JsGcPressure => "QuickJS GC pressure",
50        }
51    }
52}
53
54impl std::fmt::Display for FlakeCategory {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        f.write_str(self.label())
57    }
58}
59
60/// Result of classifying a test failure.
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(rename_all = "snake_case")]
63pub enum FlakeClassification {
64    /// Matches a known transient pattern — eligible for retry.
65    Transient {
66        category: FlakeCategory,
67        matched_line: String,
68    },
69    /// No known flake pattern matched — treat as deterministic.
70    Deterministic,
71}
72
73impl FlakeClassification {
74    /// Whether this classification allows automatic retry.
75    #[must_use]
76    pub const fn is_retriable(&self) -> bool {
77        matches!(self, Self::Transient { .. })
78    }
79}
80
81/// A logged flake event for JSONL tracking.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct FlakeEvent {
84    pub target: String,
85    pub classification: FlakeClassification,
86    pub attempt: u32,
87    pub timestamp: String,
88}
89
90/// Classify a test failure based on its output text.
91///
92/// Scans the output for known transient failure patterns and returns
93/// the first match, or `Deterministic` if no patterns match.
94#[must_use]
95pub fn classify_failure(output: &str) -> FlakeClassification {
96    // Check each line for known patterns.  We use simple substring
97    // matching to avoid regex dependency for this module.
98    let lower = output.to_lowercase();
99
100    for line in lower.lines() {
101        let trimmed = line.trim();
102
103        // Oracle timeout
104        if (trimmed.contains("oracle") || trimmed.contains("bun"))
105            && (trimmed.contains("timed out") || trimmed.contains("timeout"))
106        {
107            return FlakeClassification::Transient {
108                category: FlakeCategory::OracleTimeout,
109                matched_line: trimmed.to_string(),
110            };
111        }
112
113        // Resource exhaustion
114        if trimmed.contains("out of memory")
115            || trimmed.contains("enomem")
116            || trimmed.contains("cannot allocate")
117        {
118            // Distinguish JS GC pressure from OS-level OOM
119            let category = if trimmed.contains("quickjs") || trimmed.contains("allocation failed") {
120                FlakeCategory::JsGcPressure
121            } else {
122                FlakeCategory::ResourceExhaustion
123            };
124            return FlakeClassification::Transient {
125                category,
126                matched_line: trimmed.to_string(),
127            };
128        }
129
130        // Filesystem contention
131        if trimmed.contains("ebusy")
132            || trimmed.contains("etxtbsy")
133            || trimmed.contains("resource busy")
134        {
135            return FlakeClassification::Transient {
136                category: FlakeCategory::FsContention,
137                matched_line: trimmed.to_string(),
138            };
139        }
140
141        // Port conflict
142        if trimmed.contains("eaddrinuse") || trimmed.contains("address already in use") {
143            return FlakeClassification::Transient {
144                category: FlakeCategory::PortConflict,
145                matched_line: trimmed.to_string(),
146            };
147        }
148
149        // Temp directory race
150        if (trimmed.contains("no such file or directory") || trimmed.contains("enoent"))
151            && trimmed.contains("tmp")
152        {
153            return FlakeClassification::Transient {
154                category: FlakeCategory::TmpdirRace,
155                matched_line: trimmed.to_string(),
156            };
157        }
158
159        // QuickJS GC pressure (standalone)
160        if trimmed.contains("quickjs") && trimmed.contains("allocation failed") {
161            return FlakeClassification::Transient {
162                category: FlakeCategory::JsGcPressure,
163                matched_line: trimmed.to_string(),
164            };
165        }
166    }
167
168    FlakeClassification::Deterministic
169}
170
/// Retry policy configuration.
///
/// `RetryPolicy::default()` reads each field from an environment variable
/// and falls back to a built-in value when the variable is unset, is not
/// valid Unicode, or does not parse as a `u32`.
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Maximum automatic retries per target per run.
    ///
    /// Default: `PI_CONFORMANCE_MAX_RETRIES`, else `1`.
    pub max_retries: u32,
    /// Delay between retry attempts in seconds.
    ///
    /// Default: `PI_CONFORMANCE_RETRY_DELAY`, else `5`.
    pub retry_delay_secs: u32,
    /// Per-target 30-day flake budget.
    ///
    /// Default: `PI_CONFORMANCE_FLAKE_BUDGET`, else `3`.
    pub flake_budget: u32,
}

impl Default for RetryPolicy {
    /// Builds the policy from the `PI_CONFORMANCE_*` environment variables,
    /// using the documented fallback for any value that is missing or
    /// malformed.
    fn default() -> Self {
        /// Reads `name` as a `u32`, tolerating surrounding whitespace
        /// (e.g. a trailing newline from a CI secrets file); returns
        /// `fallback` when the variable is absent or unparsable.
        fn env_u32(name: &str, fallback: u32) -> u32 {
            std::env::var(name)
                .ok()
                .and_then(|raw| raw.trim().parse().ok())
                .unwrap_or(fallback)
        }

        Self {
            max_retries: env_u32("PI_CONFORMANCE_MAX_RETRIES", 1),
            retry_delay_secs: env_u32("PI_CONFORMANCE_RETRY_DELAY", 5),
            flake_budget: env_u32("PI_CONFORMANCE_FLAKE_BUDGET", 3),
        }
    }
}

201impl RetryPolicy {
202    /// Whether we should retry after this classification.
203    #[must_use]
204    pub const fn should_retry(&self, classification: &FlakeClassification, attempt: u32) -> bool {
205        classification.is_retriable() && attempt < self.max_retries
206    }
207}
208
// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn classify_oracle_timeout() {
        let output = "error: TS oracle process timed out after 30s";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::OracleTimeout,
                ..
            }
        ));
    }

    #[test]
    fn classify_bun_timeout() {
        let output = "bun process timed out waiting for response";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::OracleTimeout,
                ..
            }
        ));
    }

    #[test]
    fn classify_oom() {
        let output = "fatal: out of memory (allocator returned null)";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::ResourceExhaustion,
                ..
            }
        ));
    }

    #[test]
    fn classify_enomem() {
        let output = "error: ENOMEM: not enough memory";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::ResourceExhaustion,
                ..
            }
        ));
    }

    #[test]
    fn classify_quickjs_gc() {
        let output = "quickjs runtime: allocation failed, out of memory";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::JsGcPressure,
                ..
            }
        ));
    }

    #[test]
    fn classify_ebusy() {
        let output = "error: EBUSY: resource busy or locked";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::FsContention,
                ..
            }
        ));
    }

    #[test]
    fn classify_port_conflict() {
        let output = "listen EADDRINUSE: address already in use :::8080";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::PortConflict,
                ..
            }
        ));
    }

    #[test]
    fn classify_tmpdir_race() {
        let output = "error: No such file or directory (os error 2), path: /tmp/pi-test-abc123";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::TmpdirRace,
                ..
            }
        ));
    }

    #[test]
    fn classify_deterministic() {
        let output = "assertion failed: expected PASS but got FAIL\nnote: left == right";
        let result = classify_failure(output);
        assert_eq!(result, FlakeClassification::Deterministic);
    }

    #[test]
    fn classify_empty_output() {
        assert_eq!(classify_failure(""), FlakeClassification::Deterministic);
    }

    #[test]
    fn classification_is_retriable() {
        let transient = FlakeClassification::Transient {
            category: FlakeCategory::OracleTimeout,
            matched_line: "timeout".into(),
        };
        assert!(transient.is_retriable());
        assert!(!FlakeClassification::Deterministic.is_retriable());
    }

    // Exercises `should_retry` with the documented default values.  The
    // policy is built by hand rather than via `Default` so the test does
    // not depend on the process environment.
    #[test]
    fn retry_policy_default() {
        let policy = RetryPolicy {
            max_retries: 1,
            retry_delay_secs: 5,
            flake_budget: 3,
        };
        let transient = FlakeClassification::Transient {
            category: FlakeCategory::OracleTimeout,
            matched_line: "x".into(),
        };
        assert!(policy.should_retry(&transient, 0));
        assert!(!policy.should_retry(&transient, 1));
        assert!(!policy.should_retry(&FlakeClassification::Deterministic, 0));
    }

    #[test]
    fn flake_event_serde_roundtrip() {
        let event = FlakeEvent {
            target: "ext_conformance".into(),
            classification: FlakeClassification::Transient {
                category: FlakeCategory::OracleTimeout,
                matched_line: "oracle timed out".into(),
            },
            attempt: 1,
            timestamp: "2026-02-08T03:00:00Z".into(),
        };
        let json = serde_json::to_string(&event).unwrap();
        let back: FlakeEvent = serde_json::from_str(&json).unwrap();
        assert_eq!(back.target, "ext_conformance");
        assert!(back.classification.is_retriable());
    }

    // The literal 6 is deliberate: it fails when a variant is added to the
    // enum's `all()` list, prompting a review of the classifier patterns.
    #[test]
    fn flake_category_all_covered() {
        assert_eq!(FlakeCategory::all().len(), 6);
        for cat in FlakeCategory::all() {
            assert!(!cat.label().is_empty());
            assert!(!cat.to_string().is_empty());
        }
    }

    #[test]
    fn multiline_output_matches_first_pattern() {
        let output = "starting test...\ncompiling extensions...\nerror: bun process timed out\nassert failed";
        let result = classify_failure(output);
        assert!(matches!(
            result,
            FlakeClassification::Transient {
                category: FlakeCategory::OracleTimeout,
                ..
            }
        ));
    }

    #[test]
    fn case_insensitive_matching() {
        let output = "ERROR: OUT OF MEMORY";
        let result = classify_failure(output);
        assert!(result.is_retriable());
    }

    mod proptest_flake_classifier {
        use super::*;
        use proptest::prelude::*;

        /// Generates a string guaranteed to trigger a specific `FlakeCategory`.
        fn arb_transient_line() -> impl Strategy<Value = (String, FlakeCategory)> {
            prop_oneof![
                Just((
                    "oracle process timed out".to_string(),
                    FlakeCategory::OracleTimeout
                )),
                Just((
                    "bun timed out waiting".to_string(),
                    FlakeCategory::OracleTimeout
                )),
                Just((
                    "fatal: out of memory".to_string(),
                    FlakeCategory::ResourceExhaustion
                )),
                Just((
                    "error: ENOMEM".to_string(),
                    FlakeCategory::ResourceExhaustion
                )),
                Just((
                    "cannot allocate 4 GB".to_string(),
                    FlakeCategory::ResourceExhaustion
                )),
                Just((
                    "quickjs runtime: allocation failed, out of memory".to_string(),
                    FlakeCategory::JsGcPressure
                )),
                Just((
                    "EBUSY: resource busy".to_string(),
                    FlakeCategory::FsContention
                )),
                Just(("ETXTBSY".to_string(), FlakeCategory::FsContention)),
                Just((
                    "resource busy or locked".to_string(),
                    FlakeCategory::FsContention
                )),
                Just((
                    "EADDRINUSE on port 8080".to_string(),
                    FlakeCategory::PortConflict
                )),
                Just((
                    "address already in use".to_string(),
                    FlakeCategory::PortConflict
                )),
                Just((
                    "ENOENT: no such file or directory /tmp/pi-test".to_string(),
                    FlakeCategory::TmpdirRace
                )),
            ]
        }

        proptest! {
            #[test]
            fn classify_failure_never_panics(s in ".*") {
                let _ = classify_failure(&s);
            }

            #[test]
            fn deterministic_is_not_retriable(s in "[a-zA-Z0-9 ]{0,200}") {
                let result = classify_failure(&s);
                if result == FlakeClassification::Deterministic {
                    assert!(!result.is_retriable());
                }
            }

            #[test]
            fn transient_is_always_retriable(s in ".*") {
                let result = classify_failure(&s);
                if let FlakeClassification::Transient { .. } = &result {
                    assert!(result.is_retriable());
                }
            }

            #[test]
            fn known_transient_lines_classify_correctly(
                (line, expected_cat) in arb_transient_line()
            ) {
                let result = classify_failure(&line);
                match result {
                    FlakeClassification::Transient { category, .. } => {
                        assert_eq!(
                            category, expected_cat,
                            "line {line:?} got {category:?} expected {expected_cat:?}"
                        );
                    }
                    FlakeClassification::Deterministic => {
                        panic!("expected Transient for {line:?}, got Deterministic");
                    }
                }
            }

            #[test]
            fn classify_is_case_insensitive(
                (line, expected_cat) in arb_transient_line()
            ) {
                let upper = classify_failure(&line.to_uppercase());
                let lower = classify_failure(&line.to_lowercase());
                match (&upper, &lower) {
                    (
                        FlakeClassification::Transient { category: cu, .. },
                        FlakeClassification::Transient { category: cl, .. },
                    ) => {
                        assert_eq!(*cu, expected_cat);
                        assert_eq!(*cl, expected_cat);
                    }
                    _ => panic!("expected both Transient for line {line:?}"),
                }
            }

            #[test]
            fn noise_prefix_preserves_classification(
                noise in "[a-zA-Z0-9 ]{0,50}",
                (line, expected_cat) in arb_transient_line(),
            ) {
                let input = format!("{noise}\n{line}");
                let result = classify_failure(&input);
                match result {
                    FlakeClassification::Transient { category, .. } => {
                        assert_eq!(category, expected_cat);
                    }
                    FlakeClassification::Deterministic => {
                        panic!("expected Transient for input with line {line:?}");
                    }
                }
            }

            #[test]
            fn whitespace_only_is_deterministic(s in "[ \\t\\n]{0,100}") {
                assert_eq!(classify_failure(&s), FlakeClassification::Deterministic);
            }

            #[test]
            fn serde_roundtrip_transient((line, _cat) in arb_transient_line()) {
                let result = classify_failure(&line);
                let json = serde_json::to_string(&result).unwrap();
                let back: FlakeClassification = serde_json::from_str(&json).unwrap();
                assert_eq!(result, back);
            }

            // Index ranges below are derived from `all()` rather than a
            // literal so a newly added category is exercised automatically.
            #[test]
            fn serde_roundtrip_category(idx in 0..FlakeCategory::all().len()) {
                let cat = FlakeCategory::all()[idx];
                let json = serde_json::to_string(&cat).unwrap();
                let back: FlakeCategory = serde_json::from_str(&json).unwrap();
                assert_eq!(cat, back);
            }

            #[test]
            fn all_categories_have_nonempty_labels(idx in 0..FlakeCategory::all().len()) {
                let cat = FlakeCategory::all()[idx];
                assert!(!cat.label().is_empty());
                assert!(!cat.to_string().is_empty());
                assert_eq!(cat.label(), cat.to_string());
            }

            #[test]
            fn retry_policy_respects_attempt_bound(
                max_retries in 0..10u32,
                attempt in 0..20u32,
            ) {
                let policy = RetryPolicy {
                    max_retries,
                    retry_delay_secs: 1,
                    flake_budget: 3,
                };
                let transient = FlakeClassification::Transient {
                    category: FlakeCategory::OracleTimeout,
                    matched_line: "x".into(),
                };
                let should = policy.should_retry(&transient, attempt);
                assert_eq!(should, attempt < max_retries);
            }

            #[test]
            fn retry_policy_never_retries_deterministic(
                max_retries in 0..10u32,
                attempt in 0..20u32,
            ) {
                let policy = RetryPolicy {
                    max_retries,
                    retry_delay_secs: 1,
                    flake_budget: 3,
                };
                assert!(!policy.should_retry(&FlakeClassification::Deterministic, attempt));
            }

            #[test]
            fn flake_event_serde_roundtrip_prop(
                target in "[a-z_]{1,20}",
                attempt in 0..100u32,
                idx in 0..FlakeCategory::all().len(),
            ) {
                let cat = FlakeCategory::all()[idx];
                let event = FlakeEvent {
                    target: target.clone(),
                    classification: FlakeClassification::Transient {
                        category: cat,
                        matched_line: "matched".into(),
                    },
                    attempt,
                    timestamp: "2026-01-01T00:00:00Z".into(),
                };
                let json = serde_json::to_string(&event).unwrap();
                let back: FlakeEvent = serde_json::from_str(&json).unwrap();
                assert_eq!(back.target, target);
                assert_eq!(back.attempt, attempt);
                assert!(back.classification.is_retriable());
            }
        }
    }
}
619}