Skip to main content

perspt_coding/
runtime.rs

1//! Generic runtime smoke-probe scheme (PSP-8).
2//!
3//! Build + unit tests passing does not prove the produced *artifact runs*: a CLI
4//! can panic on startup, a library can fail at import, an entrypoint can crash on
5//! a code path no unit test exercised. This module is the language-neutral scheme
6//! for exercising built artifacts at runtime and turning failures into typed
7//! [`ResidualClass::Runtime`] residuals, so they feed the same energy + directed
8//! correction loop as compiler/test residuals instead of slipping through.
9//!
10//! The split mirrors the rest of `perspt-coding`: an adapter *describes* the
11//! smoke invocations and *classifies* their output, but the runtime is what
12//! actually executes them (it owns process spawning, timeouts, and sandboxing).
13//! Each [`crate::lang::LanguageAdapter`] gets default no-op implementations and
14//! overrides them for its language, so new language plugins extend the scheme by
15//! implementing two methods.
16
17use perspt_sdk::{IndependenceRoute, ResidualClass, ResidualEvent, ResidualSeverity, SensorRef};
18
/// One runtime smoke probe: a shell command, run from the workspace root, that
/// exercises the runtime entrypoint of a built artifact.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SmokeInvocation {
    /// The complete shell command line, e.g. `cargo run -q -p cli -- --help`.
    pub command: String,
    /// Human-readable label for logs/telemetry.
    pub description: String,
    /// When `true`, a non-zero exit is by itself treated as a failure. Some
    /// entrypoints legitimately exit non-zero (e.g. a usage error on missing
    /// args); those are run with `false`, so only a crash marker in the output
    /// counts as a failure.
    pub failure_on_nonzero: bool,
}

impl SmokeInvocation {
    /// Creates an invocation from a command line and a description.
    ///
    /// The result is strict by default: `failure_on_nonzero` starts as `true`.
    /// Chain [`SmokeInvocation::tolerate_nonzero`] to relax that.
    #[must_use]
    pub fn new(command: impl Into<String>, description: impl Into<String>) -> Self {
        let command = command.into();
        let description = description.into();
        Self {
            command,
            description,
            failure_on_nonzero: true,
        }
    }

    /// Returns this invocation with `failure_on_nonzero` cleared, so a non-zero
    /// exit is tolerated and only crash markers in the output flag it.
    ///
    /// This consumes `self` and hands back the modified value; the return value
    /// must be used, otherwise the change is lost (hence `#[must_use]`).
    #[must_use]
    pub fn tolerate_nonzero(self) -> Self {
        Self {
            failure_on_nonzero: false,
            ..self
        }
    }
}
49
/// Looks for a crash signature in combined stdout+stderr, regardless of the
/// process exit code.
///
/// Every line is trimmed and tested, in order, against a fixed table of
/// case-sensitive substrings covering common cross-language crash signatures
/// (Rust panics/aborts, Python tracebacks, native faults). The first line that
/// contains any of them is returned in its trimmed form; `None` means no line
/// matched.
pub fn crash_marker(output: &str) -> Option<String> {
    // Substring signatures; a line matches if it contains any one of these.
    const MARKERS: &[&str] = &[
        "panicked at",
        "RUST_BACKTRACE",
        "fatal runtime error",
        "Traceback (most recent call last)",
        "Segmentation fault",
        "core dumped",
        "AddressSanitizer",
        "SIGSEGV",
        "SIGABRT",
        "Aborted (core dumped)",
        "stack overflow",
        "Uncaught",
        "Unhandled",
    ];

    output
        .lines()
        .map(str::trim)
        .find(|candidate| MARKERS.iter().any(|marker| candidate.contains(marker)))
        .map(str::to_owned)
}
77
/// Finds a NaN / infinity token in an artifact's output — a strong signal of a
/// scientific-computing/ML defect (divergence, divide-by-zero, unnormalized
/// features).
///
/// The output is split into tokens at every character that is not an ASCII
/// letter, an ASCII digit, `-`, or `+`. Leading signs are stripped from each
/// token and the remainder is compared case-insensitively against `nan`, `inf`
/// and `infinity`. Matching whole tokens avoids substring false positives such
/// as "banana" (contains "nan") or "info" (contains "inf").
///
/// Returns the first matching token exactly as it appeared in the output
/// (including any leading sign), or `None` when there is no such token.
pub fn numeric_anomaly(output: &str) -> Option<String> {
    const ANOMALY_WORDS: [&str; 3] = ["nan", "inf", "infinity"];

    let is_separator = |ch: char| !(ch.is_ascii_alphanumeric() || ch == '-' || ch == '+');

    for token in output.split(is_separator) {
        let unsigned = token.trim_start_matches(['-', '+']);
        if ANOMALY_WORDS
            .iter()
            .any(|word| unsigned.eq_ignore_ascii_case(word))
        {
            return Some(token.to_owned());
        }
    }
    None
}
91
92/// The sensor that produced a runtime residual. A real process run is a
93/// deterministic-tool route (full-weight eligible), not a model critique.
94pub fn runtime_sensor() -> SensorRef {
95    SensorRef::new("runtime-smoke", IndependenceRoute::DeterministicTool)
96}
97
98/// Build a [`ResidualClass::Runtime`] residual for a failed smoke invocation.
99pub fn runtime_residual(
100    node_id: &str,
101    generation: u32,
102    summary: impl Into<String>,
103) -> Option<ResidualEvent> {
104    let mut r = ResidualEvent::new(
105        node_id,
106        generation,
107        ResidualClass::Runtime,
108        ResidualSeverity::Error,
109        1.0,
110        runtime_sensor(),
111    )
112    .ok()?;
113    r.evidence.summary = summary.into();
114    Some(r)
115}
116
117/// Default classification shared by adapters: a smoke invocation fails when it
118/// exits non-zero (and `failure_on_nonzero`) or its output carries a crash
119/// marker. Returns at most one residual per invocation.
120pub fn default_classify_runtime(
121    node_id: &str,
122    generation: u32,
123    invocation: &SmokeInvocation,
124    exit_success: bool,
125    output: &str,
126) -> Vec<ResidualEvent> {
127    if let Some(line) = crash_marker(output) {
128        return runtime_residual(
129            node_id,
130            generation,
131            format!("runtime crash in `{}`: {}", invocation.description, line),
132        )
133        .into_iter()
134        .collect();
135    }
136    if let Some(tok) = numeric_anomaly(output) {
137        return runtime_residual(
138            node_id,
139            generation,
140            format!(
141                "numeric anomaly ({tok}) in `{}` output — likely divergence, \
142                 divide-by-zero, or unnormalized inputs",
143                invocation.description
144            ),
145        )
146        .into_iter()
147        .collect();
148    }
149    if invocation.failure_on_nonzero && !exit_success {
150        let tail: String = output
151            .lines()
152            .rev()
153            .take(3)
154            .collect::<Vec<_>>()
155            .into_iter()
156            .rev()
157            .collect::<Vec<_>>()
158            .join(" | ");
159        return runtime_residual(
160            node_id,
161            generation,
162            format!(
163                "runtime entrypoint `{}` exited with failure: {}",
164                invocation.description, tail
165            ),
166        )
167        .into_iter()
168        .collect();
169    }
170    Vec::new()
171}
172
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies `output` for node `n1`, generation 0, via the default classifier.
    fn classify(inv: &SmokeInvocation, exit_success: bool, output: &str) -> Vec<ResidualEvent> {
        default_classify_runtime("n1", 0, inv, exit_success, output)
    }

    #[test]
    fn detects_rust_panic() {
        let captured = "thread 'main' panicked at src/main.rs:10:5:\nindex out of bounds";
        let hit = crash_marker(captured).expect("a panic line should be reported");
        assert!(hit.contains("panicked at"));
    }

    #[test]
    fn detects_python_traceback() {
        let captured = "Traceback (most recent call last):\n  File ...\nValueError: bad";
        assert!(crash_marker(captured).is_some());
    }

    #[test]
    fn clean_output_has_no_marker() {
        assert_eq!(crash_marker("Usage: cli <COMMAND>\nвсе хорошо"), None);
    }

    #[test]
    fn nonzero_exit_flagged_when_required() {
        let strict = SmokeInvocation::new("cli run", "cli");
        let residuals = classify(&strict, false, "Error: boom");
        assert_eq!(residuals.len(), 1);
        assert_eq!(residuals[0].class, ResidualClass::Runtime);
    }

    #[test]
    fn tolerated_nonzero_without_crash_is_clean() {
        // A usage error on missing args is expected behaviour, not a runtime defect.
        let lenient = SmokeInvocation::new("cli", "cli no-args").tolerate_nonzero();
        let residuals = classify(&lenient, false, "error: missing --model");
        assert!(residuals.is_empty());
    }

    #[test]
    fn numeric_anomaly_detects_nan_and_inf_not_substrings() {
        assert_eq!(
            numeric_anomaly("Forecasted Value: NaN").as_deref(),
            Some("NaN")
        );
        for flagged in ["loss = inf after epoch 3", "result: -inf"] {
            assert!(numeric_anomaly(flagged).is_some());
        }
        // Words that merely contain nan/inf, and ordinary numbers, must stay clean.
        for clean in ["banana split info panel", "Forecasted Value: 36.0"] {
            assert!(numeric_anomaly(clean).is_none());
        }
    }

    #[test]
    fn numeric_anomaly_flagged_even_on_success_exit() {
        let strict = SmokeInvocation::new("cli", "cli");
        let residuals = classify(&strict, true, "Forecasted Value: NaN");
        assert_eq!(residuals.len(), 1);
        assert_eq!(residuals[0].class, ResidualClass::Runtime);
    }

    #[test]
    fn crash_flagged_even_when_tolerating_nonzero() {
        let lenient = SmokeInvocation::new("cli", "cli").tolerate_nonzero();
        let residuals = classify(&lenient, false, "thread 'main' panicked at x");
        assert_eq!(residuals.len(), 1);
    }
}