Skip to main content

assay_runner_core/
sdk.rs

1use crate::{run::is_safe_run_id, RunnerSpikeArchive};
2use assay_runner_schema::{SdkLayerEvent, SdkLayerStatus, SDK_EVENT_SCHEMA};
3use serde_json::Value;
4use std::collections::BTreeSet;
5use thiserror::Error;
6
7#[derive(Debug, Clone, PartialEq, Eq)]
8pub struct SdkLayerCapture {
9    pub run_id: String,
10    pub sdk_layer_ndjson: Vec<u8>,
11    pub events: Vec<SdkLayerEvent>,
12}
13
14#[derive(Debug, Error)]
15pub enum SdkLayerError {
16    #[error("sdk layer run_id must not be empty")]
17    EmptyRunId,
18    #[error("sdk layer run_id may only contain ASCII letters, digits, '_' and '-'")]
19    UnsafeRunId,
20    #[error("sdk layer run_id mismatch: expected {expected}, found {actual}")]
21    RunIdMismatch { expected: String, actual: String },
22    #[error("sdk event log did not contain events")]
23    EmptySdkLog,
24    #[error("invalid sdk event json at line {line}: {source}")]
25    InvalidJson {
26        line: usize,
27        source: serde_json::Error,
28    },
29    #[error(
30        "sdk event line {line} must have schema {SDK_EVENT_SCHEMA}, found {observed_schema:?}"
31    )]
32    UnexpectedSchema {
33        line: usize,
34        observed_schema: Option<String>,
35    },
36    #[error("sdk event line {line} run_id mismatch: expected {expected}, found {actual}")]
37    EventRunIdMismatch {
38        line: usize,
39        expected: String,
40        actual: String,
41    },
42    #[error("sdk event line {line} missing required field {field}")]
43    MissingRequiredField { line: usize, field: String },
44    #[error("sdk event line {line} event_type {event_type} requires {field}")]
45    MissingToolCallField {
46        line: usize,
47        event_type: String,
48        field: &'static str,
49    },
50    #[error("sdk event line {line} has unsupported event_type {event_type}")]
51    UnsupportedEventType { line: usize, event_type: String },
52    #[error("sdk event line {line} seq mismatch: expected {expected}, found {actual}")]
53    SeqMismatch {
54        line: usize,
55        expected: u64,
56        actual: u64,
57    },
58    #[error("sdk event serialization failed: {0}")]
59    Json(#[from] serde_json::Error),
60}
61
62impl SdkLayerCapture {
63    pub fn from_sdk_ndjson(
64        run_id: impl Into<String>,
65        ndjson: &[u8],
66    ) -> Result<Self, SdkLayerError> {
67        let run_id = run_id.into();
68        if run_id.is_empty() {
69            return Err(SdkLayerError::EmptyRunId);
70        }
71        if !is_safe_run_id(&run_id) {
72            return Err(SdkLayerError::UnsafeRunId);
73        }
74
75        let input = String::from_utf8_lossy(ndjson);
76        let mut sdk_layer_ndjson = Vec::new();
77        let mut events = Vec::new();
78
79        for (idx, raw) in input.lines().enumerate() {
80            let line = idx + 1;
81            let trimmed = raw.trim();
82            if trimmed.is_empty() {
83                continue;
84            }
85            let value: Value = serde_json::from_str(trimmed)
86                .map_err(|source| SdkLayerError::InvalidJson { line, source })?;
87            let observed_schema = observed_schema(&value);
88            if observed_schema.as_deref() != Some(SDK_EVENT_SCHEMA) {
89                return Err(SdkLayerError::UnexpectedSchema {
90                    line,
91                    observed_schema,
92                });
93            }
94
95            let event_run_id = required_string(&value, "run_id", line)?;
96            if event_run_id != run_id {
97                return Err(SdkLayerError::EventRunIdMismatch {
98                    line,
99                    expected: run_id.clone(),
100                    actual: event_run_id,
101                });
102            }
103
104            let seq = required_u64(&value, "seq", line)?;
105            let expected_seq = events.len() as u64;
106            if seq != expected_seq {
107                return Err(SdkLayerError::SeqMismatch {
108                    line,
109                    expected: expected_seq,
110                    actual: seq,
111                });
112            }
113
114            let event_type = required_string(&value, "event_type", line)?;
115            if !is_supported_event_type(&event_type) {
116                return Err(SdkLayerError::UnsupportedEventType { line, event_type });
117            }
118            let tool_call_id = optional_string(&value, "tool_call_id");
119            let tool = optional_string(&value, "tool");
120            validate_tool_call_fields(line, &event_type, &tool_call_id, &tool)?;
121
122            let event = SdkLayerEvent {
123                schema: SDK_EVENT_SCHEMA.to_string(),
124                run_id: run_id.clone(),
125                seq,
126                event_type,
127                source: required_string(&value, "source", line)?,
128                sdk_name: optional_string(&value, "sdk_name"),
129                sdk_version: optional_string(&value, "sdk_version"),
130                tool_call_id,
131                tool,
132            };
133            serde_json::to_writer(&mut sdk_layer_ndjson, &event)?;
134            sdk_layer_ndjson.push(b'\n');
135            events.push(event);
136        }
137
138        if events.is_empty() {
139            return Err(SdkLayerError::EmptySdkLog);
140        }
141
142        Ok(Self {
143            run_id,
144            sdk_layer_ndjson,
145            events,
146        })
147    }
148
149    /// Apply this SDK capture to the archive.
150    ///
151    /// SDK-layer events are self-reported runtime observations. Applying them
152    /// may set `sdk_layer=self_reported`, but it must not promote kernel or
153    /// policy claims: side-effect evidence remains owned by those layers.
154    pub fn apply_to_archive(self, archive: &mut RunnerSpikeArchive) -> Result<(), SdkLayerError> {
155        let SdkLayerCapture {
156            run_id,
157            sdk_layer_ndjson,
158            events,
159        } = self;
160
161        if archive.run_id != run_id {
162            return Err(SdkLayerError::RunIdMismatch {
163                expected: archive.run_id.clone(),
164                actual: run_id,
165            });
166        }
167
168        archive.sdk_layer_ndjson = sdk_layer_ndjson;
169        archive.observation_health.sdk_layer = SdkLayerStatus::SelfReported;
170        archive
171            .observation_health
172            .notes
173            .retain(|note| !note.starts_with("s5_sdk_capture:"));
174        let sdk_tool_call_ids = sdk_tool_call_ids(&events);
175        mark_sdk_policy_mismatches(archive, &sdk_tool_call_ids);
176        archive.observation_health.notes.push(format!(
177            "s5_sdk_capture: sdk_events={} sdk_tool_calls={}",
178            events.len(),
179            sdk_tool_call_ids.len()
180        ));
181        Ok(())
182    }
183}
184
185fn required_string(
186    value: &Value,
187    field: &'static str,
188    line: usize,
189) -> Result<String, SdkLayerError> {
190    value
191        .get(field)
192        .and_then(Value::as_str)
193        .map(str::trim)
194        .filter(|value| !value.is_empty())
195        .map(ToOwned::to_owned)
196        .ok_or_else(|| SdkLayerError::MissingRequiredField {
197            line,
198            field: field.to_string(),
199        })
200}
201
202fn required_u64(value: &Value, field: &'static str, line: usize) -> Result<u64, SdkLayerError> {
203    value
204        .get(field)
205        .and_then(Value::as_u64)
206        .ok_or_else(|| SdkLayerError::MissingRequiredField {
207            line,
208            field: field.to_string(),
209        })
210}
211
212fn optional_string(value: &Value, field: &'static str) -> Option<String> {
213    value
214        .get(field)
215        .and_then(Value::as_str)
216        .map(str::trim)
217        .filter(|value| !value.is_empty())
218        .map(ToOwned::to_owned)
219}
220
221fn observed_schema(value: &Value) -> Option<String> {
222    value.get("schema").map(|schema| match schema {
223        Value::String(schema) => schema.clone(),
224        other => other.to_string(),
225    })
226}
227
/// Reports whether `event_type` is one of the SDK event types this layer
/// accepts. The comparison is exact: case and whitespace matter.
fn is_supported_event_type(event_type: &str) -> bool {
    const SUPPORTED: [&str; 4] = [
        "tool_call_started",
        "tool_call_completed",
        "run_finished",
        "run_failed",
    ];
    SUPPORTED.contains(&event_type)
}
234
235fn validate_tool_call_fields(
236    line: usize,
237    event_type: &str,
238    tool_call_id: &Option<String>,
239    tool: &Option<String>,
240) -> Result<(), SdkLayerError> {
241    if !matches!(event_type, "tool_call_started" | "tool_call_completed") {
242        return Ok(());
243    }
244    if tool_call_id.is_none() {
245        return Err(SdkLayerError::MissingToolCallField {
246            line,
247            event_type: event_type.to_string(),
248            field: "tool_call_id",
249        });
250    }
251    if tool.is_none() {
252        return Err(SdkLayerError::MissingToolCallField {
253            line,
254            event_type: event_type.to_string(),
255            field: "tool",
256        });
257    }
258    Ok(())
259}
260
261fn sdk_tool_call_ids(events: &[SdkLayerEvent]) -> BTreeSet<String> {
262    events
263        .iter()
264        .filter_map(|event| event.tool_call_id.clone())
265        .collect()
266}
267
268fn mark_sdk_policy_mismatches(
269    archive: &mut RunnerSpikeArchive,
270    sdk_tool_call_ids: &BTreeSet<String>,
271) {
272    if archive.policy_layer_ndjson.is_empty() {
273        return;
274    }
275
276    let policy_binding_ids = archive
277        .correlation_report
278        .bindings
279        .iter()
280        .map(|binding| binding.tool_call_id.clone())
281        .collect::<BTreeSet<_>>();
282
283    for tool_call_id in sdk_tool_call_ids {
284        if !policy_binding_ids.contains(tool_call_id) {
285            archive.correlation_report.mark_partial(format!(
286                "sdk_tool_call_without_policy_binding:{tool_call_id}"
287            ));
288        }
289    }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use assay_runner_schema::{
        BindingWindow, CorrelationBinding, CorrelationStatus, KernelLayerStatus, PolicyLayerStatus,
    };

    /// Two-event fixture: one tool call started and completed for `run_001`.
    const SDK_EVENTS: &[u8] = br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":0,"event_type":"tool_call_started","source":"openai-agents","sdk_name":"@openai/agents","sdk_version":"0.0.0-fixture","tool_call_id":"tc_runner_policy_001","tool":"read_file"}
{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":1,"event_type":"tool_call_completed","source":"openai-agents","sdk_name":"@openai/agents","sdk_version":"0.0.0-fixture","tool_call_id":"tc_runner_policy_001","tool":"read_file"}
"#;

    /// Parses the shared fixture for `run_001`.
    fn fixture_capture() -> SdkLayerCapture {
        SdkLayerCapture::from_sdk_ndjson("run_001", SDK_EVENTS).unwrap()
    }

    /// Parses `ndjson` for `run_001` and returns the rejection it must produce.
    fn rejection(ndjson: &[u8]) -> SdkLayerError {
        SdkLayerCapture::from_sdk_ndjson("run_001", ndjson).unwrap_err()
    }

    /// Builds a `run_001` archive that has policy-layer data and a single
    /// correlation binding for `tool_call_id`.
    fn archive_with_policy_binding(tool_call_id: &str) -> RunnerSpikeArchive {
        let mut archive = RunnerSpikeArchive::empty("run_001", "linux");
        archive.policy_layer_ndjson = b"{\"schema\":\"assay.runner.policy_event.v0\"}\n".to_vec();
        archive.correlation_report.add_binding(CorrelationBinding {
            tool_call_id: tool_call_id.to_string(),
            policy_decision: Some("allow".to_string()),
            kernel_event_count: 1,
            window: BindingWindow {
                start: "run_started".to_string(),
                end: "run_finished".to_string(),
            },
        });
        archive
    }

    #[test]
    fn sdk_log_records_self_reported_layer() {
        let capture = fixture_capture();
        let sdk = String::from_utf8(capture.sdk_layer_ndjson.clone()).unwrap();

        assert!(sdk.contains(SDK_EVENT_SCHEMA));
        assert!(sdk.contains("tc_runner_policy_001"));
        assert_eq!(capture.events.len(), 2);
    }

    #[test]
    fn apply_marks_sdk_self_reported_without_promoting_other_layers() {
        let mut archive = RunnerSpikeArchive::empty("run_001", "linux");

        fixture_capture().apply_to_archive(&mut archive).unwrap();

        let health = &archive.observation_health;
        assert_eq!(health.sdk_layer, SdkLayerStatus::SelfReported);
        assert_eq!(health.kernel_layer, KernelLayerStatus::Absent);
        assert_eq!(health.policy_layer, PolicyLayerStatus::Absent);

        let expected_note = "s5_sdk_capture: sdk_events=2 sdk_tool_calls=1";
        assert!(health.notes.iter().any(|note| note == expected_note));
    }

    #[test]
    fn sdk_policy_matching_tool_call_keeps_existing_correlation_status() {
        let mut archive = archive_with_policy_binding("tc_runner_policy_001");

        fixture_capture().apply_to_archive(&mut archive).unwrap();

        assert_eq!(archive.correlation_report.status, CorrelationStatus::Clean);
        assert!(archive.correlation_report.ambiguities.is_empty());
    }

    #[test]
    fn sdk_policy_mismatched_tool_call_marks_correlation_partial() {
        let mut archive = archive_with_policy_binding("tc_different_policy_call");

        fixture_capture().apply_to_archive(&mut archive).unwrap();

        assert_eq!(
            archive.correlation_report.status,
            CorrelationStatus::Partial
        );
        assert!(archive
            .correlation_report
            .ambiguities
            .contains(&"sdk_tool_call_without_policy_binding:tc_runner_policy_001".to_string()));
    }

    #[test]
    fn empty_sdk_log_is_rejected() {
        let outcome = SdkLayerCapture::from_sdk_ndjson("run_001", b"\n\n");

        assert!(matches!(outcome, Err(SdkLayerError::EmptySdkLog)));
    }

    #[test]
    fn unsupported_event_type_is_rejected() {
        let err = rejection(
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":0,"event_type":"unknown","source":"fixture"}
"#,
        );

        match err {
            SdkLayerError::UnsupportedEventType { line, event_type } => {
                assert_eq!(line, 1);
                assert_eq!(event_type, "unknown");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn tool_call_events_require_tool_call_id() {
        let err = rejection(
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":0,"event_type":"tool_call_started","source":"fixture","tool":"read_file"}
"#,
        );

        match err {
            SdkLayerError::MissingToolCallField {
                line,
                event_type,
                field,
            } => {
                assert_eq!(line, 1);
                assert_eq!(event_type, "tool_call_started");
                assert_eq!(field, "tool_call_id");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn tool_call_events_require_tool_name() {
        let err = rejection(
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":0,"event_type":"tool_call_completed","source":"fixture","tool_call_id":"tc_runner_sdk_001"}
"#,
        );

        match err {
            SdkLayerError::MissingToolCallField {
                line,
                event_type,
                field,
            } => {
                assert_eq!(line, 1);
                assert_eq!(event_type, "tool_call_completed");
                assert_eq!(field, "tool");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn run_finished_does_not_require_tool_call_fields() {
        let capture = SdkLayerCapture::from_sdk_ndjson(
            "run_001",
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":0,"event_type":"run_finished","source":"fixture"}
"#,
        )
        .unwrap();

        let first = &capture.events[0];
        assert_eq!(first.tool_call_id, None);
        assert_eq!(first.tool, None);
    }

    #[test]
    fn seq_must_be_contiguous() {
        let err = rejection(
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_001","seq":1,"event_type":"run_finished","source":"fixture"}
"#,
        );

        match err {
            SdkLayerError::SeqMismatch {
                line,
                expected,
                actual,
            } => {
                assert_eq!(line, 1);
                assert_eq!(expected, 0);
                assert_eq!(actual, 1);
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }

    #[test]
    fn event_run_id_must_match_capture_run_id() {
        let err = rejection(
            br#"{"schema":"assay.runner.sdk_event.v0","run_id":"run_002","seq":0,"event_type":"run_finished","source":"fixture"}
"#,
        );

        match err {
            SdkLayerError::EventRunIdMismatch {
                line,
                expected,
                actual,
            } => {
                assert_eq!(line, 1);
                assert_eq!(expected, "run_001");
                assert_eq!(actual, "run_002");
            }
            other => panic!("unexpected error: {other:?}"),
        }
    }
}