Skip to main content

agent_sdk/observability/
payload.rs

1//! `GenAI` payload conversion from SDK types to semconv JSON.
2//!
3//! Converts SDK [`ChatRequest`] / [`ChatResponse`] types into the JSON
4//! schemas defined by the `GenAI` semantic conventions for
5//! `gen_ai.system_instructions`, `gen_ai.input.messages`, and
6//! `gen_ai.output.messages`.
7//!
8//! # Privacy
9//!
10//! This layer sits directly in front of observability egress —
11//! whatever shape we emit here flows into `OTel` spans, `ObservabilityStore`
12//! implementations, and ultimately into third-party collectors
//! (Langfuse, Datadog, Honeycomb, …). [`PayloadRedactor`] wraps a
14//! pluggable [`PiiDetector`] and masks PII in every text and JSON
15//! string leaf before they leave this module, so the two conversion
16//! paths (directly serialized onto spans, or handed to the store)
17//! share one audited redaction pass.
18//!
19//! Callers that do not need redaction — including existing call sites
20//! that have not adopted the detector yet — should use
21//! [`PayloadRedactor::noop`]. Financial and other PII-sensitive
22//! integrations should construct a redactor with
23//! [`agent_sdk_foundation::privacy::BaselineDetector`] or a custom
24//! detector. Routing every conversion through the [`PayloadRedactor`]
25//! type keeps the unredacted-egress surface to a single audited path.
26
27use crate::llm::{ChatRequest, ChatResponse, Content, ContentBlock, Message, Role};
28use agent_sdk_foundation::privacy::{NoopDetector, PiiDetector, mask_spans};
29use serde_json::{Value, json};
30use std::sync::Arc;
31
32use super::attrs::finish_reason_str;
33
34/// Redacts PII from payloads before they flow to observability sinks.
35///
36/// Wraps a [`PiiDetector`] and applies it to every text and JSON
37/// string leaf emitted by [`convert_system_instructions`](Self::convert_system_instructions),
38/// [`convert_input_messages`](Self::convert_input_messages), and
39/// [`convert_output_messages`](Self::convert_output_messages).
40///
41/// Construct with [`PayloadRedactor::new`] wrapping any
42/// `Arc<dyn PiiDetector>`, or with [`PayloadRedactor::noop`] for the
43/// pass-through case. The redactor is `Clone` and `Send + Sync`, so
44/// one instance can be shared across the agent loop for the lifetime
45/// of a run.
46#[derive(Clone)]
47pub struct PayloadRedactor {
48    detector: Arc<dyn PiiDetector>,
49    is_noop: bool,
50}
51
52impl PayloadRedactor {
53    /// Wrap an existing detector.
54    #[must_use]
55    pub fn new(detector: Arc<dyn PiiDetector>) -> Self {
56        Self {
57            detector,
58            is_noop: false,
59        }
60    }
61
62    /// Redactor that performs no masking — produces byte-identical
63    /// JSON output to the raw conversion path.
64    #[must_use]
65    pub fn noop() -> Self {
66        Self {
67            detector: Arc::new(NoopDetector),
68            is_noop: true,
69        }
70    }
71
72    /// Whether this redactor performs no masking.
73    ///
74    /// `true` for [`PayloadRedactor::noop`] / [`Default`]. The
75    /// payload-capture gate uses this to refuse `Inline` payloads from a
76    /// store that attested PII redaction but left the default noop redactor
77    /// in place — the attestation alone is not sufficient evidence that PII
78    /// is actually being masked.
79    #[must_use]
80    pub const fn is_noop(&self) -> bool {
81        self.is_noop
82    }
83
84    /// Convert system instructions, masking PII in the system prompt.
85    ///
86    /// Returns `None` if the system prompt is empty.
87    #[must_use]
88    pub fn convert_system_instructions(&self, request: &ChatRequest) -> Option<Value> {
89        if request.system.is_empty() {
90            return None;
91        }
92        Some(json!([{"text": self.mask_str(&request.system)}]))
93    }
94
95    /// Convert input messages into semconv JSON, masking PII in every
96    /// text and tool-argument leaf.
97    #[must_use]
98    pub fn convert_input_messages(&self, request: &ChatRequest) -> Value {
99        let messages: Vec<Value> = request
100            .messages
101            .iter()
102            .map(|m| self.convert_message(m))
103            .collect();
104        Value::Array(messages)
105    }
106
107    /// Convert a [`ChatResponse`] into semconv output-messages JSON,
108    /// masking PII in every assistant text, thinking text, and
109    /// tool-argument leaf.
110    #[must_use]
111    pub fn convert_output_messages(&self, response: &ChatResponse) -> Value {
112        let parts: Vec<Value> = response
113            .content
114            .iter()
115            .filter_map(|b| self.convert_block(b))
116            .collect();
117        let mut message = json!({
118            "role": "assistant",
119            "content": Value::Array(parts),
120        });
121        if let Some(reason) = response.stop_reason {
122            message["finish_reason"] = json!(finish_reason_str(reason));
123        }
124        json!([message])
125    }
126
127    fn convert_message(&self, message: &Message) -> Value {
128        let role = match message.role {
129            Role::User => determine_user_message_role(message),
130            Role::Assistant => "assistant",
131        };
132        let content = self.convert_content(&message.content);
133        json!({
134            "role": role,
135            "content": content,
136        })
137    }
138
139    fn convert_content(&self, content: &Content) -> Value {
140        match content {
141            Content::Text(text) => json!([{"text": self.mask_str(text)}]),
142            Content::Blocks(blocks) => {
143                let parts: Vec<Value> = blocks
144                    .iter()
145                    .filter_map(|b| self.convert_block(b))
146                    .collect();
147                Value::Array(parts)
148            }
149        }
150    }
151
152    fn convert_block(&self, block: &ContentBlock) -> Option<Value> {
153        match block {
154            ContentBlock::Text { text } => Some(json!({"text": self.mask_str(text)})),
155            ContentBlock::Thinking { thinking, .. } => Some(json!({
156                "type": "reasoning",
157                "text": self.mask_str(thinking),
158            })),
159            ContentBlock::ToolUse {
160                id, name, input, ..
161            } => {
162                let masked_input = self.mask_json(input);
163                Some(json!({
164                    "type": "tool_call",
165                    "id": id,
166                    "name": name,
167                    "arguments": masked_input.to_string(),
168                }))
169            }
170            ContentBlock::ToolResult {
171                tool_use_id,
172                content,
173                is_error,
174            } => {
175                let mut part = json!({
176                    "type": "tool_call_response",
177                    "id": tool_use_id,
178                    "output": self.mask_str(content),
179                });
180                if *is_error == Some(true) {
181                    part["is_error"] = json!(true);
182                }
183                Some(part)
184            }
185            ContentBlock::Image { source } => Some(json!({
186                "type": "blob",
187                "mime_type": source.media_type,
188                "modality": "image",
189                "size": source.data.len(),
190            })),
191            ContentBlock::Document { source } => {
192                let mut part = json!({
193                    "type": "blob",
194                    "mime_type": source.media_type,
195                    "size": source.data.len(),
196                });
197                if source.media_type.starts_with("image/") {
198                    part["modality"] = json!("image");
199                }
200                Some(part)
201            }
202            // Redacted-thinking blocks carry nothing to surface; this also
203            // omits unknown future `#[non_exhaustive]` block kinds from the
204            // observability payload.
205            _ => None,
206        }
207    }
208
209    /// Mask PII in a plain string.
210    fn mask_str(&self, text: &str) -> String {
211        let spans = self.detector.detect(text);
212        if spans.is_empty() {
213            text.to_owned()
214        } else {
215            mask_spans(text, &spans)
216        }
217    }
218
219    /// Recursively mask every string leaf within a JSON value.
220    fn mask_json(&self, value: &Value) -> Value {
221        match value {
222            Value::String(s) => Value::String(self.mask_str(s)),
223            Value::Array(arr) => Value::Array(arr.iter().map(|v| self.mask_json(v)).collect()),
224            Value::Object(map) => Value::Object(
225                map.iter()
226                    .map(|(k, v)| (k.clone(), self.mask_json(v)))
227                    .collect(),
228            ),
229            Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
230        }
231    }
232}
233
234impl Default for PayloadRedactor {
235    fn default() -> Self {
236        Self::noop()
237    }
238}
239
240/// Decide whether a User-role message is actually a `tool` message
241/// (SDK batches tool results as User messages).
242fn determine_user_message_role(message: &Message) -> &'static str {
243    match &message.content {
244        Content::Blocks(blocks) => {
245            let has_tool_result = blocks
246                .iter()
247                .any(|b| matches!(b, ContentBlock::ToolResult { .. }));
248            if has_tool_result { "tool" } else { "user" }
249        }
250        Content::Text(_) => "user",
251    }
252}
253
#[cfg(test)]
mod tests {
    //! Unit tests for [`PayloadRedactor`]: first the emitted JSON shape
    //! through the noop redactor, then masking through a
    //! `BaselineDetector`-backed redactor.

    use super::*;
    use crate::llm::{ChatRequest, ChatResponse, ContentSource, StopReason, Usage};
    use agent_sdk_foundation::privacy::BaselineDetector;

    /// Build a request that varies only in system prompt and messages;
    /// every other field gets a fixed filler value.
    fn empty_request(system: &str, messages: Vec<Message>) -> ChatRequest {
        ChatRequest {
            system: system.to_owned(),
            messages,
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: false,
            session_id: None,
            cached_content: None,
            thinking: None,
            tool_choice: None,
            response_format: None,
        }
    }

    /// Build a message whose content is the given block list.
    fn blocks_message(role: Role, blocks: Vec<ContentBlock>) -> Message {
        Message {
            role,
            content: Content::Blocks(blocks),
        }
    }

    /// Build a response that varies only in content and stop reason; id,
    /// model, and usage are filler that no test asserts on.
    fn response_with(content: Vec<ContentBlock>, stop_reason: Option<StopReason>) -> ChatResponse {
        ChatResponse {
            id: "resp_1".to_string(),
            content,
            model: "test-model".to_string(),
            stop_reason,
            usage: Usage {
                input_tokens: 10,
                output_tokens: 5,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            },
        }
    }

    // ── Noop redactor: emitted JSON shape, with no masking applied ──

    #[test]
    fn empty_system_returns_none() {
        let request = empty_request("", vec![]);
        assert!(
            PayloadRedactor::noop()
                .convert_system_instructions(&request)
                .is_none()
        );
    }

    #[test]
    fn system_instructions_wraps_in_text_array() -> anyhow::Result<()> {
        use anyhow::Context as _;
        let request = empty_request("You are helpful.", vec![]);
        let result = PayloadRedactor::noop()
            .convert_system_instructions(&request)
            .context("should be Some")?;
        assert_eq!(result, json!([{"text": "You are helpful."}]));
        Ok(())
    }

    #[test]
    fn user_text_message_converts_correctly() {
        let msg = Message::user("Hello");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "user");
        assert_eq!(result["content"][0]["text"], "Hello");
    }

    #[test]
    fn assistant_text_message_converts_correctly() {
        let msg = Message::assistant("Hi there");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "assistant");
        assert_eq!(result["content"][0]["text"], "Hi there");
    }

    #[test]
    fn tool_result_batch_maps_to_tool_role() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "result data".to_string(),
                is_error: None,
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["output"], "result data");
        // Without an explicit `Some(true)` the error key must be absent.
        assert!(result["content"][0].get("is_error").is_none());
    }

    #[test]
    fn tool_result_with_image_attachment_stays_in_tool_message() {
        let msg = blocks_message(
            Role::User,
            vec![
                ContentBlock::ToolResult {
                    tool_use_id: "call_1".to_string(),
                    content: "screenshot taken".to_string(),
                    is_error: None,
                },
                ContentBlock::Image {
                    source: ContentSource::new("image/png", "aWdv"),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][1]["type"], "blob");
        assert_eq!(result["content"][1]["modality"], "image");
    }

    #[test]
    fn thinking_block_maps_to_reasoning_part() {
        let msg = blocks_message(
            Role::Assistant,
            vec![
                ContentBlock::Thinking {
                    thinking: "Let me think...".to_string(),
                    signature: None,
                },
                ContentBlock::Text {
                    text: "The answer is 42".to_string(),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "reasoning");
        assert_eq!(result["content"][0]["text"], "Let me think...");
        assert_eq!(result["content"][1]["text"], "The answer is 42");
    }

    #[test]
    fn redacted_thinking_is_omitted() {
        let msg = blocks_message(
            Role::Assistant,
            vec![
                ContentBlock::RedactedThinking {
                    data: "secret".to_string(),
                },
                ContentBlock::Text {
                    text: "visible".to_string(),
                },
            ],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        let content = result["content"].as_array().expect("array");
        assert_eq!(content.len(), 1);
        assert_eq!(content[0]["text"], "visible");
    }

    #[test]
    fn tool_use_block_maps_to_tool_call_part() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::ToolUse {
                id: "call_1".to_string(),
                name: "read".to_string(),
                input: json!({"path": "/tmp/test.rs"}),
                thought_signature: None,
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "tool_call");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["name"], "read");
        // Arguments are emitted as a serialized JSON string, untouched by
        // the noop redactor.
        assert_eq!(
            result["content"][0]["arguments"],
            r#"{"path":"/tmp/test.rs"}"#
        );
    }

    #[test]
    fn document_block_maps_to_blob_part() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::Document {
                source: ContentSource::new("application/pdf", "cGRm"),
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "blob");
        assert_eq!(result["content"][0]["mime_type"], "application/pdf");
        assert_eq!(result["content"][0]["size"], 4);
        // A non-image document carries no modality.
        assert!(result["content"][0].get("modality").is_none());
    }

    #[test]
    fn image_document_block_reports_image_modality() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::Document {
                source: ContentSource::new("image/png", "aWdv"),
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "blob");
        assert_eq!(result["content"][0]["mime_type"], "image/png");
        assert_eq!(result["content"][0]["modality"], "image");
    }

    #[test]
    fn output_messages_includes_finish_reason() {
        let response = response_with(
            vec![ContentBlock::Text {
                text: "Done".to_string(),
            }],
            Some(StopReason::EndTurn),
        );
        let result = PayloadRedactor::noop().convert_output_messages(&response);
        let msg = &result[0];
        assert_eq!(msg["role"], "assistant");
        assert_eq!(msg["finish_reason"], "stop");
        assert_eq!(msg["content"][0]["text"], "Done");
    }

    #[test]
    fn output_messages_tool_call_finish_reason() {
        let response = response_with(
            vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "bash".to_string(),
                input: json!({"command": "ls"}),
                thought_signature: None,
            }],
            Some(StopReason::ToolUse),
        );
        let result = PayloadRedactor::noop().convert_output_messages(&response);
        assert_eq!(result[0]["finish_reason"], "tool_call");
    }

    #[test]
    fn output_messages_without_stop_reason_omits_finish_reason() {
        let response = response_with(
            vec![ContentBlock::Text {
                text: "partial".to_string(),
            }],
            None,
        );
        let result = PayloadRedactor::noop().convert_output_messages(&response);
        assert_eq!(result[0]["role"], "assistant");
        assert!(result[0].get("finish_reason").is_none());
    }

    #[test]
    fn tool_result_error_flag_is_preserved() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "failed".to_string(),
                is_error: Some(true),
            }],
        );
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["is_error"], true);
    }

    #[test]
    fn input_messages_preserves_order() {
        let request = empty_request(
            "",
            vec![
                Message::user("first"),
                Message::assistant("second"),
                Message::user("third"),
            ],
        );
        let result = PayloadRedactor::noop().convert_input_messages(&request);
        let arr = result.as_array().expect("array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0]["role"], "user");
        assert_eq!(arr[1]["role"], "assistant");
        assert_eq!(arr[2]["role"], "user");
    }

    // ── Baseline redactor masks PII across every covered surface ──

    /// Redactor backed by a freshly constructed `BaselineDetector`.
    fn baseline_redactor() -> PayloadRedactor {
        PayloadRedactor::new(Arc::new(
            BaselineDetector::new().expect("baseline compiles"),
        ))
    }

    #[test]
    fn redacts_email_in_system_prompt() {
        let request = empty_request("Contact support at ops@example.com.", vec![]);
        let result = baseline_redactor()
            .convert_system_instructions(&request)
            .expect("some");
        let text = result[0]["text"].as_str().expect("text string");
        assert!(
            text.contains("[REDACTED:email]"),
            "system prompt not redacted: {text}"
        );
        assert!(!text.contains("ops@example.com"));
    }

    #[test]
    fn redacts_cpf_in_user_text() {
        let request = empty_request("", vec![Message::user("meu CPF é 111.444.777-35")]);
        let result = baseline_redactor().convert_input_messages(&request);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "user text: {text}");
        assert!(!text.contains("111.444.777-35"));
    }

    #[test]
    fn redacts_pan_in_tool_result_output() {
        let msg = blocks_message(
            Role::User,
            vec![ContentBlock::ToolResult {
                tool_use_id: "c1".to_string(),
                content: "charged card 4111 1111 1111 1111 successfully".to_string(),
                is_error: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let output = result["content"][0]["output"].as_str().expect("output");
        assert!(
            output.contains("[REDACTED:credit_card]"),
            "tool output: {output}"
        );
        assert!(!output.contains("4111 1111 1111 1111"));
    }

    #[test]
    fn redacts_strings_inside_tool_call_arguments_json() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "send_pix".to_string(),
                input: json!({
                    "chave_pix": "ana@example.com",
                    "amount_brl": 100,
                    "metadata": {
                        "recipient_cpf": "111.444.777-35",
                        "note": "salário"
                    }
                }),
                thought_signature: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let args = result["content"][0]["arguments"].as_str().expect("args");
        assert!(args.contains("[REDACTED:email]"), "args: {args}");
        assert!(args.contains("[REDACTED:cpf]"), "args: {args}");
        assert!(!args.contains("ana@example.com"));
        assert!(!args.contains("111.444.777-35"));
        // Non-string leaves are preserved as-is.
        assert!(args.contains("100"));
    }

    #[test]
    fn redacts_secret_in_assistant_output() {
        let response = response_with(
            vec![ContentBlock::Text {
                text: "here is the key sk-abcdefghijklmnopqrstuv for ci".to_string(),
            }],
            Some(StopReason::EndTurn),
        );
        let result = baseline_redactor().convert_output_messages(&response);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:secret]"), "output: {text}");
        assert!(!text.contains("sk-abcdefghijklmnopqrstuv"));
    }

    #[test]
    fn redacts_pii_in_thinking_text() {
        let msg = blocks_message(
            Role::Assistant,
            vec![ContentBlock::Thinking {
                thinking: "User CPF is 111.444.777-35 — I should confirm before sending."
                    .to_string(),
                signature: None,
            }],
        );
        let result = baseline_redactor().convert_message(&msg);
        let text = result["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "thinking: {text}");
        // A marker alone is not enough: the raw value must be gone too.
        assert!(!text.contains("111.444.777-35"), "thinking: {text}");
    }

    #[test]
    fn mask_json_preserves_non_string_leaves() {
        let input = json!({
            "amount": 42.5,
            "active": true,
            "items": null,
            "email": "ana@example.com"
        });
        let redacted = baseline_redactor().mask_json(&input);
        assert_eq!(redacted["amount"], json!(42.5));
        assert_eq!(redacted["active"], json!(true));
        assert_eq!(redacted["items"], json!(null));
        assert!(
            redacted["email"]
                .as_str()
                .expect("email")
                .contains("[REDACTED:email]")
        );
    }

    #[test]
    fn is_noop_reflects_constructor() {
        assert!(PayloadRedactor::noop().is_noop());
        assert!(PayloadRedactor::default().is_noop());
        assert!(
            !baseline_redactor().is_noop(),
            "a detector-backed redactor must not report as noop"
        );
    }
}