Skip to main content

agent_sdk/observability/
payload.rs

1//! `GenAI` payload conversion from SDK types to semconv JSON.
2//!
3//! Converts SDK [`ChatRequest`] / [`ChatResponse`] types into the JSON
4//! schemas defined by the `GenAI` semantic conventions for
5//! `gen_ai.system_instructions`, `gen_ai.input.messages`, and
6//! `gen_ai.output.messages`.
7//!
8//! # Privacy
9//!
10//! This layer sits directly in front of observability egress —
11//! whatever shape we emit here flows into `OTel` spans, `ObservabilityStore`
12//! implementations, and ultimately into third-party collectors
//! (Langfuse, Datadog, Honeycomb, …). [`PayloadRedactor`] wraps a
14//! pluggable [`PiiDetector`] and masks PII in every text and JSON
15//! string leaf before they leave this module, so the two conversion
16//! paths (directly serialized onto spans, or handed to the store)
17//! share one audited redaction pass.
18//!
19//! Callers that do not need redaction — including existing call sites
20//! that have not adopted the detector yet — should use
21//! [`PayloadRedactor::noop`] (or the module-level free functions,
22//! which delegate to it). Financial and other PII-sensitive
23//! integrations should construct a redactor with
24//! [`agent_sdk_foundation::privacy::BaselineDetector`] or a custom
25//! detector.
26
27use crate::llm::{ChatRequest, ChatResponse, Content, ContentBlock, Message, Role};
28use agent_sdk_foundation::privacy::{NoopDetector, PiiDetector, mask_spans};
29use serde_json::{Value, json};
30use std::sync::Arc;
31
32use super::attrs::finish_reason_str;
33
34/// Redacts PII from payloads before they flow to observability sinks.
35///
36/// Wraps a [`PiiDetector`] and applies it to every text and JSON
37/// string leaf emitted by [`convert_system_instructions`](Self::convert_system_instructions),
38/// [`convert_input_messages`](Self::convert_input_messages), and
39/// [`convert_output_messages`](Self::convert_output_messages).
40///
41/// Construct with [`PayloadRedactor::new`] wrapping any
42/// `Arc<dyn PiiDetector>`, or with [`PayloadRedactor::noop`] for the
43/// pass-through case. The redactor is `Clone` and `Send + Sync`, so
44/// one instance can be shared across the agent loop for the lifetime
45/// of a run.
46#[derive(Clone)]
47pub struct PayloadRedactor {
48    detector: Arc<dyn PiiDetector>,
49}
50
51impl PayloadRedactor {
52    /// Wrap an existing detector.
53    #[must_use]
54    pub fn new(detector: Arc<dyn PiiDetector>) -> Self {
55        Self { detector }
56    }
57
58    /// Redactor that performs no masking — produces byte-identical
59    /// JSON output to the raw conversion path.
60    #[must_use]
61    pub fn noop() -> Self {
62        Self {
63            detector: Arc::new(NoopDetector),
64        }
65    }
66
67    /// Convert system instructions, masking PII in the system prompt.
68    ///
69    /// Returns `None` if the system prompt is empty.
70    #[must_use]
71    pub fn convert_system_instructions(&self, request: &ChatRequest) -> Option<Value> {
72        if request.system.is_empty() {
73            return None;
74        }
75        Some(json!([{"text": self.mask_str(&request.system)}]))
76    }
77
78    /// Convert input messages into semconv JSON, masking PII in every
79    /// text and tool-argument leaf.
80    #[must_use]
81    pub fn convert_input_messages(&self, request: &ChatRequest) -> Value {
82        let messages: Vec<Value> = request
83            .messages
84            .iter()
85            .map(|m| self.convert_message(m))
86            .collect();
87        Value::Array(messages)
88    }
89
90    /// Convert a [`ChatResponse`] into semconv output-messages JSON,
91    /// masking PII in every assistant text, thinking text, and
92    /// tool-argument leaf.
93    #[must_use]
94    pub fn convert_output_messages(&self, response: &ChatResponse) -> Value {
95        let parts: Vec<Value> = response
96            .content
97            .iter()
98            .filter_map(|b| self.convert_block(b))
99            .collect();
100        let mut message = json!({
101            "role": "assistant",
102            "content": Value::Array(parts),
103        });
104        if let Some(reason) = response.stop_reason {
105            message["finish_reason"] = json!(finish_reason_str(reason));
106        }
107        json!([message])
108    }
109
110    fn convert_message(&self, message: &Message) -> Value {
111        let role = match message.role {
112            Role::User => determine_user_message_role(message),
113            Role::Assistant => "assistant",
114        };
115        let content = self.convert_content(&message.content);
116        json!({
117            "role": role,
118            "content": content,
119        })
120    }
121
122    fn convert_content(&self, content: &Content) -> Value {
123        match content {
124            Content::Text(text) => json!([{"text": self.mask_str(text)}]),
125            Content::Blocks(blocks) => {
126                let parts: Vec<Value> = blocks
127                    .iter()
128                    .filter_map(|b| self.convert_block(b))
129                    .collect();
130                Value::Array(parts)
131            }
132        }
133    }
134
135    fn convert_block(&self, block: &ContentBlock) -> Option<Value> {
136        match block {
137            ContentBlock::Text { text } => Some(json!({"text": self.mask_str(text)})),
138            ContentBlock::Thinking { thinking, .. } => Some(json!({
139                "type": "reasoning",
140                "text": self.mask_str(thinking),
141            })),
142            ContentBlock::ToolUse {
143                id, name, input, ..
144            } => {
145                let masked_input = self.mask_json(input);
146                Some(json!({
147                    "type": "tool_call",
148                    "id": id,
149                    "name": name,
150                    "arguments": masked_input.to_string(),
151                }))
152            }
153            ContentBlock::ToolResult {
154                tool_use_id,
155                content,
156                is_error,
157            } => {
158                let mut part = json!({
159                    "type": "tool_call_response",
160                    "id": tool_use_id,
161                    "output": self.mask_str(content),
162                });
163                if *is_error == Some(true) {
164                    part["is_error"] = json!(true);
165                }
166                Some(part)
167            }
168            ContentBlock::Image { source } => Some(json!({
169                "type": "blob",
170                "mime_type": source.media_type,
171                "modality": "image",
172                "size": source.data.len(),
173            })),
174            ContentBlock::Document { source } => {
175                let mut part = json!({
176                    "type": "blob",
177                    "mime_type": source.media_type,
178                    "size": source.data.len(),
179                });
180                if source.media_type.starts_with("image/") {
181                    part["modality"] = json!("image");
182                }
183                Some(part)
184            }
185            // Redacted-thinking blocks carry nothing to surface; this also
186            // omits unknown future `#[non_exhaustive]` block kinds from the
187            // observability payload.
188            _ => None,
189        }
190    }
191
192    /// Mask PII in a plain string.
193    fn mask_str(&self, text: &str) -> String {
194        let spans = self.detector.detect(text);
195        if spans.is_empty() {
196            text.to_owned()
197        } else {
198            mask_spans(text, &spans)
199        }
200    }
201
202    /// Recursively mask every string leaf within a JSON value.
203    fn mask_json(&self, value: &Value) -> Value {
204        match value {
205            Value::String(s) => Value::String(self.mask_str(s)),
206            Value::Array(arr) => Value::Array(arr.iter().map(|v| self.mask_json(v)).collect()),
207            Value::Object(map) => Value::Object(
208                map.iter()
209                    .map(|(k, v)| (k.clone(), self.mask_json(v)))
210                    .collect(),
211            ),
212            Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
213        }
214    }
215}
216
217impl Default for PayloadRedactor {
218    fn default() -> Self {
219        Self::noop()
220    }
221}
222
223// ─────────────────────────────────────────────────────────────────────
224// Free function façade — delegate to `PayloadRedactor::noop`.
225// Existing callers keep their call sites untouched; new integrations
226// should construct a `PayloadRedactor` explicitly.
227// ─────────────────────────────────────────────────────────────────────
228
229/// Convert system instructions from a `ChatRequest` into semconv JSON.
230///
231/// Returns `None` if the system prompt is empty.
232#[must_use]
233pub fn convert_system_instructions(request: &ChatRequest) -> Option<Value> {
234    PayloadRedactor::noop().convert_system_instructions(request)
235}
236
237/// Convert input messages from a `ChatRequest` into semconv JSON.
238#[must_use]
239pub fn convert_input_messages(request: &ChatRequest) -> Value {
240    PayloadRedactor::noop().convert_input_messages(request)
241}
242
243/// Convert a `ChatResponse` into semconv output messages JSON.
244///
245/// Returns a JSON array with one assistant message per response
246/// (the SDK currently returns a single candidate).
247#[must_use]
248pub fn convert_output_messages(response: &ChatResponse) -> Value {
249    PayloadRedactor::noop().convert_output_messages(response)
250}
251
252/// Decide whether a User-role message is actually a `tool` message
253/// (SDK batches tool results as User messages).
254fn determine_user_message_role(message: &Message) -> &'static str {
255    match &message.content {
256        Content::Blocks(blocks) => {
257            let has_tool_result = blocks
258                .iter()
259                .any(|b| matches!(b, ContentBlock::ToolResult { .. }));
260            if has_tool_result { "tool" } else { "user" }
261        }
262        Content::Text(_) => "user",
263    }
264}
265
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm::{ChatRequest, ChatResponse, ContentSource, StopReason, Usage};
    use agent_sdk_foundation::privacy::BaselineDetector;

    /// Build a minimal request with the given system prompt and messages;
    /// every other field is left at an inert value.
    fn empty_request(system: &str, messages: Vec<Message>) -> ChatRequest {
        ChatRequest {
            system: system.to_owned(),
            messages,
            tools: None,
            max_tokens: 1024,
            max_tokens_explicit: false,
            session_id: None,
            cached_content: None,
            thinking: None,
            tool_choice: None,
            response_format: None,
        }
    }

    // ── Noop redactor / free functions preserve existing behaviour ──

    // An empty system prompt yields no `gen_ai.system_instructions` payload.
    #[test]
    fn empty_system_returns_none() {
        let request = empty_request("", vec![]);
        assert!(convert_system_instructions(&request).is_none());
    }

    // A non-empty system prompt becomes a one-element array of text parts.
    #[test]
    fn system_instructions_wraps_in_text_array() {
        let request = empty_request("You are helpful.", vec![]);
        let result = convert_system_instructions(&request).expect("should be Some");
        assert_eq!(result, json!([{"text": "You are helpful."}]));
    }

    #[test]
    fn user_text_message_converts_correctly() {
        let msg = Message::user("Hello");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "user");
        assert_eq!(result["content"][0]["text"], "Hello");
    }

    #[test]
    fn assistant_text_message_converts_correctly() {
        let msg = Message::assistant("Hi there");
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "assistant");
        assert_eq!(result["content"][0]["text"], "Hi there");
    }

    // A User message made of tool results is reported with the `tool` role.
    #[test]
    fn tool_result_batch_maps_to_tool_role() {
        let msg = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "result data".to_string(),
                is_error: None,
            }]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["output"], "result data");
        // The error flag is only emitted for explicit failures.
        assert!(result["content"][0].get("is_error").is_none());
    }

    #[test]
    fn tool_result_with_image_attachment_stays_in_tool_message() {
        let msg = Message {
            role: Role::User,
            content: Content::Blocks(vec![
                ContentBlock::ToolResult {
                    tool_use_id: "call_1".to_string(),
                    content: "screenshot taken".to_string(),
                    is_error: None,
                },
                ContentBlock::Image {
                    source: ContentSource::new("image/png", "aWdv"),
                },
            ]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["role"], "tool");
        assert_eq!(result["content"][0]["type"], "tool_call_response");
        assert_eq!(result["content"][1]["type"], "blob");
        assert_eq!(result["content"][1]["modality"], "image");
    }

    #[test]
    fn thinking_block_maps_to_reasoning_part() {
        let msg = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![
                ContentBlock::Thinking {
                    thinking: "Let me think...".to_string(),
                    signature: None,
                },
                ContentBlock::Text {
                    text: "The answer is 42".to_string(),
                },
            ]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "reasoning");
        assert_eq!(result["content"][0]["text"], "Let me think...");
        assert_eq!(result["content"][1]["text"], "The answer is 42");
    }

    // Redacted-thinking blocks must not appear in the payload at all.
    #[test]
    fn redacted_thinking_is_omitted() {
        let msg = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![
                ContentBlock::RedactedThinking {
                    data: "secret".to_string(),
                },
                ContentBlock::Text {
                    text: "visible".to_string(),
                },
            ]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        let content = result["content"].as_array().expect("array");
        assert_eq!(content.len(), 1);
        assert_eq!(content[0]["text"], "visible");
    }

    #[test]
    fn tool_use_block_maps_to_tool_call_part() {
        let msg = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![ContentBlock::ToolUse {
                id: "call_1".to_string(),
                name: "read".to_string(),
                input: json!({"path": "/tmp/test.rs"}),
                thought_signature: None,
            }]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "tool_call");
        assert_eq!(result["content"][0]["id"], "call_1");
        assert_eq!(result["content"][0]["name"], "read");
        // Arguments are emitted as a JSON-encoded string; with the noop
        // redactor they must round-trip to the original input.
        let raw_args = result["content"][0]["arguments"]
            .as_str()
            .expect("arguments string");
        let parsed: Value = serde_json::from_str(raw_args).expect("arguments are valid JSON");
        assert_eq!(parsed, json!({"path": "/tmp/test.rs"}));
    }

    #[test]
    fn document_block_maps_to_blob_part() {
        let msg = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::Document {
                source: ContentSource::new("application/pdf", "cGRm"),
            }]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["type"], "blob");
        assert_eq!(result["content"][0]["mime_type"], "application/pdf");
        assert_eq!(result["content"][0]["size"], 4);
        // Only `image/*` documents are tagged with a modality.
        assert!(result["content"][0].get("modality").is_none());
    }

    #[test]
    fn output_messages_includes_finish_reason() {
        let response = ChatResponse {
            id: "resp_1".to_string(),
            content: vec![ContentBlock::Text {
                text: "Done".to_string(),
            }],
            model: "test-model".to_string(),
            stop_reason: Some(StopReason::EndTurn),
            usage: Usage {
                input_tokens: 10,
                output_tokens: 5,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            },
        };
        let result = convert_output_messages(&response);
        let msg = &result[0];
        assert_eq!(msg["role"], "assistant");
        assert_eq!(msg["finish_reason"], "stop");
        assert_eq!(msg["content"][0]["text"], "Done");
    }

    #[test]
    fn output_messages_tool_call_finish_reason() {
        let response = ChatResponse {
            id: "resp_1".to_string(),
            content: vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "bash".to_string(),
                input: json!({"command": "ls"}),
                thought_signature: None,
            }],
            model: "test-model".to_string(),
            stop_reason: Some(StopReason::ToolUse),
            usage: Usage {
                input_tokens: 10,
                output_tokens: 5,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            },
        };
        let result = convert_output_messages(&response);
        assert_eq!(result[0]["finish_reason"], "tool_call");
    }

    #[test]
    fn tool_result_error_flag_is_preserved() {
        let msg = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "call_1".to_string(),
                content: "failed".to_string(),
                is_error: Some(true),
            }]),
        };
        let result = PayloadRedactor::noop().convert_message(&msg);
        assert_eq!(result["content"][0]["is_error"], true);
    }

    #[test]
    fn input_messages_preserves_order() {
        let request = empty_request(
            "",
            vec![
                Message::user("first"),
                Message::assistant("second"),
                Message::user("third"),
            ],
        );
        let result = convert_input_messages(&request);
        let arr = result.as_array().expect("array");
        assert_eq!(arr.len(), 3);
        assert_eq!(arr[0]["role"], "user");
        assert_eq!(arr[1]["role"], "assistant");
        assert_eq!(arr[2]["role"], "user");
    }

    // ── Baseline redactor masks PII across every covered surface ──

    /// Redactor wired to the baseline detector used by the masking tests.
    fn baseline_redactor() -> PayloadRedactor {
        PayloadRedactor::new(Arc::new(
            BaselineDetector::new().expect("baseline compiles"),
        ))
    }

    #[test]
    fn redacts_email_in_system_prompt() {
        let request = empty_request("Contact support at ops@example.com.", vec![]);
        let result = baseline_redactor()
            .convert_system_instructions(&request)
            .expect("some");
        let text = result[0]["text"].as_str().expect("text string");
        assert!(
            text.contains("[REDACTED:email]"),
            "system prompt not redacted: {text}"
        );
        assert!(!text.contains("ops@example.com"));
    }

    #[test]
    fn redacts_cpf_in_user_text() {
        let request = empty_request("", vec![Message::user("meu CPF é 111.444.777-35")]);
        let result = baseline_redactor().convert_input_messages(&request);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "user text: {text}");
        assert!(!text.contains("111.444.777-35"));
    }

    #[test]
    fn redacts_pan_in_tool_result_output() {
        let msg = Message {
            role: Role::User,
            content: Content::Blocks(vec![ContentBlock::ToolResult {
                tool_use_id: "c1".to_string(),
                content: "charged card 4111 1111 1111 1111 successfully".to_string(),
                is_error: None,
            }]),
        };
        let result = baseline_redactor().convert_message(&msg);
        let output = result["content"][0]["output"].as_str().expect("output");
        assert!(
            output.contains("[REDACTED:credit_card]"),
            "tool output: {output}"
        );
        assert!(!output.contains("4111 1111 1111 1111"));
    }

    // String leaves at any nesting depth of the tool input must be masked.
    #[test]
    fn redacts_strings_inside_tool_call_arguments_json() {
        let msg = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![ContentBlock::ToolUse {
                id: "c1".to_string(),
                name: "send_pix".to_string(),
                input: json!({
                    "chave_pix": "ana@example.com",
                    "amount_brl": 100,
                    "metadata": {
                        "recipient_cpf": "111.444.777-35",
                        "note": "salário"
                    }
                }),
                thought_signature: None,
            }]),
        };
        let result = baseline_redactor().convert_message(&msg);
        let args = result["content"][0]["arguments"].as_str().expect("args");
        assert!(args.contains("[REDACTED:email]"), "args: {args}");
        assert!(args.contains("[REDACTED:cpf]"), "args: {args}");
        assert!(!args.contains("ana@example.com"));
        assert!(!args.contains("111.444.777-35"));
        // Non-string leaves are preserved as-is.
        assert!(args.contains("100"));
    }

    #[test]
    fn redacts_secret_in_assistant_output() {
        let response = ChatResponse {
            id: "r1".to_string(),
            content: vec![ContentBlock::Text {
                text: "here is the key sk-abcdefghijklmnopqrstuv for ci".to_string(),
            }],
            model: "m".to_string(),
            stop_reason: Some(StopReason::EndTurn),
            usage: Usage {
                input_tokens: 0,
                output_tokens: 0,
                cached_input_tokens: 0,
                cache_creation_input_tokens: 0,
            },
        };
        let result = baseline_redactor().convert_output_messages(&response);
        let text = result[0]["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:secret]"), "output: {text}");
        assert!(!text.contains("sk-abcdefghijklmnopqrstuv"));
    }

    #[test]
    fn redacts_pii_in_thinking_text() {
        let msg = Message {
            role: Role::Assistant,
            content: Content::Blocks(vec![ContentBlock::Thinking {
                thinking: "User CPF is 111.444.777-35 — I should confirm before sending."
                    .to_string(),
                signature: None,
            }]),
        };
        let result = baseline_redactor().convert_message(&msg);
        let text = result["content"][0]["text"].as_str().expect("text");
        assert!(text.contains("[REDACTED:cpf]"), "thinking: {text}");
        // The marker alone is not enough: the raw value must be gone too.
        assert!(!text.contains("111.444.777-35"), "thinking: {text}");
    }

    #[test]
    fn mask_json_preserves_non_string_leaves() {
        let input = json!({
            "amount": 42.5,
            "active": true,
            "items": null,
            "email": "ana@example.com"
        });
        let redacted = baseline_redactor().mask_json(&input);
        assert_eq!(redacted["amount"], json!(42.5));
        assert_eq!(redacted["active"], json!(true));
        assert_eq!(redacted["items"], json!(null));
        assert!(
            redacted["email"]
                .as_str()
                .expect("email")
                .contains("[REDACTED:email]")
        );
    }

    #[test]
    fn noop_redactor_produces_same_output_as_free_functions() {
        // Spot-check: a payload with no PII should serialize identically
        // via the noop redactor and the free functions.
        let request = empty_request("System text", vec![Message::user("Hello, world")]);
        assert_eq!(
            PayloadRedactor::noop().convert_input_messages(&request),
            convert_input_messages(&request),
        );
        assert_eq!(
            PayloadRedactor::noop().convert_system_instructions(&request),
            convert_system_instructions(&request),
        );
    }
}