Skip to main content

agent_code_lib/llm/
normalize.rs

1//! Message normalization and validation utilities.
2//!
3//! Ensures messages conform to API requirements before sending:
4//! - Tool use / tool result pairing
5//! - Content block ordering
6//! - Empty message handling
7
8use super::message::*;
9
10/// Ensure every tool_use block has a matching tool_result in the
11/// subsequent user message. Orphaned tool_use blocks cause API errors.
12pub fn ensure_tool_result_pairing(messages: &mut Vec<Message>) {
13    let mut pending_tool_ids: Vec<String> = Vec::new();
14
15    let mut i = 0;
16    while i < messages.len() {
17        match &messages[i] {
18            Message::Assistant(a) => {
19                // Collect tool_use IDs from this message.
20                for block in &a.content {
21                    if let ContentBlock::ToolUse { id, .. } = block {
22                        pending_tool_ids.push(id.clone());
23                    }
24                }
25            }
26            Message::User(u) => {
27                // Remove tool_result IDs that are satisfied.
28                for block in &u.content {
29                    if let ContentBlock::ToolResult { tool_use_id, .. } = block {
30                        pending_tool_ids.retain(|id| id != tool_use_id);
31                    }
32                }
33            }
34            _ => {}
35        }
36        i += 1;
37    }
38
39    // Any remaining pending IDs need synthetic error results.
40    if !pending_tool_ids.is_empty() {
41        for id in pending_tool_ids {
42            messages.push(tool_result_message(
43                &id,
44                "(tool execution was interrupted)",
45                true,
46            ));
47        }
48    }
49}
50
51/// Remove empty text blocks from messages.
52pub fn strip_empty_blocks(messages: &mut [Message]) {
53    for msg in messages.iter_mut() {
54        match msg {
55            Message::User(u) => {
56                u.content.retain(|b| match b {
57                    ContentBlock::Text { text } => !text.is_empty(),
58                    _ => true,
59                });
60            }
61            Message::Assistant(a) => {
62                a.content.retain(|b| match b {
63                    ContentBlock::Text { text } => !text.is_empty(),
64                    _ => true,
65                });
66            }
67            _ => {}
68        }
69    }
70}
71
72/// Validate that the message sequence alternates correctly
73/// (user/assistant/user/assistant...) as required by the API.
74pub fn validate_alternation(messages: &[Message]) -> Result<(), String> {
75    let mut expect_user = true;
76
77    for (i, msg) in messages.iter().enumerate() {
78        match msg {
79            Message::System(_) => continue, // System messages don't count.
80            Message::User(_) => {
81                if !expect_user {
82                    return Err(format!("Message {i}: expected assistant, got user"));
83                }
84                expect_user = false;
85            }
86            Message::Assistant(_) => {
87                if expect_user {
88                    return Err(format!("Message {i}: expected user, got assistant"));
89                }
90                expect_user = true;
91            }
92        }
93    }
94
95    Ok(())
96}
97
98/// Remove empty messages (messages with no content blocks after stripping).
99pub fn remove_empty_messages(messages: &mut Vec<Message>) {
100    messages.retain(|msg| match msg {
101        Message::User(u) => !u.content.is_empty(),
102        Message::Assistant(a) => !a.content.is_empty(),
103        Message::System(_) => true,
104    });
105}
106
107/// Cap oversized document blocks to prevent context blowout.
108pub fn cap_document_blocks(messages: &mut [Message], max_bytes: usize) {
109    for msg in messages.iter_mut() {
110        let content = match msg {
111            Message::User(u) => &mut u.content,
112            Message::Assistant(a) => &mut a.content,
113            _ => continue,
114        };
115        for block in content.iter_mut() {
116            if let ContentBlock::Document { data, title, .. } = block
117                && data.len() > max_bytes
118            {
119                let name = title.as_deref().unwrap_or("document");
120                *block = ContentBlock::Text {
121                    text: format!(
122                        "(Document '{name}' too large for context: {} bytes, max {max_bytes})",
123                        data.len()
124                    ),
125                };
126            }
127        }
128    }
129}
130
131/// Merge consecutive user messages into a single message.
132/// The API requires strict user/assistant alternation.
133pub fn merge_consecutive_user_messages(messages: &mut Vec<Message>) {
134    let mut i = 0;
135    while i + 1 < messages.len() {
136        let both_user = matches!(&messages[i], Message::User(_))
137            && matches!(&messages[i + 1], Message::User(_));
138
139        if both_user {
140            // Merge content from i+1 into i.
141            if let Message::User(next) = messages.remove(i + 1)
142                && let Message::User(ref mut current) = messages[i]
143            {
144                current.content.extend(next.content);
145            }
146        } else {
147            i += 1;
148        }
149    }
150}
151
#[cfg(test)]
mod tests {
    //! Unit tests for the message normalization helpers.
    //!
    //! Messages are built through small fixture functions so each test shows
    //! only the data it cares about. Accessors panic on an unexpected message
    //! variant, so a test can never pass by silently skipping its assertions.

    use super::*;
    use uuid::Uuid;

    /// Build an assistant message holding `content`, with no optional metadata.
    fn assistant_msg(content: Vec<ContentBlock>) -> Message {
        Message::Assistant(AssistantMessage {
            uuid: Uuid::new_v4(),
            timestamp: String::new(),
            content,
            model: None,
            usage: None,
            stop_reason: None,
            request_id: None,
        })
    }

    /// Build a user message holding `content` that is neither meta nor a
    /// compact summary.
    fn user_msg(content: Vec<ContentBlock>) -> Message {
        Message::User(UserMessage {
            uuid: Uuid::new_v4(),
            timestamp: String::new(),
            content,
            is_meta: false,
            is_compact_summary: false,
        })
    }

    /// Build an informational, info-level system message.
    fn system_note(content: &str) -> Message {
        Message::System(SystemMessage {
            uuid: Uuid::new_v4(),
            timestamp: String::new(),
            subtype: SystemMessageType::Informational,
            content: content.into(),
            level: MessageLevel::Info,
        })
    }

    /// Build a text block.
    fn text_block(text: &str) -> ContentBlock {
        ContentBlock::Text { text: text.into() }
    }

    /// Build a tool_use block with an empty JSON object as input.
    fn tool_use_block(id: &str, name: &str) -> ContentBlock {
        ContentBlock::ToolUse {
            id: id.into(),
            name: name.into(),
            input: serde_json::json!({}),
        }
    }

    /// Build a document block.
    fn document_block(media_type: &str, data: &str, title: Option<&str>) -> ContentBlock {
        ContentBlock::Document {
            media_type: media_type.into(),
            data: data.into(),
            title: title.map(Into::into),
        }
    }

    /// Borrow the content of a user message; fail the test for anything else.
    fn user_blocks(msg: &Message) -> &[ContentBlock] {
        match msg {
            Message::User(u) => &u.content,
            _ => panic!("Expected user message"),
        }
    }

    /// Borrow the content of an assistant message; fail the test for anything else.
    fn assistant_blocks(msg: &Message) -> &[ContentBlock] {
        match msg {
            Message::Assistant(a) => &a.content,
            _ => panic!("Expected assistant message"),
        }
    }

    /// Borrow the text of a text block; fail the test for anything else.
    fn block_text(block: &ContentBlock) -> &str {
        match block {
            ContentBlock::Text { text } => text,
            _ => panic!("Expected text block"),
        }
    }

    /// Assert that `msg` is a user message whose first block is an error tool result.
    fn assert_error_tool_result(msg: &Message) {
        assert!(matches!(
            &user_blocks(msg)[0],
            ContentBlock::ToolResult { is_error: true, .. }
        ));
    }

    #[test]
    fn test_tool_result_pairing() {
        // No tool_result for call_1!
        let mut messages = vec![assistant_msg(vec![tool_use_block("call_1", "Bash")])];

        ensure_tool_result_pairing(&mut messages);

        // Should have added a synthetic error result.
        assert_eq!(messages.len(), 2);
        assert_error_tool_result(&messages[1]);
    }

    #[test]
    fn test_merge_consecutive_users() {
        let mut messages = vec![
            user_message("hello"),
            user_message("world"),
            assistant_msg(vec![text_block("hi")]),
        ];

        merge_consecutive_user_messages(&mut messages);
        assert_eq!(messages.len(), 2); // Two user messages merged into one.
    }

    #[test]
    fn test_strip_empty_blocks() {
        let mut messages = vec![user_msg(vec![
            text_block(""), // empty — should be removed
            text_block("keep me"),
        ])];

        strip_empty_blocks(&mut messages);

        let blocks = user_blocks(&messages[0]);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].as_text(), Some("keep me"));
    }

    #[test]
    fn test_validate_alternation_valid() {
        let messages = vec![user_message("hello"), assistant_msg(vec![text_block("hi")])];
        assert!(validate_alternation(&messages).is_ok());
    }

    #[test]
    fn test_validate_alternation_invalid() {
        let messages = vec![
            user_message("hello"),
            user_message("world"), // Two users in a row.
        ];
        assert!(validate_alternation(&messages).is_err());
    }

    #[test]
    fn test_remove_empty_messages() {
        let mut messages = vec![
            user_message("keep"),
            user_msg(vec![]), // empty — should be removed
            user_message("also keep"),
        ];
        remove_empty_messages(&mut messages);
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_cap_document_blocks() {
        let mut messages = vec![user_msg(vec![document_block(
            "application/pdf",
            &"x".repeat(1000),
            Some("big.pdf"),
        )])];

        // Cap at 500 bytes — should replace with text.
        cap_document_blocks(&mut messages, 500);

        let text = block_text(&user_blocks(&messages[0])[0]);
        assert!(text.contains("big.pdf"));
        assert!(text.contains("too large"));
    }

    #[test]
    fn test_cap_document_blocks_within_limit() {
        let mut messages = vec![user_msg(vec![document_block(
            "application/pdf",
            "small",
            Some("small.pdf"),
        )])];

        // Cap at 500 bytes — should keep as-is.
        cap_document_blocks(&mut messages, 500);

        assert!(matches!(
            &user_blocks(&messages[0])[0],
            ContentBlock::Document { .. }
        ));
    }

    #[test]
    fn test_tool_result_pairing_already_paired() {
        let mut messages = vec![
            assistant_msg(vec![tool_use_block("call_1", "Bash")]),
            Message::User(UserMessage {
                uuid: Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolResult {
                    tool_use_id: "call_1".into(),
                    content: "ok".into(),
                    is_error: false,
                    extra_content: vec![],
                }],
                is_meta: true,
                is_compact_summary: false,
            }),
        ];

        ensure_tool_result_pairing(&mut messages);
        // No change expected — already paired.
        assert_eq!(messages.len(), 2);
    }

    #[test]
    fn test_tool_result_pairing_multiple_orphans() {
        let mut messages = vec![assistant_msg(vec![
            tool_use_block("call_a", "Bash"),
            tool_use_block("call_b", "FileRead"),
        ])];

        ensure_tool_result_pairing(&mut messages);

        // Should add two synthetic error results (one per orphan).
        assert_eq!(messages.len(), 3);
        for msg in &messages[1..] {
            assert_error_tool_result(msg);
        }
    }

    #[test]
    fn test_merge_no_consecutive_users() {
        let mut messages = vec![
            user_message("hello"),
            assistant_msg(vec![text_block("hi")]),
            user_message("bye"),
        ];

        merge_consecutive_user_messages(&mut messages);
        assert_eq!(messages.len(), 3); // No change.
    }

    #[test]
    fn test_merge_three_consecutive_users() {
        let mut messages = vec![
            user_message("one"),
            user_message("two"),
            user_message("three"),
        ];

        merge_consecutive_user_messages(&mut messages);

        assert_eq!(messages.len(), 1); // All merged into one.
        assert_eq!(user_blocks(&messages[0]).len(), 3);
    }

    #[test]
    fn test_validate_alternation_with_system_messages() {
        let messages = vec![
            system_note("system note"),
            user_message("hello"),
            system_note("another note"),
            assistant_msg(vec![text_block("hi")]),
        ];
        assert!(validate_alternation(&messages).is_ok());
    }

    #[test]
    fn test_validate_alternation_empty_list() {
        let messages: Vec<Message> = vec![];
        assert!(validate_alternation(&messages).is_ok());
    }

    #[test]
    fn test_strip_empty_blocks_on_assistant() {
        let mut messages = vec![assistant_msg(vec![
            text_block(""),
            text_block("real content"),
            text_block(""),
        ])];

        strip_empty_blocks(&mut messages);

        let blocks = assistant_blocks(&messages[0]);
        assert_eq!(blocks.len(), 1);
        assert_eq!(blocks[0].as_text(), Some("real content"));
    }

    #[test]
    fn test_remove_empty_messages_preserves_system() {
        let mut messages = vec![
            system_note(""),  // Empty content but system messages are always kept.
            user_msg(vec![]), // Empty — should be removed.
            user_message("keep me"),
        ];

        remove_empty_messages(&mut messages);

        assert_eq!(messages.len(), 2); // System + "keep me".
        assert!(matches!(&messages[0], Message::System(_)));
        assert!(matches!(&messages[1], Message::User(_)));
    }

    #[test]
    fn test_cap_document_blocks_no_title_uses_document() {
        let mut messages = vec![user_msg(vec![document_block(
            "text/plain",
            &"x".repeat(200),
            None,
        )])];

        cap_document_blocks(&mut messages, 100);

        let text = block_text(&user_blocks(&messages[0])[0]);
        assert!(
            text.contains("document"),
            "should use fallback name 'document'"
        );
        assert!(text.contains("too large"));
    }
}
539}