opendev_models/
validator.rs

1//! Message schema validation for session history.
2//!
3//! Validates messages before saving and repairs/filters on load to prevent
4//! malformed messages from corrupting session history.
5
6use crate::message::{ChatMessage, Role, ToolCall};
7use tracing::warn;
8
/// Outcome of validating a single message.
#[derive(Clone, Debug)]
pub struct ValidationVerdict {
    /// `true` when the message passed every check.
    pub is_valid: bool,
    /// Description of the first failed check; empty when `is_valid` is `true`
    /// and the verdict was built through [`ValidationVerdict::valid`].
    pub reason: String,
}

impl ValidationVerdict {
    /// Builds a passing verdict with an empty reason.
    fn valid() -> Self {
        ValidationVerdict {
            reason: String::new(),
            is_valid: true,
        }
    }

    /// Builds a failing verdict carrying `reason` as the explanation.
    fn invalid(reason: impl Into<String>) -> Self {
        let reason: String = reason.into();
        ValidationVerdict {
            reason,
            is_valid: false,
        }
    }
}
31
32/// Check if a value is natively JSON-serializable.
33fn is_json_serializable(value: &serde_json::Value) -> bool {
34    // serde_json::Value is always serializable by definition
35    serde_json::to_string(value).is_ok()
36}
37
38/// Validate a single tool call. Returns error reason or None if valid.
39fn validate_tool_call(tc: &ToolCall, path: &str) -> Option<String> {
40    let prefix = if path.is_empty() {
41        "tool_call".to_string()
42    } else {
43        format!("{path}tool_call")
44    };
45
46    if tc.id.trim().is_empty() {
47        return Some(format!("{prefix} has empty id"));
48    }
49
50    if tc.name.trim().is_empty() {
51        return Some(format!("{prefix} [{}] has empty name", tc.id));
52    }
53
54    // A tool call must have result or error (except task_complete)
55    if tc.result.is_none() && tc.error.is_none() && tc.name != "task_complete" {
56        return Some(format!(
57            "{prefix} [{}] ({}) has no result and no error",
58            tc.id, tc.name
59        ));
60    }
61
62    // Check result serializability
63    if let Some(ref result) = tc.result
64        && !is_json_serializable(result)
65    {
66        return Some(format!("{prefix} [{}] has non-serializable result", tc.id));
67    }
68
69    // Validate nested tool calls recursively
70    for (i, nested) in tc.nested_tool_calls.iter().enumerate() {
71        let nested_path = format!("{prefix}[{i}].");
72        if let Some(reason) = validate_tool_call(nested, &nested_path) {
73            return Some(reason);
74        }
75    }
76
77    None
78}
79
80/// Strict pre-save validation of a message.
81pub fn validate_message(msg: &ChatMessage) -> ValidationVerdict {
82    match msg.role {
83        Role::User => {
84            if msg.content.trim().is_empty() {
85                return ValidationVerdict::invalid("user message has empty content");
86            }
87            if !msg.tool_calls.is_empty() {
88                return ValidationVerdict::invalid("user message has tool_calls");
89            }
90        }
91        Role::Assistant => {
92            let has_content = !msg.content.trim().is_empty();
93            let has_tools = !msg.tool_calls.is_empty();
94            if !has_content && !has_tools {
95                return ValidationVerdict::invalid(
96                    "assistant message has no content and no tool_calls",
97                );
98            }
99
100            for tc in &msg.tool_calls {
101                if let Some(reason) = validate_tool_call(tc, "") {
102                    return ValidationVerdict::invalid(reason);
103                }
104            }
105
106            if let Some(ref trace) = msg.thinking_trace
107                && trace.trim().is_empty()
108            {
109                return ValidationVerdict::invalid("assistant message has empty thinking_trace");
110            }
111            if let Some(ref reasoning) = msg.reasoning_content
112                && reasoning.trim().is_empty()
113            {
114                return ValidationVerdict::invalid("assistant message has empty reasoning_content");
115            }
116        }
117        Role::System => {
118            if msg.content.trim().is_empty() {
119                return ValidationVerdict::invalid("system message has empty content");
120            }
121        }
122    }
123
124    // Token usage validation
125    if let Some(ref usage) = msg.token_usage {
126        let value = serde_json::to_value(usage).unwrap_or(serde_json::Value::Null);
127        if !value.is_object() {
128            return ValidationVerdict::invalid("token_usage is not a dict");
129        }
130    }
131
132    ValidationVerdict::valid()
133}
134
135/// Repair a single tool call and return it.
136fn repair_tool_call(tc: &mut ToolCall) {
137    // Fix incomplete tool calls (no result and no error)
138    if tc.result.is_none() && tc.error.is_none() && tc.name != "task_complete" {
139        tc.error = Some("Tool execution was interrupted or never completed.".to_string());
140    }
141
142    // Repair nested tool calls recursively
143    for nested in &mut tc.nested_tool_calls {
144        repair_tool_call(nested);
145    }
146}
147
148/// Attempt to repair a malformed message. Returns None if unrecoverable.
149pub fn repair_message(msg: &mut ChatMessage) -> bool {
150    let has_content = !msg.content.trim().is_empty();
151    let has_tools = !msg.tool_calls.is_empty();
152
153    // Drop completely empty messages
154    if !has_content && !has_tools {
155        return false;
156    }
157
158    // Repair tool calls
159    for tc in &mut msg.tool_calls {
160        repair_tool_call(tc);
161    }
162
163    // Normalize empty thinking_trace / reasoning_content to None
164    if let Some(ref trace) = msg.thinking_trace
165        && trace.trim().is_empty()
166    {
167        msg.thinking_trace = None;
168    }
169    if let Some(ref reasoning) = msg.reasoning_content
170        && reasoning.trim().is_empty()
171    {
172        msg.reasoning_content = None;
173    }
174
175    // Fix non-serializable token_usage
176    if let Some(ref usage) = msg.token_usage
177        && serde_json::to_value(usage).is_err()
178    {
179        msg.token_usage = None;
180    }
181
182    true
183}
184
185/// Bulk load-time cleanup: repair what we can, drop what we can't.
186pub fn filter_and_repair_messages(messages: &mut Vec<ChatMessage>) -> (usize, usize) {
187    let original_len = messages.len();
188    let mut dropped = 0;
189    let mut repaired = 0;
190
191    messages.retain_mut(|msg| {
192        let thinking_before = msg.thinking_trace.clone();
193        let reasoning_before = msg.reasoning_content.clone();
194        let usage_before = msg.token_usage.clone();
195
196        if !repair_message(msg) {
197            dropped += 1;
198            return false;
199        }
200
201        if msg.thinking_trace != thinking_before
202            || msg.reasoning_content != reasoning_before
203            || msg.token_usage != usage_before
204        {
205            repaired += 1;
206        }
207
208        true
209    });
210
211    if dropped > 0 || repaired > 0 {
212        warn!(
213            "Session message cleanup: {} dropped, {} repaired out of {} total",
214            dropped, repaired, original_len
215        );
216    }
217
218    (dropped, repaired)
219}
220
221#[cfg(test)]
222#[path = "validator_tests.rs"]
223mod tests;
opendev_models/validator.rs

opendev_models/
validator.rs