Skip to main content

atomcode_core/conversation/
message.rs

1use crate::tool::result_store::ToolResultRef;
2use crate::tool::{ToolCall, ToolResult};
3
/// The role attached to a [`Message`].
///
/// No serde rename attributes are applied, so the variants are
/// serialized under the names written here.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Role {
    /// System role.
    System,
    /// User role; the tests in this file use it for `MultiPart` messages.
    User,
    /// Assistant role.
    Assistant,
    /// Tool role; the tests in this file use it for `ToolResult` messages.
    Tool,
}
11
/// A single image attachment, base64-encoded.
///
/// Carried in the `images` list of `MessageContent::MultiPart`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ImagePart {
    /// MIME type of the image, e.g. `"image/png"` or `"image/jpeg"`.
    pub media_type: String,
    /// Base64-encoded image data.
    pub data: String,
}
20
/// One Anthropic-style extended-thinking content block.
///
/// Anthropic's API returns thinking output as a sequence of
/// `{type:"thinking", thinking, signature}` blocks. The `signature` is a
/// server-issued cryptographic token that we MUST echo back unchanged on
/// every subsequent assistant turn, or the API rejects the request with
/// `400 The content[].thinking in the thinking mode must be passed back to
/// the API`.
///
/// Per Anthropic docs, thinking blocks must also appear before
/// text/tool_use blocks inside the assistant message —
/// `provider/claude.rs::format_messages` enforces that ordering.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq)]
pub struct ThinkingBlock {
    /// The thinking text streamed via `thinking_delta` events.
    pub text: String,
    /// Server-issued signature received via `signature_delta`. Required
    /// for round-trip.
    ///
    /// An empty string means we never received one (older session files,
    /// non-Anthropic provider). It is emitted anyway: the upstream either
    /// accepts an empty signature or rejects only when thinking + tool_use
    /// is active, in which case we already had a signature.
    pub signature: String,
}
41
/// The payload of a [`Message`]. Each variant is one shape of content a
/// conversation entry can carry.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub enum MessageContent {
    /// Plain text body. This is the variant `Message::new` constructs.
    Text(String),
    /// An assistant turn that carries tool calls, optionally accompanied by
    /// text and by reasoning captured from the provider.
    AssistantWithToolCalls {
        /// Text accompanying the tool calls, if any.
        text: Option<String>,
        /// The tool calls carried by this turn.
        tool_calls: Vec<ToolCall>,
        /// Thinking-model reasoning captured alongside the tool_calls. Some
        /// provider APIs (Moonshot Kimi K2-thinking / K2.6, MiniMax-M2 when
        /// `reasoning_split` is on) require the historical `reasoning_content`
        /// to be echoed back on every assistant tool_call message or they
        /// reject the next request with a 400. DeepSeek-R1 is the opposite —
        /// it rejects the request if this field is echoed back. The send-side
        /// `ReasoningPolicy` (per-provider) decides whether to emit.
        /// Always captured on the receive side so we don't lose data.
        // `default`: data serialized without this field deserializes to `None`.
        #[serde(default)]
        reasoning_content: Option<String>,
        /// Anthropic-style extended-thinking blocks received alongside this
        /// turn. Carries the cryptographic `signature` that Claude (and
        /// Anthropic-compatible proxies routing models like deepseek-v4-pro
        /// through claude.rs) require us to echo verbatim on every
        /// subsequent request. Empty when the upstream isn't Anthropic
        /// or thinking was disabled. `provider/claude.rs::format_messages`
        /// emits these as the first elements of the `content` array.
        // `default`: a missing field deserializes to an empty Vec.
        // `skip_serializing_if`: an empty Vec is left out of the serialized form.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        thinking_blocks: Vec<ThinkingBlock>,
    },
    /// Tool result stored inline, i.e. with its output held in the message.
    ToolResult(ToolResult),
    /// Lightweight reference to a tool result whose full output is cached on disk.
    /// Used for new tool results; old `ToolResult` variant kept for backward compat.
    ToolResultRef(ToolResultRef),
    /// User message with text and/or image attachments (vision models).
    MultiPart {
        /// Text portion of the message, if any.
        text: Option<String>,
        /// Attached images; may be empty.
        images: Vec<ImagePart>,
    },
}
78
/// A single conversation entry: a [`Role`] paired with its
/// [`MessageContent`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Message {
    /// The role this message is attributed to.
    pub role: Role,
    /// The payload carried by this message.
    pub content: MessageContent,
}
84
85impl Message {
86    pub fn new(role: Role, content: impl Into<String>) -> Self {
87        Self {
88            role,
89            content: MessageContent::Text(content.into()),
90        }
91    }
92
93    pub fn text(&self) -> Option<&str> {
94        match &self.content {
95            MessageContent::Text(s) => Some(s),
96            MessageContent::AssistantWithToolCalls { text, .. } => text.as_deref(),
97            MessageContent::ToolResult(r) => Some(&r.output),
98            MessageContent::ToolResultRef(r) => Some(&r.summary),
99            MessageContent::MultiPart { text, .. } => text.as_deref(),
100        }
101    }
102
103    /// Rough token estimate: bytes / 4 with a per-message overhead.
104    ///
105    /// Note on accuracy: this is a coarse approximation regardless of language.
106    /// For OpenAI-style BPE tokenizers it tracks reality within ~30% on mixed
107    /// English+CJK code/prose. We deliberately keep the formula simple and
108    /// per-content-type aware (tool args expanded, ToolResultRef counted by
109    /// what's actually sent on the wire) — small refinements to the divisor
110    /// are dwarfed by tokenizer differences across providers, so anything
111    /// short of a real tokenizer would be false precision.
112    pub fn estimate_tokens(&self) -> usize {
113        let byte_count = match &self.content {
114            MessageContent::Text(s) => s.len(),
115            MessageContent::AssistantWithToolCalls {
116                text,
117                tool_calls,
118                reasoning_content,
119                ..
120            } => {
121                let text_len = text.as_ref().map_or(0, |t| t.len());
122                // Each tool_use contributes name + JSON-stringified args + a
123                // small per-call overhead (id, type, wrapper braces).
124                // Matches CC's `name + jsonStringify(input)` accounting in
125                // services/tokenEstimation.ts:roughTokenCountEstimationForBlock.
126                let calls_len: usize = tool_calls
127                    .iter()
128                    .map(|tc| tc.name.len() + tc.arguments.len() + 20)
129                    .sum();
130                let reasoning_len = reasoning_content.as_ref().map_or(0, |r| r.len());
131                text_len + calls_len + reasoning_len
132            }
133            MessageContent::ToolResult(r) => r.output.len() + 10,
134            // ToolResultRef carries `byte_size` (the full original content
135            // size, kept for the cache lookup) AND `summary` (the short
136            // representation actually sent on the wire). The estimator must
137            // count what gets sent, not what's stashed on disk — the
138            // previous behaviour overestimated externalised results by 5-50×,
139            // pushing compression to fire on phantom budget pressure.
140            MessageContent::ToolResultRef(r) => r.summary.len() + 10,
141            MessageContent::MultiPart { text, images } => {
142                let text_len = text.as_ref().map_or(0, |t| t.len());
143                // Each image ≈ 1600 tokens (conservative estimate for vision models).
144                return (text_len / 4).max(1) + images.len() * 1600 + 4;
145            }
146        };
147        (byte_count / 4).max(1) + 4
148    }
149
150    /// Create a condensed version of this message for context budget savings.
151    /// Only condenses ToolResult messages (replaces full output with 1-line
152    /// summary). `tool_name` is looked up by the caller via the
153    /// paired ATC (see e.g. `ctx::truncate::post_process_tool_results`) —
154    /// pass `""` when unknown and this function will default to the generic
155    /// first-line summary. ToolResultRef and other variants return as-is.
156    ///
157    /// For `tool_name == "read_file"`, emits a skeleton that keeps function
158    /// signatures + line numbers so the model can still use line-number
159    /// edit mode without re-reading. Previously this decision used a
160    /// substring heuristic on the output format, which false-positived on
161    /// bash outputs that happened to start with `"  N| ..."` lines.
162    pub fn condensed(&self, tool_name: &str) -> Message {
163        match &self.content {
164            MessageContent::ToolResult(r) => {
165                let summary = if r.success {
166                    if tool_name == "read_file" && r.output.lines().count() > 50 {
167                        compress_file_to_skeleton(&r.output)
168                    } else {
169                        let first_line = r.output.lines().next().unwrap_or("OK");
170                        if first_line.chars().count() > 100 {
171                            format!("{}...", first_line.chars().take(97).collect::<String>())
172                        } else {
173                            first_line.to_string()
174                        }
175                    }
176                } else {
177                    let first_line = r.output.lines().next().unwrap_or("Error");
178                    format!(
179                        "FAILED: {}",
180                        if first_line.chars().count() > 80 {
181                            format!("{}...", first_line.chars().take(77).collect::<String>())
182                        } else {
183                            first_line.to_string()
184                        }
185                    )
186                };
187                Message {
188                    role: self.role.clone(),
189                    content: MessageContent::ToolResult(ToolResult {
190                        call_id: r.call_id.clone(),
191                        output: summary,
192                        success: r.success,
193                    }),
194                }
195            }
196            // ToolResultRef is already condensed (only holds a summary).
197            MessageContent::ToolResultRef(_) => self.clone(),
198            // MultiPart messages (images + text) are not condensable.
199            MessageContent::MultiPart { .. } => self.clone(),
200            _ => self.clone(),
201        }
202    }
203
204    /// Returns true if this message is a tool result (either inline or ref).
205    pub fn is_tool_result(&self) -> bool {
206        matches!(
207            self.content,
208            MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_)
209        )
210    }
211
212    /// Extract call_id from tool result variants.
213    pub fn tool_result_call_id(&self) -> Option<&str> {
214        match &self.content {
215            MessageContent::ToolResult(r) => Some(&r.call_id),
216            MessageContent::ToolResultRef(r) => Some(&r.call_id),
217            _ => None,
218        }
219    }
220
221    /// Extract success status from tool result variants.
222    pub fn tool_result_success(&self) -> Option<bool> {
223        match &self.content {
224            MessageContent::ToolResult(r) => Some(r.success),
225            MessageContent::ToolResultRef(r) => Some(r.success),
226            _ => None,
227        }
228    }
229
230    /// Extract the output text from tool result variants (summary for refs).
231    pub fn tool_result_output(&self) -> Option<&str> {
232        match &self.content {
233            MessageContent::ToolResult(r) => Some(&r.output),
234            MessageContent::ToolResultRef(r) => Some(&r.summary),
235            _ => None,
236        }
237    }
238}
239
/// Compress a `read_file` result to a structural skeleton.
///
/// Each input line is expected to look like `"<N>| <code>"`. Everything up
/// to and including the first `"| "` is treated as the line-number gutter
/// and ignored while classifying the line; a line without `"| "` is
/// classified as a whole. Kept lines are copied verbatim, gutter included,
/// so line numbers survive.
///
/// A line is kept when either
/// * its code is indented by at most 4 bytes and, after dropping a leading
///   Rust visibility qualifier (`pub`, `pub(crate)`, ...), starts with one
///   of the signature prefixes (imports, declarations, Vue section tags), or
/// * its code starts with `@` or `#[` (decorator / attribute), at any
///   indentation.
///
/// Blank lines are never kept.
///
/// Returns a one-line header followed by the kept lines, each terminated by
/// `'\n'`. If no line qualifies, returns the fallback
/// `"<first line> (<N> lines total)"` instead.
fn compress_file_to_skeleton(output: &str) -> String {
    // Prefixes that mark a line as structural. `pub fn` / `pub struct` / ...
    // need no entries of their own: the visibility qualifier is stripped
    // before matching.
    const SIGNATURE_PREFIXES: &[&str] = &[
        "fn ",
        "async fn ",
        "def ",
        "class ",
        "function ",
        "func ",
        "export ",
        "import ",
        "const ",
        "let ",
        "public ",
        "private ",
        "protected ",
        "interface ",
        "type ",
        "struct ",
        "enum ",
        "trait ",
        "impl ",
        "mod ",
        "<template",
        "</template",
        "<script",
        "</script",
        "<style",
        "</style",
        "package ",
        "use ",
        "from ",
        "#include",
    ];
    // Signatures are only recognised at indent 0-1 (measured in bytes, so a
    // tab counts as one).
    const MAX_SIGNATURE_INDENT: usize = 4;

    let total = output.lines().count();

    let skeleton: Vec<&str> = output
        .lines()
        .filter(|line| {
            // Drop the "N| " gutter before looking at the code itself.
            let code = match line.find("| ") {
                Some(pos) => &line[pos + 2..],
                None => line,
            };
            let trimmed = code.trim();
            if trimmed.is_empty() {
                return false;
            }

            let indent = code.len() - code.trim_start().len();
            let declaration = strip_rust_visibility(trimmed);
            let is_signature = indent <= MAX_SIGNATURE_INDENT
                && SIGNATURE_PREFIXES
                    .iter()
                    .any(|prefix| declaration.starts_with(prefix));
            let is_decorator = trimmed.starts_with('@') || trimmed.starts_with("#[");

            is_signature || is_decorator
        })
        .collect();

    if skeleton.is_empty() {
        // Fallback: just first line + count
        let first = output.lines().next().unwrap_or("");
        return format!("{} ({} lines total)", first, total);
    }

    let mut result = format!(
        "[File skeleton — {} lines total, use edit_file with start_line/end_line to edit:]\n",
        total
    );
    for line in skeleton {
        result.push_str(line);
        result.push('\n');
    }
    result
}

/// Drop a leading Rust visibility qualifier (`pub ` or `pub(...)`) from
/// `code`, returning the remainder with leading whitespace trimmed. Input
/// that does not start with such a qualifier — including words that merely
/// begin with "pub", like `public` — is returned unchanged.
fn strip_rust_visibility(code: &str) -> &str {
    let Some(rest) = code.strip_prefix("pub") else {
        return code;
    };
    if let Some(after_space) = rest.strip_prefix(' ') {
        return after_space.trim_start();
    }
    if rest.starts_with('(') {
        if let Some(close) = rest.find(')') {
            return rest[close + 1..].trim_start();
        }
    }
    code
}
326
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tool::ToolResult;

    /// Wrap `output` in a successful inline tool-result message.
    fn tool_result_msg(output: &str) -> Message {
        let result = ToolResult {
            call_id: String::from("c1"),
            output: output.to_owned(),
            success: true,
        };
        Message {
            role: Role::Tool,
            content: MessageContent::ToolResult(result),
        }
    }

    /// Borrow the output of an inline tool result, panicking on any other
    /// content variant.
    fn inline_output(msg: &Message) -> &str {
        match &msg.content {
            MessageContent::ToolResult(r) => &r.output,
            _ => panic!("expected ToolResult"),
        }
    }

    /// Render lines 1..=`count` with `render` and join them with newlines.
    fn numbered_lines(count: usize, render: impl Fn(usize) -> String) -> String {
        (1..=count).map(render).collect::<Vec<_>>().join("\n")
    }

    /// Build a user message with `MultiPart` content.
    fn multipart_msg(text: Option<&str>, images: Vec<ImagePart>) -> Message {
        Message {
            role: Role::User,
            content: MessageContent::MultiPart {
                text: text.map(str::to_string),
                images,
            },
        }
    }

    /// Bash output made of `" N| ..."` lines (numbered error dump, `cat -n`,
    /// etc.) must be condensed to its first line, never skeletonized — only
    /// read_file results get a skeleton. The earlier `is_file_read_output`
    /// heuristic false-positived on this shape and garbled the output by
    /// running `compress_file_to_skeleton` on it.
    #[test]
    fn condensed_bash_with_numbered_lines_uses_first_line_not_skeleton() {
        // 60 gutter-style lines: enough to match the old heuristic (first
        // lines look like "digits + '| '") and to clear the 50-line floor.
        let output = numbered_lines(60, |n| format!("  {}| oops step failed at call {}", n, n));
        let condensed = tool_result_msg(&output).condensed("bash");
        let summary = inline_output(&condensed);

        // A one-line first-line summary is expected, not a skeleton.
        assert!(
            !summary.contains("[File skeleton"),
            "bash result must not be skeletonized: {}",
            summary
        );
        assert_eq!(summary.lines().count(), 1);
        assert!(summary.starts_with("  1| oops step failed"));
    }

    /// Large read_file results must still be skeletonized so the model keeps
    /// function signatures + line numbers for line-mode edits.
    #[test]
    fn condensed_read_file_keeps_skeleton() {
        let mut lines: Vec<String> = (1..=80)
            .map(|i| format!("   {}| some line of code", i))
            .collect();
        lines.insert(10, String::from("   11| pub fn foo() -> u32 {"));
        let output = lines.join("\n");

        let condensed = tool_result_msg(&output).condensed("read_file");
        let summary = inline_output(&condensed);
        assert!(
            summary.contains("[File skeleton"),
            "read_file large results should skeletonize: {}",
            summary
        );
    }

    /// With an empty/unknown tool_name the first-line summary is used and no
    /// skeleton is produced — the safe default when the caller cannot look
    /// the tool_name up (orphan fixtures, older conversations).
    #[test]
    fn condensed_unknown_tool_uses_first_line() {
        let output = numbered_lines(80, |n| format!("   {}| line {}", n, n));
        let condensed = tool_result_msg(&output).condensed("");
        let summary = inline_output(&condensed);

        assert!(!summary.contains("[File skeleton"));
        assert_eq!(summary.lines().count(), 1);
    }

    // ── ImagePart / MultiPart tests ────────────────────────────────────────

    /// A tiny PNG-typed image fixture.
    fn sample_image_part() -> ImagePart {
        ImagePart {
            media_type: String::from("image/png"),
            data: String::from("iVBORw0KGgoAAAANSUhEUg=="),
        }
    }

    #[test]
    fn image_part_serde_roundtrip() {
        let original = sample_image_part();
        let encoded = serde_json::to_string(&original).expect("serialize ImagePart");
        let decoded: ImagePart = serde_json::from_str(&encoded).expect("deserialize ImagePart");

        assert_eq!(decoded.media_type, "image/png");
        assert_eq!(decoded.data, original.data);
    }

    #[test]
    fn multipart_serde_roundtrip_with_text_and_images() {
        let original = MessageContent::MultiPart {
            text: Some(String::from("describe this image")),
            images: vec![sample_image_part()],
        };
        let encoded = serde_json::to_string(&original).expect("serialize MultiPart");
        let decoded: MessageContent =
            serde_json::from_str(&encoded).expect("deserialize MultiPart");

        match decoded {
            MessageContent::MultiPart { text, images } => {
                assert_eq!(text.as_deref(), Some("describe this image"));
                assert_eq!(images.len(), 1);
                assert_eq!(images[0].media_type, "image/png");
            }
            other => panic!("expected MultiPart, got {:?}", other),
        }
    }

    #[test]
    fn multipart_serde_roundtrip_no_text() {
        let original = MessageContent::MultiPart {
            text: None,
            images: vec![sample_image_part(), sample_image_part()],
        };
        let encoded = serde_json::to_string(&original).expect("serialize");
        let decoded: MessageContent = serde_json::from_str(&encoded).expect("deserialize");

        match decoded {
            MessageContent::MultiPart { text, images } => {
                assert!(text.is_none());
                assert_eq!(images.len(), 2);
            }
            other => panic!("expected MultiPart, got {:?}", other),
        }
    }

    #[test]
    fn multipart_text_returns_some_when_present() {
        let msg = multipart_msg(Some("hello"), Vec::new());
        assert_eq!(msg.text(), Some("hello"));
    }

    #[test]
    fn multipart_text_returns_none_when_absent() {
        let msg = multipart_msg(None, Vec::new());
        assert_eq!(msg.text(), None);
    }

    #[test]
    fn multipart_estimate_tokens_includes_image_cost() {
        let msg = multipart_msg(
            Some("short"),
            vec![sample_image_part(), sample_image_part()],
        );
        let tokens = msg.estimate_tokens();
        // Two images at 1600 each give 3200 before text and message overhead.
        assert!(
            tokens >= 3200,
            "token estimate should include ~1600 per image, got {}",
            tokens
        );
    }

    #[test]
    fn multipart_estimate_tokens_no_images() {
        let msg = multipart_msg(Some("hello world"), Vec::new());
        let tokens = msg.estimate_tokens();
        // Without images: "hello world" is 11 bytes -> 11/4 = 2, + 0*1600 + 4 = 6.
        assert!(tokens < 100, "no-image multipart should have small token count, got {}", tokens);
        assert!(tokens >= 5, "should have at least text + overhead, got {}", tokens);
    }

    #[test]
    fn multipart_is_tool_result_returns_false() {
        let msg = multipart_msg(Some("look at this"), vec![sample_image_part()]);
        assert!(!msg.is_tool_result());
    }

    #[test]
    fn multipart_condensed_returns_clone() {
        let msg = multipart_msg(Some("analyze this"), vec![sample_image_part()]);
        let condensed = msg.condensed("");

        let (
            MessageContent::MultiPart {
                text: text_before,
                images: images_before,
            },
            MessageContent::MultiPart {
                text: text_after,
                images: images_after,
            },
        ) = (&msg.content, &condensed.content)
        else {
            panic!("condensed MultiPart should remain MultiPart");
        };

        assert_eq!(text_before, text_after);
        assert_eq!(images_before.len(), images_after.len());
        assert_eq!(images_before[0].media_type, images_after[0].media_type);
        assert_eq!(images_before[0].data, images_after[0].data);
    }
}