Skip to main content

oxi/
messages.rs

1//! SimpleMessage utilities for formatting and processing messages
2//!
3//! Provides utilities for formatting messages, truncating long content,
4//! estimating token counts, and summarizing conversation history.
5
/// A minimal role/content pair used by the utilities in this module.
///
/// `role` is a free-form identifier such as `"user"`, `"assistant"`,
/// `"system"` or `"tool"`; `content` is the message body.
///
/// Equality compares both fields, which makes messages directly usable in
/// `assert_eq!` and in deduplication logic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimpleMessage {
    /// Identifier of who produced the message.
    pub role: String,
    /// The message body.
    pub content: String,
}
12
/// Length limit, in bytes, that `truncate_message_default` applies to a message.
pub const DEFAULT_MAX_MESSAGE_LENGTH: usize = 10_000;

/// Default cap for the length of a generated summary.
pub const DEFAULT_MAX_SUMMARY_LENGTH: usize = 500;

/// Rough tokens-per-character ratio used for estimation
/// (0.25 corresponds to about four characters per token).
pub const ESTIMATED_TOKENS_PER_CHAR: f64 = 0.25;
21
/// Render a message as a bracketed speaker label on one line followed by
/// the content on the next.
///
/// Known role identifiers are replaced by a friendly label (for example
/// `"user"` becomes `You`); any other role is shown verbatim.
pub fn format_message(role: &str, content: &str) -> String {
    let label = match role {
        "branchSummary" | "compactionSummary" => "Summary",
        "custom" => "Custom",
        "bashExecution" => "Bash",
        "tool_result" => "Result",
        "tool" => "Tool",
        "system" => "System",
        "assistant" => "Assistant",
        "user" => "You",
        other => other,
    };

    // "[" + label + "]\n" + content
    let mut rendered = String::with_capacity(label.len() + content.len() + 3);
    rendered.push('[');
    rendered.push_str(label);
    rendered.push_str("]\n");
    rendered.push_str(content);
    rendered
}
38
/// Format a message for display, optionally placing `prefix` after the
/// speaker label.
///
/// The role-to-label mapping mirrors `format_message`, including the
/// `bashExecution`, `custom`, `branchSummary` and `compactionSummary`
/// roles, so both formatters render a given role the same way. Unknown
/// roles are shown verbatim.
///
/// # Returns
/// * `"[Label]\n<content>"` when `prefix` is empty
/// * `"[Label] <prefix>\n<content>"` otherwise
pub fn format_message_with_prefix(role: &str, content: &str, prefix: &str) -> String {
    let role_display = match role {
        "user" => "You",
        "assistant" => "Assistant",
        "system" => "System",
        "tool" => "Tool",
        "tool_result" => "Result",
        "bashExecution" => "Bash",
        "custom" => "Custom",
        "branchSummary" | "compactionSummary" => "Summary",
        _ => role,
    };

    if prefix.is_empty() {
        format!("[{}]\n{}", role_display, content)
    } else {
        format!("[{}] {}\n{}", role_display, prefix, content)
    }
}
56
/// Truncate a message to at most `max_length` bytes of content, then
/// append `suffix`.
///
/// Content that already fits is returned unchanged (without the suffix).
/// Otherwise the cut point is chosen, in order of preference:
/// 1. just before the last newline within the limit,
/// 2. just after the last `". "` sentence end within the limit,
/// 3. at the limit itself.
///
/// Options 1 and 2 are only used when they fall in the second half of the
/// allowed range, so that a very early break does not discard most of the
/// content.
///
/// # Arguments
/// * `content` - The content to truncate
/// * `max_length` - Maximum number of bytes of `content` to keep
/// * `suffix` - Text appended when truncation happens
///
/// # Returns
/// The truncated content followed by `suffix`; the result can therefore be
/// up to `max_length + suffix.len()` bytes long.
pub fn truncate_message(
    content: &str,
    max_length: usize,
    suffix: &str,
) -> String {
    if content.len() <= max_length {
        return content.to_string();
    }

    // `max_length` is a byte offset and may land inside a multi-byte UTF-8
    // character; slicing there would panic, so back up to the nearest
    // boundary. Offset 0 is always a boundary, so the loop terminates.
    let mut cut = max_length;
    while !content.is_char_boundary(cut) {
        cut -= 1;
    }
    let truncated = &content[..cut];
    let min_break = max_length / 2;

    let break_at = truncated
        .rfind('\n')
        .filter(|&idx| idx > min_break)
        .or_else(|| {
            truncated
                .rfind(". ")
                .filter(|&idx| idx > min_break)
                // Keep the period itself ('.' is a single byte).
                .map(|idx| idx + 1)
        })
        .unwrap_or(cut);

    format!("{}{}", &content[..break_at], suffix)
}
94
95/// Truncate a message with default suffix
96pub fn truncate_message_default(content: &str) -> String {
97    truncate_message(content, DEFAULT_MAX_MESSAGE_LENGTH, "\n\n... [message truncated]")
98}
99
100/// Estimate token count for a message
101pub fn estimate_tokens(content: &str) -> usize {
102    // Rough estimation: ~4 characters per token for English
103    (content.len() as f64 * ESTIMATED_TOKENS_PER_CHAR) as usize
104}
105
106/// Estimate token count for multiple messages
107pub fn estimate_messages_tokens(messages: &[SimpleMessage]) -> usize {
108    messages.iter().map(|m| estimate_tokens(&m.content)).sum()
109}
110
111/// Check if content exceeds token limit
112pub fn exceeds_token_limit(content: &str, limit: usize) -> bool {
113    estimate_tokens(content) > limit
114}
115
116/// Summarize a conversation by extracting key points
117///
118/// This is a simple extractive summarization that takes the first
119/// and last messages plus any tool results.
120pub fn summarize_conversation(messages: &[SimpleMessage], max_length: usize) -> String {
121    if messages.is_empty() {
122        return String::new();
123    }
124
125    let mut summary_parts = Vec::new();
126
127    // Add first user message
128    if let Some(first) = messages.first() {
129        if first.role == "user" {
130            let content = truncate_message(&first.content, 200, "...");
131            summary_parts.push(format!("Started with: {}", content));
132        }
133    }
134
135    // Count messages by type
136    let mut user_count = 0;
137    let mut assistant_count = 0;
138    let mut tool_count = 0;
139
140    for msg in messages {
141        match msg.role.as_str() {
142            "user" => user_count += 1,
143            "assistant" => assistant_count += 1,
144            "tool" | "tool_result" => tool_count += 1,
145            _ => {}
146        }
147    }
148
149    summary_parts.push(format!(
150        "{} user message(s), {} assistant response(s), {} tool use(s)",
151        user_count, assistant_count, tool_count
152    ));
153
154    // Add last assistant message if present
155    if let Some(last) = messages.last() {
156        if last.role == "assistant" {
157            let content = truncate_message(&last.content, 300, "...");
158            summary_parts.push(format!("Last response: {}", content));
159        }
160    }
161
162    let summary = summary_parts.join("\n");
163    truncate_message(&summary, max_length, "...")
164}
165
166/// Compact messages for context window efficiency
167///
168/// Returns a compacted version that keeps the most recent messages
169/// and summarizes older ones.
170pub fn compact_messages(
171    messages: &[SimpleMessage],
172    max_messages: usize,
173    summary_prefix: &str,
174    summary_suffix: &str,
175) -> Vec<SimpleMessage> {
176    if messages.len() <= max_messages {
177        return messages.to_vec();
178    }
179
180    let to_keep = max_messages / 2;
181    let _to_summarize = messages.len() - to_keep;
182
183    // Keep the first few messages
184    let kept: Vec<SimpleMessage> = messages.iter().take(to_keep).cloned().collect();
185
186    // Summarize the rest
187    let to_summarize_msgs = &messages[to_keep..messages.len()];
188    let summary = summarize_conversation(to_summarize_msgs, 300);
189
190    let mut result = kept;
191    result.push(SimpleMessage {
192        role: "system".to_string(),
193        content: format!("{}{}{}", summary_prefix, summary, summary_suffix),
194    });
195
196    // Add the most recent messages
197    result.extend_from_slice(&messages[messages.len().saturating_sub(to_keep)..]);
198
199    result
200}
201
/// Render a shell command, its output and its exit status as a transcript.
///
/// The transcript starts with `$ <command>` on its own line. Non-empty
/// output follows and is guaranteed to end with a newline. When an exit
/// code is supplied, a final status marker is appended: a plain marker for
/// code 0 and an error marker for any other code.
pub fn format_bash_execution(command: &str, output: &str, exit_code: Option<i32>) -> String {
    let mut transcript = String::from("$ ");
    transcript.push_str(command);
    transcript.push('\n');

    transcript.push_str(output);
    if !output.is_empty() && !output.ends_with('\n') {
        transcript.push('\n');
    }

    let status = exit_code.map(|code| {
        if code == 0 {
            format!("[exited with code {}]", code)
        } else {
            format!("[error: exited with code {}]", code)
        }
    });
    if let Some(marker) = status {
        transcript.push_str(&marker);
    }

    transcript
}
223
/// Render a tool result as a `[Tool: <name>]` header line followed by the
/// result text and a trailing newline.
pub fn format_tool_result(tool_name: &str, result: &str) -> String {
    let mut rendered = format!("[Tool: {}]\n", tool_name);
    rendered.push_str(result);
    rendered.push('\n');
    rendered
}
228
/// Get a short preview of `content`.
///
/// Surrounding whitespace is trimmed first. Text that then fits within
/// `max_length` bytes is returned as is. Longer text is cut at
/// `max_length` (backed up to a character boundary), preferably just
/// before the last newline when that newline lies in the second half of
/// the allowed range, and `...` is appended.
///
/// The returned string can be up to `max_length + 3` bytes long.
pub fn get_preview(content: &str, max_length: usize) -> String {
    let trimmed = content.trim();
    if trimmed.len() <= max_length {
        return trimmed.to_string();
    }

    // `max_length` is a byte offset and may fall inside a multi-byte UTF-8
    // character; slicing there would panic, so back up to a boundary.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut cut = max_length;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    let preview = &trimmed[..cut];

    match preview.rfind('\n') {
        Some(last_newline) if last_newline > max_length / 2 => {
            format!("{}...", &preview[..last_newline])
        }
        _ => format!("{}...", preview.trim_end()),
    }
}
245
246/// Count messages by role
247pub fn count_messages_by_role(messages: &[SimpleMessage]) -> std::collections::HashMap<String, usize> {
248    let mut counts = std::collections::HashMap::new();
249    for msg in messages {
250        *counts.entry(msg.role.clone()).or_insert(0) += 1;
251    }
252    counts
253}
254
255/// Calculate context window usage
256pub fn calculate_context_usage(messages: &[SimpleMessage], context_window: usize) -> (usize, f64) {
257    let total_tokens = estimate_messages_tokens(messages);
258    let usage = (total_tokens as f64 / context_window as f64) * 100.0;
259    (total_tokens, usage)
260}
261
/// Format a token count for display.
///
/// * below 1,000: the exact count, e.g. `500 tokens`
/// * below 1,000,000: thousands with one decimal, e.g. `1.5K tokens`
/// * otherwise: millions with one decimal, e.g. `1.5M tokens`
///
/// Counts just under one million would round to `1000.0K`; those are
/// shown as `1.0M tokens` instead.
pub fn format_tokens(tokens: usize) -> String {
    if tokens < 1000 {
        return format!("{} tokens", tokens);
    }

    if tokens < 1_000_000 {
        let thousands = format!("{:.1}", tokens as f64 / 1000.0);
        // One-decimal rounding can push 999.95+ up to "1000.0"; fall
        // through to the millions branch in that case.
        if thousands != "1000.0" {
            return format!("{}K tokens", thousands);
        }
    }

    format!("{:.1}M tokens", tokens as f64 / 1_000_000.0)
}
272
273/// Check if a message is empty or only whitespace
274pub fn is_empty_message(msg: &SimpleMessage) -> bool {
275    msg.content.trim().is_empty()
276}
277
278/// Filter out empty messages
279pub fn filter_empty_messages(messages: &[SimpleMessage]) -> Vec<SimpleMessage> {
280    messages.iter().filter(|m| !is_empty_message(m)).cloned().collect()
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand constructor so fixtures stay on one line.
    fn msg(role: &str, content: &str) -> SimpleMessage {
        SimpleMessage {
            role: role.to_string(),
            content: content.to_string(),
        }
    }

    #[test]
    fn test_format_message() {
        let rendered = format_message("user", "Hello, world!");
        assert!(rendered.contains("You"));
        assert!(rendered.contains("Hello, world!"));
    }

    #[test]
    fn test_truncate_message_short() {
        let text = "Short message";
        assert_eq!(truncate_message(text, 100, "..."), text);
    }

    #[test]
    fn test_truncate_message_long() {
        let suffix = "...[truncated]";
        let text = "a".repeat(200);
        let shortened = truncate_message(&text, 100, suffix);
        assert!(shortened.ends_with(suffix));
        assert!(shortened.len() <= 100 + suffix.len());
    }

    #[test]
    fn test_truncate_message_at_newline() {
        let text = format!("line1\nline2\nline3\n{}", "a".repeat(200));
        let shortened = truncate_message(&text, 20, "...");
        assert!(shortened.contains("line1\nline2\nline3"));
    }

    #[test]
    fn test_estimate_tokens() {
        // "Hello, world!" should come out at roughly 3-4 tokens.
        let estimate = estimate_tokens("Hello, world!");
        assert!((2..=6).contains(&estimate));
    }

    #[test]
    fn test_exceeds_token_limit() {
        let text = "a".repeat(1000);
        assert!(exceeds_token_limit(&text, 100));
        assert!(!exceeds_token_limit(&text, 500));
    }

    #[test]
    fn test_summarize_conversation_empty() {
        let none: Vec<SimpleMessage> = Vec::new();
        assert!(summarize_conversation(&none, 100).is_empty());
    }

    #[test]
    fn test_summarize_conversation() {
        let conversation = vec![
            msg("user", "Hello, I need help with Rust"),
            msg(
                "assistant",
                "I'd be happy to help with Rust! What specifically do you need?",
            ),
        ];
        let summary = summarize_conversation(&conversation, 200);
        assert!(summary.contains("Started with"));
        assert!(summary.contains("1 user message"));
        assert!(summary.contains("1 assistant response"));
    }

    #[test]
    fn test_compact_messages() {
        let history: Vec<SimpleMessage> = (0..10)
            .map(|i| msg("user", &format!("SimpleMessage {}", i)))
            .collect();

        let compacted = compact_messages(&history, 4, "<summary>", "</summary>");
        // Fewer messages than we started with...
        assert!(compacted.len() < history.len());
        // ...one of which carries the summary marker.
        assert!(compacted.iter().any(|m| m.content.contains("<summary>")));
    }

    #[test]
    fn test_format_bash_execution() {
        let transcript = format_bash_execution("echo hello", "hello\n", Some(0));
        assert!(transcript.contains("echo hello"));
        assert!(transcript.contains("hello"));
    }

    #[test]
    fn test_get_preview() {
        let preview = get_preview("This is a very long message that should be truncated", 20);
        assert!(preview.len() <= 23); // 20 + "..."
        assert!(preview.starts_with("This is a very "));
    }

    #[test]
    fn test_count_messages_by_role() {
        let conversation = vec![
            msg("user", "msg1"),
            msg("assistant", "msg2"),
            msg("user", "msg3"),
        ];
        let tally = count_messages_by_role(&conversation);
        assert_eq!(tally.get("user"), Some(&2));
        assert_eq!(tally.get("assistant"), Some(&1));
    }

    #[test]
    fn test_calculate_context_usage() {
        let conversation = vec![msg("user", &"a".repeat(1000))];
        let (tokens, usage) = calculate_context_usage(&conversation, 10000);
        assert!(tokens > 0);
        assert!(usage < 100.0);
    }

    #[test]
    fn test_format_tokens() {
        assert_eq!(format_tokens(500), "500 tokens");
        assert_eq!(format_tokens(1500), "1.5K tokens");
        assert_eq!(format_tokens(1_500_000), "1.5M tokens");
    }

    #[test]
    fn test_is_empty_message() {
        assert!(is_empty_message(&msg("user", "   ")));
        assert!(!is_empty_message(&msg("user", "Hello")));
    }

    #[test]
    fn test_filter_empty_messages() {
        let conversation = vec![
            msg("user", "Hello"),
            msg("user", "   "),
            msg("assistant", "Hi there"),
        ];
        assert_eq!(filter_empty_messages(&conversation).len(), 2);
    }
}