Skip to main content

codetether_agent/rlm/
router.rs

1//! RLM Router - Decides when to route content through RLM processing
2//!
3//! Routes large tool outputs through RLM when they would exceed
4//! the model's context window threshold.
5//!
6//! When the `functiongemma` feature is active the router passes RLM tool
7//! definitions alongside the analysis prompt so FunctionGemma can convert
8//! text-only LLM responses into structured tool calls.
9
10use super::{RlmChunker, RlmConfig, RlmResult, RlmStats};
11use crate::provider::{CompletionRequest, ContentPart, Message, Provider, Role};
12use anyhow::Result;
13use serde::{Deserialize, Serialize};
14use std::collections::HashSet;
15use std::sync::Arc;
16use std::time::Instant;
17use tracing::{info, warn};
18
19use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
20
21use super::tools::rlm_tool_definitions;
22
/// Tools eligible for RLM routing
///
/// Only outputs from these tool identifiers may be diverted through RLM;
/// every other tool bypasses routing entirely.
fn rlm_eligible_tools() -> HashSet<&'static str> {
    HashSet::from(["read", "glob", "grep", "bash", "search"])
}
30
/// Context for routing decisions
///
/// Inputs that [`RlmRouter::should_route`] needs to decide whether a tool
/// output must be diverted through RLM processing.
#[derive(Debug, Clone)]
pub struct RoutingContext {
    // Identifier of the tool that produced the output (e.g. "read", "grep").
    pub tool_id: String,
    // Owning session identifier; not read by the visible routing logic
    // (presumably carried for logging/telemetry — TODO confirm at call sites).
    pub session_id: String,
    // Optional identifier of the specific tool call, if known.
    pub call_id: Option<String>,
    // Maximum context window of the target model, in tokens.
    pub model_context_limit: usize,
    // Tokens already occupying the context, used to project overflow.
    pub current_context_tokens: Option<usize>,
}
40
/// Result of routing decision
///
/// Serializable so the decision can be logged or surfaced outside the process.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingResult {
    // Whether the output should be processed through RLM.
    pub should_route: bool,
    // Machine-readable reason code, e.g. "rlm_mode_off", "tool_not_eligible",
    // "exceeds_threshold", "would_overflow", "within_threshold".
    pub reason: String,
    // Estimated token count of the inspected output.
    pub estimated_tokens: usize,
}
48
/// Context for auto-processing
///
/// Bundles everything [`RlmRouter::auto_process`] needs: tool identity and
/// arguments, cancellation/progress hooks, and the provider/model to call.
pub struct AutoProcessContext<'a> {
    // Identifier of the tool whose output is being processed.
    pub tool_id: &'a str,
    // Original JSON arguments of the tool call (used to build queries/hints).
    pub tool_args: serde_json::Value,
    // Owning session identifier; not read by the processing loop itself.
    pub session_id: &'a str,
    // Optional cancellation signal; a `true` value aborts the iteration loop.
    pub abort: Option<tokio::sync::watch::Receiver<bool>>,
    // Optional callback invoked with a progress update on every iteration.
    pub on_progress: Option<Box<dyn Fn(ProcessProgress) + Send + Sync>>,
    // Provider used for all model completions during processing.
    pub provider: Arc<dyn Provider>,
    // Model identifier passed to the provider with each request.
    pub model: String,
}
59
/// Progress update during processing
#[derive(Debug, Clone)]
pub struct ProcessProgress {
    // 1-based iteration number currently running (or final count on completion).
    pub iteration: usize,
    // Configured iteration ceiling (from `RlmConfig::max_iterations`).
    pub max_iterations: usize,
    // Coarse state: "running" while iterating, "completed" when finished.
    pub status: String,
}
67
/// RLM Router for large content processing
///
/// Stateless namespace: all functionality lives in associated functions.
pub struct RlmRouter;
70
71impl RlmRouter {
72    /// Check if a tool output should be routed through RLM
73    pub fn should_route(output: &str, ctx: &RoutingContext, config: &RlmConfig) -> RoutingResult {
74        let estimated_tokens = RlmChunker::estimate_tokens(output);
75
76        // Mode: off - never route
77        if config.mode == "off" {
78            return RoutingResult {
79                should_route: false,
80                reason: "rlm_mode_off".to_string(),
81                estimated_tokens,
82            };
83        }
84
85        // Mode: always - always route for eligible tools
86        if config.mode == "always" {
87            if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
88                return RoutingResult {
89                    should_route: false,
90                    reason: "tool_not_eligible".to_string(),
91                    estimated_tokens,
92                };
93            }
94            return RoutingResult {
95                should_route: true,
96                reason: "rlm_mode_always".to_string(),
97                estimated_tokens,
98            };
99        }
100
101        // Mode: auto - route based on threshold
102        if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
103            return RoutingResult {
104                should_route: false,
105                reason: "tool_not_eligible".to_string(),
106                estimated_tokens,
107            };
108        }
109
110        // Check if output exceeds threshold relative to context window
111        let threshold_tokens = (ctx.model_context_limit as f64 * config.threshold) as usize;
112        if estimated_tokens > threshold_tokens {
113            return RoutingResult {
114                should_route: true,
115                reason: "exceeds_threshold".to_string(),
116                estimated_tokens,
117            };
118        }
119
120        // Check if adding this output would cause overflow
121        if let Some(current) = ctx.current_context_tokens {
122            let projected_total = current + estimated_tokens;
123            if projected_total > (ctx.model_context_limit as f64 * 0.8) as usize {
124                return RoutingResult {
125                    should_route: true,
126                    reason: "would_overflow".to_string(),
127                    estimated_tokens,
128                };
129            }
130        }
131
132        RoutingResult {
133            should_route: false,
134            reason: "within_threshold".to_string(),
135            estimated_tokens,
136        }
137    }
138
139    /// Smart truncate large output with RLM hint
140    pub fn smart_truncate(
141        output: &str,
142        tool_id: &str,
143        tool_args: &serde_json::Value,
144        max_tokens: usize,
145    ) -> (String, bool, usize) {
146        let estimated_tokens = RlmChunker::estimate_tokens(output);
147
148        if estimated_tokens <= max_tokens {
149            return (output.to_string(), false, estimated_tokens);
150        }
151
152        info!(
153            tool = tool_id,
154            original_tokens = estimated_tokens,
155            max_tokens,
156            "Smart truncating large output"
157        );
158
159        // Calculate how much to keep (roughly 4 chars per token)
160        let max_chars = max_tokens * 4;
161        let head_chars = (max_chars as f64 * 0.6) as usize;
162        let tail_chars = (max_chars as f64 * 0.3) as usize;
163
164        let head: String = output.chars().take(head_chars).collect();
165        let tail: String = output
166            .chars()
167            .rev()
168            .take(tail_chars)
169            .collect::<String>()
170            .chars()
171            .rev()
172            .collect();
173
174        let omitted_tokens = estimated_tokens
175            - RlmChunker::estimate_tokens(&head)
176            - RlmChunker::estimate_tokens(&tail);
177        let rlm_hint = Self::build_rlm_hint(tool_id, tool_args, estimated_tokens);
178
179        let truncated = format!(
180            "{}\n\n[... {} tokens truncated ...]\n\n{}\n\n{}",
181            head, omitted_tokens, rlm_hint, tail
182        );
183
184        (truncated, true, estimated_tokens)
185    }
186
187    fn build_rlm_hint(tool_id: &str, args: &serde_json::Value, tokens: usize) -> String {
188        let base = format!(
189            "⚠️ OUTPUT TOO LARGE ({} tokens). Use RLM for full analysis:",
190            tokens
191        );
192
193        match tool_id {
194            "read" => {
195                let path = args
196                    .get("filePath")
197                    .and_then(|v| v.as_str())
198                    .unwrap_or("...");
199                format!(
200                    "{}\n```\nrlm({{ query: \"Analyze this file\", content_paths: [\"{}\"] }})\n```",
201                    base, path
202                )
203            }
204            "bash" => {
205                format!(
206                    "{}\n```\nrlm({{ query: \"Analyze this command output\", content: \"<paste or use content_paths>\" }})\n```",
207                    base
208                )
209            }
210            "grep" => {
211                let pattern = args
212                    .get("pattern")
213                    .and_then(|v| v.as_str())
214                    .unwrap_or("...");
215                let include = args.get("include").and_then(|v| v.as_str()).unwrap_or("*");
216                format!(
217                    "{}\n```\nrlm({{ query: \"Summarize search results for {}\", content_glob: \"{}\" }})\n```",
218                    base, pattern, include
219                )
220            }
221            _ => {
222                format!(
223                    "{}\n```\nrlm({{ query: \"Summarize this output\", content: \"...\" }})\n```",
224                    base
225                )
226            }
227        }
228    }
229
    /// Automatically process large output through RLM
    ///
    /// Based on "Recursive Language Models" (Zhang et al. 2025):
    /// - Context is loaded as a variable in a REPL-like environment
    /// - LLM writes code/queries to analyze, decompose, and recursively sub-call itself
    ///
    /// When FunctionGemma is enabled, the router sends RLM tool definitions
    /// alongside the analysis prompt and dispatches structured tool calls
    /// returned by the model (or reformatted by FunctionGemma).
    ///
    /// Returns an [`RlmResult`] whose `processed` text is prefixed with a
    /// compression banner. A model failure on the very first iteration yields
    /// a truncated fallback result (`success: false`); failures on later
    /// iterations keep whatever was produced so far.
    pub async fn auto_process(
        output: &str,
        ctx: AutoProcessContext<'_>,
        config: &RlmConfig,
    ) -> Result<RlmResult> {
        let start = Instant::now();
        let input_tokens = RlmChunker::estimate_tokens(output);

        info!(
            tool = ctx.tool_id,
            input_tokens,
            model = %ctx.model,
            "RLM: Starting auto-processing"
        );

        // Initialise FunctionGemma router if available; unavailability is
        // expected and only logged at debug level.
        let tool_router: Option<ToolCallRouter> = {
            let cfg = ToolRouterConfig::from_env();
            ToolCallRouter::from_config(&cfg)
                .inspect_err(|e| {
                    tracing::debug!(error = %e, "FunctionGemma router unavailable for RLM router");
                })
                .ok()
                .flatten()
        };

        // Prepare RLM tool definitions (sent with every completion request).
        let tools = rlm_tool_definitions();

        // Detect content type for smarter processing
        let content_type = RlmChunker::detect_content_type(output);
        let content_hints = RlmChunker::get_processing_hints(content_type);

        info!(content_type = ?content_type, tool = ctx.tool_id, "RLM: Content type detected");

        // For very large contexts, use semantic chunking to preserve important
        // parts; thresholds are in estimated tokens (>50k in, ~40k out).
        let processed_output = if input_tokens > 50000 {
            RlmChunker::compress(output, 40000, None)
        } else {
            output.to_string()
        };

        // Create a REPL for structured tool dispatch; it holds the (possibly
        // compressed) content as its working context.
        let mut repl =
            super::repl::RlmRepl::new(processed_output.clone(), super::repl::ReplRuntime::Rust);

        // Build the query based on tool type, augmented with content-type hints.
        let base_query = Self::build_query_for_tool(ctx.tool_id, &ctx.tool_args);
        let query = format!(
            "{}\n\n## Content Analysis Hints\n{}",
            base_query, content_hints
        );

        // Build the RLM system prompt
        let system_prompt = Self::build_rlm_system_prompt(input_tokens, ctx.tool_id, &query);

        let max_iterations = config.max_iterations;
        let max_subcalls = config.max_subcalls;
        let mut iterations = 0;
        let mut subcalls = 0;
        let mut final_answer: Option<String> = None;

        // Build initial exploration prompt (head/tail excerpt of the content).
        let exploration = Self::build_exploration_summary(&processed_output, input_tokens);

        // Run iterative analysis; the system prompt is embedded in the first
        // user message rather than sent as a separate system role.
        let mut conversation = vec![Message {
            role: Role::User,
            content: vec![ContentPart::Text {
                text: format!(
                    "{}\n\nHere is the context exploration:\n```\n{}\n```\n\nNow analyze and answer the query.",
                    system_prompt, exploration
                ),
            }],
        }];

        for i in 0..max_iterations {
            iterations = i + 1;

            // Report progress before doing any work this iteration.
            if let Some(ref progress) = ctx.on_progress {
                progress(ProcessProgress {
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                });
            }

            // Check for abort (cooperative cancellation via watch channel).
            if let Some(ref abort) = ctx.abort {
                if *abort.borrow() {
                    warn!("RLM: Processing aborted");
                    break;
                }
            }

            // Build completion request — include tool definitions
            let request = CompletionRequest {
                messages: conversation.clone(),
                tools: tools.clone(),
                model: ctx.model.clone(),
                temperature: Some(0.7),
                top_p: None,
                max_tokens: Some(4000),
                stop: Vec::new(),
            };

            // Call the model. First-iteration failure aborts with a fallback
            // result; later failures keep whatever has been produced so far.
            let response = match ctx.provider.complete(request).await {
                Ok(r) => r,
                Err(e) => {
                    warn!(error = %e, iteration = iterations, "RLM: Model call failed");
                    if iterations > 1 {
                        break; // Use what we have
                    }
                    return Ok(Self::fallback_result(
                        output,
                        ctx.tool_id,
                        &ctx.tool_args,
                        input_tokens,
                    ));
                }
            };

            // Optionally run FunctionGemma to convert text-only response
            let response = if let Some(ref router) = tool_router {
                // RLM router shares the session's provider which supports
                // native tool calling.  Skip FunctionGemma.
                router.maybe_reformat(response, &tools, true).await
            } else {
                response
            };

            // ── Structured tool-call path ────────────────────────────────
            // Collect (id, name, arguments) triples from any ToolCall parts.
            let tool_calls: Vec<(String, String, String)> = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::ToolCall {
                        id,
                        name,
                        arguments,
                        ..
                    } => Some((id.clone(), name.clone(), arguments.clone())),
                    _ => None,
                })
                .collect();

            if !tool_calls.is_empty() {
                info!(
                    count = tool_calls.len(),
                    iteration = iterations,
                    "RLM router: dispatching structured tool calls"
                );

                conversation.push(Message {
                    role: Role::Assistant,
                    content: response.message.content.clone(),
                });

                let mut tool_results: Vec<ContentPart> = Vec::new();

                // Dispatch each call against the REPL. A Final result stops
                // dispatching immediately; remaining calls are dropped.
                for (call_id, name, arguments) in &tool_calls {
                    match super::tools::dispatch_tool_call(name, arguments, &mut repl) {
                        Some(super::tools::RlmToolResult::Final(answer)) => {
                            final_answer = Some(answer);
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: "FINAL received".to_string(),
                            });
                            break;
                        }
                        Some(super::tools::RlmToolResult::Output(out)) => {
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: out,
                            });
                        }
                        None => {
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: format!("Unknown tool: {name}"),
                            });
                        }
                    }
                }

                if !tool_results.is_empty() {
                    conversation.push(Message {
                        role: Role::Tool,
                        content: tool_results,
                    });
                }

                // One iteration's worth of tool dispatch counts as one subcall,
                // regardless of how many individual calls were made.
                subcalls += 1;
                if final_answer.is_some() || subcalls >= max_subcalls {
                    break;
                }
                continue;
            }

            // ── Legacy text-only path ────────────────────────────────────
            let response_text: String = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");

            info!(
                iteration = iterations,
                response_len = response_text.len(),
                "RLM: Model response (text-only fallback)"
            );

            // Check for FINAL answer
            if let Some(answer) = Self::extract_final(&response_text) {
                final_answer = Some(answer);
                break;
            }

            // Check for analysis that can be used directly: after 3 iterations,
            // a substantial response without code fences is accepted as-is.
            if iterations >= 3 && response_text.len() > 500 && !response_text.contains("```") {
                // The model is providing direct analysis, use it
                final_answer = Some(response_text.clone());
                break;
            }

            // Add response to conversation
            conversation.push(Message {
                role: Role::Assistant,
                content: vec![ContentPart::Text {
                    text: response_text,
                }],
            });

            // Prompt for continuation
            conversation.push(Message {
                role: Role::User,
                content: vec![ContentPart::Text {
                    text: "Continue analysis. Call FINAL(\"your answer\") when ready.".to_string(),
                }],
            });

            subcalls += 1;
            if subcalls >= max_subcalls {
                warn!(subcalls, max = max_subcalls, "RLM: Max subcalls reached");
                break;
            }
        }

        // Final progress notification regardless of how the loop exited.
        if let Some(ref progress) = ctx.on_progress {
            progress(ProcessProgress {
                iteration: iterations,
                max_iterations,
                status: "completed".to_string(),
            });
        }

        // Fallback if no FINAL was produced
        let answer = final_answer.unwrap_or_else(|| {
            warn!(
                iterations,
                subcalls, "RLM: No FINAL produced, using fallback"
            );
            Self::build_enhanced_fallback(output, ctx.tool_id, &ctx.tool_args, input_tokens)
        });

        let output_tokens = RlmChunker::estimate_tokens(&answer);
        let compression_ratio = input_tokens as f64 / output_tokens.max(1) as f64;
        let elapsed_ms = start.elapsed().as_millis() as u64;

        let result = format!(
            "[RLM: {} → {} tokens | {} iterations | {} sub-calls]\n\n{}",
            input_tokens, output_tokens, iterations, subcalls, answer
        );

        info!(
            input_tokens,
            output_tokens,
            iterations,
            subcalls,
            elapsed_ms,
            compression_ratio = format!("{:.1}", compression_ratio),
            "RLM: Processing complete"
        );

        Ok(RlmResult {
            processed: result,
            stats: RlmStats {
                input_tokens,
                // NOTE(review): recomputes the same estimate as `output_tokens`
                // above — the two values are identical.
                output_tokens: RlmChunker::estimate_tokens(&answer),
                iterations,
                subcalls,
                elapsed_ms,
                compression_ratio,
            },
            success: true,
            error: None,
        })
    }
546
547    fn extract_final(text: &str) -> Option<String> {
548        // Look for FINAL("...") or FINAL('...') or FINAL!(...)
549        let patterns = [r#"FINAL\s*\(\s*["'`]"#, r#"FINAL!\s*\(\s*["'`]?"#];
550
551        for _pattern_start in patterns {
552            if let Some(start_idx) = text.find("FINAL") {
553                let after = &text[start_idx..];
554
555                // Find the opening quote/paren
556                if let Some(open_idx) = after.find(['"', '\'', '`']) {
557                    let quote_char = after.chars().nth(open_idx)?;
558                    let content_start = start_idx + open_idx + 1;
559
560                    // Find matching close
561                    let content = &text[content_start..];
562                    if let Some(close_idx) = content.find(quote_char) {
563                        let answer = &content[..close_idx];
564                        if !answer.is_empty() {
565                            return Some(answer.to_string());
566                        }
567                    }
568                }
569            }
570        }
571
572        None
573    }
574
575    fn build_exploration_summary(content: &str, input_tokens: usize) -> String {
576        let lines: Vec<&str> = content.lines().collect();
577        let total_lines = lines.len();
578
579        let head: String = lines
580            .iter()
581            .take(30)
582            .copied()
583            .collect::<Vec<_>>()
584            .join("\n");
585        let tail: String = lines
586            .iter()
587            .rev()
588            .take(50)
589            .collect::<Vec<_>>()
590            .into_iter()
591            .rev()
592            .copied()
593            .collect::<Vec<_>>()
594            .join("\n");
595
596        format!(
597            "=== CONTEXT EXPLORATION ===\n\
598             Total: {} chars, {} lines, ~{} tokens\n\n\
599             === FIRST 30 LINES ===\n{}\n\n\
600             === LAST 50 LINES ===\n{}\n\
601             === END EXPLORATION ===",
602            content.len(),
603            total_lines,
604            input_tokens,
605            head,
606            tail
607        )
608    }
609
610    fn build_rlm_system_prompt(input_tokens: usize, tool_id: &str, query: &str) -> String {
611        let context_type = if tool_id == "session_context" {
612            "conversation history"
613        } else {
614            "tool output"
615        };
616
617        format!(
618            r#"You are tasked with analyzing large content that cannot fit in a normal context window.
619
620The content is a {} with {} total tokens.
621
622YOUR TASK: {}
623
624## Analysis Strategy
625
6261. First, examine the exploration (head + tail of content) to understand structure
6272. Identify the most important information for answering the query
6283. Focus on: errors, key decisions, file paths, recent activity
6294. Provide a concise but complete answer
630
631When ready, call FINAL("your detailed answer") with your findings.
632
633Be SPECIFIC - include actual file paths, function names, error messages. Generic summaries are not useful."#,
634            context_type, input_tokens, query
635        )
636    }
637
638    fn build_query_for_tool(tool_id: &str, args: &serde_json::Value) -> String {
639        match tool_id {
640            "read" => {
641                let path = args.get("filePath").and_then(|v| v.as_str()).unwrap_or("unknown");
642                format!("Summarize the key contents of file \"{}\". Focus on: structure, main functions/classes, important logic. Be concise.", path)
643            }
644            "bash" => {
645                "Summarize the command output. Extract key information, results, errors, warnings. Be concise.".to_string()
646            }
647            "grep" => {
648                let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("pattern");
649                format!("Summarize search results for \"{}\". Group by file, highlight most relevant matches. Be concise.", pattern)
650            }
651            "glob" => {
652                "Summarize the file listing. Group by directory, highlight important files. Be concise.".to_string()
653            }
654            "session_context" => {
655                r#"You are a CONTEXT MEMORY SYSTEM. Create a BRIEFING for an AI assistant to continue this conversation.
656
657CRITICAL: The assistant will ONLY see your briefing - it has NO memory of the conversation.
658
659## What to Extract
660
6611. **PRIMARY GOAL**: What is the user ultimately trying to achieve?
6622. **CURRENT STATE**: What has been accomplished? Current status?
6633. **LAST ACTIONS**: What just happened? (last 3-5 tool calls, their results)
6644. **ACTIVE FILES**: Which files were modified?
6655. **PENDING TASKS**: What remains to be done?
6666. **CRITICAL DETAILS**: File paths, error messages, specific values, decisions made
6677. **NEXT STEPS**: What should happen next?
668
669Be SPECIFIC with file paths, function names, error messages."#.to_string()
670            }
671            _ => "Summarize this output concisely, extracting the most important information.".to_string()
672        }
673    }
674
    /// Build a heuristic summary when RLM produced no usable FINAL answer.
    ///
    /// For `session_context` inputs this extracts structural signals
    /// (file mentions, recent tool calls, error lines) plus head/tail
    /// excerpts; for any other tool it returns a smart-truncated excerpt
    /// of the raw output.
    fn build_enhanced_fallback(
        output: &str,
        tool_id: &str,
        tool_args: &serde_json::Value,
        input_tokens: usize,
    ) -> String {
        let lines: Vec<&str> = output.lines().collect();

        if tool_id == "session_context" {
            // Extract key structural information
            // Lines mentioning common source/config file extensions (max 15).
            let file_matches: Vec<&str> = lines
                .iter()
                .filter_map(|l| {
                    if l.contains(".ts")
                        || l.contains(".rs")
                        || l.contains(".py")
                        || l.contains(".json")
                    {
                        Some(*l)
                    } else {
                        None
                    }
                })
                .take(15)
                .collect();

            // Lines that look like tool-call markers (max 10).
            let tool_calls: Vec<&str> = lines
                .iter()
                .filter(|l| l.contains("[Tool "))
                .take(10)
                .copied()
                .collect();

            // Lines mentioning errors/failures, case-insensitively (max 5).
            let errors: Vec<&str> = lines
                .iter()
                .filter(|l| {
                    l.to_lowercase().contains("error") || l.to_lowercase().contains("failed")
                })
                .take(5)
                .copied()
                .collect();

            // First 30 lines (the conversation's initial request)…
            let head: String = lines
                .iter()
                .take(30)
                .copied()
                .collect::<Vec<_>>()
                .join("\n");
            // …and last 80 lines (the most recent activity), in original order.
            let tail: String = lines
                .iter()
                .rev()
                .take(80)
                .collect::<Vec<_>>()
                .into_iter()
                .rev()
                .copied()
                .collect::<Vec<_>>()
                .join("\n");

            // Assemble the briefing section by section.
            let mut parts = vec![
                "## Context Summary (Fallback Mode)".to_string(),
                format!(
                    "*Original: {} tokens - RLM processing produced insufficient output*",
                    input_tokens
                ),
                String::new(),
            ];

            if !file_matches.is_empty() {
                parts.push(format!("**Files Mentioned:** {}", file_matches.len()));
            }

            if !tool_calls.is_empty() {
                parts.push(format!("**Recent Tool Calls:** {}", tool_calls.join(", ")));
            }

            if !errors.is_empty() {
                parts.push("**Recent Errors:**".to_string());
                // Each error line is clipped to 150 chars to bound the output.
                for e in errors {
                    parts.push(format!("- {}", e.chars().take(150).collect::<String>()));
                }
            }

            parts.push(String::new());
            parts.push("### Initial Request".to_string());
            parts.push("```".to_string());
            parts.push(head);
            parts.push("```".to_string());
            parts.push(String::new());
            parts.push("### Recent Activity".to_string());
            parts.push("```".to_string());
            parts.push(tail);
            parts.push("```".to_string());

            parts.join("\n")
        } else {
            // Non-session content: just show a bounded structured excerpt.
            let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
            format!(
                "## Fallback Summary\n*RLM processing failed - showing structured excerpt*\n\n{}",
                truncated
            )
        }
    }
778
779    fn fallback_result(
780        output: &str,
781        tool_id: &str,
782        tool_args: &serde_json::Value,
783        input_tokens: usize,
784    ) -> RlmResult {
785        let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
786        let output_tokens = RlmChunker::estimate_tokens(&truncated);
787
788        RlmResult {
789            processed: format!(
790                "[RLM processing failed, showing truncated output]\n\n{}",
791                truncated
792            ),
793            stats: RlmStats {
794                input_tokens,
795                output_tokens,
796                iterations: 0,
797                subcalls: 0,
798                elapsed_ms: 0,
799                compression_ratio: input_tokens as f64 / output_tokens.max(1) as f64,
800            },
801            success: false,
802            error: Some("Model call failed".to_string()),
803        }
804    }
805}