// codetether_agent/rlm/router.rs
1//! RLM Router - Decides when to route content through RLM processing
2//!
3//! Routes large tool outputs through RLM when they would exceed
4//! the model's context window threshold.
5//!
6//! When the `functiongemma` feature is active the router passes RLM tool
7//! definitions alongside the analysis prompt so FunctionGemma can convert
8//! text-only LLM responses into structured tool calls.
9
10use super::{RlmChunker, RlmConfig, RlmResult, RlmStats};
11use crate::provider::{CompletionRequest, ContentPart, Message, Provider, Role};
12use crate::rlm::context_trace::{ContextEvent, ContextTrace};
13use crate::session::{RlmCompletion, RlmOutcome, RlmProgressEvent, SessionBus, SessionEvent};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::collections::HashSet;
17use std::sync::Arc;
18use std::time::Instant;
19use tracing::{info, warn};
20use uuid::Uuid;
21
22use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
23
24use super::tools::rlm_tool_definitions;
25
/// Tools eligible for RLM routing
fn rlm_eligible_tools() -> HashSet<&'static str> {
    // Tools listed here tend to return large text blobs. Outputs from tools
    // outside this set can still be routed when they are clearly too large
    // for the model context window — see `RlmRouter::should_route`.
    HashSet::from([
        "read",
        "glob",
        "grep",
        "bash",
        "search",
        // File/tree-ish tools can easily explode
        "tree",
        "diff",
        "headtail",
        // Web tools are a common source of huge HTML/JS blobs
        "webfetch",
        "websearch",
        // Batch aggregates outputs and can exceed the window quickly
        "batch",
        // Code search can return large listings
        "codesearch",
    ])
}
53
/// Context for routing decisions
#[derive(Debug, Clone)]
pub struct RoutingContext {
    /// Identifier of the tool that produced the output (e.g. `"grep"`).
    pub tool_id: String,
    /// Session the tool ran in, for logging / correlation.
    pub session_id: String,
    /// Tool-call id when known, for correlating with the invocation.
    pub call_id: Option<String>,
    /// Context window size (in tokens) of the target model.
    pub model_context_limit: usize,
    /// Tokens already consumed in the current context, when known; used to
    /// check whether appending the output would overflow the window.
    pub current_context_tokens: Option<usize>,
}
63
/// Result of routing decision
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingResult {
    /// Whether the output should be processed through RLM.
    pub should_route: bool,
    /// Machine-readable reason code (e.g. `"exceeds_threshold"`,
    /// `"tool_not_eligible"`, `"within_threshold"`).
    pub reason: String,
    /// Estimated token count of the inspected output.
    pub estimated_tokens: usize,
}
71
/// Context for auto-processing.
///
/// ## Observability model
///
/// Two orthogonal streams describe an RLM run:
///
/// 1. **[`crate::rlm::context_trace::ContextTrace`] (durable):** the full,
///    ordered record of every iteration, tool call, and model response.
///    Attached to the returned [`RlmResult`] so the JSONL flywheel and
///    post-hoc analysis can replay a run exactly.
/// 2. **Session bus events (ephemeral):** [`SessionEvent::RlmProgress`]
///    at every iteration start and a single terminal
///    [`SessionEvent::RlmComplete`] at exit. Subscribers see the run as
///    it happens but are *not* expected to reconstruct full history
///    from the bus — for that, consume the trace on `RlmResult`.
///
/// ## Opting out of observability
///
/// Leaving `bus` and `trace_id` as `None` opts the call site out of
/// **both** bus emission and upstream trace correlation. This is
/// correct for tests, one-shot CLI runs, and synchronous utility
/// paths, but production call sites should thread an `AppState`-owned
/// bus through so the TUI and audit log see the activity.
pub struct AutoProcessContext<'a> {
    /// Identifier of the tool whose output is being analysed (e.g.
    /// `"grep"`, `"bash"`, or `"session_context"` for compaction).
    pub tool_id: &'a str,
    /// JSON-encoded arguments that produced the tool output. Used when
    /// building the analysis query and fallback summaries.
    pub tool_args: serde_json::Value,
    /// Session ID for logging / correlation.
    pub session_id: &'a str,
    /// Optional abort signal — when `Some(true)` the loop exits early
    /// and emits [`RlmOutcome::Aborted`].
    pub abort: Option<tokio::sync::watch::Receiver<bool>>,
    /// Optional progress callback. Retained for backward compatibility;
    /// prefer subscribing to [`SessionEvent::RlmProgress`] via `bus`.
    pub on_progress: Option<Box<dyn Fn(ProcessProgress) + Send + Sync>>,
    /// Provider used for the root RLM model call.
    pub provider: Arc<dyn Provider>,
    /// Model identifier for the root RLM call.
    pub model: String,
    /// Optional event bus for structured progress / completion events.
    /// See the struct-level docs for the bus-vs-trace split.
    pub bus: Option<SessionBus>,
    /// Optional caller-supplied trace id so upstream events (e.g. a
    /// compaction `CompactionStarted`) can be correlated with the
    /// resulting RLM run. When `None`, a fresh id is generated and
    /// returned on [`RlmResult::trace_id`].
    pub trace_id: Option<Uuid>,
    /// Pre-resolved provider for RLM sub-calls (iterations ≥ 2).
    ///
    /// When `None`, all iterations use [`Self::provider`]. When set,
    /// iteration 1 uses the root provider and subsequent iterations
    /// use this provider, enabling "expensive model for analysis,
    /// cheap model for continuation" splitting.
    ///
    /// Resolution (and fallback-on-failure) is the caller's
    /// responsibility — see
    /// [`Session::resolve_subcall_provider`](crate::session::Session).
    pub subcall_provider: Option<Arc<dyn Provider>>,
    /// Model identifier for the subcall provider. Mirrors
    /// [`Self::subcall_provider`]: when `None`, all iterations use
    /// [`Self::model`].
    pub subcall_model: Option<String>,
}
137
/// Progress update during processing
#[derive(Debug, Clone)]
pub struct ProcessProgress {
    /// 1-based iteration number of the current RLM loop pass.
    pub iteration: usize,
    /// Configured iteration cap for the run.
    pub max_iterations: usize,
    /// Free-form status string (`"running"` during the loop,
    /// `"completed"` on exit).
    pub status: String,
}
145
/// RLM Router for large content processing
///
/// Stateless namespace: all functionality is exposed as associated
/// functions, so no instance is ever constructed.
pub struct RlmRouter;
148
149impl RlmRouter {
150    /// Check if a tool output should be routed through RLM
151    pub fn should_route(output: &str, ctx: &RoutingContext, config: &RlmConfig) -> RoutingResult {
152        let estimated_tokens = RlmChunker::estimate_tokens(output);
153
154        // Mode: off - never route
155        if config.mode == "off" {
156            return RoutingResult {
157                should_route: false,
158                reason: "rlm_mode_off".to_string(),
159                estimated_tokens,
160            };
161        }
162
163        // Mode: always - always route for eligible tools
164        if config.mode == "always" {
165            if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
166                return RoutingResult {
167                    should_route: false,
168                    reason: "tool_not_eligible".to_string(),
169                    estimated_tokens,
170                };
171            }
172            return RoutingResult {
173                should_route: true,
174                reason: "rlm_mode_always".to_string(),
175                estimated_tokens,
176            };
177        }
178
179        // Mode: auto - route based on threshold
180        let eligible = rlm_eligible_tools().contains(ctx.tool_id.as_str())
181            || ctx.tool_id.as_str() == "session_context";
182
183        // Check if output exceeds threshold relative to context window
184        let threshold_tokens = (ctx.model_context_limit as f64 * config.threshold) as usize;
185        if estimated_tokens > threshold_tokens {
186            return RoutingResult {
187                should_route: true,
188                reason: if eligible {
189                    "exceeds_threshold".to_string()
190                } else {
191                    "exceeds_threshold_ineligible_tool".to_string()
192                },
193                estimated_tokens,
194            };
195        }
196
197        // Check if adding this output would cause overflow
198        if let Some(current) = ctx.current_context_tokens {
199            let projected_total = current + estimated_tokens;
200            if projected_total > (ctx.model_context_limit as f64 * 0.8) as usize {
201                return RoutingResult {
202                    should_route: true,
203                    reason: if eligible {
204                        "would_overflow".to_string()
205                    } else {
206                        "would_overflow_ineligible_tool".to_string()
207                    },
208                    estimated_tokens,
209                };
210            }
211        }
212
213        // If the tool isn't in the preferred set, we normally avoid routing to
214        // reduce extra model calls. However, we *must* protect the main session
215        // context window from oversized blobs (e.g. raw HTML from web tools).
216        if !eligible {
217            return RoutingResult {
218                should_route: false,
219                reason: "tool_not_eligible".to_string(),
220                estimated_tokens,
221            };
222        }
223
224        RoutingResult {
225            should_route: false,
226            reason: "within_threshold".to_string(),
227            estimated_tokens,
228        }
229    }
230
231    /// Smart truncate large output with RLM hint
232    pub fn smart_truncate(
233        output: &str,
234        tool_id: &str,
235        tool_args: &serde_json::Value,
236        max_tokens: usize,
237    ) -> (String, bool, usize) {
238        let estimated_tokens = RlmChunker::estimate_tokens(output);
239
240        if estimated_tokens <= max_tokens {
241            return (output.to_string(), false, estimated_tokens);
242        }
243
244        info!(
245            tool = tool_id,
246            original_tokens = estimated_tokens,
247            max_tokens,
248            "Smart truncating large output"
249        );
250
251        // Calculate how much to keep (roughly 4 chars per token)
252        let max_chars = max_tokens * 4;
253        let head_chars = (max_chars as f64 * 0.6) as usize;
254        let tail_chars = (max_chars as f64 * 0.3) as usize;
255
256        let head: String = output.chars().take(head_chars).collect();
257        let tail: String = output
258            .chars()
259            .rev()
260            .take(tail_chars)
261            .collect::<String>()
262            .chars()
263            .rev()
264            .collect();
265
266        let omitted_tokens = estimated_tokens
267            - RlmChunker::estimate_tokens(&head)
268            - RlmChunker::estimate_tokens(&tail);
269        let rlm_hint = Self::build_rlm_hint(tool_id, tool_args, estimated_tokens);
270
271        let truncated = format!(
272            "{}\n\n[... {} tokens truncated ...]\n\n{}\n\n{}",
273            head, omitted_tokens, rlm_hint, tail
274        );
275
276        (truncated, true, estimated_tokens)
277    }
278
279    fn build_rlm_hint(tool_id: &str, args: &serde_json::Value, tokens: usize) -> String {
280        let base = format!(
281            "⚠️ OUTPUT TOO LARGE ({} tokens). Use RLM for full analysis:",
282            tokens
283        );
284
285        match tool_id {
286            "read" => {
287                let path = args
288                    .get("filePath")
289                    .and_then(|v| v.as_str())
290                    .unwrap_or("...");
291                format!(
292                    "{}\n```\nrlm({{ query: \"Analyze this file\", content_paths: [\"{}\"] }})\n```",
293                    base, path
294                )
295            }
296            "bash" => {
297                format!(
298                    "{}\n```\nrlm({{ query: \"Analyze this command output\", content: \"<paste or use content_paths>\" }})\n```",
299                    base
300                )
301            }
302            "grep" => {
303                let pattern = args
304                    .get("pattern")
305                    .and_then(|v| v.as_str())
306                    .unwrap_or("...");
307                let include = args.get("include").and_then(|v| v.as_str()).unwrap_or("*");
308                format!(
309                    "{}\n```\nrlm({{ query: \"Summarize search results for {}\", content_glob: \"{}\" }})\n```",
310                    base, pattern, include
311                )
312            }
313            _ => {
314                format!(
315                    "{}\n```\nrlm({{ query: \"Summarize this output\", content: \"...\" }})\n```",
316                    base
317                )
318            }
319        }
320    }
321
    /// Automatically process large output through RLM
    ///
    /// Based on "Recursive Language Models" (Zhang et al. 2025):
    /// - Context is loaded as a variable in a REPL-like environment
    /// - LLM writes code/queries to analyze, decompose, and recursively sub-call itself
    ///
    /// When FunctionGemma is enabled, the router sends RLM tool definitions
    /// alongside the analysis prompt and dispatches structured tool calls
    /// returned by the model (or reformatted by FunctionGemma).
    ///
    /// Note: every return path visible here yields `Ok` — a provider failure
    /// on the first iteration produces a fallback `RlmResult` (with
    /// `RlmOutcome::Failed` emitted on the bus) rather than an `Err`.
    pub async fn auto_process(
        output: &str,
        ctx: AutoProcessContext<'_>,
        config: &RlmConfig,
    ) -> Result<RlmResult> {
        let start = Instant::now();
        let input_tokens = RlmChunker::estimate_tokens(output);

        info!(
            tool = ctx.tool_id,
            input_tokens,
            model = %ctx.model,
            "RLM: Starting auto-processing"
        );

        // Per-run trace id — correlates every progress event and the
        // terminal RlmComplete emission with any caller-side record
        // (e.g. a CompactionStarted event).
        let trace_id = ctx.trace_id.unwrap_or_else(Uuid::new_v4);
        // Generous budget — the trace is diagnostic, not load-bearing.
        let trace_budget = input_tokens.saturating_mul(2).max(4096);
        let mut trace = ContextTrace::new(trace_budget);
        let mut aborted = false;

        // Initialise FunctionGemma router if available

        let tool_router: Option<ToolCallRouter> = {
            let cfg = ToolRouterConfig::from_env();
            ToolCallRouter::from_config(&cfg)
                .inspect_err(|e| {
                    tracing::debug!(error = %e, "FunctionGemma router unavailable for RLM router");
                })
                .ok()
                .flatten()
        };

        // Prepare RLM tool definitions
        let tools = rlm_tool_definitions();

        // Detect content type for smarter processing
        let content_type = RlmChunker::detect_content_type(output);
        let content_hints = RlmChunker::get_processing_hints(content_type);

        info!(content_type = ?content_type, tool = ctx.tool_id, "RLM: Content type detected");

        // For very large contexts, use semantic chunking to preserve important parts
        let processed_output = if input_tokens > 50000 {
            RlmChunker::compress(output, 40000, None)
        } else {
            output.to_string()
        };

        // Create a REPL for structured tool dispatch
        let mut repl =
            super::repl::RlmRepl::new(processed_output.clone(), super::repl::ReplRuntime::Rust);

        // Build the query based on tool type
        let base_query = Self::build_query_for_tool(ctx.tool_id, &ctx.tool_args);
        let query = format!(
            "{}\n\n## Content Analysis Hints\n{}",
            base_query, content_hints
        );

        // Build the RLM system prompt
        let system_prompt = Self::build_rlm_system_prompt(input_tokens, ctx.tool_id, &query);
        trace.log_event(ContextEvent::SystemPrompt {
            content: system_prompt.clone(),
            tokens: ContextTrace::estimate_tokens(&system_prompt),
        });

        let max_iterations = config.max_iterations;
        let max_subcalls = config.max_subcalls;
        let mut iterations = 0;
        let mut subcalls = 0;
        let mut final_answer: Option<String> = None;

        // Build initial exploration prompt
        let exploration = Self::build_exploration_summary(&processed_output, input_tokens);

        // Run iterative analysis
        let mut conversation = vec![Message {
            role: Role::User,
            content: vec![ContentPart::Text {
                text: format!(
                    "{}\n\nHere is the context exploration:\n```\n{}\n```\n\nNow analyze and answer the query.",
                    system_prompt, exploration
                ),
            }],
        }];

        for i in 0..max_iterations {
            iterations = i + 1;
            trace.next_iteration();

            // Ephemeral progress stream (see AutoProcessContext docs).
            if let Some(bus) = ctx.bus.as_ref() {
                bus.emit(SessionEvent::RlmProgress(RlmProgressEvent {
                    trace_id,
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                }));
            }

            // Legacy callback mirror of the bus progress event.
            if let Some(ref progress) = ctx.on_progress {
                progress(ProcessProgress {
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                });
            }

            // Check for abort
            if let Some(ref abort) = ctx.abort
                && *abort.borrow()
            {
                warn!("RLM: Processing aborted");
                aborted = true;
                break;
            }

            // Build completion request — include tool definitions
            // Iteration 1 always uses the root provider; later iterations
            // switch to the subcall provider/model when one is supplied.
            let (active_provider, active_model) =
                if iterations > 1 && ctx.subcall_provider.is_some() {
                    (
                        Arc::clone(ctx.subcall_provider.as_ref().unwrap()),
                        ctx.subcall_model
                            .as_deref()
                            .unwrap_or(&ctx.model)
                            .to_string(),
                    )
                } else {
                    (Arc::clone(&ctx.provider), ctx.model.clone())
                };

            let request = CompletionRequest {
                messages: conversation.clone(),
                tools: tools.clone(),
                model: active_model.clone(),
                temperature: Some(0.7),
                top_p: None,
                max_tokens: Some(4000),
                stop: Vec::new(),
            };

            // Call the model
            let response = match active_provider.complete(request).await {
                Ok(r) => r,
                Err(e) => {
                    warn!(error = %e, iteration = iterations, "RLM: Model call failed");
                    if iterations > 1 {
                        break; // Use what we have
                    }
                    // First-iteration failure: emit a Failed completion and
                    // return the fallback result instead of an Err.
                    if let Some(bus) = ctx.bus.as_ref() {
                        bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                            trace_id,
                            outcome: RlmOutcome::Failed,
                            iterations,
                            subcalls,
                            input_tokens,
                            output_tokens: 0,
                            elapsed_ms: start.elapsed().as_millis() as u64,
                            reason: Some(format!("provider call failed: {e}")),
                            root_model: ctx.model.clone(),
                            subcall_model_used: ctx.subcall_model.clone(),
                        }));
                    }
                    let mut fb =
                        Self::fallback_result(output, ctx.tool_id, &ctx.tool_args, input_tokens);
                    fb.trace = Some(trace);
                    fb.trace_id = Some(trace_id);
                    return Ok(fb);
                }
            };

            // Optionally run FunctionGemma to convert text-only response

            let response = if let Some(ref router) = tool_router {
                // RLM router shares the session's provider which supports
                // native tool calling.  Skip FunctionGemma.
                router.maybe_reformat(response, &tools, true).await
            } else {
                response
            };

            // ── Structured tool-call path ────────────────────────────────
            // Collect (id, name, arguments) triples from the response parts.
            let tool_calls: Vec<(String, String, String)> = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::ToolCall {
                        id,
                        name,
                        arguments,
                        ..
                    } => Some((id.clone(), name.clone(), arguments.clone())),
                    _ => None,
                })
                .collect();

            if !tool_calls.is_empty() {
                info!(
                    count = tool_calls.len(),
                    iteration = iterations,
                    "RLM router: dispatching structured tool calls"
                );

                conversation.push(Message {
                    role: Role::Assistant,
                    content: response.message.content.clone(),
                });

                let mut tool_results: Vec<ContentPart> = Vec::new();

                for (call_id, name, arguments) in &tool_calls {
                    trace.log_event(ContextEvent::ToolCall {
                        name: name.clone(),
                        arguments_preview: arguments.chars().take(200).collect(),
                        tokens: ContextTrace::estimate_tokens(arguments),
                    });
                    match super::tools::dispatch_tool_call(name, arguments, &mut repl) {
                        Some(super::tools::RlmToolResult::Final(answer)) => {
                            // FINAL short-circuits: remaining tool calls in
                            // this batch are not dispatched.
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: "FINAL received".to_string(),
                                tokens: 0,
                            });
                            final_answer = Some(answer);
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: "FINAL received".to_string(),
                            });
                            break;
                        }
                        Some(super::tools::RlmToolResult::Output(out)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: out.chars().take(200).collect(),
                                tokens: ContextTrace::estimate_tokens(&out),
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: out,
                            });
                        }
                        None => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: format!("Unknown tool: {name}"),
                                tokens: 0,
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: format!("Unknown tool: {name}"),
                            });
                        }
                    }
                }

                if !tool_results.is_empty() {
                    conversation.push(Message {
                        role: Role::Tool,
                        content: tool_results,
                    });
                }

                // One dispatch round counts as a single subcall, however
                // many tool calls it contained.
                subcalls += 1;
                if final_answer.is_some() || subcalls >= max_subcalls {
                    break;
                }
                continue;
            }

            // ── Legacy text-only path ────────────────────────────────────
            let response_text: String = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");

            info!(
                iteration = iterations,
                response_len = response_text.len(),
                "RLM: Model response (text-only fallback)"
            );
            trace.log_event(ContextEvent::LlmQueryResult {
                query: query.chars().take(120).collect(),
                response_preview: response_text.chars().take(200).collect(),
                tokens: ContextTrace::estimate_tokens(&response_text),
            });

            // Check for FINAL answer
            if let Some(answer) = Self::extract_final(&response_text) {
                final_answer = Some(answer);
                break;
            }

            // Check for analysis that can be used directly
            // (heuristic: substantial prose with no code fences after a few
            // iterations is treated as the answer itself)
            if iterations >= 3 && response_text.len() > 500 && !response_text.contains("```") {
                // The model is providing direct analysis, use it
                final_answer = Some(response_text.clone());
                break;
            }

            // Add response to conversation
            conversation.push(Message {
                role: Role::Assistant,
                content: vec![ContentPart::Text {
                    text: response_text,
                }],
            });

            // Prompt for continuation
            conversation.push(Message {
                role: Role::User,
                content: vec![ContentPart::Text {
                    text: "Continue analysis. Call FINAL(\"your answer\") when ready.".to_string(),
                }],
            });

            subcalls += 1;
            if subcalls >= max_subcalls {
                warn!(subcalls, max = max_subcalls, "RLM: Max subcalls reached");
                break;
            }
        }

        if let Some(ref progress) = ctx.on_progress {
            progress(ProcessProgress {
                iteration: iterations,
                max_iterations,
                status: "completed".to_string(),
            });
        }

        // Track whether we got a real answer before consuming final_answer.
        let converged = final_answer.is_some();

        // Produce result, synthesizing one if no FINAL was produced.
        let answer = final_answer.unwrap_or_else(|| {
            warn!(
                iterations,
                subcalls, "RLM: No FINAL produced, using fallback"
            );
            Self::build_enhanced_fallback(output, ctx.tool_id, &ctx.tool_args, input_tokens)
        });

        let output_tokens = RlmChunker::estimate_tokens(&answer);
        // .max(1) avoids a divide-by-zero on an empty answer.
        let compression_ratio = input_tokens as f64 / output_tokens.max(1) as f64;
        let elapsed_ms = start.elapsed().as_millis() as u64;

        let result = format!(
            "[RLM: {} → {} tokens | {} iterations | {} sub-calls]\n\n{}",
            input_tokens, output_tokens, iterations, subcalls, answer
        );

        info!(
            input_tokens,
            output_tokens,
            iterations,
            subcalls,
            elapsed_ms,
            compression_ratio = format!("{:.1}", compression_ratio),
            "RLM: Processing complete"
        );

        trace.log_event(ContextEvent::Final {
            answer: answer.chars().take(200).collect(),
            tokens: output_tokens,
        });

        // Classify outcome for the durable completion record.
        let outcome = if aborted {
            RlmOutcome::Aborted
        } else if converged {
            RlmOutcome::Converged
        } else {
            RlmOutcome::Exhausted
        };
        let reason = match outcome {
            RlmOutcome::Converged => None,
            RlmOutcome::Aborted => Some("abort signal".to_string()),
            RlmOutcome::Exhausted => Some(format!(
                "no FINAL after {iterations} iterations / {subcalls} subcalls"
            )),
            RlmOutcome::Failed => None, // emitted on the early-return path
        };
        if let Some(bus) = ctx.bus.as_ref() {
            bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                trace_id,
                outcome,
                iterations,
                subcalls,
                input_tokens,
                output_tokens,
                elapsed_ms,
                reason,
                root_model: ctx.model.clone(),
                subcall_model_used: ctx.subcall_model.clone(),
            }));
        }

        Ok(RlmResult {
            processed: result,
            stats: RlmStats {
                input_tokens,
                output_tokens,
                iterations,
                subcalls,
                elapsed_ms,
                compression_ratio,
            },
            success: outcome.is_success(),
            error: None,
            trace: Some(trace),
            trace_id: Some(trace_id),
        })
    }
754
755    fn extract_final(text: &str) -> Option<String> {
756        // Look for FINAL("...") or FINAL('...') or FINAL!(...)
757        let patterns = [r#"FINAL\s*\(\s*["'`]"#, r#"FINAL!\s*\(\s*["'`]?"#];
758
759        for _pattern_start in patterns {
760            if let Some(start_idx) = text.find("FINAL") {
761                let after = &text[start_idx..];
762
763                // Find the opening quote/paren
764                if let Some(open_idx) = after.find(['"', '\'', '`']) {
765                    let quote_char = after.chars().nth(open_idx)?;
766                    let content_start = start_idx + open_idx + 1;
767
768                    // Find matching close
769                    let content = &text[content_start..];
770                    if let Some(close_idx) = content.find(quote_char) {
771                        let answer = &content[..close_idx];
772                        if !answer.is_empty() {
773                            return Some(answer.to_string());
774                        }
775                    }
776                }
777            }
778        }
779
780        None
781    }
782
783    fn build_exploration_summary(content: &str, input_tokens: usize) -> String {
784        let lines: Vec<&str> = content.lines().collect();
785        let total_lines = lines.len();
786
787        let head: String = lines
788            .iter()
789            .take(30)
790            .copied()
791            .collect::<Vec<_>>()
792            .join("\n");
793        let tail: String = lines
794            .iter()
795            .rev()
796            .take(50)
797            .collect::<Vec<_>>()
798            .into_iter()
799            .rev()
800            .copied()
801            .collect::<Vec<_>>()
802            .join("\n");
803
804        format!(
805            "=== CONTEXT EXPLORATION ===\n\
806             Total: {} chars, {} lines, ~{} tokens\n\n\
807             === FIRST 30 LINES ===\n{}\n\n\
808             === LAST 50 LINES ===\n{}\n\
809             === END EXPLORATION ===",
810            content.len(),
811            total_lines,
812            input_tokens,
813            head,
814            tail
815        )
816    }
817
818    fn build_rlm_system_prompt(input_tokens: usize, tool_id: &str, query: &str) -> String {
819        let context_type = if tool_id == "session_context" {
820            "conversation history"
821        } else {
822            "tool output"
823        };
824
825        format!(
826            r#"You are tasked with analyzing large content that cannot fit in a normal context window.
827
828The content is a {} with {} total tokens.
829
830YOUR TASK: {}
831
832## Analysis Strategy
833
8341. First, examine the exploration (head + tail of content) to understand structure
8352. Identify the most important information for answering the query
8363. Focus on: errors, key decisions, file paths, recent activity
8374. Provide a concise but complete answer
838
839When ready, call FINAL("your detailed answer") with your findings.
840
841Be SPECIFIC - include actual file paths, function names, error messages. Generic summaries are not useful."#,
842            context_type, input_tokens, query
843        )
844    }
845
846    fn build_query_for_tool(tool_id: &str, args: &serde_json::Value) -> String {
847        match tool_id {
848            "read" => {
849                let path = args.get("filePath").and_then(|v| v.as_str()).unwrap_or("unknown");
850                format!("Summarize the key contents of file \"{}\". Focus on: structure, main functions/classes, important logic. Be concise.", path)
851            }
852            "bash" => {
853                "Summarize the command output. Extract key information, results, errors, warnings. Be concise.".to_string()
854            }
855            "grep" => {
856                let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("pattern");
857                format!("Summarize search results for \"{}\". Group by file, highlight most relevant matches. Be concise.", pattern)
858            }
859            "glob" => {
860                "Summarize the file listing. Group by directory, highlight important files. Be concise.".to_string()
861            }
862            "webfetch" => {
863                let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("unknown");
864                format!(
865                    "Extract and summarize the main human-readable content from this fetched web page (URL: {}). Ignore scripts, styles, navigation, cookie banners, and boilerplate. Preserve headings, lists, code blocks, and important links. Be concise.",
866                    url
867                )
868            }
869            "websearch" => {
870                let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("unknown");
871                format!(
872                    "Summarize these web search results for query: \"{}\". List the most relevant results with titles and URLs, and note why each is relevant. Be concise.",
873                    q
874                )
875            }
876            "batch" => {
877                "Summarize the batch tool output. Group by sub-call, highlight failures/errors first, then key results. Keep actionable details: URLs, file paths, command outputs, and next steps.".to_string()
878            }
879            "session_context" => {
880                r#"You are a CONTEXT MEMORY SYSTEM. Create a BRIEFING for an AI assistant to continue this conversation.
881
882CRITICAL: The assistant will ONLY see your briefing - it has NO memory of the conversation.
883
884## What to Extract
885
8861. **PRIMARY GOAL**: What is the user ultimately trying to achieve?
8872. **CURRENT STATE**: What has been accomplished? Current status?
8883. **LAST ACTIONS**: What just happened? (last 3-5 tool calls, their results)
8894. **ACTIVE FILES**: Which files were modified?
8905. **PENDING TASKS**: What remains to be done?
8916. **CRITICAL DETAILS**: File paths, error messages, specific values, decisions made
8927. **NEXT STEPS**: What should happen next?
893
894Be SPECIFIC with file paths, function names, error messages."#.to_string()
895            }
896            _ => "Summarize this output concisely, extracting the most important information.".to_string()
897        }
898    }
899
900    fn build_enhanced_fallback(
901        output: &str,
902        tool_id: &str,
903        tool_args: &serde_json::Value,
904        input_tokens: usize,
905    ) -> String {
906        let lines: Vec<&str> = output.lines().collect();
907
908        if tool_id == "session_context" {
909            // Extract key structural information
910            let file_matches: Vec<&str> = lines
911                .iter()
912                .filter_map(|l| {
913                    if l.contains(".ts")
914                        || l.contains(".rs")
915                        || l.contains(".py")
916                        || l.contains(".json")
917                    {
918                        Some(*l)
919                    } else {
920                        None
921                    }
922                })
923                .take(15)
924                .collect();
925
926            let tool_calls: Vec<&str> = lines
927                .iter()
928                .filter(|l| l.contains("[Tool "))
929                .take(10)
930                .copied()
931                .collect();
932
933            let errors: Vec<&str> = lines
934                .iter()
935                .filter(|l| {
936                    l.to_lowercase().contains("error") || l.to_lowercase().contains("failed")
937                })
938                .take(5)
939                .copied()
940                .collect();
941
942            let head: String = lines
943                .iter()
944                .take(30)
945                .copied()
946                .collect::<Vec<_>>()
947                .join("\n");
948            let tail: String = lines
949                .iter()
950                .rev()
951                .take(80)
952                .collect::<Vec<_>>()
953                .into_iter()
954                .rev()
955                .copied()
956                .collect::<Vec<_>>()
957                .join("\n");
958
959            let mut parts = vec![
960                "## Context Summary (Fallback Mode)".to_string(),
961                format!(
962                    "*Original: {} tokens - RLM processing produced insufficient output*",
963                    input_tokens
964                ),
965                String::new(),
966            ];
967
968            if !file_matches.is_empty() {
969                parts.push(format!("**Files Mentioned:** {}", file_matches.len()));
970            }
971
972            if !tool_calls.is_empty() {
973                parts.push(format!("**Recent Tool Calls:** {}", tool_calls.join(", ")));
974            }
975
976            if !errors.is_empty() {
977                parts.push("**Recent Errors:**".to_string());
978                for e in errors {
979                    parts.push(format!("- {}", e.chars().take(150).collect::<String>()));
980                }
981            }
982
983            parts.push(String::new());
984            parts.push("### Initial Request".to_string());
985            parts.push("```".to_string());
986            parts.push(head);
987            parts.push("```".to_string());
988            parts.push(String::new());
989            parts.push("### Recent Activity".to_string());
990            parts.push("```".to_string());
991            parts.push(tail);
992            parts.push("```".to_string());
993
994            parts.join("\n")
995        } else {
996            let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
997            format!(
998                "## Fallback Summary\n*RLM processing failed - showing structured excerpt*\n\n{}",
999                truncated
1000            )
1001        }
1002    }
1003
1004    fn fallback_result(
1005        output: &str,
1006        tool_id: &str,
1007        tool_args: &serde_json::Value,
1008        input_tokens: usize,
1009    ) -> RlmResult {
1010        let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1011        let output_tokens = RlmChunker::estimate_tokens(&truncated);
1012
1013        RlmResult {
1014            processed: format!(
1015                "[RLM processing failed, showing truncated output]\n\n{}",
1016                truncated
1017            ),
1018            stats: RlmStats {
1019                input_tokens,
1020                output_tokens,
1021                iterations: 0,
1022                subcalls: 0,
1023                elapsed_ms: 0,
1024                compression_ratio: input_tokens as f64 / output_tokens.max(1) as f64,
1025            },
1026            success: false,
1027            error: Some("Model call failed".to_string()),
1028            trace: None,
1029            trace_id: None,
1030        }
1031    }
1032}
1033
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a routing context for a tool whose output is large
    /// enough to threaten the model context window.
    fn ctx_for(tool_id: &str) -> RoutingContext {
        RoutingContext {
            tool_id: tool_id.to_string(),
            session_id: "s".to_string(),
            call_id: None,
            model_context_limit: 200_000,
            current_context_tokens: Some(1000),
        }
    }

    #[test]
    fn should_route_large_webfetch_output() {
        let output = "a".repeat(200_000);
        let decision = RlmRouter::should_route(&output, &ctx_for("webfetch"), &RlmConfig::default());
        assert!(decision.should_route);
    }

    #[test]
    fn should_route_large_output_for_unknown_tool_to_protect_context() {
        let output = "a".repeat(200_000);
        let decision =
            RlmRouter::should_route(&output, &ctx_for("some_new_tool"), &RlmConfig::default());
        assert!(decision.should_route);
        assert!(decision.reason.contains("ineligible") || decision.reason.contains("threshold"));
    }
}