Skip to main content

codetether_agent/rlm/
router.rs

1//! RLM Router - Decides when to route content through RLM processing
2//!
3//! Routes large tool outputs through RLM when they would exceed
4//! the model's context window threshold.
5//!
6//! When the `functiongemma` feature is active the router passes RLM tool
7//! definitions alongside the analysis prompt so FunctionGemma can convert
8//! text-only LLM responses into structured tool calls.
9
10use super::{RlmChunker, RlmConfig, RlmResult, RlmStats};
11use crate::provider::{CompletionRequest, ContentPart, Message, Provider, Role};
12use crate::rlm::context_trace::{ContextEvent, ContextTrace};
13use crate::session::{RlmCompletion, RlmOutcome, RlmProgressEvent, SessionBus, SessionEvent};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::collections::HashSet;
17use std::sync::Arc;
18use std::time::Instant;
19use tracing::{info, warn};
20use uuid::Uuid;
21
22use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
23
24use super::tools::rlm_tool_definitions;
25
/// Tools eligible for RLM routing
///
/// Membership is deliberately narrow. Routing replaces the raw tool
/// output with an RLM-generated prose summary, which is destructive for
/// content-carrying tools like `read`/`grep`/`bash`: the agent has no
/// way to recover the original bytes, and re-reads reproduce the same
/// summary, causing spin loops. Those tools already expose bounded
/// slicing (offset/limit, line ranges, head/tail), so only tools whose
/// output is genuinely unpredictable in size AND cannot be re-queried
/// in smaller chunks belong in this set.
fn rlm_eligible_tools() -> HashSet<&'static str> {
    HashSet::from([
        // Web tools commonly return megabytes of HTML/JS with no slicing API.
        "webfetch",
        "websearch",
        // Batch aggregates many outputs and can exceed the window in one shot.
        "batch",
    ])
}
46
/// Context for routing decisions
#[derive(Debug, Clone)]
pub struct RoutingContext {
    /// Identifier of the tool that produced the output under consideration.
    pub tool_id: String,
    /// Session the tool call belongs to (logging / correlation).
    pub session_id: String,
    /// Optional id of the specific tool invocation, when the caller has one.
    pub call_id: Option<String>,
    /// Total context window of the active model, in tokens; thresholds
    /// in `should_route` are computed as fractions of this.
    pub model_context_limit: usize,
    /// Tokens already consumed by the conversation, when known. Used by
    /// `should_route` for the "would overflow" projection; `None`
    /// disables that check.
    pub current_context_tokens: Option<usize>,
}
56
/// Result of routing decision
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingResult {
    /// Whether the output should be diverted through RLM processing.
    pub should_route: bool,
    /// Machine-readable reason code for the decision
    /// (e.g. `"rlm_mode_off"`, `"exceeds_threshold"`, `"within_threshold"`).
    pub reason: String,
    /// Estimated token count of the output the decision was made for.
    pub estimated_tokens: usize,
}
64
/// Context for auto-processing.
///
/// ## Observability model
///
/// Two orthogonal streams describe an RLM run:
///
/// 1. **[`crate::rlm::context_trace::ContextTrace`] (durable):** the full,
///    ordered record of every iteration, tool call, and model response.
///    Attached to the returned [`RlmResult`] so the JSONL flywheel and
///    post-hoc analysis can replay a run exactly.
/// 2. **Session bus events (ephemeral):** [`SessionEvent::RlmProgress`]
///    at every iteration start and a single terminal
///    [`SessionEvent::RlmComplete`] at exit. Subscribers see the run as
///    it happens but are *not* expected to reconstruct full history
///    from the bus — for that, consume the trace on `RlmResult`.
///
/// ## Opting out of observability
///
/// Leaving `bus` and `trace_id` as `None` opts the call site out of
/// **both** bus emission and upstream trace correlation. This is
/// correct for tests, one-shot CLI runs, and synchronous utility
/// paths, but production call sites should thread an `AppState`-owned
/// bus through so the TUI and audit log see the activity.
pub struct AutoProcessContext<'a> {
    /// Identifier of the tool whose output is being analysed (e.g.
    /// `"grep"`, `"bash"`, or `"session_context"` for compaction).
    pub tool_id: &'a str,
    /// JSON-encoded arguments that produced the tool output.
    pub tool_args: serde_json::Value,
    /// Session ID for logging / correlation.
    ///
    /// NOTE(review): not read by `auto_process` in this file —
    /// presumably consumed by callers or future logging; confirm.
    pub session_id: &'a str,
    /// Optional abort signal — when `Some(true)` the loop exits early
    /// and emits [`RlmOutcome::Aborted`].
    pub abort: Option<tokio::sync::watch::Receiver<bool>>,
    /// Optional progress callback. Retained for backward compatibility;
    /// prefer subscribing to [`SessionEvent::RlmProgress`] via `bus`.
    pub on_progress: Option<Box<dyn Fn(ProcessProgress) + Send + Sync>>,
    /// Provider used for the root RLM model call.
    pub provider: Arc<dyn Provider>,
    /// Model identifier for the root RLM call.
    pub model: String,
    /// Optional event bus for structured progress / completion events.
    /// See the struct-level docs for the bus-vs-trace split.
    pub bus: Option<SessionBus>,
    /// Optional caller-supplied trace id so upstream events (e.g. a
    /// compaction `CompactionStarted`) can be correlated with the
    /// resulting RLM run. When `None`, a fresh id is generated and
    /// returned on [`RlmResult::trace_id`].
    pub trace_id: Option<Uuid>,
    /// Pre-resolved provider for RLM sub-calls (iterations ≥ 2).
    ///
    /// When `None`, all iterations use [`Self::provider`]. When set,
    /// iteration 1 uses the root provider and subsequent iterations
    /// use this provider, enabling "expensive model for analysis,
    /// cheap model for continuation" splitting.
    ///
    /// Resolution (and fallback-on-failure) is the caller's
    /// responsibility — see
    /// [`Session::resolve_subcall_provider`](crate::session::Session).
    pub subcall_provider: Option<Arc<dyn Provider>>,
    /// Model identifier for the subcall provider. Mirrors
    /// [`Self::subcall_provider`]: when `None`, all iterations use
    /// [`Self::model`].
    pub subcall_model: Option<String>,
}
130
/// Progress update during processing
#[derive(Debug, Clone)]
pub struct ProcessProgress {
    /// 1-based iteration number currently running (or just finished).
    pub iteration: usize,
    /// Configured iteration cap for this run.
    pub max_iterations: usize,
    /// Free-form status string, e.g. `"running"` or `"completed"`.
    pub status: String,
}
138
/// RLM Router for large content processing
///
/// Stateless namespace type: all functionality lives in associated
/// functions, so no instance is ever constructed.
pub struct RlmRouter;
141
142impl RlmRouter {
143    /// Check if a tool output should be routed through RLM
144    pub fn should_route(output: &str, ctx: &RoutingContext, config: &RlmConfig) -> RoutingResult {
145        let estimated_tokens = RlmChunker::estimate_tokens(output);
146
147        // Mode: off - never route
148        if config.mode == "off" {
149            return RoutingResult {
150                should_route: false,
151                reason: "rlm_mode_off".to_string(),
152                estimated_tokens,
153            };
154        }
155
156        // Mode: always - always route for eligible tools
157        if config.mode == "always" {
158            if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
159                return RoutingResult {
160                    should_route: false,
161                    reason: "tool_not_eligible".to_string(),
162                    estimated_tokens,
163                };
164            }
165            return RoutingResult {
166                should_route: true,
167                reason: "rlm_mode_always".to_string(),
168                estimated_tokens,
169            };
170        }
171
172        // Mode: auto - route based on threshold
173        let eligible = rlm_eligible_tools().contains(ctx.tool_id.as_str())
174            || ctx.tool_id.as_str() == "session_context";
175
176        // Check if output exceeds threshold relative to context window
177        let threshold_tokens = (ctx.model_context_limit as f64 * config.threshold) as usize;
178        if estimated_tokens > threshold_tokens {
179            return RoutingResult {
180                should_route: true,
181                reason: if eligible {
182                    "exceeds_threshold".to_string()
183                } else {
184                    "exceeds_threshold_ineligible_tool".to_string()
185                },
186                estimated_tokens,
187            };
188        }
189
190        // Check if adding this output would cause overflow.
191        //
192        // Historically this branch also fired based on `current_context_tokens`
193        // alone, which silently replaced small, content-carrying tool outputs
194        // (e.g. a 2 KB `read`) with an RLM prose summary whenever the prior
195        // conversation happened to be large. The agent had no way to
196        // distinguish the summary from the file's bytes and would re-read the
197        // same path, hitting the same branch, producing the same summary —
198        // an unbreakable spin loop. Context-window pressure is the
199        // compression pass's responsibility, not the tool-output router's.
200        //
201        // We now only route on "would overflow" when the *new output itself*
202        // is responsible for a large share of the projected total. This
203        // preserves protection against a single oversized blob while leaving
204        // normal-sized tool results verbatim.
205        if let Some(current) = ctx.current_context_tokens {
206            let projected_total = current + estimated_tokens;
207            let overflow_limit = (ctx.model_context_limit as f64 * 0.8) as usize;
208            // Require the new output to contribute at least half of the
209            // overflow so we don't punish small reads for old context.
210            let output_dominates = estimated_tokens * 2 >= projected_total;
211            if projected_total > overflow_limit && output_dominates {
212                return RoutingResult {
213                    should_route: true,
214                    reason: if eligible {
215                        "would_overflow".to_string()
216                    } else {
217                        "would_overflow_ineligible_tool".to_string()
218                    },
219                    estimated_tokens,
220                };
221            }
222        }
223
224        // If the tool isn't in the preferred set, we normally avoid routing to
225        // reduce extra model calls. However, we *must* protect the main session
226        // context window from oversized blobs (e.g. raw HTML from web tools).
227        if !eligible {
228            return RoutingResult {
229                should_route: false,
230                reason: "tool_not_eligible".to_string(),
231                estimated_tokens,
232            };
233        }
234
235        RoutingResult {
236            should_route: false,
237            reason: "within_threshold".to_string(),
238            estimated_tokens,
239        }
240    }
241
242    /// Smart truncate large output with RLM hint
243    pub fn smart_truncate(
244        output: &str,
245        tool_id: &str,
246        tool_args: &serde_json::Value,
247        max_tokens: usize,
248    ) -> (String, bool, usize) {
249        let estimated_tokens = RlmChunker::estimate_tokens(output);
250
251        if estimated_tokens <= max_tokens {
252            return (output.to_string(), false, estimated_tokens);
253        }
254
255        info!(
256            tool = tool_id,
257            original_tokens = estimated_tokens,
258            max_tokens,
259            "Smart truncating large output"
260        );
261
262        // Calculate how much to keep (roughly 4 chars per token)
263        let max_chars = max_tokens * 4;
264        let head_chars = (max_chars as f64 * 0.6) as usize;
265        let tail_chars = (max_chars as f64 * 0.3) as usize;
266
267        let head: String = output.chars().take(head_chars).collect();
268        let tail: String = output
269            .chars()
270            .rev()
271            .take(tail_chars)
272            .collect::<String>()
273            .chars()
274            .rev()
275            .collect();
276
277        let omitted_tokens = estimated_tokens
278            - RlmChunker::estimate_tokens(&head)
279            - RlmChunker::estimate_tokens(&tail);
280        let rlm_hint = Self::build_rlm_hint(tool_id, tool_args, estimated_tokens);
281
282        let truncated = format!(
283            "{}\n\n[... {} tokens truncated ...]\n\n{}\n\n{}",
284            head, omitted_tokens, rlm_hint, tail
285        );
286
287        (truncated, true, estimated_tokens)
288    }
289
290    fn build_rlm_hint(tool_id: &str, args: &serde_json::Value, tokens: usize) -> String {
291        let base = format!(
292            "⚠️ OUTPUT TOO LARGE ({} tokens). Use RLM for full analysis:",
293            tokens
294        );
295
296        match tool_id {
297            "read" => {
298                let path = args
299                    .get("filePath")
300                    .and_then(|v| v.as_str())
301                    .unwrap_or("...");
302                format!(
303                    "{}\n```\nrlm({{ query: \"Analyze this file\", content_paths: [\"{}\"] }})\n```",
304                    base, path
305                )
306            }
307            "bash" => {
308                format!(
309                    "{}\n```\nrlm({{ query: \"Analyze this command output\", content: \"<paste or use content_paths>\" }})\n```",
310                    base
311                )
312            }
313            "grep" => {
314                let pattern = args
315                    .get("pattern")
316                    .and_then(|v| v.as_str())
317                    .unwrap_or("...");
318                let include = args.get("include").and_then(|v| v.as_str()).unwrap_or("*");
319                format!(
320                    "{}\n```\nrlm({{ query: \"Summarize search results for {}\", content_glob: \"{}\" }})\n```",
321                    base, pattern, include
322                )
323            }
324            _ => {
325                format!(
326                    "{}\n```\nrlm({{ query: \"Summarize this output\", content: \"...\" }})\n```",
327                    base
328                )
329            }
330        }
331    }
332
    /// Automatically process large output through RLM
    ///
    /// Based on "Recursive Language Models" (Zhang et al. 2025):
    /// - Context is loaded as a variable in a REPL-like environment
    /// - LLM writes code/queries to analyze, decompose, and recursively sub-call itself
    ///
    /// When FunctionGemma is enabled, the router sends RLM tool definitions
    /// alongside the analysis prompt and dispatches structured tool calls
    /// returned by the model (or reformatted by FunctionGemma).
    ///
    /// On the paths visible here this never returns `Err`: a provider
    /// failure on iteration 1 returns a fallback [`RlmResult`] (with
    /// trace attached) instead, and later failures break out of the
    /// loop and use whatever answer has accumulated.
    pub async fn auto_process(
        output: &str,
        ctx: AutoProcessContext<'_>,
        config: &RlmConfig,
    ) -> Result<RlmResult> {
        let start = Instant::now();
        let input_tokens = RlmChunker::estimate_tokens(output);

        info!(
            tool = ctx.tool_id,
            input_tokens,
            model = %ctx.model,
            "RLM: Starting auto-processing"
        );

        // Per-run trace id — correlates every progress event and the
        // terminal RlmComplete emission with any caller-side record
        // (e.g. a CompactionStarted event).
        let trace_id = ctx.trace_id.unwrap_or_else(Uuid::new_v4);
        // Generous budget — the trace is diagnostic, not load-bearing.
        let trace_budget = input_tokens.saturating_mul(2).max(4096);
        let mut trace = ContextTrace::new(trace_budget);
        let mut aborted = false;

        // Initialise FunctionGemma router if available

        let tool_router: Option<ToolCallRouter> = {
            let cfg = ToolRouterConfig::from_env();
            ToolCallRouter::from_config(&cfg)
                .inspect_err(|e| {
                    tracing::debug!(error = %e, "FunctionGemma router unavailable for RLM router");
                })
                .ok()
                .flatten()
        };

        // Prepare RLM tool definitions
        let tools = rlm_tool_definitions();

        // Detect content type for smarter processing
        let content_type = RlmChunker::detect_content_type(output);
        let content_hints = RlmChunker::get_processing_hints(content_type);

        info!(content_type = ?content_type, tool = ctx.tool_id, "RLM: Content type detected");

        // For very large contexts, use semantic chunking to preserve important parts
        let processed_output = if input_tokens > 50000 {
            RlmChunker::compress(output, 40000, None)
        } else {
            output.to_string()
        };

        // Create a REPL for structured tool dispatch
        let mut repl =
            super::repl::RlmRepl::new(processed_output.clone(), super::repl::ReplRuntime::Rust);

        // Build the query based on tool type
        let base_query = Self::build_query_for_tool(ctx.tool_id, &ctx.tool_args);
        let query = format!(
            "{}\n\n## Content Analysis Hints\n{}",
            base_query, content_hints
        );

        // Build the RLM system prompt
        let system_prompt = Self::build_rlm_system_prompt(input_tokens, ctx.tool_id, &query);
        trace.log_event(ContextEvent::SystemPrompt {
            content: system_prompt.clone(),
            tokens: ContextTrace::estimate_tokens(&system_prompt),
        });

        let max_iterations = config.max_iterations;
        let max_subcalls = config.max_subcalls;
        let mut iterations = 0;
        let mut subcalls = 0;
        let mut final_answer: Option<String> = None;

        // Build initial exploration prompt
        let exploration = Self::build_exploration_summary(&processed_output, input_tokens);

        // Run iterative analysis
        let mut conversation = vec![Message {
            role: Role::User,
            content: vec![ContentPart::Text {
                text: format!(
                    "{}\n\nHere is the context exploration:\n```\n{}\n```\n\nNow analyze and answer the query.",
                    system_prompt, exploration
                ),
            }],
        }];

        // Main loop: one model call per iteration, stopping on FINAL,
        // abort, iteration cap, or subcall budget exhaustion.
        for i in 0..max_iterations {
            iterations = i + 1;
            trace.next_iteration();

            // Ephemeral progress: bus event first, then the legacy
            // callback (both report the same 1-based iteration).
            if let Some(bus) = ctx.bus.as_ref() {
                bus.emit(SessionEvent::RlmProgress(RlmProgressEvent {
                    trace_id,
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                }));
            }

            if let Some(ref progress) = ctx.on_progress {
                progress(ProcessProgress {
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                });
            }

            // Check for abort
            if let Some(ref abort) = ctx.abort
                && *abort.borrow()
            {
                warn!("RLM: Processing aborted");
                aborted = true;
                break;
            }

            // Build completion request — include tool definitions
            // Iteration 1 always uses the root provider; later
            // iterations switch to the cheaper subcall provider when
            // one was supplied (see AutoProcessContext::subcall_provider).
            let (active_provider, active_model) =
                if iterations > 1 && ctx.subcall_provider.is_some() {
                    (
                        Arc::clone(ctx.subcall_provider.as_ref().unwrap()),
                        ctx.subcall_model
                            .as_deref()
                            .unwrap_or(&ctx.model)
                            .to_string(),
                    )
                } else {
                    (Arc::clone(&ctx.provider), ctx.model.clone())
                };

            let request = CompletionRequest {
                messages: conversation.clone(),
                tools: tools.clone(),
                model: active_model.clone(),
                temperature: Some(0.7),
                top_p: None,
                max_tokens: Some(4000),
                stop: Vec::new(),
            };

            // Call the model
            let response = match active_provider.complete(request).await {
                Ok(r) => r,
                Err(e) => {
                    warn!(error = %e, iteration = iterations, "RLM: Model call failed");
                    if iterations > 1 {
                        break; // Use what we have
                    }
                    // First-iteration failure: emit a terminal Failed
                    // completion and return the fallback result with the
                    // trace attached, rather than erroring out.
                    if let Some(bus) = ctx.bus.as_ref() {
                        bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                            trace_id,
                            outcome: RlmOutcome::Failed,
                            iterations,
                            subcalls,
                            input_tokens,
                            output_tokens: 0,
                            elapsed_ms: start.elapsed().as_millis() as u64,
                            reason: Some(format!("provider call failed: {e}")),
                            root_model: ctx.model.clone(),
                            subcall_model_used: ctx.subcall_model.clone(),
                        }));
                    }
                    let mut fb =
                        Self::fallback_result(output, ctx.tool_id, &ctx.tool_args, input_tokens);
                    fb.trace = Some(trace);
                    fb.trace_id = Some(trace_id);
                    return Ok(fb);
                }
            };

            // Optionally run FunctionGemma to convert text-only response

            let response = if let Some(ref router) = tool_router {
                // RLM router shares the session's provider which supports
                // native tool calling.  Skip FunctionGemma.
                router.maybe_reformat(response, &tools, true).await
            } else {
                response
            };

            // ── Structured tool-call path ────────────────────────────────
            let tool_calls: Vec<(String, String, String)> = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::ToolCall {
                        id,
                        name,
                        arguments,
                        ..
                    } => Some((id.clone(), name.clone(), arguments.clone())),
                    _ => None,
                })
                .collect();

            if !tool_calls.is_empty() {
                info!(
                    count = tool_calls.len(),
                    iteration = iterations,
                    "RLM router: dispatching structured tool calls"
                );

                conversation.push(Message {
                    role: Role::Assistant,
                    content: response.message.content.clone(),
                });

                let mut tool_results: Vec<ContentPart> = Vec::new();

                // Dispatch each call against the REPL; a FINAL result
                // short-circuits the remaining calls in this batch.
                for (call_id, name, arguments) in &tool_calls {
                    trace.log_event(ContextEvent::ToolCall {
                        name: name.clone(),
                        arguments_preview: arguments.chars().take(200).collect(),
                        tokens: ContextTrace::estimate_tokens(arguments),
                    });
                    match super::tools::dispatch_tool_call(name, arguments, &mut repl) {
                        Some(super::tools::RlmToolResult::Final(answer)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: "FINAL received".to_string(),
                                tokens: 0,
                            });
                            final_answer = Some(answer);
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: "FINAL received".to_string(),
                            });
                            break;
                        }
                        Some(super::tools::RlmToolResult::Output(out)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: out.chars().take(200).collect(),
                                tokens: ContextTrace::estimate_tokens(&out),
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: out,
                            });
                        }
                        None => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: format!("Unknown tool: {name}"),
                                tokens: 0,
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: format!("Unknown tool: {name}"),
                            });
                        }
                    }
                }

                if !tool_results.is_empty() {
                    conversation.push(Message {
                        role: Role::Tool,
                        content: tool_results,
                    });
                }

                // One structured round-trip counts as a single subcall
                // regardless of how many tool calls it contained.
                subcalls += 1;
                if final_answer.is_some() || subcalls >= max_subcalls {
                    break;
                }
                continue;
            }

            // ── Legacy text-only path ────────────────────────────────────
            let response_text: String = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");

            info!(
                iteration = iterations,
                response_len = response_text.len(),
                "RLM: Model response (text-only fallback)"
            );
            trace.log_event(ContextEvent::LlmQueryResult {
                query: query.chars().take(120).collect(),
                response_preview: response_text.chars().take(200).collect(),
                tokens: ContextTrace::estimate_tokens(&response_text),
            });

            // Check for FINAL answer
            if let Some(answer) = Self::extract_final(&response_text) {
                final_answer = Some(answer);
                break;
            }

            // Check for analysis that can be used directly
            if iterations >= 3 && response_text.len() > 500 && !response_text.contains("```") {
                // The model is providing direct analysis, use it
                final_answer = Some(response_text.clone());
                break;
            }

            // Add response to conversation
            conversation.push(Message {
                role: Role::Assistant,
                content: vec![ContentPart::Text {
                    text: response_text,
                }],
            });

            // Prompt for continuation
            conversation.push(Message {
                role: Role::User,
                content: vec![ContentPart::Text {
                    text: "Continue analysis. Call FINAL(\"your answer\") when ready.".to_string(),
                }],
            });

            subcalls += 1;
            if subcalls >= max_subcalls {
                warn!(subcalls, max = max_subcalls, "RLM: Max subcalls reached");
                break;
            }
        }

        if let Some(ref progress) = ctx.on_progress {
            progress(ProcessProgress {
                iteration: iterations,
                max_iterations,
                status: "completed".to_string(),
            });
        }

        // Track whether we got a real answer before consuming final_answer.
        let converged = final_answer.is_some();

        // Produce result, synthesizing one if no FINAL was produced.
        let answer = final_answer.unwrap_or_else(|| {
            warn!(
                iterations,
                subcalls, "RLM: No FINAL produced, using fallback"
            );
            Self::build_enhanced_fallback(output, ctx.tool_id, &ctx.tool_args, input_tokens)
        });

        let output_tokens = RlmChunker::estimate_tokens(&answer);
        let compression_ratio = input_tokens as f64 / output_tokens.max(1) as f64;
        let elapsed_ms = start.elapsed().as_millis() as u64;

        let result = format!(
            "[RLM: {} → {} tokens | {} iterations | {} sub-calls]\n\n{}",
            input_tokens, output_tokens, iterations, subcalls, answer
        );

        info!(
            input_tokens,
            output_tokens,
            iterations,
            subcalls,
            elapsed_ms,
            compression_ratio = format!("{:.1}", compression_ratio),
            "RLM: Processing complete"
        );

        trace.log_event(ContextEvent::Final {
            answer: answer.chars().take(200).collect(),
            tokens: output_tokens,
        });

        // Classify outcome for the durable completion record.
        let outcome = if aborted {
            RlmOutcome::Aborted
        } else if converged {
            RlmOutcome::Converged
        } else {
            RlmOutcome::Exhausted
        };
        let reason = match outcome {
            RlmOutcome::Converged => None,
            RlmOutcome::Aborted => Some("abort signal".to_string()),
            RlmOutcome::Exhausted => Some(format!(
                "no FINAL after {iterations} iterations / {subcalls} subcalls"
            )),
            RlmOutcome::Failed => None, // emitted on the early-return path
        };
        if let Some(bus) = ctx.bus.as_ref() {
            bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                trace_id,
                outcome,
                iterations,
                subcalls,
                input_tokens,
                output_tokens,
                elapsed_ms,
                reason,
                root_model: ctx.model.clone(),
                subcall_model_used: ctx.subcall_model.clone(),
            }));
        }

        Ok(RlmResult {
            processed: result,
            stats: RlmStats {
                input_tokens,
                output_tokens,
                iterations,
                subcalls,
                elapsed_ms,
                compression_ratio,
            },
            success: outcome.is_success(),
            error: None,
            trace: Some(trace),
            trace_id: Some(trace_id),
        })
    }
765
766    fn extract_final(text: &str) -> Option<String> {
767        // Look for FINAL("...") or FINAL('...') or FINAL!(...)
768        let patterns = [r#"FINAL\s*\(\s*["'`]"#, r#"FINAL!\s*\(\s*["'`]?"#];
769
770        for _pattern_start in patterns {
771            if let Some(start_idx) = text.find("FINAL") {
772                let after = &text[start_idx..];
773
774                // Find the opening quote/paren
775                if let Some(open_idx) = after.find(['"', '\'', '`']) {
776                    let quote_char = after.chars().nth(open_idx)?;
777                    let content_start = start_idx + open_idx + 1;
778
779                    // Find matching close
780                    let content = &text[content_start..];
781                    if let Some(close_idx) = content.find(quote_char) {
782                        let answer = &content[..close_idx];
783                        if !answer.is_empty() {
784                            return Some(answer.to_string());
785                        }
786                    }
787                }
788            }
789        }
790
791        None
792    }
793
794    fn build_exploration_summary(content: &str, input_tokens: usize) -> String {
795        let lines: Vec<&str> = content.lines().collect();
796        let total_lines = lines.len();
797
798        let head: String = lines
799            .iter()
800            .take(30)
801            .copied()
802            .collect::<Vec<_>>()
803            .join("\n");
804        let tail: String = lines
805            .iter()
806            .rev()
807            .take(50)
808            .collect::<Vec<_>>()
809            .into_iter()
810            .rev()
811            .copied()
812            .collect::<Vec<_>>()
813            .join("\n");
814
815        format!(
816            "=== CONTEXT EXPLORATION ===\n\
817             Total: {} chars, {} lines, ~{} tokens\n\n\
818             === FIRST 30 LINES ===\n{}\n\n\
819             === LAST 50 LINES ===\n{}\n\
820             === END EXPLORATION ===",
821            content.len(),
822            total_lines,
823            input_tokens,
824            head,
825            tail
826        )
827    }
828
829    fn build_rlm_system_prompt(input_tokens: usize, tool_id: &str, query: &str) -> String {
830        let context_type = if tool_id == "session_context" {
831            "conversation history"
832        } else {
833            "tool output"
834        };
835
836        format!(
837            r#"You are tasked with analyzing large content that cannot fit in a normal context window.
838
839The content is a {} with {} total tokens.
840
841YOUR TASK: {}
842
843## Analysis Strategy
844
8451. First, examine the exploration (head + tail of content) to understand structure
8462. Identify the most important information for answering the query
8473. Focus on: errors, key decisions, file paths, recent activity
8484. Provide a concise but complete answer
849
850When ready, call FINAL("your detailed answer") with your findings.
851
852Be SPECIFIC - include actual file paths, function names, error messages. Generic summaries are not useful."#,
853            context_type, input_tokens, query
854        )
855    }
856
857    fn build_query_for_tool(tool_id: &str, args: &serde_json::Value) -> String {
858        match tool_id {
859            "read" => {
860                let path = args.get("filePath").and_then(|v| v.as_str()).unwrap_or("unknown");
861                format!("Summarize the key contents of file \"{}\". Focus on: structure, main functions/classes, important logic. Be concise.", path)
862            }
863            "bash" => {
864                "Summarize the command output. Extract key information, results, errors, warnings. Be concise.".to_string()
865            }
866            "grep" => {
867                let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("pattern");
868                format!("Summarize search results for \"{}\". Group by file, highlight most relevant matches. Be concise.", pattern)
869            }
870            "glob" => {
871                "Summarize the file listing. Group by directory, highlight important files. Be concise.".to_string()
872            }
873            "webfetch" => {
874                let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("unknown");
875                format!(
876                    "Extract and summarize the main human-readable content from this fetched web page (URL: {}). Ignore scripts, styles, navigation, cookie banners, and boilerplate. Preserve headings, lists, code blocks, and important links. Be concise.",
877                    url
878                )
879            }
880            "websearch" => {
881                let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("unknown");
882                format!(
883                    "Summarize these web search results for query: \"{}\". List the most relevant results with titles and URLs, and note why each is relevant. Be concise.",
884                    q
885                )
886            }
887            "batch" => {
888                "Summarize the batch tool output. Group by sub-call, highlight failures/errors first, then key results. Keep actionable details: URLs, file paths, command outputs, and next steps.".to_string()
889            }
890            "session_context" => {
891                r#"You are a CONTEXT MEMORY SYSTEM. Create a BRIEFING for an AI assistant to continue this conversation.
892
893CRITICAL: The assistant will ONLY see your briefing - it has NO memory of the conversation.
894
895## What to Extract
896
8971. **PRIMARY GOAL**: What is the user ultimately trying to achieve?
8982. **CURRENT STATE**: What has been accomplished? Current status?
8993. **LAST ACTIONS**: What just happened? (last 3-5 tool calls, their results)
9004. **ACTIVE FILES**: Which files were modified?
9015. **PENDING TASKS**: What remains to be done?
9026. **CRITICAL DETAILS**: File paths, error messages, specific values, decisions made
9037. **NEXT STEPS**: What should happen next?
904
905Be SPECIFIC with file paths, function names, error messages."#.to_string()
906            }
907            _ => "Summarize this output concisely, extracting the most important information.".to_string()
908        }
909    }
910
911    fn build_enhanced_fallback(
912        output: &str,
913        tool_id: &str,
914        tool_args: &serde_json::Value,
915        input_tokens: usize,
916    ) -> String {
917        let lines: Vec<&str> = output.lines().collect();
918
919        if tool_id == "session_context" {
920            // Extract key structural information
921            let file_matches: Vec<&str> = lines
922                .iter()
923                .filter_map(|l| {
924                    if l.contains(".ts")
925                        || l.contains(".rs")
926                        || l.contains(".py")
927                        || l.contains(".json")
928                    {
929                        Some(*l)
930                    } else {
931                        None
932                    }
933                })
934                .take(15)
935                .collect();
936
937            let tool_calls: Vec<&str> = lines
938                .iter()
939                .filter(|l| l.contains("[Tool "))
940                .take(10)
941                .copied()
942                .collect();
943
944            let errors: Vec<&str> = lines
945                .iter()
946                .filter(|l| {
947                    l.to_lowercase().contains("error") || l.to_lowercase().contains("failed")
948                })
949                .take(5)
950                .copied()
951                .collect();
952
953            let head: String = lines
954                .iter()
955                .take(30)
956                .copied()
957                .collect::<Vec<_>>()
958                .join("\n");
959            let tail: String = lines
960                .iter()
961                .rev()
962                .take(80)
963                .collect::<Vec<_>>()
964                .into_iter()
965                .rev()
966                .copied()
967                .collect::<Vec<_>>()
968                .join("\n");
969
970            let mut parts = vec![
971                "## Context Summary (Fallback Mode)".to_string(),
972                format!(
973                    "*Original: {} tokens - RLM processing produced insufficient output*",
974                    input_tokens
975                ),
976                String::new(),
977            ];
978
979            if !file_matches.is_empty() {
980                parts.push(format!("**Files Mentioned:** {}", file_matches.len()));
981            }
982
983            if !tool_calls.is_empty() {
984                parts.push(format!("**Recent Tool Calls:** {}", tool_calls.join(", ")));
985            }
986
987            if !errors.is_empty() {
988                parts.push("**Recent Errors:**".to_string());
989                for e in errors {
990                    parts.push(format!("- {}", e.chars().take(150).collect::<String>()));
991                }
992            }
993
994            parts.push(String::new());
995            parts.push("### Initial Request".to_string());
996            parts.push("```".to_string());
997            parts.push(head);
998            parts.push("```".to_string());
999            parts.push(String::new());
1000            parts.push("### Recent Activity".to_string());
1001            parts.push("```".to_string());
1002            parts.push(tail);
1003            parts.push("```".to_string());
1004
1005            parts.join("\n")
1006        } else {
1007            let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1008            format!(
1009                "## Fallback Summary\n*RLM processing failed - showing structured excerpt*\n\n{}",
1010                truncated
1011            )
1012        }
1013    }
1014
1015    fn fallback_result(
1016        output: &str,
1017        tool_id: &str,
1018        tool_args: &serde_json::Value,
1019        input_tokens: usize,
1020    ) -> RlmResult {
1021        let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1022        let output_tokens = RlmChunker::estimate_tokens(&truncated);
1023
1024        RlmResult {
1025            processed: format!(
1026                "[RLM processing failed, showing truncated output]\n\n{}",
1027                truncated
1028            ),
1029            stats: RlmStats {
1030                input_tokens,
1031                output_tokens,
1032                iterations: 0,
1033                subcalls: 0,
1034                elapsed_ms: 0,
1035                compression_ratio: input_tokens as f64 / output_tokens.max(1) as f64,
1036            },
1037            success: false,
1038            error: Some("Model call failed".to_string()),
1039            trace: None,
1040            trace_id: None,
1041        }
1042    }
1043}
1044
#[cfg(test)]
mod tests {
    use super::*;

    /// Routing context for a 200k-token-window model already holding ~1k
    /// tokens of conversation.
    fn ctx_for(tool_id: &str) -> RoutingContext {
        RoutingContext {
            tool_id: tool_id.to_string(),
            session_id: "s".to_string(),
            call_id: None,
            model_context_limit: 200_000,
            current_context_tokens: Some(1000),
        }
    }

    #[test]
    fn should_route_large_webfetch_output() {
        let output = "a".repeat(200_000);
        let result = RlmRouter::should_route(&output, &ctx_for("webfetch"), &RlmConfig::default());
        assert!(result.should_route);
    }

    #[test]
    fn should_route_large_output_for_unknown_tool_to_protect_context() {
        let output = "a".repeat(200_000);
        let result =
            RlmRouter::should_route(&output, &ctx_for("some_new_tool"), &RlmConfig::default());
        assert!(result.should_route);
        assert!(result.reason.contains("ineligible") || result.reason.contains("threshold"));
    }
}