1use super::{RlmChunker, RlmConfig, RlmResult, RlmStats};
11use crate::provider::{CompletionRequest, ContentPart, Message, Provider, Role};
12use crate::rlm::context_trace::{ContextEvent, ContextTrace};
13use crate::session::{RlmCompletion, RlmOutcome, RlmProgressEvent, SessionBus, SessionEvent};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::collections::HashSet;
17use std::sync::Arc;
18use std::time::Instant;
19use tracing::{info, warn};
20use uuid::Uuid;
21
22use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
23
24use super::tools::rlm_tool_definitions;
25
/// Tool ids whose output the RLM pipeline is allowed to take over.
///
/// Built fresh on each call; callers use it purely as a read-only lookup.
fn rlm_eligible_tools() -> HashSet<&'static str> {
    HashSet::from([
        "read",
        "glob",
        "grep",
        "bash",
        "search",
        "tree",
        "diff",
        "headtail",
        "webfetch",
        "websearch",
        "batch",
        "codesearch",
    ])
}
53
/// Inputs the router needs to decide whether a tool output should be
/// diverted into RLM processing instead of being injected verbatim.
#[derive(Debug, Clone)]
pub struct RoutingContext {
    // Id of the tool that produced the output (e.g. "read", "bash").
    pub tool_id: String,
    // Session the tool call belongs to.
    pub session_id: String,
    // Provider-side tool-call id, when known.
    pub call_id: Option<String>,
    // Context-window size, in tokens, of the model in use.
    pub model_context_limit: usize,
    // Tokens already consumed in the current context, if tracked by the caller.
    pub current_context_tokens: Option<usize>,
}
63
/// Routing verdict produced by `RlmRouter::should_route`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingResult {
    // Whether the output should go through RLM processing.
    pub should_route: bool,
    // Machine-readable reason tag, e.g. "exceeds_threshold", "rlm_mode_off".
    pub reason: String,
    // Estimated token count of the raw output.
    pub estimated_tokens: usize,
}
71
/// Everything `RlmRouter::auto_process` needs besides the raw output itself.
pub struct AutoProcessContext<'a> {
    // Tool that produced the output being processed.
    pub tool_id: &'a str,
    // Arguments the tool was invoked with (used to build the analysis query).
    pub tool_args: serde_json::Value,
    // Owning session id.
    pub session_id: &'a str,
    // Watch channel that flips to `true` when processing should stop.
    pub abort: Option<tokio::sync::watch::Receiver<bool>>,
    // Per-iteration progress callback.
    pub on_progress: Option<Box<dyn Fn(ProcessProgress) + Send + Sync>>,
    // Provider used for the first (root) model call.
    pub provider: Arc<dyn Provider>,
    // Model id for the root call.
    pub model: String,
    // Session event bus for progress/completion events, if any.
    pub bus: Option<SessionBus>,
    // Pre-allocated trace id; a fresh one is minted when absent.
    pub trace_id: Option<Uuid>,
    // Optional cheaper provider used from the second iteration onward.
    pub subcall_provider: Option<Arc<dyn Provider>>,
    // Model id for subcalls; falls back to `model` when unset.
    pub subcall_model: Option<String>,
}
137
/// Snapshot handed to the `on_progress` callback once per loop iteration.
#[derive(Debug, Clone)]
pub struct ProcessProgress {
    // 1-based iteration counter.
    pub iteration: usize,
    // Configured iteration cap, for progress display.
    pub max_iterations: usize,
    // Coarse status string, e.g. "running" or "completed".
    pub status: String,
}
145
/// Stateless namespace for RLM routing, smart truncation, and auto-processing.
pub struct RlmRouter;
148
149impl RlmRouter {
150 pub fn should_route(output: &str, ctx: &RoutingContext, config: &RlmConfig) -> RoutingResult {
152 let estimated_tokens = RlmChunker::estimate_tokens(output);
153
154 if config.mode == "off" {
156 return RoutingResult {
157 should_route: false,
158 reason: "rlm_mode_off".to_string(),
159 estimated_tokens,
160 };
161 }
162
163 if config.mode == "always" {
165 if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
166 return RoutingResult {
167 should_route: false,
168 reason: "tool_not_eligible".to_string(),
169 estimated_tokens,
170 };
171 }
172 return RoutingResult {
173 should_route: true,
174 reason: "rlm_mode_always".to_string(),
175 estimated_tokens,
176 };
177 }
178
179 let eligible = rlm_eligible_tools().contains(ctx.tool_id.as_str())
181 || ctx.tool_id.as_str() == "session_context";
182
183 let threshold_tokens = (ctx.model_context_limit as f64 * config.threshold) as usize;
185 if estimated_tokens > threshold_tokens {
186 return RoutingResult {
187 should_route: true,
188 reason: if eligible {
189 "exceeds_threshold".to_string()
190 } else {
191 "exceeds_threshold_ineligible_tool".to_string()
192 },
193 estimated_tokens,
194 };
195 }
196
197 if let Some(current) = ctx.current_context_tokens {
199 let projected_total = current + estimated_tokens;
200 if projected_total > (ctx.model_context_limit as f64 * 0.8) as usize {
201 return RoutingResult {
202 should_route: true,
203 reason: if eligible {
204 "would_overflow".to_string()
205 } else {
206 "would_overflow_ineligible_tool".to_string()
207 },
208 estimated_tokens,
209 };
210 }
211 }
212
213 if !eligible {
217 return RoutingResult {
218 should_route: false,
219 reason: "tool_not_eligible".to_string(),
220 estimated_tokens,
221 };
222 }
223
224 RoutingResult {
225 should_route: false,
226 reason: "within_threshold".to_string(),
227 estimated_tokens,
228 }
229 }
230
231 pub fn smart_truncate(
233 output: &str,
234 tool_id: &str,
235 tool_args: &serde_json::Value,
236 max_tokens: usize,
237 ) -> (String, bool, usize) {
238 let estimated_tokens = RlmChunker::estimate_tokens(output);
239
240 if estimated_tokens <= max_tokens {
241 return (output.to_string(), false, estimated_tokens);
242 }
243
244 info!(
245 tool = tool_id,
246 original_tokens = estimated_tokens,
247 max_tokens,
248 "Smart truncating large output"
249 );
250
251 let max_chars = max_tokens * 4;
253 let head_chars = (max_chars as f64 * 0.6) as usize;
254 let tail_chars = (max_chars as f64 * 0.3) as usize;
255
256 let head: String = output.chars().take(head_chars).collect();
257 let tail: String = output
258 .chars()
259 .rev()
260 .take(tail_chars)
261 .collect::<String>()
262 .chars()
263 .rev()
264 .collect();
265
266 let omitted_tokens = estimated_tokens
267 - RlmChunker::estimate_tokens(&head)
268 - RlmChunker::estimate_tokens(&tail);
269 let rlm_hint = Self::build_rlm_hint(tool_id, tool_args, estimated_tokens);
270
271 let truncated = format!(
272 "{}\n\n[... {} tokens truncated ...]\n\n{}\n\n{}",
273 head, omitted_tokens, rlm_hint, tail
274 );
275
276 (truncated, true, estimated_tokens)
277 }
278
279 fn build_rlm_hint(tool_id: &str, args: &serde_json::Value, tokens: usize) -> String {
280 let base = format!(
281 "⚠️ OUTPUT TOO LARGE ({} tokens). Use RLM for full analysis:",
282 tokens
283 );
284
285 match tool_id {
286 "read" => {
287 let path = args
288 .get("filePath")
289 .and_then(|v| v.as_str())
290 .unwrap_or("...");
291 format!(
292 "{}\n```\nrlm({{ query: \"Analyze this file\", content_paths: [\"{}\"] }})\n```",
293 base, path
294 )
295 }
296 "bash" => {
297 format!(
298 "{}\n```\nrlm({{ query: \"Analyze this command output\", content: \"<paste or use content_paths>\" }})\n```",
299 base
300 )
301 }
302 "grep" => {
303 let pattern = args
304 .get("pattern")
305 .and_then(|v| v.as_str())
306 .unwrap_or("...");
307 let include = args.get("include").and_then(|v| v.as_str()).unwrap_or("*");
308 format!(
309 "{}\n```\nrlm({{ query: \"Summarize search results for {}\", content_glob: \"{}\" }})\n```",
310 base, pattern, include
311 )
312 }
313 _ => {
314 format!(
315 "{}\n```\nrlm({{ query: \"Summarize this output\", content: \"...\" }})\n```",
316 base
317 )
318 }
319 }
320 }
321
    /// Runs the full RLM auto-processing loop over an oversized tool output.
    ///
    /// Per iteration (up to `config.max_iterations`):
    /// 1. Emit progress on the session bus and via the `on_progress` callback.
    /// 2. Bail out if the abort watch channel currently reads `true`.
    /// 3. Call the model — the root provider on iteration 1, then the cheaper
    ///    `subcall_provider`/`subcall_model` (when configured) afterwards.
    /// 4. If the response carries structured tool calls, dispatch them against
    ///    the in-memory REPL and feed the results back into the conversation.
    /// 5. Otherwise scan the plain-text response for a `FINAL("...")` marker.
    ///
    /// Always returns `Ok`: a provider failure after the first iteration just
    /// breaks the loop (a fallback answer is assembled), and a first-iteration
    /// failure returns a truncated fallback result. The returned `RlmResult`
    /// carries the context trace and run stats either way.
    pub async fn auto_process(
        output: &str,
        ctx: AutoProcessContext<'_>,
        config: &RlmConfig,
    ) -> Result<RlmResult> {
        let start = Instant::now();
        let input_tokens = RlmChunker::estimate_tokens(output);

        info!(
            tool = ctx.tool_id,
            input_tokens,
            model = %ctx.model,
            "RLM: Starting auto-processing"
        );

        // Reuse the caller's trace id so bus events correlate, or mint one.
        let trace_id = ctx.trace_id.unwrap_or_else(Uuid::new_v4);
        // Trace budget: twice the input size, floored at 4096 tokens.
        let trace_budget = input_tokens.saturating_mul(2).max(4096);
        let mut trace = ContextTrace::new(trace_budget);
        let mut aborted = false;

        // Optional response reformatter; its absence is non-fatal (debug log).
        let tool_router: Option<ToolCallRouter> = {
            let cfg = ToolRouterConfig::from_env();
            ToolCallRouter::from_config(&cfg)
                .inspect_err(|e| {
                    tracing::debug!(error = %e, "FunctionGemma router unavailable for RLM router");
                })
                .ok()
                .flatten()
        };

        let tools = rlm_tool_definitions();

        let content_type = RlmChunker::detect_content_type(output);
        let content_hints = RlmChunker::get_processing_hints(content_type);

        info!(content_type = ?content_type, tool = ctx.tool_id, "RLM: Content type detected");

        // Pre-compress very large inputs (> 50k tokens) down to ~40k before
        // any model sees them.
        let processed_output = if input_tokens > 50000 {
            RlmChunker::compress(output, 40000, None)
        } else {
            output.to_string()
        };

        let mut repl =
            super::repl::RlmRepl::new(processed_output.clone(), super::repl::ReplRuntime::Rust);

        // Tool-specific query plus content-type hints form the task statement.
        let base_query = Self::build_query_for_tool(ctx.tool_id, &ctx.tool_args);
        let query = format!(
            "{}\n\n## Content Analysis Hints\n{}",
            base_query, content_hints
        );

        let system_prompt = Self::build_rlm_system_prompt(input_tokens, ctx.tool_id, &query);
        trace.log_event(ContextEvent::SystemPrompt {
            content: system_prompt.clone(),
            tokens: ContextTrace::estimate_tokens(&system_prompt),
        });

        let max_iterations = config.max_iterations;
        let max_subcalls = config.max_subcalls;
        let mut iterations = 0;
        let mut subcalls = 0;
        let mut final_answer: Option<String> = None;

        let exploration = Self::build_exploration_summary(&processed_output, input_tokens);

        // The system prompt is folded into the first user message rather than
        // sent as a dedicated system-role message.
        let mut conversation = vec![Message {
            role: Role::User,
            content: vec![ContentPart::Text {
                text: format!(
                    "{}\n\nHere is the context exploration:\n```\n{}\n```\n\nNow analyze and answer the query.",
                    system_prompt, exploration
                ),
            }],
        }];

        for i in 0..max_iterations {
            iterations = i + 1;
            trace.next_iteration();

            // Progress is reported on both channels before the abort check,
            // so listeners always see the iteration that is about to run.
            if let Some(bus) = ctx.bus.as_ref() {
                bus.emit(SessionEvent::RlmProgress(RlmProgressEvent {
                    trace_id,
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                }));
            }

            if let Some(ref progress) = ctx.on_progress {
                progress(ProcessProgress {
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                });
            }

            // Cooperative cancellation: checked once per iteration.
            if let Some(ref abort) = ctx.abort
                && *abort.borrow()
            {
                warn!("RLM: Processing aborted");
                aborted = true;
                break;
            }

            // Iteration 1 uses the root provider/model; later iterations
            // switch to the cheaper subcall provider when one is configured
            // (model falls back to the root model if unset).
            let (active_provider, active_model) =
                if iterations > 1 && ctx.subcall_provider.is_some() {
                    (
                        Arc::clone(ctx.subcall_provider.as_ref().unwrap()),
                        ctx.subcall_model
                            .as_deref()
                            .unwrap_or(&ctx.model)
                            .to_string(),
                    )
                } else {
                    (Arc::clone(&ctx.provider), ctx.model.clone())
                };

            let request = CompletionRequest {
                messages: conversation.clone(),
                tools: tools.clone(),
                model: active_model.clone(),
                temperature: Some(0.7),
                top_p: None,
                max_tokens: Some(4000),
                stop: Vec::new(),
            };

            let response = match active_provider.complete(request).await {
                Ok(r) => r,
                Err(e) => {
                    warn!(error = %e, iteration = iterations, "RLM: Model call failed");
                    // After the first iteration we keep whatever progress was
                    // made and let the post-loop fallback assemble an answer.
                    if iterations > 1 {
                        break;
                    }
                    // First-call failure: report Failed on the bus and return
                    // a truncated fallback result carrying the trace.
                    if let Some(bus) = ctx.bus.as_ref() {
                        bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                            trace_id,
                            outcome: RlmOutcome::Failed,
                            iterations,
                            subcalls,
                            input_tokens,
                            output_tokens: 0,
                            elapsed_ms: start.elapsed().as_millis() as u64,
                            reason: Some(format!("provider call failed: {e}")),
                            root_model: ctx.model.clone(),
                            subcall_model_used: ctx.subcall_model.clone(),
                        }));
                    }
                    let mut fb =
                        Self::fallback_result(output, ctx.tool_id, &ctx.tool_args, input_tokens);
                    fb.trace = Some(trace);
                    fb.trace_id = Some(trace_id);
                    return Ok(fb);
                }
            };

            // Give the optional router a chance to turn free-text tool syntax
            // into structured tool calls.
            let response = if let Some(ref router) = tool_router {
                router.maybe_reformat(response, &tools, true).await
            } else {
                response
            };

            // Collect (id, name, arguments) for every structured tool call.
            let tool_calls: Vec<(String, String, String)> = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::ToolCall {
                        id,
                        name,
                        arguments,
                        ..
                    } => Some((id.clone(), name.clone(), arguments.clone())),
                    _ => None,
                })
                .collect();

            if !tool_calls.is_empty() {
                info!(
                    count = tool_calls.len(),
                    iteration = iterations,
                    "RLM router: dispatching structured tool calls"
                );

                conversation.push(Message {
                    role: Role::Assistant,
                    content: response.message.content.clone(),
                });

                let mut tool_results: Vec<ContentPart> = Vec::new();

                for (call_id, name, arguments) in &tool_calls {
                    trace.log_event(ContextEvent::ToolCall {
                        name: name.clone(),
                        arguments_preview: arguments.chars().take(200).collect(),
                        tokens: ContextTrace::estimate_tokens(arguments),
                    });
                    match super::tools::dispatch_tool_call(name, arguments, &mut repl) {
                        // FINAL short-circuits: remaining calls are skipped.
                        Some(super::tools::RlmToolResult::Final(answer)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: "FINAL received".to_string(),
                                tokens: 0,
                            });
                            final_answer = Some(answer);
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: "FINAL received".to_string(),
                            });
                            break;
                        }
                        Some(super::tools::RlmToolResult::Output(out)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: out.chars().take(200).collect(),
                                tokens: ContextTrace::estimate_tokens(&out),
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: out,
                            });
                        }
                        // Unknown tool names are reported back to the model
                        // rather than aborting the loop.
                        None => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: format!("Unknown tool: {name}"),
                                tokens: 0,
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: format!("Unknown tool: {name}"),
                            });
                        }
                    }
                }

                if !tool_results.is_empty() {
                    conversation.push(Message {
                        role: Role::Tool,
                        content: tool_results,
                    });
                }

                // One subcall per iteration that dispatched tool calls.
                subcalls += 1;
                if final_answer.is_some() || subcalls >= max_subcalls {
                    break;
                }
                continue;
            }

            // No structured tool calls: fall back to scanning the plain text.
            let response_text: String = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");

            info!(
                iteration = iterations,
                response_len = response_text.len(),
                "RLM: Model response (text-only fallback)"
            );
            trace.log_event(ContextEvent::LlmQueryResult {
                query: query.chars().take(120).collect(),
                response_preview: response_text.chars().take(200).collect(),
                tokens: ContextTrace::estimate_tokens(&response_text),
            });

            if let Some(answer) = Self::extract_final(&response_text) {
                final_answer = Some(answer);
                break;
            }

            // Heuristic acceptance: from iteration 3 on, a long response with
            // no code fences is taken as the answer even without FINAL.
            if iterations >= 3 && response_text.len() > 500 && !response_text.contains("```") {
                final_answer = Some(response_text.clone());
                break;
            }

            conversation.push(Message {
                role: Role::Assistant,
                content: vec![ContentPart::Text {
                    text: response_text,
                }],
            });

            conversation.push(Message {
                role: Role::User,
                content: vec![ContentPart::Text {
                    text: "Continue analysis. Call FINAL(\"your answer\") when ready.".to_string(),
                }],
            });

            subcalls += 1;
            if subcalls >= max_subcalls {
                warn!(subcalls, max = max_subcalls, "RLM: Max subcalls reached");
                break;
            }
        }

        if let Some(ref progress) = ctx.on_progress {
            progress(ProcessProgress {
                iteration: iterations,
                max_iterations,
                status: "completed".to_string(),
            });
        }

        // Captured before `final_answer` is consumed below.
        let converged = final_answer.is_some();

        let answer = final_answer.unwrap_or_else(|| {
            warn!(
                iterations,
                subcalls, "RLM: No FINAL produced, using fallback"
            );
            Self::build_enhanced_fallback(output, ctx.tool_id, &ctx.tool_args, input_tokens)
        });

        let output_tokens = RlmChunker::estimate_tokens(&answer);
        let compression_ratio = input_tokens as f64 / output_tokens.max(1) as f64;
        let elapsed_ms = start.elapsed().as_millis() as u64;

        // Stats banner prepended to the processed answer.
        let result = format!(
            "[RLM: {} → {} tokens | {} iterations | {} sub-calls]\n\n{}",
            input_tokens, output_tokens, iterations, subcalls, answer
        );

        info!(
            input_tokens,
            output_tokens,
            iterations,
            subcalls,
            elapsed_ms,
            compression_ratio = format!("{:.1}", compression_ratio),
            "RLM: Processing complete"
        );

        trace.log_event(ContextEvent::Final {
            answer: answer.chars().take(200).collect(),
            tokens: output_tokens,
        });

        let outcome = if aborted {
            RlmOutcome::Aborted
        } else if converged {
            RlmOutcome::Converged
        } else {
            RlmOutcome::Exhausted
        };
        let reason = match outcome {
            RlmOutcome::Converged => None,
            RlmOutcome::Aborted => Some("abort signal".to_string()),
            RlmOutcome::Exhausted => Some(format!(
                "no FINAL after {iterations} iterations / {subcalls} subcalls"
            )),
            // Failed is returned early above, so this arm is not reached here.
            RlmOutcome::Failed => None,
        };
        if let Some(bus) = ctx.bus.as_ref() {
            bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                trace_id,
                outcome,
                iterations,
                subcalls,
                input_tokens,
                output_tokens,
                elapsed_ms,
                reason,
                root_model: ctx.model.clone(),
                subcall_model_used: ctx.subcall_model.clone(),
            }));
        }

        Ok(RlmResult {
            processed: result,
            stats: RlmStats {
                input_tokens,
                output_tokens,
                iterations,
                subcalls,
                elapsed_ms,
                compression_ratio,
            },
            success: outcome.is_success(),
            error: None,
            trace: Some(trace),
            trace_id: Some(trace_id),
        })
    }
754
755 fn extract_final(text: &str) -> Option<String> {
756 let patterns = [r#"FINAL\s*\(\s*["'`]"#, r#"FINAL!\s*\(\s*["'`]?"#];
758
759 for _pattern_start in patterns {
760 if let Some(start_idx) = text.find("FINAL") {
761 let after = &text[start_idx..];
762
763 if let Some(open_idx) = after.find(['"', '\'', '`']) {
765 let quote_char = after.chars().nth(open_idx)?;
766 let content_start = start_idx + open_idx + 1;
767
768 let content = &text[content_start..];
770 if let Some(close_idx) = content.find(quote_char) {
771 let answer = &content[..close_idx];
772 if !answer.is_empty() {
773 return Some(answer.to_string());
774 }
775 }
776 }
777 }
778 }
779
780 None
781 }
782
783 fn build_exploration_summary(content: &str, input_tokens: usize) -> String {
784 let lines: Vec<&str> = content.lines().collect();
785 let total_lines = lines.len();
786
787 let head: String = lines
788 .iter()
789 .take(30)
790 .copied()
791 .collect::<Vec<_>>()
792 .join("\n");
793 let tail: String = lines
794 .iter()
795 .rev()
796 .take(50)
797 .collect::<Vec<_>>()
798 .into_iter()
799 .rev()
800 .copied()
801 .collect::<Vec<_>>()
802 .join("\n");
803
804 format!(
805 "=== CONTEXT EXPLORATION ===\n\
806 Total: {} chars, {} lines, ~{} tokens\n\n\
807 === FIRST 30 LINES ===\n{}\n\n\
808 === LAST 50 LINES ===\n{}\n\
809 === END EXPLORATION ===",
810 content.len(),
811 total_lines,
812 input_tokens,
813 head,
814 tail
815 )
816 }
817
818 fn build_rlm_system_prompt(input_tokens: usize, tool_id: &str, query: &str) -> String {
819 let context_type = if tool_id == "session_context" {
820 "conversation history"
821 } else {
822 "tool output"
823 };
824
825 format!(
826 r#"You are tasked with analyzing large content that cannot fit in a normal context window.
827
828The content is a {} with {} total tokens.
829
830YOUR TASK: {}
831
832## Analysis Strategy
833
8341. First, examine the exploration (head + tail of content) to understand structure
8352. Identify the most important information for answering the query
8363. Focus on: errors, key decisions, file paths, recent activity
8374. Provide a concise but complete answer
838
839When ready, call FINAL("your detailed answer") with your findings.
840
841Be SPECIFIC - include actual file paths, function names, error messages. Generic summaries are not useful."#,
842 context_type, input_tokens, query
843 )
844 }
845
846 fn build_query_for_tool(tool_id: &str, args: &serde_json::Value) -> String {
847 match tool_id {
848 "read" => {
849 let path = args.get("filePath").and_then(|v| v.as_str()).unwrap_or("unknown");
850 format!("Summarize the key contents of file \"{}\". Focus on: structure, main functions/classes, important logic. Be concise.", path)
851 }
852 "bash" => {
853 "Summarize the command output. Extract key information, results, errors, warnings. Be concise.".to_string()
854 }
855 "grep" => {
856 let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("pattern");
857 format!("Summarize search results for \"{}\". Group by file, highlight most relevant matches. Be concise.", pattern)
858 }
859 "glob" => {
860 "Summarize the file listing. Group by directory, highlight important files. Be concise.".to_string()
861 }
862 "webfetch" => {
863 let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("unknown");
864 format!(
865 "Extract and summarize the main human-readable content from this fetched web page (URL: {}). Ignore scripts, styles, navigation, cookie banners, and boilerplate. Preserve headings, lists, code blocks, and important links. Be concise.",
866 url
867 )
868 }
869 "websearch" => {
870 let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("unknown");
871 format!(
872 "Summarize these web search results for query: \"{}\". List the most relevant results with titles and URLs, and note why each is relevant. Be concise.",
873 q
874 )
875 }
876 "batch" => {
877 "Summarize the batch tool output. Group by sub-call, highlight failures/errors first, then key results. Keep actionable details: URLs, file paths, command outputs, and next steps.".to_string()
878 }
879 "session_context" => {
880 r#"You are a CONTEXT MEMORY SYSTEM. Create a BRIEFING for an AI assistant to continue this conversation.
881
882CRITICAL: The assistant will ONLY see your briefing - it has NO memory of the conversation.
883
884## What to Extract
885
8861. **PRIMARY GOAL**: What is the user ultimately trying to achieve?
8872. **CURRENT STATE**: What has been accomplished? Current status?
8883. **LAST ACTIONS**: What just happened? (last 3-5 tool calls, their results)
8894. **ACTIVE FILES**: Which files were modified?
8905. **PENDING TASKS**: What remains to be done?
8916. **CRITICAL DETAILS**: File paths, error messages, specific values, decisions made
8927. **NEXT STEPS**: What should happen next?
893
894Be SPECIFIC with file paths, function names, error messages."#.to_string()
895 }
896 _ => "Summarize this output concisely, extracting the most important information.".to_string()
897 }
898 }
899
    /// Builds a structured summary without any model help, used when the RLM
    /// loop finished without producing a FINAL answer.
    ///
    /// For `session_context` it assembles a heuristic briefing (file mentions,
    /// recent tool calls, error lines, head/tail excerpts); for every other
    /// tool it returns a smart-truncated excerpt.
    fn build_enhanced_fallback(
        output: &str,
        tool_id: &str,
        tool_args: &serde_json::Value,
        input_tokens: usize,
    ) -> String {
        let lines: Vec<&str> = output.lines().collect();

        if tool_id == "session_context" {
            // Lines that look like they mention source/config files.
            // NOTE(review): substring match, so e.g. ".tsx" also hits — only
            // the COUNT of these lines is reported below.
            let file_matches: Vec<&str> = lines
                .iter()
                .filter_map(|l| {
                    if l.contains(".ts")
                        || l.contains(".rs")
                        || l.contains(".py")
                        || l.contains(".json")
                    {
                        Some(*l)
                    } else {
                        None
                    }
                })
                .take(15)
                .collect();

            // Transcript lines marking tool invocations (first 10).
            let tool_calls: Vec<&str> = lines
                .iter()
                .filter(|l| l.contains("[Tool "))
                .take(10)
                .copied()
                .collect();

            // First few error-ish lines, matched case-insensitively.
            let errors: Vec<&str> = lines
                .iter()
                .filter(|l| {
                    l.to_lowercase().contains("error") || l.to_lowercase().contains("failed")
                })
                .take(5)
                .copied()
                .collect();

            // Head = opening of the conversation; tail = most recent activity
            // (last 80 lines, kept in original order).
            let head: String = lines
                .iter()
                .take(30)
                .copied()
                .collect::<Vec<_>>()
                .join("\n");
            let tail: String = lines
                .iter()
                .rev()
                .take(80)
                .collect::<Vec<_>>()
                .into_iter()
                .rev()
                .copied()
                .collect::<Vec<_>>()
                .join("\n");

            // Assemble the markdown briefing line by line.
            let mut parts = vec![
                "## Context Summary (Fallback Mode)".to_string(),
                format!(
                    "*Original: {} tokens - RLM processing produced insufficient output*",
                    input_tokens
                ),
                String::new(),
            ];

            if !file_matches.is_empty() {
                parts.push(format!("**Files Mentioned:** {}", file_matches.len()));
            }

            if !tool_calls.is_empty() {
                parts.push(format!("**Recent Tool Calls:** {}", tool_calls.join(", ")));
            }

            if !errors.is_empty() {
                parts.push("**Recent Errors:**".to_string());
                for e in errors {
                    // Cap each reported error line at 150 chars.
                    parts.push(format!("- {}", e.chars().take(150).collect::<String>()));
                }
            }

            parts.push(String::new());
            parts.push("### Initial Request".to_string());
            parts.push("```".to_string());
            parts.push(head);
            parts.push("```".to_string());
            parts.push(String::new());
            parts.push("### Recent Activity".to_string());
            parts.push("```".to_string());
            parts.push(tail);
            parts.push("```".to_string());

            parts.join("\n")
        } else {
            // Non-session tools: structured excerpt within an ~8k token budget.
            let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
            format!(
                "## Fallback Summary\n*RLM processing failed - showing structured excerpt*\n\n{}",
                truncated
            )
        }
    }
1003
1004 fn fallback_result(
1005 output: &str,
1006 tool_id: &str,
1007 tool_args: &serde_json::Value,
1008 input_tokens: usize,
1009 ) -> RlmResult {
1010 let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1011 let output_tokens = RlmChunker::estimate_tokens(&truncated);
1012
1013 RlmResult {
1014 processed: format!(
1015 "[RLM processing failed, showing truncated output]\n\n{}",
1016 truncated
1017 ),
1018 stats: RlmStats {
1019 input_tokens,
1020 output_tokens,
1021 iterations: 0,
1022 subcalls: 0,
1023 elapsed_ms: 0,
1024 compression_ratio: input_tokens as f64 / output_tokens.max(1) as f64,
1025 },
1026 success: false,
1027 error: Some("Model call failed".to_string()),
1028 trace: None,
1029 trace_id: None,
1030 }
1031 }
1032}
1033
#[cfg(test)]
mod tests {
    use super::*;

    /// Routing context with a 200k-token window and 1k tokens already in use.
    fn make_ctx(tool_id: &str) -> RoutingContext {
        RoutingContext {
            tool_id: tool_id.to_string(),
            session_id: "s".to_string(),
            call_id: None,
            model_context_limit: 200_000,
            current_context_tokens: Some(1000),
        }
    }

    #[test]
    fn should_route_large_webfetch_output() {
        let big_output = "a".repeat(200_000);
        let verdict =
            RlmRouter::should_route(&big_output, &make_ctx("webfetch"), &RlmConfig::default());
        assert!(verdict.should_route);
    }

    #[test]
    fn should_route_large_output_for_unknown_tool_to_protect_context() {
        let big_output = "a".repeat(200_000);
        let verdict =
            RlmRouter::should_route(&big_output, &make_ctx("some_new_tool"), &RlmConfig::default());
        assert!(verdict.should_route);
        assert!(verdict.reason.contains("ineligible") || verdict.reason.contains("threshold"));
    }
}