1use super::{RlmChunker, RlmConfig, RlmResult, RlmStats};
11use crate::provider::{CompletionRequest, ContentPart, Message, Provider, Role};
12use crate::rlm::context_trace::{ContextEvent, ContextTrace};
13use crate::session::{RlmCompletion, RlmOutcome, RlmProgressEvent, SessionBus, SessionEvent};
14use anyhow::Result;
15use serde::{Deserialize, Serialize};
16use std::collections::HashSet;
17use std::sync::Arc;
18use std::time::Instant;
19use tracing::{info, warn};
20use uuid::Uuid;
21
22use crate::cognition::tool_router::{ToolCallRouter, ToolRouterConfig};
23
24use super::tools::rlm_tool_definitions;
25
/// Set of tool ids whose oversized output may be routed through RLM
/// processing. Returned fresh on each call.
fn rlm_eligible_tools() -> HashSet<&'static str> {
    HashSet::from(["webfetch", "websearch", "batch"])
}
46
/// Inputs describing the tool call whose output is being considered for
/// RLM routing.
#[derive(Debug, Clone)]
pub struct RoutingContext {
    // Identifier of the tool that produced the output (e.g. "webfetch").
    pub tool_id: String,
    // Session the tool call belongs to.
    pub session_id: String,
    // Provider-assigned tool-call id, when available.
    pub call_id: Option<String>,
    // Context window size (tokens) of the active model.
    pub model_context_limit: usize,
    // Tokens already consumed in the context, if known; used by
    // `should_route` for overflow projection.
    pub current_context_tokens: Option<usize>,
}
56
/// Outcome of `RlmRouter::should_route`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoutingResult {
    // Whether the output should be diverted through RLM processing.
    pub should_route: bool,
    // Machine-readable reason code (e.g. "exceeds_threshold",
    // "tool_not_eligible").
    pub reason: String,
    // Estimated token count of the output under consideration.
    pub estimated_tokens: usize,
}
64
/// Everything `RlmRouter::auto_process` needs to run the analysis loop.
pub struct AutoProcessContext<'a> {
    // Tool that produced the oversized output.
    pub tool_id: &'a str,
    // Arguments the tool was invoked with; used to build the analysis query.
    pub tool_args: serde_json::Value,
    // Owning session id.
    pub session_id: &'a str,
    // Optional abort signal; polled at the top of each iteration.
    pub abort: Option<tokio::sync::watch::Receiver<bool>>,
    // Optional per-iteration progress callback.
    pub on_progress: Option<Box<dyn Fn(ProcessProgress) + Send + Sync>>,
    // Provider used for the first (root) model call.
    pub provider: Arc<dyn Provider>,
    // Model used for the root call.
    pub model: String,
    // Optional session event bus for progress/completion events.
    pub bus: Option<SessionBus>,
    // Trace id to correlate events; a fresh UUID is generated if absent.
    pub trace_id: Option<Uuid>,
    // Optional provider used for iterations after the first (presumably a
    // cheaper model for sub-calls — confirm against callers).
    pub subcall_provider: Option<Arc<dyn Provider>>,
    // Model for sub-iterations; falls back to `model` when unset.
    pub subcall_model: Option<String>,
}
130
/// Progress snapshot reported once per RLM iteration.
#[derive(Debug, Clone)]
pub struct ProcessProgress {
    // Current iteration (1-based).
    pub iteration: usize,
    // Configured iteration cap.
    pub max_iterations: usize,
    // Status label; the code emits "running" and "completed".
    pub status: String,
}
138
/// Stateless namespace for RLM routing decisions and the auto-processing loop.
pub struct RlmRouter;
141
142impl RlmRouter {
143 pub fn should_route(output: &str, ctx: &RoutingContext, config: &RlmConfig) -> RoutingResult {
145 let estimated_tokens = RlmChunker::estimate_tokens(output);
146
147 if config.mode == "off" {
149 return RoutingResult {
150 should_route: false,
151 reason: "rlm_mode_off".to_string(),
152 estimated_tokens,
153 };
154 }
155
156 if config.mode == "always" {
158 if !rlm_eligible_tools().contains(ctx.tool_id.as_str()) {
159 return RoutingResult {
160 should_route: false,
161 reason: "tool_not_eligible".to_string(),
162 estimated_tokens,
163 };
164 }
165 return RoutingResult {
166 should_route: true,
167 reason: "rlm_mode_always".to_string(),
168 estimated_tokens,
169 };
170 }
171
172 let eligible = rlm_eligible_tools().contains(ctx.tool_id.as_str())
174 || ctx.tool_id.as_str() == "session_context";
175
176 let threshold_tokens = (ctx.model_context_limit as f64 * config.threshold) as usize;
178 if estimated_tokens > threshold_tokens {
179 return RoutingResult {
180 should_route: true,
181 reason: if eligible {
182 "exceeds_threshold".to_string()
183 } else {
184 "exceeds_threshold_ineligible_tool".to_string()
185 },
186 estimated_tokens,
187 };
188 }
189
190 if let Some(current) = ctx.current_context_tokens {
206 let projected_total = current + estimated_tokens;
207 let overflow_limit = (ctx.model_context_limit as f64 * 0.8) as usize;
208 let output_dominates = estimated_tokens * 2 >= projected_total;
211 if projected_total > overflow_limit && output_dominates {
212 return RoutingResult {
213 should_route: true,
214 reason: if eligible {
215 "would_overflow".to_string()
216 } else {
217 "would_overflow_ineligible_tool".to_string()
218 },
219 estimated_tokens,
220 };
221 }
222 }
223
224 if !eligible {
228 return RoutingResult {
229 should_route: false,
230 reason: "tool_not_eligible".to_string(),
231 estimated_tokens,
232 };
233 }
234
235 RoutingResult {
236 should_route: false,
237 reason: "within_threshold".to_string(),
238 estimated_tokens,
239 }
240 }
241
242 pub fn smart_truncate(
244 output: &str,
245 tool_id: &str,
246 tool_args: &serde_json::Value,
247 max_tokens: usize,
248 ) -> (String, bool, usize) {
249 let estimated_tokens = RlmChunker::estimate_tokens(output);
250
251 if estimated_tokens <= max_tokens {
252 return (output.to_string(), false, estimated_tokens);
253 }
254
255 info!(
256 tool = tool_id,
257 original_tokens = estimated_tokens,
258 max_tokens,
259 "Smart truncating large output"
260 );
261
262 let max_chars = max_tokens * 4;
264 let head_chars = (max_chars as f64 * 0.6) as usize;
265 let tail_chars = (max_chars as f64 * 0.3) as usize;
266
267 let head: String = output.chars().take(head_chars).collect();
268 let tail: String = output
269 .chars()
270 .rev()
271 .take(tail_chars)
272 .collect::<String>()
273 .chars()
274 .rev()
275 .collect();
276
277 let omitted_tokens = estimated_tokens
278 - RlmChunker::estimate_tokens(&head)
279 - RlmChunker::estimate_tokens(&tail);
280 let rlm_hint = Self::build_rlm_hint(tool_id, tool_args, estimated_tokens);
281
282 let truncated = format!(
283 "{}\n\n[... {} tokens truncated ...]\n\n{}\n\n{}",
284 head, omitted_tokens, rlm_hint, tail
285 );
286
287 (truncated, true, estimated_tokens)
288 }
289
290 fn build_rlm_hint(tool_id: &str, args: &serde_json::Value, tokens: usize) -> String {
291 let base = format!(
292 "⚠️ OUTPUT TOO LARGE ({} tokens). Use RLM for full analysis:",
293 tokens
294 );
295
296 match tool_id {
297 "read" => {
298 let path = args
299 .get("filePath")
300 .and_then(|v| v.as_str())
301 .unwrap_or("...");
302 format!(
303 "{}\n```\nrlm({{ query: \"Analyze this file\", content_paths: [\"{}\"] }})\n```",
304 base, path
305 )
306 }
307 "bash" => {
308 format!(
309 "{}\n```\nrlm({{ query: \"Analyze this command output\", content: \"<paste or use content_paths>\" }})\n```",
310 base
311 )
312 }
313 "grep" => {
314 let pattern = args
315 .get("pattern")
316 .and_then(|v| v.as_str())
317 .unwrap_or("...");
318 let include = args.get("include").and_then(|v| v.as_str()).unwrap_or("*");
319 format!(
320 "{}\n```\nrlm({{ query: \"Summarize search results for {}\", content_glob: \"{}\" }})\n```",
321 base, pattern, include
322 )
323 }
324 _ => {
325 format!(
326 "{}\n```\nrlm({{ query: \"Summarize this output\", content: \"...\" }})\n```",
327 base
328 )
329 }
330 }
331 }
332
    /// Runs the RLM auto-processing loop over an oversized tool `output`.
    ///
    /// Iteratively prompts a model (with structured tools and a text-only
    /// fallback path) until it produces a `FINAL(...)` answer, the iteration
    /// or sub-call budget is exhausted, or the abort signal fires. Emits
    /// progress/completion events on the session bus when one is provided.
    /// Always returns `Ok` — provider failure on the first call degrades to a
    /// truncated fallback result rather than an error.
    pub async fn auto_process(
        output: &str,
        ctx: AutoProcessContext<'_>,
        config: &RlmConfig,
    ) -> Result<RlmResult> {
        let start = Instant::now();
        let input_tokens = RlmChunker::estimate_tokens(output);

        info!(
            tool = ctx.tool_id,
            input_tokens,
            model = %ctx.model,
            "RLM: Starting auto-processing"
        );

        let trace_id = ctx.trace_id.unwrap_or_else(Uuid::new_v4);
        // Trace budget scales with input size, with a 4096-token floor.
        let trace_budget = input_tokens.saturating_mul(2).max(4096);
        let mut trace = ContextTrace::new(trace_budget);
        let mut aborted = false;

        // Optional reformatting router; absence is non-fatal (debug-logged).
        let tool_router: Option<ToolCallRouter> = {
            let cfg = ToolRouterConfig::from_env();
            ToolCallRouter::from_config(&cfg)
                .inspect_err(|e| {
                    tracing::debug!(error = %e, "FunctionGemma router unavailable for RLM router");
                })
                .ok()
                .flatten()
        };

        let tools = rlm_tool_definitions();

        let content_type = RlmChunker::detect_content_type(output);
        let content_hints = RlmChunker::get_processing_hints(content_type);

        info!(content_type = ?content_type, tool = ctx.tool_id, "RLM: Content type detected");

        // Very large inputs are pre-compressed before entering the loop.
        let processed_output = if input_tokens > 50000 {
            RlmChunker::compress(output, 40000, None)
        } else {
            output.to_string()
        };

        let mut repl =
            super::repl::RlmRepl::new(processed_output.clone(), super::repl::ReplRuntime::Rust);

        // Tool-specific query plus content-type analysis hints.
        let base_query = Self::build_query_for_tool(ctx.tool_id, &ctx.tool_args);
        let query = format!(
            "{}\n\n## Content Analysis Hints\n{}",
            base_query, content_hints
        );

        let system_prompt = Self::build_rlm_system_prompt(input_tokens, ctx.tool_id, &query);
        trace.log_event(ContextEvent::SystemPrompt {
            content: system_prompt.clone(),
            tokens: ContextTrace::estimate_tokens(&system_prompt),
        });

        let max_iterations = config.max_iterations;
        let max_subcalls = config.max_subcalls;
        let mut iterations = 0;
        let mut subcalls = 0;
        let mut final_answer: Option<String> = None;

        // Head/tail preview seeds the model's first look at the material.
        let exploration = Self::build_exploration_summary(&processed_output, input_tokens);

        let mut conversation = vec![Message {
            role: Role::User,
            content: vec![ContentPart::Text {
                text: format!(
                    "{}\n\nHere is the context exploration:\n```\n{}\n```\n\nNow analyze and answer the query.",
                    system_prompt, exploration
                ),
            }],
        }];

        for i in 0..max_iterations {
            iterations = i + 1;
            trace.next_iteration();

            // Progress is reported both on the bus and via the callback.
            if let Some(bus) = ctx.bus.as_ref() {
                bus.emit(SessionEvent::RlmProgress(RlmProgressEvent {
                    trace_id,
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                }));
            }

            if let Some(ref progress) = ctx.on_progress {
                progress(ProcessProgress {
                    iteration: iterations,
                    max_iterations,
                    status: "running".to_string(),
                });
            }

            // Cooperative abort check at the top of each iteration.
            if let Some(ref abort) = ctx.abort
                && *abort.borrow()
            {
                warn!("RLM: Processing aborted");
                aborted = true;
                break;
            }

            // First iteration uses the root provider/model; later iterations
            // switch to the sub-call provider when configured.
            let (active_provider, active_model) =
                if iterations > 1 && ctx.subcall_provider.is_some() {
                    (
                        Arc::clone(ctx.subcall_provider.as_ref().unwrap()),
                        ctx.subcall_model
                            .as_deref()
                            .unwrap_or(&ctx.model)
                            .to_string(),
                    )
                } else {
                    (Arc::clone(&ctx.provider), ctx.model.clone())
                };

            let request = CompletionRequest {
                messages: conversation.clone(),
                tools: tools.clone(),
                model: active_model.clone(),
                temperature: Some(0.7),
                top_p: None,
                max_tokens: Some(4000),
                stop: Vec::new(),
            };

            let response = match active_provider.complete(request).await {
                Ok(r) => r,
                Err(e) => {
                    warn!(error = %e, iteration = iterations, "RLM: Model call failed");
                    // Failure after the first iteration: keep whatever
                    // progress we have and fall through to the wrap-up.
                    if iterations > 1 {
                        break;
                    }
                    // Failure on the very first call: emit Failed completion
                    // and return the truncated fallback result.
                    if let Some(bus) = ctx.bus.as_ref() {
                        bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                            trace_id,
                            outcome: RlmOutcome::Failed,
                            iterations,
                            subcalls,
                            input_tokens,
                            output_tokens: 0,
                            elapsed_ms: start.elapsed().as_millis() as u64,
                            reason: Some(format!("provider call failed: {e}")),
                            root_model: ctx.model.clone(),
                            subcall_model_used: ctx.subcall_model.clone(),
                        }));
                    }
                    let mut fb =
                        Self::fallback_result(output, ctx.tool_id, &ctx.tool_args, input_tokens);
                    fb.trace = Some(trace);
                    fb.trace_id = Some(trace_id);
                    return Ok(fb);
                }
            };

            // Optionally reformat free-text tool calls into structured ones.
            let response = if let Some(ref router) = tool_router {
                router.maybe_reformat(response, &tools, true).await
            } else {
                response
            };

            // Collect (id, name, arguments) for every structured tool call.
            let tool_calls: Vec<(String, String, String)> = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::ToolCall {
                        id,
                        name,
                        arguments,
                        ..
                    } => Some((id.clone(), name.clone(), arguments.clone())),
                    _ => None,
                })
                .collect();

            // Structured path: dispatch each tool call against the REPL.
            if !tool_calls.is_empty() {
                info!(
                    count = tool_calls.len(),
                    iteration = iterations,
                    "RLM router: dispatching structured tool calls"
                );

                conversation.push(Message {
                    role: Role::Assistant,
                    content: response.message.content.clone(),
                });

                let mut tool_results: Vec<ContentPart> = Vec::new();

                for (call_id, name, arguments) in &tool_calls {
                    trace.log_event(ContextEvent::ToolCall {
                        name: name.clone(),
                        arguments_preview: arguments.chars().take(200).collect(),
                        tokens: ContextTrace::estimate_tokens(arguments),
                    });
                    match super::tools::dispatch_tool_call(name, arguments, &mut repl) {
                        // FINAL terminates the loop; remaining calls in this
                        // batch are intentionally skipped.
                        Some(super::tools::RlmToolResult::Final(answer)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: "FINAL received".to_string(),
                                tokens: 0,
                            });
                            final_answer = Some(answer);
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: "FINAL received".to_string(),
                            });
                            break;
                        }
                        Some(super::tools::RlmToolResult::Output(out)) => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: out.chars().take(200).collect(),
                                tokens: ContextTrace::estimate_tokens(&out),
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: out,
                            });
                        }
                        // Unknown tool name: report back to the model rather
                        // than failing the run.
                        None => {
                            trace.log_event(ContextEvent::ToolResult {
                                tool_call_id: call_id.clone(),
                                result_preview: format!("Unknown tool: {name}"),
                                tokens: 0,
                            });
                            tool_results.push(ContentPart::ToolResult {
                                tool_call_id: call_id.clone(),
                                content: format!("Unknown tool: {name}"),
                            });
                        }
                    }
                }

                if !tool_results.is_empty() {
                    conversation.push(Message {
                        role: Role::Tool,
                        content: tool_results,
                    });
                }

                subcalls += 1;
                if final_answer.is_some() || subcalls >= max_subcalls {
                    break;
                }
                continue;
            }

            // Text-only fallback path: no structured tool calls were made.
            let response_text: String = response
                .message
                .content
                .iter()
                .filter_map(|p| match p {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect::<Vec<_>>()
                .join("\n");

            info!(
                iteration = iterations,
                response_len = response_text.len(),
                "RLM: Model response (text-only fallback)"
            );
            trace.log_event(ContextEvent::LlmQueryResult {
                query: query.chars().take(120).collect(),
                response_preview: response_text.chars().take(200).collect(),
                tokens: ContextTrace::estimate_tokens(&response_text),
            });

            // Explicit FINAL("...") in the text wins.
            if let Some(answer) = Self::extract_final(&response_text) {
                final_answer = Some(answer);
                break;
            }

            // Heuristic: after 3 iterations, a substantial code-free response
            // is accepted as the answer even without FINAL.
            if iterations >= 3 && response_text.len() > 500 && !response_text.contains("```") {
                final_answer = Some(response_text.clone());
                break;
            }

            conversation.push(Message {
                role: Role::Assistant,
                content: vec![ContentPart::Text {
                    text: response_text,
                }],
            });

            conversation.push(Message {
                role: Role::User,
                content: vec![ContentPart::Text {
                    text: "Continue analysis. Call FINAL(\"your answer\") when ready.".to_string(),
                }],
            });

            subcalls += 1;
            if subcalls >= max_subcalls {
                warn!(subcalls, max = max_subcalls, "RLM: Max subcalls reached");
                break;
            }
        }

        if let Some(ref progress) = ctx.on_progress {
            progress(ProcessProgress {
                iteration: iterations,
                max_iterations,
                status: "completed".to_string(),
            });
        }

        let converged = final_answer.is_some();

        // No FINAL: synthesize a structured fallback summary.
        let answer = final_answer.unwrap_or_else(|| {
            warn!(
                iterations,
                subcalls, "RLM: No FINAL produced, using fallback"
            );
            Self::build_enhanced_fallback(output, ctx.tool_id, &ctx.tool_args, input_tokens)
        });

        let output_tokens = RlmChunker::estimate_tokens(&answer);
        let compression_ratio = input_tokens as f64 / output_tokens.max(1) as f64;
        let elapsed_ms = start.elapsed().as_millis() as u64;

        let result = format!(
            "[RLM: {} → {} tokens | {} iterations | {} sub-calls]\n\n{}",
            input_tokens, output_tokens, iterations, subcalls, answer
        );

        info!(
            input_tokens,
            output_tokens,
            iterations,
            subcalls,
            elapsed_ms,
            compression_ratio = format!("{:.1}", compression_ratio),
            "RLM: Processing complete"
        );

        trace.log_event(ContextEvent::Final {
            answer: answer.chars().take(200).collect(),
            tokens: output_tokens,
        });

        let outcome = if aborted {
            RlmOutcome::Aborted
        } else if converged {
            RlmOutcome::Converged
        } else {
            RlmOutcome::Exhausted
        };
        // Failed is unreachable here (first-call failure returned early).
        let reason = match outcome {
            RlmOutcome::Converged => None,
            RlmOutcome::Aborted => Some("abort signal".to_string()),
            RlmOutcome::Exhausted => Some(format!(
                "no FINAL after {iterations} iterations / {subcalls} subcalls"
            )),
            RlmOutcome::Failed => None,
        };
        if let Some(bus) = ctx.bus.as_ref() {
            bus.emit(SessionEvent::RlmComplete(RlmCompletion {
                trace_id,
                outcome,
                iterations,
                subcalls,
                input_tokens,
                output_tokens,
                elapsed_ms,
                reason,
                root_model: ctx.model.clone(),
                subcall_model_used: ctx.subcall_model.clone(),
            }));
        }

        Ok(RlmResult {
            processed: result,
            stats: RlmStats {
                input_tokens,
                output_tokens,
                iterations,
                subcalls,
                elapsed_ms,
                compression_ratio,
            },
            success: outcome.is_success(),
            error: None,
            trace: Some(trace),
            trace_id: Some(trace_id),
        })
    }
765
766 fn extract_final(text: &str) -> Option<String> {
767 let patterns = [r#"FINAL\s*\(\s*["'`]"#, r#"FINAL!\s*\(\s*["'`]?"#];
769
770 for _pattern_start in patterns {
771 if let Some(start_idx) = text.find("FINAL") {
772 let after = &text[start_idx..];
773
774 if let Some(open_idx) = after.find(['"', '\'', '`']) {
776 let quote_char = after.chars().nth(open_idx)?;
777 let content_start = start_idx + open_idx + 1;
778
779 let content = &text[content_start..];
781 if let Some(close_idx) = content.find(quote_char) {
782 let answer = &content[..close_idx];
783 if !answer.is_empty() {
784 return Some(answer.to_string());
785 }
786 }
787 }
788 }
789 }
790
791 None
792 }
793
794 fn build_exploration_summary(content: &str, input_tokens: usize) -> String {
795 let lines: Vec<&str> = content.lines().collect();
796 let total_lines = lines.len();
797
798 let head: String = lines
799 .iter()
800 .take(30)
801 .copied()
802 .collect::<Vec<_>>()
803 .join("\n");
804 let tail: String = lines
805 .iter()
806 .rev()
807 .take(50)
808 .collect::<Vec<_>>()
809 .into_iter()
810 .rev()
811 .copied()
812 .collect::<Vec<_>>()
813 .join("\n");
814
815 format!(
816 "=== CONTEXT EXPLORATION ===\n\
817 Total: {} chars, {} lines, ~{} tokens\n\n\
818 === FIRST 30 LINES ===\n{}\n\n\
819 === LAST 50 LINES ===\n{}\n\
820 === END EXPLORATION ===",
821 content.len(),
822 total_lines,
823 input_tokens,
824 head,
825 tail
826 )
827 }
828
829 fn build_rlm_system_prompt(input_tokens: usize, tool_id: &str, query: &str) -> String {
830 let context_type = if tool_id == "session_context" {
831 "conversation history"
832 } else {
833 "tool output"
834 };
835
836 format!(
837 r#"You are tasked with analyzing large content that cannot fit in a normal context window.
838
839The content is a {} with {} total tokens.
840
841YOUR TASK: {}
842
843## Analysis Strategy
844
8451. First, examine the exploration (head + tail of content) to understand structure
8462. Identify the most important information for answering the query
8473. Focus on: errors, key decisions, file paths, recent activity
8484. Provide a concise but complete answer
849
850When ready, call FINAL("your detailed answer") with your findings.
851
852Be SPECIFIC - include actual file paths, function names, error messages. Generic summaries are not useful."#,
853 context_type, input_tokens, query
854 )
855 }
856
857 fn build_query_for_tool(tool_id: &str, args: &serde_json::Value) -> String {
858 match tool_id {
859 "read" => {
860 let path = args.get("filePath").and_then(|v| v.as_str()).unwrap_or("unknown");
861 format!("Summarize the key contents of file \"{}\". Focus on: structure, main functions/classes, important logic. Be concise.", path)
862 }
863 "bash" => {
864 "Summarize the command output. Extract key information, results, errors, warnings. Be concise.".to_string()
865 }
866 "grep" => {
867 let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("pattern");
868 format!("Summarize search results for \"{}\". Group by file, highlight most relevant matches. Be concise.", pattern)
869 }
870 "glob" => {
871 "Summarize the file listing. Group by directory, highlight important files. Be concise.".to_string()
872 }
873 "webfetch" => {
874 let url = args.get("url").and_then(|v| v.as_str()).unwrap_or("unknown");
875 format!(
876 "Extract and summarize the main human-readable content from this fetched web page (URL: {}). Ignore scripts, styles, navigation, cookie banners, and boilerplate. Preserve headings, lists, code blocks, and important links. Be concise.",
877 url
878 )
879 }
880 "websearch" => {
881 let q = args.get("query").and_then(|v| v.as_str()).unwrap_or("unknown");
882 format!(
883 "Summarize these web search results for query: \"{}\". List the most relevant results with titles and URLs, and note why each is relevant. Be concise.",
884 q
885 )
886 }
887 "batch" => {
888 "Summarize the batch tool output. Group by sub-call, highlight failures/errors first, then key results. Keep actionable details: URLs, file paths, command outputs, and next steps.".to_string()
889 }
890 "session_context" => {
891 r#"You are a CONTEXT MEMORY SYSTEM. Create a BRIEFING for an AI assistant to continue this conversation.
892
893CRITICAL: The assistant will ONLY see your briefing - it has NO memory of the conversation.
894
895## What to Extract
896
8971. **PRIMARY GOAL**: What is the user ultimately trying to achieve?
8982. **CURRENT STATE**: What has been accomplished? Current status?
8993. **LAST ACTIONS**: What just happened? (last 3-5 tool calls, their results)
9004. **ACTIVE FILES**: Which files were modified?
9015. **PENDING TASKS**: What remains to be done?
9026. **CRITICAL DETAILS**: File paths, error messages, specific values, decisions made
9037. **NEXT STEPS**: What should happen next?
904
905Be SPECIFIC with file paths, function names, error messages."#.to_string()
906 }
907 _ => "Summarize this output concisely, extracting the most important information.".to_string()
908 }
909 }
910
911 fn build_enhanced_fallback(
912 output: &str,
913 tool_id: &str,
914 tool_args: &serde_json::Value,
915 input_tokens: usize,
916 ) -> String {
917 let lines: Vec<&str> = output.lines().collect();
918
919 if tool_id == "session_context" {
920 let file_matches: Vec<&str> = lines
922 .iter()
923 .filter_map(|l| {
924 if l.contains(".ts")
925 || l.contains(".rs")
926 || l.contains(".py")
927 || l.contains(".json")
928 {
929 Some(*l)
930 } else {
931 None
932 }
933 })
934 .take(15)
935 .collect();
936
937 let tool_calls: Vec<&str> = lines
938 .iter()
939 .filter(|l| l.contains("[Tool "))
940 .take(10)
941 .copied()
942 .collect();
943
944 let errors: Vec<&str> = lines
945 .iter()
946 .filter(|l| {
947 l.to_lowercase().contains("error") || l.to_lowercase().contains("failed")
948 })
949 .take(5)
950 .copied()
951 .collect();
952
953 let head: String = lines
954 .iter()
955 .take(30)
956 .copied()
957 .collect::<Vec<_>>()
958 .join("\n");
959 let tail: String = lines
960 .iter()
961 .rev()
962 .take(80)
963 .collect::<Vec<_>>()
964 .into_iter()
965 .rev()
966 .copied()
967 .collect::<Vec<_>>()
968 .join("\n");
969
970 let mut parts = vec![
971 "## Context Summary (Fallback Mode)".to_string(),
972 format!(
973 "*Original: {} tokens - RLM processing produced insufficient output*",
974 input_tokens
975 ),
976 String::new(),
977 ];
978
979 if !file_matches.is_empty() {
980 parts.push(format!("**Files Mentioned:** {}", file_matches.len()));
981 }
982
983 if !tool_calls.is_empty() {
984 parts.push(format!("**Recent Tool Calls:** {}", tool_calls.join(", ")));
985 }
986
987 if !errors.is_empty() {
988 parts.push("**Recent Errors:**".to_string());
989 for e in errors {
990 parts.push(format!("- {}", e.chars().take(150).collect::<String>()));
991 }
992 }
993
994 parts.push(String::new());
995 parts.push("### Initial Request".to_string());
996 parts.push("```".to_string());
997 parts.push(head);
998 parts.push("```".to_string());
999 parts.push(String::new());
1000 parts.push("### Recent Activity".to_string());
1001 parts.push("```".to_string());
1002 parts.push(tail);
1003 parts.push("```".to_string());
1004
1005 parts.join("\n")
1006 } else {
1007 let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1008 format!(
1009 "## Fallback Summary\n*RLM processing failed - showing structured excerpt*\n\n{}",
1010 truncated
1011 )
1012 }
1013 }
1014
1015 fn fallback_result(
1016 output: &str,
1017 tool_id: &str,
1018 tool_args: &serde_json::Value,
1019 input_tokens: usize,
1020 ) -> RlmResult {
1021 let (truncated, _, _) = Self::smart_truncate(output, tool_id, tool_args, 8000);
1022 let output_tokens = RlmChunker::estimate_tokens(&truncated);
1023
1024 RlmResult {
1025 processed: format!(
1026 "[RLM processing failed, showing truncated output]\n\n{}",
1027 truncated
1028 ),
1029 stats: RlmStats {
1030 input_tokens,
1031 output_tokens,
1032 iterations: 0,
1033 subcalls: 0,
1034 elapsed_ms: 0,
1035 compression_ratio: input_tokens as f64 / output_tokens.max(1) as f64,
1036 },
1037 success: false,
1038 error: Some("Model call failed".to_string()),
1039 trace: None,
1040 trace_id: None,
1041 }
1042 }
1043}
1044
#[cfg(test)]
mod tests {
    use super::*;

    // An output far above the routing threshold from an eligible tool must
    // be routed.
    #[test]
    fn should_route_large_webfetch_output() {
        let output = "a".repeat(200_000);
        let ctx = RoutingContext {
            tool_id: "webfetch".to_string(),
            session_id: "s".to_string(),
            call_id: None,
            model_context_limit: 200_000,
            current_context_tokens: Some(1000),
        };
        let cfg = RlmConfig::default();
        let result = RlmRouter::should_route(&output, &ctx, &cfg);
        assert!(result.should_route);
    }

    // Even a tool outside the eligibility set is routed when its output is
    // large enough to threaten the context window (context protection).
    #[test]
    fn should_route_large_output_for_unknown_tool_to_protect_context() {
        let output = "a".repeat(200_000);
        let ctx = RoutingContext {
            tool_id: "some_new_tool".to_string(),
            session_id: "s".to_string(),
            call_id: None,
            model_context_limit: 200_000,
            current_context_tokens: Some(1000),
        };
        let cfg = RlmConfig::default();
        let result = RlmRouter::should_route(&output, &ctx, &cfg);
        assert!(result.should_route);
        assert!(result.reason.contains("ineligible") || result.reason.contains("threshold"));
    }
}