// codetether_agent/rlm/mod.rs
1//! Recursive Language Model (RLM) processing
2//!
3//! Handles large contexts that exceed model context windows by:
4//! 1. Loading context into a REPL environment as a variable
5//! 2. Having the LLM write code to analyze it
6//! 3. Supporting recursive sub-LM calls for semantic analysis
7//!
8//! Based on "Recursive Language Models" (Zhang et al. 2025)
9
10pub mod chunker;
11pub mod context_trace;
12pub mod oracle;
13pub mod repl;
14pub mod router;
15pub mod tools;
16
17pub use chunker::{Chunk, ChunkOptions, ContentType, RlmChunker};
18pub use context_trace::{ContextEvent, ContextTrace};
19pub use oracle::{
20 AstPayload, AstResult, FinalPayload, GeneratedQuery, GrepMatch, GrepOracle, GrepPayload,
21 GrepVerification, OracleResult, OracleTracePersistResult, OracleTraceRecord,
22 OracleTraceStorage, OracleTraceSyncStats, QueryTemplate, SemanticPayload, TemplateKind,
23 TraceStep, TraceValidator, TreeSitterOracle, TreeSitterVerification, ValidatedTrace,
24 VerificationMethod,
25};
26pub use repl::{ReplRuntime, RlmAnalysisResult, RlmExecutor, RlmRepl, SubQuery};
27pub use router::{RlmRouter, RoutingContext, RoutingResult};
28pub use tools::{RlmToolResult, dispatch_tool_call, rlm_tool_definitions};
29
30use serde::{Deserialize, Serialize};
31
/// RLM processing statistics
///
/// Aggregate counters for one RLM run. `Default` yields an all-zero
/// record that the loop fills in as it progresses.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RlmStats {
    /// Input tokens consumed over the run.
    pub input_tokens: usize,
    /// Output tokens produced over the run.
    pub output_tokens: usize,
    /// Router iterations executed (see `RlmConfig::max_iterations`).
    pub iterations: usize,
    /// Recursive sub-LM calls made (see `RlmConfig::max_subcalls`).
    pub subcalls: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub elapsed_ms: u64,
    /// Input-to-output size ratio.
    // NOTE(review): exact numerator/denominator is defined by the
    // executor that populates this — confirm before relying on it.
    pub compression_ratio: f64,
}
42
43/// RLM processing result.
44///
45/// The `trace` field is populated when the caller supplied a
46/// [`crate::session::SessionBus`] (or otherwise opted in) so downstream
47/// consumers — the TUI `/rlm` view, the JSONL flywheel, trace-driven
48/// tuning jobs — can reconstruct the iteration-by-iteration behaviour
49/// of the loop after the fact.
50///
51/// `trace_id` is always generated for a run (even when no bus is
52/// attached) and is echoed in the matching
53/// [`crate::session::SessionEvent::RlmComplete`] event. Callers who
54/// supplied a bus can use it to correlate the durable completion
55/// record with this returned value.
56///
57/// # Examples
58///
59/// ```rust
60/// use codetether_agent::rlm::{RlmResult, RlmStats};
61///
62/// let r = RlmResult {
63/// processed: "summary".into(),
64/// stats: RlmStats::default(),
65/// success: true,
66/// error: None,
67/// trace: None,
68/// trace_id: None,
69/// };
70/// assert!(r.success);
71/// assert!(r.trace.is_none());
72/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RlmResult {
    /// The final text produced by the loop (summary or answer).
    /// May be a partial answer when the loop exhausted its iteration
    /// budget rather than converging (see `RlmConfig::max_iterations`).
    pub processed: String,
    /// Aggregate statistics for the run.
    pub stats: RlmStats,
    /// `true` when the loop converged within its iteration budget.
    pub success: bool,
    /// Populated when `success` is `false` — a short diagnostic.
    pub error: Option<String>,
    /// Optional per-iteration event trace. Serialised only when present
    /// so existing on-disk `RlmResult` records stay compatible.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trace: Option<context_trace::ContextTrace>,
    /// Identifier echoed on the matching `RlmComplete` bus event.
    /// `None` for on-disk records written before this field existed.
    /// (`#[serde(default)]` keeps those old records deserializable.)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trace_id: Option<uuid::Uuid>,
}
92
/// RLM configuration
///
/// Every non-`Option` field carries a serde `default`, so a partially
/// specified config deserializes cleanly; the concrete values come from
/// the `default_*` helpers defined below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RlmConfig {
    /// Mode: "auto", "off", or "always". Default: "auto".
    #[serde(default = "default_mode")]
    pub mode: String,

    /// Threshold ratio of context window to trigger RLM (0.0-1.0).
    /// Default: 0.35.
    #[serde(default = "default_threshold")]
    pub threshold: f64,

    /// Maximum iterations for RLM processing. Default: 15.
    ///
    /// # Semantics
    ///
    /// An "iteration" is one full router step: system prompt + tools →
    /// LLM round-trip → tool calls → results → next LLM round-trip.
    /// The loop terminates in one of four ways, mapped directly to
    /// [`RlmOutcome`](crate::session::RlmOutcome) on the emitted
    /// [`RlmComplete`](crate::session::SessionEvent::RlmComplete) event:
    ///
    /// | Termination condition                                 | Outcome       |
    /// |-------------------------------------------------------|---------------|
    /// | Model emitted a `FINAL:` marker                       | `Converged`   |
    /// | `max_iterations` reached without `FINAL:`             | `Exhausted`   |
    /// | Provider or tool raised an error                      | `Failed`      |
    /// | The caller's `AbortHandle` fired                      | `Aborted`     |
    ///
    /// `Exhausted` is **not** an error — the partial result is still
    /// returned and the caller decides whether to retry with a higher
    /// limit, fall back to chunk compression, or surface the partial
    /// answer to the user. Session-level context compaction (see
    /// [`crate::session::helper::compression`]) treats `Exhausted` the
    /// same as success and re-uses the summary it produced.
    #[serde(default = "default_max_iterations")]
    pub max_iterations: usize,

    /// Maximum recursive sub-calls. Default: 50.
    #[serde(default = "default_max_subcalls")]
    pub max_subcalls: usize,

    /// Preferred runtime: "rust", "bun", or "python". Default: "rust".
    #[serde(default = "default_runtime")]
    pub runtime: String,

    /// Model reference for root processing (provider:model).
    /// Missing field deserializes to `None`.
    pub root_model: Option<String>,

    /// Model reference for subcalls (provider:model).
    /// Missing field deserializes to `None`.
    pub subcall_model: Option<String>,

    /// Trigger RLM compaction once the stored session history reaches
    /// this many messages, regardless of the token-budget estimate.
    /// Default: 60.
    ///
    /// This is a belt-and-braces trigger for cases where the token
    /// estimator under-counts (e.g. large tool outputs, image parts,
    /// or provider-specific protocol framing). Set to `0` to disable.
    #[serde(default = "default_history_trigger_messages")]
    pub history_trigger_messages: usize,
}
153
/// Serde default for `RlmConfig::mode`.
fn default_mode() -> String {
    String::from("auto")
}

/// Serde default for `RlmConfig::threshold`.
fn default_threshold() -> f64 {
    0.35
}

/// Serde default for `RlmConfig::max_iterations`.
fn default_max_iterations() -> usize {
    15
}

/// Serde default for `RlmConfig::max_subcalls`.
fn default_max_subcalls() -> usize {
    50
}

/// Serde default for `RlmConfig::runtime`.
fn default_runtime() -> String {
    String::from("rust")
}

/// Serde default for `RlmConfig::history_trigger_messages`: the stored
/// message count that triggers RLM compaction independently of the
/// token-budget check.
fn default_history_trigger_messages() -> usize {
    60
}
179
180impl Default for RlmConfig {
181 fn default() -> Self {
182 Self {
183 mode: default_mode(),
184 threshold: default_threshold(),
185 max_iterations: default_max_iterations(),
186 max_subcalls: default_max_subcalls(),
187 runtime: default_runtime(),
188 root_model: None,
189 subcall_model: None,
190 history_trigger_messages: default_history_trigger_messages(),
191 }
192 }
193}