Skip to main content

codetether_agent/rlm/
mod.rs

1//! Recursive Language Model (RLM) processing
2//!
3//! Handles large contexts that exceed model context windows by:
4//! 1. Loading context into a REPL environment as a variable
5//! 2. Having the LLM write code to analyze it
6//! 3. Supporting recursive sub-LM calls for semantic analysis
7//!
8//! Based on "Recursive Language Models" (Zhang et al. 2025)
9
10pub mod chunker;
11pub mod context_trace;
12pub mod oracle;
13pub mod repl;
14pub mod router;
15pub mod tools;
16
17pub use chunker::{Chunk, ChunkOptions, ContentType, RlmChunker};
18pub use context_trace::{ContextEvent, ContextTrace};
19pub use oracle::{
20    AstPayload, AstResult, FinalPayload, GeneratedQuery, GrepMatch, GrepOracle, GrepPayload,
21    GrepVerification, OracleResult, OracleTracePersistResult, OracleTraceRecord,
22    OracleTraceStorage, OracleTraceSyncStats, QueryTemplate, SemanticPayload, TemplateKind,
23    TraceStep, TraceValidator, TreeSitterOracle, TreeSitterVerification, ValidatedTrace,
24    VerificationMethod,
25};
26pub use repl::{ReplRuntime, RlmAnalysisResult, RlmExecutor, RlmRepl, SubQuery};
27pub use router::{RlmRouter, RoutingContext, RoutingResult};
28pub use tools::{RlmToolResult, dispatch_tool_call, rlm_tool_definitions};
29
30use serde::{Deserialize, Serialize};
31
32/// RLM processing statistics
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct RlmStats {
35    pub input_tokens: usize,
36    pub output_tokens: usize,
37    pub iterations: usize,
38    pub subcalls: usize,
39    pub elapsed_ms: u64,
40    pub compression_ratio: f64,
41}
42
43/// RLM processing result.
44///
45/// The `trace` field is populated when the caller supplied a
46/// [`crate::session::SessionBus`] (or otherwise opted in) so downstream
47/// consumers — the TUI `/rlm` view, the JSONL flywheel, trace-driven
48/// tuning jobs — can reconstruct the iteration-by-iteration behaviour
49/// of the loop after the fact.
50///
51/// `trace_id` is always generated for a run (even when no bus is
52/// attached) and is echoed in the matching
53/// [`crate::session::SessionEvent::RlmComplete`] event. Callers who
54/// supplied a bus can use it to correlate the durable completion
55/// record with this returned value.
56///
57/// # Examples
58///
59/// ```rust
60/// use codetether_agent::rlm::{RlmResult, RlmStats};
61///
62/// let r = RlmResult {
63///     processed: "summary".into(),
64///     stats: RlmStats::default(),
65///     success: true,
66///     error: None,
67///     trace: None,
68///     trace_id: None,
69/// };
70/// assert!(r.success);
71/// assert!(r.trace.is_none());
72/// ```
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct RlmResult {
75    /// The final text produced by the loop (summary or answer).
76    pub processed: String,
77    /// Aggregate statistics for the run.
78    pub stats: RlmStats,
79    /// `true` when the loop converged within its iteration budget.
80    pub success: bool,
81    /// Populated when `success` is `false` — a short diagnostic.
82    pub error: Option<String>,
83    /// Optional per-iteration event trace. Serialised only when present
84    /// so existing on-disk `RlmResult` records stay compatible.
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub trace: Option<context_trace::ContextTrace>,
87    /// Identifier echoed on the matching `RlmComplete` bus event.
88    /// `None` for on-disk records written before this field existed.
89    #[serde(default, skip_serializing_if = "Option::is_none")]
90    pub trace_id: Option<uuid::Uuid>,
91}
92
93/// RLM configuration
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct RlmConfig {
96    /// Mode: "auto", "off", or "always"
97    #[serde(default = "default_mode")]
98    pub mode: String,
99
100    /// Threshold ratio of context window to trigger RLM (0.0-1.0)
101    #[serde(default = "default_threshold")]
102    pub threshold: f64,
103
104    /// Maximum iterations for RLM processing.
105    ///
106    /// # Semantics
107    ///
108    /// An "iteration" is one full router step: system prompt + tools →
109    /// LLM round-trip → tool calls → results → next LLM round-trip.
110    /// The loop terminates in one of four ways, mapped directly to
111    /// [`RlmOutcome`](crate::session::RlmOutcome) on the emitted
112    /// [`RlmComplete`](crate::session::SessionEvent::RlmComplete) event:
113    ///
114    /// | Termination condition                                 | Outcome       |
115    /// |-------------------------------------------------------|---------------|
116    /// | Model emitted a `FINAL:` marker                        | `Converged`   |
117    /// | `max_iterations` reached without `FINAL:`              | `Exhausted`   |
118    /// | Provider or tool raised an error                       | `Failed`      |
119    /// | The caller's `AbortHandle` fired                       | `Aborted`     |
120    ///
121    /// `Exhausted` is **not** an error — the partial result is still
122    /// returned and the caller decides whether to retry with a higher
123    /// limit, fall back to chunk compression, or surface the partial
124    /// answer to the user. Session-level context compaction (see
125    /// [`crate::session::helper::compression`]) treats `Exhausted` the
126    /// same as success and re-uses the summary it produced.
127    #[serde(default = "default_max_iterations")]
128    pub max_iterations: usize,
129
130    /// Maximum recursive sub-calls
131    #[serde(default = "default_max_subcalls")]
132    pub max_subcalls: usize,
133
134    /// Preferred runtime: "rust", "bun", or "python"
135    #[serde(default = "default_runtime")]
136    pub runtime: String,
137
138    /// Model reference for root processing (provider:model)
139    pub root_model: Option<String>,
140
141    /// Model reference for subcalls (provider:model)
142    pub subcall_model: Option<String>,
143
144    /// Trigger RLM compaction once the stored session history reaches
145    /// this many messages, regardless of the token-budget estimate.
146    ///
147    /// This is a belt-and-braces trigger for cases where the token
148    /// estimator under-counts (e.g. large tool outputs, image parts,
149    /// or provider-specific protocol framing). Set to `0` to disable.
150    #[serde(default = "default_history_trigger_messages")]
151    pub history_trigger_messages: usize,
152}
153
/// Fallback for `RlmConfig::mode` when the key is absent: `"auto"`.
fn default_mode() -> String {
    String::from("auto")
}
157
/// Fallback for `RlmConfig::threshold` when the key is absent: `0.35`.
fn default_threshold() -> f64 {
    0.35_f64
}
161
/// Fallback for `RlmConfig::max_iterations` when the key is absent: `15`.
fn default_max_iterations() -> usize {
    15_usize
}
165
/// Fallback for `RlmConfig::max_subcalls` when the key is absent: `50`.
fn default_max_subcalls() -> usize {
    50_usize
}
169
/// Fallback for `RlmConfig::runtime` when the key is absent: `"rust"`.
fn default_runtime() -> String {
    String::from("rust")
}
173
/// Fallback for `RlmConfig::history_trigger_messages`: the stored-message
/// count (`60`) at which RLM compaction kicks in regardless of the
/// token-budget check.
fn default_history_trigger_messages() -> usize {
    60_usize
}
179
180impl Default for RlmConfig {
181    fn default() -> Self {
182        Self {
183            mode: default_mode(),
184            threshold: default_threshold(),
185            max_iterations: default_max_iterations(),
186            max_subcalls: default_max_subcalls(),
187            runtime: default_runtime(),
188            root_model: None,
189            subcall_model: None,
190            history_trigger_messages: default_history_trigger_messages(),
191        }
192    }
193}