Skip to main content

codetether_agent/rlm/
mod.rs

1//! Recursive Language Model (RLM) processing
2//!
3//! Handles large contexts that exceed model context windows by:
4//! 1. Loading context into a REPL environment as a variable
5//! 2. Having the LLM write code to analyze it
6//! 3. Supporting recursive sub-LM calls for semantic analysis
7//!
8//! Based on "Recursive Language Models" (Zhang et al. 2025)
9
10pub mod chunker;
11pub mod context_trace;
12pub mod oracle;
13pub mod repl;
14pub mod router;
15pub mod tools;
16
17pub use chunker::{Chunk, ChunkOptions, ContentType, RlmChunker};
18pub use context_trace::{ContextEvent, ContextTrace};
19pub use oracle::{
20    AstPayload, AstResult, FinalPayload, GeneratedQuery, GrepMatch, GrepOracle, GrepPayload,
21    GrepVerification, OracleResult, OracleTracePersistResult, OracleTraceRecord,
22    OracleTraceStorage, OracleTraceSyncStats, QueryTemplate, SemanticPayload, TemplateKind,
23    TraceStep, TraceValidator, TreeSitterOracle, TreeSitterVerification, ValidatedTrace,
24    VerificationMethod,
25};
26pub use repl::{ReplRuntime, RlmAnalysisResult, RlmExecutor, RlmRepl, SubQuery};
27pub use router::{RlmRouter, RoutingContext, RoutingResult};
28pub use tools::{RlmToolResult, dispatch_tool_call, rlm_tool_definitions};
29
30use serde::{Deserialize, Serialize};
31
32/// RLM processing statistics
33#[derive(Debug, Clone, Default, Serialize, Deserialize)]
34pub struct RlmStats {
35    pub input_tokens: usize,
36    pub output_tokens: usize,
37    pub iterations: usize,
38    pub subcalls: usize,
39    pub elapsed_ms: u64,
40    pub compression_ratio: f64,
41}
42
43/// RLM processing result.
44///
45/// The `trace` field is populated when the caller supplied a
46/// [`crate::session::SessionBus`] (or otherwise opted in) so downstream
47/// consumers — the TUI `/rlm` view, the JSONL flywheel, trace-driven
48/// tuning jobs — can reconstruct the iteration-by-iteration behaviour
49/// of the loop after the fact.
50///
51/// `trace_id` is always generated for a run (even when no bus is
52/// attached) and is echoed in the matching
53/// [`crate::session::SessionEvent::RlmComplete`] event. Callers who
54/// supplied a bus can use it to correlate the durable completion
55/// record with this returned value.
56///
57/// # Examples
58///
59/// ```rust
60/// use codetether_agent::rlm::{RlmResult, RlmStats};
61///
62/// let r = RlmResult {
63///     processed: "summary".into(),
64///     stats: RlmStats::default(),
65///     success: true,
66///     error: None,
67///     trace: None,
68///     trace_id: None,
69/// };
70/// assert!(r.success);
71/// assert!(r.trace.is_none());
72/// ```
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct RlmResult {
75    /// The final text produced by the loop (summary or answer).
76    pub processed: String,
77    /// Aggregate statistics for the run.
78    pub stats: RlmStats,
79    /// `true` when the loop converged within its iteration budget.
80    pub success: bool,
81    /// Populated when `success` is `false` — a short diagnostic.
82    pub error: Option<String>,
83    /// Optional per-iteration event trace. Serialised only when present
84    /// so existing on-disk `RlmResult` records stay compatible.
85    #[serde(default, skip_serializing_if = "Option::is_none")]
86    pub trace: Option<context_trace::ContextTrace>,
87    /// Identifier echoed on the matching `RlmComplete` bus event.
88    /// `None` for on-disk records written before this field existed.
89    #[serde(default, skip_serializing_if = "Option::is_none")]
90    pub trace_id: Option<uuid::Uuid>,
91}
92
93/// RLM configuration
94#[derive(Debug, Clone, Serialize, Deserialize)]
95pub struct RlmConfig {
96    /// Mode: "auto", "off", or "always"
97    #[serde(default = "default_mode")]
98    pub mode: String,
99
100    /// Threshold ratio of context window to trigger RLM (0.0-1.0)
101    #[serde(default = "default_threshold")]
102    pub threshold: f64,
103
104    /// Maximum iterations for RLM processing.
105    ///
106    /// # Semantics
107    ///
108    /// An "iteration" is one full router step: system prompt + tools →
109    /// LLM round-trip → tool calls → results → next LLM round-trip.
110    /// The loop terminates in one of four ways, mapped directly to
111    /// [`RlmOutcome`](crate::session::RlmOutcome) on the emitted
112    /// [`RlmComplete`](crate::session::SessionEvent::RlmComplete) event:
113    ///
114    /// | Termination condition                                 | Outcome       |
115    /// |-------------------------------------------------------|---------------|
116    /// | Model emitted a `FINAL:` marker                        | `Converged`   |
117    /// | `max_iterations` reached without `FINAL:`              | `Exhausted`   |
118    /// | Provider or tool raised an error                       | `Failed`      |
119    /// | The caller's `AbortHandle` fired                       | `Aborted`     |
120    ///
121    /// `Exhausted` is **not** an error — the partial result is still
122    /// returned and the caller decides whether to retry with a higher
123    /// limit, fall back to chunk compression, or surface the partial
124    /// answer to the user. Session-level context compaction (see
125    /// [`crate::session::helper::compression`]) treats `Exhausted` the
126    /// same as success and re-uses the summary it produced.
127    #[serde(default = "default_max_iterations")]
128    pub max_iterations: usize,
129
130    /// Maximum recursive sub-calls
131    #[serde(default = "default_max_subcalls")]
132    pub max_subcalls: usize,
133
134    /// Preferred runtime: "rust", "bun", or "python"
135    #[serde(default = "default_runtime")]
136    pub runtime: String,
137
138    /// Model reference for root processing (provider:model)
139    pub root_model: Option<String>,
140
141    /// Model reference for subcalls (provider:model)
142    pub subcall_model: Option<String>,
143}
144
/// Fallback for `RlmConfig::mode`: the `"auto"` mode.
fn default_mode() -> String {
    String::from("auto")
}
148
/// Fallback for `RlmConfig::threshold`: a ratio of `0.35`.
fn default_threshold() -> f64 {
    const THRESHOLD: f64 = 0.35;
    THRESHOLD
}
152
/// Fallback for `RlmConfig::max_iterations`: 15 iterations.
fn default_max_iterations() -> usize {
    const MAX_ITERATIONS: usize = 15;
    MAX_ITERATIONS
}
156
/// Fallback for `RlmConfig::max_subcalls`: 50 sub-calls.
fn default_max_subcalls() -> usize {
    const MAX_SUBCALLS: usize = 50;
    MAX_SUBCALLS
}
160
/// Fallback for `RlmConfig::runtime`: the `"rust"` runtime.
fn default_runtime() -> String {
    String::from("rust")
}
164
165impl Default for RlmConfig {
166    fn default() -> Self {
167        Self {
168            mode: default_mode(),
169            threshold: default_threshold(),
170            max_iterations: default_max_iterations(),
171            max_subcalls: default_max_subcalls(),
172            runtime: default_runtime(),
173            root_model: None,
174            subcall_model: None,
175        }
176    }
177}