// codetether_agent/rlm/mod.rs
1//! Recursive Language Model (RLM) processing
2//!
3//! Handles large contexts that exceed model context windows by:
4//! 1. Loading context into a REPL environment as a variable
5//! 2. Having the LLM write code to analyze it
6//! 3. Supporting recursive sub-LM calls for semantic analysis
7//!
8//! Based on "Recursive Language Models" (Zhang et al. 2025)
9
10pub mod chunker;
11pub mod context_trace;
12pub mod oracle;
13pub mod repl;
14pub mod router;
15pub mod tools;
16
17pub use chunker::{Chunk, ChunkOptions, ContentType, RlmChunker};
18pub use context_trace::{ContextEvent, ContextTrace};
19pub use oracle::{
20 AstPayload, AstResult, FinalPayload, GeneratedQuery, GrepMatch, GrepOracle, GrepPayload,
21 GrepVerification, OracleResult, OracleTracePersistResult, OracleTraceRecord,
22 OracleTraceStorage, OracleTraceSyncStats, QueryTemplate, SemanticPayload, TemplateKind,
23 TraceStep, TraceValidator, TreeSitterOracle, TreeSitterVerification, ValidatedTrace,
24 VerificationMethod,
25};
26pub use repl::{ReplRuntime, RlmAnalysisResult, RlmExecutor, RlmRepl, SubQuery};
27pub use router::{RlmRouter, RoutingContext, RoutingResult};
28pub use tools::{RlmToolResult, dispatch_tool_call, rlm_tool_definitions};
29
30use serde::{Deserialize, Serialize};
31
/// Aggregate statistics for a single RLM processing run.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RlmStats {
    /// Tokens consumed as model input over the whole run.
    pub input_tokens: usize,
    /// Tokens produced as model output over the whole run.
    pub output_tokens: usize,
    /// Router iterations executed; bounded by [`RlmConfig::max_iterations`].
    pub iterations: usize,
    /// Recursive sub-LM calls made; bounded by [`RlmConfig::max_subcalls`].
    pub subcalls: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub elapsed_ms: u64,
    /// Size ratio between input and produced output.
    /// NOTE(review): exact definition (tokens vs. bytes, which side is the
    /// numerator) is set by the producer of this struct — confirm there.
    pub compression_ratio: f64,
}
42
43/// RLM processing result.
44///
45/// The `trace` field is populated when the caller supplied a
46/// [`crate::session::SessionBus`] (or otherwise opted in) so downstream
47/// consumers — the TUI `/rlm` view, the JSONL flywheel, trace-driven
48/// tuning jobs — can reconstruct the iteration-by-iteration behaviour
49/// of the loop after the fact.
50///
51/// `trace_id` is always generated for a run (even when no bus is
52/// attached) and is echoed in the matching
53/// [`crate::session::SessionEvent::RlmComplete`] event. Callers who
54/// supplied a bus can use it to correlate the durable completion
55/// record with this returned value.
56///
57/// # Examples
58///
59/// ```rust
60/// use codetether_agent::rlm::{RlmResult, RlmStats};
61///
62/// let r = RlmResult {
63/// processed: "summary".into(),
64/// stats: RlmStats::default(),
65/// success: true,
66/// error: None,
67/// trace: None,
68/// trace_id: None,
69/// };
70/// assert!(r.success);
71/// assert!(r.trace.is_none());
72/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RlmResult {
    /// The final text produced by the loop (summary or answer).
    pub processed: String,
    /// Aggregate statistics for the run (tokens, iterations, sub-calls,
    /// elapsed time).
    pub stats: RlmStats,
    /// `true` when the loop converged within its iteration budget.
    pub success: bool,
    /// Populated when `success` is `false` — a short diagnostic.
    pub error: Option<String>,
    /// Optional per-iteration event trace. Serialised only when present
    /// so existing on-disk `RlmResult` records stay compatible.
    /// `#[serde(default)]` keeps older records (without this field)
    /// deserialisable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trace: Option<context_trace::ContextTrace>,
    /// Identifier echoed on the matching `RlmComplete` bus event.
    /// `None` for on-disk records written before this field existed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trace_id: Option<uuid::Uuid>,
}
92
/// RLM configuration.
///
/// Every field with a `#[serde(default = "...")]` attribute falls back to
/// the matching `default_*` helper below, so a partially-specified (or
/// empty) config map deserialises cleanly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RlmConfig {
    /// Mode: "auto", "off", or "always". Defaults to "auto".
    #[serde(default = "default_mode")]
    pub mode: String,

    /// Threshold ratio of context window to trigger RLM (0.0-1.0).
    /// Defaults to 0.35.
    #[serde(default = "default_threshold")]
    pub threshold: f64,

    /// Maximum iterations for RLM processing.
    ///
    /// # Semantics
    ///
    /// An "iteration" is one full router step: system prompt + tools →
    /// LLM round-trip → tool calls → results → next LLM round-trip.
    /// The loop terminates in one of four ways, mapped directly to
    /// [`RlmOutcome`](crate::session::RlmOutcome) on the emitted
    /// [`RlmComplete`](crate::session::SessionEvent::RlmComplete) event:
    ///
    /// | Termination condition                                 | Outcome       |
    /// |-------------------------------------------------------|---------------|
    /// | Model emitted a `FINAL:` marker                       | `Converged`   |
    /// | `max_iterations` reached without `FINAL:`             | `Exhausted`   |
    /// | Provider or tool raised an error                      | `Failed`      |
    /// | The caller's `AbortHandle` fired                      | `Aborted`     |
    ///
    /// `Exhausted` is **not** an error — the partial result is still
    /// returned and the caller decides whether to retry with a higher
    /// limit, fall back to chunk compression, or surface the partial
    /// answer to the user. Session-level context compaction (see
    /// [`crate::session::helper::compression`]) treats `Exhausted` the
    /// same as success and re-uses the summary it produced.
    #[serde(default = "default_max_iterations")]
    pub max_iterations: usize,

    /// Maximum recursive sub-calls. Defaults to 50.
    #[serde(default = "default_max_subcalls")]
    pub max_subcalls: usize,

    /// Preferred runtime: "rust", "bun", or "python". Defaults to "rust".
    #[serde(default = "default_runtime")]
    pub runtime: String,

    /// Model reference for root processing (provider:model).
    /// `None` means no explicit override was configured.
    pub root_model: Option<String>,

    /// Model reference for subcalls (provider:model).
    /// `None` means no explicit override was configured.
    pub subcall_model: Option<String>,
}
144
/// Serde default for [`RlmConfig::mode`]: `"auto"`.
fn default_mode() -> String {
    String::from("auto")
}
148
/// Serde default for [`RlmConfig::threshold`]: 35% of the context window.
fn default_threshold() -> f64 {
    const DEFAULT_THRESHOLD: f64 = 0.35;
    DEFAULT_THRESHOLD
}
152
/// Serde default for [`RlmConfig::max_iterations`].
fn default_max_iterations() -> usize {
    const DEFAULT_MAX_ITERATIONS: usize = 15;
    DEFAULT_MAX_ITERATIONS
}
156
/// Serde default for [`RlmConfig::max_subcalls`].
fn default_max_subcalls() -> usize {
    const DEFAULT_MAX_SUBCALLS: usize = 50;
    DEFAULT_MAX_SUBCALLS
}
160
/// Serde default for [`RlmConfig::runtime`]: `"rust"`.
fn default_runtime() -> String {
    String::from("rust")
}
164
165impl Default for RlmConfig {
166 fn default() -> Self {
167 Self {
168 mode: default_mode(),
169 threshold: default_threshold(),
170 max_iterations: default_max_iterations(),
171 max_subcalls: default_max_subcalls(),
172 runtime: default_runtime(),
173 root_model: None,
174 subcall_model: None,
175 }
176 }
177}