codetether_agent/session/relevance.rs
1//! Per-entry relevance metadata and CADMAS-CTX bucket projection.
2//!
3//! ## Phase B foundation
4//!
5//! The Liu et al. paper (arXiv:2512.22087) calls for a per-entry
6//! sidecar of relevance signals that an incremental derivation policy
7//! can score against the current task. The CADMAS-CTX paper
8//! (arXiv:2604.17950) independently needs a coarse **context bucket**
9//! `z = (difficulty, dependency, tool_use)` to key its per-(agent,
10//! skill, bucket) posteriors.
11//!
12//! Both consumers share ~80 % of the extraction work — file paths,
13//! tool names, error-class markers — so this module emits a single
14//! [`RelevanceMeta`] that projects down to a [`Bucket`] via
15//! [`RelevanceMeta::project_bucket`]. Phase B's `DerivePolicy::Incremental`
16//! and Phase C's `DelegationState` both read from the same sidecar.
17//!
18//! ## Scope in Phase B step 15
19//!
20//! Extraction is **pure and syntactic** — no LLM calls, no IO. Heuristics:
21//!
//! * `files`: token scan over text parts for path-like tokens
//!   (split on whitespace and light punctuation; no regex engine).
//! * `tools`: names of `ToolCall` content parts.
//! * `error_classes`: case-insensitive substring matches of common error
//!   markers (`Error:`, `error[E`, `failed`, `panicked`, `traceback`,
//!   `stack trace`).
26//! * `explicit_refs`: left for future turns-N-reference extraction
27//! (empty in this first cut).
28//!
29//! Keeping it syntactic means the extractor can run in the append hot
30//! path (`Session::add_message`) without blocking.
31//!
32//! ## Examples
33//!
34//! ```rust
35//! use codetether_agent::provider::{ContentPart, Message, Role};
36//! use codetether_agent::session::relevance::{Bucket, Dependency, Difficulty, RelevanceMeta, ToolUse, extract};
37//!
38//! let msg = Message {
39//! role: Role::Assistant,
40//! content: vec![ContentPart::Text {
41//! text: "Edited src/lib.rs and tests/smoke.rs".to_string(),
42//! }],
43//! };
44//! let meta: RelevanceMeta = extract(&msg);
45//! assert_eq!(meta.files.len(), 2);
46//!
47//! let bucket: Bucket = meta.project_bucket();
48//! assert_eq!(bucket.tool_use, ToolUse::No);
49//! assert_eq!(bucket.dependency, Dependency::Chained);
50//! assert_eq!(bucket.difficulty, Difficulty::Easy);
51//! ```
52
53use serde::{Deserialize, Serialize};
54
55use crate::provider::{ContentPart, Message};
56
/// Per-entry syntactic relevance signals.
///
/// Parallel-array friendly: one [`RelevanceMeta`] per entry in
/// [`Session::messages`](crate::session::Session). Lives in a
/// `<session-id>.relevance.jsonl` sidecar once wired into
/// `Session::save` (a future commit).
///
/// Every field is `#[serde(default)]`, so a record that omits a field
/// deserializes with that field empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct RelevanceMeta {
    /// Path-like tokens found in the message's text parts, in first-seen
    /// order without duplicates.
    #[serde(default)]
    pub files: Vec<String>,
    /// Tool names, in first-seen order without duplicates.
    ///
    /// [`extract`] records names from `ToolCall` parts only; `ToolResult`
    /// parts are scanned for error markers instead.
    #[serde(default)]
    pub tools: Vec<String>,
    /// Short, lower-cased error-class tags for the markers found in the
    /// entry (`error`, `error[e`, `failed`, `panicked`, `traceback`,
    /// `stack trace`). Matching is case-insensitive and a trailing `:` on
    /// the marker is dropped from the tag.
    #[serde(default)]
    pub error_classes: Vec<String>,
    /// Message indices this entry explicitly references
    /// (reserved for future N-back extraction; empty in Phase B v1).
    #[serde(default)]
    pub explicit_refs: Vec<usize>,
}
80
/// CADMAS-CTX difficulty axis.
///
/// Serialized in snake_case. [`RelevanceMeta::project_bucket`] derives the
/// level from the number of recorded error classes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Difficulty {
    /// No error classes recorded for the entry.
    Easy,
    /// One or two error classes recorded.
    Medium,
    /// Three or more error classes recorded.
    Hard,
}
89
90impl Difficulty {
91 /// Stable snake_case encoding. Never renamed — used as part of the
92 /// persisted [`crate::session::delegation::DelegationState`] key.
93 pub const fn as_str(self) -> &'static str {
94 match self {
95 Difficulty::Easy => "easy",
96 Difficulty::Medium => "medium",
97 Difficulty::Hard => "hard",
98 }
99 }
100}
101
/// CADMAS-CTX dependency axis.
///
/// Serialized in snake_case. See [`RelevanceMeta::project_bucket`] for the
/// heuristic that selects a variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Dependency {
    /// Single file or a small set of files in the same module.
    ///
    /// Phase B v1 heuristic: fewer than two recorded files and no recorded
    /// path containing `/`.
    Isolated,
    /// Cross-module / multi-file reach.
    ///
    /// Phase B v1 heuristic: two or more recorded files, or any recorded
    /// path containing `/`.
    Chained,
}
111
112impl Dependency {
113 /// Stable snake_case encoding — see [`Difficulty::as_str`].
114 pub const fn as_str(self) -> &'static str {
115 match self {
116 Dependency::Isolated => "isolated",
117 Dependency::Chained => "chained",
118 }
119 }
120}
121
/// CADMAS-CTX tool-use axis.
///
/// Serialized in snake_case. [`RelevanceMeta::project_bucket`] picks the
/// variant from whether any tool names were recorded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ToolUse {
    /// No tool names recorded for the entry.
    No,
    /// At least one tool name recorded for the entry.
    Yes,
}
129
130impl ToolUse {
131 /// Stable snake_case encoding — see [`Difficulty::as_str`].
132 pub const fn as_str(self) -> &'static str {
133 match self {
134 ToolUse::No => "no",
135 ToolUse::Yes => "yes",
136 }
137 }
138}
139
/// Coarse context bucket — CADMAS-CTX Section 3.1.
///
/// Start with 3–4 active cells in practice (the paper shows over-
/// bucketing hurts — bias-variance collapses at 12 cells on GAIA).
///
/// Produced by [`RelevanceMeta::project_bucket`]; the three axes are
/// independent of one another.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct Bucket {
    /// Difficulty axis, derived from the number of recorded error classes.
    pub difficulty: Difficulty,
    /// Dependency axis, derived from the recorded file paths.
    pub dependency: Dependency,
    /// Tool-use axis, derived from whether any tool names were recorded.
    pub tool_use: ToolUse,
}
150
151impl RelevanceMeta {
152 /// Project the relevance signals onto a coarse CADMAS-CTX bucket.
153 ///
154 /// Heuristic (Phase B v1):
155 ///
156 /// | Bucket field | Rule |
157 /// |------------------|-----------------------------------------------------|
158 /// | `tool_use` | `Yes` when [`Self::tools`] is non-empty |
159 /// | `dependency` | `Chained` when ≥ 2 files, or any path has `/` |
160 /// | `difficulty` | errors-per-entry ladder: 0 → Easy, 1–2 → Medium, ≥3 → Hard |
161 ///
162 /// # Examples
163 ///
164 /// ```rust
165 /// use codetether_agent::session::relevance::{
166 /// Dependency, Difficulty, RelevanceMeta, ToolUse,
167 /// };
168 ///
169 /// let meta = RelevanceMeta {
170 /// files: vec!["src/a.rs".into(), "tests/b.rs".into()],
171 /// tools: vec!["Shell".into()],
172 /// error_classes: vec!["Error:".into(), "panicked".into(), "failed".into()],
173 /// explicit_refs: Vec::new(),
174 /// };
175 /// let bucket = meta.project_bucket();
176 /// assert_eq!(bucket.tool_use, ToolUse::Yes);
177 /// assert_eq!(bucket.dependency, Dependency::Chained);
178 /// assert_eq!(bucket.difficulty, Difficulty::Hard);
179 /// ```
180 pub fn project_bucket(&self) -> Bucket {
181 let tool_use = if self.tools.is_empty() {
182 ToolUse::No
183 } else {
184 ToolUse::Yes
185 };
186 let dependency = if self.files.len() >= 2 || self.files.iter().any(|f| f.contains('/')) {
187 Dependency::Chained
188 } else {
189 Dependency::Isolated
190 };
191 let difficulty = match self.error_classes.len() {
192 0 => Difficulty::Easy,
193 1 | 2 => Difficulty::Medium,
194 _ => Difficulty::Hard,
195 };
196 Bucket {
197 difficulty,
198 dependency,
199 tool_use,
200 }
201 }
202}
203
/// Short list of lower-cased error markers we match literally.
///
/// Each marker is searched for as a substring anywhere in the lower-cased
/// text (not only at the start), so entries must themselves be lower-case.
/// A trailing `:` is stripped when the marker is turned into a tag.
///
/// Kept tiny and conservative — false positives in Phase C's delegation
/// posteriors are more expensive than false negatives.
const ERROR_MARKERS: &[&str] = &[
    "error:",
    "error[e",
    "failed",
    "panicked",
    "traceback",
    "stack trace",
];
216
217/// Extract [`RelevanceMeta`] for a single chat-history entry.
218///
219/// Pure and fast: no LLM calls, no IO. Safe to call from
220/// [`Session::add_message`](crate::session::Session::add_message).
221pub fn extract(msg: &Message) -> RelevanceMeta {
222 let mut meta = RelevanceMeta::default();
223 for part in &msg.content {
224 match part {
225 ContentPart::Text { text } => {
226 append_files(text, &mut meta.files);
227 append_error_classes(text, &mut meta.error_classes);
228 }
229 ContentPart::ToolCall { name, .. } => {
230 if !meta.tools.contains(name) {
231 meta.tools.push(name.clone());
232 }
233 }
234 ContentPart::ToolResult { content, .. } => {
235 append_error_classes(content, &mut meta.error_classes);
236 }
237 _ => {}
238 }
239 }
240 dedupe_preserving_order(&mut meta.files);
241 dedupe_preserving_order(&mut meta.error_classes);
242 meta
243}
244
245/// Extract path-like tokens from `text` and append unique ones to `out`.
246///
247/// Heuristic: tokens that look like filesystem paths (contain `/` but
248/// not `://`, so URLs are excluded) or end with a common source-file
249/// extension. Intentionally conservative — this feeds
250/// [`Bucket`] projection, so false positives directly harm delegation
251/// calibration (over-reporting `Chained` dependency).
252fn append_files(text: &str, out: &mut Vec<String>) {
253 for raw in text.split(|c: char| c.is_whitespace() || matches!(c, ',' | ';' | '(' | ')' | '`')) {
254 let trimmed = raw.trim_matches(|c: char| matches!(c, '"' | '\'' | '.'));
255 if trimmed.is_empty() || trimmed.len() < 3 {
256 continue;
257 }
258 let looks_like_path =
259 (trimmed.contains('/') && !trimmed.contains("://") && trimmed.len() > 3)
260 || ends_with_source_ext(trimmed);
261 if looks_like_path && !out.contains(&trimmed.to_string()) {
262 out.push(trimmed.to_string());
263 }
264 }
265}
266
/// Returns `true` when `s` ends with one of the recognised source-file
/// extensions. The comparison is case-sensitive and purely textual.
fn ends_with_source_ext(s: &str) -> bool {
    const SOURCE_EXTENSIONS: &[&str] = &[
        ".rs", ".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".md", ".json", ".toml", ".yaml",
        ".yml", ".html", ".css", ".c", ".cpp", ".h", ".hpp",
    ];

    for ext in SOURCE_EXTENSIONS {
        if s.ends_with(*ext) {
            return true;
        }
    }
    false
}
275
276fn append_error_classes(text: &str, out: &mut Vec<String>) {
277 let lower = text.to_lowercase();
278 for marker in ERROR_MARKERS {
279 if lower.contains(marker) {
280 let tag = marker.trim_end_matches(':').to_string();
281 if !out.contains(&tag) {
282 out.push(tag);
283 }
284 }
285 }
286}
287
/// Remove repeated strings from `items` in place, keeping the first
/// occurrence of each and leaving the relative order of survivors intact.
fn dedupe_preserving_order(items: &mut Vec<String>) {
    use std::collections::HashSet;

    let mut already_seen: HashSet<String> = HashSet::with_capacity(items.len());
    items.retain(|candidate| {
        if already_seen.contains(candidate) {
            false
        } else {
            already_seen.insert(candidate.clone());
            true
        }
    });
}
293
/// Unit tests for the syntactic extractor and the bucket projection.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::provider::{ContentPart, Role};

    /// Build an assistant message holding a single text part.
    fn text(s: &str) -> Message {
        Message {
            role: Role::Assistant,
            content: vec![ContentPart::Text {
                text: s.to_string(),
            }],
        }
    }

    /// Build an assistant message holding a single tool call named `name`
    /// with empty JSON arguments.
    fn tool_call(name: &str) -> Message {
        Message {
            role: Role::Assistant,
            content: vec![ContentPart::ToolCall {
                id: "call-1".to_string(),
                name: name.to_string(),
                arguments: "{}".to_string(),
                thought_signature: None,
            }],
        }
    }

    /// Build a tool-role message holding a single tool result with `body`
    /// as its content.
    fn tool_result(body: &str) -> Message {
        Message {
            role: Role::Tool,
            content: vec![ContentPart::ToolResult {
                tool_call_id: "call-1".to_string(),
                content: body.to_string(),
            }],
        }
    }

    /// A path mentioned twice is recorded once; distinct paths are all kept.
    #[test]
    fn extract_picks_up_paths_and_dedupes() {
        let meta = extract(&text(
            "Edited src/lib.rs and src/lib.rs again, plus tests/a.rs",
        ));
        assert_eq!(meta.files.len(), 2);
        assert!(meta.files.contains(&"src/lib.rs".to_string()));
        assert!(meta.files.contains(&"tests/a.rs".to_string()));
    }

    /// Bare file names count as files when they end in a known source
    /// extension, even without a directory separator.
    #[test]
    fn extract_recognises_source_extensions_without_slash() {
        let meta = extract(&text("check lib.rs and index.tsx"));
        assert!(meta.files.iter().any(|f| f == "lib.rs"));
        assert!(meta.files.iter().any(|f| f == "index.tsx"));
    }

    /// The name of a tool call ends up in `tools`.
    #[test]
    fn extract_captures_tool_names_from_tool_calls() {
        let meta = extract(&tool_call("Shell"));
        assert_eq!(meta.tools, vec!["Shell".to_string()]);
    }

    /// Tool-result bodies are scanned for error markers; tags are
    /// lower-cased and lose the trailing `:`.
    #[test]
    fn extract_tags_error_markers_from_tool_results() {
        let meta = extract(&tool_result(
            "Error: file not found\n panicked at main.rs:12",
        ));
        assert!(meta.error_classes.contains(&"error".to_string()));
        assert!(meta.error_classes.contains(&"panicked".to_string()));
    }

    /// One nested path, no tools, no errors → (Easy, Chained, No).
    #[test]
    fn project_bucket_maps_axes_correctly() {
        let meta = RelevanceMeta {
            files: vec!["src/a.rs".into()],
            tools: Vec::new(),
            error_classes: Vec::new(),
            explicit_refs: Vec::new(),
        };
        let bucket = meta.project_bucket();
        assert_eq!(bucket.tool_use, ToolUse::No);
        assert_eq!(bucket.dependency, Dependency::Chained); // single path contains '/'
        assert_eq!(bucket.difficulty, Difficulty::Easy);
    }

    /// Three distinct error classes push the difficulty axis to `Hard`.
    #[test]
    fn project_bucket_escalates_difficulty_with_error_count() {
        let meta = RelevanceMeta {
            error_classes: vec!["error".into(), "failed".into(), "panicked".into()],
            ..Default::default()
        };
        assert_eq!(meta.project_bucket().difficulty, Difficulty::Hard);
    }
}