Skip to main content

truth_mirror/
context.rs

1//! Ground-truth constraints and conversation-trajectory context fed into review.
2//!
3//! Ground truth = inviolable constraints discovered from `TRUTH.md`/`AGENTS.md`/
4//! `CLAUDE.md` at any nested level plus OpenSpec specs. Trajectory = a bounded
5//! window of recent user/agent messages so the reviewer judges direction, not
6//! just the isolated diff.
7
8use std::{
9    fs,
10    path::{Path, PathBuf},
11};
12
13use anyhow::{Context, Result};
14use serde::Deserialize;
15
16use crate::config::{GroundTruthConfig, HistoryConfig};
17
/// Directories never walked for ground-truth files.
///
/// Entries are compared against the bare directory name, so a directory with
/// one of these names is skipped at any depth of the walk, not only at the
/// repo root.
const SKIP_DIRS: &[&str] = &[".git", "target", "node_modules", ".truth-mirror"];
20
21/// Collect ground-truth constraints from the repo, shallowest (repo-root) first,
22/// bounded by `max_bytes`. Deterministic ordering (by depth then path) for tests.
23pub fn collect_ground_truth(repo_root: &Path, config: &GroundTruthConfig) -> Result<String> {
24    if !config.enabled {
25        return Ok(String::new());
26    }
27
28    let mut hits: Vec<(usize, PathBuf)> = Vec::new();
29    collect_files(repo_root, repo_root, config, 0, &mut hits)?;
30    // Shallowest first, then lexicographic — root AGENTS.md/TRUTH.md win the budget.
31    hits.sort_by(|(depth_a, path_a), (depth_b, path_b)| {
32        depth_a.cmp(depth_b).then_with(|| path_a.cmp(path_b))
33    });
34
35    let mut out = String::new();
36    for (_, path) in hits {
37        let rel = path.strip_prefix(repo_root).unwrap_or(&path);
38        let body = match fs::read_to_string(&path) {
39            Ok(body) => body,
40            Err(_) => continue,
41        };
42        let section = format!("### {}\n{}\n\n", rel.display(), body.trim());
43        if out.len() + section.len() > config.max_bytes {
44            let remaining = config.max_bytes.saturating_sub(out.len());
45            out.push_str(&truncate_on_char_boundary(&section, remaining));
46            break;
47        }
48        out.push_str(&section);
49    }
50
51    Ok(out.trim_end().to_owned())
52}
53
54fn collect_files(
55    repo_root: &Path,
56    dir: &Path,
57    config: &GroundTruthConfig,
58    depth: usize,
59    hits: &mut Vec<(usize, PathBuf)>,
60) -> Result<()> {
61    let entries = match fs::read_dir(dir) {
62        Ok(entries) => entries,
63        Err(_) => return Ok(()),
64    };
65
66    for entry in entries.flatten() {
67        let path = entry.path();
68        let name = entry.file_name().to_string_lossy().into_owned();
69        let file_type = match entry.file_type() {
70            Ok(file_type) => file_type,
71            Err(_) => continue,
72        };
73
74        if file_type.is_dir() {
75            if SKIP_DIRS.contains(&name.as_str()) {
76                continue;
77            }
78            collect_files(repo_root, &path, config, depth + 1, hits)?;
79        } else if is_ground_truth_file(repo_root, &path, &name, config) {
80            hits.push((depth, path));
81        }
82    }
83
84    Ok(())
85}
86
87fn is_ground_truth_file(
88    repo_root: &Path,
89    path: &Path,
90    name: &str,
91    config: &GroundTruthConfig,
92) -> bool {
93    if config.file_names.iter().any(|wanted| wanted == name) {
94        return true;
95    }
96
97    if config.include_openspec_specs
98        && name.ends_with(".md")
99        && let Ok(rel) = path.strip_prefix(repo_root)
100    {
101        let rel = rel.to_string_lossy();
102        return rel.starts_with("openspec/specs/");
103    }
104
105    false
106}
107
/// Author of a transcript message.
///
/// Deserializes from the lowercase variant name (`"user"` / `"agent"`).
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// Message written by the user; rendered with the `USER` prefix.
    User,
    /// Message written by the agent; rendered with the `AGENT` prefix.
    Agent,
}
114
/// One entry of the conversation trajectory: who spoke and what was said.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
pub struct TranscriptMessage {
    /// Author of the message.
    pub role: Role,
    /// Message body; its byte length is what the trajectory byte budget counts.
    pub text: String,
}
120
/// Yields recent conversation messages (oldest-first).
pub trait TrajectoryProvider {
    /// Return the messages this provider can supply, oldest first.
    ///
    /// # Errors
    ///
    /// Returns an error when the provider's backing source fails.
    fn messages(&self) -> Result<Vec<TranscriptMessage>>;
}
125
126/// Reads a `{"role":"user|agent","text":"..."}`-per-line transcript.
127#[derive(Clone, Debug)]
128pub struct JsonlTranscriptProvider {
129    pub path: PathBuf,
130}
131
132impl TrajectoryProvider for JsonlTranscriptProvider {
133    fn messages(&self) -> Result<Vec<TranscriptMessage>> {
134        let contents = match fs::read_to_string(&self.path) {
135            Ok(contents) => contents,
136            Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
137            Err(error) => {
138                return Err(error)
139                    .with_context(|| format!("reading transcript {}", self.path.display()));
140            }
141        };
142
143        contents
144            .lines()
145            .filter(|line| !line.trim().is_empty())
146            .map(|line| {
147                serde_json::from_str::<TranscriptMessage>(line)
148                    .with_context(|| "parsing transcript line".to_string())
149            })
150            .collect()
151    }
152}
153
154/// Recent commit subjects as agent messages — a universal trajectory proxy when
155/// no explicit transcript is configured.
156#[derive(Clone, Debug)]
157pub struct GitLogProvider {
158    pub repo_root: PathBuf,
159    pub count: usize,
160}
161
162impl TrajectoryProvider for GitLogProvider {
163    fn messages(&self) -> Result<Vec<TranscriptMessage>> {
164        let output = std::process::Command::new("git")
165            .arg("-C")
166            .arg(&self.repo_root)
167            .args(["log", &format!("-n{}", self.count), "--format=%s"])
168            .output()
169            .context("running git log for trajectory")?;
170        if !output.status.success() {
171            return Ok(Vec::new());
172        }
173        let text = String::from_utf8_lossy(&output.stdout);
174        // git log is newest-first; reverse to oldest-first for chronological order.
175        let mut messages: Vec<TranscriptMessage> = text
176            .lines()
177            .filter(|line| !line.trim().is_empty())
178            .map(|line| TranscriptMessage {
179                role: Role::Agent,
180                text: format!("committed: {}", line.trim()),
181            })
182            .collect();
183        messages.reverse();
184        Ok(messages)
185    }
186}
187
188/// Pick a trajectory provider: the configured JSONL transcript if present, else
189/// recent commits.
190pub fn trajectory_provider(
191    repo_root: &Path,
192    history: &HistoryConfig,
193) -> Box<dyn TrajectoryProvider> {
194    if let Some(rel) = &history.transcript_path {
195        let path = repo_root.join(rel);
196        if path.is_file() {
197            return Box::new(JsonlTranscriptProvider { path });
198        }
199    }
200    Box::new(GitLogProvider {
201        repo_root: repo_root.to_path_buf(),
202        count: history.window_user + history.window_agent + 5,
203    })
204}
205
206/// Keep the last `window_user` user messages and last `window_agent` agent
207/// messages, preserving chronological order, then trim oldest to `max_bytes`.
208pub fn window_trajectory(
209    messages: &[TranscriptMessage],
210    window_user: usize,
211    window_agent: usize,
212    max_bytes: usize,
213) -> Vec<TranscriptMessage> {
214    let mut users = 0;
215    let mut agents = 0;
216    let mut kept: Vec<TranscriptMessage> = Vec::new();
217
218    for message in messages.iter().rev() {
219        let keep = match message.role {
220            Role::User if users < window_user => {
221                users += 1;
222                true
223            }
224            Role::Agent if agents < window_agent => {
225                agents += 1;
226                true
227            }
228            _ => false,
229        };
230        if keep {
231            kept.push(message.clone());
232        }
233    }
234    kept.reverse();
235
236    // Byte budget: drop oldest until under budget.
237    let mut total: usize = kept.iter().map(|message| message.text.len()).sum();
238    let mut start = 0;
239    while start < kept.len() && total > max_bytes {
240        total -= kept[start].text.len();
241        start += 1;
242    }
243    kept[start..].to_vec()
244}
245
246pub fn render_trajectory(messages: &[TranscriptMessage]) -> String {
247    if messages.is_empty() {
248        return String::new();
249    }
250    let mut out = String::new();
251    for message in messages {
252        let who = match message.role {
253            Role::User => "USER",
254            Role::Agent => "AGENT",
255        };
256        out.push_str(&format!("{who}: {}\n", message.text.trim()));
257    }
258    out.trim_end().to_owned()
259}
260
261/// Build the combined review-context block (constraints + trajectory).
262pub fn build_review_context(
263    repo_root: &Path,
264    ground_truth: &GroundTruthConfig,
265    history: &HistoryConfig,
266    provider: Option<&dyn TrajectoryProvider>,
267) -> Result<String> {
268    let mut out = String::new();
269
270    let constraints = collect_ground_truth(repo_root, ground_truth)?;
271    if !constraints.is_empty() {
272        out.push_str(
273            "INVIOLABLE CONSTRAINTS (ground truth — a change that violates these is a REJECT):\n",
274        );
275        out.push_str(&constraints);
276        out.push_str("\n\n");
277    }
278
279    if let Some(provider) = provider {
280        let messages = provider.messages()?;
281        let windowed = window_trajectory(
282            &messages,
283            history.window_user,
284            history.window_agent,
285            history.max_bytes,
286        );
287        let rendered = render_trajectory(&windowed);
288        if !rendered.is_empty() {
289            out.push_str("RECENT TRAJECTORY (judge the direction of work, not just this diff):\n");
290            out.push_str(&rendered);
291            out.push_str("\n\n");
292        }
293    }
294
295    Ok(out.trim_end().to_owned())
296}
297
/// Return the longest prefix of `value` that is at most `max` bytes long and
/// ends on a UTF-8 character boundary.
///
/// The whole string is returned when it already fits.
fn truncate_on_char_boundary(value: &str, max: usize) -> String {
    let cut = if max >= value.len() {
        value.len()
    } else {
        // Index 0 is always a boundary, so the backwards search always succeeds.
        (0..=max)
            .rev()
            .find(|&index| value.is_char_boundary(index))
            .unwrap_or(0)
    };
    value[..cut].to_owned()
}
308
// Unit tests for ground-truth collection, trajectory windowing/rendering, the
// JSONL provider, and the combined review-context builder. Filesystem-backed
// tests run inside a fresh temporary directory.
#[cfg(test)]
mod tests {
    use super::{
        JsonlTranscriptProvider, Role, TrajectoryProvider, TranscriptMessage, build_review_context,
        collect_ground_truth, render_trajectory, window_trajectory,
    };
    use crate::config::{GroundTruthConfig, HistoryConfig};

    /// Shorthand constructor for a transcript message.
    fn msg(role: Role, text: &str) -> TranscriptMessage {
        TranscriptMessage {
            role,
            text: text.to_owned(),
        }
    }

    /// Constraint files are found at the root, in nested directories, and under
    /// `openspec/specs`; unrelated files and skipped directories are ignored.
    #[test]
    fn collects_nested_constraint_files() {
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        std::fs::write(root.join("AGENTS.md"), "root agents").unwrap();
        std::fs::create_dir_all(root.join("sub/dir")).unwrap();
        std::fs::write(root.join("sub/dir/TRUTH.md"), "nested truth").unwrap();
        std::fs::create_dir_all(root.join("openspec/specs/x")).unwrap();
        std::fs::write(root.join("openspec/specs/x/spec.md"), "a spec").unwrap();
        // Ignored: unrelated file and skip dir.
        std::fs::write(root.join("README.md"), "readme").unwrap();
        std::fs::create_dir_all(root.join(".git")).unwrap();
        std::fs::write(root.join(".git/AGENTS.md"), "should be skipped").unwrap();

        let out = collect_ground_truth(root, &GroundTruthConfig::default()).unwrap();

        assert!(out.contains("root agents"));
        assert!(out.contains("nested truth"));
        assert!(out.contains("a spec"));
        assert!(!out.contains("readme"));
        assert!(!out.contains("should be skipped"));
    }

    /// A file larger than `max_bytes` is truncated so the output fits the budget.
    #[test]
    fn ground_truth_respects_byte_budget() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("AGENTS.md"), "x".repeat(1000)).unwrap();
        let config = GroundTruthConfig {
            max_bytes: 100,
            ..GroundTruthConfig::default()
        };

        let out = collect_ground_truth(temp.path(), &config).unwrap();

        assert!(out.len() <= 100, "got {} bytes", out.len());
    }

    /// With `enabled: false` nothing is collected even when a matching file exists.
    #[test]
    fn disabled_ground_truth_returns_empty() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("TRUTH.md"), "constraints").unwrap();
        let config = GroundTruthConfig {
            enabled: false,
            ..GroundTruthConfig::default()
        };

        assert!(
            collect_ground_truth(temp.path(), &config)
                .unwrap()
                .is_empty()
        );
    }

    /// Windowing keeps the newest N user and M agent messages and returns them
    /// in their original chronological order.
    #[test]
    fn window_keeps_last_n_and_m_in_order() {
        let messages = vec![
            msg(Role::User, "u1"),
            msg(Role::Agent, "a1"),
            msg(Role::Agent, "a2"),
            msg(Role::User, "u2"),
            msg(Role::Agent, "a3"),
            msg(Role::User, "u3"),
        ];

        let windowed = window_trajectory(&messages, 2, 2, 10_000);

        // last 2 users (u2,u3), last 2 agents (a2,a3), chronological.
        let texts: Vec<&str> = windowed.iter().map(|m| m.text.as_str()).collect();
        assert_eq!(texts, ["a2", "u2", "a3", "u3"]);
    }

    /// Per-role limits hold even when the input is much longer than the window.
    #[test]
    fn window_never_exceeds_limits() {
        let mut messages = Vec::new();
        for i in 0..50 {
            messages.push(msg(Role::User, &format!("u{i}")));
            messages.push(msg(Role::Agent, &format!("a{i}")));
        }

        let windowed = window_trajectory(&messages, 3, 5, 10_000);

        let users = windowed.iter().filter(|m| m.role == Role::User).count();
        let agents = windowed.iter().filter(|m| m.role == Role::Agent).count();
        assert!(users <= 3);
        assert!(agents <= 5);
    }

    /// The JSONL provider parses one message per line, in file order.
    #[test]
    fn jsonl_provider_reads_messages() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join("t.jsonl");
        std::fs::write(
            &path,
            "{\"role\":\"user\",\"text\":\"do X\"}\n{\"role\":\"agent\",\"text\":\"did Y\"}\n",
        )
        .unwrap();

        let provider = JsonlTranscriptProvider { path };
        let messages = provider.messages().unwrap();

        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0].role, Role::User);
        assert_eq!(messages[1].text, "did Y");
    }

    /// The combined context carries both section headers and their contents.
    #[test]
    fn build_context_includes_constraints_and_trajectory() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("TRUTH.md"), "never fake tests").unwrap();
        let transcript = temp.path().join("t.jsonl");
        std::fs::write(
            &transcript,
            "{\"role\":\"user\",\"text\":\"add feature\"}\n",
        )
        .unwrap();
        let provider = JsonlTranscriptProvider { path: transcript };

        let out = build_review_context(
            temp.path(),
            &GroundTruthConfig::default(),
            &HistoryConfig::default(),
            Some(&provider),
        )
        .unwrap();

        assert!(out.contains("INVIOLABLE CONSTRAINTS"));
        assert!(out.contains("never fake tests"));
        assert!(out.contains("RECENT TRAJECTORY"));
        assert!(out.contains("add feature"));
    }

    /// Rendering an empty message list produces an empty string.
    #[test]
    fn render_trajectory_is_empty_for_no_messages() {
        assert!(render_trajectory(&[]).is_empty());
    }
}