1use std::{
9 fs,
10 path::{Path, PathBuf},
11};
12
13use anyhow::{Context, Result};
14use serde::Deserialize;
15
16use crate::config::{GroundTruthConfig, HistoryConfig};
17
/// Directory names that `collect_files` refuses to descend into while scanning for
/// ground-truth files.
const SKIP_DIRS: &[&str] = &[".git", "target", "node_modules", ".truth-mirror"];
20
21pub fn collect_ground_truth(repo_root: &Path, config: &GroundTruthConfig) -> Result<String> {
24 if !config.enabled {
25 return Ok(String::new());
26 }
27
28 let mut hits: Vec<(usize, PathBuf)> = Vec::new();
29 collect_files(repo_root, repo_root, config, 0, &mut hits)?;
30 hits.sort_by(|(depth_a, path_a), (depth_b, path_b)| {
32 depth_a.cmp(depth_b).then_with(|| path_a.cmp(path_b))
33 });
34
35 let mut out = String::new();
36 for (_, path) in hits {
37 let rel = path.strip_prefix(repo_root).unwrap_or(&path);
38 let body = match fs::read_to_string(&path) {
39 Ok(body) => body,
40 Err(_) => continue,
41 };
42 let section = format!("### {}\n{}\n\n", rel.display(), body.trim());
43 if out.len() + section.len() > config.max_bytes {
44 let remaining = config.max_bytes.saturating_sub(out.len());
45 out.push_str(&truncate_on_char_boundary(§ion, remaining));
46 break;
47 }
48 out.push_str(§ion);
49 }
50
51 Ok(out.trim_end().to_owned())
52}
53
54fn collect_files(
55 repo_root: &Path,
56 dir: &Path,
57 config: &GroundTruthConfig,
58 depth: usize,
59 hits: &mut Vec<(usize, PathBuf)>,
60) -> Result<()> {
61 let entries = match fs::read_dir(dir) {
62 Ok(entries) => entries,
63 Err(_) => return Ok(()),
64 };
65
66 for entry in entries.flatten() {
67 let path = entry.path();
68 let name = entry.file_name().to_string_lossy().into_owned();
69 let file_type = match entry.file_type() {
70 Ok(file_type) => file_type,
71 Err(_) => continue,
72 };
73
74 if file_type.is_dir() {
75 if SKIP_DIRS.contains(&name.as_str()) {
76 continue;
77 }
78 collect_files(repo_root, &path, config, depth + 1, hits)?;
79 } else if is_ground_truth_file(repo_root, &path, &name, config) {
80 hits.push((depth, path));
81 }
82 }
83
84 Ok(())
85}
86
87fn is_ground_truth_file(
88 repo_root: &Path,
89 path: &Path,
90 name: &str,
91 config: &GroundTruthConfig,
92) -> bool {
93 if config.file_names.iter().any(|wanted| wanted == name) {
94 return true;
95 }
96
97 if config.include_openspec_specs
98 && name.ends_with(".md")
99 && let Ok(rel) = path.strip_prefix(repo_root)
100 {
101 let rel = rel.to_string_lossy();
102 return rel.starts_with("openspec/specs/");
103 }
104
105 false
106}
107
/// Author of a transcript message.
///
/// Deserialized from the lowercase variant name (`"user"` or `"agent"`).
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// The message is attributed to the user.
    User,
    /// The message is attributed to the agent.
    Agent,
}
114
/// One transcript entry: who spoke and what was said.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
pub struct TranscriptMessage {
    /// Which side produced the message.
    pub role: Role,
    /// Message text, stored as given (callers trim it when rendering).
    pub text: String,
}
120
/// A source of transcript messages describing recent work.
pub trait TrajectoryProvider {
    /// Returns the provider's messages, or an error if they could not be produced.
    ///
    /// NOTE(review): presumably ordered oldest-first, since `window_trajectory` keeps messages
    /// from the end of the list — confirm against each implementation.
    fn messages(&self) -> Result<Vec<TranscriptMessage>>;
}
125
/// Trajectory provider backed by a transcript file holding one JSON message per line.
#[derive(Clone, Debug)]
pub struct JsonlTranscriptProvider {
    /// Location of the transcript file to read.
    pub path: PathBuf,
}
131
132impl TrajectoryProvider for JsonlTranscriptProvider {
133 fn messages(&self) -> Result<Vec<TranscriptMessage>> {
134 let contents = match fs::read_to_string(&self.path) {
135 Ok(contents) => contents,
136 Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
137 Err(error) => {
138 return Err(error)
139 .with_context(|| format!("reading transcript {}", self.path.display()));
140 }
141 };
142
143 contents
144 .lines()
145 .filter(|line| !line.trim().is_empty())
146 .map(|line| {
147 serde_json::from_str::<TranscriptMessage>(line)
148 .with_context(|| "parsing transcript line".to_string())
149 })
150 .collect()
151 }
152}
153
/// Trajectory provider that derives messages from recent git commit subjects.
#[derive(Clone, Debug)]
pub struct GitLogProvider {
    /// Directory handed to `git -C` when running `git log`.
    pub repo_root: PathBuf,
    /// Number of commits requested via `git log -n<count>`.
    pub count: usize,
}
161
162impl TrajectoryProvider for GitLogProvider {
163 fn messages(&self) -> Result<Vec<TranscriptMessage>> {
164 let output = std::process::Command::new("git")
165 .arg("-C")
166 .arg(&self.repo_root)
167 .args(["log", &format!("-n{}", self.count), "--format=%s"])
168 .output()
169 .context("running git log for trajectory")?;
170 if !output.status.success() {
171 return Ok(Vec::new());
172 }
173 let text = String::from_utf8_lossy(&output.stdout);
174 let mut messages: Vec<TranscriptMessage> = text
176 .lines()
177 .filter(|line| !line.trim().is_empty())
178 .map(|line| TranscriptMessage {
179 role: Role::Agent,
180 text: format!("committed: {}", line.trim()),
181 })
182 .collect();
183 messages.reverse();
184 Ok(messages)
185 }
186}
187
188pub fn trajectory_provider(
191 repo_root: &Path,
192 history: &HistoryConfig,
193) -> Box<dyn TrajectoryProvider> {
194 if let Some(rel) = &history.transcript_path {
195 let path = repo_root.join(rel);
196 if path.is_file() {
197 return Box::new(JsonlTranscriptProvider { path });
198 }
199 }
200 Box::new(GitLogProvider {
201 repo_root: repo_root.to_path_buf(),
202 count: history.window_user + history.window_agent + 5,
203 })
204}
205
206pub fn window_trajectory(
209 messages: &[TranscriptMessage],
210 window_user: usize,
211 window_agent: usize,
212 max_bytes: usize,
213) -> Vec<TranscriptMessage> {
214 let mut users = 0;
215 let mut agents = 0;
216 let mut kept: Vec<TranscriptMessage> = Vec::new();
217
218 for message in messages.iter().rev() {
219 let keep = match message.role {
220 Role::User if users < window_user => {
221 users += 1;
222 true
223 }
224 Role::Agent if agents < window_agent => {
225 agents += 1;
226 true
227 }
228 _ => false,
229 };
230 if keep {
231 kept.push(message.clone());
232 }
233 }
234 kept.reverse();
235
236 let mut total: usize = kept.iter().map(|message| message.text.len()).sum();
238 let mut start = 0;
239 while start < kept.len() && total > max_bytes {
240 total -= kept[start].text.len();
241 start += 1;
242 }
243 kept[start..].to_vec()
244}
245
246pub fn render_trajectory(messages: &[TranscriptMessage]) -> String {
247 if messages.is_empty() {
248 return String::new();
249 }
250 let mut out = String::new();
251 for message in messages {
252 let who = match message.role {
253 Role::User => "USER",
254 Role::Agent => "AGENT",
255 };
256 out.push_str(&format!("{who}: {}\n", message.text.trim()));
257 }
258 out.trim_end().to_owned()
259}
260
261pub fn build_review_context(
263 repo_root: &Path,
264 ground_truth: &GroundTruthConfig,
265 history: &HistoryConfig,
266 provider: Option<&dyn TrajectoryProvider>,
267) -> Result<String> {
268 let mut out = String::new();
269
270 let constraints = collect_ground_truth(repo_root, ground_truth)?;
271 if !constraints.is_empty() {
272 out.push_str(
273 "INVIOLABLE CONSTRAINTS (ground truth — a change that violates these is a REJECT):\n",
274 );
275 out.push_str(&constraints);
276 out.push_str("\n\n");
277 }
278
279 if let Some(provider) = provider {
280 let messages = provider.messages()?;
281 let windowed = window_trajectory(
282 &messages,
283 history.window_user,
284 history.window_agent,
285 history.max_bytes,
286 );
287 let rendered = render_trajectory(&windowed);
288 if !rendered.is_empty() {
289 out.push_str("RECENT TRAJECTORY (judge the direction of work, not just this diff):\n");
290 out.push_str(&rendered);
291 out.push_str("\n\n");
292 }
293 }
294
295 Ok(out.trim_end().to_owned())
296}
297
/// Returns at most the first `max` bytes of `value` without splitting a UTF-8 character.
///
/// When `value` already fits it is returned whole. Otherwise the cut point is the largest
/// character boundary that does not exceed `max`, so the result may be shorter than `max`.
fn truncate_on_char_boundary(value: &str, max: usize) -> String {
    if max >= value.len() {
        return value.to_owned();
    }
    // Offset 0 is always a boundary, so the backwards search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&offset| value.is_char_boundary(offset))
        .unwrap_or(0);
    value[..cut].to_owned()
}
308
// Unit tests for ground-truth collection, trajectory windowing/rendering, and context assembly.
#[cfg(test)]
mod tests {
    use super::{
        JsonlTranscriptProvider, Role, TrajectoryProvider, TranscriptMessage, build_review_context,
        collect_ground_truth, render_trajectory, window_trajectory,
    };
    use crate::config::{GroundTruthConfig, HistoryConfig};

    /// Shorthand for building a message from a role and a text slice.
    fn msg(role: Role, text: &str) -> TranscriptMessage {
        TranscriptMessage {
            role,
            text: text.to_owned(),
        }
    }

    /// Matching files are found at any depth, while unrelated files and `.git` are left out.
    // NOTE(review): relies on `GroundTruthConfig::default()` recognising AGENTS.md, TRUTH.md and
    // openspec specs — the defaults live in `crate::config`; confirm there.
    #[test]
    fn collects_nested_constraint_files() {
        let temp = tempfile::tempdir().unwrap();
        let root = temp.path();
        std::fs::write(root.join("AGENTS.md"), "root agents").unwrap();
        std::fs::create_dir_all(root.join("sub/dir")).unwrap();
        std::fs::write(root.join("sub/dir/TRUTH.md"), "nested truth").unwrap();
        std::fs::create_dir_all(root.join("openspec/specs/x")).unwrap();
        std::fs::write(root.join("openspec/specs/x/spec.md"), "a spec").unwrap();
        std::fs::write(root.join("README.md"), "readme").unwrap();
        // A matching file name inside a skipped directory must not be picked up.
        std::fs::create_dir_all(root.join(".git")).unwrap();
        std::fs::write(root.join(".git/AGENTS.md"), "should be skipped").unwrap();

        let out = collect_ground_truth(root, &GroundTruthConfig::default()).unwrap();

        assert!(out.contains("root agents"));
        assert!(out.contains("nested truth"));
        assert!(out.contains("a spec"));
        assert!(!out.contains("readme"));
        assert!(!out.contains("should be skipped"));
    }

    /// A file far larger than `max_bytes` is truncated so the output stays within the budget.
    #[test]
    fn ground_truth_respects_byte_budget() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("AGENTS.md"), "x".repeat(1000)).unwrap();
        let config = GroundTruthConfig {
            max_bytes: 100,
            ..GroundTruthConfig::default()
        };

        let out = collect_ground_truth(temp.path(), &config).unwrap();

        assert!(out.len() <= 100, "got {} bytes", out.len());
    }

    /// With `enabled: false` nothing is collected even though a candidate file exists.
    #[test]
    fn disabled_ground_truth_returns_empty() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("TRUTH.md"), "constraints").unwrap();
        let config = GroundTruthConfig {
            enabled: false,
            ..GroundTruthConfig::default()
        };

        assert!(
            collect_ground_truth(temp.path(), &config)
                .unwrap()
                .is_empty()
        );
    }

    /// The last two user and last two agent messages survive, in their original order.
    #[test]
    fn window_keeps_last_n_and_m_in_order() {
        let messages = vec![
            msg(Role::User, "u1"),
            msg(Role::Agent, "a1"),
            msg(Role::Agent, "a2"),
            msg(Role::User, "u2"),
            msg(Role::Agent, "a3"),
            msg(Role::User, "u3"),
        ];

        let windowed = window_trajectory(&messages, 2, 2, 10_000);

        let texts: Vec<&str> = windowed.iter().map(|m| m.text.as_str()).collect();
        // u1 and a1 fall outside the windows; everything else keeps its relative position.
        assert_eq!(texts, ["a2", "u2", "a3", "u3"]);
    }

    /// Per-role counts never exceed the requested windows on a long alternating transcript.
    #[test]
    fn window_never_exceeds_limits() {
        let mut messages = Vec::new();
        for i in 0..50 {
            messages.push(msg(Role::User, &format!("u{i}")));
            messages.push(msg(Role::Agent, &format!("a{i}")));
        }

        let windowed = window_trajectory(&messages, 3, 5, 10_000);

        let users = windowed.iter().filter(|m| m.role == Role::User).count();
        let agents = windowed.iter().filter(|m| m.role == Role::Agent).count();
        assert!(users <= 3);
        assert!(agents <= 5);
    }

    /// Each JSON line becomes one message, with lowercase role strings mapped onto `Role`.
    #[test]
    fn jsonl_provider_reads_messages() {
        let temp = tempfile::tempdir().unwrap();
        let path = temp.path().join("t.jsonl");
        std::fs::write(
            &path,
            "{\"role\":\"user\",\"text\":\"do X\"}\n{\"role\":\"agent\",\"text\":\"did Y\"}\n",
        )
        .unwrap();

        let provider = JsonlTranscriptProvider { path };
        let messages = provider.messages().unwrap();

        assert_eq!(messages.len(), 2);
        assert_eq!(messages[0].role, Role::User);
        assert_eq!(messages[1].text, "did Y");
    }

    /// Both headed sections appear when constraints and a transcript are available.
    #[test]
    fn build_context_includes_constraints_and_trajectory() {
        let temp = tempfile::tempdir().unwrap();
        std::fs::write(temp.path().join("TRUTH.md"), "never fake tests").unwrap();
        let transcript = temp.path().join("t.jsonl");
        std::fs::write(
            &transcript,
            "{\"role\":\"user\",\"text\":\"add feature\"}\n",
        )
        .unwrap();
        let provider = JsonlTranscriptProvider { path: transcript };

        let out = build_review_context(
            temp.path(),
            &GroundTruthConfig::default(),
            &HistoryConfig::default(),
            Some(&provider),
        )
        .unwrap();

        assert!(out.contains("INVIOLABLE CONSTRAINTS"));
        assert!(out.contains("never fake tests"));
        assert!(out.contains("RECENT TRAJECTORY"));
        assert!(out.contains("add feature"));
    }

    /// Rendering an empty slice yields an empty string.
    #[test]
    fn render_trajectory_is_empty_for_no_messages() {
        assert!(render_trajectory(&[]).is_empty());
    }
}