use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::{Context, Result};
use serde::Deserialize;
use crate::config::{GroundTruthConfig, HistoryConfig};
/// Directory names that the ground-truth walk never descends into.
/// Matching is done on the bare directory name, at any depth of the tree.
const SKIP_DIRS: &[&str] = &[".git", "target", "node_modules", ".truth-mirror"];
/// Gathers every ground-truth file under `repo_root` into a single text blob.
///
/// Files are discovered by `collect_files`, ordered shallowest-first (ties broken by
/// path), and each one is rendered as a `### <relative path>` heading followed by the
/// trimmed file contents.
///
/// The result never exceeds `config.max_bytes` bytes: the first section that would
/// overflow the budget is cut on a UTF-8 character boundary and no further files are
/// appended. Trailing whitespace is removed from the final string.
///
/// Returns an empty string when `config.enabled` is `false`. Files that cannot be read
/// as UTF-8 text are skipped rather than reported (best effort).
///
/// # Errors
///
/// Propagates any error returned by `collect_files`.
pub fn collect_ground_truth(repo_root: &Path, config: &GroundTruthConfig) -> Result<String> {
    if !config.enabled {
        return Ok(String::new());
    }

    let mut hits: Vec<(usize, PathBuf)> = Vec::new();
    collect_files(repo_root, repo_root, config, 0, &mut hits)?;
    // Shallow files first so repository-wide rules win the byte budget over nested ones.
    hits.sort_by(|(depth_a, path_a), (depth_b, path_b)| {
        depth_a.cmp(depth_b).then_with(|| path_a.cmp(path_b))
    });

    let mut out = String::new();
    for (_, path) in hits {
        // Unreadable or non-UTF-8 files are deliberately skipped, not treated as fatal.
        let Ok(body) = fs::read_to_string(&path) else {
            continue;
        };
        let rel = path.strip_prefix(repo_root).unwrap_or(&path);
        let section = format!("### {}\n{}\n\n", rel.display(), body.trim());

        // `out` never grows past `max_bytes`, so this is the exact space still available.
        let remaining = config.max_bytes.saturating_sub(out.len());
        if section.len() > remaining {
            out.push_str(&truncate_on_char_boundary(&section, remaining));
            break;
        }
        out.push_str(&section);
    }
    Ok(out.trim_end().to_owned())
}
/// Recursively scans `dir` and records every ground-truth file in `hits`.
///
/// Each hit is stored together with `depth`, the nesting level of the directory that
/// contains it (`0` for the directory the walk started from). Directories whose name
/// appears in `SKIP_DIRS` are not entered. A directory that cannot be listed, and any
/// entry whose file type cannot be determined, is silently ignored.
fn collect_files(
    repo_root: &Path,
    dir: &Path,
    config: &GroundTruthConfig,
    depth: usize,
    hits: &mut Vec<(usize, PathBuf)>,
) -> Result<()> {
    let Ok(listing) = fs::read_dir(dir) else {
        return Ok(());
    };

    for entry in listing.flatten() {
        let Ok(kind) = entry.file_type() else {
            continue;
        };
        let path = entry.path();
        let name = entry.file_name().to_string_lossy().into_owned();

        if !kind.is_dir() {
            if is_ground_truth_file(repo_root, &path, &name, config) {
                hits.push((depth, path));
            }
            continue;
        }

        if !SKIP_DIRS.contains(&name.as_str()) {
            collect_files(repo_root, &path, config, depth + 1, hits)?;
        }
    }

    Ok(())
}
fn is_ground_truth_file(
repo_root: &Path,
path: &Path,
name: &str,
config: &GroundTruthConfig,
) -> bool {
if config.file_names.iter().any(|wanted| wanted == name) {
return true;
}
if config.include_openspec_specs
&& name.ends_with(".md")
&& let Ok(rel) = path.strip_prefix(repo_root)
{
let rel = rel.to_string_lossy();
return rel.starts_with("openspec/specs/");
}
false
}
/// Author of a transcript message.
///
/// Deserialized from the lowercase variant name (`"user"` or `"agent"`).
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
/// The user side of the conversation.
User,
/// The agent side of the conversation; also used for synthesized git-log entries.
Agent,
}
/// One entry of a work trajectory: who spoke and what was said.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq)]
pub struct TranscriptMessage {
/// Which side produced the message.
pub role: Role,
/// Raw message text; it is trimmed only when rendered.
pub text: String,
}
/// A source of trajectory messages for the review context.
pub trait TrajectoryProvider {
/// Returns the messages this provider can supply, or an error if they cannot be
/// obtained. The implementations in this file return them oldest first.
fn messages(&self) -> Result<Vec<TranscriptMessage>>;
}
/// Reads trajectory messages from a JSON Lines transcript file.
#[derive(Clone, Debug)]
pub struct JsonlTranscriptProvider {
    /// Location of the transcript; every non-blank line holds one JSON-encoded message.
    pub path: PathBuf,
}

impl TrajectoryProvider for JsonlTranscriptProvider {
    /// Parses the transcript at `self.path`, one [`TranscriptMessage`] per non-blank line.
    ///
    /// A missing file is not an error and yields an empty list.
    ///
    /// # Errors
    ///
    /// Fails when the file exists but cannot be read, or when a line is not a valid
    /// message; the parse error names the file and the 1-based line number.
    fn messages(&self) -> Result<Vec<TranscriptMessage>> {
        let contents = match fs::read_to_string(&self.path) {
            Ok(contents) => contents,
            Err(error) if error.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
            Err(error) => {
                return Err(error)
                    .with_context(|| format!("reading transcript {}", self.path.display()));
            }
        };

        contents
            .lines()
            // Number lines before dropping blanks so reported positions match the file.
            .enumerate()
            .filter(|(_, line)| !line.trim().is_empty())
            .map(|(index, line)| {
                serde_json::from_str::<TranscriptMessage>(line).with_context(|| {
                    format!(
                        "parsing transcript {} line {}",
                        self.path.display(),
                        index + 1
                    )
                })
            })
            .collect()
    }
}
/// Synthesizes a trajectory from recent commit subjects when no transcript is available.
#[derive(Clone, Debug)]
pub struct GitLogProvider {
    /// Repository directory handed to `git -C`.
    pub repo_root: PathBuf,
    /// Upper bound on the number of commits requested (`git log -n<count>`).
    pub count: usize,
}

impl TrajectoryProvider for GitLogProvider {
    /// Runs `git log` and turns each commit subject into an agent message of the form
    /// `committed: <subject>`, in the reverse of the order git printed them.
    ///
    /// A `git` invocation that exits unsuccessfully yields an empty list.
    ///
    /// # Errors
    ///
    /// Fails only when the `git` process cannot be run at all.
    fn messages(&self) -> Result<Vec<TranscriptMessage>> {
        let limit = format!("-n{}", self.count);
        let mut git = std::process::Command::new("git");
        git.arg("-C")
            .arg(&self.repo_root)
            .arg("log")
            .arg(&limit)
            .arg("--format=%s");

        let output = git.output().context("running git log for trajectory")?;
        if !output.status.success() {
            return Ok(Vec::new());
        }

        let stdout = String::from_utf8_lossy(&output.stdout);
        // Walk the output bottom-up so the resulting list is the log in reverse.
        let messages = stdout
            .lines()
            .rev()
            .map(str::trim)
            .filter(|subject| !subject.is_empty())
            .map(|subject| TranscriptMessage {
                role: Role::Agent,
                text: format!("committed: {subject}"),
            })
            .collect();
        Ok(messages)
    }
}
/// Picks the trajectory source for a repository.
///
/// When `history.transcript_path` is set and, joined onto `repo_root`, points at an
/// existing regular file, a [`JsonlTranscriptProvider`] for that file is returned.
/// Otherwise the trajectory falls back to a [`GitLogProvider`] asking for
/// `window_user + window_agent + 5` commits.
pub fn trajectory_provider(
    repo_root: &Path,
    history: &HistoryConfig,
) -> Box<dyn TrajectoryProvider> {
    let transcript = history
        .transcript_path
        .as_ref()
        .map(|rel| repo_root.join(rel))
        .filter(|candidate| candidate.is_file());

    match transcript {
        Some(path) => Box::new(JsonlTranscriptProvider { path }),
        None => Box::new(GitLogProvider {
            repo_root: repo_root.to_path_buf(),
            count: history.window_user + history.window_agent + 5,
        }),
    }
}
/// Trims a trajectory to its most recent part.
///
/// Keeps at most the last `window_user` user messages and the last `window_agent` agent
/// messages, preserving their original relative order. If the combined byte length of
/// the kept message texts still exceeds `max_bytes`, the oldest kept messages are dropped
/// one at a time until it fits (possibly leaving nothing).
pub fn window_trajectory(
    messages: &[TranscriptMessage],
    window_user: usize,
    window_agent: usize,
    max_bytes: usize,
) -> Vec<TranscriptMessage> {
    let mut users_left = window_user;
    let mut agents_left = window_agent;

    // Scan newest-to-oldest, spending one unit of the matching quota per kept message.
    let mut selected: Vec<&TranscriptMessage> = messages
        .iter()
        .rev()
        .filter(|message| {
            let quota = match message.role {
                Role::User => &mut users_left,
                Role::Agent => &mut agents_left,
            };
            if *quota == 0 {
                return false;
            }
            *quota -= 1;
            true
        })
        .collect();
    selected.reverse();

    // Only the message text counts against the byte budget.
    let mut total: usize = selected.iter().map(|message| message.text.len()).sum();
    let mut dropped = 0;
    for message in &selected {
        if total <= max_bytes {
            break;
        }
        total -= message.text.len();
        dropped += 1;
    }

    selected.into_iter().skip(dropped).cloned().collect()
}
/// Formats messages as one `USER: …` / `AGENT: …` line each.
///
/// Every message text is trimmed before it is written, and trailing whitespace is
/// removed from the overall result. An empty slice renders as an empty string.
pub fn render_trajectory(messages: &[TranscriptMessage]) -> String {
    let rendered: String = messages
        .iter()
        .map(|message| {
            let label = match message.role {
                Role::User => "USER",
                Role::Agent => "AGENT",
            };
            format!("{label}: {}\n", message.text.trim())
        })
        .collect();
    rendered.trim_end().to_owned()
}
/// Assembles the text that precedes a review: constraints first, then trajectory.
///
/// The constraints section comes from [`collect_ground_truth`]. The trajectory section
/// is produced only when a `provider` is supplied: its messages are windowed with the
/// limits in `history` and rendered with [`render_trajectory`]. A section whose body is
/// empty is left out entirely, heading included. Trailing whitespace is trimmed.
///
/// # Errors
///
/// Propagates failures from ground-truth collection and from `provider.messages()`.
pub fn build_review_context(
    repo_root: &Path,
    ground_truth: &GroundTruthConfig,
    history: &HistoryConfig,
    provider: Option<&dyn TrajectoryProvider>,
) -> Result<String> {
    let constraints = collect_ground_truth(repo_root, ground_truth)?;

    let trajectory = match provider {
        Some(source) => {
            let all = source.messages()?;
            let recent = window_trajectory(
                &all,
                history.window_user,
                history.window_agent,
                history.max_bytes,
            );
            render_trajectory(&recent)
        }
        None => String::new(),
    };

    let sections = [
        (
            "INVIOLABLE CONSTRAINTS (ground truth — a change that violates these is a REJECT):\n",
            constraints.as_str(),
        ),
        (
            "RECENT TRAJECTORY (judge the direction of work, not just this diff):\n",
            trajectory.as_str(),
        ),
    ];

    let mut context = String::new();
    for (heading, body) in sections {
        if body.is_empty() {
            continue;
        }
        context.push_str(heading);
        context.push_str(body);
        context.push_str("\n\n");
    }
    Ok(context.trim_end().to_owned())
}
/// Returns the longest prefix of `value` that is at most `max` bytes long and ends on a
/// UTF-8 character boundary. Strings already within the limit are returned unchanged.
fn truncate_on_char_boundary(value: &str, max: usize) -> String {
    if max >= value.len() {
        return value.to_owned();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&index| value.is_char_boundary(index))
        .unwrap_or(0);
    value[..cut].to_owned()
}
// Unit tests for ground-truth collection, trajectory windowing/rendering and the
// combined review context. Filesystem tests run inside a fresh temporary directory.
#[cfg(test)]
mod tests {
use super::{
JsonlTranscriptProvider, Role, TrajectoryProvider, TranscriptMessage, build_review_context,
collect_ground_truth, render_trajectory, window_trajectory,
};
use crate::config::{GroundTruthConfig, HistoryConfig};
// Shorthand for building a message from a role and a string slice.
fn msg(role: Role, text: &str) -> TranscriptMessage {
TranscriptMessage {
role,
text: text.to_owned(),
}
}
// Files at the root, in nested directories and under openspec/specs are collected;
// README.md and anything inside .git are not.
// NOTE(review): relies on the default config listing AGENTS.md and TRUTH.md and
// enabling openspec specs — confirm against the config module.
#[test]
fn collects_nested_constraint_files() {
let temp = tempfile::tempdir().unwrap();
let root = temp.path();
std::fs::write(root.join("AGENTS.md"), "root agents").unwrap();
std::fs::create_dir_all(root.join("sub/dir")).unwrap();
std::fs::write(root.join("sub/dir/TRUTH.md"), "nested truth").unwrap();
std::fs::create_dir_all(root.join("openspec/specs/x")).unwrap();
std::fs::write(root.join("openspec/specs/x/spec.md"), "a spec").unwrap();
std::fs::write(root.join("README.md"), "readme").unwrap();
std::fs::create_dir_all(root.join(".git")).unwrap();
std::fs::write(root.join(".git/AGENTS.md"), "should be skipped").unwrap();
let out = collect_ground_truth(root, &GroundTruthConfig::default()).unwrap();
assert!(out.contains("root agents"));
assert!(out.contains("nested truth"));
assert!(out.contains("a spec"));
assert!(!out.contains("readme"));
assert!(!out.contains("should be skipped"));
}
// A 1000-byte file with a 100-byte budget must produce at most 100 bytes of output.
#[test]
fn ground_truth_respects_byte_budget() {
let temp = tempfile::tempdir().unwrap();
std::fs::write(temp.path().join("AGENTS.md"), "x".repeat(1000)).unwrap();
let config = GroundTruthConfig {
max_bytes: 100,
..GroundTruthConfig::default()
};
let out = collect_ground_truth(temp.path(), &config).unwrap();
assert!(out.len() <= 100, "got {} bytes", out.len());
}
// With `enabled: false` nothing is collected even though a matching file exists.
#[test]
fn disabled_ground_truth_returns_empty() {
let temp = tempfile::tempdir().unwrap();
std::fs::write(temp.path().join("TRUTH.md"), "constraints").unwrap();
let config = GroundTruthConfig {
enabled: false,
..GroundTruthConfig::default()
};
assert!(
collect_ground_truth(temp.path(), &config)
.unwrap()
.is_empty()
);
}
// The last two user and last two agent messages survive, in their original order.
#[test]
fn window_keeps_last_n_and_m_in_order() {
let messages = vec![
msg(Role::User, "u1"),
msg(Role::Agent, "a1"),
msg(Role::Agent, "a2"),
msg(Role::User, "u2"),
msg(Role::Agent, "a3"),
msg(Role::User, "u3"),
];
let windowed = window_trajectory(&messages, 2, 2, 10_000);
let texts: Vec<&str> = windowed.iter().map(|m| m.text.as_str()).collect();
assert_eq!(texts, ["a2", "u2", "a3", "u3"]);
}
// Per-role counts in the window never exceed the requested limits.
#[test]
fn window_never_exceeds_limits() {
let mut messages = Vec::new();
for i in 0..50 {
messages.push(msg(Role::User, &format!("u{i}")));
messages.push(msg(Role::Agent, &format!("a{i}")));
}
let windowed = window_trajectory(&messages, 3, 5, 10_000);
let users = windowed.iter().filter(|m| m.role == Role::User).count();
let agents = windowed.iter().filter(|m| m.role == Role::Agent).count();
assert!(users <= 3);
assert!(agents <= 5);
}
// Two JSONL lines parse into two messages with the expected role and text.
#[test]
fn jsonl_provider_reads_messages() {
let temp = tempfile::tempdir().unwrap();
let path = temp.path().join("t.jsonl");
std::fs::write(
&path,
"{\"role\":\"user\",\"text\":\"do X\"}\n{\"role\":\"agent\",\"text\":\"did Y\"}\n",
)
.unwrap();
let provider = JsonlTranscriptProvider { path };
let messages = provider.messages().unwrap();
assert_eq!(messages.len(), 2);
assert_eq!(messages[0].role, Role::User);
assert_eq!(messages[1].text, "did Y");
}
// The combined context carries both section headings and both bodies.
#[test]
fn build_context_includes_constraints_and_trajectory() {
let temp = tempfile::tempdir().unwrap();
std::fs::write(temp.path().join("TRUTH.md"), "never fake tests").unwrap();
let transcript = temp.path().join("t.jsonl");
std::fs::write(
&transcript,
"{\"role\":\"user\",\"text\":\"add feature\"}\n",
)
.unwrap();
let provider = JsonlTranscriptProvider { path: transcript };
let out = build_review_context(
temp.path(),
&GroundTruthConfig::default(),
&HistoryConfig::default(),
Some(&provider),
)
.unwrap();
assert!(out.contains("INVIOLABLE CONSTRAINTS"));
assert!(out.contains("never fake tests"));
assert!(out.contains("RECENT TRAJECTORY"));
assert!(out.contains("add feature"));
}
// Rendering an empty slice yields an empty string.
#[test]
fn render_trajectory_is_empty_for_no_messages() {
assert!(render_trajectory(&[]).is_empty());
}
}