use std::sync::OnceLock;
use ahash::AHashSet;
use regex::RegexSet;
use serde::Serialize;
use super::safety;
// Upper bound on the number of decision lines `extract_checkpoint` stores.
const MAX_DECISIONS: usize = 50;
// Upper bound on the number of error lines `extract_checkpoint` stores.
const MAX_ERRORS: usize = 50;
// Upper bound on the number of paths `dedup_and_cap_files` returns.
const MAX_FILES: usize = 200;
/// Summary of a block of text plus a list of changed files.
///
/// The field descriptions below describe the values as populated by
/// `extract_checkpoint`; the fields are public, so other code may build a
/// `Checkpoint` with different contents.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct Checkpoint {
/// Distinct trimmed lines recognised as decisions, in first-seen order,
/// holding at most `MAX_DECISIONS` entries.
pub decisions: Vec<String>,
/// Distinct trimmed lines recognised as errors, in first-seen order,
/// holding at most `MAX_ERRORS` entries.
pub errors: Vec<String>,
/// Trimmed, de-duplicated file paths, holding at most `MAX_FILES` entries.
pub files_changed: Vec<String>,
/// `true` when at least one further distinct decision line was dropped
/// because `decisions` was already full.
pub decisions_truncated: bool,
/// `true` when at least one further distinct error line was dropped
/// because `errors` was already full.
pub errors_truncated: bool,
}
/// Returns the process-wide set of patterns that mark a line as a decision.
///
/// The set is compiled on first use and cached in a `OnceLock`, so later calls
/// only hand back the shared reference. Patterns carrying the `(?i)` flag are
/// case-insensitive; the `TODO` and `FIXME` patterns have no flag and are
/// therefore case-sensitive.
///
/// # Panics
///
/// Panics on first use if any of the built-in patterns fails to compile.
fn decision_set() -> &'static RegexSet {
    const PATTERNS: [&str; 9] = [
        r"(?i)\bdecided\b",
        r"(?i)\bdecision\b",
        r"(?i)\bchose\b|\bchoosing\b",
        r"(?i)\bwe (?:will|should|must|chose|decided)\b",
        r"(?i)\bgoing with\b",
        r"(?i)\bopt(?:ed|ing) for\b",
        r"(?i)\bconclusion\s*:",
        r"\bTODO\b",
        r"\bFIXME\b",
    ];
    static COMPILED: OnceLock<RegexSet> = OnceLock::new();

    let compile = || RegexSet::new(PATTERNS).expect("static decision patterns compile");
    COMPILED.get_or_init(compile)
}
/// Reports whether `line` matches at least one pattern in `decision_set`.
fn is_decision_line(line: &str) -> bool {
    let patterns = decision_set();
    patterns.is_match(line)
}
/// Builds a [`Checkpoint`] from free-form `text` and a list of changed files.
///
/// `text` is first passed through `safety::strip_ansi` and then examined line
/// by line. Every line is trimmed; blank lines and lines for which
/// `safety::contains_credential` returns `true` are skipped entirely. A
/// remaining line is recorded as a decision when `is_decision_line` accepts it
/// and as an error when `safety::is_error_line` accepts it. The two checks are
/// independent, so a single line may appear in both lists.
///
/// Each list keeps only the first occurrence of a given line, in input order,
/// and stores at most `MAX_DECISIONS` / `MAX_ERRORS` entries. The matching
/// `*_truncated` flag is raised once a further distinct line had to be dropped.
///
/// `files_changed` is normalised by `dedup_and_cap_files`.
pub fn extract_checkpoint(text: &str, files_changed: Vec<String>) -> Checkpoint {
    let cleaned = safety::strip_ansi(text);

    let mut decisions: Vec<String> = Vec::new();
    let mut errors: Vec<String> = Vec::new();
    let mut known_decisions: AHashSet<&str> = AHashSet::new();
    let mut known_errors: AHashSet<&str> = AHashSet::new();
    let mut decisions_truncated = false;
    let mut errors_truncated = false;

    // Lazy pipeline: each line is trimmed, then checked for blankness, then
    // for credentials, before any classification happens.
    let candidates = cleaned
        .lines()
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .filter(|&entry| !safety::contains_credential(entry));

    for entry in candidates {
        // The `seen` set is only touched for lines that classify, and a
        // repeated line never counts towards truncation.
        if is_decision_line(entry) && known_decisions.insert(entry) {
            decisions_truncated |= !push_within_cap(&mut decisions, entry, MAX_DECISIONS);
        }
        if safety::is_error_line(entry) && known_errors.insert(entry) {
            errors_truncated |= !push_within_cap(&mut errors, entry, MAX_ERRORS);
        }
    }

    Checkpoint {
        decisions,
        errors,
        files_changed: dedup_and_cap_files(files_changed),
        decisions_truncated,
        errors_truncated,
    }
}

/// Appends an owned copy of `entry` to `bucket` unless it already holds `cap`
/// items. Returns `true` when the entry was stored, `false` when it was dropped.
fn push_within_cap(bucket: &mut Vec<String>, entry: &str, cap: usize) -> bool {
    let has_room = bucket.len() < cap;
    if has_room {
        bucket.push(entry.to_string());
    }
    has_room
}
/// Normalises a list of file paths: trims each entry, drops blank entries,
/// removes duplicates (keeping the first occurrence) and keeps at most
/// `MAX_FILES` paths in their original order.
///
/// Paths beyond the cap are discarded silently; no truncation flag is
/// reported for files.
fn dedup_and_cap_files(files: Vec<String>) -> Vec<String> {
    // Borrow the trimmed slices from `files` for the membership set so that
    // each surviving path is allocated exactly once, when it is emitted.
    let mut seen: AHashSet<&str> = AHashSet::new();
    files
        .iter()
        .map(|path| path.trim())
        .filter(|trimmed| !trimmed.is_empty() && seen.insert(*trimmed))
        // `take` stops pulling from the input as soon as the cap is reached.
        .take(MAX_FILES)
        .map(str::to_owned)
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor over `text` with an empty changed-files list.
    fn checkpoint_from_text(text: &str) -> Checkpoint {
        extract_checkpoint(text, Vec::new())
    }

    /// Runs the extractor over empty text with the given changed files.
    fn checkpoint_from_files(files: Vec<String>) -> Checkpoint {
        extract_checkpoint("", files)
    }

    // A plain decision sentence is captured verbatim and nothing else is set.
    #[test]
    fn extracts_decision_line() {
        let checkpoint = checkpoint_from_text("We decided to use Fjall.");
        assert_eq!(checkpoint.decisions, ["We decided to use Fjall."]);
        assert!(checkpoint.errors.is_empty());
        assert!(!checkpoint.decisions_truncated);
    }

    // Error classification is delegated to `safety::is_error_line`.
    #[test]
    fn extracts_error_line_via_is_error_line() {
        let checkpoint = checkpoint_from_text("error: build failed");
        assert_eq!(checkpoint.errors, ["error: build failed"]);
        assert!(checkpoint.decisions.is_empty());
    }

    // A line carrying a GitHub-style token must not surface anywhere.
    #[test]
    fn drops_credential_line_from_decisions_and_errors() {
        let input = format!("chose token=ghp_{}", "a".repeat(36));
        let checkpoint = checkpoint_from_text(&input);
        assert!(
            checkpoint.decisions.is_empty(),
            "credential decision line must be dropped, got: {:?}",
            checkpoint.decisions
        );
        assert!(
            checkpoint.errors.is_empty(),
            "credential error line must be dropped, got: {:?}",
            checkpoint.errors
        );

        let pat_token = format!("ghp_{}", "a".repeat(36));
        let is_clean =
            |entries: &[String]| entries.iter().all(|entry| !entry.contains(&pat_token));
        assert!(is_clean(&checkpoint.decisions), "PAT leaked into decisions");
        assert!(is_clean(&checkpoint.errors), "PAT leaked into errors");
        assert!(
            is_clean(&checkpoint.files_changed),
            "PAT leaked into files_changed"
        );
    }

    // An error line containing an AWS-style key is dropped as well.
    #[test]
    fn drops_credential_error_line() {
        let checkpoint = checkpoint_from_text("error: leaked AKIAIOSFODNN7EXAMPLE in config");
        assert!(
            checkpoint.errors.is_empty(),
            "credential error line must be dropped"
        );
        assert!(
            checkpoint
                .errors
                .iter()
                .all(|entry| !entry.contains("AKIAIOSFODNN7EXAMPLE")),
            "AWS key leaked into errors"
        );
    }

    // Repeated identical decisions collapse to one and do not count as truncation.
    #[test]
    fn dedup_collapses_duplicate_decisions() {
        let repeated = "We decided to ship.\nWe decided to ship.\nWe decided to ship.";
        let checkpoint = checkpoint_from_text(repeated);
        assert_eq!(checkpoint.decisions, ["We decided to ship."]);
        assert!(!checkpoint.decisions_truncated);
    }

    // More distinct errors than the cap: list is capped and the flag is raised.
    #[test]
    fn caps_and_flags_truncated_errors() {
        let text: String = (0..(MAX_ERRORS + 10))
            .map(|n| format!("error: failure number {n}\n"))
            .collect();
        let checkpoint = checkpoint_from_text(&text);
        assert_eq!(checkpoint.errors.len(), MAX_ERRORS);
        assert!(
            checkpoint.errors_truncated,
            "errors_truncated must fire past cap"
        );
    }

    // More distinct decisions than the cap: list is capped and the flag is raised.
    #[test]
    fn caps_and_flags_truncated_decisions() {
        let text: String = (0..(MAX_DECISIONS + 5))
            .map(|n| format!("We decided on option {n}.\n"))
            .collect();
        let checkpoint = checkpoint_from_text(&text);
        assert_eq!(checkpoint.decisions.len(), MAX_DECISIONS);
        assert!(checkpoint.decisions_truncated);
    }

    // Colour escape sequences are removed before the line is stored.
    #[test]
    fn strips_ansi_before_matching() {
        let checkpoint = checkpoint_from_text("\x1b[32mWe decided to use rayon.\x1b[0m");
        assert_eq!(checkpoint.decisions, ["We decided to use rayon."]);
    }

    // No text and no files produce an entirely empty checkpoint.
    #[test]
    fn empty_input_yields_empty_checkpoint() {
        let checkpoint = checkpoint_from_text("");
        assert!(checkpoint.decisions.is_empty());
        assert!(checkpoint.errors.is_empty());
        assert!(checkpoint.files_changed.is_empty());
        assert!(!checkpoint.decisions_truncated);
        assert!(!checkpoint.errors_truncated);
    }

    // Duplicate paths collapse while first-seen order is kept.
    #[test]
    fn files_changed_dedup_passthrough() {
        let files: Vec<String> = ["src/lib.rs", "src/lib.rs", "src/main.rs"]
            .iter()
            .map(|path| path.to_string())
            .collect();
        let checkpoint = checkpoint_from_files(files);
        assert_eq!(checkpoint.files_changed, ["src/lib.rs", "src/main.rs"]);
    }

    // The file list never exceeds `MAX_FILES` and keeps the earliest entries.
    #[test]
    fn files_changed_capped_at_max() {
        let total = MAX_FILES + 25;
        let files: Vec<String> = (0..total).map(|n| format!("src/file_{n:04}.rs")).collect();
        let checkpoint = checkpoint_from_files(files);
        assert_eq!(checkpoint.files_changed.len(), MAX_FILES);
        assert_eq!(checkpoint.files_changed[0], "src/file_0000.rs");
    }
}