use std::fs;
use aristo_core::index::{AnnotationId, IndexEntry, IndexFile, Sha256};
use aristo_core::proof::ProofFile;
use serde::{Deserialize, Serialize};
use crate::commands::verify::validator::MAX_REPAIR_ATTEMPTS;
use crate::pipeline::queue::{self, QueueDir};
use crate::{CliError, CliResult, Workspace};
pub(crate) const PIPELINE_NAME: &str = "verify";
#[derive(Debug, Serialize, Deserialize)]
pub(crate) struct VerifyTask {
pub id: String,
pub text: String,
pub file: String,
pub site: String,
pub text_hash: Sha256,
pub body_hash: Sha256,
#[serde(default, skip_serializing_if = "is_zero_u32")]
pub prior_attempts: u32,
}
fn is_zero_u32(n: &u32) -> bool {
*n == 0
}
#[aristo::intent(
"Each pending verify task is a REQUEST from the SDK to the in-agent \
skill, enqueued at `.aristo/verify-queue/pending/<id>.toml`. Workers \
pop one at a time via `aristo verify --pop-next`. The SDK never reads \
these task files back as verdicts — verdicts arrive via \
`aristo verify --submit-verdict` and land at `.aristo/proofs/<id>.proof` \
after the mechanical validator gates them. A refactor that has the \
SDK auto-process its own queue (e.g., to call an LLM directly) would \
conflate the CLI with the agent and break the design split: the CLI \
never makes LLM calls; the agent never bypasses the SDK validator.",
verify = "neural",
id = "pending_neural_file_is_sdk_to_agent_request_not_a_result"
)]
pub(crate) fn enqueue_pending(
ws: &Workspace,
index: &IndexFile,
ids: &[&AnnotationId],
) -> CliResult<usize> {
let qdir = QueueDir::for_pipeline(ws, PIPELINE_NAME);
qdir.ensure_dirs()?;
let mut budget_exhausted: Vec<&AnnotationId> = Vec::new();
let mut enqueued = 0usize;
for id in ids {
let Some(entry) = index.entries.get(*id) else {
continue;
};
let prior_attempts = read_prior_attempts(ws, id);
if prior_attempts >= MAX_REPAIR_ATTEMPTS {
budget_exhausted.push(id);
continue;
}
backup_existing_proof(ws, id);
let task = VerifyTask {
id: id.as_str().to_string(),
text: entry_text(entry).to_string(),
file: entry_file(entry).to_string(),
site: entry_site(entry).to_string(),
text_hash: entry_text_hash(entry).clone(),
body_hash: entry_body_hash(entry).clone(),
prior_attempts,
};
let task_toml = toml::to_string_pretty(&task).map_err(|e| CliError::Other {
message: format!("serializing verify-queue task {}: {e}", id.as_str()),
exit_code: 1,
})?;
queue::enqueue(&qdir, id, &task_toml)?;
enqueued += 1;
}
if !budget_exhausted.is_empty() {
warn_budget_exhausted(&budget_exhausted);
}
Ok(enqueued)
}
#[aristo::intent(
"If a legacy `.aristo/pending-neural.toml` (single-file format from \
v0.0.6) is present, expand each entry into per-id queue files under \
`.aristo/verify-queue/pending/` and delete the legacy file. Runs at \
the start of every `aristo verify` invocation. Idempotent: a second \
run with no legacy file is a no-op. Single-pass migration — there \
is no compat shim that re-reads the legacy format on subsequent \
runs.",
verify = "test",
id = "verify_migrates_legacy_pending_neural_toml_to_queue"
)]
pub(crate) fn migrate_legacy_pending_if_present(ws: &Workspace) -> CliResult<()> {
let legacy_path = ws.aristo_dir().join("pending-neural.toml");
if !legacy_path.is_file() {
return Ok(());
}
let raw = fs::read_to_string(&legacy_path)?;
#[derive(Deserialize)]
struct LegacyFile {
#[allow(dead_code)]
schema_version: u32,
pending: Vec<VerifyTask>,
}
let parsed: LegacyFile = match toml::from_str(&raw) {
Ok(p) => p,
Err(e) => {
eprintln!(
"warning: legacy {} failed to parse ({e}); deleting it. \
Run `aristo verify` again to re-populate the new queue.",
legacy_path.display()
);
let _ = fs::remove_file(&legacy_path);
return Ok(());
}
};
let qdir = QueueDir::for_pipeline(ws, PIPELINE_NAME);
qdir.ensure_dirs()?;
for task in parsed.pending {
let Ok(id) = AnnotationId::parse(&task.id) else {
continue;
};
let task_toml = toml::to_string_pretty(&task).map_err(|e| CliError::Other {
message: format!("re-serializing migrated task {}: {e}", task.id),
exit_code: 1,
})?;
queue::enqueue(&qdir, &id, &task_toml)?;
}
let _ = fs::remove_file(&legacy_path);
eprintln!(
"note: migrated legacy `.aristo/pending-neural.toml` to \
`.aristo/verify-queue/pending/` (one task per file)."
);
Ok(())
}
#[aristo::intent(
"Prior attempts for an id come from the existing `.aristo/proofs/\
<id>.proof` file (if any), parsed once to extract verdict.attempts. \
Carrying this across re-spawns activates the K-bounded repair \
budget that would otherwise be dead code: each fresh subagent \
invocation writing attempts=1 means a hard-to-verify intent can \
re-spawn indefinitely without ever hitting the cap. Reading from \
the rejected proof on disk is the only persistence channel \
available — the SDK doesn't track per-entry attempt history \
elsewhere.",
verify = "test",
id = "pending_carries_prior_attempts_from_existing_proof"
)]
fn read_prior_attempts(ws: &Workspace, id: &AnnotationId) -> u32 {
let filename = format!("{}.proof", id.as_str().replace(':', "__"));
let path = ws.aristo_dir().join("proofs").join(filename);
if !path.is_file() {
return 0;
}
let Ok(raw) = std::fs::read_to_string(&path) else {
return 0;
};
let Ok(pf) = ProofFile::parse(&raw) else {
return 0;
};
pf.verdict.attempts
}
pub(crate) fn proof_path_for(ws: &Workspace, id: &AnnotationId) -> std::path::PathBuf {
let filename = format!("{}.proof", id.as_str().replace(':', "__"));
ws.aristo_dir().join("proofs").join(filename)
}
pub(crate) fn proof_bak_path_for(ws: &Workspace, id: &AnnotationId) -> std::path::PathBuf {
let filename = format!("{}.proof.bak", id.as_str().replace(':', "__"));
ws.aristo_dir().join("proofs").join(filename)
}
#[aristo::intent(
"When `aristo verify` re-enqueues an entry that already has a .proof \
on disk, move the existing proof to <id>.proof.bak before the next \
attempt overwrites it. Single-deep backup — overwrites any prior \
.bak. Lets the user diff a rejected re-attempt against the prior \
verdict. The .bak is auto-deleted on successful --apply-verdicts.",
verify = "test",
id = "pending_backs_up_existing_proof_on_rerun"
)]
fn backup_existing_proof(ws: &Workspace, id: &AnnotationId) {
let src = proof_path_for(ws, id);
if !src.is_file() {
return;
}
let bak = proof_bak_path_for(ws, id);
let _ = std::fs::rename(&src, &bak);
}
fn warn_budget_exhausted(ids: &[&AnnotationId]) {
eprintln!();
eprintln!(
"⚠ {} annotation(s) have exhausted the repair budget ({} attempts) and \
will not be re-dispatched until you intervene:",
ids.len(),
MAX_REPAIR_ATTEMPTS
);
for id in ids {
eprintln!(" {id}");
}
eprintln!();
eprintln!(
" The proof file on disk records why each attempt failed. Either fix \
the underlying issue and `aristo verify --rerun --filter id=<id>`, or \
delete `.aristo/proofs/<id>.proof` to start the budget fresh."
);
}
fn entry_text(e: &IndexEntry) -> &str {
match e {
IndexEntry::Intent(x) => &x.text,
IndexEntry::Assume(x) => &x.text,
}
}
fn entry_file(e: &IndexEntry) -> &str {
match e {
IndexEntry::Intent(x) => &x.file,
IndexEntry::Assume(x) => &x.file,
}
}
fn entry_site(e: &IndexEntry) -> &str {
match e {
IndexEntry::Intent(x) => &x.site,
IndexEntry::Assume(x) => &x.site,
}
}
fn entry_text_hash(e: &IndexEntry) -> &Sha256 {
match e {
IndexEntry::Intent(x) => &x.text_hash,
IndexEntry::Assume(x) => &x.text_hash,
}
}
fn entry_body_hash(e: &IndexEntry) -> &Sha256 {
match e {
IndexEntry::Intent(x) => &x.body_hash,
IndexEntry::Assume(x) => &x.body_hash,
}
}