use std::time::SystemTime;
use aristo_core::canon::{
AnnotationMatchInput, CacheEntry, CanonClient, CanonError, CanonMatchRequest,
CanonMatchResponse, CanonMatchesFile, Disposition, HttpCanonClient, MockCanonClient,
NoopCanonClient, PendingMatch,
};
use aristo_core::config::CanonConfig;
use aristo_core::index::{AnnotationId, IndexEntry, IndexFile};
use crate::{CliError, CliResult, Workspace};
/// What happened during one canon-match step, as reported by `run_canon_step`.
#[derive(Debug)]
pub(crate) enum CanonStepOutcome {
/// The match request succeeded and the updated cache was written back.
Ok {
/// Number of pending matches stored by the merge for this run
/// (candidates filtered out as rejected are not counted).
findings_added: usize,
/// Canon version taken from the match response.
canon_version: Option<String>,
},
/// No annotation needed matching, so no request was sent.
/// `existing_pending` is the number of pending matches already in the cache.
CacheHit { existing_pending: usize },
/// The step was skipped because `config.enabled` is false.
DisabledByConfig,
/// The step was skipped because the caller set `skip_flag`.
SkippedByFlag,
/// Client selection fell back to the free-tier client; nothing was sent.
/// `annotations_skipped` is the size of the batch that would have been sent.
FreeTier { annotations_skipped: usize },
/// The match request returned an error. The step still returns `Ok(..)`
/// with this variant, and the cache file is not rewritten.
Degraded {
/// Error returned by the client's `match_annotations` call.
error: CanonError,
/// Size of the batch whose request failed.
failed_for: usize,
},
}
/// Inputs for a single `run_canon_step` invocation.
pub(crate) struct RunnerArgs<'a> {
/// Workspace; supplies the path of the canon-matches cache file.
pub(crate) ws: &'a Workspace,
/// Index whose intent entries are candidates for matching.
pub(crate) index: &'a IndexFile,
/// Canon configuration; `enabled == false` short-circuits the step.
pub(crate) config: &'a CanonConfig,
/// Sent to the service as the request's `confidence_threshold`.
pub(crate) threshold: f64,
/// When true the step returns `SkippedByFlag` before doing anything else.
pub(crate) skip_flag: bool,
/// When true every eligible annotation is re-matched regardless of the cache.
pub(crate) refresh_flag: bool,
/// Recorded as `found_by` on every pending match created by this run.
pub(crate) found_by: &'static str,
}
pub(crate) fn run_canon_step(args: RunnerArgs) -> CliResult<CanonStepOutcome> {
if args.skip_flag {
return Ok(CanonStepOutcome::SkippedByFlag);
}
if !args.config.enabled {
return Ok(CanonStepOutcome::DisabledByConfig);
}
let cache_path = args.ws.canon_matches_path();
let mut cache = CanonMatchesFile::read(&cache_path).map_err(|e| CliError::Other {
message: format!("read {}: {e}", cache_path.display()),
exit_code: 1,
})?;
let (client, is_free_tier) = build_client(args.config);
let batch = collect_batch(args.index, &cache, args.refresh_flag);
if batch.is_empty() {
let existing_pending = count_pending(&cache);
return Ok(CanonStepOutcome::CacheHit { existing_pending });
}
if is_free_tier {
return Ok(CanonStepOutcome::FreeTier {
annotations_skipped: batch.len(),
});
}
let req = CanonMatchRequest {
annotations: batch
.iter()
.map(|b| AnnotationMatchInput {
annotation_text: b.text.clone(),
applies_to: b.applies_to.clone(),
})
.collect(),
confidence_threshold: args.threshold,
};
let response = match client.match_annotations(&req) {
Ok(r) => r,
Err(e) => {
return Ok(CanonStepOutcome::Degraded {
error: e,
failed_for: batch.len(),
});
}
};
let findings_added = merge_response_into_cache(&mut cache, &batch, &response, args.found_by);
cache.meta.canon_version = Some(response.canon_version.clone());
cache.meta.last_fetched = Some(response.matched_at.clone());
cache
.write_atomic(&cache_path)
.map_err(|e| CliError::Other {
message: format!("write {}: {e}", cache_path.display()),
exit_code: 1,
})?;
Ok(CanonStepOutcome::Ok {
findings_added,
canon_version: Some(response.canon_version),
})
}
/// Chooses the canon client for this run and reports whether it is the
/// free-tier fallback.
///
/// Returns `(client, is_free_tier)`:
/// - a mock client from the environment, if one is configured -> `false`;
/// - otherwise an HTTP client when credentials resolve -> `false`
///   (`ARETTA_API_URL`, when set, overrides the server from the credentials);
/// - otherwise the no-op client -> `true`.
///
/// The `_config` argument is currently unused.
#[aristo::intent(
    "Client selection order is load-bearing: ARISTO_CANON_FIXTURE \
    wins outright (test mode beats everything, including auth), \
    then auth-token resolution decides between HttpCanonClient and \
    the free-tier Noop. Reversing — e.g. checking auth first — \
    would make integration tests need a fake token to work, \
    coupling test setup to the auth substrate unnecessarily.",
    verify = "test",
    id = "canon_client_selection_test_mode_wins"
)]
fn build_client(_config: &CanonConfig) -> (Box<dyn CanonClient>, bool) {
    // Test mode is checked first, before any credential lookup.
    if let Some(mock) = MockCanonClient::from_env() {
        return (Box::new(mock), false);
    }

    // No usable credentials: fall back to the no-op client and flag free tier.
    let Ok(creds) = aristo_core::auth::resolve_full() else {
        return (Box::new(NoopCanonClient), true);
    };

    let base_url = match std::env::var("ARETTA_API_URL") {
        Ok(overridden) => overridden,
        Err(_) => creds.server.as_str().to_string(),
    };
    let http = HttpCanonClient::new(base_url, &creds.token);
    (Box::new(http), false)
}
/// One annotation selected for a canon-match request.
#[derive(Debug)]
struct BatchEntry {
/// Id of the annotation in the index; used as the cache key when merging.
id: AnnotationId,
/// Intent text, sent to the service as `annotation_text`.
text: String,
/// String form of the intent's text hash; compared against and stored as
/// the cache entry's `last_match_text_hash`.
text_hash: String,
/// Kind labels derived from the intent's `site` by `applies_to_from_site`.
applies_to: Vec<String>,
}
/// Selects the annotations that need a canon-match request.
///
/// Only `IndexEntry::Intent` entries whose id is not canon-bound are
/// considered. Such an entry is batched when `refresh` is true, when the
/// cache has no entry for its id, or when the cached
/// `last_match_text_hash` differs from the intent's current text hash.
/// Entries come out in the iteration order of `index.entries`.
#[aristo::intent(
    "An annotation is added to the canon-match batch when (a) the \
    user passed --refresh-canon, OR (b) no cached entry exists yet, \
    OR (c) the cached entry's last_match_text_hash differs from the \
    current annotation text_hash. A fresh cache hit produces no API \
    traffic — load-bearing for the daily-loop UX where most stamps \
    touch nothing canon-relevant.",
    verify = "test",
    id = "canon_batch_collection_honors_l5_cache_skip"
)]
fn collect_batch(index: &IndexFile, cache: &CanonMatchesFile, refresh: bool) -> Vec<BatchEntry> {
    index
        .entries
        .iter()
        .filter_map(|(id, entry)| {
            // Assumptions are never matched against canon.
            let IndexEntry::Intent(intent) = entry else {
                return None;
            };
            // Ids that are already canon-bound are left alone.
            if id.is_canon_bound() {
                return None;
            }

            let text_hash = intent.text_hash.as_str().to_string();
            // Up to date means: no forced refresh, and the cache already
            // holds a match result for exactly this text.
            let up_to_date = !refresh
                && cache
                    .entries
                    .get(id)
                    .is_some_and(|cached| cached.last_match_text_hash == text_hash);
            if up_to_date {
                return None;
            }

            Some(BatchEntry {
                id: id.clone(),
                text: intent.text.clone(),
                text_hash,
                applies_to: applies_to_from_site(&intent.site),
            })
        })
        .collect()
}
/// Derives the `applies_to` label list from an annotation's site string.
///
/// The label is the first whitespace-separated token of `site`, with one
/// exception: when that token is `impl` and `site` contains `::` anywhere,
/// the label is `method`. A blank or whitespace-only `site` yields an empty
/// list; otherwise the list has exactly one element.
///
/// NOTE(review): the `::` test looks at the whole site string, so an `impl`
/// site that mentions a path-qualified name would also be labelled
/// `method` — confirm that site strings never contain such paths.
fn applies_to_from_site(site: &str) -> Vec<String> {
    // `split_whitespace` never yields an empty token, so "no first token"
    // covers both the empty and the whitespace-only site.
    let Some(kind) = site.split_whitespace().next() else {
        return Vec::new();
    };

    let label = if kind == "impl" && site.contains("::") {
        "method"
    } else {
        kind
    };
    vec![label.to_string()]
}
/// Writes a match response into the cache, one batch entry at a time.
///
/// The candidate list at `response.results[i]` belongs to `batch[i]`.
/// For each batch entry the cache entry is created if missing, then its
/// `pending_matches` are replaced with the candidates that are not rejected
/// for the entry's current text hash, and its `last_match_text_hash` and
/// `canon_fetched_at` are updated. `accepted_matches` and `rejected_matches`
/// are never modified here.
///
/// Returns the total number of pending matches stored across the batch.
///
/// NOTE(review): when the response has no list at a batch position, that
/// annotation is treated as having zero candidates — its pending list is
/// cleared and its hash is recorded as matched. Confirm this is the intended
/// handling of a short response.
#[aristo::intent(
    "Merging match response into cache is per-annotation idempotent: \
    each batched annotation's candidate list replaces ONLY that \
    annotation's `pending_matches`; `accepted_matches` and \
    `rejected_matches` for the same annotation are untouched (user \
    decisions survive). A regression that overwrote accepted/rejected \
    here would silently undo the user's review work on every stamp.",
    verify = "test",
    id = "canon_merge_response_preserves_user_decisions"
)]
fn merge_response_into_cache(
    cache: &mut CanonMatchesFile,
    batch: &[BatchEntry],
    response: &CanonMatchResponse,
    found_by: &str,
) -> usize {
    // One timestamp is shared by everything written in this merge.
    let stamp = now_rfc3339();
    let mut stored = 0usize;

    for (position, item) in batch.iter().enumerate() {
        let candidates = match response.results.get(position) {
            Some(found) => found.clone(),
            None => Default::default(),
        };

        let slot = cache
            .entries
            .entry(item.id.clone())
            .or_insert_with(|| CacheEntry {
                last_match_text_hash: item.text_hash.clone(),
                canon_fetched_at: stamp.clone(),
                pending_matches: Vec::new(),
                accepted_matches: Vec::new(),
                rejected_matches: Vec::new(),
            });

        let mut fresh: Vec<PendingMatch> = Vec::new();
        for candidate in candidates {
            // A candidate rejected for this exact text is not resurfaced.
            if slot.is_rejected(&candidate.canon_id, &item.text_hash) {
                continue;
            }
            fresh.push(PendingMatch {
                canon_id: candidate.canon_id,
                version: candidate.version,
                canonical_text: candidate.canonical_text,
                canon_version: response.canon_version.clone(),
                confidence: candidate.confidence,
                prefix_tier: candidate.prefix_tier,
                backed_by: candidate.backed_by,
                linked: candidate.linked,
                disposition: Disposition::Open,
                found_at: stamp.clone(),
                found_by: found_by.to_string(),
            });
        }

        stored += fresh.len();
        slot.last_match_text_hash = item.text_hash.clone();
        slot.canon_fetched_at = stamp.clone();
        // Only the pending list is replaced; accepted/rejected stay as they were.
        slot.pending_matches = fresh;
    }

    stored
}
/// Returns the total number of pending matches across every cache entry.
fn count_pending(cache: &CanonMatchesFile) -> usize {
    let mut total = 0usize;
    for entry in cache.entries.values() {
        total += entry.pending_matches.len();
    }
    total
}
/// Returns the current UTC time formatted as an RFC 3339 string.
///
/// If formatting fails, the literal `1970-01-01T00:00:00Z` is returned
/// instead of an error.
fn now_rfc3339() -> String {
    use time::format_description::well_known::Rfc3339;
    use time::OffsetDateTime;

    // The value is discarded. NOTE(review): this is the only visible use of
    // the file-level `SystemTime` import — presumably it keeps that import
    // from being reported as unused; confirm before removing either one.
    let _ = SystemTime::now();

    match OffsetDateTime::now_utc().format(&Rfc3339) {
        Ok(formatted) => formatted,
        Err(_) => "1970-01-01T00:00:00Z".into(),
    }
}
/// Prints a human-readable summary of a canon-match outcome to stdout.
///
/// - `outcome`: the result to describe; every variant has its own message.
/// - `cache`: read for the `Ok` variant (to list open pending matches) and
///   for the `Degraded` variant (to count cached pending matches).
/// - `ws`: accepted but not used beyond being discarded.
pub(crate) fn print_stamp_summary(
    outcome: &CanonStepOutcome,
    cache: &CanonMatchesFile,
    ws: &Workspace,
) {
    // The workspace is not needed by any branch; discard it explicitly.
    let _ = ws;

    match outcome {
        CanonStepOutcome::Ok {
            findings_added,
            canon_version,
        } => {
            let version_str = match canon_version.as_deref() {
                Some(v) => format!(", canon {v}"),
                None => String::new(),
            };
            println!("→ canon-match: {findings_added} new finding(s){version_str}.");
            print_pending_matches(cache);
        }
        CanonStepOutcome::CacheHit {
            existing_pending: 0,
        } => {
            println!("→ canon-match: no annotations need a fresh match.");
        }
        CanonStepOutcome::CacheHit { existing_pending } => {
            println!(
                "→ canon-match: cache hit ({existing_pending} pending finding(s) still open)."
            );
            println!(
                " review with `aristo critique --apply-findings` or `aristo critique --filter id=<id>`"
            );
        }
        CanonStepOutcome::DisabledByConfig => {
            println!(
                "→ canon-match: skipped (disabled via aristo.toml `[canon] enabled = false`)."
            );
        }
        CanonStepOutcome::SkippedByFlag => {
            println!("→ canon-match: skipped (`--skip-canon`).");
        }
        CanonStepOutcome::FreeTier {
            annotations_skipped,
        } => {
            println!("→ canon-match: skipped (Pro feature).");
            println!(
                " note: canon matching is a Pro feature. {annotations_skipped} \
                annotation(s) could have matched."
            );
            println!(
                " Run `aristo auth login` to start a trial, or `aristo status` for details."
            );
        }
        CanonStepOutcome::Degraded { error, failed_for } => {
            println!("→ canon-match: skipped ({error}). cached matches retained.");
            // The follow-up note is only printed when something was actually skipped.
            if *failed_for > 0 {
                println!(
                    " note: {failed_for} annotation(s) skipped this run; \
                    {} cached match(es) still valid.",
                    count_pending(cache)
                );
            }
        }
    }
}
/// Prints every pending match whose disposition is `Open`, grouped by
/// annotation in the iteration order of `cache.entries`.
///
/// Each match produces a summary line, an optional `backed by` line, and a
/// hint showing the command to review that annotation.
fn print_pending_matches(cache: &CanonMatchesFile) {
    for (id, entry) in &cache.entries {
        let open_matches = entry
            .pending_matches
            .iter()
            .filter(|m| matches!(m.disposition, Disposition::Open));

        for m in open_matches {
            let tier_label = match m.prefix_tier {
                aristo_core::canon::PrefixTier::Aristos => "aristos: tier",
                aristo_core::canon::PrefixTier::Kanon => "kanon: tier",
            };
            let canon_id = &m.canon_id;
            let version = &m.version;
            let conf = m.confidence;
            println!(" {id} → {canon_id} {version} (conf {conf:.2}, {tier_label})");

            if let Some(backed_by) = &m.backed_by {
                println!(" backed by: {backed_by}");
            }
            println!(" review with `aristo critique --filter id={id}`");
        }
    }
}
// Unit tests for batch collection, site-kind mapping, response merging and
// pending-match counting. All fixtures are built in memory.
#[cfg(test)]
mod tests {
use super::*;
use aristo_core::canon::types::{
CanonMatch, CanonMatchResponse, PrefixTier, VerificationMetadata,
};
use aristo_core::index::{
AnnotationId, IndexEntry, IntentEntry, Meta, Sha256, Status, VerifyLevel, VerifyMethod,
};
use std::collections::BTreeMap;
// Parses an annotation id, panicking on invalid input (test-only shortcut).
fn aid(s: &str) -> AnnotationId {
AnnotationId::parse(s).unwrap()
}
// Builds a `sha256:` hash whose 64-character body is `seed` repeated.
fn sha(seed: char) -> Sha256 {
let body: String = std::iter::repeat_n(seed, 64).collect();
Sha256::parse(&format!("sha256:{body}")).unwrap()
}
// Builds an intent index entry with the given text, text hash and site;
// every other field is a fixed placeholder.
fn intent_entry(text: &str, text_hash: Sha256, site: &str) -> IndexEntry {
IndexEntry::Intent(IntentEntry {
text: text.into(),
verify: VerifyLevel::Method(VerifyMethod::Neural),
status: Status::Unknown,
text_hash,
body_hash: sha('b'),
file: "src/lib.rs".into(),
site: site.into(),
covered_region: aristo_core::index::CoveredRegion::Function,
binding: aristo_core::index::BindingState::Local,
parent: None,
last_critiqued_at_text_hash: None,
last_critique_finding_count: None,
})
}
// Builds an index with no entries.
fn empty_index() -> IndexFile {
IndexFile {
meta: Meta {
schema_version: 1,
generated_by: None,
generated_at: None,
source_root: None,
},
entries: BTreeMap::new(),
}
}
// With an empty cache, every intent in the index is batched.
#[test]
fn collect_batch_first_run_includes_every_intent() {
let mut index = empty_index();
index.entries.insert(
aid("alpha"),
intent_entry("text alpha", sha('a'), "fn alpha (line 1)"),
);
index.entries.insert(
aid("beta"),
intent_entry("text beta", sha('b'), "fn beta (line 2)"),
);
let cache = CanonMatchesFile::default();
let batch = collect_batch(&index, &cache, false);
assert_eq!(batch.len(), 2);
}
// An intent whose cached hash equals its current text hash is not batched.
#[test]
fn collect_batch_skips_annotations_with_cache_hit() {
let mut index = empty_index();
let text_hash = sha('a');
index.entries.insert(
aid("alpha"),
intent_entry("text alpha", text_hash.clone(), "fn alpha (line 1)"),
);
index.entries.insert(
aid("beta"),
intent_entry("text beta", sha('b'), "fn beta (line 2)"),
);
let mut cache = CanonMatchesFile::default();
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: text_hash.as_str().into(),
canon_fetched_at: "2026-06-15T09:14:22Z".into(),
pending_matches: vec![],
accepted_matches: vec![],
rejected_matches: vec![],
},
);
let batch = collect_batch(&index, &cache, false);
assert_eq!(batch.len(), 1, "only beta should be batched");
assert_eq!(batch[0].id, aid("beta"));
}
// A cached hash that differs from the current text hash forces a re-match.
#[test]
fn collect_batch_includes_drifted_annotation() {
let mut index = empty_index();
index.entries.insert(
aid("alpha"),
intent_entry("alpha v2", sha('a'), "fn alpha (line 1)"),
);
let mut cache = CanonMatchesFile::default();
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: "sha256:DIFFERENT".into(),
canon_fetched_at: "2026-06-14T09:14:22Z".into(),
pending_matches: vec![],
accepted_matches: vec![],
rejected_matches: vec![],
},
);
let batch = collect_batch(&index, &cache, false);
assert_eq!(batch.len(), 1);
}
// The refresh flag batches an intent even when its cached hash matches.
#[test]
fn collect_batch_refresh_flag_invalidates_all_cache_hits() {
let mut index = empty_index();
let text_hash = sha('a');
index.entries.insert(
aid("alpha"),
intent_entry("text alpha", text_hash.clone(), "fn alpha (line 1)"),
);
let mut cache = CanonMatchesFile::default();
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: text_hash.as_str().into(),
canon_fetched_at: "2026-06-15T09:14:22Z".into(),
pending_matches: vec![],
accepted_matches: vec![],
rejected_matches: vec![],
},
);
assert!(collect_batch(&index, &cache, false).is_empty());
assert_eq!(collect_batch(&index, &cache, true).len(), 1);
}
// Ids with the `aristos:` or `kanon:` prefix are excluded from the batch.
#[test]
fn collect_batch_excludes_canon_bound_ids() {
let mut index = empty_index();
index.entries.insert(
aid("aristos:already_bound"),
intent_entry("x", sha('a'), "fn x (line 1)"),
);
index.entries.insert(
aid("kanon:also_bound"),
intent_entry("y", sha('b'), "fn y (line 2)"),
);
index.entries.insert(
aid("local_unbound"),
intent_entry("z", sha('c'), "fn z (line 3)"),
);
let cache = CanonMatchesFile::default();
let batch = collect_batch(&index, &cache, false);
assert_eq!(batch.len(), 1, "only local id is batched");
assert_eq!(batch[0].id, aid("local_unbound"));
}
// The first whitespace-separated token of the site becomes the label.
#[test]
fn applies_to_extracts_first_token_from_site() {
assert_eq!(applies_to_from_site("fn foo (line 12)"), vec!["fn"]);
assert_eq!(applies_to_from_site("struct Bar (line 8)"), vec!["struct"]);
assert_eq!(
applies_to_from_site("method Baz::run (line 4)"),
vec!["method"]
);
}
// An empty site produces no labels.
#[test]
fn applies_to_empty_site_returns_empty() {
let v: Vec<String> = applies_to_from_site("");
assert!(v.is_empty());
}
// An `impl` site containing `::` is labelled `method`.
#[test]
fn applies_to_for_fn_inside_impl_resolves_to_method() {
assert_eq!(
applies_to_from_site("impl Wal for WalFile::prepare_wal_finish (line 3985)"),
vec!["method"]
);
}
// An `impl` site without `::` keeps the `impl` label.
#[test]
fn applies_to_for_annotation_on_impl_block_itself_keeps_impl() {
assert_eq!(
applies_to_from_site("impl Wal for WalFile (line 3985)"),
vec!["impl"]
);
}
// Builds a batch entry for `id` with a fixed all-`a` text hash.
fn batch_entry(id: &str) -> BatchEntry {
BatchEntry {
id: aid(id),
text: format!("text {id}"),
text_hash: format!("sha256:{}", "a".repeat(64)),
applies_to: vec!["fn".into()],
}
}
// Builds a sample match with canon id `matched_canon` in the Aristos tier.
fn canon_match_aristos() -> CanonMatch {
CanonMatch {
canon_id: "matched_canon".into(),
version: "v0.1.0".into(),
canonical_text: "matched text".into(),
confidence: 0.92,
scope: ":vanilla".into(),
prefix_tier: PrefixTier::Aristos,
backed_by: Some("specialized neural checker".into()),
linked: Some("arta_xyz".into()),
verification: VerificationMetadata {
coverage_level: "tight".into(),
test_binaries: vec![],
},
}
}
// Wraps per-annotation match lists in a response with fixed metadata.
fn response_with(canon_matches_per_ann: Vec<Vec<CanonMatch>>) -> CanonMatchResponse {
CanonMatchResponse {
results: canon_matches_per_ann,
effective_scopes: vec![":vanilla".into()],
canon_version: "v0.2.0".into(),
matched_at: "2026-06-15T09:14:22Z".into(),
}
}
// A single candidate becomes one open pending match tagged with `found_by`.
#[test]
fn merge_response_into_cache_writes_pending_for_each_match() {
let mut cache = CanonMatchesFile::default();
let batch = vec![batch_entry("alpha")];
let response = response_with(vec![vec![canon_match_aristos()]]);
let n = merge_response_into_cache(&mut cache, &batch, &response, "aristo stamp");
assert_eq!(n, 1);
let entry = &cache.entries[&aid("alpha")];
assert_eq!(entry.pending_matches.len(), 1);
assert_eq!(entry.pending_matches[0].canon_id, "matched_canon");
assert_eq!(entry.pending_matches[0].found_by, "aristo stamp");
assert!(matches!(
entry.pending_matches[0].disposition,
Disposition::Open
));
}
// Merging refreshes pending matches but leaves accepted matches in place.
#[test]
fn merge_response_preserves_accepted_matches() {
let mut cache = CanonMatchesFile::default();
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: format!("sha256:{}", "a".repeat(64)),
canon_fetched_at: "2026-06-14T00:00:00Z".into(),
pending_matches: vec![],
accepted_matches: vec![aristo_core::canon::AcceptedMatch {
canon_id: "previously_accepted".into(),
version: "v0.1.0".into(),
canonical_text: "previously accepted text".into(),
canon_version: "v0.2.0".into(),
confidence: 0.95,
prefix_tier: PrefixTier::Aristos,
backed_by: Some("specialized neural checker".into()),
linked: None,
accepted_at: "2026-06-14T00:00:00Z".into(),
bound_at: "2026-06-14T00:00:00Z".into(),
}],
rejected_matches: vec![],
},
);
let batch = vec![batch_entry("alpha")];
let response = response_with(vec![vec![canon_match_aristos()]]);
merge_response_into_cache(&mut cache, &batch, &response, "aristo stamp");
let entry = &cache.entries[&aid("alpha")];
assert_eq!(entry.accepted_matches.len(), 1, "accepted must survive");
assert_eq!(entry.accepted_matches[0].canon_id, "previously_accepted");
assert_eq!(entry.pending_matches.len(), 1, "pending refreshed");
}
// A candidate already rejected for the same text hash is not re-added as
// pending, and the rejection record is kept.
#[test]
fn merge_response_suppresses_rejected_canon_id_for_same_text_hash() {
let mut cache = CanonMatchesFile::default();
let text_hash = format!("sha256:{}", "a".repeat(64));
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: text_hash.clone(),
canon_fetched_at: "2026-06-14T00:00:00Z".into(),
pending_matches: vec![],
accepted_matches: vec![],
rejected_matches: vec![aristo_core::canon::RejectedMatch {
canon_id: "matched_canon".into(), version: "v0.1.0".into(),
text_hash: text_hash.clone(),
rejected_at: "2026-06-14T01:00:00Z".into(),
reason: None,
}],
},
);
let batch = vec![batch_entry("alpha")];
let response = response_with(vec![vec![canon_match_aristos()]]);
let n = merge_response_into_cache(&mut cache, &batch, &response, "aristo stamp");
assert_eq!(n, 0);
let entry = &cache.entries[&aid("alpha")];
assert!(entry.pending_matches.is_empty());
assert_eq!(entry.rejected_matches.len(), 1);
}
// Two pending matches on one cache entry are counted as two.
#[test]
fn count_pending_sums_across_annotations() {
let mut cache = CanonMatchesFile::default();
cache.entries.insert(
aid("alpha"),
CacheEntry {
last_match_text_hash: "x".into(),
canon_fetched_at: "y".into(),
pending_matches: vec![
PendingMatch {
canon_id: "c1".into(),
version: "v0.1.0".into(),
canonical_text: "x".into(),
canon_version: "v0.2.0".into(),
confidence: 0.9,
prefix_tier: PrefixTier::Aristos,
backed_by: None,
linked: Some("arta_x".into()),
disposition: Disposition::Open,
found_at: "t".into(),
found_by: "x".into(),
},
PendingMatch {
canon_id: "c2".into(),
version: "v0.1.0".into(),
canonical_text: "x".into(),
canon_version: "v0.2.0".into(),
confidence: 0.9,
prefix_tier: PrefixTier::Kanon,
backed_by: None,
linked: Some("arta_x".into()),
disposition: Disposition::Open,
found_at: "t".into(),
found_by: "x".into(),
},
],
accepted_matches: vec![],
rejected_matches: vec![],
},
);
assert_eq!(count_pending(&cache), 2);
}
}