ev/migrate.rs
1//! `ev migrate` — backfill an existing decision history into the ledger.
2//!
3//! Four PURE, format-aware extractors turn a source substrate (`&str`) into a `Vec<MigrationRecord>`:
4//! a chat-room/git log (`## R<N>` records), the `to-human` RESOLVED/FLAG markdown blocks (the
5//! authority substrate), a `decisions-immutable` §N document, and an `escalation` log (the SAME
6//! RESOLVED/FLAG reader, path-parameterized). The extractors parse **rulings + structured
7//! rejected-roads only** — they NEVER NLP a free-text reason into a ground (`grounds_are_never_
8//! synthesized`): a road becomes a ground iff the source declares it structurally (a `rejected:`
9//! token), otherwise the record carries zero grounds and stays an honest capture.
10//!
11//! The command driver then runs an IDEMPOTENT backfill loop (deterministic source_key sort →
12//! prospective-parent compute_id → ticks_dir pre-check → skip-if-present) on top of the shared
13//! `capture::append`, plus a `--reconcile` join and a `--bind-check` harvest.
14
15use crate::canonical::compute_id;
16use crate::capture::{harvested_test_check, Decision};
17use crate::store::Store;
18use crate::tick::{Ground, Tick};
19use std::collections::HashMap;
20use std::path::Path;
21
/// One extracted, not-yet-appended decision from a source substrate.
///
/// `source_key` is the dedup/sort key (e.g. `R2289`, `#555`, `§3`): `backfill` sorts records by
/// it, looks it up in the store's key index to decide whether to skip, and uses it to consult the
/// jurisdiction map; `reconcile` joins on it. The block extractors also copy the key into
/// `observe` and `source_ref`, so the store side can derive the same key back from the stored
/// tick. Grounds are ONLY the structurally declared rejected-roads — never synthesized from prose.
#[derive(Debug, Clone, PartialEq)]
pub struct MigrationRecord {
    /// Dedup/sort key for this record; the canonical reader leaves it empty when neither
    /// `source_ref` nor `observe` yields a key.
    pub source_key: String,
    /// The decision text itself.
    pub decision: String,
    /// Observation text; the block extractors store the source key here.
    pub observe: String,
    /// Author, when the source declares one; `backfill` may substitute its fallback otherwise.
    pub blame: Option<String>,
    /// Structurally declared grounds only.
    pub grounds: Vec<Ground>,
    // The bookkeeping tags a producer may declare. The built-in block extractors leave them at the
    // legacy defaults (authority None, jurisdiction None — so the `--jurisdiction-map` fills it —,
    // source_ref = the source_key token, provenance None); the canonical reader populates them from the
    // wire record so an imported ruling lands with its true authority / jurisdiction / provenance.
    /// Declared authority tag, if any.
    pub authority: Option<String>,
    /// Declared jurisdiction bucket, if any; takes precedence over the map in `backfill`.
    pub jurisdiction: Option<String>,
    /// Opaque source reference (a raw JSON value), if any.
    pub source_ref: Option<serde_json::Value>,
    /// Declared provenance, if any; `backfill` stamps `imported` when this is `None`.
    pub provenance: Option<String>,
}
43
/// Find the first provenance token in `text`: a `#`, `R` or `r` followed by one or more ASCII
/// digits and nothing else (e.g. `#555`, `R2289`).
///
/// The text is cut into candidates at every character that is neither ASCII-alphanumeric nor `#`,
/// so `PR22` or `R2a` are single candidates that do NOT qualify. Returns the matching candidate
/// verbatim (prefix included), or `None` when no candidate qualifies.
fn first_round_or_issue_token(text: &str) -> Option<String> {
    let is_separator = |ch: char| !ch.is_ascii_alphanumeric() && ch != '#';
    for candidate in text.split(is_separator) {
        // A candidate starts with at most one of the three prefixes, so a single
        // char-set strip is equivalent to trying them one after another.
        if let Some(number) = candidate.strip_prefix(['#', 'R', 'r']) {
            if !number.is_empty() && number.chars().all(|ch| ch.is_ascii_digit()) {
                return Some(candidate.to_owned());
            }
        }
    }
    None
}
57
58/// Parse the structurally-declared rejected-roads out of a block's lines. A road is declared ONLY by
59/// an explicit `rejected: <option>: <why>` (or `reject <option>: <why>`) line — never inferred from
60/// prose. Returns one `rejected:<option>` ground per declared road, in source order. A block with no
61/// such line yields zero grounds (the honesty contract: no synthesis).
62fn structured_rejected_roads(block: &str) -> Vec<Ground> {
63 let mut out = Vec::new();
64 for line in block.lines() {
65 let l = line.trim_start_matches(['-', '*', ' ', '\t']).trim();
66 let body = l
67 .strip_prefix("rejected:")
68 .or_else(|| l.strip_prefix("rejected "))
69 .or_else(|| l.strip_prefix("reject:"))
70 .or_else(|| l.strip_prefix("reject "));
71 if let Some(rest) = body {
72 if let Some((opt, why)) = rest.split_once(':') {
73 let (opt, why) = (opt.trim(), why.trim());
74 if !opt.is_empty() && !why.is_empty() {
75 out.push(Ground {
76 claim: why.to_string(),
77 supports: format!("rejected:{opt}"),
78 check: None,
79 });
80 }
81 }
82 }
83 }
84 out
85}
86
87/// Build one MigrationRecord from a parsed (key, decision) header + its block body: observe carries the
88/// source_key as durable provenance, grounds are the structurally-declared rejected-roads only (never
89/// synthesized), blame is left for the backfill's `--blame` fallback. Shared by all three block extractors.
90fn flush_record(header: &Option<(String, String)>, body: &str, out: &mut Vec<MigrationRecord>) {
91 if let Some((key, decision)) = header {
92 out.push(MigrationRecord {
93 source_key: key.clone(),
94 decision: decision.clone(),
95 observe: key.clone(),
96 blame: None,
97 grounds: structured_rejected_roads(body),
98 // Legacy defaults: no inline authority/provenance, source_ref = the source_key token, and
99 // jurisdiction left None so the `--jurisdiction-map` remains the sole tagger on this path.
100 authority: None,
101 jurisdiction: None,
102 source_ref: Some(serde_json::Value::String(key.clone())),
103 provenance: None,
104 });
105 }
106}
107
108/// The store-side durable key for a tick: the dedup key derived from its opaque `source_ref` if
109/// present (a string verbatim, or an object's deterministic JSON — see `source_ref_key`), else the
110/// first round/`#<n>` token in the hashed `observe` — never the non-hashed events log. Shared by the
111/// idempotency index + reconcile, so the two never disagree on key precedence.
112fn store_key(raw: &serde_json::Value) -> Option<String> {
113 raw.get("source_ref")
114 .map(crate::tick::source_ref_key)
115 .or_else(|| {
116 raw.get("observe")
117 .and_then(|x| x.as_str())
118 .and_then(first_round_or_issue_token)
119 })
120}
121
/// The closed key set of a Canonical Decision Intake line. The wire envelope is STRICT — unlike a
/// stored tick (which tolerates an unknown non-hashed key as forward-compat), an external producer's
/// line with an unknown key is a hard failure, so a mis-piped file cannot smuggle a field past ingest.
///
/// `canonical_records` passes this list to `only_keys` for every line before reading any field.
const CANONICAL_KEYS: &[&str] = &[
    "kind",
    "decision",
    "observe",
    "grounds",
    "blame",
    "authority",
    "jurisdiction",
    "source_ref",
    "provenance",
];
136
137/// Parse a **Canonical Decision Intake** stream (JSONL) into `MigrationRecord`s — the format-neutral
138/// intake both an adopter's legacy adapter and a future live runner emit. This IS the trust boundary:
139/// the producer supplies STRUCTURE, and ev RE-VALIDATES it here through the very read-path validators
140/// (`ground_from_value`, the vocab checks) that guard an on-disk tick — never a parallel serde decode
141/// that could trust an unchecked `Ground`. Per line: skip blank / `#`-comment lines; require the fixed
142/// `kind` discriminator and reject any unknown envelope key loudly; require a non-empty `decision` and
143/// a `grounds` array (which may be empty — the honest zero-grounds capture); validate every declared
144/// tag against its closed vocabulary. The durable dedup/sort key mirrors `store_key`: the opaque
145/// `source_ref`'s derived key, else the first round/`#issue` token in `observe`.
146pub fn canonical_records(text: &str) -> Result<Vec<MigrationRecord>, String> {
147 use crate::capture::validate_authority;
148 use crate::tick::{
149 ground_from_value, only_keys, req_str, source_ref_key, validate_jurisdiction,
150 validate_provenance, validate_source_ref,
151 };
152 let mut out = Vec::new();
153 for (i, raw_line) in text.lines().enumerate() {
154 let n = i + 1;
155 let line = raw_line.trim();
156 if line.is_empty() || line.starts_with('#') {
157 continue;
158 }
159 let v: serde_json::Value =
160 serde_json::from_str(line).map_err(|e| format!("canonical line {n}: not JSON: {e}"))?;
161 let obj = v
162 .as_object()
163 .ok_or_else(|| format!("canonical line {n}: not a JSON object"))?;
164 only_keys(obj, CANONICAL_KEYS, &format!("canonical line {n}"))?;
165 match obj.get("kind").and_then(|x| x.as_str()) {
166 Some("ev-decision-intake") => {}
167 other => {
168 return Err(format!(
169 "canonical line {n}: not an ev-decision-intake record (kind={other:?})"
170 ))
171 }
172 }
173 let decision = req_str(obj, "decision").map_err(|e| format!("canonical line {n}: {e}"))?;
174 if decision.trim().is_empty() {
175 return Err(format!("canonical line {n}: decision is empty"));
176 }
177 let observe = obj
178 .get("observe")
179 .and_then(|x| x.as_str())
180 .unwrap_or("")
181 .to_string();
182 let grounds_v = obj
183 .get("grounds")
184 .and_then(|x| x.as_array())
185 .ok_or_else(|| format!("canonical line {n}: grounds missing/not array"))?;
186 let mut grounds = Vec::new();
187 for gv in grounds_v {
188 grounds.push(ground_from_value(gv).map_err(|e| format!("canonical line {n}: {e}"))?);
189 }
190 let blame = obj
191 .get("blame")
192 .and_then(|x| x.as_str())
193 .map(str::to_string);
194 // One validated optional string tag: absent → None; present → vocab-checked, with the line
195 // number threaded into the error. (source_ref is a raw Value, so it stays its own arm below.)
196 let opt_tag = |key: &str,
197 validate: fn(&str) -> Result<(), String>|
198 -> Result<Option<String>, String> {
199 match obj.get(key).and_then(|x| x.as_str()) {
200 None => Ok(None),
201 Some(v) => {
202 validate(v).map_err(|e| format!("canonical line {n}: {e}"))?;
203 Ok(Some(v.to_string()))
204 }
205 }
206 };
207 let authority = opt_tag("authority", validate_authority)?;
208 let jurisdiction = opt_tag("jurisdiction", validate_jurisdiction)?;
209 let provenance = opt_tag("provenance", validate_provenance)?;
210 let source_ref = match obj.get("source_ref") {
211 None => None,
212 Some(rv) => {
213 validate_source_ref(rv).map_err(|e| format!("canonical line {n}: {e}"))?;
214 Some(rv.clone())
215 }
216 };
217 // The dedup/sort key mirrors store_key's precedence: the source_ref's derived key, else the
218 // first round/`#issue` token in observe (so the canonical path and the store agree on keying).
219 let source_key = source_ref
220 .as_ref()
221 .map(source_ref_key)
222 .or_else(|| first_round_or_issue_token(&observe))
223 .unwrap_or_default();
224 out.push(MigrationRecord {
225 source_key,
226 decision,
227 observe,
228 blame,
229 grounds,
230 authority,
231 jurisdiction,
232 source_ref,
233 provenance,
234 });
235 }
236 Ok(out)
237}
238
239/// Extractor 1 — **gitlog / chat-room**: each `## R<N> …` header is one decision; the header text
240/// after the round token (and an optional `— ` em-dash separator) is the decision; any structurally
241/// declared rejected-road line in that record's body becomes a ground. The `R<N>`/`#<n>` token is the
242/// source_key and is carried into observe as a durable provenance token. Reasons are NEVER NLP'd.
243pub fn extract_gitlog(text: &str) -> Vec<MigrationRecord> {
244 let mut records = Vec::new();
245 let mut header: Option<(String, String)> = None; // (source_key, decision)
246 let mut body = String::new();
247 for line in text.lines() {
248 if let Some(rest) = line.strip_prefix("## ") {
249 flush_record(&header, &body, &mut records);
250 body.clear();
251 let key = first_round_or_issue_token(rest);
252 // The decision text is the header with the leading round token stripped + em-dash trimmed.
253 let decision = match key.as_deref() {
254 Some(k) => rest
255 .split_once(k)
256 .map(|x| x.1)
257 .unwrap_or(rest)
258 .trim_start_matches([' ', '—', '-', ':'])
259 .trim()
260 .to_string(),
261 None => rest.trim().to_string(),
262 };
263 header = key.map(|k| {
264 (
265 k,
266 if decision.is_empty() {
267 rest.trim().into()
268 } else {
269 decision
270 },
271 )
272 });
273 } else {
274 body.push_str(line);
275 body.push('\n');
276 }
277 }
278 flush_record(&header, &body, &mut records);
279 records
280}
281
282/// The shared RESOLVED / FLAG block reader (the authority substrate). A `### RESOLVED <key>: <decision>`
283/// or `### FLAG <key>: <decision>` header opens a block; the block's body is scanned for structured
284/// rejected-roads only. RESOLVED marks a user-ruled decision; FLAG marks an open one — both are
285/// captured (the ruling state is provenance, not a reason to drop the record). PATH-PARAMETERIZED by
286/// the caller: `to-human` and `escalation` are the SAME reader over different files (no hardcoded
287/// layout). Returns records in source order.
288fn read_resolved_flag_blocks(text: &str) -> Vec<MigrationRecord> {
289 let mut records = Vec::new();
290 let mut header: Option<(String, String)> = None;
291 let mut body = String::new();
292 for line in text.lines() {
293 let stripped = line
294 .trim_start_matches(['#', ' '])
295 .strip_prefix("RESOLVED")
296 .or_else(|| line.trim_start_matches(['#', ' ']).strip_prefix("FLAG"));
297 if let Some(rest) = stripped {
298 flush_record(&header, &body, &mut records);
299 body.clear();
300 let rest = rest.trim();
301 // `<key>: <decision>` — the key is the leading token before the first colon.
302 if let Some((key, decision)) = rest.split_once(':') {
303 let key = key.trim();
304 let source_key = first_round_or_issue_token(key).unwrap_or_else(|| key.to_string());
305 header = Some((source_key, decision.trim().to_string()));
306 } else {
307 let source_key =
308 first_round_or_issue_token(rest).unwrap_or_else(|| rest.to_string());
309 header = Some((source_key, rest.to_string()));
310 }
311 } else {
312 body.push_str(line);
313 body.push('\n');
314 }
315 }
316 flush_record(&header, &body, &mut records);
317 records
318}
319
/// Extractor 2 — **to-human**: the RESOLVED/FLAG markdown blocks (the authority substrate).
///
/// Delegates unchanged to `read_resolved_flag_blocks`; `text` is the full content of the
/// to-human document, and the result is that reader's records.
pub fn extract_to_human(text: &str) -> Vec<MigrationRecord> {
    read_resolved_flag_blocks(text)
}
324
/// Extractor 4 — **escalation**: the SAME RESOLVED/FLAG reader, path-parameterized — escalation is
/// just the reader over a different file, with NO hardcoded layout of its own.
///
/// Delegates unchanged to `read_resolved_flag_blocks`, so its output for a given text is
/// identical to `extract_to_human`'s.
pub fn extract_escalation(text: &str) -> Vec<MigrationRecord> {
    read_resolved_flag_blocks(text)
}
330
331/// Extractor 3 — **decisions-immutable**: a document split on `## N.` / `## §N` section headers, one
332/// decision per numbered section. The section number is the source_key; the header text after the
333/// number is the decision; structured rejected-roads in the section body become grounds.
334pub fn extract_decisions_immutable(text: &str) -> Vec<MigrationRecord> {
335 let mut records = Vec::new();
336 let mut header: Option<(String, String)> = None;
337 let mut body = String::new();
338 for line in text.lines() {
339 if let Some(rest) = line.strip_prefix("## ") {
340 // A numbered section header: `## 3. <decision>` or `## §3 <decision>`.
341 let rest = rest.trim();
342 let digits: String = rest
343 .trim_start_matches('§')
344 .chars()
345 .take_while(|c| c.is_ascii_digit())
346 .collect();
347 if !digits.is_empty() {
348 flush_record(&header, &body, &mut records);
349 body.clear();
350 let decision = rest
351 .trim_start_matches('§')
352 .trim_start_matches(|c: char| c.is_ascii_digit())
353 .trim_start_matches(['.', ' ', ':', '—', '-'])
354 .trim()
355 .to_string();
356 header = Some((format!("§{digits}"), decision));
357 continue;
358 }
359 }
360 body.push_str(line);
361 body.push('\n');
362 }
363 flush_record(&header, &body, &mut records);
364 records
365}
366
/// The outcome of one backfill pass, as tallied by `backfill` and rendered by the command layer.
/// Every counter starts at zero (`Default`).
#[derive(Debug, Default, PartialEq)]
pub struct BackfillSummary {
    /// Records appended to the store — or, on a dry run, records that would have been.
    pub imported: usize,
    /// Records whose source key was already present in the store's key index.
    pub skipped: usize,
    /// Skipped records whose stored parent differs from where this sorted pass would place them
    /// (a back-dated mid-chain insert); reported only, never rewritten.
    pub relinked: usize,
    /// Records that could not be appended because no usable author was available for them.
    pub source_only_gaps: usize,
}
378
379/// Map the store's existing decisions to their durable source key → (id, parent_id). The key is the
380/// derived dedup key of the non-hashed `source_ref` if present, else the first round/#N token in the
381/// hashed `observe` — never the non-hashed events log. The idempotency + re-link index for a backfill.
382fn store_key_index(
383 store: &Store,
384) -> Result<std::collections::HashMap<String, (String, String)>, String> {
385 let files = store
386 .read_all()
387 .map_err(|e| format!("reading store: {e}"))?;
388 let mut idx = std::collections::HashMap::new();
389 for (name, raw) in &files {
390 let key = store_key(raw);
391 let parent = raw
392 .get("parent_id")
393 .and_then(|x| x.as_str())
394 .unwrap_or("")
395 .to_string();
396 if let Some(k) = key {
397 idx.insert(k, (name.clone(), parent));
398 }
399 }
400 Ok(idx)
401}
402
403/// Run the idempotent backfill of `records` into the store at `repo`. Deterministic order: records
404/// are sorted by `source_key` first so a re-run replays the same chain. Idempotency is keyed on the
405/// durable `source_key` (the non-hashed `source_ref`'s derived key, or a token in the hashed `observe`): a record
406/// whose key is already in the store is SKIPPED — chain-position-independent, so a re-run over a
407/// now-non-empty store writes nothing. The chain is kept by threading the PROSPECTIVE parent (the
408/// id we just wrote/found) instead of re-reading the live HEAD each step, so the lineage stays
409/// stable across re-runs. A skipped record whose stored parent differs from where it would now land
410/// is a back-dated mid-chain insert and is reported as re-linked. `blame_fallback` supplies the
411/// author for a record carrying none; a record with neither is a source-only gap (R5 stays intact —
412/// we never invent an author). `jurisdiction_map` (source_key → A/B/C/D bucket) tags each imported
413/// decision: a record whose key is in the map carries that jurisdiction, one absent imports untagged
414/// (None) — so the map is purely additive (an empty map ⇒ every record None, the prior behavior).
415/// jurisdiction is NON-hashed, so tagging never moves a tick id (idempotency holds across re-runs).
416/// `--dry-run` reports the would-import count but writes nothing.
417pub fn backfill(
418 repo: &Path,
419 mut records: Vec<MigrationRecord>,
420 blame_fallback: Option<&str>,
421 jurisdiction_map: &HashMap<String, String>,
422 dry_run: bool,
423) -> Result<BackfillSummary, String> {
424 records.sort_by(|a, b| a.source_key.cmp(&b.source_key));
425 let store = Store::at(repo);
426 if !store.exists() {
427 return Err("no .evolving/ store here — run `ev init` first".into());
428 }
429 let existing = store_key_index(&store)?;
430 // The prospective parent threads through the loop so the chain stays coherent across this pass:
431 // for a brand-new store it begins at the live HEAD; as records resolve it advances to each id.
432 // For relink detection we compare a found record's STORED parent against where this sorted pass
433 // would place it (`prospective_parent`) — equal ⇒ the chain is intact (a clean re-run reports
434 // 0); different ⇒ the chain was re-linked around it (a back-dated mid-chain insert).
435 let head = store
436 .read_head()
437 .map_err(|e| format!("reading HEAD: {e}"))?;
438 // Seed the prospective parent: if the FIRST sorted record is already the genesis (stored
439 // parent ""), the pass replays from genesis; otherwise it extends the current HEAD.
440 let first_is_stored_genesis = records
441 .first()
442 .and_then(|r| existing.get(&r.source_key))
443 .map(|(_, p)| p.is_empty())
444 .unwrap_or(false);
445 let mut prospective_parent = if first_is_stored_genesis {
446 String::new()
447 } else {
448 head
449 };
450 let mut summary = BackfillSummary::default();
451 for r in records {
452 // Idempotency PRE-CHECK on the durable source_key (chain-position-independent).
453 if let Some((existing_id, existing_parent)) = existing.get(&r.source_key) {
454 // A back-dated mid-chain insert: present, but its stored parent differs from where this
455 // pass would now place it — the chain was re-linked around it. Reported, never rewritten.
456 if *existing_parent != prospective_parent {
457 summary.relinked += 1;
458 }
459 // Keep the chain coherent for any later records in this same pass.
460 prospective_parent = existing_id.clone();
461 summary.skipped += 1;
462 continue;
463 }
464 let blame = match r.blame.as_deref().or(blame_fallback) {
465 Some(b) if !b.trim().is_empty() => b.trim().to_string(),
466 _ => {
467 // R5 stays intact: no author, no fabrication. Surface the gap; never invent a human.
468 summary.source_only_gaps += 1;
469 continue;
470 }
471 };
472 // Per-record bookkeeping, applied identically on the probe and the real path (all non-hashed,
473 // so the probe id stays byte-identical to the append id). An inline jurisdiction on the record
474 // WINS over the `--jurisdiction-map`; the map fills only a record that declares none; a record
475 // that declares a DIFFERENT bucket than the map is a hard error (two sources of truth disagree).
476 let jurisdiction = match (
477 r.jurisdiction.as_deref(),
478 jurisdiction_map.get(&r.source_key),
479 ) {
480 (Some(inline), Some(mapped)) if inline != mapped => {
481 return Err(format!(
482 "source {:?}: inline jurisdiction {inline:?} conflicts with the --jurisdiction-map entry {mapped:?}",
483 r.source_key
484 ));
485 }
486 (Some(inline), _) => Some(inline.to_string()),
487 (None, mapped) => mapped.cloned(),
488 };
489 let authority = r.authority.clone();
490 let source_ref = r.source_ref.clone();
491 // The migrate verb backfills HISTORY: a record with no declared provenance is stamped
492 // `imported`. An explicit value (a live runner emitting `agent-proposed` / `human-now`) wins.
493 // `ev decide` / `ev guard` never reach here, so fresh authorship is never stamped imported.
494 let provenance = r
495 .provenance
496 .clone()
497 .or_else(|| Some("imported".to_string()));
498 // Ingest-boundary structural gates — the SAME refusals `ev verify` enforces at rest, applied at
499 // the door so a malformed record never lands. A C/D (detect-only) decision may carry no runnable
500 // Test check (one shared predicate with verify, so they cannot drift):
501 if crate::tick::detect_only_carries_test(jurisdiction.as_deref(), &r.grounds) {
502 return Err(format!(
503 "source {:?}: a {} jurisdiction (detect-only) decision cannot carry a runnable test check",
504 r.source_key,
505 jurisdiction.as_deref().unwrap_or("")
506 ));
507 }
508 // And a harvested check (a Test with no counter-test) is allowed ONLY for imported history — a
509 // fresh `agent-proposed` binding must prove falsifiability with a counter-test, exactly as decide.
510 for g in &r.grounds {
511 if let Some(crate::tick::Check::Test {
512 counter_test: None, ..
513 }) = &g.check
514 {
515 if provenance.as_deref() != Some("imported") {
516 return Err(format!(
517 "source {:?}: a harvested test check (no counter-test) is allowed only for imported history, not {}",
518 r.source_key,
519 provenance.as_deref().unwrap_or("human-now")
520 ));
521 }
522 }
523 }
524 if dry_run {
525 // The id this record WOULD take at the prospective parent (no write). held_since is
526 // non-hashed, so this matches the id `append` computes on a real run — only the real
527 // path needs a write, so the probe lives here, not on the hot import path.
528 let probe = Tick {
529 id: String::new(),
530 parent_id: prospective_parent.clone(),
531 observe: r.observe.clone(),
532 decision: r.decision.clone(),
533 grounds: r.grounds.clone(),
534 status: "live".into(),
535 held_since: String::new(),
536 blame: blame.clone(),
537 authority: authority.clone(),
538 jurisdiction: jurisdiction.clone(),
539 source_ref: source_ref.clone(),
540 provenance: provenance.clone(),
541 };
542 prospective_parent = compute_id(&probe);
543 summary.imported += 1;
544 continue;
545 }
546 let written = crate::capture::append(
547 repo,
548 Decision {
549 observe: r.observe,
550 decision: r.decision,
551 grounds: r.grounds,
552 blame,
553 authority,
554 jurisdiction,
555 source_ref,
556 provenance,
557 },
558 )?;
559 prospective_parent = written.id;
560 summary.imported += 1;
561 }
562 Ok(summary)
563}
564
/// The bucket counts produced by `reconcile`. Source-side counts are over DISTINCT source keys;
/// store-side keys are derived from each tick's `source_ref` or `observe` (see `store_key`), never
/// from events.jsonl. Every counter starts at zero (`Default`).
#[derive(Debug, Default, PartialEq)]
pub struct ReconcileReport {
    /// Source keys that also appear among the store's keys.
    pub in_both: usize,
    /// Source keys with no match in the store — the capture gap to surface.
    pub source_only: usize,
    /// Store keys that this source does not contain.
    pub store_only: usize,
    /// Store ticks for which no key could be derived at all.
    pub un_keyable: usize,
}
577
578/// Reconcile a source's extracted records against the store. The store-side key is read from each
579/// the derived key of its non-hashed `source_ref` if present, else the first round/#N token in the
580/// hashed `observe` — so the join is durable (NOT dependent on the events log). A source key with no store
581/// match is a SOURCE-ONLY gap (the capture gap to surface); a store key with no source match is
582/// STORE-ONLY; a store tick with no derivable key is counted separately as un-keyable.
583pub fn reconcile(
584 repo: &Path,
585 source_records: &[MigrationRecord],
586) -> Result<ReconcileReport, String> {
587 let store = Store::at(repo);
588 if !store.exists() {
589 return Err("no .evolving/ store here — run `ev init` first".into());
590 }
591 let files = store
592 .read_all()
593 .map_err(|e| format!("reading store: {e}"))?;
594 let mut store_keys: std::collections::HashSet<String> = std::collections::HashSet::new();
595 let mut un_keyable = 0usize;
596 for (_name, raw) in &files {
597 let key = store_key(raw);
598 match key {
599 Some(k) => {
600 store_keys.insert(k);
601 }
602 None => un_keyable += 1,
603 }
604 }
605 let source_keys: std::collections::HashSet<String> = source_records
606 .iter()
607 .map(|r| r.source_key.clone())
608 .collect();
609 let mut report = ReconcileReport {
610 un_keyable,
611 ..Default::default()
612 };
613 for k in &source_keys {
614 if store_keys.contains(k) {
615 report.in_both += 1;
616 } else {
617 report.source_only += 1;
618 }
619 }
620 report.store_only = store_keys
621 .iter()
622 .filter(|k| !source_keys.contains(*k))
623 .count();
624 Ok(report)
625}
626
/// The `--bind-check` harvest: build a check for the given selector by forwarding every argument,
/// unchanged and in the same order, to `capture::harvested_test_check`. The caller attaches the
/// returned check to a ground.
///
/// Per the original author's note this yields a harvested `Check::Test` (counter_test None, full
/// liveness) and is the SAME constructor the harvested-binding path uses — no second half-harvest
/// gate. NOTE(review): the constructor's body is not visible from this file; confirm against
/// `capture::harvested_test_check`.
///
/// # Errors
/// Returns whatever error `harvested_test_check` reports.
pub fn bind_check(
    selector: String,
    verified_at_sha: String,
    platforms: Vec<String>,
    triggered_by: Vec<String>,
    surfaces: Vec<String>,
) -> Result<crate::tick::Check, String> {
    harvested_test_check(selector, verified_at_sha, platforms, triggered_by, surfaces)
}
639
640#[cfg(test)]
641mod tests {
642 use super::*;
643
644 #[test]
645 fn extract_gitlog_should_yield_one_record_per_round_header_when_given_a_chat_room_log() {
646 // given: a chat-room log with two `## R<N>` decision records, one carrying a rejected road
647 let text = "\
648## R2289 QA — restore-safety counter DB-backed
649- rejected: Redis: would add a new infra dependency
650## R2290 Dev — ship the cross-pod drain
651some prose nobody parses for grounds
652";
653
654 // when: the gitlog extractor reads it
655 let recs = extract_gitlog(text);
656
657 // then: two records, keyed by their round token, the first carrying the structured road
658 assert_eq!(recs.len(), 2);
659 assert_eq!(recs[0].source_key, "R2289");
660 assert_eq!(recs[0].decision, "QA — restore-safety counter DB-backed");
661 assert_eq!(recs[0].grounds.len(), 1);
662 assert_eq!(recs[0].grounds[0].supports, "rejected:Redis");
663 assert_eq!(recs[1].source_key, "R2290");
664 assert!(recs[0].observe.contains("R2289"));
665 }
666
667 #[test]
668 fn extract_to_human_should_read_a_resolved_block_when_given_the_authority_substrate() {
669 // given: a to-human doc with a RESOLVED ruling and a FLAG (open) one
670 let text = "\
671### RESOLVED R555: restore-safety counter DB-backed; reject Redis
672- rejected: Redis: a new infra dependency
673### FLAG R600: multi-pod relax policy still open
674";
675
676 // when: the to-human extractor reads it
677 let recs = extract_to_human(text);
678
679 // then: both blocks are captured; the RESOLVED one carries its structured road
680 assert_eq!(recs.len(), 2);
681 assert_eq!(recs[0].source_key, "R555");
682 assert_eq!(
683 recs[0].decision,
684 "restore-safety counter DB-backed; reject Redis"
685 );
686 assert_eq!(recs[0].grounds.len(), 1);
687 assert_eq!(recs[1].source_key, "R600");
688 }
689
690 #[test]
691 fn extract_escalation_should_reuse_the_resolved_flag_reader_when_given_an_escalation_log() {
692 // given: an escalation log in the SAME RESOLVED/FLAG shape (path-parameterized reader)
693 let text = "### FLAG #1194: re-milestoned without sign-off\n";
694
695 // when: the escalation extractor reads it
696 let recs = extract_escalation(text);
697
698 // then: it is read identically to to-human (no hardcoded layout of its own)
699 assert_eq!(recs.len(), 1);
700 assert_eq!(recs[0].source_key, "#1194");
701 assert_eq!(recs[0].decision, "re-milestoned without sign-off");
702 }
703
704 #[test]
705 fn extract_decisions_immutable_should_split_on_numbered_sections_when_given_a_doc() {
706 // given: a decisions-immutable doc split into numbered sections
707 let text = "\
708## 1. freeze the retrieval schema for v2
709- rejected: pgvector: would lock our schema
710## 2. restore-safety counter DB-backed
711";
712
713 // when: the decisions-immutable extractor reads it
714 let recs = extract_decisions_immutable(text);
715
716 // then: one record per section, keyed by §N, the first carrying its structured road
717 assert_eq!(recs.len(), 2);
718 assert_eq!(recs[0].source_key, "§1");
719 assert_eq!(recs[0].decision, "freeze the retrieval schema for v2");
720 assert_eq!(recs[0].grounds.len(), 1);
721 assert_eq!(recs[1].source_key, "§2");
722 }
723
724 #[test]
725 fn grounds_are_never_synthesized_when_a_block_has_no_structured_rejected_road() {
726 // given: a record whose body is pure prose mentioning a rejected option WITHOUT the
727 // structured `rejected:<opt>: <why>` token — an NLP'able sentence we must NOT mine
728 let text = "\
729## R2289 we considered Redis but rejected it because it adds infra
730this paragraph explains at length why redis was rejected, in prose
731";
732
733 // when: the gitlog extractor reads it
734 let recs = extract_gitlog(text);
735
736 // then: the record exists but carries ZERO grounds — reasons are never NLP'd into grounds
737 assert_eq!(recs.len(), 1);
738 assert!(
739 recs[0].grounds.is_empty(),
740 "a prose reason must NEVER become a ground (no synthesis)"
741 );
742 }
743
744 // --- canonical intake reader (the trust boundary) ---
745
746 fn canonical_line(extra: &str) -> String {
747 // a minimal valid ev-decision-intake line, with room to splice in extra/override fields
748 format!(
749 "{{\"kind\":\"ev-decision-intake\",\"decision\":\"no Redis\",\"grounds\":[]{extra}}}"
750 )
751 }
752
#[test]
fn canonical_reader_should_parse_a_full_ruling_record_when_given_a_valid_line() {
    // given: one ev-decision-intake line that sets every field this test checks —
    // decision, observe, two grounds, blame, authority, jurisdiction, source_ref, provenance
    let full_line = concat!(
        r#"{"kind":"ev-decision-intake","decision":"rate-limit at the edge","#,
        r#""observe":"round R1043","#,
        r#""grounds":[{"claim":"edge sees every request","supports":"chosen"},"#,
        r#"{"claim":"app tier double-counts","supports":"rejected:app-tier"}],"#,
        r#""blame":"Wang Yu","#,
        r#""authority":"user-ruled","jurisdiction":"C","source_ref":"R1043","provenance":"imported"}"#,
    );

    // when: the line goes through the canonical reader
    let parsed = canonical_records(full_line).expect("valid record");

    // then: a single record is produced and each declared field is carried onto it;
    // the string source_ref doubles as the record's source_key
    assert_eq!(parsed.len(), 1);
    let rec = &parsed[0];
    assert_eq!(rec.decision, "rate-limit at the edge");
    assert_eq!(rec.grounds.len(), 2);
    assert_eq!(rec.grounds[1].supports, "rejected:app-tier");
    assert_eq!(rec.blame.as_deref(), Some("Wang Yu"));
    assert_eq!(rec.authority.as_deref(), Some("user-ruled"));
    assert_eq!(rec.jurisdiction.as_deref(), Some("C"));
    assert_eq!(rec.source_ref, Some(serde_json::json!("R1043")));
    assert_eq!(rec.source_key, "R1043");
    assert_eq!(rec.provenance.as_deref(), Some("imported"));
}
777
#[test]
fn canonical_reader_should_reject_a_line_whose_kind_is_not_ev_decision_intake() {
    // given: well-formed JSON whose `kind` is something other than `ev-decision-intake`
    // (what a mis-piped, non-intake file would look like)
    let wrong_kind = r#"{"kind":"something-else","decision":"x","grounds":[]}"#;

    // when: the line goes through the canonical reader
    let outcome = canonical_records(wrong_kind);

    // then: the reader returns an error instead of tolerating the unexpected envelope kind
    assert!(outcome.is_err());
}
789
#[test]
fn canonical_reader_should_reject_an_unknown_envelope_key() {
    // given: the minimal valid line with one additional key (`emoji`) that this test
    // expects to fall outside the accepted envelope keys
    let with_stray_key = canonical_line(r#","emoji":"✅""#);

    // when: the line goes through the canonical reader
    let outcome = canonical_records(&with_stray_key);

    // then: the extra key makes the whole line an error
    assert!(outcome.is_err());
}
801
#[test]
fn canonical_reader_should_reject_a_malformed_ground_via_ground_from_value() {
    // given: a line with one ground whose `supports` is "maybe" — neither `chosen` nor a
    // `rejected:<opt>` form
    let bad_ground =
        r#"{"kind":"ev-decision-intake","decision":"x","grounds":[{"claim":"c","supports":"maybe"}]}"#;

    // when: the line goes through the canonical reader
    let outcome = canonical_records(bad_ground);

    // then: ground validation fails and the reader reports an error (the trust boundary holds)
    assert!(outcome.is_err());
}
814
#[test]
fn canonical_reader_should_import_zero_grounds_when_grounds_is_empty() {
    // given: the minimal valid line, whose `grounds` array is empty (an honest
    // zero-grounds capture, such as a FLAG)
    let bare_line = canonical_line("");

    // when: the line goes through the canonical reader
    let parsed = canonical_records(&bare_line).expect("zero-grounds is first-class");

    // then: one record is imported and it carries no grounds at all
    assert_eq!(parsed.len(), 1);
    let rec = &parsed[0];
    assert!(rec.grounds.is_empty());
}
827
#[test]
fn canonical_reader_should_take_source_ref_verbatim_without_resniffing_tokens() {
    // given: a line whose `source_ref` is the opaque key "ticket-42" while `observe`
    // mentions a different token ("R2289")
    let line = canonical_line(r#","observe":"see R2289","source_ref":"ticket-42""#);

    // when: the line goes through the canonical reader
    let parsed = canonical_records(&line).expect("valid");

    // then: both the stored source_ref and the dedup key equal the declared source_ref;
    // the token inside `observe` has no influence on either
    let rec = &parsed[0];
    assert_eq!(rec.source_ref, Some(serde_json::json!("ticket-42")));
    assert_eq!(rec.source_key, "ticket-42");
}
840
#[test]
fn canonical_reader_should_key_a_structured_source_ref_by_its_deterministic_json() {
    // given: a line whose `source_ref` is a JSON object rather than a plain string
    let line = canonical_line(r#","source_ref":{"round":"R1","sprint":"S7"}"#);

    // when: the line goes through the canonical reader
    let parsed = canonical_records(&line).expect("valid");

    // then: the object is kept as-is on the record, and the dedup key is the object's
    // compact JSON text with keys in sorted order
    let rec = &parsed[0];
    let expected_ref = serde_json::json!({"round": "R1", "sprint": "S7"});
    assert_eq!(rec.source_ref, Some(expected_ref));
    assert_eq!(rec.source_key, r#"{"round":"R1","sprint":"S7"}"#);
}
856
#[test]
fn canonical_reader_should_skip_blank_and_comment_lines() {
    // given: one valid record preceded by a blank line and a `#` comment line, and
    // followed by a blank line
    let record = canonical_line("");
    let stream = format!("\n# a comment\n{record}\n\n");

    // when: the whole stream goes through the canonical reader
    let parsed = canonical_records(&stream).expect("valid");

    // then: the padding lines neither fail the parse nor produce records — only the one
    // real record is returned
    assert_eq!(parsed.len(), 1);
}
868
#[test]
fn canonical_reader_should_reject_an_out_of_vocab_provenance() {
    // given: the minimal valid line plus a `provenance` value ("self-asserted") that this
    // test expects to be outside the accepted vocabulary
    let line = canonical_line(r#","provenance":"self-asserted""#);

    // when: the line goes through the canonical reader
    let outcome = canonical_records(&line);

    // then: the reader returns an error for the unrecognised provenance value
    assert!(outcome.is_err());
}
880}