// harn_vm/testbench/annotations.rs

//! Annotation sidecar for testbench tapes.
//!
//! An annotation file (`<tape>.annotations.jsonl`) is the durable form of
//! human judgment over a recorded run. Each annotation references a tape
//! event by its immutable [`TapeRecord::seq`] number and carries a
//! structured kind + evidence + author so downstream pipelines (eval
//! rubrics, friction roll-ups, crystallization candidate detection,
//! replay-for-teaching) can read the same artifact.
//!
//! ## File layout
//!
//! ```text
//! run.tape                    # the unified event tape
//! run.tape.annotations.jsonl  # annotations sidecar (this format)
//! ```
//!
//! Like the tape itself, the annotations file is line-delimited JSON. The
//! first non-blank, non-comment line is a header; every subsequent line
//! is one annotation. Empty lines and lines starting with `#` are
//! tolerated so external tools can emit comments without breaking
//! interop.
//!
//! ## Schema
//!
//! - **Header** (one line, always first):
//!
//!   ```json
//!   {
//!     "type": "header",
//!     "schema_version": 1,
//!     "tape_path": "run.tape",
//!     "tape_content_hash": "<blake3>",
//!     "harn_version": "0.8.6"
//!   }
//!   ```
//!
//! - **Annotation** (zero or more, after the header):
//!
//!   ```json
//!   {
//!     "type": "annotation",
//!     "id": "ann_001",
//!     "event_id": 42,
//!     "kind": "hypothesis",
//!     "evidence": "checkout incident — see runbook",
//!     "author": {"id": "alice", "kind": "human", "surface": "burin-code"},
//!     "timestamp": "2026-05-10T17:00:00Z",
//!     "hypothesis_status": "active"
//!   }
//!   ```
//!
//! Optional fields default to their `None` / empty form so older readers
//! roll forward when newer fields appear. Unknown [`AnnotationKind`]
//! values surface as [`AnnotationKind::Unknown`] so a validator can
//! still report on the rest.
//!
//! [`TapeRecord::seq`]: super::tape::TapeRecord::seq
57
58use std::collections::{BTreeMap, BTreeSet};
59use std::path::Path;
60
61use serde::{Deserialize, Serialize};
62
63use super::tape::{EventTape, TapeRecord};
64use crate::orchestration::{
65    friction_kind_allowed, FrictionEvent, FrictionLink, FRICTION_SCHEMA_VERSION,
66};
67
/// Format version of the annotations sidecar. Bump on any breaking
/// change. [`AnnotationTape::load`] refuses files whose header declares
/// a higher `schema_version` than this.
pub const ANNOTATION_SCHEMA_VERSION: u32 = 1;
71
/// Conventional sidecar suffix appended to a tape path: `run.tape`
/// pairs with `run.tape.annotations.jsonl`.
pub const ANNOTATIONS_SIDECAR_SUFFIX: &str = ".annotations.jsonl";
75
76/// Header record at the top of every annotations file. Captures the
77/// schema version and a back-reference to the tape so a validator can
78/// detect mismatched bundles.
79#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
80pub struct AnnotationHeader {
81    pub schema_version: u32,
82    /// Tape this annotation set targets. Stored as the path the
83    /// annotation author saw — the validator resolves it relative to the
84    /// annotations file.
85    #[serde(default)]
86    pub tape_path: Option<String>,
87    /// BLAKE3 hex digest of the tape's NDJSON body when the annotations
88    /// were written. The validator uses this to spot tape edits that
89    /// invalidate event_id references.
90    #[serde(default)]
91    pub tape_content_hash: Option<String>,
92    /// `harn-vm` `CARGO_PKG_VERSION` of the producer. Informational.
93    #[serde(default)]
94    pub harn_version: Option<String>,
95}
96
97impl AnnotationHeader {
98    pub fn current(tape_path: Option<String>, tape_content_hash: Option<String>) -> Self {
99        Self {
100            schema_version: ANNOTATION_SCHEMA_VERSION,
101            tape_path,
102            tape_content_hash,
103            harn_version: Some(env!("CARGO_PKG_VERSION").to_string()),
104        }
105    }
106}
107
108/// One annotation record on a tape event. Every field except `event_id`
109/// and `kind` is optional so authoring tools can emit minimal records.
110#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
111pub struct Annotation {
112    /// Stable id for the annotation. Defaults to `ann_{event_id}_{seq}`
113    /// when authors don't pick one — the only requirement is uniqueness
114    /// within a file.
115    #[serde(default)]
116    pub id: String,
117    /// Tape event the annotation targets. Matches [`TapeRecord::seq`].
118    pub event_id: u64,
119    pub kind: AnnotationKind,
120    /// Free-text evidence (markdown allowed). Authors may also pass
121    /// structured links via [`Annotation::links`].
122    #[serde(default)]
123    pub evidence: Option<String>,
124    /// Optional structured fix suggestion. Free-form JSON so a candidate
125    /// edit, a missing context-pack entry, or a tool-call patch can ride
126    /// on the same record without inventing per-kind shapes.
127    #[serde(default)]
128    pub suggested_fix: Option<serde_json::Value>,
129    #[serde(default)]
130    pub author: Option<AnnotationAuthor>,
131    /// RFC-3339 timestamp. Defaults to the moment the annotation was
132    /// authored; downstream pipelines treat missing values as "unknown".
133    #[serde(default)]
134    pub timestamp: Option<String>,
135    /// Span-style annotations cover a range of events. `start_event_id`
136    /// must equal `event_id`; the wrapping is intentional so single-event
137    /// annotations and span annotations share a row shape.
138    #[serde(default)]
139    pub span: Option<AnnotationSpan>,
140    /// Required when `kind == hypothesis`; ignored otherwise.
141    #[serde(default)]
142    pub hypothesis_status: Option<HypothesisStatus>,
143    /// Required when `kind == friction`; matches the friction-event
144    /// taxonomy in [`crate::orchestration::friction`] so a bag of
145    /// annotations + a bag of FrictionEvents are interchangeable.
146    #[serde(default)]
147    pub friction_kind: Option<String>,
148    /// Optional structured links to runbooks, dashboards, tickets, or
149    /// upstream incidents. Authors who only have prose put it in
150    /// `evidence`.
151    #[serde(default)]
152    pub links: Vec<AnnotationLink>,
153    /// Free-form metadata for downstream consumers. Kept open-ended on
154    /// purpose — the eval rubric, persona quality dashboard, and
155    /// crystallization detector each tag annotations differently.
156    #[serde(default)]
157    pub metadata: BTreeMap<String, serde_json::Value>,
158}
159
160/// Closed taxonomy of annotation kinds. New kinds must be added here so
161/// the validator can reason about them; older readers receive
162/// [`AnnotationKind::Unknown`] for kinds they don't recognize.
163#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
164#[serde(rename_all = "snake_case")]
165pub enum AnnotationKind {
166    /// "This event was correct." Eval rubric ground truth.
167    Correct,
168    /// "This event was wrong." Eval rubric ground truth.
169    Incorrect,
170    /// "Here is a better way to handle this turn." Pairs with a
171    /// `suggested_fix` payload.
172    Alternative,
173    /// Free-text commentary with no judgment baked in.
174    Note,
175    /// Anchor for replay-for-teaching playback. Presenter mode pauses on
176    /// markers and surfaces the evidence.
177    Marker,
178    /// Suppress a downstream consumer from acting on this event (e.g.
179    /// silence a known-flake in a dashboard).
180    Mute,
181    /// Human prior to verify. Carries a `hypothesis_status`.
182    Hypothesis,
183    /// Operational learning signal. Carries a `friction_kind` matching
184    /// the friction-event taxonomy.
185    Friction,
186    /// "This sequence is a candidate workflow to crystallize." Surfaced
187    /// directly to the candidate-detection pipeline.
188    CrystallizeHere,
189    /// Catch-all for kinds emitted by a newer producer.
190    #[serde(other)]
191    Unknown,
192}
193
194impl AnnotationKind {
195    pub fn as_str(&self) -> &'static str {
196        match self {
197            Self::Correct => "correct",
198            Self::Incorrect => "incorrect",
199            Self::Alternative => "alternative",
200            Self::Note => "note",
201            Self::Marker => "marker",
202            Self::Mute => "mute",
203            Self::Hypothesis => "hypothesis",
204            Self::Friction => "friction",
205            Self::CrystallizeHere => "crystallize_here",
206            Self::Unknown => "unknown",
207        }
208    }
209
210    /// Parse a CLI-style kind name. Accepts the snake_case spellings the
211    /// schema serializes to.
212    pub fn parse_cli(input: &str) -> Result<Self, String> {
213        match input {
214            "correct" => Ok(Self::Correct),
215            "incorrect" => Ok(Self::Incorrect),
216            "alternative" => Ok(Self::Alternative),
217            "note" => Ok(Self::Note),
218            "marker" => Ok(Self::Marker),
219            "mute" => Ok(Self::Mute),
220            "hypothesis" => Ok(Self::Hypothesis),
221            "friction" => Ok(Self::Friction),
222            "crystallize_here" => Ok(Self::CrystallizeHere),
223            other => Err(format!(
224                "unknown annotation kind '{other}' (expected one of correct, incorrect, alternative, note, marker, mute, hypothesis, friction, crystallize_here)"
225            )),
226        }
227    }
228}
229
230/// Lifecycle of a hypothesis-kind annotation. Mirrors the
231/// human-hypothesis loop in harn-cloud#54 / burin-code#277.
232#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
233#[serde(rename_all = "snake_case")]
234pub enum HypothesisStatus {
235    /// Author posed the hypothesis; no verification yet.
236    Active,
237    /// An agent is gathering evidence.
238    Verifying,
239    /// Evidence supports the hypothesis.
240    Confirmed,
241    /// Evidence rules the hypothesis out.
242    Disproven,
243    /// Hypothesis aged out without a resolution.
244    Stale,
245}
246
247/// Span over a contiguous range of events. `start_event_id` must equal
248/// the wrapping annotation's `event_id`; `end_event_id` must be greater
249/// than or equal to the start. The validator enforces both invariants.
250#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
251pub struct AnnotationSpan {
252    pub start_event_id: u64,
253    pub end_event_id: u64,
254}
255
256/// Provenance for an annotation. Surfaces the difference between a human
257/// who clicked through Burin Code and an agent that auto-flagged a turn
258/// during a self-eval.
259#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
260pub struct AnnotationAuthor {
261    /// Stable identifier — email, agent id, or service slug. The schema
262    /// does not police format; downstream consumers do.
263    #[serde(default)]
264    pub id: Option<String>,
265    /// Where the annotation came from.
266    pub kind: AuthorKind,
267    /// Surface that authored the record — `burin-code`, `harn-cloud`,
268    /// `cli`, etc. Free-form so new surfaces don't need a schema bump.
269    #[serde(default)]
270    pub surface: Option<String>,
271}
272
273#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
274#[serde(rename_all = "snake_case")]
275pub enum AuthorKind {
276    Human,
277    Agent,
278    System,
279}
280
281#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
282#[serde(default)]
283pub struct AnnotationLink {
284    pub label: Option<String>,
285    pub url: Option<String>,
286    /// Optional ticket/issue reference (e.g. `harn#1474`,
287    /// `INGEST-321`). Kept separate from `url` so the cloud surface can
288    /// resolve them uniformly.
289    pub reference: Option<String>,
290}
291
292/// Fully-loaded annotation tape — header plus every record. Built by
293/// [`AnnotationTape::load`] and consumed by the validator, the replay
294/// surface, and the export pipeline.
295#[derive(Debug, Clone)]
296pub struct AnnotationTape {
297    pub header: AnnotationHeader,
298    pub annotations: Vec<Annotation>,
299}
300
301impl AnnotationTape {
302    pub fn new(header: AnnotationHeader) -> Self {
303        Self {
304            header,
305            annotations: Vec::new(),
306        }
307    }
308
309    /// Persist the tape as `path.annotations.jsonl`-style NDJSON.
310    pub fn persist(&self, path: &Path) -> Result<(), String> {
311        if let Some(parent) = path.parent() {
312            if !parent.as_os_str().is_empty() {
313                std::fs::create_dir_all(parent)
314                    .map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
315            }
316        }
317        let mut body = String::new();
318        body.push_str(
319            &serde_json::to_string(&AnnotationLine::Header(self.header.clone()))
320                .map_err(|err| format!("serialize annotation header: {err}"))?,
321        );
322        body.push('\n');
323        for annotation in &self.annotations {
324            body.push_str(
325                &serde_json::to_string(&AnnotationLine::Annotation(annotation.clone()))
326                    .map_err(|err| format!("serialize annotation: {err}"))?,
327            );
328            body.push('\n');
329        }
330        std::fs::write(path, body).map_err(|err| format!("write {}: {err}", path.display()))
331    }
332
333    /// Read an annotation file. Empty lines and `#`-prefixed comment
334    /// lines are skipped so authors can group records visually.
335    pub fn load(path: &Path) -> Result<Self, String> {
336        let body = std::fs::read_to_string(path)
337            .map_err(|err| format!("read {}: {err}", path.display()))?;
338        let mut lines = body.lines().enumerate().filter(|(_, line)| {
339            let trimmed = line.trim();
340            !trimmed.is_empty() && !trimmed.starts_with('#')
341        });
342        let (header_idx, header_line) = lines.next().ok_or_else(|| {
343            format!(
344                "empty annotation file: {} (expected a header on line 1)",
345                path.display()
346            )
347        })?;
348        let parsed_header: AnnotationLine =
349            serde_json::from_str(header_line.trim()).map_err(|err| {
350                format!(
351                    "parse annotation header at line {} in {}: {err}",
352                    header_idx + 1,
353                    path.display()
354                )
355            })?;
356        let header = match parsed_header {
357            AnnotationLine::Header(header) => header,
358            AnnotationLine::Annotation(_) => {
359                return Err(format!(
360                    "annotation file {} is missing its header (first non-empty line is a record)",
361                    path.display()
362                ))
363            }
364        };
365        if header.schema_version > ANNOTATION_SCHEMA_VERSION {
366            return Err(format!(
367                "annotation file {} declares schema_version {} but this runtime supports up to {ANNOTATION_SCHEMA_VERSION}",
368                path.display(),
369                header.schema_version
370            ));
371        }
372
373        let mut annotations = Vec::new();
374        for (idx, line) in lines {
375            let parsed: AnnotationLine = serde_json::from_str(line.trim()).map_err(|err| {
376                format!(
377                    "parse annotation at line {} in {}: {err}",
378                    idx + 1,
379                    path.display()
380                )
381            })?;
382            match parsed {
383                AnnotationLine::Annotation(annotation) => annotations.push(annotation),
384                AnnotationLine::Header(_) => {
385                    return Err(format!(
386                        "annotation file {} contains a second header at line {}",
387                        path.display(),
388                        idx + 1
389                    ))
390                }
391            }
392        }
393        Ok(Self {
394            header,
395            annotations,
396        })
397    }
398
399    /// Filter annotations by kind. Used by the export pipeline.
400    pub fn filter_by_kind<'a>(
401        &'a self,
402        kind: AnnotationKind,
403    ) -> impl Iterator<Item = &'a Annotation> + 'a {
404        self.annotations
405            .iter()
406            .filter(move |annotation| annotation.kind == kind)
407    }
408
409    /// Convert friction-kind annotations into [`FrictionEvent`]s so they
410    /// flow into [`crate::orchestration::generate_context_pack_suggestions`]
411    /// and the friction roll-up dashboard alongside natively-emitted
412    /// events.
413    pub fn to_friction_events(&self) -> Vec<FrictionEvent> {
414        self.filter_by_kind(AnnotationKind::Friction)
415            .filter_map(|annotation| annotation_to_friction_event(annotation, &self.header))
416            .collect()
417    }
418
419    /// Anchor seqs flagged by `crystallize_here` annotations. The
420    /// crystallization candidate detector keys on these to bias toward
421    /// human-curated workflow-shaped sequences over inferred ones.
422    pub fn crystallize_anchors(&self) -> Vec<CrystallizeAnchor> {
423        self.filter_by_kind(AnnotationKind::CrystallizeHere)
424            .map(|annotation| CrystallizeAnchor {
425                event_id: annotation.event_id,
426                end_event_id: annotation
427                    .span
428                    .as_ref()
429                    .map(|span| span.end_event_id)
430                    .unwrap_or(annotation.event_id),
431                evidence: annotation.evidence.clone(),
432                author: annotation.author.clone(),
433                metadata: annotation.metadata.clone(),
434            })
435            .collect()
436    }
437}
438
439/// One event the human-judgment loop has flagged as worth crystallizing.
440/// The candidate detector consumes a `Vec<CrystallizeAnchor>` alongside
441/// inferred candidates so the two paths converge into one ranked list.
442#[derive(Debug, Clone, PartialEq, Eq)]
443pub struct CrystallizeAnchor {
444    pub event_id: u64,
445    pub end_event_id: u64,
446    pub evidence: Option<String>,
447    pub author: Option<AnnotationAuthor>,
448    pub metadata: BTreeMap<String, serde_json::Value>,
449}
450
451/// One on-disk line in the annotations file. Tagged-enum dispatch keeps
452/// the file homogeneous JSONL.
453///
454/// The size disparity between `Header` and `Annotation` is intentional:
455/// every JSONL file starts with exactly one `Header`, then any number of
456/// `Annotation` lines. Boxing `Annotation` would add a heap indirection
457/// to the hot deserialize loop to save a few bytes on the one-off header
458/// — an obvious lose. Surfaced by the host-target compile of `harn-vm`
459/// introduced when `harn-cli`'s build script gained `harn-vm` as a
460/// build-dep for the AOT bytecode embedding pass (G7 / harn#2300).
461#[allow(clippy::large_enum_variant)]
462#[derive(Debug, Clone, Serialize, Deserialize)]
463#[serde(tag = "type", rename_all = "snake_case")]
464enum AnnotationLine {
465    Header(AnnotationHeader),
466    Annotation(Annotation),
467}
468
469/// Validation problem detected by [`validate_against_tape`]. Each variant
470/// carries enough context for a CLI report or a CI annotation comment.
471#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
472#[serde(tag = "code", rename_all = "snake_case")]
473pub enum AnnotationProblem {
474    /// Schema-level error (missing required field, malformed enum). The
475    /// loader rejects most of these, but a few only surface once we
476    /// cross-reference with the tape (e.g. `event_id` out of range).
477    Schema {
478        annotation_id: String,
479        message: String,
480    },
481    /// `event_id` does not match any record in the tape.
482    UnknownEventId {
483        annotation_id: String,
484        event_id: u64,
485    },
486    /// `kind == hypothesis` but `hypothesis_status` is missing.
487    HypothesisStatusMissing { annotation_id: String },
488    /// `kind != hypothesis` but `hypothesis_status` is set.
489    HypothesisStatusUnexpected { annotation_id: String },
490    /// `kind == friction` but `friction_kind` is missing.
491    FrictionKindMissing { annotation_id: String },
492    /// `kind != friction` but `friction_kind` is set.
493    FrictionKindUnexpected { annotation_id: String },
494    /// `friction_kind` is set but does not match the
495    /// [`friction::FRICTION_KINDS`] taxonomy.
496    FrictionKindUnknown {
497        annotation_id: String,
498        friction_kind: String,
499    },
500    /// `span` shape is malformed (start != event_id, end < start, or end
501    /// out of range).
502    InvalidSpan {
503        annotation_id: String,
504        message: String,
505    },
506    /// Two annotations share the same `id`.
507    DuplicateId { annotation_id: String },
508    /// Header references a tape whose content hash does not match the
509    /// loaded tape. Indicates the tape was edited after annotations were
510    /// authored — references may be stale.
511    TapeDigestMismatch { expected: String, actual: String },
512    /// `AnnotationKind::Unknown` records were preserved on load but the
513    /// validator can't reason about them.
514    UnknownKind { annotation_id: String },
515}
516
517/// Result of validating annotations against a tape.
518#[derive(Debug, Clone, Default, Serialize, Deserialize)]
519pub struct AnnotationValidationReport {
520    pub annotations_checked: usize,
521    pub problems: Vec<AnnotationProblem>,
522    /// Counts per-kind so reporting can show a quick taxonomy summary.
523    pub kind_counts: BTreeMap<String, usize>,
524}
525
526impl AnnotationValidationReport {
527    pub fn is_ok(&self) -> bool {
528        self.problems.is_empty()
529    }
530}
531
532/// Validate an annotations file against its target tape. Returns a
533/// structured report so the CLI can emit either pretty-printed problems
534/// or a machine-readable JSON payload.
535pub fn validate_against_tape(
536    annotations: &AnnotationTape,
537    tape: &EventTape,
538) -> AnnotationValidationReport {
539    let event_seqs: BTreeSet<u64> = tape.records.iter().map(|record| record.seq).collect();
540    let max_seq = event_seqs.iter().max().copied();
541    let mut problems = Vec::new();
542    let mut seen_ids: BTreeSet<String> = BTreeSet::new();
543    let mut kind_counts: BTreeMap<String, usize> = BTreeMap::new();
544
545    for annotation in &annotations.annotations {
546        let id_for_report = if annotation.id.is_empty() {
547            format!("ann@event_{}", annotation.event_id)
548        } else {
549            annotation.id.clone()
550        };
551        *kind_counts
552            .entry(annotation.kind.as_str().to_string())
553            .or_insert(0) += 1;
554
555        if !annotation.id.is_empty() && !seen_ids.insert(annotation.id.clone()) {
556            problems.push(AnnotationProblem::DuplicateId {
557                annotation_id: id_for_report.clone(),
558            });
559        }
560
561        if !event_seqs.contains(&annotation.event_id) {
562            problems.push(AnnotationProblem::UnknownEventId {
563                annotation_id: id_for_report.clone(),
564                event_id: annotation.event_id,
565            });
566        }
567
568        match annotation.kind {
569            AnnotationKind::Hypothesis => {
570                if annotation.hypothesis_status.is_none() {
571                    problems.push(AnnotationProblem::HypothesisStatusMissing {
572                        annotation_id: id_for_report.clone(),
573                    });
574                }
575                if annotation.friction_kind.is_some() {
576                    problems.push(AnnotationProblem::FrictionKindUnexpected {
577                        annotation_id: id_for_report.clone(),
578                    });
579                }
580            }
581            AnnotationKind::Friction => {
582                if annotation.hypothesis_status.is_some() {
583                    problems.push(AnnotationProblem::HypothesisStatusUnexpected {
584                        annotation_id: id_for_report.clone(),
585                    });
586                }
587                match annotation.friction_kind.as_deref() {
588                    None => problems.push(AnnotationProblem::FrictionKindMissing {
589                        annotation_id: id_for_report.clone(),
590                    }),
591                    Some(kind) if !friction_kind_allowed(kind) => {
592                        problems.push(AnnotationProblem::FrictionKindUnknown {
593                            annotation_id: id_for_report.clone(),
594                            friction_kind: kind.to_string(),
595                        });
596                    }
597                    Some(_) => {}
598                }
599            }
600            AnnotationKind::Unknown => {
601                problems.push(AnnotationProblem::UnknownKind {
602                    annotation_id: id_for_report.clone(),
603                });
604            }
605            _ => {
606                if annotation.hypothesis_status.is_some() {
607                    problems.push(AnnotationProblem::HypothesisStatusUnexpected {
608                        annotation_id: id_for_report.clone(),
609                    });
610                }
611                if annotation.friction_kind.is_some() {
612                    problems.push(AnnotationProblem::FrictionKindUnexpected {
613                        annotation_id: id_for_report.clone(),
614                    });
615                }
616            }
617        }
618
619        if let Some(span) = annotation.span.as_ref() {
620            if span.start_event_id != annotation.event_id {
621                problems.push(AnnotationProblem::InvalidSpan {
622                    annotation_id: id_for_report.clone(),
623                    message: format!(
624                        "span.start_event_id ({}) must equal event_id ({})",
625                        span.start_event_id, annotation.event_id
626                    ),
627                });
628            }
629            if span.end_event_id < span.start_event_id {
630                problems.push(AnnotationProblem::InvalidSpan {
631                    annotation_id: id_for_report.clone(),
632                    message: format!(
633                        "span.end_event_id ({}) is before start_event_id ({})",
634                        span.end_event_id, span.start_event_id
635                    ),
636                });
637            }
638            if let Some(max) = max_seq {
639                if span.end_event_id > max {
640                    problems.push(AnnotationProblem::InvalidSpan {
641                        annotation_id: id_for_report.clone(),
642                        message: format!(
643                            "span.end_event_id ({}) is past the last tape event (seq={max})",
644                            span.end_event_id
645                        ),
646                    });
647                }
648            }
649        }
650    }
651
652    if let (Some(expected), Some(actual)) = (
653        annotations.header.tape_content_hash.as_deref(),
654        compute_tape_content_hash(tape).as_deref(),
655    ) {
656        if expected != actual {
657            problems.push(AnnotationProblem::TapeDigestMismatch {
658                expected: expected.to_string(),
659                actual: actual.to_string(),
660            });
661        }
662    }
663
664    AnnotationValidationReport {
665        annotations_checked: annotations.annotations.len(),
666        problems,
667        kind_counts,
668    }
669}
670
671/// BLAKE3 hex digest of a tape's logical content. Implemented by
672/// hashing the deterministically-serialized record stream so the digest
673/// is stable across runs that produce the same tape — and changes when
674/// any record content does.
675pub fn compute_tape_content_hash(tape: &EventTape) -> Option<String> {
676    let mut hasher = blake3::Hasher::new();
677    for record in &tape.records {
678        let line = serde_json::to_vec(record).ok()?;
679        hasher.update(&line);
680        hasher.update(b"\n");
681    }
682    Some(hasher.finalize().to_hex().to_string())
683}
684
685/// Convenience: pair a tape record with all annotations that reference
686/// its seq. Used by the replay surface and the export pipeline.
687pub fn annotations_for_record<'a>(
688    annotations: &'a AnnotationTape,
689    record: &TapeRecord,
690) -> Vec<&'a Annotation> {
691    annotations
692        .annotations
693        .iter()
694        .filter(|annotation| annotation.event_id == record.seq)
695        .collect()
696}
697
698/// Adapt a friction-kind annotation into a [`FrictionEvent`]. Returns
699/// `None` when the annotation is not a friction record or is missing the
700/// required `friction_kind`.
701pub fn annotation_to_friction_event(
702    annotation: &Annotation,
703    header: &AnnotationHeader,
704) -> Option<FrictionEvent> {
705    if annotation.kind != AnnotationKind::Friction {
706        return None;
707    }
708    let kind = annotation.friction_kind.clone()?;
709    if !friction_kind_allowed(&kind) {
710        return None;
711    }
712    let summary = annotation.evidence.clone().unwrap_or_else(|| {
713        format!(
714            "annotation {} on event {}",
715            annotation.id, annotation.event_id
716        )
717    });
718    let mut links = Vec::new();
719    for link in &annotation.links {
720        links.push(FrictionLink {
721            label: link.label.clone(),
722            url: link.url.clone(),
723            trace_id: link.reference.clone(),
724        });
725    }
726    Some(FrictionEvent {
727        schema_version: FRICTION_SCHEMA_VERSION,
728        id: if annotation.id.is_empty() {
729            format!("annotation_{}", annotation.event_id)
730        } else {
731            annotation.id.clone()
732        },
733        kind,
734        source: header.tape_path.clone(),
735        actor: annotation.author.as_ref().and_then(|a| a.id.clone()),
736        tenant_id: None,
737        task_id: None,
738        run_id: None,
739        workflow_id: None,
740        tool: None,
741        provider: None,
742        redacted_summary: summary,
743        estimated_cost_usd: None,
744        estimated_time_ms: None,
745        recurrence_hints: Vec::new(),
746        trace_id: None,
747        span_id: None,
748        links,
749        human_hypothesis: None,
750        metadata: annotation.metadata.clone(),
751        timestamp: annotation
752            .timestamp
753            .clone()
754            .unwrap_or_else(crate::orchestration::now_rfc3339),
755    })
756}
757
#[cfg(test)]
mod tests {
    //! Tests for the annotation sidecar: persistence round-trips, validation
    //! against an event tape, and the friction / crystallize projections.

    use super::*;
    use crate::testbench::tape::{TapeHeader, TapePhase, TapeRecord, TapeRecordKind};
    use tempfile::TempDir;

    /// Asserts that at least one entry in `$report.problems` matches `$problem`.
    macro_rules! assert_flags {
        ($report:expr, $problem:pat) => {
            assert!($report
                .problems
                .iter()
                .any(|found| matches!(found, $problem)))
        };
    }

    /// Builds an event tape holding three `ClockSleep` records with `seq`
    /// values 0, 1 and 2.
    fn sample_tape() -> EventTape {
        let header = TapeHeader::current(
            Some(1_700_000_000_000),
            Some("script.harn".into()),
            Vec::new(),
        );
        let mut tape = EventTape::new(header);
        tape.records.extend((0..3).map(|seq| TapeRecord {
            seq,
            phase: TapePhase::UserScript,
            virtual_time_ms: 0,
            monotonic_ms: 0,
            kind: TapeRecordKind::ClockSleep { duration_ms: 1 },
        }));
        tape
    }

    /// Builds a `Note` annotation authored by the human "alice", anchored at
    /// `event_id`, with every optional extension field left empty.
    fn note_annotation(id: &str, event_id: u64) -> Annotation {
        let author = AnnotationAuthor {
            id: Some("alice".into()),
            kind: AuthorKind::Human,
            surface: Some("burin-code".into()),
        };
        Annotation {
            id: id.into(),
            event_id,
            kind: AnnotationKind::Note,
            evidence: Some("looked fine".into()),
            suggested_fix: None,
            author: Some(author),
            timestamp: Some("2026-05-10T17:00:00Z".into()),
            span: None,
            hypothesis_status: None,
            friction_kind: None,
            links: Vec::new(),
            metadata: BTreeMap::new(),
        }
    }

    /// Serializes a header line built from `AnnotationHeader::current(None, None)`.
    fn default_header_line() -> String {
        let header = AnnotationHeader::current(None, None);
        serde_json::to_string(&AnnotationLine::Header(header)).unwrap()
    }

    /// Persisting and re-loading a sidecar keeps the schema version, the
    /// annotation count, each kind, and the hypothesis status.
    #[test]
    fn round_trip_preserves_records() {
        let dir = TempDir::new().unwrap();
        let sidecar_path = dir.path().join("run.tape.annotations.jsonl");

        let header = AnnotationHeader::current(Some("run.tape".into()), Some("deadbeef".into()));
        let mut written = AnnotationTape::new(header);
        written.annotations.extend([
            note_annotation("ann-1", 0),
            Annotation {
                kind: AnnotationKind::Hypothesis,
                hypothesis_status: Some(HypothesisStatus::Active),
                ..note_annotation("ann-2", 1)
            },
        ]);
        written.persist(&sidecar_path).unwrap();

        let loaded = AnnotationTape::load(&sidecar_path).unwrap();
        assert_eq!(loaded.header.schema_version, ANNOTATION_SCHEMA_VERSION);
        assert_eq!(loaded.annotations.len(), 2);
        assert_eq!(loaded.annotations[0].kind, AnnotationKind::Note);
        assert_eq!(loaded.annotations[1].kind, AnnotationKind::Hypothesis);
        assert_eq!(
            loaded.annotations[1].hypothesis_status,
            Some(HypothesisStatus::Active)
        );
    }

    /// One clean note plus three faulty annotations: all four are counted and
    /// each fault surfaces as its own problem variant.
    #[test]
    fn validator_flags_unknown_event_id_and_missing_status() {
        let mut sidecar =
            AnnotationTape::new(AnnotationHeader::current(Some("run.tape".into()), None));
        sidecar.annotations.extend([
            note_annotation("note", 0),
            // Hypothesis with no status, anchored at seq 99 (the sample tape
            // only holds seq 0..=2).
            Annotation {
                kind: AnnotationKind::Hypothesis,
                hypothesis_status: None,
                ..note_annotation("missing", 99)
            },
            Annotation {
                kind: AnnotationKind::Friction,
                friction_kind: Some("does_not_exist".into()),
                ..note_annotation("bad-friction", 1)
            },
            Annotation {
                kind: AnnotationKind::Friction,
                friction_kind: None,
                ..note_annotation("missing-friction", 2)
            },
        ]);

        let report = validate_against_tape(&sidecar, &sample_tape());
        assert_eq!(report.annotations_checked, 4);
        assert_flags!(
            report,
            AnnotationProblem::UnknownEventId { event_id: 99, .. }
        );
        assert_flags!(report, AnnotationProblem::HypothesisStatusMissing { .. });
        assert_flags!(report, AnnotationProblem::FrictionKindUnknown { .. });
        assert_flags!(report, AnnotationProblem::FrictionKindMissing { .. });
    }

    /// Three malformed spans must add up to exactly four `InvalidSpan` problems.
    #[test]
    fn span_validation_enforces_invariants() {
        let span = |start_event_id, end_event_id| {
            Some(AnnotationSpan {
                start_event_id,
                end_event_id,
            })
        };
        let mut sidecar = AnnotationTape::new(AnnotationHeader::current(None, None));
        sidecar.annotations.extend([
            Annotation {
                span: span(5, 10),
                ..note_annotation("bad-start", 1)
            },
            Annotation {
                span: span(1, 0),
                ..note_annotation("inverted", 1)
            },
            Annotation {
                span: span(1, 99),
                ..note_annotation("past-end", 1)
            },
        ]);

        let report = validate_against_tape(&sidecar, &sample_tape());
        // Expected tally:
        //   bad-start: start != event_id, and end > max  => 2 problems
        //   inverted:  end < start                       => 1 problem
        //   past-end:  end > max                         => 1 problem
        let invalid_spans = report
            .problems
            .iter()
            .filter(|problem| matches!(problem, AnnotationProblem::InvalidSpan { .. }))
            .count();
        assert_eq!(invalid_spans, 4);
    }

    /// Two annotations sharing the id "dupe" produce a `DuplicateId` problem.
    #[test]
    fn duplicate_ids_are_flagged() {
        let mut sidecar = AnnotationTape::new(AnnotationHeader::current(None, None));
        sidecar
            .annotations
            .extend([note_annotation("dupe", 0), note_annotation("dupe", 1)]);

        let report = validate_against_tape(&sidecar, &sample_tape());
        assert_flags!(report, AnnotationProblem::DuplicateId { .. });
    }

    /// A header hash that does not match the tape is reported as
    /// `TapeDigestMismatch`.
    #[test]
    fn tape_digest_mismatch_flags_stale_annotations() {
        let stale_header =
            AnnotationHeader::current(Some("run.tape".into()), Some("not-the-real-hash".into()));
        let mut sidecar = AnnotationTape::new(stale_header);
        sidecar.annotations.push(note_annotation("note", 0));

        let report = validate_against_tape(&sidecar, &sample_tape());
        assert_flags!(report, AnnotationProblem::TapeDigestMismatch { .. });
    }

    /// An unrecognized `kind` string loads as `AnnotationKind::Unknown` and the
    /// validator reports it as `UnknownKind`.
    #[test]
    fn unknown_kind_round_trips_and_validator_flags() {
        let dir = TempDir::new().unwrap();
        let sidecar_path = dir.path().join("future.annotations.jsonl");
        let contents = format!(
            "{}\n{}\n",
            default_header_line(),
            r#"{"type":"annotation","id":"ann","event_id":0,"kind":"future_kind"}"#
        );
        std::fs::write(&sidecar_path, contents).unwrap();

        let loaded = AnnotationTape::load(&sidecar_path).unwrap();
        assert_eq!(loaded.annotations.len(), 1);
        assert_eq!(loaded.annotations[0].kind, AnnotationKind::Unknown);

        let report = validate_against_tape(&loaded, &sample_tape());
        assert_flags!(report, AnnotationProblem::UnknownKind { .. });
    }

    /// Loading a header with `schema_version` 99 fails with an error that
    /// names that version.
    #[test]
    fn rejects_newer_schema_version() {
        let dir = TempDir::new().unwrap();
        let sidecar_path = dir.path().join("future.annotations.jsonl");
        std::fs::write(
            &sidecar_path,
            r#"{"type":"header","schema_version":99}
"#,
        )
        .unwrap();

        let err = AnnotationTape::load(&sidecar_path).unwrap_err();
        assert!(err.contains("schema_version 99"), "{err}");
    }

    /// `#` comment lines and blank lines, before and after the header, do not
    /// prevent the single annotation from loading.
    #[test]
    fn comments_and_blank_lines_are_skipped() {
        let dir = TempDir::new().unwrap();
        let sidecar_path = dir.path().join("commented.annotations.jsonl");
        let header = default_header_line();
        let annotation =
            serde_json::to_string(&AnnotationLine::Annotation(note_annotation("ann", 0))).unwrap();
        let contents = format!("# leading comment\n\n{header}\n\n# spacer\n{annotation}\n");
        std::fs::write(&sidecar_path, contents).unwrap();

        let loaded = AnnotationTape::load(&sidecar_path).unwrap();
        assert_eq!(loaded.annotations.len(), 1);
    }

    /// A friction annotation converts to one friction event carrying its
    /// friction kind, the friction schema version, and its evidence as summary.
    #[test]
    fn friction_annotations_round_trip_through_friction_event() {
        let mut sidecar =
            AnnotationTape::new(AnnotationHeader::current(Some("run.tape".into()), None));
        sidecar.annotations.push(Annotation {
            kind: AnnotationKind::Friction,
            friction_kind: Some("repeated_query".into()),
            evidence: Some("Splunk lookup repeats every incident".into()),
            ..note_annotation("friction-1", 2)
        });

        let events = sidecar.to_friction_events();
        assert_eq!(events.len(), 1);
        let event = &events[0];
        assert_eq!(event.kind, "repeated_query");
        assert_eq!(event.schema_version, FRICTION_SCHEMA_VERSION);
        assert_eq!(
            event.redacted_summary,
            "Splunk lookup repeats every incident"
        );
    }

    /// A `CrystallizeHere` annotation with span 1..=4 yields one anchor
    /// exposing event id 1 and end event id 4.
    #[test]
    fn crystallize_anchors_surface_event_ids() {
        let mut sidecar = AnnotationTape::new(AnnotationHeader::current(None, None));
        sidecar.annotations.push(Annotation {
            kind: AnnotationKind::CrystallizeHere,
            span: Some(AnnotationSpan {
                start_event_id: 1,
                end_event_id: 4,
            }),
            ..note_annotation("crys-1", 1)
        });

        let anchors = sidecar.crystallize_anchors();
        assert_eq!(anchors.len(), 1);
        let anchor = &anchors[0];
        assert_eq!(anchor.event_id, 1);
        assert_eq!(anchor.end_event_id, 4);
    }
}