use std::collections::{BTreeMap, BTreeSet};
use std::path::Path;
use serde::{Deserialize, Serialize};
use super::tape::{EventTape, TapeRecord};
use crate::orchestration::{
friction_kind_allowed, FrictionEvent, FrictionLink, FRICTION_SCHEMA_VERSION,
};
/// Schema version stamped into headers built by [`AnnotationHeader::current`].
/// [`AnnotationTape::load`] rejects files whose header declares a higher value.
pub const ANNOTATION_SCHEMA_VERSION: u32 = 1;
/// File-name suffix for annotation sidecar files (one JSON document per line).
pub const ANNOTATIONS_SIDECAR_SUFFIX: &str = ".annotations.jsonl";
/// Header record of an annotation file.
///
/// It is the first line written by [`AnnotationTape::persist`] and must be the
/// first non-blank, non-comment line seen by [`AnnotationTape::load`].
/// Every field except `schema_version` may be absent in the serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AnnotationHeader {
/// Schema version of the file; `load` refuses values greater than
/// [`ANNOTATION_SCHEMA_VERSION`].
pub schema_version: u32,
/// Path of the tape these annotations refer to, if recorded. It is copied
/// into the `source` of friction events derived from this file.
#[serde(default)]
pub tape_path: Option<String>,
/// Expected tape digest. When present, [`validate_against_tape`] compares
/// it with the value returned by [`compute_tape_content_hash`].
#[serde(default)]
pub tape_content_hash: Option<String>,
/// Version string of the writer; [`AnnotationHeader::current`] fills it
/// from `CARGO_PKG_VERSION`.
#[serde(default)]
pub harn_version: Option<String>,
}
impl AnnotationHeader {
pub fn current(tape_path: Option<String>, tape_content_hash: Option<String>) -> Self {
Self {
schema_version: ANNOTATION_SCHEMA_VERSION,
tape_path,
tape_content_hash,
harn_version: Some(env!("CARGO_PKG_VERSION").to_string()),
}
}
}
/// One annotation record attached to a tape event.
///
/// Only `event_id` and `kind` are required in the serialized form; every other
/// field falls back to its default when missing.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Annotation {
/// Identifier of the annotation. May be empty; the validator then reports
/// it as `ann@event_<event_id>` and skips the duplicate-id check.
#[serde(default)]
pub id: String,
/// Sequence number (`seq`) of the tape record this annotation targets.
pub event_id: u64,
/// What the annotation expresses; see [`AnnotationKind`].
pub kind: AnnotationKind,
/// Free-form supporting text. Used as the summary of friction events
/// derived from this annotation.
#[serde(default)]
pub evidence: Option<String>,
/// Arbitrary JSON payload; not interpreted in this file.
#[serde(default)]
pub suggested_fix: Option<serde_json::Value>,
/// Who produced the annotation, if known.
#[serde(default)]
pub author: Option<AnnotationAuthor>,
/// Timestamp string, passed through unchanged to derived friction events.
// NOTE(review): presumably RFC 3339, since the friction fallback is
// `now_rfc3339` — the format is not checked here.
#[serde(default)]
pub timestamp: Option<String>,
/// Optional event range; see [`AnnotationSpan`] for the validated rules.
#[serde(default)]
pub span: Option<AnnotationSpan>,
/// Validated as required for `hypothesis` annotations and unexpected on
/// every other known kind.
#[serde(default)]
pub hypothesis_status: Option<HypothesisStatus>,
/// Validated as required for `friction` annotations (and checked with
/// `friction_kind_allowed`) and unexpected on every other known kind.
#[serde(default)]
pub friction_kind: Option<String>,
/// Related links; converted to `FrictionLink`s for friction events.
#[serde(default)]
pub links: Vec<AnnotationLink>,
/// Arbitrary key/value payload, copied into derived friction events and
/// crystallize anchors.
#[serde(default)]
pub metadata: BTreeMap<String, serde_json::Value>,
}
/// Kind of an [`Annotation`], serialized in `snake_case`.
///
/// Kind strings that match no named variant deserialize to `Unknown`
/// (`#[serde(other)]`) instead of failing; [`validate_against_tape`] reports
/// them as `UnknownKind`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[serde(rename_all = "snake_case")]
pub enum AnnotationKind {
Correct,
Incorrect,
Alternative,
Note,
Marker,
Mute,
/// Requires `hypothesis_status` on the annotation.
Hypothesis,
/// Requires an allowed `friction_kind` on the annotation.
Friction,
/// Collected by [`AnnotationTape::crystallize_anchors`].
CrystallizeHere,
/// Catch-all for kind strings this build does not recognize.
#[serde(other)]
Unknown,
}
impl AnnotationKind {
pub fn as_str(&self) -> &'static str {
match self {
Self::Correct => "correct",
Self::Incorrect => "incorrect",
Self::Alternative => "alternative",
Self::Note => "note",
Self::Marker => "marker",
Self::Mute => "mute",
Self::Hypothesis => "hypothesis",
Self::Friction => "friction",
Self::CrystallizeHere => "crystallize_here",
Self::Unknown => "unknown",
}
}
pub fn parse_cli(input: &str) -> Result<Self, String> {
match input {
"correct" => Ok(Self::Correct),
"incorrect" => Ok(Self::Incorrect),
"alternative" => Ok(Self::Alternative),
"note" => Ok(Self::Note),
"marker" => Ok(Self::Marker),
"mute" => Ok(Self::Mute),
"hypothesis" => Ok(Self::Hypothesis),
"friction" => Ok(Self::Friction),
"crystallize_here" => Ok(Self::CrystallizeHere),
other => Err(format!(
"unknown annotation kind '{other}' (expected one of correct, incorrect, alternative, note, marker, mute, hypothesis, friction, crystallize_here)"
)),
}
}
}
/// Status carried by `hypothesis` annotations, serialized in `snake_case`.
///
/// [`validate_against_tape`] requires it on `hypothesis` annotations and flags
/// it as unexpected on every other known kind.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum HypothesisStatus {
Active,
Verifying,
Confirmed,
Disproven,
Stale,
}
/// Range of tape events covered by an annotation.
///
/// [`validate_against_tape`] reports a span as invalid when `start_event_id`
/// differs from the annotation's `event_id`, when `end_event_id` is smaller
/// than `start_event_id`, or when `end_event_id` exceeds the highest `seq` on
/// the tape.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AnnotationSpan {
/// First event of the range.
pub start_event_id: u64,
/// Last event of the range; equal to `start_event_id` for a one-event span.
pub end_event_id: u64,
}
/// Who produced an annotation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AnnotationAuthor {
/// Optional author identifier; becomes the `actor` of derived friction
/// events.
#[serde(default)]
pub id: Option<String>,
/// Category of author; required in the serialized form.
pub kind: AuthorKind,
/// Optional free-form string, not interpreted in this file.
// NOTE(review): presumably the tool or UI the annotation was made in
// (the tests use "burin-code") — confirm with the producers.
#[serde(default)]
pub surface: Option<String>,
}
/// Category of an [`AnnotationAuthor`], serialized in `snake_case`
/// (`"human"`, `"agent"`, `"system"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum AuthorKind {
Human,
Agent,
System,
}
/// Link attached to an annotation.
///
/// The container-level `#[serde(default)]` makes every field optional in the
/// serialized form.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct AnnotationLink {
/// Optional display label.
pub label: Option<String>,
/// Optional URL.
pub url: Option<String>,
/// Optional reference string; mapped to `FrictionLink::trace_id` by
/// [`annotation_to_friction_event`].
pub reference: Option<String>,
}
/// In-memory form of an annotation file: one header plus the annotation
/// records in file order.
#[derive(Debug, Clone)]
pub struct AnnotationTape {
/// Header record of the file.
pub header: AnnotationHeader,
/// Annotation records, in the order they were loaded or pushed.
pub annotations: Vec<Annotation>,
}
impl AnnotationTape {
pub fn new(header: AnnotationHeader) -> Self {
Self {
header,
annotations: Vec::new(),
}
}
pub fn persist(&self, path: &Path) -> Result<(), String> {
if let Some(parent) = path.parent() {
if !parent.as_os_str().is_empty() {
std::fs::create_dir_all(parent)
.map_err(|err| format!("mkdir {}: {err}", parent.display()))?;
}
}
let mut body = String::new();
body.push_str(
&serde_json::to_string(&AnnotationLine::Header(self.header.clone()))
.map_err(|err| format!("serialize annotation header: {err}"))?,
);
body.push('\n');
for annotation in &self.annotations {
body.push_str(
&serde_json::to_string(&AnnotationLine::Annotation(annotation.clone()))
.map_err(|err| format!("serialize annotation: {err}"))?,
);
body.push('\n');
}
std::fs::write(path, body).map_err(|err| format!("write {}: {err}", path.display()))
}
pub fn load(path: &Path) -> Result<Self, String> {
let body = std::fs::read_to_string(path)
.map_err(|err| format!("read {}: {err}", path.display()))?;
let mut lines = body.lines().enumerate().filter(|(_, line)| {
let trimmed = line.trim();
!trimmed.is_empty() && !trimmed.starts_with('#')
});
let (header_idx, header_line) = lines.next().ok_or_else(|| {
format!(
"empty annotation file: {} (expected a header on line 1)",
path.display()
)
})?;
let parsed_header: AnnotationLine =
serde_json::from_str(header_line.trim()).map_err(|err| {
format!(
"parse annotation header at line {} in {}: {err}",
header_idx + 1,
path.display()
)
})?;
let header = match parsed_header {
AnnotationLine::Header(header) => header,
AnnotationLine::Annotation(_) => {
return Err(format!(
"annotation file {} is missing its header (first non-empty line is a record)",
path.display()
))
}
};
if header.schema_version > ANNOTATION_SCHEMA_VERSION {
return Err(format!(
"annotation file {} declares schema_version {} but this runtime supports up to {ANNOTATION_SCHEMA_VERSION}",
path.display(),
header.schema_version
));
}
let mut annotations = Vec::new();
for (idx, line) in lines {
let parsed: AnnotationLine = serde_json::from_str(line.trim()).map_err(|err| {
format!(
"parse annotation at line {} in {}: {err}",
idx + 1,
path.display()
)
})?;
match parsed {
AnnotationLine::Annotation(annotation) => annotations.push(annotation),
AnnotationLine::Header(_) => {
return Err(format!(
"annotation file {} contains a second header at line {}",
path.display(),
idx + 1
))
}
}
}
Ok(Self {
header,
annotations,
})
}
pub fn filter_by_kind<'a>(
&'a self,
kind: AnnotationKind,
) -> impl Iterator<Item = &'a Annotation> + 'a {
self.annotations
.iter()
.filter(move |annotation| annotation.kind == kind)
}
pub fn to_friction_events(&self) -> Vec<FrictionEvent> {
self.filter_by_kind(AnnotationKind::Friction)
.filter_map(|annotation| annotation_to_friction_event(annotation, &self.header))
.collect()
}
pub fn crystallize_anchors(&self) -> Vec<CrystallizeAnchor> {
self.filter_by_kind(AnnotationKind::CrystallizeHere)
.map(|annotation| CrystallizeAnchor {
event_id: annotation.event_id,
end_event_id: annotation
.span
.as_ref()
.map(|span| span.end_event_id)
.unwrap_or(annotation.event_id),
evidence: annotation.evidence.clone(),
author: annotation.author.clone(),
metadata: annotation.metadata.clone(),
})
.collect()
}
}
/// Event range marked by a `crystallize_here` annotation, as produced by
/// [`AnnotationTape::crystallize_anchors`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CrystallizeAnchor {
/// `event_id` of the source annotation.
pub event_id: u64,
/// End of the annotation's span, or `event_id` when it has no span.
pub end_event_id: u64,
/// Copied from the annotation.
pub evidence: Option<String>,
/// Copied from the annotation.
pub author: Option<AnnotationAuthor>,
/// Copied from the annotation.
pub metadata: BTreeMap<String, serde_json::Value>,
}
/// One line of an annotation file.
///
/// Internally tagged: each JSON object carries a `"type"` field set to
/// `"header"` or `"annotation"` next to the record's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum AnnotationLine {
Header(AnnotationHeader),
Annotation(Annotation),
}
/// A single finding of [`validate_against_tape`].
///
/// Serialized internally tagged, with the variant name in `snake_case` under
/// the `"code"` field. `annotation_id` is the annotation's id, or
/// `ann@event_<event_id>` when the id is empty.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "code", rename_all = "snake_case")]
pub enum AnnotationProblem {
/// Not produced anywhere in this file.
Schema {
annotation_id: String,
message: String,
},
/// `event_id` matches no record `seq` on the tape.
UnknownEventId {
annotation_id: String,
event_id: u64,
},
/// A `hypothesis` annotation has no `hypothesis_status`.
HypothesisStatusMissing { annotation_id: String },
/// `hypothesis_status` is set on a known kind other than `hypothesis`.
HypothesisStatusUnexpected { annotation_id: String },
/// A `friction` annotation has no `friction_kind`.
FrictionKindMissing { annotation_id: String },
/// `friction_kind` is set on a known kind other than `friction`.
FrictionKindUnexpected { annotation_id: String },
/// `friction_kind` was rejected by `friction_kind_allowed`.
FrictionKindUnknown {
annotation_id: String,
friction_kind: String,
},
/// The span breaks one of the rules described in `message`.
InvalidSpan {
annotation_id: String,
message: String,
},
/// A non-empty id was already used by an earlier annotation.
DuplicateId { annotation_id: String },
/// The header's `tape_content_hash` differs from the computed tape digest.
TapeDigestMismatch { expected: String, actual: String },
/// The annotation's kind deserialized to `AnnotationKind::Unknown`.
UnknownKind { annotation_id: String },
}
/// Result of [`validate_against_tape`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnnotationValidationReport {
/// Number of annotations examined.
pub annotations_checked: usize,
/// Findings, in the order they were detected.
pub problems: Vec<AnnotationProblem>,
/// Number of annotations per kind, keyed by `AnnotationKind::as_str`.
pub kind_counts: BTreeMap<String, usize>,
}
impl AnnotationValidationReport {
    /// Returns `true` when validation recorded no problems.
    pub fn is_ok(&self) -> bool {
        let Self { problems, .. } = self;
        problems.is_empty()
    }
}
pub fn validate_against_tape(
annotations: &AnnotationTape,
tape: &EventTape,
) -> AnnotationValidationReport {
let event_seqs: BTreeSet<u64> = tape.records.iter().map(|record| record.seq).collect();
let max_seq = event_seqs.iter().max().copied();
let mut problems = Vec::new();
let mut seen_ids: BTreeSet<String> = BTreeSet::new();
let mut kind_counts: BTreeMap<String, usize> = BTreeMap::new();
for annotation in &annotations.annotations {
let id_for_report = if annotation.id.is_empty() {
format!("ann@event_{}", annotation.event_id)
} else {
annotation.id.clone()
};
*kind_counts
.entry(annotation.kind.as_str().to_string())
.or_insert(0) += 1;
if !annotation.id.is_empty() && !seen_ids.insert(annotation.id.clone()) {
problems.push(AnnotationProblem::DuplicateId {
annotation_id: id_for_report.clone(),
});
}
if !event_seqs.contains(&annotation.event_id) {
problems.push(AnnotationProblem::UnknownEventId {
annotation_id: id_for_report.clone(),
event_id: annotation.event_id,
});
}
match annotation.kind {
AnnotationKind::Hypothesis => {
if annotation.hypothesis_status.is_none() {
problems.push(AnnotationProblem::HypothesisStatusMissing {
annotation_id: id_for_report.clone(),
});
}
if annotation.friction_kind.is_some() {
problems.push(AnnotationProblem::FrictionKindUnexpected {
annotation_id: id_for_report.clone(),
});
}
}
AnnotationKind::Friction => {
if annotation.hypothesis_status.is_some() {
problems.push(AnnotationProblem::HypothesisStatusUnexpected {
annotation_id: id_for_report.clone(),
});
}
match annotation.friction_kind.as_deref() {
None => problems.push(AnnotationProblem::FrictionKindMissing {
annotation_id: id_for_report.clone(),
}),
Some(kind) if !friction_kind_allowed(kind) => {
problems.push(AnnotationProblem::FrictionKindUnknown {
annotation_id: id_for_report.clone(),
friction_kind: kind.to_string(),
})
}
Some(_) => {}
}
}
AnnotationKind::Unknown => {
problems.push(AnnotationProblem::UnknownKind {
annotation_id: id_for_report.clone(),
});
}
_ => {
if annotation.hypothesis_status.is_some() {
problems.push(AnnotationProblem::HypothesisStatusUnexpected {
annotation_id: id_for_report.clone(),
});
}
if annotation.friction_kind.is_some() {
problems.push(AnnotationProblem::FrictionKindUnexpected {
annotation_id: id_for_report.clone(),
});
}
}
}
if let Some(span) = annotation.span.as_ref() {
if span.start_event_id != annotation.event_id {
problems.push(AnnotationProblem::InvalidSpan {
annotation_id: id_for_report.clone(),
message: format!(
"span.start_event_id ({}) must equal event_id ({})",
span.start_event_id, annotation.event_id
),
});
}
if span.end_event_id < span.start_event_id {
problems.push(AnnotationProblem::InvalidSpan {
annotation_id: id_for_report.clone(),
message: format!(
"span.end_event_id ({}) is before start_event_id ({})",
span.end_event_id, span.start_event_id
),
});
}
if let Some(max) = max_seq {
if span.end_event_id > max {
problems.push(AnnotationProblem::InvalidSpan {
annotation_id: id_for_report.clone(),
message: format!(
"span.end_event_id ({}) is past the last tape event (seq={max})",
span.end_event_id
),
});
}
}
}
}
if let (Some(expected), Some(actual)) = (
annotations.header.tape_content_hash.as_deref(),
compute_tape_content_hash(tape).as_deref(),
) {
if expected != actual {
problems.push(AnnotationProblem::TapeDigestMismatch {
expected: expected.to_string(),
actual: actual.to_string(),
});
}
}
AnnotationValidationReport {
annotations_checked: annotations.annotations.len(),
problems,
kind_counts,
}
}
/// Computes a hex-encoded BLAKE3 digest over the tape's records.
///
/// Each record is serialized to JSON and fed to the hasher followed by a
/// single `\n`, in tape order. Returns `None` as soon as one record fails to
/// serialize.
pub fn compute_tape_content_hash(tape: &EventTape) -> Option<String> {
    let digest = tape
        .records
        .iter()
        .try_fold(blake3::Hasher::new(), |mut hasher, record| {
            let encoded = serde_json::to_vec(record).ok()?;
            hasher.update(&encoded);
            hasher.update(b"\n");
            Some(hasher)
        })?
        .finalize();
    Some(digest.to_hex().to_string())
}
/// Returns, in tape order, the annotations whose `event_id` equals
/// `record.seq`.
pub fn annotations_for_record<'a>(
    annotations: &'a AnnotationTape,
    record: &TapeRecord,
) -> Vec<&'a Annotation> {
    let target_seq = record.seq;
    let mut matching = Vec::new();
    for annotation in &annotations.annotations {
        if annotation.event_id == target_seq {
            matching.push(annotation);
        }
    }
    matching
}
pub fn annotation_to_friction_event(
annotation: &Annotation,
header: &AnnotationHeader,
) -> Option<FrictionEvent> {
if annotation.kind != AnnotationKind::Friction {
return None;
}
let kind = annotation.friction_kind.clone()?;
if !friction_kind_allowed(&kind) {
return None;
}
let summary = annotation.evidence.clone().unwrap_or_else(|| {
format!(
"annotation {} on event {}",
annotation.id, annotation.event_id
)
});
let mut links = Vec::new();
for link in &annotation.links {
links.push(FrictionLink {
label: link.label.clone(),
url: link.url.clone(),
trace_id: link.reference.clone(),
});
}
Some(FrictionEvent {
schema_version: FRICTION_SCHEMA_VERSION,
id: if annotation.id.is_empty() {
format!("annotation_{}", annotation.event_id)
} else {
annotation.id.clone()
},
kind,
source: header.tape_path.clone(),
actor: annotation.author.as_ref().and_then(|a| a.id.clone()),
tenant_id: None,
task_id: None,
run_id: None,
workflow_id: None,
tool: None,
provider: None,
redacted_summary: summary,
estimated_cost_usd: None,
estimated_time_ms: None,
recurrence_hints: Vec::new(),
trace_id: None,
span_id: None,
links,
human_hypothesis: None,
metadata: annotation.metadata.clone(),
timestamp: annotation
.timestamp
.clone()
.unwrap_or_else(crate::orchestration::now_rfc3339),
})
}
// Unit tests for annotation persistence, validation and the friction /
// crystallize conversions.
#[cfg(test)]
mod tests {
use super::*;
use crate::testbench::tape::{TapeHeader, TapeRecord, TapeRecordKind};
use tempfile::TempDir;
// Builds a tape with three `ClockSleep` records whose `seq` values are 0, 1
// and 2.
fn sample_tape() -> EventTape {
let mut tape = EventTape::new(TapeHeader::current(
Some(1_700_000_000_000),
Some("script.harn".into()),
Vec::new(),
));
for seq in 0..3 {
tape.records.push(TapeRecord {
seq,
virtual_time_ms: 0,
monotonic_ms: 0,
kind: TapeRecordKind::ClockSleep { duration_ms: 1 },
});
}
tape
}
// Builds a valid `note` annotation with fixed evidence, author and timestamp.
// Tests override individual fields with struct-update syntax.
fn note_annotation(id: &str, event_id: u64) -> Annotation {
Annotation {
id: id.into(),
event_id,
kind: AnnotationKind::Note,
evidence: Some("looked fine".into()),
suggested_fix: None,
author: Some(AnnotationAuthor {
id: Some("alice".into()),
kind: AuthorKind::Human,
surface: Some("burin-code".into()),
}),
timestamp: Some("2026-05-10T17:00:00Z".into()),
span: None,
hypothesis_status: None,
friction_kind: None,
links: Vec::new(),
metadata: BTreeMap::new(),
}
}
// `persist` followed by `load` keeps the header version, the record count,
// the kinds and the hypothesis status.
#[test]
fn round_trip_preserves_records() {
let temp = TempDir::new().unwrap();
let path = temp.path().join("run.tape.annotations.jsonl");
let mut tape = AnnotationTape::new(AnnotationHeader::current(
Some("run.tape".into()),
Some("deadbeef".into()),
));
tape.annotations.push(note_annotation("ann-1", 0));
tape.annotations.push(Annotation {
kind: AnnotationKind::Hypothesis,
hypothesis_status: Some(HypothesisStatus::Active),
..note_annotation("ann-2", 1)
});
tape.persist(&path).unwrap();
let loaded = AnnotationTape::load(&path).unwrap();
assert_eq!(loaded.header.schema_version, ANNOTATION_SCHEMA_VERSION);
assert_eq!(loaded.annotations.len(), 2);
assert_eq!(loaded.annotations[0].kind, AnnotationKind::Note);
assert_eq!(loaded.annotations[1].kind, AnnotationKind::Hypothesis);
assert_eq!(
loaded.annotations[1].hypothesis_status,
Some(HypothesisStatus::Active)
);
}
// The validator reports an event id that is not on the tape, a hypothesis
// without status, a disallowed friction kind and a missing friction kind.
#[test]
fn validator_flags_unknown_event_id_and_missing_status() {
let tape = sample_tape();
let mut annotations =
AnnotationTape::new(AnnotationHeader::current(Some("run.tape".into()), None));
annotations.annotations.push(note_annotation("note", 0));
annotations.annotations.push(Annotation {
event_id: 99,
kind: AnnotationKind::Hypothesis,
hypothesis_status: None,
..note_annotation("missing", 99)
});
annotations.annotations.push(Annotation {
kind: AnnotationKind::Friction,
friction_kind: Some("does_not_exist".into()),
..note_annotation("bad-friction", 1)
});
annotations.annotations.push(Annotation {
kind: AnnotationKind::Friction,
friction_kind: None,
..note_annotation("missing-friction", 2)
});
let report = validate_against_tape(&annotations, &tape);
assert_eq!(report.annotations_checked, 4);
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::UnknownEventId { event_id: 99, .. })));
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::HypothesisStatusMissing { .. })));
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::FrictionKindUnknown { .. })));
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::FrictionKindMissing { .. })));
}
// Expects four `InvalidSpan` problems: "bad-start" produces two (start differs
// from event_id, end is past the last seq 2), "inverted" and "past-end"
// produce one each.
#[test]
fn span_validation_enforces_invariants() {
let tape = sample_tape();
let mut annotations = AnnotationTape::new(AnnotationHeader::current(None, None));
annotations.annotations.push(Annotation {
span: Some(AnnotationSpan {
start_event_id: 5,
end_event_id: 10,
}),
..note_annotation("bad-start", 1)
});
annotations.annotations.push(Annotation {
span: Some(AnnotationSpan {
start_event_id: 1,
end_event_id: 0,
}),
..note_annotation("inverted", 1)
});
annotations.annotations.push(Annotation {
span: Some(AnnotationSpan {
start_event_id: 1,
end_event_id: 99,
}),
..note_annotation("past-end", 1)
});
let report = validate_against_tape(&annotations, &tape);
assert_eq!(
report
.problems
.iter()
.filter(|p| matches!(p, AnnotationProblem::InvalidSpan { .. }))
.count(),
4
);
}
// Two annotations sharing a non-empty id yield a `DuplicateId` problem.
#[test]
fn duplicate_ids_are_flagged() {
let tape = sample_tape();
let mut annotations = AnnotationTape::new(AnnotationHeader::current(None, None));
annotations.annotations.push(note_annotation("dupe", 0));
annotations.annotations.push(note_annotation("dupe", 1));
let report = validate_against_tape(&annotations, &tape);
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::DuplicateId { .. })));
}
// A header hash that does not match the computed tape digest is reported.
#[test]
fn tape_digest_mismatch_flags_stale_annotations() {
let tape = sample_tape();
let mut annotations = AnnotationTape::new(AnnotationHeader::current(
Some("run.tape".into()),
Some("not-the-real-hash".into()),
));
annotations.annotations.push(note_annotation("note", 0));
let report = validate_against_tape(&annotations, &tape);
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::TapeDigestMismatch { .. })));
}
// A record with an unrecognized kind string loads as `Unknown` and the
// validator reports it as `UnknownKind`.
#[test]
fn unknown_kind_round_trips_and_validator_flags() {
let temp = TempDir::new().unwrap();
let path = temp.path().join("future.annotations.jsonl");
let body = format!(
"{}\n{}\n",
serde_json::to_string(&AnnotationLine::Header(AnnotationHeader::current(
None, None
)))
.unwrap(),
r#"{"type":"annotation","id":"ann","event_id":0,"kind":"future_kind"}"#
);
std::fs::write(&path, body).unwrap();
let loaded = AnnotationTape::load(&path).unwrap();
assert_eq!(loaded.annotations.len(), 1);
assert_eq!(loaded.annotations[0].kind, AnnotationKind::Unknown);
let report = validate_against_tape(&loaded, &sample_tape());
assert!(report
.problems
.iter()
.any(|p| matches!(p, AnnotationProblem::UnknownKind { .. })));
}
// `load` fails on a header whose schema_version exceeds the supported one,
// and the error names the declared version.
#[test]
fn rejects_newer_schema_version() {
let temp = TempDir::new().unwrap();
let path = temp.path().join("future.annotations.jsonl");
std::fs::write(
&path,
r#"{"type":"header","schema_version":99}
"#,
)
.unwrap();
let err = AnnotationTape::load(&path).unwrap_err();
assert!(err.contains("schema_version 99"), "{err}");
}
// `#` comment lines and blank lines may appear before the header and between
// records without affecting the load.
#[test]
fn comments_and_blank_lines_are_skipped() {
let temp = TempDir::new().unwrap();
let path = temp.path().join("commented.annotations.jsonl");
let header = serde_json::to_string(&AnnotationLine::Header(AnnotationHeader::current(
None, None,
)))
.unwrap();
let annotation =
serde_json::to_string(&AnnotationLine::Annotation(note_annotation("ann", 0))).unwrap();
let body = format!("# leading comment\n\n{header}\n\n# spacer\n{annotation}\n");
std::fs::write(&path, body).unwrap();
let loaded = AnnotationTape::load(&path).unwrap();
assert_eq!(loaded.annotations.len(), 1);
}
// A friction annotation becomes one friction event that carries its kind, the
// friction schema version and the evidence as summary.
#[test]
fn friction_annotations_round_trip_through_friction_event() {
let mut tape =
AnnotationTape::new(AnnotationHeader::current(Some("run.tape".into()), None));
tape.annotations.push(Annotation {
kind: AnnotationKind::Friction,
friction_kind: Some("repeated_query".into()),
evidence: Some("Splunk lookup repeats every incident".into()),
..note_annotation("friction-1", 2)
});
let events = tape.to_friction_events();
assert_eq!(events.len(), 1);
assert_eq!(events[0].kind, "repeated_query");
assert_eq!(events[0].schema_version, FRICTION_SCHEMA_VERSION);
assert_eq!(
events[0].redacted_summary,
"Splunk lookup repeats every incident"
);
}
// A `crystallize_here` annotation with a span yields an anchor that starts at
// its event_id and ends at the span's end.
#[test]
fn crystallize_anchors_surface_event_ids() {
let mut tape = AnnotationTape::new(AnnotationHeader::current(None, None));
tape.annotations.push(Annotation {
kind: AnnotationKind::CrystallizeHere,
span: Some(AnnotationSpan {
start_event_id: 1,
end_event_id: 4,
}),
..note_annotation("crys-1", 1)
});
let anchors = tape.crystallize_anchors();
assert_eq!(anchors.len(), 1);
assert_eq!(anchors[0].event_id, 1);
assert_eq!(anchors[0].end_event_id, 4);
}
}