Skip to main content

omni_dev/voice/reflect/
validate.rs

//! Parses and validates the LLM's YAML reflection response.
//!
//! Validation runs in two stages: (1) `serde_yaml::from_str` deserializes
//! the response into an `LlmEnvelope` of [`EventKind`]s, then (2) each event
//! gets a per-variant semantic check against the item IDs in the current
//! session state. Any failure is returned as a [`ValidationError`] that
//! carries the raw output, so the caller can attach it to a
//! `reflection.error` event.
8
9use std::collections::HashSet;
10use std::hash::BuildHasher;
11
12use serde::Deserialize;
13
14use crate::voice::events::{EventKind, ExpireReason, ItemId};
15
16/// What the LLM is expected to emit at the top level — a single YAML
17/// document with one key, `events:`, holding the discriminated event
18/// list.
19#[derive(Debug, Deserialize)]
20struct LlmEnvelope {
21    events: Vec<EventKind>,
22}
23
/// Error produced when the LLM output fails to parse or fails a
/// semantic check.
#[derive(Clone, Debug)]
pub struct ValidationError {
    /// One-line, human-readable description of the failure.
    pub error: String,
    /// The LLM output exactly as received, kept so it can be attached
    /// to the resulting `reflection.error` event for debugging.
    pub raw_output: String,
}

impl std::fmt::Display for ValidationError {
    /// Writes only the `error` message; `raw_output` is not included.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.error)
    }
}

impl std::error::Error for ValidationError {}
41
42/// Parses `raw_yaml` and validates each event against the v1 rules.
43///
44/// `existing_ids` is the set of item IDs already known from the session's
45/// projected state. Items minted earlier in the same batch are also
46/// allowed as references (the LLM may create-then-update / create-then-
47/// complete inside one response).
48///
49/// On success: returns the validated list of [`EventKind`]s in document
50/// order. On failure: returns a [`ValidationError`] carrying `raw_yaml`
51/// in its `raw_output` field.
52pub fn parse_and_validate<S: BuildHasher>(
53    raw_yaml: &str,
54    existing_ids: &HashSet<ItemId, S>,
55) -> Result<Vec<EventKind>, ValidationError> {
56    let envelope: LlmEnvelope = serde_yaml::from_str(raw_yaml).map_err(|e| ValidationError {
57        error: format!("YAML parse failure: {e}"),
58        raw_output: raw_yaml.to_string(),
59    })?;
60
61    let mut known: HashSet<ItemId> = existing_ids.iter().copied().collect();
62    let mut seen_new_ids: HashSet<ItemId> = HashSet::new();
63
64    for (idx, kind) in envelope.events.iter().enumerate() {
65        if let Err(error) = check_event(kind, &known, &mut seen_new_ids) {
66            return Err(ValidationError {
67                error: format!("event[{idx}] ({}): {error}", event_name(kind)),
68                raw_output: raw_yaml.to_string(),
69            });
70        }
71        // After a successful create, the newly-minted ID becomes
72        // referenceable by later events in the same batch.
73        if let EventKind::ItemCreate(c) = kind {
74            known.insert(c.item_id);
75        }
76    }
77
78    Ok(envelope.events)
79}
80
81fn event_name(kind: &EventKind) -> &'static str {
82    match kind {
83        EventKind::ItemCreate(_) => "item.create",
84        EventKind::ItemUpdate(_) => "item.update",
85        EventKind::ItemExpire(_) => "item.expire",
86        EventKind::ItemComplete(_) => "item.complete",
87        EventKind::DecisionRecord(_) => "decision.record",
88        EventKind::ResearchNote(_) => "research.note",
89        EventKind::ReflectionError(_) => "reflection.error",
90    }
91}
92
93fn check_event(
94    kind: &EventKind,
95    known: &HashSet<ItemId>,
96    seen_new: &mut HashSet<ItemId>,
97) -> Result<(), String> {
98    match kind {
99        EventKind::ItemCreate(c) => {
100            if !seen_new.insert(c.item_id) {
101                return Err(format!(
102                    "duplicate item_id {} minted in this batch",
103                    c.item_id
104                ));
105            }
106            if known.contains(&c.item_id) {
107                return Err(format!(
108                    "item_id {} collides with an existing item from current_state",
109                    c.item_id
110                ));
111            }
112            Ok(())
113        }
114        EventKind::ItemUpdate(u) => require_known(known, u.item_id, "item.update"),
115        EventKind::ItemExpire(e) => {
116            require_known(known, e.item_id, "item.expire")?;
117            if matches!(e.reason, ExpireReason::Ttl) {
118                return Err("reason: ttl is reserved for `voice review`; \
119                            reflect must use `retracted` or `superseded`"
120                    .to_string());
121            }
122            let is_superseded = matches!(e.reason, ExpireReason::Superseded);
123            if is_superseded && e.superseded_by.is_none() {
124                return Err("reason: superseded requires superseded_by".to_string());
125            }
126            if !is_superseded && e.superseded_by.is_some() {
127                return Err(format!(
128                    "superseded_by is only valid when reason == superseded \
129                     (got reason: {:?})",
130                    e.reason
131                ));
132            }
133            Ok(())
134        }
135        EventKind::ItemComplete(c) => require_known(known, c.item_id, "item.complete"),
136        EventKind::DecisionRecord(_)
137        | EventKind::ResearchNote(_)
138        | EventKind::ReflectionError(_) => Ok(()),
139    }
140}
141
142fn require_known(known: &HashSet<ItemId>, id: ItemId, what: &str) -> Result<(), String> {
143    if known.contains(&id) {
144        Ok(())
145    } else {
146        Err(format!(
147            "{what} references unknown item_id {id} (not in current_state and not minted earlier in this batch)"
148        ))
149    }
150}
151
#[cfg(test)]
// Tests unwrap freely: a panic is the failure signal.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Builds a deterministic item ID from `n`.
    fn ulid(n: u128) -> ItemId {
        ulid::Ulid::from_parts(0, n)
    }

    /// Session state with no items.
    fn no_existing() -> HashSet<ItemId> {
        HashSet::new()
    }

    /// Session state containing exactly one item.
    fn only(id: ItemId) -> HashSet<ItemId> {
        HashSet::from([id])
    }

    /// Runs validation, expects it to fail, and returns the error message.
    fn rejection(yaml: &str, existing: &HashSet<ItemId>) -> String {
        parse_and_validate(yaml, existing).unwrap_err().error
    }

    /// An empty `events:` list validates to an empty result.
    #[test]
    fn empty_events_list_is_valid() {
        let events = parse_and_validate("events: []", &no_existing()).unwrap();
        assert!(events.is_empty());
    }

    /// A create followed by an update of the same ID passes in one batch.
    #[test]
    fn item_create_is_accepted_and_minted_id_is_referenceable() {
        let new_id = ulid(1);
        let yaml = format!(
            "events:\n  - event_type: item.create\n    payload:\n      item_id: {new_id}\n      class: todo\n      text: alpha\n  - event_type: item.update\n    payload:\n      item_id: {new_id}\n      priority: high\n"
        );
        let events = parse_and_validate(&yaml, &no_existing()).unwrap();
        assert_eq!(events.len(), 2);
    }

    /// Updating an ID that was never created is rejected.
    #[test]
    fn item_update_unknown_id_errors() {
        let yaml = format!(
            "events:\n  - event_type: item.update\n    payload:\n      item_id: {}\n      text: changed\n",
            ulid(99)
        );
        let message = rejection(&yaml, &no_existing());
        assert!(message.contains("unknown item_id"), "got: {}", message);
    }

    /// Completing an ID that was never created is rejected.
    #[test]
    fn item_complete_unknown_id_errors() {
        let yaml = format!(
            "events:\n  - event_type: item.complete\n    payload:\n      item_id: {}\n",
            ulid(99)
        );
        let message = rejection(&yaml, &no_existing());
        let names_event = message.contains("item.complete");
        let names_problem = message.contains("unknown item_id");
        assert!(
            names_event && names_problem,
            "expected item.complete error: {}",
            message
        );
    }

    /// Expiring an ID that was never created is rejected.
    #[test]
    fn item_expire_unknown_id_errors() {
        let yaml = format!(
            "events:\n  - event_type: item.expire\n    payload:\n      item_id: {}\n      reason: retracted\n",
            ulid(99)
        );
        let message = rejection(&yaml, &no_existing());
        let names_event = message.contains("item.expire");
        let names_problem = message.contains("unknown item_id");
        assert!(
            names_event && names_problem,
            "expected item.expire error: {}",
            message
        );
    }

    /// `Display` prints the `error` field and nothing else.
    #[test]
    fn validation_error_display_returns_inner_message() {
        let err = ValidationError {
            error: "schema mismatch".into(),
            raw_output: "raw bytes".into(),
        };
        assert_eq!(err.to_string(), "schema mismatch");
    }

    /// Completing an item from the session state is accepted.
    #[test]
    fn item_complete_with_existing_id_is_accepted() {
        let existing_id = ulid(7);
        let yaml = format!(
            "events:\n  - event_type: item.complete\n    payload:\n      item_id: {existing_id}\n      note: shipped\n"
        );
        let events = parse_and_validate(&yaml, &only(existing_id)).unwrap();
        assert_eq!(events.len(), 1);
    }

    /// `reason: ttl` is rejected even for a known item.
    #[test]
    fn item_expire_with_reason_ttl_is_rejected() {
        let existing_id = ulid(7);
        let yaml = format!(
            "events:\n  - event_type: item.expire\n    payload:\n      item_id: {existing_id}\n      reason: ttl\n"
        );
        let message = rejection(&yaml, &only(existing_id));
        assert!(
            message.contains("reason: ttl is reserved"),
            "got: {}",
            message
        );
    }

    /// `reason: superseded` without `superseded_by` is rejected.
    #[test]
    fn item_expire_superseded_requires_superseded_by() {
        let existing_id = ulid(7);
        let yaml = format!(
            "events:\n  - event_type: item.expire\n    payload:\n      item_id: {existing_id}\n      reason: superseded\n"
        );
        let message = rejection(&yaml, &only(existing_id));
        assert!(message.contains("superseded_by"), "got: {}", message);
    }

    /// `superseded_by` combined with a non-superseded reason is rejected.
    #[test]
    fn item_expire_retracted_with_superseded_by_is_rejected() {
        let existing_id = ulid(7);
        let yaml = format!(
            "events:\n  - event_type: item.expire\n    payload:\n      item_id: {existing_id}\n      reason: retracted\n      superseded_by: {}\n",
            ulid(8)
        );
        let message = rejection(&yaml, &only(existing_id));
        assert!(
            message.contains("superseded_by is only valid"),
            "got: {}",
            message
        );
    }

    /// Two creates with the same ID in one batch are rejected.
    #[test]
    fn duplicate_item_create_id_is_rejected() {
        let new_id = ulid(1);
        let yaml = format!(
            "events:\n  - event_type: item.create\n    payload:\n      item_id: {new_id}\n      class: todo\n      text: a\n  - event_type: item.create\n    payload:\n      item_id: {new_id}\n      class: todo\n      text: b\n"
        );
        let message = rejection(&yaml, &no_existing());
        assert!(message.contains("duplicate item_id"), "got: {}", message);
    }

    /// A create whose ID already exists in the session state is rejected.
    #[test]
    fn item_create_id_colliding_with_existing_state_is_rejected() {
        let id = ulid(1);
        let yaml = format!(
            "events:\n  - event_type: item.create\n    payload:\n      item_id: {id}\n      class: todo\n      text: a\n"
        );
        let message = rejection(&yaml, &only(id));
        assert!(message.contains("collides"), "got: {}", message);
    }

    /// Input that does not deserialize yields a parse failure that keeps
    /// the raw input.
    #[test]
    fn malformed_yaml_returns_validation_error_carrying_raw_output() {
        let yaml = "this is not: valid yaml\n  - unbalanced";
        let err = parse_and_validate(yaml, &no_existing()).unwrap_err();
        assert!(
            err.error.contains("YAML parse failure"),
            "got: {}",
            err.error
        );
        assert_eq!(err.raw_output, yaml);
    }

    /// An unrecognized `event_type` fails at the deserialization stage.
    #[test]
    fn unknown_event_type_errors_via_serde() {
        let yaml = "events:\n  - event_type: item.invent\n    payload:\n      item_id: nonsense\n";
        let message = rejection(yaml, &no_existing());
        assert!(message.contains("YAML parse failure"), "got: {}", message);
    }

    /// `decision.record` passes with no items in the session state.
    #[test]
    fn decision_record_does_not_need_existing_ids() {
        let yaml = format!(
            "events:\n  - event_type: decision.record\n    payload:\n      decision_id: {}\n      text: choose ULIDs\n",
            ulid(1)
        );
        let events = parse_and_validate(&yaml, &no_existing()).unwrap();
        assert_eq!(events.len(), 1);
    }

    /// `research.note` passes with no items in the session state.
    #[test]
    fn research_note_does_not_need_existing_ids() {
        let yaml = format!(
            "events:\n  - event_type: research.note\n    payload:\n      note_id: {}\n      text: assemblyai is immutable-finals\n",
            ulid(1)
        );
        let events = parse_and_validate(&yaml, &no_existing()).unwrap();
        assert_eq!(events.len(), 1);
    }
}
354}