Skip to main content

ff_core/contracts/
decode.rs

1// `EngineError` is ~200 bytes; the decoder and its helpers return
2// `Result<_, EngineError>` throughout to match the
3// [`crate::engine_backend::EngineBackend::list_edges`] contract. The
4// variant size is a cross-crate design point (see ff-backend-valkey's
5// crate-level allow for the same rationale); a future PR can box the
6// large `Conflict`/`Transport` variants. Module-local allow to
7// contain the exception to this one file.
8#![allow(clippy::result_large_err)]
9
10//! Canonical decoders for engine-owned hash shapes.
11//!
12//! RFC-012 Stage 1c (T2): the edge-hash decoder lives here so every
13//! `EngineBackend` implementation — not just `ff-backend-valkey` —
14//! shares one strict-parse posture and one error surface
15//! ([`EngineError::Validation { kind: Corruption, .. }`]). ff-sdk's
16//! snapshot module historically owned this code and surfaced
17//! `SdkError::Config`; the pre-migration wrapper still maps to that
18//! shape so public ff-sdk callers see no behavior change while the
19//! engine-side decoder moves.
20//!
21//! Stage 1c T3 adds [`build_execution_snapshot`] and
22//! [`build_flow_snapshot`] alongside the edge decoder: every
23//! engine-owned hash shape now parses through one canonical strict-parse
24//! surface, freeing `ff-backend-valkey` to implement
25//! `describe_execution` / `describe_flow` against the trait and letting
26//! ff-sdk collapse its snapshot module into thin trait forwarders.
27//!
28//! [`EngineError::Validation { kind: Corruption, .. }`]: crate::engine_error::EngineError::Validation
29
30use std::collections::{BTreeMap, HashMap};
31
32use crate::contracts::{
33    AttemptSummary, EdgeSnapshot, ExecutionSnapshot, FlowSnapshot, LeaseSummary,
34};
35use crate::engine_error::{EngineError, ValidationKind};
36use crate::state::PublicState;
37use crate::types::{
38    AttemptId, AttemptIndex, EdgeId, ExecutionId, FlowId, LaneId, LeaseEpoch, Namespace,
39    TimestampMs, WaitpointId, WorkerInstanceId,
40};
41
/// Canonical list of FF-owned field names on the flow-scoped
/// `edge:<edge_id>` hash.
///
/// [`build_edge_snapshot`] rejects any HGETALL field whose name is not
/// in this list, treating it as on-disk corruption or protocol drift.
/// The list is `pub` so test fixtures and diagnostic tooling can reuse
/// it instead of hard-coding their own copy.
pub const EDGE_KNOWN_FIELDS: &[&str] = &[
    // Identity of the edge and the flow that owns it.
    "edge_id",
    "flow_id",
    // The two executions the edge connects.
    "upstream_execution_id",
    "downstream_execution_id",
    // Dependency semantics and optional payload reference.
    "dependency_kind",
    "satisfaction_condition",
    "data_passing_ref",
    // Lifecycle and provenance.
    "edge_state",
    "created_at",
    "created_by",
];
60
61/// Assemble an [`EdgeSnapshot`] from the raw HGETALL field map.
62///
63/// Mirrors the pre-T2 ff-sdk free-fn: every validation gate (unknown
64/// fields, missing required fields, identity cross-check against the
65/// caller-supplied `flow_id`/`edge_id`) returns the same diagnostic
66/// shape, just routed through [`EngineError::Validation`] with
67/// [`ValidationKind::Corruption`] instead of `SdkError::Config`. The
68/// pre-migration ff-sdk wrapper re-maps to `SdkError::Config` for
69/// public-API parity; direct backend callers read the
70/// `EngineError::Validation` payload.
71///
72/// `flow_id` + `edge_id` are the caller's expected identities. The
73/// decoder verifies both are present and match the stored values; a
74/// mismatch or absence surfaces as `Corruption` because it indicates
75/// a wrong-key read or an on-disk drift.
76pub fn build_edge_snapshot(
77    flow_id: &FlowId,
78    edge_id: &EdgeId,
79    raw: &HashMap<String, String>,
80) -> Result<EdgeSnapshot, EngineError> {
81    // Unknown-field sweep — reject eagerly so a future FF rename that
82    // landed a new field surfaces as an explicit parse failure rather
83    // than silently dropping data.
84    for k in raw.keys() {
85        if !EDGE_KNOWN_FIELDS.contains(&k.as_str()) {
86            return Err(corruption(
87                "edge_snapshot: edge_hash",
88                None,
89                &format!("has unexpected field '{k}' (protocol drift or corruption?)"),
90            ));
91        }
92    }
93
94    // edge_id cross-check.
95    let stored_edge_id_str = required(raw, "edge_snapshot: edge_hash", "edge_id")?;
96    if stored_edge_id_str != edge_id.to_string() {
97        return Err(corruption(
98            "edge_snapshot: edge_hash",
99            Some("edge_id"),
100            &format!(
101                "'{stored_edge_id_str}' does not match requested edge_id \
102                 '{edge_id}' (key corruption or wrong-key read?)"
103            ),
104        ));
105    }
106
107    // flow_id cross-check.
108    let stored_flow_id_str = required(raw, "edge_snapshot: edge_hash", "flow_id")?;
109    if stored_flow_id_str != flow_id.to_string() {
110        return Err(corruption(
111            "edge_snapshot: edge_hash",
112            Some("flow_id"),
113            &format!(
114                "'{stored_flow_id_str}' does not match requested flow_id \
115                 '{flow_id}' (key corruption or wrong-key read?)"
116            ),
117        ));
118    }
119
120    let upstream_execution_id = parse_eid(raw, "upstream_execution_id")?;
121    let downstream_execution_id = parse_eid(raw, "downstream_execution_id")?;
122
123    let dependency_kind = required(raw, "edge_snapshot: edge_hash", "dependency_kind")?;
124    let satisfaction_condition =
125        required(raw, "edge_snapshot: edge_hash", "satisfaction_condition")?;
126
127    // data_passing_ref is stored as "" when the stager passed None.
128    // Treat empty as absent rather than surfacing an empty String.
129    let data_passing_ref = raw
130        .get("data_passing_ref")
131        .filter(|s| !s.is_empty())
132        .cloned();
133
134    let edge_state = required(raw, "edge_snapshot: edge_hash", "edge_state")?;
135
136    let created_at = parse_ts_required(raw, "edge_snapshot: edge_hash", "created_at")?;
137    let created_by = required(raw, "edge_snapshot: edge_hash", "created_by")?;
138
139    Ok(EdgeSnapshot::new(
140        edge_id.clone(),
141        flow_id.clone(),
142        upstream_execution_id,
143        downstream_execution_id,
144        dependency_kind,
145        satisfaction_condition,
146        data_passing_ref,
147        edge_state,
148        created_at,
149        created_by,
150    ))
151}
152
153/// Format a `Corruption` detail string in the
154/// `"<context>: <field?>: <message>"` shape documented on
155/// [`ValidationKind::Corruption`].
156fn corruption(context: &str, field: Option<&str>, message: &str) -> EngineError {
157    let detail = match field {
158        Some(f) => format!("{context}: {f}: {message}"),
159        None => format!("{context}: {message}"),
160    };
161    EngineError::Validation {
162        kind: ValidationKind::Corruption,
163        detail,
164    }
165}
166
167/// Fetch a required non-empty string field, emitting a `Corruption`
168/// error when the field is absent or empty.
169fn required(
170    raw: &HashMap<String, String>,
171    context: &str,
172    field: &str,
173) -> Result<String, EngineError> {
174    raw.get(field)
175        .filter(|s| !s.is_empty())
176        .cloned()
177        .ok_or_else(|| {
178            corruption(
179                context,
180                Some(field),
181                "is missing or empty (key corruption?)",
182            )
183        })
184}
185
186/// Parse a ms-timestamp field that must be present.
187fn parse_ts_required(
188    raw: &HashMap<String, String>,
189    context: &str,
190    field: &str,
191) -> Result<TimestampMs, EngineError> {
192    let s = required(raw, context, field)?;
193    let ms: i64 = s.parse().map_err(|e| {
194        corruption(
195            context,
196            Some(field),
197            &format!("is not a valid ms timestamp ('{s}'): {e}"),
198        )
199    })?;
200    Ok(TimestampMs::from_millis(ms))
201}
202
203/// Parse a required ExecutionId field.
204fn parse_eid(raw: &HashMap<String, String>, field: &str) -> Result<ExecutionId, EngineError> {
205    let s = required(raw, "edge_snapshot: edge_hash", field)?;
206    ExecutionId::parse(&s).map_err(|e| {
207        corruption(
208            "edge_snapshot: edge_hash",
209            Some(field),
210            &format!("'{s}' is not a valid ExecutionId (key corruption?): {e}"),
211        )
212    })
213}
214
215// ═══════════════════════════════════════════════════════════════════════
216// execution decoder (describe_execution)
217// ═══════════════════════════════════════════════════════════════════════
218
219/// Assemble an [`ExecutionSnapshot`] from the raw HGETALL field maps.
220///
221/// `core` is the HGETALL of `exec_core`, `tags_raw` the HGETALL of the
222/// sibling tags hash (which may be empty for executions created without
223/// tags). Every parse failure surfaces as
224/// [`EngineError::Validation { kind: Corruption, .. }`] — fields that
225/// FCALLs write atomically are strict-required, while fields that clear
226/// on transition (`blocking_reason`, `current_attempt_id`, etc.) are
227/// treated as absent when empty.
228pub fn build_execution_snapshot(
229    execution_id: ExecutionId,
230    core: &HashMap<String, String>,
231    tags_raw: HashMap<String, String>,
232) -> Result<Option<ExecutionSnapshot>, EngineError> {
233    let ctx = "describe_execution: exec_core";
234
235    let public_state = parse_public_state(opt_str(core, "public_state").unwrap_or(""))?;
236
237    // `LaneId::try_new` validates non-empty + ASCII-printable + <= 64 bytes.
238    // Exec_core writes a LaneId that already passed these invariants at
239    // ingress; a read that fails validation here signals on-disk
240    // corruption — surface it rather than silently constructing an
241    // invalid LaneId that would mis-partition downstream.
242    let lane_id = LaneId::try_new(opt_str(core, "lane_id").unwrap_or("")).map_err(|e| {
243        corruption(
244            ctx,
245            Some("lane_id"),
246            &format!("fails LaneId validation (key corruption?): {e}"),
247        )
248    })?;
249
250    let namespace_str = opt_str(core, "namespace").unwrap_or("").to_owned();
251    let namespace = Namespace::new(namespace_str);
252
253    let flow_id = opt_str(core, "flow_id")
254        .filter(|s| !s.is_empty())
255        .map(|s| {
256            FlowId::parse(s).map_err(|e| {
257                corruption(
258                    ctx,
259                    Some("flow_id"),
260                    &format!("is not a valid UUID (key corruption?): {e}"),
261                )
262            })
263        })
264        .transpose()?;
265
266    let blocking_reason = opt_str(core, "blocking_reason")
267        .filter(|s| !s.is_empty())
268        .map(str::to_owned);
269    let blocking_detail = opt_str(core, "blocking_detail")
270        .filter(|s| !s.is_empty())
271        .map(str::to_owned);
272
273    // created_at + last_mutation_at are engine-maintained invariants
274    // (lua/execution.lua writes both on create; every mutating FCALL
275    // updates last_mutation_at). Missing values indicate on-disk
276    // corruption, not a valid pre-create state — fail loudly.
277    let created_at = parse_ts(core, ctx, "created_at")?.ok_or_else(|| {
278        corruption(
279            ctx,
280            Some("created_at"),
281            "is missing or empty (key corruption?)",
282        )
283    })?;
284    let last_mutation_at = parse_ts(core, ctx, "last_mutation_at")?.ok_or_else(|| {
285        corruption(
286            ctx,
287            Some("last_mutation_at"),
288            "is missing or empty (key corruption?)",
289        )
290    })?;
291
292    let total_attempt_count: u32 =
293        parse_u32_strict(core, ctx, "total_attempt_count")?.unwrap_or(0);
294
295    let current_attempt = build_attempt_summary(core)?;
296    let current_lease = build_lease_summary(core)?;
297
298    let current_waitpoint = opt_str(core, "current_waitpoint_id")
299        .filter(|s| !s.is_empty())
300        .map(|s| {
301            WaitpointId::parse(s).map_err(|e| {
302                corruption(
303                    ctx,
304                    Some("current_waitpoint_id"),
305                    &format!("is not a valid UUID (key corruption?): {e}"),
306                )
307            })
308        })
309        .transpose()?;
310
311    let tags: BTreeMap<String, String> = tags_raw.into_iter().collect();
312
313    Ok(Some(ExecutionSnapshot::new(
314        execution_id,
315        flow_id,
316        lane_id,
317        namespace,
318        public_state,
319        blocking_reason,
320        blocking_detail,
321        current_attempt,
322        current_lease,
323        current_waitpoint,
324        created_at,
325        last_mutation_at,
326        total_attempt_count,
327        tags,
328    )))
329}
330
/// Borrow the value stored under `field` as a `&str`.
///
/// Returns `None` only when the key is absent; an empty value is
/// returned as `Some("")`, so callers that treat empty as absent must
/// filter it themselves.
fn opt_str<'a>(map: &'a HashMap<String, String>, field: &str) -> Option<&'a str> {
    let value = map.get(field)?;
    Some(value.as_str())
}
334
335/// Strictly parse a ms-timestamp field. `Ok(None)` when absent/empty,
336/// `Err` on unparseable content. `context` names both the calling
337/// FCALL and the hash (e.g. `"describe_execution: exec_core"`) so
338/// error messages point to the exact source of corruption.
339fn parse_ts(
340    map: &HashMap<String, String>,
341    context: &str,
342    field: &str,
343) -> Result<Option<TimestampMs>, EngineError> {
344    match opt_str(map, field).filter(|s| !s.is_empty()) {
345        None => Ok(None),
346        Some(raw) => {
347            let ms: i64 = raw.parse().map_err(|e| {
348                corruption(
349                    context,
350                    Some(field),
351                    &format!("is not a valid ms timestamp ('{raw}'): {e}"),
352                )
353            })?;
354            Ok(Some(TimestampMs::from_millis(ms)))
355        }
356    }
357}
358
359/// Strictly parse a `u32` field. Returns `Ok(None)` when the field is
360/// absent or empty (a valid pre-write state), `Err` when the value is
361/// present but unparseable (on-disk corruption).
362fn parse_u32_strict(
363    map: &HashMap<String, String>,
364    context: &str,
365    field: &str,
366) -> Result<Option<u32>, EngineError> {
367    match opt_str(map, field).filter(|s| !s.is_empty()) {
368        None => Ok(None),
369        Some(raw) => Ok(Some(raw.parse().map_err(|e| {
370            corruption(
371                context,
372                Some(field),
373                &format!("is not a valid u32 ('{raw}'): {e}"),
374            )
375        })?)),
376    }
377}
378
379/// Strictly parse a `u64` field. Semantics mirror [`parse_u32_strict`].
380fn parse_u64_strict(
381    map: &HashMap<String, String>,
382    context: &str,
383    field: &str,
384) -> Result<Option<u64>, EngineError> {
385    match opt_str(map, field).filter(|s| !s.is_empty()) {
386        None => Ok(None),
387        Some(raw) => Ok(Some(raw.parse().map_err(|e| {
388            corruption(
389                context,
390                Some(field),
391                &format!("is not a valid u64 ('{raw}'): {e}"),
392            )
393        })?)),
394    }
395}
396
397fn parse_public_state(raw: &str) -> Result<PublicState, EngineError> {
398    // exec_core stores the snake_case literal (e.g. "waiting"). PublicState's
399    // Deserialize accepts the JSON-quoted form, so wrap + delegate.
400    let quoted = format!("\"{raw}\"");
401    serde_json::from_str(&quoted).map_err(|e| {
402        corruption(
403            "describe_execution: exec_core",
404            Some("public_state"),
405            &format!("'{raw}' is not a known public state: {e}"),
406        )
407    })
408}
409
410fn build_attempt_summary(
411    core: &HashMap<String, String>,
412) -> Result<Option<AttemptSummary>, EngineError> {
413    let ctx = "describe_execution: exec_core";
414    let attempt_id_str = match opt_str(core, "current_attempt_id").filter(|s| !s.is_empty()) {
415        None => return Ok(None),
416        Some(s) => s,
417    };
418    let attempt_id = AttemptId::parse(attempt_id_str).map_err(|e| {
419        corruption(
420            ctx,
421            Some("current_attempt_id"),
422            &format!("is not a valid UUID: {e}"),
423        )
424    })?;
425    // When `current_attempt_id` is set, `current_attempt_index` MUST be
426    // set too — lua/execution.lua writes both atomically in
427    // `ff_claim_execution`. A missing index while the id is populated
428    // is corruption, not a valid intermediate state.
429    let attempt_index = parse_u32_strict(core, ctx, "current_attempt_index")?.ok_or_else(|| {
430        corruption(
431            ctx,
432            Some("current_attempt_index"),
433            "is missing while current_attempt_id is set (key corruption?)",
434        )
435    })?;
436    Ok(Some(AttemptSummary::new(
437        attempt_id,
438        AttemptIndex::new(attempt_index),
439    )))
440}
441
442fn build_lease_summary(
443    core: &HashMap<String, String>,
444) -> Result<Option<LeaseSummary>, EngineError> {
445    let ctx = "describe_execution: exec_core";
446    // A lease is "held" when the worker_instance_id field is populated
447    // AND lease_expires_at is set. Both clear together on revoke/expire
448    // (see clear_lease_and_indexes in lua/helpers.lua).
449    let wid_str = match opt_str(core, "current_worker_instance_id").filter(|s| !s.is_empty()) {
450        None => return Ok(None),
451        Some(s) => s,
452    };
453    let expires_at = match parse_ts(core, ctx, "lease_expires_at")? {
454        None => return Ok(None),
455        Some(ts) => ts,
456    };
457    // A lease is only "held" if the epoch is present too — lua/helpers.lua
458    // sets/clears epoch atomically with wid + expires_at. Parse strictly
459    // and require it: a missing epoch alongside a live wid is corruption.
460    let epoch = parse_u64_strict(core, ctx, "current_lease_epoch")?.ok_or_else(|| {
461        corruption(
462            ctx,
463            Some("current_lease_epoch"),
464            "is missing while current_worker_instance_id is set (key corruption?)",
465        )
466    })?;
467    Ok(Some(LeaseSummary::new(
468        LeaseEpoch::new(epoch),
469        WorkerInstanceId::new(wid_str.to_owned()),
470        expires_at,
471    )))
472}
473
474// ═══════════════════════════════════════════════════════════════════════
475// flow decoder (describe_flow)
476// ═══════════════════════════════════════════════════════════════════════
477
/// Canonical list of FF-owned snake_case field names on `flow_core`.
///
/// [`build_flow_snapshot`] sorts every HGETALL field into one of three
/// buckets: names in this list are decoded as FF fields; other names
/// matching the `^[a-z][a-z0-9_]*\.` namespaced-tag shape are surfaced
/// on [`FlowSnapshot::tags`]; anything else is reported as a
/// `Corruption` error so on-disk corruption or protocol drift fails
/// loud.
pub const FLOW_CORE_KNOWN_FIELDS: &[&str] = &[
    // Identity and classification.
    "flow_id",
    "flow_kind",
    "namespace",
    "public_flow_state",
    // Graph bookkeeping.
    "graph_revision",
    "node_count",
    "edge_count",
    // Lifecycle timestamps.
    "created_at",
    "last_mutation_at",
    // Cancellation details.
    "cancelled_at",
    "cancel_reason",
    "cancellation_policy",
];
497
498/// Assemble a [`FlowSnapshot`] from the raw HGETALL field map.
499///
500/// Cross-checks the stored `flow_id` against the caller's expected id.
501/// Unknown fields that match the `^[a-z][a-z0-9_]*\.` namespaced-tag
502/// shape are routed to `tags`; any other unknown field surfaces as
503/// `Corruption`.
504pub fn build_flow_snapshot(
505    flow_id: FlowId,
506    raw: &HashMap<String, String>,
507) -> Result<FlowSnapshot, EngineError> {
508    let ctx = "describe_flow: flow_core";
509
510    // flow_id cross-check — corruption or wrong-key read.
511    let stored_flow_id_str = opt_str(raw, "flow_id")
512        .filter(|s| !s.is_empty())
513        .ok_or_else(|| corruption(ctx, Some("flow_id"), "is missing or empty (key corruption?)"))?;
514    if stored_flow_id_str != flow_id.to_string() {
515        return Err(corruption(
516            ctx,
517            Some("flow_id"),
518            &format!(
519                "'{stored_flow_id_str}' does not match requested flow_id \
520                 '{flow_id}' (key corruption or wrong-key read?)"
521            ),
522        ));
523    }
524
525    let namespace_str = opt_str(raw, "namespace")
526        .filter(|s| !s.is_empty())
527        .ok_or_else(|| {
528            corruption(ctx, Some("namespace"), "is missing or empty (key corruption?)")
529        })?;
530    let namespace = Namespace::new(namespace_str.to_owned());
531
532    let flow_kind = opt_str(raw, "flow_kind")
533        .filter(|s| !s.is_empty())
534        .ok_or_else(|| {
535            corruption(ctx, Some("flow_kind"), "is missing or empty (key corruption?)")
536        })?
537        .to_owned();
538
539    let public_flow_state = opt_str(raw, "public_flow_state")
540        .filter(|s| !s.is_empty())
541        .ok_or_else(|| {
542            corruption(
543                ctx,
544                Some("public_flow_state"),
545                "is missing or empty (key corruption?)",
546            )
547        })?
548        .to_owned();
549
550    let graph_revision = parse_u64_strict(raw, ctx, "graph_revision")?
551        .ok_or_else(|| corruption(ctx, Some("graph_revision"), "is missing (key corruption?)"))?;
552    let node_count = parse_u32_strict(raw, ctx, "node_count")?
553        .ok_or_else(|| corruption(ctx, Some("node_count"), "is missing (key corruption?)"))?;
554    let edge_count = parse_u32_strict(raw, ctx, "edge_count")?
555        .ok_or_else(|| corruption(ctx, Some("edge_count"), "is missing (key corruption?)"))?;
556
557    let created_at = parse_ts(raw, ctx, "created_at")?.ok_or_else(|| {
558        corruption(
559            ctx,
560            Some("created_at"),
561            "is missing or empty (key corruption?)",
562        )
563    })?;
564    let last_mutation_at = parse_ts(raw, ctx, "last_mutation_at")?.ok_or_else(|| {
565        corruption(
566            ctx,
567            Some("last_mutation_at"),
568            "is missing or empty (key corruption?)",
569        )
570    })?;
571
572    let cancelled_at = parse_ts(raw, ctx, "cancelled_at")?;
573    let cancel_reason = opt_str(raw, "cancel_reason")
574        .filter(|s| !s.is_empty())
575        .map(str::to_owned);
576    let cancellation_policy = opt_str(raw, "cancellation_policy")
577        .filter(|s| !s.is_empty())
578        .map(str::to_owned);
579
580    // Route unknown fields: namespaced-prefix (e.g. `cairn.task_id`) →
581    // tags; anything else → corruption.
582    let mut tags: BTreeMap<String, String> = BTreeMap::new();
583    for (k, v) in raw {
584        if FLOW_CORE_KNOWN_FIELDS.contains(&k.as_str()) {
585            continue;
586        }
587        if is_namespaced_tag_key(k) {
588            tags.insert(k.clone(), v.clone());
589        } else {
590            return Err(corruption(
591                ctx,
592                None,
593                &format!(
594                    "has unexpected field '{k}' — not an FF field and not a namespaced \
595                     tag (lowercase-alphanumeric-prefix + '.')"
596                ),
597            ));
598        }
599    }
600
601    Ok(FlowSnapshot::new(
602        flow_id,
603        flow_kind,
604        namespace,
605        public_flow_state,
606        graph_revision,
607        node_count,
608        edge_count,
609        created_at,
610        last_mutation_at,
611        cancelled_at,
612        cancel_reason,
613        cancellation_policy,
614        tags,
615    ))
616}
617
/// Report whether `k` starts with a namespaced-tag prefix, i.e. matches
/// `^[a-z][a-z0-9_]*\.`.
///
/// Only the part before the first `.` is constrained: it must begin
/// with an ASCII lowercase letter and continue with ASCII lowercase
/// letters, digits, or `_`. Whatever follows the first `.` is not
/// inspected. Written by hand so no regex dependency is needed.
pub(crate) fn is_namespaced_tag_key(k: &str) -> bool {
    // Without a '.' there is no namespace prefix at all.
    let Some((prefix, _rest)) = k.split_once('.') else {
        return false;
    };
    // Byte-wise checks are sufficient: every accepted character is
    // ASCII, and any byte of a multi-byte character fails both tests.
    let mut bytes = prefix.bytes();
    matches!(bytes.next(), Some(b'a'..=b'z'))
        && bytes.all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'_'))
}
641
642#[cfg(test)]
643mod tests {
644    use super::*;
645    use crate::partition::PartitionConfig;
646
647    fn fid() -> FlowId {
648        FlowId::new()
649    }
650
651    fn eids_for_flow(f: &FlowId) -> (ExecutionId, ExecutionId) {
652        let cfg = PartitionConfig::default();
653        (
654            ExecutionId::for_flow(f, &cfg),
655            ExecutionId::for_flow(f, &cfg),
656        )
657    }
658
659    fn minimal_edge_hash(
660        flow: &FlowId,
661        edge: &EdgeId,
662        up: &ExecutionId,
663        down: &ExecutionId,
664    ) -> HashMap<String, String> {
665        let mut m = HashMap::new();
666        m.insert("edge_id".into(), edge.to_string());
667        m.insert("flow_id".into(), flow.to_string());
668        m.insert("upstream_execution_id".into(), up.to_string());
669        m.insert("downstream_execution_id".into(), down.to_string());
670        m.insert("dependency_kind".into(), "success_only".into());
671        m.insert("satisfaction_condition".into(), "all_required".into());
672        m.insert("data_passing_ref".into(), String::new());
673        m.insert("edge_state".into(), "pending".into());
674        m.insert("created_at".into(), "1234".into());
675        m.insert("created_by".into(), "engine".into());
676        m
677    }
678
679    #[test]
680    fn round_trips_all_fields() {
681        let f = fid();
682        let edge = EdgeId::new();
683        let (up, down) = eids_for_flow(&f);
684        let raw = minimal_edge_hash(&f, &edge, &up, &down);
685        let snap = build_edge_snapshot(&f, &edge, &raw).unwrap();
686        assert_eq!(snap.edge_id, edge);
687        assert_eq!(snap.flow_id, f);
688        assert_eq!(snap.upstream_execution_id, up);
689        assert_eq!(snap.downstream_execution_id, down);
690        assert_eq!(snap.dependency_kind, "success_only");
691        assert_eq!(snap.satisfaction_condition, "all_required");
692        assert!(snap.data_passing_ref.is_none());
693        assert_eq!(snap.edge_state, "pending");
694        assert_eq!(snap.created_at.0, 1234);
695        assert_eq!(snap.created_by, "engine");
696    }
697
698    #[test]
699    fn data_passing_ref_round_trips_when_set() {
700        let f = fid();
701        let edge = EdgeId::new();
702        let (up, down) = eids_for_flow(&f);
703        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
704        raw.insert("data_passing_ref".into(), "ref://blob-42".into());
705        let snap = build_edge_snapshot(&f, &edge, &raw).unwrap();
706        assert_eq!(snap.data_passing_ref.as_deref(), Some("ref://blob-42"));
707    }
708
709    fn expect_corruption(err: EngineError) -> String {
710        match err {
711            EngineError::Validation {
712                kind: ValidationKind::Corruption,
713                detail,
714            } => detail,
715            other => panic!("expected Validation::Corruption, got {other:?}"),
716        }
717    }
718
719    #[test]
720    fn unknown_field_fails_loud() {
721        let f = fid();
722        let edge = EdgeId::new();
723        let (up, down) = eids_for_flow(&f);
724        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
725        raw.insert("bogus_future_field".into(), "v".into());
726        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
727        assert!(detail.contains("bogus_future_field"), "{detail}");
728    }
729
730    #[test]
731    fn flow_id_mismatch_fails_loud() {
732        let f = fid();
733        let other = fid();
734        let edge = EdgeId::new();
735        let (up, down) = eids_for_flow(&f);
736        let raw = minimal_edge_hash(&other, &edge, &up, &down);
737        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
738        assert!(detail.contains("flow_id"), "{detail}");
739        assert!(detail.contains("does not match"), "{detail}");
740    }
741
742    #[test]
743    fn edge_id_mismatch_fails_loud() {
744        let f = fid();
745        let edge = EdgeId::new();
746        let other_edge = EdgeId::new();
747        let (up, down) = eids_for_flow(&f);
748        let raw = minimal_edge_hash(&f, &other_edge, &up, &down);
749        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
750        assert!(detail.contains("edge_id"), "{detail}");
751        assert!(detail.contains("does not match"), "{detail}");
752    }
753
754    #[test]
755    fn missing_required_fields_fail_loud() {
756        for want in [
757            "edge_id",
758            "flow_id",
759            "upstream_execution_id",
760            "downstream_execution_id",
761            "dependency_kind",
762            "satisfaction_condition",
763            "edge_state",
764            "created_at",
765            "created_by",
766        ] {
767            let f = fid();
768            let edge = EdgeId::new();
769            let (up, down) = eids_for_flow(&f);
770            let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
771            raw.remove(want);
772            let err = build_edge_snapshot(&f, &edge, &raw)
773                .err()
774                .unwrap_or_else(|| panic!("missing {want} should fail"));
775            let detail = expect_corruption(err);
776            assert!(detail.contains(want), "detail for {want}: {detail}");
777        }
778    }
779
780    #[test]
781    fn malformed_created_at_fails_loud() {
782        let f = fid();
783        let edge = EdgeId::new();
784        let (up, down) = eids_for_flow(&f);
785        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
786        raw.insert("created_at".into(), "not-a-number".into());
787        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
788        assert!(detail.contains("created_at"), "{detail}");
789    }
790
791    #[test]
792    fn malformed_upstream_eid_fails_loud() {
793        let f = fid();
794        let edge = EdgeId::new();
795        let (up, down) = eids_for_flow(&f);
796        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
797        raw.insert("upstream_execution_id".into(), "not-an-execution-id".into());
798        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
799        assert!(detail.contains("upstream_execution_id"), "{detail}");
800    }
801
802    // ─── ExecutionSnapshot (describe_execution) ───────────────────────
803
804    fn eid() -> ExecutionId {
805        let config = PartitionConfig::default();
806        ExecutionId::for_flow(&FlowId::new(), &config)
807    }
808
809    fn minimal_core(public_state: &str) -> HashMap<String, String> {
810        let mut m = HashMap::new();
811        m.insert("public_state".to_owned(), public_state.to_owned());
812        m.insert("lane_id".to_owned(), "default".to_owned());
813        m.insert("namespace".to_owned(), "ns".to_owned());
814        m.insert("created_at".to_owned(), "1000".to_owned());
815        m.insert("last_mutation_at".to_owned(), "2000".to_owned());
816        m.insert("total_attempt_count".to_owned(), "0".to_owned());
817        m
818    }
819
820    fn expect_corruption_field<F>(err: EngineError, pred: F)
821    where
822        F: FnOnce(&str) -> bool,
823    {
824        let detail = expect_corruption(err);
825        assert!(pred(&detail), "detail did not match predicate: {detail}");
826    }
827
828    #[test]
829    fn waiting_exec_no_attempt_no_lease_no_tags() {
830        let snap = build_execution_snapshot(eid(), &minimal_core("waiting"), HashMap::new())
831            .unwrap()
832            .expect("should build");
833        assert_eq!(snap.public_state, PublicState::Waiting);
834        assert!(snap.current_attempt.is_none());
835        assert!(snap.current_lease.is_none());
836        assert!(snap.current_waitpoint.is_none());
837        assert_eq!(snap.tags.len(), 0);
838        assert_eq!(snap.created_at.0, 1000);
839        assert_eq!(snap.last_mutation_at.0, 2000);
840        assert!(snap.flow_id.is_none());
841        assert!(snap.blocking_reason.is_none());
842    }
843
844    #[test]
845    fn tags_flow_through_sorted() {
846        let mut tags = HashMap::new();
847        tags.insert("cairn.task_id".to_owned(), "t-1".to_owned());
848        tags.insert("cairn.project".to_owned(), "proj".to_owned());
849        let snap = build_execution_snapshot(eid(), &minimal_core("waiting"), tags)
850            .unwrap()
851            .unwrap();
852        let keys: Vec<_> = snap.tags.keys().cloned().collect();
853        assert_eq!(
854            keys,
855            vec!["cairn.project".to_owned(), "cairn.task_id".to_owned()]
856        );
857    }
858
859    #[test]
860    fn invalid_public_state_fails_loud() {
861        let err =
862            build_execution_snapshot(eid(), &minimal_core("bogus"), HashMap::new()).unwrap_err();
863        expect_corruption_field(err, |d| d.contains("public_state"));
864    }
865
866    #[test]
867    fn invalid_lane_id_fails_loud() {
868        let mut core = minimal_core("waiting");
869        core.insert("lane_id".to_owned(), "lane\nbroken".to_owned());
870        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
871        expect_corruption_field(err, |d| d.contains("lane_id"));
872    }
873
874    #[test]
875    fn missing_required_timestamps_fail_loud() {
876        for want in ["created_at", "last_mutation_at"] {
877            let mut core = minimal_core("waiting");
878            core.remove(want);
879            let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
880            expect_corruption_field(err, |d| d.contains(want));
881        }
882    }
883
884    #[test]
885    fn malformed_total_attempt_count_fails_loud() {
886        let mut core = minimal_core("waiting");
887        core.insert("total_attempt_count".to_owned(), "not-a-number".to_owned());
888        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
889        expect_corruption_field(err, |d| d.contains("total_attempt_count"));
890    }
891
892    #[test]
893    fn attempt_id_without_index_fails_loud() {
894        let mut core = minimal_core("active");
895        core.insert(
896            "current_attempt_id".to_owned(),
897            AttemptId::new().to_string(),
898        );
899        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
900        expect_corruption_field(err, |d| d.contains("current_attempt_index"));
901    }
902
903    #[test]
904    fn lease_without_epoch_fails_loud() {
905        let mut core = minimal_core("active");
906        core.insert(
907            "current_worker_instance_id".to_owned(),
908            "w-inst-1".to_owned(),
909        );
910        core.insert("lease_expires_at".to_owned(), "9000".to_owned());
911        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
912        expect_corruption_field(err, |d| d.contains("current_lease_epoch"));
913    }
914
915    #[test]
916    fn lease_summary_requires_both_wid_and_expires_at() {
917        let mut core = minimal_core("active");
918        core.insert(
919            "current_worker_instance_id".to_owned(),
920            "w-inst-1".to_owned(),
921        );
922        let snap = build_execution_snapshot(eid(), &core, HashMap::new())
923            .unwrap()
924            .unwrap();
925        assert!(snap.current_lease.is_none());
926
927        core.insert("lease_expires_at".to_owned(), "9000".to_owned());
928        core.insert("current_lease_epoch".to_owned(), "3".to_owned());
929        let snap = build_execution_snapshot(eid(), &core, HashMap::new())
930            .unwrap()
931            .unwrap();
932        let lease = snap.current_lease.expect("lease present");
933        assert_eq!(lease.lease_epoch, LeaseEpoch::new(3));
934        assert_eq!(lease.expires_at.0, 9000);
935        assert_eq!(lease.worker_instance_id.as_str(), "w-inst-1");
936    }
937
938    // ─── FlowSnapshot (describe_flow) ─────────────────────────────────
939
940    fn minimal_flow_core(id: &FlowId, state: &str) -> HashMap<String, String> {
941        let mut m = HashMap::new();
942        m.insert("flow_id".to_owned(), id.to_string());
943        m.insert("flow_kind".to_owned(), "dag".to_owned());
944        m.insert("namespace".to_owned(), "ns".to_owned());
945        m.insert("public_flow_state".to_owned(), state.to_owned());
946        m.insert("graph_revision".to_owned(), "0".to_owned());
947        m.insert("node_count".to_owned(), "0".to_owned());
948        m.insert("edge_count".to_owned(), "0".to_owned());
949        m.insert("created_at".to_owned(), "1000".to_owned());
950        m.insert("last_mutation_at".to_owned(), "1000".to_owned());
951        m
952    }
953
954    #[test]
955    fn open_flow_round_trips() {
956        let f = fid();
957        let snap = build_flow_snapshot(f.clone(), &minimal_flow_core(&f, "open")).unwrap();
958        assert_eq!(snap.flow_id, f);
959        assert_eq!(snap.flow_kind, "dag");
960        assert_eq!(snap.namespace.as_str(), "ns");
961        assert_eq!(snap.public_flow_state, "open");
962        assert_eq!(snap.graph_revision, 0);
963        assert_eq!(snap.node_count, 0);
964        assert_eq!(snap.edge_count, 0);
965        assert_eq!(snap.created_at.0, 1000);
966        assert_eq!(snap.last_mutation_at.0, 1000);
967        assert!(snap.cancelled_at.is_none());
968        assert!(snap.cancel_reason.is_none());
969        assert!(snap.cancellation_policy.is_none());
970        assert!(snap.tags.is_empty());
971    }
972
973    #[test]
974    fn cancelled_flow_surfaces_cancel_fields() {
975        let f = fid();
976        let mut core = minimal_flow_core(&f, "cancelled");
977        core.insert("cancelled_at".to_owned(), "2000".to_owned());
978        core.insert("cancel_reason".to_owned(), "operator".to_owned());
979        core.insert("cancellation_policy".to_owned(), "cancel_all".to_owned());
980        let snap = build_flow_snapshot(f, &core).unwrap();
981        assert_eq!(snap.public_flow_state, "cancelled");
982        assert_eq!(snap.cancelled_at.unwrap().0, 2000);
983        assert_eq!(snap.cancel_reason.as_deref(), Some("operator"));
984        assert_eq!(snap.cancellation_policy.as_deref(), Some("cancel_all"));
985    }
986
987    #[test]
988    fn namespaced_tags_routed_to_tags_map() {
989        let f = fid();
990        let mut core = minimal_flow_core(&f, "open");
991        core.insert("cairn.task_id".to_owned(), "t-1".to_owned());
992        core.insert("cairn.project".to_owned(), "proj".to_owned());
993        core.insert("operator.label".to_owned(), "v".to_owned());
994        let snap = build_flow_snapshot(f, &core).unwrap();
995        assert_eq!(snap.tags.len(), 3);
996        let keys: Vec<_> = snap.tags.keys().cloned().collect();
997        assert_eq!(
998            keys,
999            vec![
1000                "cairn.project".to_owned(),
1001                "cairn.task_id".to_owned(),
1002                "operator.label".to_owned()
1003            ]
1004        );
1005    }
1006
1007    #[test]
1008    fn unknown_flat_field_fails_loud() {
1009        let f = fid();
1010        let mut core = minimal_flow_core(&f, "open");
1011        core.insert("bogus_future_field".to_owned(), "v".to_owned());
1012        let err = build_flow_snapshot(f, &core).unwrap_err();
1013        expect_corruption_field(err, |d| d.contains("bogus_future_field"));
1014    }
1015
1016    #[test]
1017    fn missing_required_flow_fields_fail_loud() {
1018        for want in [
1019            "flow_id",
1020            "namespace",
1021            "flow_kind",
1022            "public_flow_state",
1023            "graph_revision",
1024            "node_count",
1025            "edge_count",
1026            "created_at",
1027            "last_mutation_at",
1028        ] {
1029            let f = fid();
1030            let mut core = minimal_flow_core(&f, "open");
1031            core.remove(want);
1032            let err = build_flow_snapshot(f, &core).err().unwrap_or_else(|| {
1033                panic!("field {want} should fail but build_flow_snapshot returned Ok")
1034            });
1035            expect_corruption_field(err, |d| d.contains(want));
1036        }
1037    }
1038
1039    #[test]
1040    fn empty_required_strings_fail_loud() {
1041        for want in ["flow_id", "namespace", "flow_kind", "public_flow_state"] {
1042            let f = fid();
1043            let mut core = minimal_flow_core(&f, "open");
1044            core.insert(want.to_owned(), String::new());
1045            let err = build_flow_snapshot(f, &core).err().unwrap_or_else(|| {
1046                panic!("empty {want} should fail but build_flow_snapshot returned Ok")
1047            });
1048            expect_corruption_field(err, |d| d.contains(want));
1049        }
1050    }
1051
1052    #[test]
1053    fn flow_snapshot_flow_id_mismatch_fails_loud() {
1054        let requested = fid();
1055        let other = fid();
1056        let core = minimal_flow_core(&other, "open");
1057        let err = build_flow_snapshot(requested, &core).unwrap_err();
1058        expect_corruption_field(err, |d| d.contains("flow_id") && d.contains("does not match"));
1059    }
1060
1061    #[test]
1062    fn malformed_counter_fails_loud() {
1063        let f = fid();
1064        let mut core = minimal_flow_core(&f, "open");
1065        core.insert("graph_revision".to_owned(), "not-a-number".to_owned());
1066        let err = build_flow_snapshot(f, &core).unwrap_err();
1067        expect_corruption_field(err, |d| d.contains("graph_revision"));
1068    }
1069
1070    #[test]
1071    fn namespaced_tag_matcher_boundaries() {
1072        assert!(is_namespaced_tag_key("cairn.task_id"));
1073        assert!(is_namespaced_tag_key("a.b"));
1074        assert!(is_namespaced_tag_key("ab_12.field"));
1075        assert!(!is_namespaced_tag_key("cairn_task_id"));
1076        assert!(!is_namespaced_tag_key("Cairn.task"));
1077        assert!(!is_namespaced_tag_key("1cairn.task"));
1078        assert!(!is_namespaced_tag_key(""));
1079        assert!(!is_namespaced_tag_key(".x"));
1080        assert!(!is_namespaced_tag_key("caIrn.task"));
1081    }
1082}