Skip to main content

ff_core/contracts/
decode.rs

1// `EngineError` is ~200 bytes; the decoder and its helpers return
2// `Result<_, EngineError>` throughout to match the
3// [`crate::engine_backend::EngineBackend::list_edges`] contract. The
4// variant size is a cross-crate design point (see ff-backend-valkey's
5// crate-level allow for the same rationale); a future PR can box the
6// large `Conflict`/`Transport` variants. Module-local allow to
7// contain the exception to this one file.
8#![allow(clippy::result_large_err)]
9
10//! Canonical decoders for engine-owned hash shapes.
11//!
12//! RFC-012 Stage 1c (T2): the edge-hash decoder lives here so every
13//! `EngineBackend` implementation — not just `ff-backend-valkey` —
14//! shares one strict-parse posture and one error surface
15//! ([`EngineError::Validation { kind: Corruption, .. }`]). ff-sdk's
16//! snapshot module historically owned this code and surfaced
17//! `SdkError::Config`; the pre-migration wrapper still maps to that
18//! shape so public ff-sdk callers see no behavior change while the
19//! engine-side decoder moves.
20//!
21//! Stage 1c T3 adds [`build_execution_snapshot`] and
22//! [`build_flow_snapshot`] alongside the edge decoder: every
23//! engine-owned hash shape now parses through one canonical strict-parse
24//! surface, freeing `ff-backend-valkey` to implement
25//! `describe_execution` / `describe_flow` against the trait and letting
26//! ff-sdk collapse its snapshot module into thin trait forwarders.
27//!
28//! [`EngineError::Validation { kind: Corruption, .. }`]: crate::engine_error::EngineError::Validation
29
30use std::collections::{BTreeMap, HashMap};
31
32use crate::contracts::{
33    AttemptSummary, EdgeSnapshot, ExecutionSnapshot, FlowSnapshot, LeaseSummary,
34};
35use crate::engine_error::{EngineError, ValidationKind};
36use crate::state::PublicState;
37use crate::types::{
38    AttemptId, AttemptIndex, EdgeId, ExecutionId, FlowId, LaneId, LeaseEpoch, Namespace,
39    TimestampMs, WaitpointId, WorkerInstanceId,
40};
41
/// FF-owned fields on the flow-scoped `edge:<edge_id>` hash.
///
/// An HGETALL field outside this set signals on-disk corruption or
/// protocol drift — see [`build_edge_snapshot`]'s unknown-field
/// sweep, which rejects any key that is not listed here. Kept `pub` so
/// test fixtures / diagnostic tooling can share the canonical list
/// instead of hard-coding duplicates.
///
/// [`build_edge_snapshot`] requires every entry to be present and
/// non-empty, with one exception: `data_passing_ref` may be absent or
/// empty, in which case it decodes to `None`.
pub const EDGE_KNOWN_FIELDS: &[&str] = &[
    "edge_id",
    "flow_id",
    "upstream_execution_id",
    "downstream_execution_id",
    "dependency_kind",
    "satisfaction_condition",
    "data_passing_ref",
    "edge_state",
    "created_at",
    "created_by",
];
60
61/// Assemble an [`EdgeSnapshot`] from the raw HGETALL field map.
62///
63/// Mirrors the pre-T2 ff-sdk free-fn: every validation gate (unknown
64/// fields, missing required fields, identity cross-check against the
65/// caller-supplied `flow_id`/`edge_id`) returns the same diagnostic
66/// shape, just routed through [`EngineError::Validation`] with
67/// [`ValidationKind::Corruption`] instead of `SdkError::Config`. The
68/// pre-migration ff-sdk wrapper re-maps to `SdkError::Config` for
69/// public-API parity; direct backend callers read the
70/// `EngineError::Validation` payload.
71///
72/// `flow_id` + `edge_id` are the caller's expected identities. The
73/// decoder verifies both are present and match the stored values; a
74/// mismatch or absence surfaces as `Corruption` because it indicates
75/// a wrong-key read or an on-disk drift.
76pub fn build_edge_snapshot(
77    flow_id: &FlowId,
78    edge_id: &EdgeId,
79    raw: &HashMap<String, String>,
80) -> Result<EdgeSnapshot, EngineError> {
81    // Unknown-field sweep — reject eagerly so a future FF rename that
82    // landed a new field surfaces as an explicit parse failure rather
83    // than silently dropping data.
84    for k in raw.keys() {
85        if !EDGE_KNOWN_FIELDS.contains(&k.as_str()) {
86            return Err(corruption(
87                "edge_snapshot: edge_hash",
88                None,
89                &format!("has unexpected field '{k}' (protocol drift or corruption?)"),
90            ));
91        }
92    }
93
94    // edge_id cross-check.
95    let stored_edge_id_str = required(raw, "edge_snapshot: edge_hash", "edge_id")?;
96    if stored_edge_id_str != edge_id.to_string() {
97        return Err(corruption(
98            "edge_snapshot: edge_hash",
99            Some("edge_id"),
100            &format!(
101                "'{stored_edge_id_str}' does not match requested edge_id \
102                 '{edge_id}' (key corruption or wrong-key read?)"
103            ),
104        ));
105    }
106
107    // flow_id cross-check.
108    let stored_flow_id_str = required(raw, "edge_snapshot: edge_hash", "flow_id")?;
109    if stored_flow_id_str != flow_id.to_string() {
110        return Err(corruption(
111            "edge_snapshot: edge_hash",
112            Some("flow_id"),
113            &format!(
114                "'{stored_flow_id_str}' does not match requested flow_id \
115                 '{flow_id}' (key corruption or wrong-key read?)"
116            ),
117        ));
118    }
119
120    let upstream_execution_id = parse_eid(raw, "upstream_execution_id")?;
121    let downstream_execution_id = parse_eid(raw, "downstream_execution_id")?;
122
123    let dependency_kind = required(raw, "edge_snapshot: edge_hash", "dependency_kind")?;
124    let satisfaction_condition =
125        required(raw, "edge_snapshot: edge_hash", "satisfaction_condition")?;
126
127    // data_passing_ref is stored as "" when the stager passed None.
128    // Treat empty as absent rather than surfacing an empty String.
129    let data_passing_ref = raw
130        .get("data_passing_ref")
131        .filter(|s| !s.is_empty())
132        .cloned();
133
134    let edge_state = required(raw, "edge_snapshot: edge_hash", "edge_state")?;
135
136    let created_at = parse_ts_required(raw, "edge_snapshot: edge_hash", "created_at")?;
137    let created_by = required(raw, "edge_snapshot: edge_hash", "created_by")?;
138
139    Ok(EdgeSnapshot::new(
140        edge_id.clone(),
141        flow_id.clone(),
142        upstream_execution_id,
143        downstream_execution_id,
144        dependency_kind,
145        satisfaction_condition,
146        data_passing_ref,
147        edge_state,
148        created_at,
149        created_by,
150    ))
151}
152
153/// Format a `Corruption` detail string in the
154/// `"<context>: <field?>: <message>"` shape documented on
155/// [`ValidationKind::Corruption`].
156fn corruption(context: &str, field: Option<&str>, message: &str) -> EngineError {
157    let detail = match field {
158        Some(f) => format!("{context}: {f}: {message}"),
159        None => format!("{context}: {message}"),
160    };
161    EngineError::Validation {
162        kind: ValidationKind::Corruption,
163        detail,
164    }
165}
166
167/// Fetch a required non-empty string field, emitting a `Corruption`
168/// error when the field is absent or empty.
169fn required(
170    raw: &HashMap<String, String>,
171    context: &str,
172    field: &str,
173) -> Result<String, EngineError> {
174    raw.get(field)
175        .filter(|s| !s.is_empty())
176        .cloned()
177        .ok_or_else(|| {
178            corruption(
179                context,
180                Some(field),
181                "is missing or empty (key corruption?)",
182            )
183        })
184}
185
186/// Parse a ms-timestamp field that must be present.
187fn parse_ts_required(
188    raw: &HashMap<String, String>,
189    context: &str,
190    field: &str,
191) -> Result<TimestampMs, EngineError> {
192    let s = required(raw, context, field)?;
193    let ms: i64 = s.parse().map_err(|e| {
194        corruption(
195            context,
196            Some(field),
197            &format!("is not a valid ms timestamp ('{s}'): {e}"),
198        )
199    })?;
200    Ok(TimestampMs::from_millis(ms))
201}
202
203/// Parse a required ExecutionId field.
204fn parse_eid(raw: &HashMap<String, String>, field: &str) -> Result<ExecutionId, EngineError> {
205    let s = required(raw, "edge_snapshot: edge_hash", field)?;
206    ExecutionId::parse(&s).map_err(|e| {
207        corruption(
208            "edge_snapshot: edge_hash",
209            Some(field),
210            &format!("'{s}' is not a valid ExecutionId (key corruption?): {e}"),
211        )
212    })
213}
214
215// ═══════════════════════════════════════════════════════════════════════
216// execution decoder (describe_execution)
217// ═══════════════════════════════════════════════════════════════════════
218
219/// Assemble an [`ExecutionSnapshot`] from the raw HGETALL field maps.
220///
221/// `core` is the HGETALL of `exec_core`, `tags_raw` the HGETALL of the
222/// sibling tags hash (which may be empty for executions created without
223/// tags). Every parse failure surfaces as
224/// [`EngineError::Validation { kind: Corruption, .. }`] — fields that
225/// FCALLs write atomically are strict-required, while fields that clear
226/// on transition (`blocking_reason`, `current_attempt_id`, etc.) are
227/// treated as absent when empty.
228pub fn build_execution_snapshot(
229    execution_id: ExecutionId,
230    core: &HashMap<String, String>,
231    tags_raw: HashMap<String, String>,
232) -> Result<Option<ExecutionSnapshot>, EngineError> {
233    let ctx = "describe_execution: exec_core";
234
235    let public_state = parse_public_state(opt_str(core, "public_state").unwrap_or(""))?;
236
237    // `LaneId::try_new` validates non-empty + ASCII-printable + <= 64 bytes.
238    // Exec_core writes a LaneId that already passed these invariants at
239    // ingress; a read that fails validation here signals on-disk
240    // corruption — surface it rather than silently constructing an
241    // invalid LaneId that would mis-partition downstream.
242    let lane_id = LaneId::try_new(opt_str(core, "lane_id").unwrap_or("")).map_err(|e| {
243        corruption(
244            ctx,
245            Some("lane_id"),
246            &format!("fails LaneId validation (key corruption?): {e}"),
247        )
248    })?;
249
250    let namespace_str = opt_str(core, "namespace").unwrap_or("").to_owned();
251    let namespace = Namespace::new(namespace_str);
252
253    let flow_id = opt_str(core, "flow_id")
254        .filter(|s| !s.is_empty())
255        .map(|s| {
256            FlowId::parse(s).map_err(|e| {
257                corruption(
258                    ctx,
259                    Some("flow_id"),
260                    &format!("is not a valid UUID (key corruption?): {e}"),
261                )
262            })
263        })
264        .transpose()?;
265
266    let blocking_reason = opt_str(core, "blocking_reason")
267        .filter(|s| !s.is_empty())
268        .map(str::to_owned);
269    let blocking_detail = opt_str(core, "blocking_detail")
270        .filter(|s| !s.is_empty())
271        .map(str::to_owned);
272
273    // created_at + last_mutation_at are engine-maintained invariants
274    // (lua/execution.lua writes both on create; every mutating FCALL
275    // updates last_mutation_at). Missing values indicate on-disk
276    // corruption, not a valid pre-create state — fail loudly.
277    let created_at = parse_ts(core, ctx, "created_at")?.ok_or_else(|| {
278        corruption(
279            ctx,
280            Some("created_at"),
281            "is missing or empty (key corruption?)",
282        )
283    })?;
284    let last_mutation_at = parse_ts(core, ctx, "last_mutation_at")?.ok_or_else(|| {
285        corruption(
286            ctx,
287            Some("last_mutation_at"),
288            "is missing or empty (key corruption?)",
289        )
290    })?;
291
292    let total_attempt_count: u32 =
293        parse_u32_strict(core, ctx, "total_attempt_count")?.unwrap_or(0);
294
295    let current_attempt = build_attempt_summary(core)?;
296    let current_lease = build_lease_summary(core)?;
297
298    let current_waitpoint = opt_str(core, "current_waitpoint_id")
299        .filter(|s| !s.is_empty())
300        .map(|s| {
301            WaitpointId::parse(s).map_err(|e| {
302                corruption(
303                    ctx,
304                    Some("current_waitpoint_id"),
305                    &format!("is not a valid UUID (key corruption?): {e}"),
306                )
307            })
308        })
309        .transpose()?;
310
311    let tags: BTreeMap<String, String> = tags_raw.into_iter().collect();
312
313    Ok(Some(ExecutionSnapshot::new(
314        execution_id,
315        flow_id,
316        lane_id,
317        namespace,
318        public_state,
319        blocking_reason,
320        blocking_detail,
321        current_attempt,
322        current_lease,
323        current_waitpoint,
324        created_at,
325        last_mutation_at,
326        total_attempt_count,
327        tags,
328    )))
329}
330
/// Borrow the value stored under `field` as a `&str`.
///
/// Returns `None` only when the key is absent; an empty stored value is
/// returned as `Some("")`, leaving the empty-vs-absent decision to the
/// caller.
fn opt_str<'a>(map: &'a HashMap<String, String>, field: &str) -> Option<&'a str> {
    let value = map.get(field)?;
    Some(value.as_str())
}
334
335/// Strictly parse a ms-timestamp field. `Ok(None)` when absent/empty,
336/// `Err` on unparseable content. `context` names both the calling
337/// FCALL and the hash (e.g. `"describe_execution: exec_core"`) so
338/// error messages point to the exact source of corruption.
339fn parse_ts(
340    map: &HashMap<String, String>,
341    context: &str,
342    field: &str,
343) -> Result<Option<TimestampMs>, EngineError> {
344    match opt_str(map, field).filter(|s| !s.is_empty()) {
345        None => Ok(None),
346        Some(raw) => {
347            let ms: i64 = raw.parse().map_err(|e| {
348                corruption(
349                    context,
350                    Some(field),
351                    &format!("is not a valid ms timestamp ('{raw}'): {e}"),
352                )
353            })?;
354            Ok(Some(TimestampMs::from_millis(ms)))
355        }
356    }
357}
358
359/// Strictly parse a `u32` field. Returns `Ok(None)` when the field is
360/// absent or empty (a valid pre-write state), `Err` when the value is
361/// present but unparseable (on-disk corruption).
362fn parse_u32_strict(
363    map: &HashMap<String, String>,
364    context: &str,
365    field: &str,
366) -> Result<Option<u32>, EngineError> {
367    match opt_str(map, field).filter(|s| !s.is_empty()) {
368        None => Ok(None),
369        Some(raw) => Ok(Some(raw.parse().map_err(|e| {
370            corruption(
371                context,
372                Some(field),
373                &format!("is not a valid u32 ('{raw}'): {e}"),
374            )
375        })?)),
376    }
377}
378
379/// Strictly parse a `u64` field. Semantics mirror [`parse_u32_strict`].
380fn parse_u64_strict(
381    map: &HashMap<String, String>,
382    context: &str,
383    field: &str,
384) -> Result<Option<u64>, EngineError> {
385    match opt_str(map, field).filter(|s| !s.is_empty()) {
386        None => Ok(None),
387        Some(raw) => Ok(Some(raw.parse().map_err(|e| {
388            corruption(
389                context,
390                Some(field),
391                &format!("is not a valid u64 ('{raw}'): {e}"),
392            )
393        })?)),
394    }
395}
396
397fn parse_public_state(raw: &str) -> Result<PublicState, EngineError> {
398    // exec_core stores the snake_case literal (e.g. "waiting"). PublicState's
399    // Deserialize accepts the JSON-quoted form, so wrap + delegate.
400    let quoted = format!("\"{raw}\"");
401    serde_json::from_str(&quoted).map_err(|e| {
402        corruption(
403            "describe_execution: exec_core",
404            Some("public_state"),
405            &format!("'{raw}' is not a known public state: {e}"),
406        )
407    })
408}
409
410fn build_attempt_summary(
411    core: &HashMap<String, String>,
412) -> Result<Option<AttemptSummary>, EngineError> {
413    let ctx = "describe_execution: exec_core";
414    let attempt_id_str = match opt_str(core, "current_attempt_id").filter(|s| !s.is_empty()) {
415        None => return Ok(None),
416        Some(s) => s,
417    };
418    let attempt_id = AttemptId::parse(attempt_id_str).map_err(|e| {
419        corruption(
420            ctx,
421            Some("current_attempt_id"),
422            &format!("is not a valid UUID: {e}"),
423        )
424    })?;
425    // When `current_attempt_id` is set, `current_attempt_index` MUST be
426    // set too — lua/execution.lua writes both atomically in
427    // `ff_claim_execution`. A missing index while the id is populated
428    // is corruption, not a valid intermediate state.
429    let attempt_index = parse_u32_strict(core, ctx, "current_attempt_index")?.ok_or_else(|| {
430        corruption(
431            ctx,
432            Some("current_attempt_index"),
433            "is missing while current_attempt_id is set (key corruption?)",
434        )
435    })?;
436    Ok(Some(AttemptSummary::new(
437        attempt_id,
438        AttemptIndex::new(attempt_index),
439    )))
440}
441
442fn build_lease_summary(
443    core: &HashMap<String, String>,
444) -> Result<Option<LeaseSummary>, EngineError> {
445    let ctx = "describe_execution: exec_core";
446    // A lease is "held" when the worker_instance_id field is populated
447    // AND lease_expires_at is set. Both clear together on revoke/expire
448    // (see clear_lease_and_indexes in lua/helpers.lua).
449    let wid_str = match opt_str(core, "current_worker_instance_id").filter(|s| !s.is_empty()) {
450        None => return Ok(None),
451        Some(s) => s,
452    };
453    let expires_at = match parse_ts(core, ctx, "lease_expires_at")? {
454        None => return Ok(None),
455        Some(ts) => ts,
456    };
457    // A lease is only "held" if the epoch is present too — lua/helpers.lua
458    // sets/clears epoch atomically with wid + expires_at. Parse strictly
459    // and require it: a missing epoch alongside a live wid is corruption.
460    let epoch = parse_u64_strict(core, ctx, "current_lease_epoch")?.ok_or_else(|| {
461        corruption(
462            ctx,
463            Some("current_lease_epoch"),
464            "is missing while current_worker_instance_id is set (key corruption?)",
465        )
466    })?;
467    Ok(Some(LeaseSummary::new(
468        LeaseEpoch::new(epoch),
469        WorkerInstanceId::new(wid_str.to_owned()),
470        expires_at,
471    )))
472}
473
474// ═══════════════════════════════════════════════════════════════════════
475// flow decoder (describe_flow)
476// ═══════════════════════════════════════════════════════════════════════
477
/// FF-owned snake_case fields on flow_core. Any HGETALL field NOT in
/// this set AND matching the `^[a-z][a-z0-9_]*\.` namespaced-tag shape
/// is surfaced on [`FlowSnapshot::tags`]. Fields that are neither FF-
/// owned nor namespaced (unexpected shapes) are surfaced as a
/// `Corruption` error so on-disk corruption or protocol drift fails loud.
///
/// In [`build_flow_snapshot`], `cancelled_at`, `cancel_reason` and
/// `cancellation_policy` are optional (absent or empty decodes to
/// `None`); every other entry must be present and non-empty.
pub const FLOW_CORE_KNOWN_FIELDS: &[&str] = &[
    "flow_id",
    "flow_kind",
    "namespace",
    "public_flow_state",
    "graph_revision",
    "node_count",
    "edge_count",
    "created_at",
    "last_mutation_at",
    "cancelled_at",
    "cancel_reason",
    "cancellation_policy",
];
497
498/// Assemble a [`FlowSnapshot`] from the raw HGETALL field map.
499///
500/// Cross-checks the stored `flow_id` against the caller's expected id.
501/// Unknown fields that match the `^[a-z][a-z0-9_]*\.` namespaced-tag
502/// shape are routed to `tags`; any other unknown field surfaces as
503/// `Corruption`.
504pub fn build_flow_snapshot(
505    flow_id: FlowId,
506    raw: &HashMap<String, String>,
507    edge_groups: Vec<crate::contracts::EdgeGroupSnapshot>,
508) -> Result<FlowSnapshot, EngineError> {
509    let ctx = "describe_flow: flow_core";
510
511    // flow_id cross-check — corruption or wrong-key read.
512    let stored_flow_id_str = opt_str(raw, "flow_id")
513        .filter(|s| !s.is_empty())
514        .ok_or_else(|| corruption(ctx, Some("flow_id"), "is missing or empty (key corruption?)"))?;
515    if stored_flow_id_str != flow_id.to_string() {
516        return Err(corruption(
517            ctx,
518            Some("flow_id"),
519            &format!(
520                "'{stored_flow_id_str}' does not match requested flow_id \
521                 '{flow_id}' (key corruption or wrong-key read?)"
522            ),
523        ));
524    }
525
526    let namespace_str = opt_str(raw, "namespace")
527        .filter(|s| !s.is_empty())
528        .ok_or_else(|| {
529            corruption(ctx, Some("namespace"), "is missing or empty (key corruption?)")
530        })?;
531    let namespace = Namespace::new(namespace_str.to_owned());
532
533    let flow_kind = opt_str(raw, "flow_kind")
534        .filter(|s| !s.is_empty())
535        .ok_or_else(|| {
536            corruption(ctx, Some("flow_kind"), "is missing or empty (key corruption?)")
537        })?
538        .to_owned();
539
540    let public_flow_state = opt_str(raw, "public_flow_state")
541        .filter(|s| !s.is_empty())
542        .ok_or_else(|| {
543            corruption(
544                ctx,
545                Some("public_flow_state"),
546                "is missing or empty (key corruption?)",
547            )
548        })?
549        .to_owned();
550
551    let graph_revision = parse_u64_strict(raw, ctx, "graph_revision")?
552        .ok_or_else(|| corruption(ctx, Some("graph_revision"), "is missing (key corruption?)"))?;
553    let node_count = parse_u32_strict(raw, ctx, "node_count")?
554        .ok_or_else(|| corruption(ctx, Some("node_count"), "is missing (key corruption?)"))?;
555    let edge_count = parse_u32_strict(raw, ctx, "edge_count")?
556        .ok_or_else(|| corruption(ctx, Some("edge_count"), "is missing (key corruption?)"))?;
557
558    let created_at = parse_ts(raw, ctx, "created_at")?.ok_or_else(|| {
559        corruption(
560            ctx,
561            Some("created_at"),
562            "is missing or empty (key corruption?)",
563        )
564    })?;
565    let last_mutation_at = parse_ts(raw, ctx, "last_mutation_at")?.ok_or_else(|| {
566        corruption(
567            ctx,
568            Some("last_mutation_at"),
569            "is missing or empty (key corruption?)",
570        )
571    })?;
572
573    let cancelled_at = parse_ts(raw, ctx, "cancelled_at")?;
574    let cancel_reason = opt_str(raw, "cancel_reason")
575        .filter(|s| !s.is_empty())
576        .map(str::to_owned);
577    let cancellation_policy = opt_str(raw, "cancellation_policy")
578        .filter(|s| !s.is_empty())
579        .map(str::to_owned);
580
581    // Route unknown fields: namespaced-prefix (e.g. `cairn.task_id`) →
582    // tags; anything else → corruption.
583    let mut tags: BTreeMap<String, String> = BTreeMap::new();
584    for (k, v) in raw {
585        if FLOW_CORE_KNOWN_FIELDS.contains(&k.as_str()) {
586            continue;
587        }
588        if is_namespaced_tag_key(k) {
589            tags.insert(k.clone(), v.clone());
590        } else {
591            return Err(corruption(
592                ctx,
593                None,
594                &format!(
595                    "has unexpected field '{k}' — not an FF field and not a namespaced \
596                     tag (lowercase-alphanumeric-prefix + '.')"
597                ),
598            ));
599        }
600    }
601
602    Ok(FlowSnapshot::new(
603        flow_id,
604        flow_kind,
605        namespace,
606        public_flow_state,
607        graph_revision,
608        node_count,
609        edge_count,
610        created_at,
611        last_mutation_at,
612        cancelled_at,
613        cancel_reason,
614        cancellation_policy,
615        tags,
616        edge_groups,
617    ))
618}
619
/// Report whether `k` starts with a namespace prefix of the shape
/// `^[a-z][a-z0-9_]*\.` documented on [`ExecutionSnapshot::tags`] /
/// [`FlowSnapshot::tags`].
///
/// Only the text up to the first `.` is inspected: it must begin with
/// an ASCII lowercase letter and continue with ASCII lowercase letters,
/// digits or `_`. Whatever follows the first `.` is unconstrained. A key
/// without any `.` never matches. Hand-rolled to avoid a regex
/// dependency.
pub(crate) fn is_namespaced_tag_key(k: &str) -> bool {
    // No '.' at all means there is no namespace prefix to validate.
    let Some((prefix, _rest)) = k.split_once('.') else {
        return false;
    };

    // Byte-wise inspection is sufficient: every accepted character is
    // ASCII, and any byte of a multi-byte character fails all three
    // class checks below.
    let mut bytes = prefix.bytes();
    match bytes.next() {
        Some(first) if first.is_ascii_lowercase() => {}
        _ => return false,
    }
    bytes.all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'_')
}
643
644#[cfg(test)]
645mod tests {
646    use super::*;
647    use crate::partition::PartitionConfig;
648
649    fn fid() -> FlowId {
650        FlowId::new()
651    }
652
653    fn eids_for_flow(f: &FlowId) -> (ExecutionId, ExecutionId) {
654        let cfg = PartitionConfig::default();
655        (
656            ExecutionId::for_flow(f, &cfg),
657            ExecutionId::for_flow(f, &cfg),
658        )
659    }
660
661    fn minimal_edge_hash(
662        flow: &FlowId,
663        edge: &EdgeId,
664        up: &ExecutionId,
665        down: &ExecutionId,
666    ) -> HashMap<String, String> {
667        let mut m = HashMap::new();
668        m.insert("edge_id".into(), edge.to_string());
669        m.insert("flow_id".into(), flow.to_string());
670        m.insert("upstream_execution_id".into(), up.to_string());
671        m.insert("downstream_execution_id".into(), down.to_string());
672        m.insert("dependency_kind".into(), "success_only".into());
673        m.insert("satisfaction_condition".into(), "all_required".into());
674        m.insert("data_passing_ref".into(), String::new());
675        m.insert("edge_state".into(), "pending".into());
676        m.insert("created_at".into(), "1234".into());
677        m.insert("created_by".into(), "engine".into());
678        m
679    }
680
681    #[test]
682    fn round_trips_all_fields() {
683        let f = fid();
684        let edge = EdgeId::new();
685        let (up, down) = eids_for_flow(&f);
686        let raw = minimal_edge_hash(&f, &edge, &up, &down);
687        let snap = build_edge_snapshot(&f, &edge, &raw).unwrap();
688        assert_eq!(snap.edge_id, edge);
689        assert_eq!(snap.flow_id, f);
690        assert_eq!(snap.upstream_execution_id, up);
691        assert_eq!(snap.downstream_execution_id, down);
692        assert_eq!(snap.dependency_kind, "success_only");
693        assert_eq!(snap.satisfaction_condition, "all_required");
694        assert!(snap.data_passing_ref.is_none());
695        assert_eq!(snap.edge_state, "pending");
696        assert_eq!(snap.created_at.0, 1234);
697        assert_eq!(snap.created_by, "engine");
698    }
699
700    #[test]
701    fn data_passing_ref_round_trips_when_set() {
702        let f = fid();
703        let edge = EdgeId::new();
704        let (up, down) = eids_for_flow(&f);
705        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
706        raw.insert("data_passing_ref".into(), "ref://blob-42".into());
707        let snap = build_edge_snapshot(&f, &edge, &raw).unwrap();
708        assert_eq!(snap.data_passing_ref.as_deref(), Some("ref://blob-42"));
709    }
710
711    fn expect_corruption(err: EngineError) -> String {
712        match err {
713            EngineError::Validation {
714                kind: ValidationKind::Corruption,
715                detail,
716            } => detail,
717            other => panic!("expected Validation::Corruption, got {other:?}"),
718        }
719    }
720
721    #[test]
722    fn unknown_field_fails_loud() {
723        let f = fid();
724        let edge = EdgeId::new();
725        let (up, down) = eids_for_flow(&f);
726        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
727        raw.insert("bogus_future_field".into(), "v".into());
728        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
729        assert!(detail.contains("bogus_future_field"), "{detail}");
730    }
731
732    #[test]
733    fn flow_id_mismatch_fails_loud() {
734        let f = fid();
735        let other = fid();
736        let edge = EdgeId::new();
737        let (up, down) = eids_for_flow(&f);
738        let raw = minimal_edge_hash(&other, &edge, &up, &down);
739        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
740        assert!(detail.contains("flow_id"), "{detail}");
741        assert!(detail.contains("does not match"), "{detail}");
742    }
743
744    #[test]
745    fn edge_id_mismatch_fails_loud() {
746        let f = fid();
747        let edge = EdgeId::new();
748        let other_edge = EdgeId::new();
749        let (up, down) = eids_for_flow(&f);
750        let raw = minimal_edge_hash(&f, &other_edge, &up, &down);
751        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
752        assert!(detail.contains("edge_id"), "{detail}");
753        assert!(detail.contains("does not match"), "{detail}");
754    }
755
756    #[test]
757    fn missing_required_fields_fail_loud() {
758        for want in [
759            "edge_id",
760            "flow_id",
761            "upstream_execution_id",
762            "downstream_execution_id",
763            "dependency_kind",
764            "satisfaction_condition",
765            "edge_state",
766            "created_at",
767            "created_by",
768        ] {
769            let f = fid();
770            let edge = EdgeId::new();
771            let (up, down) = eids_for_flow(&f);
772            let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
773            raw.remove(want);
774            let err = build_edge_snapshot(&f, &edge, &raw)
775                .err()
776                .unwrap_or_else(|| panic!("missing {want} should fail"));
777            let detail = expect_corruption(err);
778            assert!(detail.contains(want), "detail for {want}: {detail}");
779        }
780    }
781
782    #[test]
783    fn malformed_created_at_fails_loud() {
784        let f = fid();
785        let edge = EdgeId::new();
786        let (up, down) = eids_for_flow(&f);
787        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
788        raw.insert("created_at".into(), "not-a-number".into());
789        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
790        assert!(detail.contains("created_at"), "{detail}");
791    }
792
793    #[test]
794    fn malformed_upstream_eid_fails_loud() {
795        let f = fid();
796        let edge = EdgeId::new();
797        let (up, down) = eids_for_flow(&f);
798        let mut raw = minimal_edge_hash(&f, &edge, &up, &down);
799        raw.insert("upstream_execution_id".into(), "not-an-execution-id".into());
800        let detail = expect_corruption(build_edge_snapshot(&f, &edge, &raw).unwrap_err());
801        assert!(detail.contains("upstream_execution_id"), "{detail}");
802    }
803
804    // ─── ExecutionSnapshot (describe_execution) ───────────────────────
805
806    fn eid() -> ExecutionId {
807        let config = PartitionConfig::default();
808        ExecutionId::for_flow(&FlowId::new(), &config)
809    }
810
811    fn minimal_core(public_state: &str) -> HashMap<String, String> {
812        let mut m = HashMap::new();
813        m.insert("public_state".to_owned(), public_state.to_owned());
814        m.insert("lane_id".to_owned(), "default".to_owned());
815        m.insert("namespace".to_owned(), "ns".to_owned());
816        m.insert("created_at".to_owned(), "1000".to_owned());
817        m.insert("last_mutation_at".to_owned(), "2000".to_owned());
818        m.insert("total_attempt_count".to_owned(), "0".to_owned());
819        m
820    }
821
822    fn expect_corruption_field<F>(err: EngineError, pred: F)
823    where
824        F: FnOnce(&str) -> bool,
825    {
826        let detail = expect_corruption(err);
827        assert!(pred(&detail), "detail did not match predicate: {detail}");
828    }
829
830    #[test]
831    fn waiting_exec_no_attempt_no_lease_no_tags() {
832        let snap = build_execution_snapshot(eid(), &minimal_core("waiting"), HashMap::new())
833            .unwrap()
834            .expect("should build");
835        assert_eq!(snap.public_state, PublicState::Waiting);
836        assert!(snap.current_attempt.is_none());
837        assert!(snap.current_lease.is_none());
838        assert!(snap.current_waitpoint.is_none());
839        assert_eq!(snap.tags.len(), 0);
840        assert_eq!(snap.created_at.0, 1000);
841        assert_eq!(snap.last_mutation_at.0, 2000);
842        assert!(snap.flow_id.is_none());
843        assert!(snap.blocking_reason.is_none());
844    }
845
846    #[test]
847    fn tags_flow_through_sorted() {
848        let mut tags = HashMap::new();
849        tags.insert("cairn.task_id".to_owned(), "t-1".to_owned());
850        tags.insert("cairn.project".to_owned(), "proj".to_owned());
851        let snap = build_execution_snapshot(eid(), &minimal_core("waiting"), tags)
852            .unwrap()
853            .unwrap();
854        let keys: Vec<_> = snap.tags.keys().cloned().collect();
855        assert_eq!(
856            keys,
857            vec!["cairn.project".to_owned(), "cairn.task_id".to_owned()]
858        );
859    }
860
861    #[test]
862    fn invalid_public_state_fails_loud() {
863        let err =
864            build_execution_snapshot(eid(), &minimal_core("bogus"), HashMap::new()).unwrap_err();
865        expect_corruption_field(err, |d| d.contains("public_state"));
866    }
867
868    #[test]
869    fn invalid_lane_id_fails_loud() {
870        let mut core = minimal_core("waiting");
871        core.insert("lane_id".to_owned(), "lane\nbroken".to_owned());
872        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
873        expect_corruption_field(err, |d| d.contains("lane_id"));
874    }
875
876    #[test]
877    fn missing_required_timestamps_fail_loud() {
878        for want in ["created_at", "last_mutation_at"] {
879            let mut core = minimal_core("waiting");
880            core.remove(want);
881            let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
882            expect_corruption_field(err, |d| d.contains(want));
883        }
884    }
885
886    #[test]
887    fn malformed_total_attempt_count_fails_loud() {
888        let mut core = minimal_core("waiting");
889        core.insert("total_attempt_count".to_owned(), "not-a-number".to_owned());
890        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
891        expect_corruption_field(err, |d| d.contains("total_attempt_count"));
892    }
893
894    #[test]
895    fn attempt_id_without_index_fails_loud() {
896        let mut core = minimal_core("active");
897        core.insert(
898            "current_attempt_id".to_owned(),
899            AttemptId::new().to_string(),
900        );
901        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
902        expect_corruption_field(err, |d| d.contains("current_attempt_index"));
903    }
904
905    #[test]
906    fn lease_without_epoch_fails_loud() {
907        let mut core = minimal_core("active");
908        core.insert(
909            "current_worker_instance_id".to_owned(),
910            "w-inst-1".to_owned(),
911        );
912        core.insert("lease_expires_at".to_owned(), "9000".to_owned());
913        let err = build_execution_snapshot(eid(), &core, HashMap::new()).unwrap_err();
914        expect_corruption_field(err, |d| d.contains("current_lease_epoch"));
915    }
916
917    #[test]
918    fn lease_summary_requires_both_wid_and_expires_at() {
919        let mut core = minimal_core("active");
920        core.insert(
921            "current_worker_instance_id".to_owned(),
922            "w-inst-1".to_owned(),
923        );
924        let snap = build_execution_snapshot(eid(), &core, HashMap::new())
925            .unwrap()
926            .unwrap();
927        assert!(snap.current_lease.is_none());
928
929        core.insert("lease_expires_at".to_owned(), "9000".to_owned());
930        core.insert("current_lease_epoch".to_owned(), "3".to_owned());
931        let snap = build_execution_snapshot(eid(), &core, HashMap::new())
932            .unwrap()
933            .unwrap();
934        let lease = snap.current_lease.expect("lease present");
935        assert_eq!(lease.lease_epoch, LeaseEpoch::new(3));
936        assert_eq!(lease.expires_at.0, 9000);
937        assert_eq!(lease.worker_instance_id.as_str(), "w-inst-1");
938    }
939
940    // ─── FlowSnapshot (describe_flow) ─────────────────────────────────
941
942    fn minimal_flow_core(id: &FlowId, state: &str) -> HashMap<String, String> {
943        let mut m = HashMap::new();
944        m.insert("flow_id".to_owned(), id.to_string());
945        m.insert("flow_kind".to_owned(), "dag".to_owned());
946        m.insert("namespace".to_owned(), "ns".to_owned());
947        m.insert("public_flow_state".to_owned(), state.to_owned());
948        m.insert("graph_revision".to_owned(), "0".to_owned());
949        m.insert("node_count".to_owned(), "0".to_owned());
950        m.insert("edge_count".to_owned(), "0".to_owned());
951        m.insert("created_at".to_owned(), "1000".to_owned());
952        m.insert("last_mutation_at".to_owned(), "1000".to_owned());
953        m
954    }
955
956    #[test]
957    fn open_flow_round_trips() {
958        let f = fid();
959        let snap = build_flow_snapshot(f.clone(), &minimal_flow_core(&f, "open"), Vec::new()).unwrap();
960        assert_eq!(snap.flow_id, f);
961        assert_eq!(snap.flow_kind, "dag");
962        assert_eq!(snap.namespace.as_str(), "ns");
963        assert_eq!(snap.public_flow_state, "open");
964        assert_eq!(snap.graph_revision, 0);
965        assert_eq!(snap.node_count, 0);
966        assert_eq!(snap.edge_count, 0);
967        assert_eq!(snap.created_at.0, 1000);
968        assert_eq!(snap.last_mutation_at.0, 1000);
969        assert!(snap.cancelled_at.is_none());
970        assert!(snap.cancel_reason.is_none());
971        assert!(snap.cancellation_policy.is_none());
972        assert!(snap.tags.is_empty());
973    }
974
975    #[test]
976    fn cancelled_flow_surfaces_cancel_fields() {
977        let f = fid();
978        let mut core = minimal_flow_core(&f, "cancelled");
979        core.insert("cancelled_at".to_owned(), "2000".to_owned());
980        core.insert("cancel_reason".to_owned(), "operator".to_owned());
981        core.insert("cancellation_policy".to_owned(), "cancel_all".to_owned());
982        let snap = build_flow_snapshot(f, &core, Vec::new()).unwrap();
983        assert_eq!(snap.public_flow_state, "cancelled");
984        assert_eq!(snap.cancelled_at.unwrap().0, 2000);
985        assert_eq!(snap.cancel_reason.as_deref(), Some("operator"));
986        assert_eq!(snap.cancellation_policy.as_deref(), Some("cancel_all"));
987    }
988
989    #[test]
990    fn namespaced_tags_routed_to_tags_map() {
991        let f = fid();
992        let mut core = minimal_flow_core(&f, "open");
993        core.insert("cairn.task_id".to_owned(), "t-1".to_owned());
994        core.insert("cairn.project".to_owned(), "proj".to_owned());
995        core.insert("operator.label".to_owned(), "v".to_owned());
996        let snap = build_flow_snapshot(f, &core, Vec::new()).unwrap();
997        assert_eq!(snap.tags.len(), 3);
998        let keys: Vec<_> = snap.tags.keys().cloned().collect();
999        assert_eq!(
1000            keys,
1001            vec![
1002                "cairn.project".to_owned(),
1003                "cairn.task_id".to_owned(),
1004                "operator.label".to_owned()
1005            ]
1006        );
1007    }
1008
1009    #[test]
1010    fn unknown_flat_field_fails_loud() {
1011        let f = fid();
1012        let mut core = minimal_flow_core(&f, "open");
1013        core.insert("bogus_future_field".to_owned(), "v".to_owned());
1014        let err = build_flow_snapshot(f, &core, Vec::new()).unwrap_err();
1015        expect_corruption_field(err, |d| d.contains("bogus_future_field"));
1016    }
1017
1018    #[test]
1019    fn missing_required_flow_fields_fail_loud() {
1020        for want in [
1021            "flow_id",
1022            "namespace",
1023            "flow_kind",
1024            "public_flow_state",
1025            "graph_revision",
1026            "node_count",
1027            "edge_count",
1028            "created_at",
1029            "last_mutation_at",
1030        ] {
1031            let f = fid();
1032            let mut core = minimal_flow_core(&f, "open");
1033            core.remove(want);
1034            let err = build_flow_snapshot(f, &core, Vec::new()).err().unwrap_or_else(|| {
1035                panic!("field {want} should fail but build_flow_snapshot returned Ok")
1036            });
1037            expect_corruption_field(err, |d| d.contains(want));
1038        }
1039    }
1040
1041    #[test]
1042    fn empty_required_strings_fail_loud() {
1043        for want in ["flow_id", "namespace", "flow_kind", "public_flow_state"] {
1044            let f = fid();
1045            let mut core = minimal_flow_core(&f, "open");
1046            core.insert(want.to_owned(), String::new());
1047            let err = build_flow_snapshot(f, &core, Vec::new()).err().unwrap_or_else(|| {
1048                panic!("empty {want} should fail but build_flow_snapshot returned Ok")
1049            });
1050            expect_corruption_field(err, |d| d.contains(want));
1051        }
1052    }
1053
1054    #[test]
1055    fn flow_snapshot_flow_id_mismatch_fails_loud() {
1056        let requested = fid();
1057        let other = fid();
1058        let core = minimal_flow_core(&other, "open");
1059        let err = build_flow_snapshot(requested, &core, Vec::new()).unwrap_err();
1060        expect_corruption_field(err, |d| d.contains("flow_id") && d.contains("does not match"));
1061    }
1062
1063    #[test]
1064    fn malformed_counter_fails_loud() {
1065        let f = fid();
1066        let mut core = minimal_flow_core(&f, "open");
1067        core.insert("graph_revision".to_owned(), "not-a-number".to_owned());
1068        let err = build_flow_snapshot(f, &core, Vec::new()).unwrap_err();
1069        expect_corruption_field(err, |d| d.contains("graph_revision"));
1070    }
1071
1072    #[test]
1073    fn namespaced_tag_matcher_boundaries() {
1074        assert!(is_namespaced_tag_key("cairn.task_id"));
1075        assert!(is_namespaced_tag_key("a.b"));
1076        assert!(is_namespaced_tag_key("ab_12.field"));
1077        assert!(!is_namespaced_tag_key("cairn_task_id"));
1078        assert!(!is_namespaced_tag_key("Cairn.task"));
1079        assert!(!is_namespaced_tag_key("1cairn.task"));
1080        assert!(!is_namespaced_tag_key(""));
1081        assert!(!is_namespaced_tag_key(".x"));
1082        assert!(!is_namespaced_tag_key("caIrn.task"));
1083    }
1084}