Skip to main content

mfm_machine/
lib.rs

1//! Public API contract for the MFM runtime.
2//!
3//! `mfm-machine` defines the stable identifiers, execution-plan types, context and IO
4//! abstractions, and storage interfaces used throughout the workspace. Runtime implementations
5//! live in unstable submodules such as [`runtime`] and [`live_io`], while the types in this file
6//! model the architectural contract described in `docs/redesign.md`.
7//!
8//! # Example
9//!
10//! ```rust
11//! use async_trait::async_trait;
12//! use mfm_machine::context::DynContext;
13//! use mfm_machine::errors::{ContextError, StateError};
14//! use mfm_machine::ids::{ContextKey, OpId, StateId};
15//! use mfm_machine::io::IoProvider;
16//! use mfm_machine::meta::{DependencyStrategy, Idempotency, SideEffectKind, StateMeta, Tag};
17//! use mfm_machine::plan::{ExecutionPlan, StateGraph, StateNode};
18//! use mfm_machine::recorder::EventRecorder;
19//! use mfm_machine::state::{SnapshotPolicy, State, StateOutcome};
20//! use serde_json::Value;
21//! use std::sync::Arc;
22//!
23//! struct ExampleState;
24//!
25//! #[async_trait]
26//! impl State for ExampleState {
27//!     fn meta(&self) -> StateMeta {
28//!         StateMeta {
29//!             tags: vec![Tag("report".to_string())],
30//!             depends_on: Vec::new(),
31//!             depends_on_strategy: DependencyStrategy::Latest,
32//!             side_effects: SideEffectKind::Pure,
33//!             idempotency: Idempotency::None,
34//!         }
35//!     }
36//!
37//!     async fn handle(
38//!         &self,
39//!         _ctx: &mut dyn DynContext,
40//!         _io: &mut dyn IoProvider,
41//!         _rec: &mut dyn EventRecorder,
42//!     ) -> Result<StateOutcome, StateError> {
43//!         Ok(StateOutcome {
44//!             snapshot: SnapshotPolicy::OnSuccess,
45//!         })
46//!     }
47//! }
48//!
49//! struct NullContext;
50//!
51//! impl DynContext for NullContext {
52//!     fn read(&self, _key: &ContextKey) -> Result<Option<Value>, ContextError> {
53//!         Ok(None)
54//!     }
55//!
56//!     fn write(&mut self, _key: ContextKey, _value: Value) -> Result<(), ContextError> {
57//!         Ok(())
58//!     }
59//!
60//!     fn delete(&mut self, _key: &ContextKey) -> Result<(), ContextError> {
61//!         Ok(())
62//!     }
63//!
64//!     fn dump(&self) -> Result<Value, ContextError> {
65//!         Ok(serde_json::json!({}))
66//!     }
67//! }
68//!
69//! let _context = NullContext;
70//! let plan = ExecutionPlan {
71//!     op_id: OpId::must_new("portfolio_snapshot"),
72//!     graph: StateGraph {
73//!         states: vec![StateNode {
74//!             id: StateId::must_new("machine.main.report"),
75//!             state: Arc::new(ExampleState),
76//!         }],
77//!         edges: Vec::new(),
78//!     },
79//! };
80//!
81//! assert_eq!(plan.op_id.as_str(), "portfolio_snapshot");
82//! ```
83//!
84//! Source of truth: `docs/redesign.md` Appendix C.1.
85#![warn(missing_docs)]
86
87use async_trait::async_trait;
88use serde::{Deserialize, Serialize};
89use std::sync::Arc;
90use std::time::Duration;
91
92/// Stable identifier types shared by manifests, plans, events, and persisted records.
93pub mod ids {
94    use super::*;
95    use std::fmt;
96
97    /// Error returned when an identifier fails the runtime naming contract.
98    ///
99    /// The stable identifier family deliberately keeps validation strict so identifiers remain
100    /// safe to embed in manifests, event streams, context keys, and artifact-derived metadata.
101    #[derive(Clone, Debug, PartialEq, Eq)]
102    pub struct IdValidationError {
103        kind: &'static str,
104        value: String,
105    }
106
107    impl IdValidationError {
108        fn new(kind: &'static str, value: impl Into<String>) -> Self {
109            Self {
110                kind,
111                value: value.into(),
112            }
113        }
114    }
115
116    impl fmt::Display for IdValidationError {
117        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118            write!(f, "invalid {}: {}", self.kind, self.value)
119        }
120    }
121
122    impl std::error::Error for IdValidationError {}
123
124    /// Stable identifier for an operation (human meaningful).
125    /// Invariant: stable across environments; should not be random.
126    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
127    #[serde(try_from = "String", into = "String")]
128    pub struct OpId(String);
129
    /// Operation path in the form `<machine_id>.<step_id>`.
    ///
    /// NOTE(review): unlike [`OpId`] and [`StateId`], the inner string is public and the serde
    /// derives on this type carry no `try_from` hook, so nothing in this declaration validates
    /// the shape. `validate_op_path` in this module implements the corresponding check; callers
    /// that need the guarantee presumably have to invoke it themselves — confirm.
    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
    pub struct OpPath(pub String);
133
134    /// Enforced: "<machine_id>.<step_id>.<state_local_id>"
135    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
136    #[serde(try_from = "String", into = "String")]
137    pub struct StateId(String);
138
139    /// Unique run identifier (can be random).
140    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
141    pub struct RunId(pub uuid::Uuid);
142
143    /// Content-addressed identifier (hash) for an artifact.
144    /// Invariant: lowercase hex digest string (algorithm defined by policy; default SHA-256).
145    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
146    pub struct ArtifactId(pub String);
147
148    /// Namespaced key for recorded facts (external inputs).
149    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
150    pub struct FactKey(pub String);
151
152    /// Namespaced key for context entries.
153    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
154    pub struct ContextKey(pub String);
155
156    /// Stable machine-readable error code.
157    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
158    pub struct ErrorCode(pub String);
159
160    #[allow(dead_code)]
161    pub(crate) fn is_valid_id_segment(segment: &str) -> bool {
162        let b = segment.as_bytes();
163        if b.is_empty() || b.len() > 63 {
164            return false;
165        }
166
167        // ^[a-z][a-z0-9_]{0,62}$
168        match b[0] {
169            b'a'..=b'z' => {}
170            _ => return false,
171        }
172
173        for &c in &b[1..] {
174            match c {
175                b'a'..=b'z' | b'0'..=b'9' | b'_' => {}
176                _ => return false,
177            }
178        }
179
180        true
181    }
182
183    impl OpId {
184        /// Creates an operation identifier after validating the naming contract.
185        ///
186        /// Accepted values match `^[a-z][a-z0-9_]{0,62}$`.
187        ///
188        /// # Examples
189        ///
190        /// ```rust
191        /// use mfm_machine::ids::OpId;
192        ///
193        /// let op_id = OpId::new("keystore_list")?;
194        /// assert_eq!(op_id.as_str(), "keystore_list");
195        /// # Ok::<(), mfm_machine::ids::IdValidationError>(())
196        /// ```
197        pub fn new(value: impl Into<String>) -> Result<Self, IdValidationError> {
198            let value = value.into();
199            if !is_valid_id_segment(&value) {
200                return Err(IdValidationError::new("op_id", value));
201            }
202            Ok(Self(value))
203        }
204
205        /// Creates an [`OpId`] and panics if the value is invalid.
206        pub fn must_new(value: impl Into<String>) -> Self {
207            Self::new(value).expect("op id must satisfy ^[a-z][a-z0-9_]{0,62}$")
208        }
209
210        /// Returns the validated identifier as a borrowed string slice.
211        pub fn as_str(&self) -> &str {
212            &self.0
213        }
214    }
215
216    impl TryFrom<String> for OpId {
217        type Error = IdValidationError;
218
219        fn try_from(value: String) -> Result<Self, Self::Error> {
220            Self::new(value)
221        }
222    }
223
224    impl TryFrom<&str> for OpId {
225        type Error = IdValidationError;
226
227        fn try_from(value: &str) -> Result<Self, Self::Error> {
228            Self::new(value)
229        }
230    }
231
232    impl From<OpId> for String {
233        fn from(value: OpId) -> Self {
234            value.0
235        }
236    }
237
238    impl fmt::Display for OpId {
239        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240            write!(f, "{}", self.0)
241        }
242    }
243
244    #[allow(dead_code)]
245    pub(crate) fn validate_op_path(value: &str) -> bool {
246        let mut it = value.split('.');
247        let Some(machine_id) = it.next() else {
248            return false;
249        };
250        let Some(step_id) = it.next() else {
251            return false;
252        };
253        if it.next().is_some() {
254            return false;
255        }
256        is_valid_id_segment(machine_id) && is_valid_id_segment(step_id)
257    }
258
259    #[allow(dead_code)]
260    pub(crate) fn validate_state_id(value: &str) -> bool {
261        let mut it = value.split('.');
262        let Some(machine_id) = it.next() else {
263            return false;
264        };
265        let Some(step_id) = it.next() else {
266            return false;
267        };
268        let Some(state_local_id) = it.next() else {
269            return false;
270        };
271        if it.next().is_some() {
272            return false;
273        }
274        is_valid_id_segment(machine_id)
275            && is_valid_id_segment(step_id)
276            && is_valid_id_segment(state_local_id)
277    }
278
279    impl StateId {
280        /// Creates a state identifier after validating the `<machine>.<step>.<state>` shape.
281        ///
282        /// Each segment must satisfy the same naming contract as [`OpId`].
283        ///
284        /// # Examples
285        ///
286        /// ```rust
287        /// use mfm_machine::ids::StateId;
288        ///
289        /// let state_id = StateId::new("portfolio_snapshot.fetch_balances.read_eth")?;
290        /// assert_eq!(state_id.as_str(), "portfolio_snapshot.fetch_balances.read_eth");
291        /// # Ok::<(), mfm_machine::ids::IdValidationError>(())
292        /// ```
293        pub fn new(value: impl Into<String>) -> Result<Self, IdValidationError> {
294            let value = value.into();
295            if !validate_state_id(&value) {
296                return Err(IdValidationError::new("state_id", value));
297            }
298            Ok(Self(value))
299        }
300
301        /// Creates a [`StateId`] and panics if the value is invalid.
302        pub fn must_new(value: impl Into<String>) -> Self {
303            Self::new(value).expect("state id must satisfy <machine_id>.<step_id>.<state_local_id>")
304        }
305
306        /// Returns the validated identifier as a borrowed string slice.
307        pub fn as_str(&self) -> &str {
308            &self.0
309        }
310    }
311
312    impl TryFrom<String> for StateId {
313        type Error = IdValidationError;
314
315        fn try_from(value: String) -> Result<Self, Self::Error> {
316            Self::new(value)
317        }
318    }
319
320    impl TryFrom<&str> for StateId {
321        type Error = IdValidationError;
322
323        fn try_from(value: &str) -> Result<Self, Self::Error> {
324            Self::new(value)
325        }
326    }
327
328    impl From<StateId> for String {
329        fn from(value: StateId) -> Self {
330            value.0
331        }
332    }
333
334    impl fmt::Display for StateId {
335        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336            write!(f, "{}", self.0)
337        }
338    }
339
    // Test-only module for the identifier types. The test bodies are not written inline; they
    // are textually included from `tests/ids_tests.rs`, resolved relative to this source file.
    #[cfg(test)]
    mod ids_tests {
        include!("tests/ids_tests.rs");
    }
344}
345
/// Marker traits for canonical JSON hashing policy.
pub mod canonical {
    /// Canonical JSON policy marker.
    ///
    /// Design contract:
    /// - Structured data that participates in hashing MUST be serialized as canonical JSON.
    /// - Target semantics: RFC 8785 (JCS).
    ///
    /// The trait declares no methods; it only requires implementors to be `Send + Sync`.
    ///
    /// Implementations belong in `machine` internals; this module only reserves the concept.
    pub trait CanonicalJsonPolicy: Send + Sync {}
}
357
358/// Run configuration types that shape execution, replay, and provenance behavior.
359pub mod config {
360    use super::*;
361    use crate::ids::OpId;
362    use crate::meta::Tag;
363
364    /// Whether a run is allowed to perform live IO or must replay from recorded facts/artifacts.
365    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
366    pub enum IoMode {
367        /// Execute side effects through live transports and record replay inputs.
368        Live,
369        /// Disallow live side effects and resolve deterministic IO from recorded facts.
370        Replay,
371    }
372
373    /// Controls domain event verbosity. Kernel events are always emitted.
374    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
375    pub enum EventProfile {
376        /// Emit only the minimal domain-event surface alongside kernel events.
377        Minimal,
378        /// Emit the default domain-event surface for normal operation.
379        Normal,
380        /// Emit the richest built-in domain-event surface.
381        Verbose,
382        /// Use an operation-specific profile string.
383        Custom(String),
384    }
385
386    /// Backoff policy for retryable errors.
387    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
388    pub enum BackoffPolicy {
389        /// Retry after the same delay for every attempt.
390        Fixed {
391            /// Delay applied before each retry.
392            delay: Duration,
393        },
394        /// Increase the retry delay between attempts until reaching a cap.
395        Exponential {
396            /// Base delay used for the first exponential retry step.
397            base_delay: Duration,
398            /// Maximum retry delay once the exponential curve saturates.
399            max_delay: Duration,
400        },
401    }
402
403    /// Retry policy for retryable errors (including replay missing-fact errors if configured retryable).
404    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
405    pub struct RetryPolicy {
406        /// Maximum number of total attempts, including the first execution.
407        pub max_attempts: u32,
408        /// Backoff policy applied between retry attempts.
409        pub backoff: BackoffPolicy,
410    }
411
412    /// Execution mode.
413    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
414    pub enum ExecutionMode {
415        /// Run one ready state at a time.
416        Sequential,
417        /// Explicit fan-out/join model. Avoids concurrent writes to shared context.
418        FanOutJoin {
419            /// Maximum number of states the executor may run concurrently.
420            max_concurrency: u32,
421        },
422    }
423
424    /// Run-level context checkpointing policy.
425    ///
426    /// Default is `AfterEveryState` to keep resume semantics simple (no replay required on resume).
427    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
428    pub enum ContextCheckpointing {
429        /// Persist a fresh context snapshot after every state transition.
430        AfterEveryState,
431        /// Reserved for future: periodic/tag-based checkpointing policies.
432        Custom(String),
433    }
434
    /// Run-level execution configuration (policy).
    ///
    /// NOTE(review): only `nix_flake_allowlist` declares a serde default. The "default" wording
    /// on other fields is not backed by a `#[serde(default)]` attribute in this declaration, so
    /// serialized configs presumably must spell those fields out — confirm against callers.
    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
    pub struct RunConfig {
        /// Whether execution uses live IO or replay-only IO.
        pub io_mode: IoMode,
        /// Retry policy applied to retryable failures.
        pub retry_policy: RetryPolicy,
        /// Desired domain-event verbosity profile.
        pub event_profile: EventProfile,
        /// Scheduler policy used to execute ready states.
        pub execution_mode: ExecutionMode,
        /// Policy for persisting run-level context snapshots.
        pub context_checkpointing: ContextCheckpointing,

        /// If true, ReplayIo MissingFact errors are retryable (default false).
        pub replay_missing_fact_retryable: bool,

        /// States with any of these tags may be skipped by the executor.
        /// Common use: skip APPLY_SIDE_EFFECT for dry runs.
        pub skip_tags: Vec<Tag>,

        /// Allowlisted flake prefixes for `nix.exec` preflight resolution.
        ///
        /// Example prefix: `github:willyrgf/mfm`.
        /// Falls back to `default_nix_flake_allowlist()` when the field is absent on
        /// deserialization.
        #[serde(default = "default_nix_flake_allowlist")]
        pub nix_flake_allowlist: Vec<String>,
    }
462
463    /// Returns the default allowlist for `nix.exec` flake resolution.
464    pub fn default_nix_flake_allowlist() -> Vec<String> {
465        vec!["github:willyrgf/mfm".to_string()]
466    }
467
468    /// Minimal run manifest shape (stored as an artifact; hashed via canonical JSON).
469    /// Note: `input_params` MUST be canonical-JSON hashable and MUST NOT contain secrets.
470    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
471    pub struct RunManifest {
472        /// Stable identifier for the operation being executed.
473        pub op_id: OpId,
474        /// Operation version string included in the manifest hash.
475        pub op_version: String,
476        /// Canonical JSON input parameters for the operation.
477        pub input_params: serde_json::Value,
478        /// Run policy captured as part of the manifest.
479        pub run_config: RunConfig,
480        /// Build metadata used for reproducibility and provenance.
481        pub build: BuildProvenance,
482    }
483
484    /// Build provenance (reproducibility metadata).
485    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
486    pub struct BuildProvenance {
487        /// Git commit of the build, if available.
488        pub git_commit: Option<String>,
489        /// Hash of `Cargo.lock`, if captured by the caller.
490        pub cargo_lock_hash: Option<String>,
491        /// Hash of `flake.lock`, if captured by the caller.
492        pub flake_lock_hash: Option<String>,
493        /// Rust compiler version used to build the operation.
494        pub rustc_version: Option<String>,
495        /// Target triple of the built artifact.
496        pub target_triple: Option<String>,
497        /// Environment-variable names intentionally allowed into provenance.
498        pub env_allowlist: Vec<String>,
499    }
500}
501
502/// Metadata used to classify states and express recovery or idempotency intent.
503pub mod meta {
504    use super::*;
505
506    /// Tags are used for classification, filtering, and policy decisions.
507    /// Recommended format: lowercase; allow separators for namespacing if needed.
508    #[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
509    pub struct Tag(pub String);
510
511    /// Standard tags (stable identifiers).
512    ///
513    /// These constants are the stable classification vocabulary shared across planners,
514    /// executors, and policy code. Prefer reusing them instead of inventing near-duplicate
515    /// spellings in downstream crates.
516    ///
517    /// A good default mapping is:
518    /// - `CONFIG` for validation and config-shaping nodes
519    /// - `FETCH_DATA` for read/fact acquisition nodes
520    /// - `COMPUTE` for pure transforms
521    /// - `EXECUTE` for imperative runtime work
522    /// - `REPORT` for user-facing output assembly
523    ///
524    /// `APPLY_SIDE_EFFECT` and `IMPURE` are stronger policy signals and should be reserved for
525    /// states whose replay or retry behavior genuinely depends on those classifications.
526    pub mod standard_tags {
527        /// Marks a configuration or validation state.
528        pub const CONFIG: &str = "config";
529        /// Marks a state whose primary job is to retrieve external data.
530        pub const FETCH_DATA: &str = "fetch_data";
531        /// Marks a pure computation or transformation state.
532        pub const COMPUTE: &str = "compute";
533        /// Marks a state that performs or prepares executable work.
534        pub const EXECUTE: &str = "execute";
535        /// Marks a reporting or output-producing state.
536        pub const REPORT: &str = "report";
537
538        /// Marks a state that applies an external side effect.
539        pub const APPLY_SIDE_EFFECT: &str = "apply_side_effect";
540        /// Marks a state whose behavior is not purely deterministic.
541        pub const IMPURE: &str = "impure";
542    }
543
544    /// Side-effect classification (affects replay and retry semantics).
545    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
546    pub enum SideEffectKind {
547        /// The state is pure and does not interact with external systems.
548        Pure,
549        /// The state performs read-only IO.
550        ReadOnlyIo,
551        /// The state applies a side effect to an external system.
552        ApplySideEffect,
553    }
554
555    /// Optional idempotency declaration for side-effecting states.
556    /// Key semantics: stable value used for dedupe (e.g., tx intent hash).
557    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
558    pub enum Idempotency {
559        /// The state does not declare an idempotency key.
560        None,
561        /// Stable key used to deduplicate externally visible effects.
562        Key(String),
563    }
564
565    /// Strategy for choosing a recovery point among dependency candidates.
566    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
567    pub enum DependencyStrategy {
568        /// Recover from the latest matching dependency candidate.
569        Latest,
570        /// Recover from the earliest matching dependency candidate.
571        Earliest,
572        /// Recover from the latest dependency candidate that completed successfully.
573        LatestSuccessful,
574    }
575
576    /// State metadata used for policy decisions and validation.
577    ///
578    /// Notes:
579    /// - `depends_on` is an authoring-time *hint* (often used by planners).
580    /// - Execution correctness is governed by explicit plan edges.
581    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
582    pub struct StateMeta {
583        /// Classification tags applied to the state.
584        pub tags: Vec<Tag>,
585
586        /// Optional authoring-time dependency hints expressed as tags.
587        pub depends_on: Vec<Tag>,
588        /// Strategy used when resolving tag-based dependency hints.
589        pub depends_on_strategy: DependencyStrategy,
590
591        /// Side-effect classification used by replay and retry policy.
592        pub side_effects: SideEffectKind,
593        /// Declared idempotency behavior for the state.
594        pub idempotency: Idempotency,
595    }
596}
597
598/// Structured error types used by state handlers, storage layers, and the engine.
599pub mod errors {
600    use super::*;
601    use crate::ids::{ErrorCode, StateId};
602
603    #[allow(dead_code)]
604    pub(crate) const CODE_MISSING_FACT_KEY: &str = "missing_fact_key";
605
606    /// Error category used for stable handling and policies.
607    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
608    pub enum ErrorCategory {
609        /// Failed while parsing user input or structured payloads.
610        ParsingInput,
611        /// Failed while interacting with on-chain state or transactions.
612        OnChain,
613        /// Failed while interacting with off-chain systems or services.
614        OffChain,
615        /// Failed while talking to RPC-like endpoints.
616        Rpc,
617        /// Failed while reading or writing persisted state.
618        Storage,
619        /// Failed while reading or mutating execution context.
620        Context,
621        /// Failed for an uncategorized reason.
622        Unknown,
623    }
624
625    /// Structured error info (canonical-JSON compatible; MUST NOT contain secrets).
626    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
627    pub struct ErrorInfo {
628        /// Stable machine-readable error code.
629        pub code: ErrorCode,
630        /// High-level category for policy decisions and display.
631        pub category: ErrorCategory,
632        /// Whether the operation may be retried safely.
633        pub retryable: bool,
634        /// Human-readable message safe to persist and display.
635        pub message: String,
636        /// Optional structured details that must remain secret-free.
637        pub details: Option<serde_json::Value>,
638    }
639
640    /// Errors returned by state handlers (no secrets).
641    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
642    pub struct StateError {
643        /// State identifier, if the failure can be attributed to a specific planned state.
644        pub state_id: Option<StateId>,
645        /// Structured error payload safe for persistence.
646        pub info: ErrorInfo,
647    }
648
649    /// IO errors (live or replay).
650    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
651    pub enum IoError {
652        /// Replay was asked to perform deterministic IO without a fact key.
653        MissingFactKey(ErrorInfo),
654
655        /// A requested recorded fact was not found.
656        MissingFact {
657            /// Missing fact key.
658            key: crate::ids::FactKey,
659            /// Structured details about the lookup failure.
660            info: ErrorInfo,
661        },
662        /// The underlying transport returned an error.
663        Transport(ErrorInfo),
664        /// The underlying transport reported a rate limit.
665        RateLimited(ErrorInfo),
666        /// Any other IO failure that does not fit the more specific variants.
667        Other(ErrorInfo),
668    }
669
670    /// Context errors.
671    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
672    pub enum ContextError {
673        /// A requested context key was not present.
674        MissingKey {
675            /// Missing context key.
676            key: crate::ids::ContextKey,
677            /// Structured details about the lookup failure.
678            info: ErrorInfo,
679        },
680        /// JSON serialization or deserialization failed.
681        Serialization(ErrorInfo),
682        /// Any other context failure.
683        Other(ErrorInfo),
684    }
685
686    /// Storage errors (event store / artifact store).
687    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
688    pub enum StorageError {
689        /// Append failed because the optimistic concurrency expectation was stale.
690        Concurrency(ErrorInfo),
691        /// A requested record or artifact was not found.
692        NotFound(ErrorInfo),
693        /// Persisted data existed but failed integrity or shape checks.
694        Corruption(ErrorInfo),
695        /// Any other storage failure.
696        Other(ErrorInfo),
697    }
698
699    /// Run-level errors from the engine.
700    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
701    pub enum RunError {
702        /// The supplied execution plan violated structural invariants.
703        InvalidPlan(ErrorInfo),
704        /// The run failed because a storage backend returned an error.
705        Storage(StorageError),
706        /// The run failed because the execution context returned an error.
707        Context(ContextError),
708        /// The run failed because the IO provider returned an error.
709        Io(IoError),
710        /// The run failed because a state handler returned an error.
711        State(StateError),
712        /// Any other run-level failure.
713        Other(ErrorInfo),
714    }
715
716    #[cfg(test)]
717    mod tests {
718        use super::*;
719
720        #[test]
721        fn missing_fact_key_code_is_stable() {
722            assert_eq!(CODE_MISSING_FACT_KEY, "missing_fact_key");
723
724            let info = ErrorInfo {
725                code: ErrorCode(CODE_MISSING_FACT_KEY.to_string()),
726                category: ErrorCategory::Rpc,
727                retryable: false,
728                message: "missing fact key".to_string(),
729                details: None,
730            };
731
732            let err = IoError::MissingFactKey(info);
733            match err {
734                IoError::MissingFactKey(info) => assert_eq!(info.code.0, "missing_fact_key"),
735                _ => unreachable!("wrong error variant"),
736            }
737        }
738    }
739}
740
741/// Context traits for reading, mutating, and snapshotting state-machine data.
742pub mod context {
743    use super::*;
744    use crate::errors::ContextError;
745    use crate::ids::ContextKey;
746
747    /// Dynamic context interface.
748    ///
749    /// Contract:
750    /// - `dump()` returns a **full snapshot** of current state (canonical JSON object recommended).
751    /// - Implementations MUST ensure deterministic serialization of snapshot artifacts.
752    pub trait DynContext: Send {
753        /// Reads a raw JSON value from context.
754        fn read(&self, key: &ContextKey) -> Result<Option<serde_json::Value>, ContextError>;
755        /// Writes a raw JSON value into context.
756        fn write(&mut self, key: ContextKey, value: serde_json::Value) -> Result<(), ContextError>;
757        /// Deletes a context entry if it exists.
758        fn delete(&mut self, key: &ContextKey) -> Result<(), ContextError>;
759
760        /// Full snapshot of current context state.
761        fn dump(&self) -> Result<serde_json::Value, ContextError>;
762    }
763
764    /// Typed convenience extension (no default bodies; implementations may blanket-impl internally).
765    pub trait TypedContextExt {
766        /// Reads a context value and deserializes it into `T`.
767        fn read_typed<T: serde::de::DeserializeOwned>(
768            &self,
769            key: &ContextKey,
770        ) -> Result<Option<T>, ContextError>;
771
772        /// Serializes `value` and writes it into context.
773        fn write_typed<T: Serialize>(
774            &mut self,
775            key: ContextKey,
776            value: &T,
777        ) -> Result<(), ContextError>;
778    }
779
780    impl<C: DynContext + ?Sized> TypedContextExt for C {
781        fn read_typed<T: serde::de::DeserializeOwned>(
782            &self,
783            key: &ContextKey,
784        ) -> Result<Option<T>, ContextError> {
785            let Some(value) = self.read(key)? else {
786                return Ok(None);
787            };
788
789            serde_json::from_value(value).map(Some).map_err(|_| {
790                ContextError::Serialization(crate::errors::ErrorInfo {
791                    code: crate::ids::ErrorCode("context_deserialize_failed".to_string()),
792                    category: crate::errors::ErrorCategory::Context,
793                    retryable: false,
794                    message: "context value deserialization failed".to_string(),
795                    details: None,
796                })
797            })
798        }
799
800        fn write_typed<T: Serialize>(
801            &mut self,
802            key: ContextKey,
803            value: &T,
804        ) -> Result<(), ContextError> {
805            let v = serde_json::to_value(value).map_err(|_| {
806                ContextError::Serialization(crate::errors::ErrorInfo {
807                    code: crate::ids::ErrorCode("context_serialize_failed".to_string()),
808                    category: crate::errors::ErrorCategory::Context,
809                    retryable: false,
810                    message: "context value serialization failed".to_string(),
811                    details: None,
812                })
813            })?;
814
815            self.write(key, v)
816        }
817    }
818}
819
820/// Event types emitted by the engine and by state handlers during a run.
821pub mod events {
822    use super::*;
823    use crate::errors::StateError;
824    use crate::ids::{ArtifactId, OpId, OpPath, RunId, StateId};
825
    /// Recommended stable `DomainEvent.name` for fact-recorded notifications.
    pub const DOMAIN_EVENT_FACT_RECORDED: &str = "fact_recorded";
    /// Recommended stable `DomainEvent.name` for artifact-write notifications.
    pub const DOMAIN_EVENT_ARTIFACT_WRITTEN: &str = "artifact_written";
    /// Recommended stable `DomainEvent.name` for operation boundary markers.
    pub const DOMAIN_EVENT_OP_BOUNDARY: &str = "op_boundary";
    /// Recommended stable `DomainEvent.name` for child-run spawn notifications.
    pub const DOMAIN_EVENT_CHILD_RUN_SPAWNED: &str = "child_run_spawned";
    /// Recommended stable `DomainEvent.name` for child-run completion notifications.
    pub const DOMAIN_EVENT_CHILD_RUN_COMPLETED: &str = "child_run_completed";
836
837    /// Run completion status.
838    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
839    pub enum RunStatus {
840        /// The run completed successfully.
841        Completed,
842        /// The run terminated because a state failed.
843        Failed,
844        /// The run was cancelled before completion.
845        Cancelled,
846    }
847
848    /// Kernel event variants required for recovery/resume correctness.
849    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
850    pub enum KernelEvent {
851        /// Marks the start of a run and records its manifest and initial snapshot.
852        RunStarted {
853            /// Operation identifier for the run.
854            op_id: OpId,
855            /// Manifest artifact identifier.
856            manifest_id: ArtifactId,
857            /// Snapshot of initial context at run start.
858            initial_snapshot_id: ArtifactId,
859        },
860        /// Marks entry into a state attempt.
861        StateEntered {
862            /// Planned state being entered.
863            state_id: StateId,
864            /// Zero-based attempt number for this state.
865            attempt: u32,
866            /// Snapshot the attempt starts from (resume/retry boundary).
867            base_snapshot_id: ArtifactId,
868        },
869        /// Marks successful completion of a state.
870        StateCompleted {
871            /// State that completed successfully.
872            state_id: StateId,
873            /// Context snapshot captured after completion.
874            context_snapshot_id: ArtifactId,
875        },
876        /// Marks failure of a state attempt.
877        StateFailed {
878            /// State that failed.
879            state_id: StateId,
880            /// Structured failure payload.
881            error: StateError,
882            /// Diagnostic-only snapshot (must not be used as a resume boundary).
883            failure_snapshot_id: Option<ArtifactId>,
884        },
885        /// Marks the terminal status of the run.
886        RunCompleted {
887            /// Final run status.
888            status: RunStatus,
889            /// Final context snapshot, if one was produced.
890            final_snapshot_id: Option<ArtifactId>,
891        },
892    }
893
894    /// Optional domain event (operation-defined; verbosity is controlled by event profile).
895    ///
896    /// Rules:
897    /// - payload MUST be canonical-JSON compatible
898    /// - payload MUST NOT contain secrets
899    /// - large payloads SHOULD be stored as artifacts and referenced via `payload_ref`
900    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
901    pub struct DomainEvent {
902        /// Stable event name defined by the operation or a shared convention.
903        pub name: String,
904        /// Canonical JSON payload safe to persist.
905        pub payload: serde_json::Value,
906        /// Optional artifact reference for large payloads.
907        pub payload_ref: Option<ArtifactId>,
908    }
909
910    /// Envelope type used by event stores.
911    ///
912    /// The event store contract expects values of this enum to be wrapped in [`EventEnvelope`]
913    /// with strictly increasing per-run sequence numbers.
914    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
915    pub enum Event {
916        /// Engine-level event required for resume and recovery.
917        Kernel(KernelEvent),
918        /// Operation-defined event emitted during state handling.
919        Domain(DomainEvent),
920    }
921
922    /// Envelope stored in the event store.
923    ///
924    /// `ts_millis` is informational only; replay semantics come from event order and payload
925    /// content, not wall-clock time.
926    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
927    pub struct EventEnvelope {
928        /// Run to which this event belongs.
929        pub run_id: RunId,
930        /// Monotonic per-run sequence number.
931        pub seq: u64,
932
933        /// Informational timestamp; must not be required for deterministic replay semantics.
934        pub ts_millis: Option<u64>,
935
936        /// Event payload stored at this sequence number.
937        pub event: Event,
938    }
939
940    /// Recommended standard domain event payloads (not required by engine).
941    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
942    pub struct FactRecorded {
943        /// Fact key that was recorded.
944        pub key: crate::ids::FactKey,
945        /// Artifact containing the recorded payload.
946        pub payload_id: ArtifactId,
947        /// Operation-defined metadata about the fact.
948        pub meta: serde_json::Value,
949    }
950
951    /// Recommended payload for domain events that announce artifact writes.
952    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
953    pub struct ArtifactWritten {
954        /// Artifact identifier that was written.
955        pub artifact_id: ArtifactId,
956        /// Kind assigned to the written artifact.
957        pub kind: crate::stores::ArtifactKind,
958        /// Operation-defined metadata about the artifact.
959        pub meta: serde_json::Value,
960    }
961
962    /// Recommended payload for domain events that mark op boundaries.
963    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
964    pub struct OpBoundary {
965        /// Nested operation path being reported.
966        pub op_path: OpPath,
967        /// Boundary phase, typically values such as `started` or `completed`.
968        pub phase: String, // e.g. "started" | "completed"
969    }
970
971    /// Reserved for later expansion (nested machines).
972    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
973    pub struct ChildRunSpawned {
974        /// Parent run that spawned the child.
975        pub parent_run_id: RunId,
976        /// Newly created child run identifier.
977        pub child_run_id: RunId,
978        /// Manifest artifact for the child run.
979        pub child_manifest_id: ArtifactId,
980    }
981
982    /// Recommended payload for domain events that announce child-run completion.
983    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
984    pub struct ChildRunCompleted {
985        /// Child run that finished.
986        pub child_run_id: RunId,
987        /// Final status of the child run.
988        pub status: RunStatus,
989        /// Final snapshot produced by the child run, if any.
990        pub final_snapshot_id: Option<ArtifactId>,
991    }
992
993    #[cfg(test)]
994    mod tests {
995        use super::*;
996
997        use crate::hashing::artifact_id_for_json;
998
999        #[test]
1000        fn child_run_payloads_are_canonical_and_non_secret() {
1001            let v = serde_json::to_value(ChildRunSpawned {
1002                parent_run_id: RunId(uuid::Uuid::new_v4()),
1003                child_run_id: RunId(uuid::Uuid::new_v4()),
1004                child_manifest_id: ArtifactId("0".repeat(64)),
1005            })
1006            .expect("serialize");
1007            artifact_id_for_json(&v).expect("canonical-json-hashable");
1008            assert!(!crate::secrets::json_contains_secrets(&v));
1009
1010            let v = serde_json::to_value(ChildRunCompleted {
1011                child_run_id: RunId(uuid::Uuid::new_v4()),
1012                status: RunStatus::Completed,
1013                final_snapshot_id: Some(ArtifactId("1".repeat(64))),
1014            })
1015            .expect("serialize");
1016            artifact_id_for_json(&v).expect("canonical-json-hashable");
1017            assert!(!crate::secrets::json_contains_secrets(&v));
1018        }
1019    }
1020}
1021
1022/// IO request and response abstractions used by live and replay providers.
1023pub mod io {
1024    use super::*;
1025    use crate::errors::IoError;
1026    use crate::ids::{ArtifactId, FactKey};
1027
1028    /// Opaque IO call surface; collectors define typed adapters on top.
1029    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1030    pub struct IoCall {
1031        /// Namespace like "http", "jsonrpc", "coingecko", etc.
1032        pub namespace: String,
1033        /// Canonical JSON request payload (typed by the caller/collector).
1034        pub request: serde_json::Value,
1035        /// Fact key for recording/replay.
1036        ///
1037        /// Contract:
1038        /// - In Live mode, callers SHOULD provide this for replayable IO.
1039        /// - In Replay mode, deterministic IO MUST provide this (otherwise `IoError::MissingFactKey`).
1040        pub fact_key: Option<FactKey>,
1041    }
1042
1043    /// Opaque IO result surface; collectors define typed adapters on top.
1044    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1045    pub struct IoResult {
1046        /// Canonical JSON response payload.
1047        pub response: serde_json::Value,
1048        /// If recorded, points to the stored payload artifact.
1049        pub recorded_payload_id: Option<ArtifactId>,
1050    }
1051
    /// IO provider through which state handlers perform external IO.
    ///
    /// LiveIo and ReplayIo are implementations of this trait.
    #[async_trait]
    pub trait IoProvider: Send {
        /// Executes an IO request and returns the structured response.
        ///
        /// See [`IoCall::fact_key`] for when a fact key is required.
        async fn call(&mut self, call: IoCall) -> Result<IoResult, IoError>;

        /// Persists a deterministic structured value directly as a fact payload.
        ///
        /// Returns the identifier of the artifact associated with `key`.
        async fn record_value(
            &mut self,
            key: FactKey,
            value: serde_json::Value,
        ) -> Result<ArtifactId, IoError>;

        /// Looks up a recorded fact payload by key.
        ///
        /// NOTE(review): `Ok(None)` presumably means no fact is recorded under `key` — confirm
        /// against the implementations.
        async fn get_recorded_fact(&mut self, key: &FactKey)
            -> Result<Option<ArtifactId>, IoError>;

        /// Current time. If used in deterministic logic, implementations MUST record as facts.
        ///
        /// The name indicates milliseconds; the epoch is not specified here.
        async fn now_millis(&mut self) -> Result<u64, IoError>;

        /// Random bytes. If used in reproducible paths, implementations MUST record as facts.
        ///
        /// NOTE(review): presumably returns exactly `n` bytes — confirm against the
        /// implementations.
        async fn random_bytes(&mut self, n: usize) -> Result<Vec<u8>, IoError>;
    }
1075}
1076
1077/// Traits for recording operation-defined domain events during state execution.
1078pub mod recorder {
1079    use super::*;
1080    use crate::errors::RunError;
1081    use crate::events::DomainEvent;
1082
    /// Recorder that state handlers use to emit domain events.
    ///
    /// Engine contract:
    /// - Domain events are associated with the current state attempt (bounded by `StateEntered`
    ///   and a terminal event).
    /// - A state attempt MAY span multiple transactional appends; each append is atomic.
    #[async_trait]
    pub trait EventRecorder: Send {
        /// Emits a single domain event.
        async fn emit(&mut self, event: DomainEvent) -> Result<(), RunError>;
        /// Emits multiple domain events, preserving the order of `events`.
        async fn emit_many(&mut self, events: Vec<DomainEvent>) -> Result<(), RunError>;
    }
1096}
1097
1098/// State trait and outcome types used by planned execution nodes.
1099pub mod state {
1100    use super::*;
1101    use crate::context::DynContext;
1102    use crate::errors::StateError;
1103    use crate::io::IoProvider;
1104    use crate::meta::StateMeta;
1105    use crate::recorder::EventRecorder;
1106
1107    /// Indicates whether the engine should snapshot context after the state.
1108    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1109    pub enum SnapshotPolicy {
1110        /// Do not request an additional snapshot for this state outcome.
1111        Never,
1112        /// Request a snapshot only when the state succeeds.
1113        OnSuccess,
1114        /// Request a snapshot even if the state fails.
1115        Always,
1116    }
1117
1118    /// Result returned by a successful state handler.
1119    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1120    pub struct StateOutcome {
1121        /// Snapshot hint returned by the state handler.
1122        pub snapshot: SnapshotPolicy,
1123    }
1124
    /// State behavior. States do NOT own their `StateId` — IDs are assigned by the plan.
    ///
    /// # Note
    ///
    /// The engine MAY still checkpoint context according to `RunConfig.context_checkpointing`
    /// regardless of `StateOutcome.snapshot`. This hint controls additional snapshot behavior
    /// and/or diagnostic snapshots, not permission to bypass required checkpoints.
    #[async_trait]
    pub trait State: Send + Sync {
        /// Returns metadata used for planning, replay, and policy decisions.
        fn meta(&self) -> StateMeta;

        /// Executes the state against the provided context, IO provider, and event recorder.
        ///
        /// Returns a [`StateOutcome`] carrying the snapshot hint on success, or a `StateError`
        /// describing the failure.
        async fn handle(
            &self,
            ctx: &mut dyn DynContext,
            io: &mut dyn IoProvider,
            rec: &mut dyn EventRecorder,
        ) -> Result<StateOutcome, StateError>;
    }
1143
    /// Shared, reference-counted trait-object form of [`State`] used by planners and executors.
    pub type DynState = Arc<dyn State>;
1146}
1147
1148/// Types that represent the executable state graph for a run.
1149pub mod plan {
1150    use super::*;
1151    use crate::ids::{OpId, StateId};
1152    use crate::state::DynState;
1153
1154    /// An edge `from -> to` means `from` must complete before `to` can run.
1155    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1156    pub struct DependencyEdge {
1157        /// Upstream state that must complete first.
1158        pub from: StateId,
1159        /// Downstream state that depends on `from`.
1160        pub to: StateId,
1161    }
1162
    /// Planned state paired with the runtime implementation that will execute it.
    ///
    /// Cloning copies the `id` and clones the `Arc` in `state`, so clones share the same
    /// implementation object.
    #[derive(Clone)]
    pub struct StateNode {
        /// Stable identifier assigned by the planner.
        pub id: StateId,
        /// Runtime implementation invoked for this node.
        pub state: DynState,
    }
1171
    /// Executable structure derived from ops/pipelines: a set of states plus the dependency
    /// edges between them.
    #[derive(Clone)]
    pub struct StateGraph {
        /// Planned states included in the graph.
        pub states: Vec<StateNode>,
        /// Dependency edges connecting those states.
        pub edges: Vec<DependencyEdge>,
    }
1180
    /// Operation-specific executable plan passed to the engine.
    ///
    /// The engine treats this as immutable input: planners finish all graph shaping first, then
    /// the engine executes the resulting nodes and dependency edges exactly as supplied.
    #[derive(Clone)]
    pub struct ExecutionPlan {
        /// Identifier of the operation this plan belongs to.
        pub op_id: OpId,
        /// Executable state graph for the run.
        pub graph: StateGraph,
    }
1192
1193    /// Plan validation errors (fail-fast).
1194    ///
1195    /// These errors are intended for plan construction time, before a run is started. They help
1196    /// callers distinguish malformed planner output from runtime failures that occur later inside
1197    /// the execution engine.
1198    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1199    pub enum PlanValidationError {
1200        /// The plan contained no states.
1201        EmptyPlan,
1202        /// A state identifier appeared more than once.
1203        DuplicateStateId {
1204            /// Duplicated state identifier.
1205            state_id: StateId,
1206        },
1207        /// A dependency edge referenced a state that does not exist in the graph.
1208        MissingStateForEdge {
1209            /// Referenced state that could not be found.
1210            missing: StateId,
1211        },
1212        /// The graph contained a cycle.
1213        CircularDependency {
1214            /// State identifiers participating in the detected cycle.
1215            cycle: Vec<StateId>,
1216        },
1217
1218        /// Optional: if planners derive edges from tag dependencies, they may validate those too.
1219        DanglingDependencyTag {
1220            /// State whose dependency hint could not be resolved.
1221            state_id: StateId,
1222            /// Missing tag referenced by that dependency hint.
1223            missing_tag: crate::meta::Tag,
1224        },
1225    }
1226
    /// Validator interface for rejecting malformed execution plans before a run starts.
    ///
    /// Typical validators enforce graph-shape rules such as:
    /// - all state identifiers are unique
    /// - every dependency edge points to an existing state
    /// - the plan is acyclic
    pub trait PlanValidator: Send + Sync {
        /// Validates the supplied execution plan.
        ///
        /// Returns `Ok(())` when the plan is accepted, or a [`PlanValidationError`] describing
        /// why it was rejected.
        fn validate(&self, plan: &ExecutionPlan) -> Result<(), PlanValidationError>;
    }
1237}
1238
1239/// Storage traits for append-only events and immutable artifacts.
1240pub mod stores {
1241    use super::*;
1242    use crate::errors::StorageError;
1243    use crate::events::EventEnvelope;
1244    use crate::ids::{ArtifactId, RunId};
1245
1246    /// Artifact classification used for retention, validation, and output handling.
1247    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1248    pub enum ArtifactKind {
1249        /// Run manifest artifact.
1250        Manifest,
1251        /// Serialized context snapshot artifact.
1252        ContextSnapshot,
1253        /// Recorded fact payload artifact.
1254        FactPayload,
1255        /// Encrypted secret payload (ciphertext bytes only).
1256        ///
1257        /// Secret plaintext MUST NOT be stored directly in the artifact store.
1258        SecretPayload,
1259        /// User-facing output artifact.
1260        Output,
1261        /// Operation- or backend-specific artifact kind.
1262        Other(String),
1263    }
1264
    /// Append-only event store with optimistic concurrency.
    #[async_trait]
    pub trait EventStore: Send + Sync {
        /// Returns the current head sequence for `run_id`.
        async fn head_seq(&self, run_id: RunId) -> Result<u64, StorageError>;

        /// Appends events atomically if `expected_seq` matches the current head.
        ///
        /// NOTE(review): the returned `u64` is presumably the head sequence after the append —
        /// confirm against the implementations.
        async fn append(
            &self,
            run_id: RunId,
            expected_seq: u64,
            events: Vec<EventEnvelope>,
        ) -> Result<u64, StorageError>;

        /// Reads a contiguous event range starting at `from_seq`.
        ///
        /// NOTE(review): whether `to_seq` is inclusive, and whether `None` means "through the
        /// current head", is not specified here — confirm against the implementations.
        async fn read_range(
            &self,
            run_id: RunId,
            from_seq: u64,
            to_seq: Option<u64>,
        ) -> Result<Vec<EventEnvelope>, StorageError>;
    }
1287
    /// Immutable, content-addressed artifact store.
    #[async_trait]
    pub trait ArtifactStore: Send + Sync {
        /// Stores `bytes` under a content-derived identifier and returns that identifier.
        ///
        /// `kind` classifies the artifact being stored.
        async fn put(&self, kind: ArtifactKind, bytes: Vec<u8>)
            -> Result<ArtifactId, StorageError>;
        /// Loads the bytes for an existing artifact identifier.
        async fn get(&self, id: &ArtifactId) -> Result<Vec<u8>, StorageError>;
        /// Returns whether an artifact exists without loading its bytes.
        async fn exists(&self, id: &ArtifactId) -> Result<bool, StorageError>;
    }
1299}
1300
1301/// Engine traits and input/output types for starting or resuming runs.
1302pub mod engine {
1303    use super::*;
1304    use crate::config::{RunConfig, RunManifest};
1305    use crate::context::DynContext;
1306    use crate::errors::RunError;
1307    use crate::ids::{ArtifactId, RunId};
1308    use crate::plan::ExecutionPlan;
1309    use crate::stores::{ArtifactStore, EventStore};
1310
1311    /// Current run phase (observability).
1312    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1313    pub enum RunPhase {
1314        /// The run is still executing.
1315        Running,
1316        /// The run completed successfully.
1317        Completed,
1318        /// The run ended because of a failure.
1319        Failed,
1320        /// The run was cancelled.
1321        Cancelled,
1322    }
1323
1324    /// Summary returned by the execution engine after start or resume.
1325    ///
1326    /// `phase` reports the final or current lifecycle state, while `final_snapshot_id` is populated
1327    /// only when a snapshot exists at the point the engine returns.
1328    ///
1329    /// This is intentionally compact so transport layers can expose a stable run result without
1330    /// forcing callers to inspect raw event streams for the common success/failure path.
1331    #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1332    pub struct RunResult {
1333        /// Run identifier that was started or resumed.
1334        pub run_id: RunId,
1335        /// Final or current phase of the run.
1336        pub phase: RunPhase,
1337        /// Final context snapshot, if one exists.
1338        pub final_snapshot_id: Option<ArtifactId>,
1339    }
1340
    /// Inputs required to start a run.
    ///
    /// Callers usually construct this once planning is complete and the manifest has already been
    /// content-addressed and persisted or prepared for persistence.
    ///
    /// Keeping the manifest, derived plan, run config, and initial context together makes the
    /// engine boundary explicit: everything above this type is planning/orchestration, everything
    /// below it is execution and persistence.
    pub struct StartRun {
        /// Manifest content for the run.
        pub manifest: RunManifest,
        /// Content-addressed identifier of `manifest`.
        pub manifest_id: ArtifactId,
        /// Execution plan derived for the manifest.
        pub plan: ExecutionPlan,
        /// Effective run configuration used by the engine.
        pub run_config: RunConfig,
        /// Initial context the run starts from, owned as a boxed trait object.
        pub initial_context: Box<dyn DynContext>,
    }
1361
    /// Store bundle passed to the engine.
    ///
    /// Keeping the stores grouped makes it easier for higher layers to swap persistence backends
    /// without threading each store separately through every engine constructor.
    ///
    /// Cloning clones the two `Arc` handles, so clones refer to the same underlying stores.
    #[derive(Clone)]
    pub struct Stores {
        /// Event store used for append-only kernel and domain events.
        pub events: Arc<dyn EventStore>,
        /// Artifact store used for manifests, snapshots, facts, and outputs.
        pub artifacts: Arc<dyn ArtifactStore>,
    }
1373
    /// Execution engine interface for starting and resuming runs.
    #[async_trait]
    pub trait ExecutionEngine: Send + Sync {
        /// Starts a new run from the supplied manifest, plan, and initial context.
        ///
        /// Returns a [`RunResult`] summarizing the run, or a `RunError` on failure.
        async fn start(&self, stores: Stores, run: StartRun) -> Result<RunResult, RunError>;
        /// Resumes a previously started run, identified by `run_id`, from persisted state.
        ///
        /// Returns a [`RunResult`] summarizing the run, or a `RunError` on failure.
        async fn resume(&self, stores: Stores, run_id: RunId) -> Result<RunResult, RunError>;
    }
1382}
1383
1384/// Internal helpers for canonical JSON hashing and `ArtifactId` computation.
1385///
1386/// This module is not part of the stable API contract (Appendix C.1) and may change.
1387pub mod hashing;
1388
1389/// Unstable v4 runtime implementation (executor + resume logic).
1390///
1391/// Not part of the stable API contract (Appendix C.1).
1392pub mod runtime;
1393
1394/// Unstable Live IO implementation (facts recording).
1395///
1396/// Not part of the stable API contract (Appendix C.1).
1397pub mod live_io;
1398
1399/// Unstable live IO transport for external program execution (`exec` namespace).
1400///
1401/// Not part of the stable API contract (Appendix C.1).
1402pub mod exec_transport;
1403
1404/// Unstable shared process execution helpers for live IO transports.
1405///
1406/// Not part of the stable API contract (Appendix C.1).
1407pub mod process_exec;
1408
1409/// Unstable Live IO transport router (namespace dispatch).
1410///
1411/// Not part of the stable API contract (Appendix C.1).
1412pub mod live_io_router;
1413
1414/// Unstable Live IO transport registry (runtime wiring).
1415///
1416/// Not part of the stable API contract (Appendix C.1).
1417pub mod live_io_registry;
1418
1419/// Unstable Replay IO implementation (facts replay).
1420///
1421/// Not part of the stable API contract (Appendix C.1).
1422pub mod replay_io;
1423
1424pub(crate) mod attempt_envelope;
1425pub(crate) mod context_runtime;
1426pub(crate) mod event_profile;
1427pub(crate) mod secrets;