mfm_machine/lib.rs
1//! Public API contract for the MFM runtime.
2//!
3//! `mfm-machine` defines the stable identifiers, execution-plan types, context and IO
4//! abstractions, and storage interfaces used throughout the workspace. Runtime implementations
5//! live in unstable submodules such as [`runtime`] and [`live_io`], while the types in this file
6//! model the architectural contract described in `docs/redesign.md`.
7//!
8//! # Example
9//!
10//! ```rust
11//! use async_trait::async_trait;
12//! use mfm_machine::context::DynContext;
13//! use mfm_machine::errors::{ContextError, StateError};
14//! use mfm_machine::ids::{ContextKey, OpId, StateId};
15//! use mfm_machine::io::IoProvider;
16//! use mfm_machine::meta::{DependencyStrategy, Idempotency, SideEffectKind, StateMeta, Tag};
17//! use mfm_machine::plan::{ExecutionPlan, StateGraph, StateNode};
18//! use mfm_machine::recorder::EventRecorder;
19//! use mfm_machine::state::{SnapshotPolicy, State, StateOutcome};
20//! use serde_json::Value;
21//! use std::sync::Arc;
22//!
23//! struct ExampleState;
24//!
25//! #[async_trait]
26//! impl State for ExampleState {
27//! fn meta(&self) -> StateMeta {
28//! StateMeta {
29//! tags: vec![Tag("report".to_string())],
30//! depends_on: Vec::new(),
31//! depends_on_strategy: DependencyStrategy::Latest,
32//! side_effects: SideEffectKind::Pure,
33//! idempotency: Idempotency::None,
34//! }
35//! }
36//!
37//! async fn handle(
38//! &self,
39//! _ctx: &mut dyn DynContext,
40//! _io: &mut dyn IoProvider,
41//! _rec: &mut dyn EventRecorder,
42//! ) -> Result<StateOutcome, StateError> {
43//! Ok(StateOutcome {
44//! snapshot: SnapshotPolicy::OnSuccess,
45//! })
46//! }
47//! }
48//!
49//! struct NullContext;
50//!
51//! impl DynContext for NullContext {
52//! fn read(&self, _key: &ContextKey) -> Result<Option<Value>, ContextError> {
53//! Ok(None)
54//! }
55//!
56//! fn write(&mut self, _key: ContextKey, _value: Value) -> Result<(), ContextError> {
57//! Ok(())
58//! }
59//!
60//! fn delete(&mut self, _key: &ContextKey) -> Result<(), ContextError> {
61//! Ok(())
62//! }
63//!
64//! fn dump(&self) -> Result<Value, ContextError> {
65//! Ok(serde_json::json!({}))
66//! }
67//! }
68//!
69//! let _context = NullContext;
70//! let plan = ExecutionPlan {
71//! op_id: OpId::must_new("portfolio_snapshot"),
72//! graph: StateGraph {
73//! states: vec![StateNode {
74//! id: StateId::must_new("machine.main.report"),
75//! state: Arc::new(ExampleState),
76//! }],
77//! edges: Vec::new(),
78//! },
79//! };
80//!
81//! assert_eq!(plan.op_id.as_str(), "portfolio_snapshot");
82//! ```
83//!
84//! Source of truth: `docs/redesign.md` Appendix C.1.
85#![warn(missing_docs)]
86
87use async_trait::async_trait;
88use serde::{Deserialize, Serialize};
89use std::sync::Arc;
90use std::time::Duration;
91
92/// Stable identifier types shared by manifests, plans, events, and persisted records.
93pub mod ids {
94 use super::*;
95 use std::fmt;
96
/// Validation failure produced when a candidate identifier breaks the naming contract.
///
/// Validation is kept deliberately strict so identifiers stay safe to embed in manifests,
/// event streams, context keys, and artifact-derived metadata.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IdValidationError {
    // Identifier family that rejected the value (for example `"op_id"`).
    kind: &'static str,
    // The rejected input, kept verbatim for diagnostics.
    value: String,
}

impl IdValidationError {
    /// Builds an error for identifier family `kind` carrying the rejected `value`.
    fn new(kind: &'static str, value: impl Into<String>) -> Self {
        let value: String = value.into();
        IdValidationError { kind, value }
    }
}

impl fmt::Display for IdValidationError {
    /// Renders the error as `invalid <kind>: <value>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, value } = self;
        write!(f, "invalid {kind}: {value}")
    }
}

impl std::error::Error for IdValidationError {}
123
/// Stable identifier for an operation (human meaningful).
///
/// Invariant: stable across environments; should not be random.
///
/// The inner string is private, and the serde container attributes route
/// (de)serialization through `String` using `try_from` / `into`, so deserialized values
/// go through the `TryFrom<String>` conversion instead of being built directly.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(try_from = "String", into = "String")]
pub struct OpId(String);
129
/// Operation path with the intended shape "<machine_id>.<step_id>".
///
/// NOTE(review): the shape is described as enforced, but the inner field is public and
/// `Deserialize` is derived without a `try_from` hook, so this type does not validate the
/// value itself. Callers are presumably expected to uphold the shape — TODO confirm where
/// the check is applied.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct OpPath(pub String);
133
/// State identifier.
///
/// Enforced: "<machine_id>.<step_id>.<state_local_id>"
///
/// The inner string is private, and the serde container attributes route
/// (de)serialization through `String` using `try_from` / `into`, so deserialized values
/// go through the `TryFrom<String>` conversion instead of being built directly.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(try_from = "String", into = "String")]
pub struct StateId(String);
138
/// Unique run identifier (can be random).
///
/// Thin `Copy` wrapper around a [`uuid::Uuid`]; the inner value is public.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct RunId(pub uuid::Uuid);
142
/// Content-addressed identifier (hash) for an artifact.
///
/// Invariant: lowercase hex digest string (algorithm defined by policy; default SHA-256).
///
/// NOTE(review): the inner field is public and nothing in this type checks the digest
/// format, so the invariant must be upheld by whoever constructs the value.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ArtifactId(pub String);
147
/// Namespaced key for recorded facts (external inputs).
///
/// Plain string newtype; the inner value is public and no format is checked by this type.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct FactKey(pub String);
151
/// Namespaced key for context entries.
///
/// Plain string newtype; the inner value is public and no format is checked by this type.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContextKey(pub String);
155
/// Stable machine-readable error code.
///
/// Plain string newtype; the inner value is public and no format is checked by this type.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ErrorCode(pub String);
159
// Returns `true` when `segment` matches `^[a-z][a-z0-9_]{0,62}$`:
// 1 to 63 bytes, a leading ASCII lowercase letter, then only ASCII lowercase
// letters, ASCII digits, or underscores.
#[allow(dead_code)]
pub(crate) fn is_valid_id_segment(segment: &str) -> bool {
    const MAX_SEGMENT_LEN: usize = 63;

    let mut rest = segment.bytes();
    // An empty segment has no leading byte and is rejected outright.
    let Some(first) = rest.next() else {
        return false;
    };

    segment.len() <= MAX_SEGMENT_LEN
        && first.is_ascii_lowercase()
        && rest.all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9' | b'_'))
}
182
183 impl OpId {
184 /// Creates an operation identifier after validating the naming contract.
185 ///
186 /// Accepted values match `^[a-z][a-z0-9_]{0,62}$`.
187 ///
188 /// # Examples
189 ///
190 /// ```rust
191 /// use mfm_machine::ids::OpId;
192 ///
193 /// let op_id = OpId::new("keystore_list")?;
194 /// assert_eq!(op_id.as_str(), "keystore_list");
195 /// # Ok::<(), mfm_machine::ids::IdValidationError>(())
196 /// ```
197 pub fn new(value: impl Into<String>) -> Result<Self, IdValidationError> {
198 let value = value.into();
199 if !is_valid_id_segment(&value) {
200 return Err(IdValidationError::new("op_id", value));
201 }
202 Ok(Self(value))
203 }
204
205 /// Creates an [`OpId`] and panics if the value is invalid.
206 pub fn must_new(value: impl Into<String>) -> Self {
207 Self::new(value).expect("op id must satisfy ^[a-z][a-z0-9_]{0,62}$")
208 }
209
210 /// Returns the validated identifier as a borrowed string slice.
211 pub fn as_str(&self) -> &str {
212 &self.0
213 }
214 }
215
216 impl TryFrom<String> for OpId {
217 type Error = IdValidationError;
218
219 fn try_from(value: String) -> Result<Self, Self::Error> {
220 Self::new(value)
221 }
222 }
223
224 impl TryFrom<&str> for OpId {
225 type Error = IdValidationError;
226
227 fn try_from(value: &str) -> Result<Self, Self::Error> {
228 Self::new(value)
229 }
230 }
231
232 impl From<OpId> for String {
233 fn from(value: OpId) -> Self {
234 value.0
235 }
236 }
237
238 impl fmt::Display for OpId {
239 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
240 write!(f, "{}", self.0)
241 }
242 }
243
244 #[allow(dead_code)]
245 pub(crate) fn validate_op_path(value: &str) -> bool {
246 let mut it = value.split('.');
247 let Some(machine_id) = it.next() else {
248 return false;
249 };
250 let Some(step_id) = it.next() else {
251 return false;
252 };
253 if it.next().is_some() {
254 return false;
255 }
256 is_valid_id_segment(machine_id) && is_valid_id_segment(step_id)
257 }
258
259 #[allow(dead_code)]
260 pub(crate) fn validate_state_id(value: &str) -> bool {
261 let mut it = value.split('.');
262 let Some(machine_id) = it.next() else {
263 return false;
264 };
265 let Some(step_id) = it.next() else {
266 return false;
267 };
268 let Some(state_local_id) = it.next() else {
269 return false;
270 };
271 if it.next().is_some() {
272 return false;
273 }
274 is_valid_id_segment(machine_id)
275 && is_valid_id_segment(step_id)
276 && is_valid_id_segment(state_local_id)
277 }
278
279 impl StateId {
280 /// Creates a state identifier after validating the `<machine>.<step>.<state>` shape.
281 ///
282 /// Each segment must satisfy the same naming contract as [`OpId`].
283 ///
284 /// # Examples
285 ///
286 /// ```rust
287 /// use mfm_machine::ids::StateId;
288 ///
289 /// let state_id = StateId::new("portfolio_snapshot.fetch_balances.read_eth")?;
290 /// assert_eq!(state_id.as_str(), "portfolio_snapshot.fetch_balances.read_eth");
291 /// # Ok::<(), mfm_machine::ids::IdValidationError>(())
292 /// ```
293 pub fn new(value: impl Into<String>) -> Result<Self, IdValidationError> {
294 let value = value.into();
295 if !validate_state_id(&value) {
296 return Err(IdValidationError::new("state_id", value));
297 }
298 Ok(Self(value))
299 }
300
301 /// Creates a [`StateId`] and panics if the value is invalid.
302 pub fn must_new(value: impl Into<String>) -> Self {
303 Self::new(value).expect("state id must satisfy <machine_id>.<step_id>.<state_local_id>")
304 }
305
306 /// Returns the validated identifier as a borrowed string slice.
307 pub fn as_str(&self) -> &str {
308 &self.0
309 }
310 }
311
312 impl TryFrom<String> for StateId {
313 type Error = IdValidationError;
314
315 fn try_from(value: String) -> Result<Self, Self::Error> {
316 Self::new(value)
317 }
318 }
319
320 impl TryFrom<&str> for StateId {
321 type Error = IdValidationError;
322
323 fn try_from(value: &str) -> Result<Self, Self::Error> {
324 Self::new(value)
325 }
326 }
327
328 impl From<StateId> for String {
329 fn from(value: StateId) -> Self {
330 value.0
331 }
332 }
333
334 impl fmt::Display for StateId {
335 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336 write!(f, "{}", self.0)
337 }
338 }
339
// Unit tests for this module. The test source is pulled in textually from
// `tests/ids_tests.rs` and is only compiled under `cfg(test)`.
#[cfg(test)]
mod ids_tests {
    include!("tests/ids_tests.rs");
}
344}
345
/// Marker traits for canonical JSON hashing policy.
pub mod canonical {
    /// Canonical JSON policy marker.
    ///
    /// Design contract:
    /// - Structured data that participates in hashing MUST be serialized as canonical JSON.
    /// - Target semantics: RFC 8785 (JCS).
    ///
    /// Implementations belong in `machine` internals; this module only reserves the concept.
    ///
    /// The trait declares no methods; it only requires implementors to be `Send + Sync`.
    pub trait CanonicalJsonPolicy: Send + Sync {}
}
357
358/// Run configuration types that shape execution, replay, and provenance behavior.
359pub mod config {
360 use super::*;
361 use crate::ids::OpId;
362 use crate::meta::Tag;
363
/// Selects whether a run may perform live IO or must replay from recorded facts/artifacts.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum IoMode {
    /// Execute side effects through live transports and record replay inputs.
    Live,
    /// Disallow live side effects and resolve deterministic IO from recorded facts.
    Replay,
}
372
/// Controls domain event verbosity. Kernel events are always emitted.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum EventProfile {
    /// Emit only the minimal domain-event surface alongside kernel events.
    Minimal,
    /// Emit the default domain-event surface for normal operation.
    Normal,
    /// Emit the richest built-in domain-event surface.
    Verbose,
    /// Use an operation-specific profile, identified by the carried string.
    Custom(String),
}
385
/// Backoff policy for retryable errors.
///
/// This type only describes the policy; the delay computation itself is not part of this
/// declaration.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum BackoffPolicy {
    /// Retry after the same delay for every attempt.
    Fixed {
        /// Delay applied before each retry.
        delay: Duration,
    },
    /// Increase the retry delay between attempts until reaching a cap.
    Exponential {
        /// Base delay used for the first exponential retry step.
        base_delay: Duration,
        /// Maximum retry delay once the exponential curve saturates.
        max_delay: Duration,
    },
}
402
/// Retry policy for retryable errors (including replay missing-fact errors when those are
/// configured as retryable).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RetryPolicy {
    /// Maximum number of total attempts, including the first execution.
    pub max_attempts: u32,
    /// Backoff policy applied between retry attempts.
    pub backoff: BackoffPolicy,
}
411
/// Execution mode: how the scheduler runs ready states.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum ExecutionMode {
    /// Run one ready state at a time.
    Sequential,
    /// Explicit fan-out/join model. Avoids concurrent writes to shared context.
    FanOutJoin {
        /// Maximum number of states the executor may run concurrently.
        max_concurrency: u32,
    },
}
423
/// Run-level context checkpointing policy.
///
/// Default is `AfterEveryState` to keep resume semantics simple (no replay required on resume).
///
/// NOTE(review): this enum does not derive or implement `Default` in the visible part of
/// the file, so the default above is a convention — confirm where it is applied.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum ContextCheckpointing {
    /// Persist a fresh context snapshot after every state transition.
    AfterEveryState,
    /// Reserved for future: periodic/tag-based checkpointing policies.
    Custom(String),
}
434
/// Run-level execution configuration (policy).
///
/// Only `nix_flake_allowlist` declares a serde default; no other field carries a
/// `#[serde(default)]` attribute.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RunConfig {
    /// Whether execution uses live IO or replay-only IO.
    pub io_mode: IoMode,
    /// Retry policy applied to retryable failures.
    pub retry_policy: RetryPolicy,
    /// Desired domain-event verbosity profile.
    pub event_profile: EventProfile,
    /// Scheduler policy used to execute ready states.
    pub execution_mode: ExecutionMode,
    /// Policy for persisting run-level context snapshots.
    pub context_checkpointing: ContextCheckpointing,

    /// If true, ReplayIo MissingFact errors are retryable (default false).
    ///
    /// NOTE(review): no serde default is declared on this field, so the stated default must
    /// be supplied by whoever builds the config — confirm.
    pub replay_missing_fact_retryable: bool,

    /// States with any of these tags may be skipped by the executor.
    /// Common use: skip APPLY_SIDE_EFFECT for dry runs.
    pub skip_tags: Vec<Tag>,

    /// Allowlisted flake prefixes for `nix.exec` preflight resolution.
    ///
    /// Example prefix: `github:willyrgf/mfm`.
    ///
    /// When the field is absent from serialized input, it is filled from
    /// [`default_nix_flake_allowlist`].
    #[serde(default = "default_nix_flake_allowlist")]
    pub nix_flake_allowlist: Vec<String>,
}
462
/// Returns the default allowlist for `nix.exec` flake resolution.
///
/// The list contains the single prefix `github:willyrgf/mfm`.
pub fn default_nix_flake_allowlist() -> Vec<String> {
    let mut allowlist = Vec::with_capacity(1);
    allowlist.push(String::from("github:willyrgf/mfm"));
    allowlist
}
467
/// Minimal run manifest shape (stored as an artifact; hashed via canonical JSON).
///
/// Note: `input_params` MUST be canonical-JSON hashable and MUST NOT contain secrets.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct RunManifest {
    /// Stable identifier for the operation being executed.
    pub op_id: OpId,
    /// Operation version string included in the manifest hash.
    pub op_version: String,
    /// Canonical JSON input parameters for the operation.
    pub input_params: serde_json::Value,
    /// Run policy captured as part of the manifest.
    pub run_config: RunConfig,
    /// Build metadata used for reproducibility and provenance.
    pub build: BuildProvenance,
}
483
/// Build provenance (reproducibility metadata).
///
/// Every field except `env_allowlist` is optional.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BuildProvenance {
    /// Git commit of the build, if available.
    pub git_commit: Option<String>,
    /// Hash of `Cargo.lock`, if captured by the caller.
    pub cargo_lock_hash: Option<String>,
    /// Hash of `flake.lock`, if captured by the caller.
    pub flake_lock_hash: Option<String>,
    /// Rust compiler version used to build the operation, if known.
    pub rustc_version: Option<String>,
    /// Target triple of the built artifact, if known.
    pub target_triple: Option<String>,
    /// Environment-variable names intentionally allowed into provenance.
    pub env_allowlist: Vec<String>,
}
500}
501
502/// Metadata used to classify states and express recovery or idempotency intent.
503pub mod meta {
504 use super::*;
505
/// Tags are used for classification, filtering, and policy decisions.
///
/// Recommended format: lowercase; allow separators for namespacing if needed.
/// The format is a recommendation only: the inner string is public and is not checked by
/// this type.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Tag(pub String);
510
/// Standard tags (stable identifiers).
///
/// These constants are the stable classification vocabulary shared across planners,
/// executors, and policy code. Prefer reusing them instead of inventing near-duplicate
/// spellings in downstream crates.
///
/// A good default mapping is:
/// - `CONFIG` for validation and config-shaping nodes
/// - `FETCH_DATA` for read/fact acquisition nodes
/// - `COMPUTE` for pure transforms
/// - `EXECUTE` for imperative runtime work
/// - `REPORT` for user-facing output assembly
///
/// `APPLY_SIDE_EFFECT` and `IMPURE` are stronger policy signals and should be reserved for
/// states whose replay or retry behavior genuinely depends on those classifications.
///
/// The constants are plain `&str` values, not [`Tag`] instances.
pub mod standard_tags {
    /// Marks a configuration or validation state.
    pub const CONFIG: &str = "config";
    /// Marks a state whose primary job is to retrieve external data.
    pub const FETCH_DATA: &str = "fetch_data";
    /// Marks a pure computation or transformation state.
    pub const COMPUTE: &str = "compute";
    /// Marks a state that performs or prepares executable work.
    pub const EXECUTE: &str = "execute";
    /// Marks a reporting or output-producing state.
    pub const REPORT: &str = "report";

    /// Marks a state that applies an external side effect.
    pub const APPLY_SIDE_EFFECT: &str = "apply_side_effect";
    /// Marks a state whose behavior is not purely deterministic.
    pub const IMPURE: &str = "impure";
}
543
/// Side-effect classification (affects replay and retry semantics).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum SideEffectKind {
    /// The state is pure and does not interact with external systems.
    Pure,
    /// The state performs read-only IO.
    ReadOnlyIo,
    /// The state applies a side effect to an external system.
    ApplySideEffect,
}
554
/// Optional idempotency declaration for side-effecting states.
///
/// Key semantics: stable value used for dedupe (e.g., tx intent hash).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum Idempotency {
    /// The state does not declare an idempotency key.
    None,
    /// Stable key used to deduplicate externally visible effects.
    Key(String),
}
564
/// Strategy for choosing a recovery point among dependency candidates.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum DependencyStrategy {
    /// Recover from the latest matching dependency candidate.
    Latest,
    /// Recover from the earliest matching dependency candidate.
    Earliest,
    /// Recover from the latest dependency candidate that completed successfully.
    LatestSuccessful,
}
575
/// State metadata used for policy decisions and validation.
///
/// Notes:
/// - `depends_on` is an authoring-time *hint* (often used by planners).
/// - Execution correctness is governed by explicit plan edges.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StateMeta {
    /// Classification tags applied to the state.
    pub tags: Vec<Tag>,

    /// Optional authoring-time dependency hints expressed as tags; may be empty.
    pub depends_on: Vec<Tag>,
    /// Strategy used when resolving tag-based dependency hints.
    pub depends_on_strategy: DependencyStrategy,

    /// Side-effect classification used by replay and retry policy.
    pub side_effects: SideEffectKind,
    /// Declared idempotency behavior for the state.
    pub idempotency: Idempotency,
}
596}
597
598/// Structured error types used by state handlers, storage layers, and the engine.
599pub mod errors {
600 use super::*;
601 use crate::ids::{ErrorCode, StateId};
602
// Stable error-code string for the "missing fact key" condition; its value is pinned by
// the unit test in this module.
// NOTE(review): presumably paired with `IoError::MissingFactKey` by the IO layer — confirm.
#[allow(dead_code)]
pub(crate) const CODE_MISSING_FACT_KEY: &str = "missing_fact_key";
605
/// Error category used for stable handling and policies.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum ErrorCategory {
    /// Failed while parsing user input or structured payloads.
    ParsingInput,
    /// Failed while interacting with on-chain state or transactions.
    OnChain,
    /// Failed while interacting with off-chain systems or services.
    OffChain,
    /// Failed while talking to RPC-like endpoints.
    Rpc,
    /// Failed while reading or writing persisted state.
    Storage,
    /// Failed while reading or mutating execution context.
    Context,
    /// Failed for an uncategorized reason.
    Unknown,
}
624
/// Structured error info (canonical-JSON compatible; MUST NOT contain secrets).
///
/// This is the common payload carried by the error types in this module.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct ErrorInfo {
    /// Stable machine-readable error code.
    pub code: ErrorCode,
    /// High-level category for policy decisions and display.
    pub category: ErrorCategory,
    /// Whether the operation may be retried safely.
    pub retryable: bool,
    /// Human-readable message safe to persist and display.
    pub message: String,
    /// Optional structured details that must remain secret-free.
    pub details: Option<serde_json::Value>,
}
639
/// Errors returned by state handlers (no secrets).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StateError {
    /// State identifier, if the failure can be attributed to a specific planned state;
    /// `None` otherwise.
    pub state_id: Option<StateId>,
    /// Structured error payload safe for persistence.
    pub info: ErrorInfo,
}
648
/// IO errors (live or replay).
///
/// Every variant carries an [`ErrorInfo`]; `MissingFact` additionally names the fact key
/// that was looked up.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum IoError {
    /// Replay was asked to perform deterministic IO without a fact key.
    MissingFactKey(ErrorInfo),

    /// A requested recorded fact was not found.
    MissingFact {
        /// Missing fact key.
        key: crate::ids::FactKey,
        /// Structured details about the lookup failure.
        info: ErrorInfo,
    },
    /// The underlying transport returned an error.
    Transport(ErrorInfo),
    /// The underlying transport reported a rate limit.
    RateLimited(ErrorInfo),
    /// Any other IO failure that does not fit the more specific variants.
    Other(ErrorInfo),
}
669
/// Context errors.
///
/// Every variant carries an [`ErrorInfo`]; `MissingKey` additionally names the context key
/// that was looked up.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum ContextError {
    /// A requested context key was not present.
    MissingKey {
        /// Missing context key.
        key: crate::ids::ContextKey,
        /// Structured details about the lookup failure.
        info: ErrorInfo,
    },
    /// JSON serialization or deserialization failed.
    Serialization(ErrorInfo),
    /// Any other context failure.
    Other(ErrorInfo),
}
685
/// Storage errors (event store / artifact store).
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum StorageError {
    /// Append failed because the optimistic concurrency expectation was stale.
    Concurrency(ErrorInfo),
    /// A requested record or artifact was not found.
    NotFound(ErrorInfo),
    /// Persisted data existed but failed integrity or shape checks.
    Corruption(ErrorInfo),
    /// Any other storage failure.
    Other(ErrorInfo),
}
698
/// Run-level errors from the engine.
///
/// Wraps the more specific error types of this module, plus plan-validation and
/// catch-all variants that carry a bare [`ErrorInfo`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum RunError {
    /// The supplied execution plan violated structural invariants.
    InvalidPlan(ErrorInfo),
    /// The run failed because a storage backend returned an error.
    Storage(StorageError),
    /// The run failed because the execution context returned an error.
    Context(ContextError),
    /// The run failed because the IO provider returned an error.
    Io(IoError),
    /// The run failed because a state handler returned an error.
    State(StateError),
    /// Any other run-level failure.
    Other(ErrorInfo),
}
715
#[cfg(test)]
mod tests {
    use super::*;

    // Pins the wire value of the missing-fact-key code and checks that it survives a round
    // trip through `IoError::MissingFactKey`.
    #[test]
    fn missing_fact_key_code_is_stable() {
        assert_eq!(CODE_MISSING_FACT_KEY, "missing_fact_key");

        let code = ErrorCode(CODE_MISSING_FACT_KEY.to_string());
        let message = "missing fact key".to_string();
        let info = ErrorInfo {
            code,
            category: ErrorCategory::Rpc,
            retryable: false,
            message,
            details: None,
        };

        let IoError::MissingFactKey(recovered) = IoError::MissingFactKey(info) else {
            unreachable!("wrong error variant");
        };
        assert_eq!(recovered.code.0, "missing_fact_key");
    }
}
739}
740
741/// Context traits for reading, mutating, and snapshotting state-machine data.
742pub mod context {
743 use super::*;
744 use crate::errors::ContextError;
745 use crate::ids::ContextKey;
746
/// Dynamic context interface.
///
/// Values are exchanged as raw `serde_json::Value`s keyed by [`ContextKey`].
///
/// Contract:
/// - `dump()` returns a **full snapshot** of current state (canonical JSON object recommended).
/// - Implementations MUST ensure deterministic serialization of snapshot artifacts.
pub trait DynContext: Send {
    /// Reads a raw JSON value from context.
    ///
    /// The `Option` in the return type lets implementations report an absent key as
    /// `Ok(None)`.
    fn read(&self, key: &ContextKey) -> Result<Option<serde_json::Value>, ContextError>;
    /// Writes a raw JSON value into context.
    fn write(&mut self, key: ContextKey, value: serde_json::Value) -> Result<(), ContextError>;
    /// Deletes a context entry if it exists.
    fn delete(&mut self, key: &ContextKey) -> Result<(), ContextError>;

    /// Full snapshot of current context state.
    fn dump(&self) -> Result<serde_json::Value, ContextError>;
}
763
/// Typed convenience extension over the raw JSON context API.
///
/// The trait declares no default bodies. A blanket implementation for every
/// [`DynContext`] (including unsized ones such as `dyn DynContext`) follows in this module.
pub trait TypedContextExt {
    /// Reads a context value and deserializes it into `T`.
    fn read_typed<T: serde::de::DeserializeOwned>(
        &self,
        key: &ContextKey,
    ) -> Result<Option<T>, ContextError>;

    /// Serializes `value` and writes it into context.
    fn write_typed<T: Serialize>(
        &mut self,
        key: ContextKey,
        value: &T,
    ) -> Result<(), ContextError>;
}
779
780 impl<C: DynContext + ?Sized> TypedContextExt for C {
781 fn read_typed<T: serde::de::DeserializeOwned>(
782 &self,
783 key: &ContextKey,
784 ) -> Result<Option<T>, ContextError> {
785 let Some(value) = self.read(key)? else {
786 return Ok(None);
787 };
788
789 serde_json::from_value(value).map(Some).map_err(|_| {
790 ContextError::Serialization(crate::errors::ErrorInfo {
791 code: crate::ids::ErrorCode("context_deserialize_failed".to_string()),
792 category: crate::errors::ErrorCategory::Context,
793 retryable: false,
794 message: "context value deserialization failed".to_string(),
795 details: None,
796 })
797 })
798 }
799
800 fn write_typed<T: Serialize>(
801 &mut self,
802 key: ContextKey,
803 value: &T,
804 ) -> Result<(), ContextError> {
805 let v = serde_json::to_value(value).map_err(|_| {
806 ContextError::Serialization(crate::errors::ErrorInfo {
807 code: crate::ids::ErrorCode("context_serialize_failed".to_string()),
808 category: crate::errors::ErrorCategory::Context,
809 retryable: false,
810 message: "context value serialization failed".to_string(),
811 details: None,
812 })
813 })?;
814
815 self.write(key, v)
816 }
817 }
818}
819
820/// Event types emitted by the engine and by state handlers during a run.
821pub mod events {
822 use super::*;
823 use crate::errors::StateError;
824 use crate::ids::{ArtifactId, OpId, OpPath, RunId, StateId};
825
/// Recommended `DomainEvent.name` for fact-recorded notifications.
///
/// This and the sibling `DOMAIN_EVENT_*` constants are the recommended stable
/// `DomainEvent.name` values.
pub const DOMAIN_EVENT_FACT_RECORDED: &str = "fact_recorded";
/// Recommended `DomainEvent.name` for artifact-write notifications.
pub const DOMAIN_EVENT_ARTIFACT_WRITTEN: &str = "artifact_written";
/// Recommended `DomainEvent.name` for operation boundary markers.
pub const DOMAIN_EVENT_OP_BOUNDARY: &str = "op_boundary";
/// Recommended `DomainEvent.name` for child-run spawn notifications.
pub const DOMAIN_EVENT_CHILD_RUN_SPAWNED: &str = "child_run_spawned";
/// Recommended `DomainEvent.name` for child-run completion notifications.
pub const DOMAIN_EVENT_CHILD_RUN_COMPLETED: &str = "child_run_completed";
836
/// Run completion status, recorded in `KernelEvent::RunCompleted`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum RunStatus {
    /// The run completed successfully.
    Completed,
    /// The run terminated because a state failed.
    Failed,
    /// The run was cancelled before completion.
    Cancelled,
}
847
/// Kernel event variants required for recovery/resume correctness.
///
/// Snapshots and manifests are referenced by [`ArtifactId`] rather than embedded.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum KernelEvent {
    /// Marks the start of a run and records its manifest and initial snapshot.
    RunStarted {
        /// Operation identifier for the run.
        op_id: OpId,
        /// Manifest artifact identifier.
        manifest_id: ArtifactId,
        /// Snapshot of initial context at run start.
        initial_snapshot_id: ArtifactId,
    },
    /// Marks entry into a state attempt.
    StateEntered {
        /// Planned state being entered.
        state_id: StateId,
        /// Zero-based attempt number for this state.
        attempt: u32,
        /// Snapshot the attempt starts from (resume/retry boundary).
        base_snapshot_id: ArtifactId,
    },
    /// Marks successful completion of a state.
    StateCompleted {
        /// State that completed successfully.
        state_id: StateId,
        /// Context snapshot captured after completion.
        context_snapshot_id: ArtifactId,
    },
    /// Marks failure of a state attempt.
    StateFailed {
        /// State that failed.
        state_id: StateId,
        /// Structured failure payload.
        error: StateError,
        /// Diagnostic-only snapshot (must not be used as a resume boundary).
        failure_snapshot_id: Option<ArtifactId>,
    },
    /// Marks the terminal status of the run.
    RunCompleted {
        /// Final run status.
        status: RunStatus,
        /// Final context snapshot, if one was produced.
        final_snapshot_id: Option<ArtifactId>,
    },
}
893
/// Optional domain event (operation-defined; verbosity is controlled by event profile).
///
/// Rules:
/// - payload MUST be canonical-JSON compatible
/// - payload MUST NOT contain secrets
/// - large payloads SHOULD be stored as artifacts and referenced via `payload_ref`
///
/// These rules are a contract for producers; this type does not check them.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct DomainEvent {
    /// Stable event name defined by the operation or a shared convention.
    pub name: String,
    /// Canonical JSON payload safe to persist.
    pub payload: serde_json::Value,
    /// Optional artifact reference for large payloads.
    pub payload_ref: Option<ArtifactId>,
}
909
/// Event payload stored by event stores: either a kernel event or a domain event.
///
/// The event store contract expects values of this enum to be wrapped in [`EventEnvelope`]
/// with strictly increasing per-run sequence numbers.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum Event {
    /// Engine-level event required for resume and recovery.
    Kernel(KernelEvent),
    /// Operation-defined event emitted during state handling.
    Domain(DomainEvent),
}
921
/// Envelope stored in the event store: one [`Event`] plus its run and ordering metadata.
///
/// `ts_millis` is informational only; replay semantics come from event order and payload
/// content, not wall-clock time.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct EventEnvelope {
    /// Run to which this event belongs.
    pub run_id: RunId,
    /// Monotonic per-run sequence number.
    pub seq: u64,

    /// Informational timestamp; must not be required for deterministic replay semantics.
    pub ts_millis: Option<u64>,

    /// Event payload stored at this sequence number.
    pub event: Event,
}
939
940 /// Recommended standard domain event payloads (not required by engine).
941 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
942 pub struct FactRecorded {
943 /// Fact key that was recorded.
944 pub key: crate::ids::FactKey,
945 /// Artifact containing the recorded payload.
946 pub payload_id: ArtifactId,
947 /// Operation-defined metadata about the fact.
948 pub meta: serde_json::Value,
949 }
950
951 /// Recommended payload for domain events that announce artifact writes.
952 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
953 pub struct ArtifactWritten {
954 /// Artifact identifier that was written.
955 pub artifact_id: ArtifactId,
956 /// Kind assigned to the written artifact.
957 pub kind: crate::stores::ArtifactKind,
958 /// Operation-defined metadata about the artifact.
959 pub meta: serde_json::Value,
960 }
961
962 /// Recommended payload for domain events that mark op boundaries.
963 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
964 pub struct OpBoundary {
965 /// Nested operation path being reported.
966 pub op_path: OpPath,
967 /// Boundary phase, typically values such as `started` or `completed`.
968 pub phase: String, // e.g. "started" | "completed"
969 }
970
971 /// Reserved for later expansion (nested machines).
972 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
973 pub struct ChildRunSpawned {
974 /// Parent run that spawned the child.
975 pub parent_run_id: RunId,
976 /// Newly created child run identifier.
977 pub child_run_id: RunId,
978 /// Manifest artifact for the child run.
979 pub child_manifest_id: ArtifactId,
980 }
981
982 /// Recommended payload for domain events that announce child-run completion.
983 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
984 pub struct ChildRunCompleted {
985 /// Child run that finished.
986 pub child_run_id: RunId,
987 /// Final status of the child run.
988 pub status: RunStatus,
989 /// Final snapshot produced by the child run, if any.
990 pub final_snapshot_id: Option<ArtifactId>,
991 }
992
#[cfg(test)]
mod tests {
    use super::*;

    use crate::hashing::artifact_id_for_json;

    /// Asserts that `payload` can be hashed as canonical JSON and is not flagged by the secret
    /// scanner.
    fn assert_canonical_and_non_secret(payload: &serde_json::Value) {
        artifact_id_for_json(payload).expect("canonical-json-hashable");
        assert!(!crate::secrets::json_contains_secrets(payload));
    }

    /// Both child-run payload types must serialize to hashable, secret-free JSON.
    #[test]
    fn child_run_payloads_are_canonical_and_non_secret() {
        let spawned = ChildRunSpawned {
            parent_run_id: RunId(uuid::Uuid::new_v4()),
            child_run_id: RunId(uuid::Uuid::new_v4()),
            child_manifest_id: ArtifactId("0".repeat(64)),
        };
        assert_canonical_and_non_secret(&serde_json::to_value(spawned).expect("serialize"));

        let completed = ChildRunCompleted {
            child_run_id: RunId(uuid::Uuid::new_v4()),
            status: RunStatus::Completed,
            final_snapshot_id: Some(ArtifactId("1".repeat(64))),
        };
        assert_canonical_and_non_secret(&serde_json::to_value(completed).expect("serialize"));
    }
}
1020}
1021
1022/// IO request and response abstractions used by live and replay providers.
1023pub mod io {
1024 use super::*;
1025 use crate::errors::IoError;
1026 use crate::ids::{ArtifactId, FactKey};
1027
1028 /// Opaque IO call surface; collectors define typed adapters on top.
1029 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1030 pub struct IoCall {
1031 /// Namespace like "http", "jsonrpc", "coingecko", etc.
1032 pub namespace: String,
1033 /// Canonical JSON request payload (typed by the caller/collector).
1034 pub request: serde_json::Value,
1035 /// Fact key for recording/replay.
1036 ///
1037 /// Contract:
1038 /// - In Live mode, callers SHOULD provide this for replayable IO.
1039 /// - In Replay mode, deterministic IO MUST provide this (otherwise `IoError::MissingFactKey`).
1040 pub fact_key: Option<FactKey>,
1041 }
1042
1043 /// Opaque IO result surface; collectors define typed adapters on top.
1044 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1045 pub struct IoResult {
1046 /// Canonical JSON response payload.
1047 pub response: serde_json::Value,
1048 /// If recorded, points to the stored payload artifact.
1049 pub recorded_payload_id: Option<ArtifactId>,
1050 }
1051
1052 /// IO provider (LiveIo/ReplayIo are implementations).
1053 #[async_trait]
1054 pub trait IoProvider: Send {
1055 /// Executes an IO request and returns the structured response.
1056 async fn call(&mut self, call: IoCall) -> Result<IoResult, IoError>;
1057
1058 /// Persist a deterministic structured value directly as a fact payload.
1059 async fn record_value(
1060 &mut self,
1061 key: FactKey,
1062 value: serde_json::Value,
1063 ) -> Result<ArtifactId, IoError>;
1064
1065 /// Lookup recorded fact payload by key.
1066 async fn get_recorded_fact(&mut self, key: &FactKey)
1067 -> Result<Option<ArtifactId>, IoError>;
1068
1069 /// Current time. If used in deterministic logic, implementations MUST record as facts.
1070 async fn now_millis(&mut self) -> Result<u64, IoError>;
1071
1072 /// Random bytes. If used in reproducible paths, implementations MUST record as facts.
1073 async fn random_bytes(&mut self, n: usize) -> Result<Vec<u8>, IoError>;
1074 }
1075}
1076
1077/// Traits for recording operation-defined domain events during state execution.
1078pub mod recorder {
1079 use super::*;
1080 use crate::errors::RunError;
1081 use crate::events::DomainEvent;
1082
1083 /// Domain event recorder used by state handlers.
1084 ///
1085 /// Engine contract:
1086 /// - Domain events are associated with the current state attempt (bounded by `StateEntered` and a terminal
1087 /// event).
1088 /// - A state attempt MAY span multiple transactional appends; each append is atomic.
1089 #[async_trait]
1090 pub trait EventRecorder: Send {
1091 /// Emits a single domain event.
1092 async fn emit(&mut self, event: DomainEvent) -> Result<(), RunError>;
1093 /// Emits multiple domain events in order.
1094 async fn emit_many(&mut self, events: Vec<DomainEvent>) -> Result<(), RunError>;
1095 }
1096}
1097
1098/// State trait and outcome types used by planned execution nodes.
1099pub mod state {
1100 use super::*;
1101 use crate::context::DynContext;
1102 use crate::errors::StateError;
1103 use crate::io::IoProvider;
1104 use crate::meta::StateMeta;
1105 use crate::recorder::EventRecorder;
1106
1107 /// Indicates whether the engine should snapshot context after the state.
1108 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1109 pub enum SnapshotPolicy {
1110 /// Do not request an additional snapshot for this state outcome.
1111 Never,
1112 /// Request a snapshot only when the state succeeds.
1113 OnSuccess,
1114 /// Request a snapshot even if the state fails.
1115 Always,
1116 }
1117
1118 /// Result returned by a successful state handler.
1119 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1120 pub struct StateOutcome {
1121 /// Snapshot hint returned by the state handler.
1122 pub snapshot: SnapshotPolicy,
1123 }
1124
1125 /// Note:
1126 /// - The engine MAY still checkpoint context according to `RunConfig.context_checkpointing`
1127 /// regardless of `StateOutcome.snapshot`. This hint controls additional snapshot behavior
1128 /// and/or diagnostic snapshots, not permission to bypass required checkpoints.
1129 /// State behavior. States do NOT own their `StateId` — IDs are assigned by the plan.
1130 #[async_trait]
1131 pub trait State: Send + Sync {
1132 /// Returns metadata used for planning, replay, and policy decisions.
1133 fn meta(&self) -> StateMeta;
1134
1135 /// Executes the state against the provided context, IO provider, and event recorder.
1136 async fn handle(
1137 &self,
1138 ctx: &mut dyn DynContext,
1139 io: &mut dyn IoProvider,
1140 rec: &mut dyn EventRecorder,
1141 ) -> Result<StateOutcome, StateError>;
1142 }
1143
1144 /// Shared trait-object form used by planners and executors.
1145 pub type DynState = Arc<dyn State>;
1146}
1147
1148/// Types that represent the executable state graph for a run.
1149pub mod plan {
1150 use super::*;
1151 use crate::ids::{OpId, StateId};
1152 use crate::state::DynState;
1153
1154 /// An edge `from -> to` means `from` must complete before `to` can run.
1155 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1156 pub struct DependencyEdge {
1157 /// Upstream state that must complete first.
1158 pub from: StateId,
1159 /// Downstream state that depends on `from`.
1160 pub to: StateId,
1161 }
1162
1163 /// Planned state plus the runtime implementation that will execute it.
1164 #[derive(Clone)]
1165 pub struct StateNode {
1166 /// Stable identifier assigned by the planner.
1167 pub id: StateId,
1168 /// Runtime implementation invoked for this node.
1169 pub state: DynState,
1170 }
1171
1172 /// A state graph is the executable structure derived from ops/pipelines.
1173 #[derive(Clone)]
1174 pub struct StateGraph {
1175 /// Planned states included in the graph.
1176 pub states: Vec<StateNode>,
1177 /// Dependency edges connecting those states.
1178 pub edges: Vec<DependencyEdge>,
1179 }
1180
1181 /// Operation-specific executable plan passed to the engine.
1182 ///
1183 /// The engine treats this as immutable input: planners finish all graph shaping first, then
1184 /// the engine executes the resulting nodes and dependency edges exactly as supplied.
1185 #[derive(Clone)]
1186 pub struct ExecutionPlan {
1187 /// Operation identifier associated with this plan.
1188 pub op_id: OpId,
1189 /// Executable state graph for the run.
1190 pub graph: StateGraph,
1191 }
1192
1193 /// Plan validation errors (fail-fast).
1194 ///
1195 /// These errors are intended for plan construction time, before a run is started. They help
1196 /// callers distinguish malformed planner output from runtime failures that occur later inside
1197 /// the execution engine.
1198 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1199 pub enum PlanValidationError {
1200 /// The plan contained no states.
1201 EmptyPlan,
1202 /// A state identifier appeared more than once.
1203 DuplicateStateId {
1204 /// Duplicated state identifier.
1205 state_id: StateId,
1206 },
1207 /// A dependency edge referenced a state that does not exist in the graph.
1208 MissingStateForEdge {
1209 /// Referenced state that could not be found.
1210 missing: StateId,
1211 },
1212 /// The graph contained a cycle.
1213 CircularDependency {
1214 /// State identifiers participating in the detected cycle.
1215 cycle: Vec<StateId>,
1216 },
1217
1218 /// Optional: if planners derive edges from tag dependencies, they may validate those too.
1219 DanglingDependencyTag {
1220 /// State whose dependency hint could not be resolved.
1221 state_id: StateId,
1222 /// Missing tag referenced by that dependency hint.
1223 missing_tag: crate::meta::Tag,
1224 },
1225 }
1226
1227 /// Validator interface for rejecting malformed execution plans before a run starts.
1228 ///
1229 /// Typical validators enforce graph-shape rules such as:
1230 /// - all state identifiers are unique
1231 /// - every dependency edge points to an existing state
1232 /// - the plan is acyclic
1233 pub trait PlanValidator: Send + Sync {
1234 /// Validates the supplied execution plan.
1235 fn validate(&self, plan: &ExecutionPlan) -> Result<(), PlanValidationError>;
1236 }
1237}
1238
1239/// Storage traits for append-only events and immutable artifacts.
1240pub mod stores {
1241 use super::*;
1242 use crate::errors::StorageError;
1243 use crate::events::EventEnvelope;
1244 use crate::ids::{ArtifactId, RunId};
1245
1246 /// Artifact classification used for retention, validation, and output handling.
1247 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1248 pub enum ArtifactKind {
1249 /// Run manifest artifact.
1250 Manifest,
1251 /// Serialized context snapshot artifact.
1252 ContextSnapshot,
1253 /// Recorded fact payload artifact.
1254 FactPayload,
1255 /// Encrypted secret payload (ciphertext bytes only).
1256 ///
1257 /// Secret plaintext MUST NOT be stored directly in the artifact store.
1258 SecretPayload,
1259 /// User-facing output artifact.
1260 Output,
1261 /// Operation- or backend-specific artifact kind.
1262 Other(String),
1263 }
1264
1265 /// Append-only event store with optimistic concurrency.
1266 #[async_trait]
1267 pub trait EventStore: Send + Sync {
1268 /// Returns the current head sequence for `run_id`.
1269 async fn head_seq(&self, run_id: RunId) -> Result<u64, StorageError>;
1270
1271 /// Appends events atomically if `expected_seq` matches the current head.
1272 async fn append(
1273 &self,
1274 run_id: RunId,
1275 expected_seq: u64,
1276 events: Vec<EventEnvelope>,
1277 ) -> Result<u64, StorageError>;
1278
1279 /// Reads a contiguous event range starting at `from_seq`.
1280 async fn read_range(
1281 &self,
1282 run_id: RunId,
1283 from_seq: u64,
1284 to_seq: Option<u64>,
1285 ) -> Result<Vec<EventEnvelope>, StorageError>;
1286 }
1287
1288 /// Immutable, content-addressed artifact store.
1289 #[async_trait]
1290 pub trait ArtifactStore: Send + Sync {
1291 /// Stores bytes under a content-derived identifier and returns that identifier.
1292 async fn put(&self, kind: ArtifactKind, bytes: Vec<u8>)
1293 -> Result<ArtifactId, StorageError>;
1294 /// Loads the bytes for an existing artifact identifier.
1295 async fn get(&self, id: &ArtifactId) -> Result<Vec<u8>, StorageError>;
1296 /// Returns whether an artifact exists without loading its bytes.
1297 async fn exists(&self, id: &ArtifactId) -> Result<bool, StorageError>;
1298 }
1299}
1300
1301/// Engine traits and input/output types for starting or resuming runs.
1302pub mod engine {
1303 use super::*;
1304 use crate::config::{RunConfig, RunManifest};
1305 use crate::context::DynContext;
1306 use crate::errors::RunError;
1307 use crate::ids::{ArtifactId, RunId};
1308 use crate::plan::ExecutionPlan;
1309 use crate::stores::{ArtifactStore, EventStore};
1310
1311 /// Current run phase (observability).
1312 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1313 pub enum RunPhase {
1314 /// The run is still executing.
1315 Running,
1316 /// The run completed successfully.
1317 Completed,
1318 /// The run ended because of a failure.
1319 Failed,
1320 /// The run was cancelled.
1321 Cancelled,
1322 }
1323
1324 /// Summary returned by the execution engine after start or resume.
1325 ///
1326 /// `phase` reports the final or current lifecycle state, while `final_snapshot_id` is populated
1327 /// only when a snapshot exists at the point the engine returns.
1328 ///
1329 /// This is intentionally compact so transport layers can expose a stable run result without
1330 /// forcing callers to inspect raw event streams for the common success/failure path.
1331 #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
1332 pub struct RunResult {
1333 /// Run identifier that was started or resumed.
1334 pub run_id: RunId,
1335 /// Final or current phase of the run.
1336 pub phase: RunPhase,
1337 /// Final context snapshot, if one exists.
1338 pub final_snapshot_id: Option<ArtifactId>,
1339 }
1340
1341 /// Inputs required to start a run.
1342 ///
1343 /// Callers usually construct this once planning is complete and the manifest has already been
1344 /// content-addressed and persisted or prepared for persistence.
1345 ///
1346 /// Keeping the manifest, derived plan, run config, and initial context together makes the
1347 /// engine boundary explicit: everything above this type is planning/orchestration, everything
1348 /// below it is execution and persistence.
1349 pub struct StartRun {
1350 /// Manifest content for the run.
1351 pub manifest: RunManifest,
1352 /// Content-addressed manifest identifier.
1353 pub manifest_id: ArtifactId,
1354 /// Execution plan derived for the manifest.
1355 pub plan: ExecutionPlan,
1356 /// Effective run configuration used by the engine.
1357 pub run_config: RunConfig,
1358 /// Initial context snapshot used as the run's starting point.
1359 pub initial_context: Box<dyn DynContext>,
1360 }
1361
1362 /// Store bundle passed to the engine.
1363 ///
1364 /// Keeping the stores grouped makes it easier for higher layers to swap persistence backends
1365 /// without threading each store separately through every engine constructor.
1366 #[derive(Clone)]
1367 pub struct Stores {
1368 /// Event store used for append-only kernel and domain events.
1369 pub events: Arc<dyn EventStore>,
1370 /// Artifact store used for manifests, snapshots, facts, and outputs.
1371 pub artifacts: Arc<dyn ArtifactStore>,
1372 }
1373
1374 /// Execution engine interface.
1375 #[async_trait]
1376 pub trait ExecutionEngine: Send + Sync {
1377 /// Starts a new run from the supplied manifest, plan, and initial context.
1378 async fn start(&self, stores: Stores, run: StartRun) -> Result<RunResult, RunError>;
1379 /// Resumes a previously started run from persisted state.
1380 async fn resume(&self, stores: Stores, run_id: RunId) -> Result<RunResult, RunError>;
1381 }
1382}
1383
1384/// Internal helpers for canonical JSON hashing and `ArtifactId` computation.
1385///
1386/// This module is not part of the stable API contract (Appendix C.1) and may change.
1387pub mod hashing;
1388
1389/// Unstable v4 runtime implementation (executor + resume logic).
1390///
1391/// Not part of the stable API contract (Appendix C.1).
1392pub mod runtime;
1393
1394/// Unstable Live IO implementation (facts recording).
1395///
1396/// Not part of the stable API contract (Appendix C.1).
1397pub mod live_io;
1398
1399/// Unstable live IO transport for external program execution (`exec` namespace).
1400///
1401/// Not part of the stable API contract (Appendix C.1).
1402pub mod exec_transport;
1403
1404/// Unstable shared process execution helpers for live IO transports.
1405///
1406/// Not part of the stable API contract (Appendix C.1).
1407pub mod process_exec;
1408
1409/// Unstable Live IO transport router (namespace dispatch).
1410///
1411/// Not part of the stable API contract (Appendix C.1).
1412pub mod live_io_router;
1413
1414/// Unstable Live IO transport registry (runtime wiring).
1415///
1416/// Not part of the stable API contract (Appendix C.1).
1417pub mod live_io_registry;
1418
1419/// Unstable Replay IO implementation (facts replay).
1420///
1421/// Not part of the stable API contract (Appendix C.1).
1422pub mod replay_io;
1423
1424pub(crate) mod attempt_envelope;
1425pub(crate) mod context_runtime;
1426pub(crate) mod event_profile;
1427pub(crate) mod secrets;