durable_execution_sdk/
lib.rs

1//! # AWS Durable Execution SDK for Lambda Rust Runtime
2//!
3//! This SDK enables Rust developers to build reliable, long-running workflows
4//! in AWS Lambda with automatic checkpointing, replay, and state management.
5//!
6//! ## Overview
7//!
8//! The AWS Durable Execution SDK provides a framework for building workflows that can
9//! survive Lambda function restarts, timeouts, and failures. It automatically checkpoints
10//! the state of your workflow, allowing it to resume exactly where it left off after
11//! any interruption.
12//!
13//! ### Key Features
14//!
15//! - **Automatic Checkpointing**: Every operation is automatically checkpointed, ensuring
16//!   your workflow can resume from the last completed step.
17//! - **Replay Mechanism**: When a function resumes, completed operations return their
18//!   checkpointed results instantly without re-execution.
19//! - **Concurrent Operations**: Process collections in parallel with configurable
20//!   concurrency limits and failure tolerance.
21//! - **External Integration**: Wait for callbacks from external systems with configurable
22//!   timeouts.
23//! - **Type Safety**: Full Rust type safety with generics, trait-based abstractions,
24//!   and newtype wrappers for domain identifiers.
25//! - **Promise Combinators**: Coordinate multiple durable promises with `all`, `any`, `race`, and `all_settled`.
26//! - **Replay-Safe Helpers**: Generate deterministic UUIDs and timestamps that are safe for replay.
27//! - **Configurable Checkpointing**: Choose between eager, batched, or optimistic checkpointing modes.
28//! - **Trait Aliases**: Cleaner function signatures with [`DurableValue`], [`StepError`], and [`StepFuture`] type aliases.
29//! - **Sealed Traits**: Internal traits are sealed to allow API evolution without breaking changes.
30//!
31//! ## Important Documentation
32//!
33//! Before writing durable workflows, please read:
34//!
35//! - [`docs::determinism`]: **Critical** - Understanding determinism requirements for replay-safe workflows
36//! - [`docs::limits`]: Execution limits and constraints you need to know
37//!
38//! ## Getting Started
39//!
40//! Add the SDK to your `Cargo.toml`:
41//!
42//! ```toml
43//! [dependencies]
44//! durable-execution-sdk = "0.1"
45//! tokio = { version = "1.0", features = ["full"] }
46//! serde = { version = "1.0", features = ["derive"] }
47//! ```
48//!
49//! ### Basic Workflow Example
50//!
51//! Here's a simple workflow that processes an order:
52//!
53//! ```rust,no_run
54//! use durable_execution_sdk::{durable_execution, DurableContext, DurableError, Duration};
55//! use serde::{Deserialize, Serialize};
56//!
57//! #[derive(Deserialize)]
58//! struct OrderEvent {
59//!     order_id: String,
60//!     amount: f64,
61//! }
62//!
63//! #[derive(Serialize)]
64//! struct OrderResult {
65//!     status: String,
66//!     order_id: String,
67//! }
68//!
69//! #[durable_execution]
70//! async fn process_order(event: OrderEvent, ctx: DurableContext) -> Result<OrderResult, DurableError> {
71//!     // Step 1: Validate the order (checkpointed automatically)
72//!     let is_valid: bool = ctx.step(|_step_ctx| async move {
73//!         // Validation logic here
74//!         Ok(true)
75//!     }, None).await?;
76//!
77//!     if !is_valid {
78//!         return Err(DurableError::execution("Invalid order"));
79//!     }
80//!
81//!     // Step 2: Process payment (checkpointed automatically)
82//!     let payment_id: String = ctx.step(|_step_ctx| async move {
83//!         // Payment processing logic here
84//!         Ok("pay_123".to_string())
85//!     }, None).await?;
86//!
87//!     // Step 3: Wait for payment confirmation (suspends Lambda, resumes later)
88//!     ctx.wait(Duration::from_seconds(5), Some("payment_confirmation")).await?;
89//!
90//!     // Step 4: Complete the order
91//!     Ok(OrderResult {
92//!         status: "completed".to_string(),
93//!         order_id: event.order_id,
94//!     })
95//! }
96//! ```
97//!
98//! ## Core Concepts
99//!
100//! ### DurableContext
101//!
102//! The [`DurableContext`] is the main interface for durable operations. It provides:
103//!
104//! - [`step`](DurableContext::step): Execute and checkpoint a unit of work
105//! - [`wait`](DurableContext::wait): Pause execution for a specified duration
106//! - [`create_callback`](DurableContext::create_callback): Wait for external systems to signal completion
107//! - [`invoke`](DurableContext::invoke): Call other durable Lambda functions
108//! - [`map`](DurableContext::map): Process collections in parallel
109//! - [`parallel`](DurableContext::parallel): Execute multiple operations concurrently
110//! - [`run_in_child_context`](DurableContext::run_in_child_context): Create isolated nested workflows
111//!
112//! ### Steps
113//!
114//! Steps are the fundamental unit of work in durable executions. Each step is
115//! automatically checkpointed, allowing the workflow to resume from the last
116//! completed step after interruptions.
117//!
118//! ```rust,no_run
119//! # use durable_execution_sdk::{DurableContext, StepConfig, StepSemantics};
120//! # async fn example(ctx: DurableContext) -> Result<(), Box<dyn std::error::Error>> {
121//! // Simple step
122//! let result: i32 = ctx.step(|_| async move { Ok(42) }, None).await?;
123//!
124//! // Named step for better debugging
125//! let result: String = ctx.step_named("fetch_data", |_| async move {
126//!     Ok("data".to_string())
127//! }, None).await?;
128//!
129//! // Step with custom configuration
130//! let config = StepConfig {
131//!     step_semantics: StepSemantics::AtMostOncePerRetry,
132//!     ..Default::default()
133//! };
134//! let result: i32 = ctx.step(|_| async move { Ok(42) }, Some(config)).await?;
135//! # Ok(())
136//! # }
137//! ```
138//!
139//! ### Step Semantics
140//!
141//! The SDK supports two execution semantics for steps:
142//!
143//! - **AtLeastOncePerRetry** (default): Checkpoint after execution. The step may
144//!   execute multiple times if interrupted, but the result is always checkpointed.
145//! - **AtMostOncePerRetry**: Checkpoint before execution. Guarantees the step
146//!   executes at most once per retry, useful for non-idempotent operations.
147//!
148//! ### Wait Operations
149//!
150//! Wait operations suspend the Lambda execution and resume after the specified
151//! duration. This is efficient because it doesn't block Lambda resources.
152//!
153//! ```rust,ignore
154//! use durable_execution_sdk::Duration;
155//!
156//! // Wait for 5 seconds
157//! ctx.wait(Duration::from_seconds(5), None).await?;
158//!
159//! // Wait for 1 hour with a name
160//! ctx.wait(Duration::from_hours(1), Some("wait_for_approval")).await?;
161//! ```
162//!
163//! ### Callbacks
164//!
165//! Callbacks allow external systems to signal your workflow. Create a callback,
166//! share the callback ID with an external system, and wait for the result.
167//!
168//! ```rust,ignore
169//! use durable_execution_sdk::CallbackConfig;
170//!
171//! // Create a callback with 24-hour timeout
172//! let callback = ctx.create_callback::<ApprovalResponse>(Some(CallbackConfig {
173//!     timeout: Duration::from_hours(24),
174//!     ..Default::default()
175//! })).await?;
176//!
177//! // Share callback.callback_id with external system
178//! notify_approver(&callback.callback_id).await?;
179//!
180//! // Wait for the callback result (suspends until callback is received)
181//! let approval = callback.result().await?;
182//! ```
183//!
184//! ### Parallel Processing
185//!
186//! Process collections in parallel with configurable concurrency and failure tolerance:
187//!
188//! ```rust,ignore
189//! use durable_execution_sdk::{MapConfig, CompletionConfig};
190//!
191//! // Process items with max 5 concurrent executions
192//! let results = ctx.map(
193//!     vec![1, 2, 3, 4, 5],
194//!     |child_ctx, item, index| async move {
195//!         child_ctx.step(|_| async move { Ok(item * 2) }, None).await
196//!     },
197//!     Some(MapConfig {
198//!         max_concurrency: Some(5),
199//!         completion_config: CompletionConfig::all_successful(),
200//!         ..Default::default()
201//!     }),
202//! ).await?;
203//!
204//! // Get all successful results
205//! let values = results.get_results()?;
206//! ```
207//!
208//! ### Parallel Branches
209//!
210//! Execute multiple independent operations concurrently:
211//!
212//! ```rust,ignore
213//! use durable_execution_sdk::ParallelConfig;
214//!
215//! let results = ctx.parallel(
216//!     vec![
217//!         |ctx| Box::pin(async move { ctx.step(|_| async move { Ok("a") }, None).await }),
218//!         |ctx| Box::pin(async move { ctx.step(|_| async move { Ok("b") }, None).await }),
219//!         |ctx| Box::pin(async move { ctx.step(|_| async move { Ok("c") }, None).await }),
220//!     ],
221//!     None,
222//! ).await?;
223//! ```
224//!
225//! ### Promise Combinators
226//!
227//! The SDK provides promise combinators for coordinating multiple durable operations:
228//!
229//! ```rust,ignore
230//! use durable_execution_sdk::{DurableContext, DurableError};
231//!
232//! async fn coordinate_operations(ctx: &DurableContext) -> Result<(), DurableError> {
233//!     // Wait for ALL operations to complete successfully
234//!     let results = ctx.all(vec![
235//!         ctx.step(|_| async move { Ok(1) }, None),
236//!         ctx.step(|_| async move { Ok(2) }, None),
237//!         ctx.step(|_| async move { Ok(3) }, None),
238//!     ]).await?;
239//!     // results = [1, 2, 3]
240//!
241//!     // Wait for ALL operations to settle (success or failure)
242//!     let batch_result = ctx.all_settled(vec![
243//!         ctx.step(|_| async move { Ok("success") }, None),
244//!         ctx.step(|_| async move { Err("failure".into()) }, None),
245//!     ]).await;
246//!     // batch_result contains both success and failure outcomes
247//!
248//!     // Return the FIRST operation to settle (success or failure)
249//!     let first = ctx.race(vec![
250//!         ctx.step(|_| async move { Ok("fast") }, None),
251//!         ctx.step(|_| async move { Ok("slow") }, None),
252//!     ]).await?;
253//!
254//!     // Return the FIRST operation to succeed
255//!     let first_success = ctx.any(vec![
256//!         ctx.step(|_| async move { Err("fail".into()) }, None),
257//!         ctx.step(|_| async move { Ok("success") }, None),
258//!     ]).await?;
259//!
260//!     Ok(())
261//! }
262//! ```
263//!
264//! ### Accessing Original Input
265//!
266//! Access the original input that started the execution:
267//!
268//! ```rust,ignore
269//! use serde::Deserialize;
270//!
271//! #[derive(Deserialize)]
272//! struct OrderEvent {
273//!     order_id: String,
274//!     amount: f64,
275//! }
276//!
277//! async fn my_workflow(ctx: DurableContext) -> Result<(), DurableError> {
278//!     // Get the original input that started this execution
279//!     let event: OrderEvent = ctx.get_original_input()?;
280//!     println!("Processing order: {}", event.order_id);
281//!     
282//!     // Or get the raw JSON string
283//!     if let Some(raw_input) = ctx.get_original_input_raw() {
284//!         println!("Raw input: {}", raw_input);
285//!     }
286//!     
287//!     Ok(())
288//! }
289//! ```
290//!
291//! ### Replay-Safe Helpers
292//!
293//! Generate deterministic values that are safe for replay:
294//!
295//! ```rust
296//! use durable_execution_sdk::replay_safe::{
297//!     uuid_from_operation, uuid_to_string, uuid_string_from_operation,
298//! };
299//!
300//! // Generate a deterministic UUID from an operation ID
301//! let operation_id = "my-operation-123";
302//! let uuid_bytes = uuid_from_operation(operation_id, 0);
303//! let uuid_string = uuid_to_string(&uuid_bytes);
304//!
305//! // Or use the convenience function
306//! let uuid = uuid_string_from_operation(operation_id, 0);
307//!
308//! // Same inputs always produce the same UUID
309//! let uuid2 = uuid_string_from_operation(operation_id, 0);
310//! assert_eq!(uuid, uuid2);
311//!
312//! // Different seeds produce different UUIDs
313//! let uuid3 = uuid_string_from_operation(operation_id, 1);
314//! assert_ne!(uuid, uuid3);
315//! ```
316//!
317//! For timestamps, use the execution start time instead of the current time:
318//!
319//! ```rust,ignore
320//! use durable_execution_sdk::replay_safe::{
321//!     timestamp_from_execution, timestamp_seconds_from_execution,
322//! };
323//!
324//! async fn my_workflow(ctx: DurableContext) -> Result<(), DurableError> {
325//!     // Get replay-safe timestamp (milliseconds since epoch)
326//!     if let Some(timestamp_ms) = timestamp_from_execution(ctx.state()) {
327//!         println!("Execution started at: {} ms", timestamp_ms);
328//!     }
329//!     
330//!     // Or get seconds since epoch
331//!     if let Some(timestamp_secs) = timestamp_seconds_from_execution(ctx.state()) {
332//!         println!("Execution started at: {} seconds", timestamp_secs);
333//!     }
334//!     
335//!     Ok(())
336//! }
337//! ```
338//!
339//! **Important**: See [`docs::determinism`] for detailed guidance on writing replay-safe code.
340//!
341//! ### Wait Cancellation
342//!
343//! Cancel an active wait operation:
344//!
345//! ```rust,ignore
346//! async fn cancellable_workflow(ctx: DurableContext) -> Result<(), DurableError> {
347//!     // Start a long wait in a child context
348//!     let wait_op_id = ctx.next_operation_id();
349//!     
350//!     // In another branch, you can cancel the wait
351//!     ctx.cancel_wait(&wait_op_id).await?;
352//!     
353//!     Ok(())
354//! }
355//! ```
356//!
357//! ### Extended Duration Support
358//!
359//! The Duration type supports extended time periods:
360//!
361//! ```rust
362//! use durable_execution_sdk::Duration;
363//!
364//! // Standard durations
365//! let seconds = Duration::from_seconds(30);
366//! let minutes = Duration::from_minutes(5);
367//! let hours = Duration::from_hours(2);
368//! let days = Duration::from_days(7);
369//!
370//! // Extended durations
371//! let weeks = Duration::from_weeks(2);      // 14 days
372//! let months = Duration::from_months(3);    // 90 days (30 days per month)
373//! let years = Duration::from_years(1);      // 365 days
374//!
375//! assert_eq!(weeks.to_seconds(), 14 * 24 * 60 * 60);
376//! assert_eq!(months.to_seconds(), 90 * 24 * 60 * 60);
377//! assert_eq!(years.to_seconds(), 365 * 24 * 60 * 60);
378//! ```
379//!
380//! ## Type-Safe Identifiers (Newtypes)
381//!
382//! The SDK provides newtype wrappers for domain identifiers to prevent accidental
383//! mixing of different ID types at compile time. These types are available in the
384//! [`types`] module and re-exported at the crate root.
385//!
386//! ### Available Newtypes
387//!
388//! - [`OperationId`]: Unique identifier for an operation within a durable execution
389//! - [`ExecutionArn`]: Amazon Resource Name identifying a durable execution
390//! - [`CallbackId`]: Unique identifier for a callback operation
391//!
392//! ### Creating Newtypes
393//!
394//! ```rust
395//! use durable_execution_sdk::{OperationId, ExecutionArn, CallbackId};
396//!
397//! // From String or &str (no validation, for backward compatibility)
398//! let op_id = OperationId::from("op-123");
399//! let op_id2: OperationId = "op-456".into();
400//!
401//! // With validation (rejects empty strings)
402//! let op_id3 = OperationId::new("op-789").unwrap();
403//! assert!(OperationId::new("").is_err());
404//!
405//! // ExecutionArn validates ARN format
406//! let arn = ExecutionArn::new("arn:aws:lambda:us-east-1:123456789012:function:my-func:durable:abc123");
407//! assert!(arn.is_ok());
408//!
409//! // CallbackId for external system integration
410//! let callback_id = CallbackId::from("callback-xyz");
411//! ```
412//!
413//! ### Using Newtypes as Strings
414//!
415//! All newtypes implement `Deref<Target=str>` and `AsRef<str>` for convenient string access:
416//!
417//! ```rust
418//! use durable_execution_sdk::OperationId;
419//!
420//! let op_id = OperationId::from("op-123");
421//!
422//! // Use string methods directly via Deref
423//! assert!(op_id.starts_with("op-"));
424//! assert_eq!(op_id.len(), 6);
425//!
426//! // Use as &str via AsRef
427//! let s: &str = op_id.as_ref();
428//! assert_eq!(s, "op-123");
429//! ```
430//!
431//! ### Newtypes in Collections
432//!
433//! All newtypes implement `Hash` and `Eq`, making them suitable for use in `HashMap` and `HashSet`:
434//!
435//! ```rust
436//! use durable_execution_sdk::OperationId;
437//! use std::collections::HashMap;
438//!
439//! let mut results: HashMap<OperationId, String> = HashMap::new();
440//! results.insert(OperationId::from("op-1"), "success".to_string());
441//! results.insert(OperationId::from("op-2"), "pending".to_string());
442//!
443//! assert_eq!(results.get(&OperationId::from("op-1")), Some(&"success".to_string()));
444//! ```
445//!
446//! ### Serialization
447//!
448//! All newtypes use `#[serde(transparent)]` for seamless JSON serialization:
449//!
450//! ```rust
451//! use durable_execution_sdk::OperationId;
452//!
453//! let op_id = OperationId::from("op-123");
454//! let json = serde_json::to_string(&op_id).unwrap();
455//! assert_eq!(json, "\"op-123\""); // Serializes as plain string
456//!
457//! let restored: OperationId = serde_json::from_str(&json).unwrap();
458//! assert_eq!(restored, op_id);
459//! ```
460//!
461//! ## Trait Aliases
462//!
463//! The SDK provides trait aliases to simplify common trait bound combinations.
464//! These are available in the [`traits`] module and re-exported at the crate root.
465//!
466//! ### DurableValue
467//!
468//! [`DurableValue`] is a trait alias for types that can be durably stored and retrieved:
469//!
470//! ```rust
471//! use durable_execution_sdk::DurableValue;
472//! use serde::{Deserialize, Serialize};
473//!
474//! // DurableValue is equivalent to: Serialize + DeserializeOwned + Send
475//!
476//! // Any type implementing these traits automatically implements DurableValue
477//! #[derive(Debug, Clone, Serialize, Deserialize)]
478//! struct OrderResult {
479//!     order_id: String,
480//!     status: String,
481//! }
482//!
483//! // Use in generic functions for cleaner signatures
484//! fn process_result<T: DurableValue>(result: T) -> String {
485//!     serde_json::to_string(&result).unwrap_or_default()
486//! }
487//! ```
488//!
489//! ## Sealed Traits and Factory Functions
490//!
491//! Some SDK traits are "sealed" - they cannot be implemented outside this crate.
492//! This allows the SDK to evolve without breaking external code. Sealed traits include:
493//!
494//! - [`Logger`]: For structured logging in durable executions
495//! - [`SerDes`]: For custom serialization/deserialization
496//!
497//! ### Custom Loggers
498//!
499//! Instead of implementing `Logger` directly, use the factory functions:
500//!
501//! ```rust
502//! use durable_execution_sdk::{custom_logger, simple_custom_logger, LogInfo};
503//!
504//! // Simple custom logger with a single function for all levels
505//! let logger = simple_custom_logger(|level, msg, info| {
506//!     println!("[{}] {}: {:?}", level, msg, info);
507//! });
508//!
509//! // Full custom logger with separate functions for each level
510//! let logger = custom_logger(
511//!     |msg, info| println!("[DEBUG] {}", msg),  // debug
512//!     |msg, info| println!("[INFO] {}", msg),   // info
513//!     |msg, info| println!("[WARN] {}", msg),   // warn
514//!     |msg, info| println!("[ERROR] {}", msg),  // error
515//! );
516//! ```
517//!
518//! ### Custom Serializers
519//!
520//! Instead of implementing `SerDes` directly, use the factory function:
521//!
522//! ```rust
523//! use durable_execution_sdk::serdes::{custom_serdes, SerDesContext, SerDesError};
524//!
525//! // Create a custom serializer for a specific type
526//! let serdes = custom_serdes::<String, _, _>(
527//!     |value, _ctx| Ok(format!("custom:{}", value)),  // serialize
528//!     |data, _ctx| {                                   // deserialize
529//!         data.strip_prefix("custom:")
530//!             .map(|s| s.to_string())
531//!             .ok_or_else(|| SerDesError::deserialization("Invalid format"))
532//!     },
533//! );
534//! ```
535//!
536//! ## Configuration Types
537//!
538//! The SDK provides type-safe configuration for all operations:
539//!
540//! - [`StepConfig`]: Configure retry strategy, execution semantics, and serialization
541//! - [`CallbackConfig`]: Configure timeout and heartbeat for callbacks
542//! - [`InvokeConfig`]: Configure timeout and serialization for function invocations
543//! - [`MapConfig`]: Configure concurrency, batching, and completion criteria for map operations
544//! - [`ParallelConfig`]: Configure concurrency and completion criteria for parallel operations
545//! - [`CompletionConfig`]: Define success/failure criteria for concurrent operations
546//!
547//! ### Completion Configuration
548//!
549//! Control when concurrent operations complete:
550//!
551//! ```rust
552//! use durable_execution_sdk::CompletionConfig;
553//!
554//! // Complete when first task succeeds
555//! let first = CompletionConfig::first_successful();
556//!
557//! // Wait for all tasks to complete (regardless of success/failure)
558//! let all = CompletionConfig::all_completed();
559//!
560//! // Require all tasks to succeed (zero failure tolerance)
561//! let strict = CompletionConfig::all_successful();
562//!
563//! // Custom: require at least 3 successes
564//! let custom = CompletionConfig::with_min_successful(3);
565//! ```
566//!
567//! ## Error Handling
568//!
569//! The SDK provides a comprehensive error hierarchy through [`DurableError`]:
570//!
571//! - **Execution**: Errors that return FAILED status without Lambda retry
572//! - **Invocation**: Errors that trigger Lambda retry
573//! - **Checkpoint**: Checkpoint failures (retriable or non-retriable)
574//! - **Callback**: Callback-specific failures
575//! - **NonDeterministic**: Replay mismatches (operation type changed between runs)
576//! - **Validation**: Invalid configuration or arguments
577//! - **SerDes**: Serialization/deserialization failures
578//! - **Suspend**: Signal to pause execution and return control to Lambda
579//!
580//! ```rust,ignore
581//! use durable_execution_sdk::DurableError;
582//!
583//! // Create specific error types
584//! let exec_error = DurableError::execution("Something went wrong");
585//! let validation_error = DurableError::validation("Invalid input");
586//!
587//! // Check error properties
588//! if error.is_retriable() {
589//!     // Handle retriable error
590//! }
591//! ```
592//!
593//! ## Custom Serialization
594//!
595//! The SDK uses JSON serialization by default. Because [`SerDes`] is a sealed
596//! trait (see "Sealed Traits and Factory Functions" above), custom serializers
597//! are created with the [`serdes::custom_serdes`] factory function rather than
598//! by implementing the trait directly:
599//!
600//! ```rust,ignore
601//! use durable_execution_sdk::serdes::{custom_serdes, SerDesContext, SerDesError};
602//!
603//! // Build a serializer for `MyType` from a serialize/deserialize closure pair
604//! let my_serdes = custom_serdes::<MyType, _, _>(
605//!     |value, _context| {
606//!         // Custom serialization logic: (&MyType, &SerDesContext) -> Result<String, SerDesError>
607//!         Ok(format!("{:?}", value))
608//!     },
609//!     |data, _context| {
610//!         // Custom deserialization logic: (&str, &SerDesContext) -> Result<MyType, SerDesError>
611//!         todo!()
612//!     },
613//! );
614//! ```
615//!
616//! ## Logging and Tracing
617//!
618//! The SDK integrates with the `tracing` crate for structured logging. All operations
619//! automatically include execution context (ARN, operation ID, parent ID) in log messages.
620//!
621//! For detailed guidance on configuring tracing for Lambda, log correlation, and best practices,
622//! see the [TRACING.md](../docs/TRACING.md) documentation.
623//!
624//! ### Simplified Logging API
625//!
626//! The [`DurableContext`] provides convenience methods for logging with automatic context:
627//!
628//! ```rust,no_run
629//! use durable_execution_sdk::{DurableContext, DurableError};
630//!
631//! async fn my_workflow(ctx: DurableContext) -> Result<(), DurableError> {
632//!     // Basic logging - context is automatically included
633//!     ctx.log_info("Starting order processing");
634//!     ctx.log_debug("Validating input parameters");
635//!     ctx.log_warn("Retry attempt 2 of 5");
636//!     ctx.log_error("Failed to process payment");
637//!     
638//!     // Logging with extra fields for filtering
639//!     ctx.log_info_with("Processing order", &[
640//!         ("order_id", "ORD-12345"),
641//!         ("customer_id", "CUST-789"),
642//!     ]);
643//!     
644//!     ctx.log_error_with("Payment failed", &[
645//!         ("error_code", "INSUFFICIENT_FUNDS"),
646//!         ("amount", "150.00"),
647//!     ]);
648//!     
649//!     Ok(())
650//! }
651//! ```
652//!
653//! All logging methods automatically include:
654//! - `durable_execution_arn`: The execution ARN for correlation
655//! - `parent_id`: The parent operation ID (for nested operations)
656//! - `is_replay`: Whether the operation is being replayed
657//!
658//! ### Extra Fields in Log Output
659//!
660//! Extra fields passed to `log_*_with` methods are included in the tracing output
661//! as key-value pairs, making them queryable in log aggregation systems like CloudWatch:
662//!
663//! ```rust,ignore
664//! // This log message...
665//! ctx.log_info_with("Order event", &[
666//!     ("event_type", "ORDER_CREATED"),
667//!     ("order_id", "ORD-123"),
668//! ]);
669//!
670//! // ...produces JSON output like:
671//! // {
672//! //   "message": "Order event",
673//! //   "durable_execution_arn": "arn:aws:...",
674//! //   "extra": "event_type=ORDER_CREATED, order_id=ORD-123",
675//! //   ...
676//! // }
677//! ```
678//!
679//! ### Replay-Aware Logging
680//!
681//! The SDK supports replay-aware logging that can suppress or filter logs during replay.
682//! This is useful to reduce noise when replaying previously executed operations.
683//!
684//! ```rust
685//! use durable_execution_sdk::{TracingLogger, ReplayAwareLogger, ReplayLoggingConfig};
686//! use std::sync::Arc;
687//!
688//! // Suppress all logs during replay (default)
689//! let logger = ReplayAwareLogger::suppress_replay(Arc::new(TracingLogger));
690//!
691//! // Allow only errors during replay
692//! let logger_errors = ReplayAwareLogger::new(
693//!     Arc::new(TracingLogger),
694//!     ReplayLoggingConfig::ErrorsOnly,
695//! );
696//!
697//! // Allow all logs during replay
698//! let logger_all = ReplayAwareLogger::allow_all(Arc::new(TracingLogger));
699//! ```
700//!
701//! ### Custom Logger
702//!
703//! You can also provide a custom logger using the factory functions:
704//!
705//! ```rust
706//! use durable_execution_sdk::{custom_logger, simple_custom_logger, LogInfo};
707//!
708//! // Simple custom logger with a single function for all levels
709//! let logger = simple_custom_logger(|level, msg, info| {
710//!     println!("[{}] {}: {:?}", level, msg, info);
711//! });
712//!
713//! // Full custom logger with separate functions for each level
714//! let logger = custom_logger(
715//!     |msg, info| println!("[DEBUG] {}", msg),  // debug
716//!     |msg, info| println!("[INFO] {}", msg),   // info
717//!     |msg, info| println!("[WARN] {}", msg),   // warn
718//!     |msg, info| println!("[ERROR] {}", msg),  // error
719//! );
720//! ```
721//!
722//! ## Duration Type
723//!
724//! The SDK provides a [`Duration`] type with convenient constructors:
725//!
726//! ```rust
727//! use durable_execution_sdk::Duration;
728//!
729//! let five_seconds = Duration::from_seconds(5);
730//! let two_minutes = Duration::from_minutes(2);
731//! let one_hour = Duration::from_hours(1);
732//! let one_day = Duration::from_days(1);
733//!
734//! assert_eq!(five_seconds.to_seconds(), 5);
735//! assert_eq!(two_minutes.to_seconds(), 120);
736//! assert_eq!(one_hour.to_seconds(), 3600);
737//! assert_eq!(one_day.to_seconds(), 86400);
738//! ```
739//!
740//! ## Thread Safety
741//!
742//! The SDK is designed for use in async Rust with Tokio. All core types are
743//! `Send + Sync` and can be safely shared across async tasks:
744//!
745//! - [`DurableContext`] uses `Arc` for shared state
746//! - [`ExecutionState`] uses `RwLock` and atomic operations for thread-safe access
747//! - Operation ID generation uses atomic counters
748//!
749//! ## Best Practices
750//!
751//! 1. **Keep steps small and focused**: Each step should do one thing well.
752//!    This makes debugging easier and reduces the impact of failures.
753//!
754//! 2. **Use named operations**: Named steps and waits make logs and debugging
755//!    much easier to understand.
756//!
757//! 3. **Handle errors appropriately**: Use `DurableError::execution` for errors
758//!    that should fail the workflow, and `DurableError::invocation` for errors
759//!    that should trigger a retry.
760//!
761//! 4. **Consider idempotency**: For operations that may be retried, ensure they
762//!    are idempotent or use `AtMostOncePerRetry` semantics.
763//!
764//! 5. **Use appropriate concurrency limits**: When using `map` or `parallel`,
765//!    set `max_concurrency` to avoid overwhelming downstream services.
766//!
767//! 6. **Set reasonable timeouts**: Always configure timeouts for callbacks and
768//!    invocations to prevent workflows from hanging indefinitely.
769//!
770//! 7. **Ensure determinism**: Your workflow must execute the same sequence of
771//!    operations on every run. Avoid using `HashMap` iteration, random numbers,
772//!    or the current time outside of steps. See [`docs::determinism`] for details.
773//!
774//! 8. **Use replay-safe helpers**: When you need UUIDs or timestamps, use the
775//!    helpers in [`replay_safe`] to ensure consistent values across replays.
776//!
777//! 9. **Use type-safe identifiers**: Prefer [`OperationId`], [`ExecutionArn`], and
778//!    [`CallbackId`] over raw strings to catch type mismatches at compile time.
779//!
780//! 10. **Use trait aliases**: Use [`DurableValue`] in your generic
781//!     functions for cleaner, more maintainable signatures.
782//!
783//! ## Result Type Aliases
784//!
785//! The SDK provides semantic result type aliases for cleaner function signatures:
786//!
787//! - [`DurableResult<T>`](DurableResult): Alias for `Result<T, DurableError>` - general durable operations
788//! - [`StepResult<T>`](StepResult): Alias for `Result<T, DurableError>` - step operation results
789//! - [`CheckpointResult<T>`](CheckpointResult): Alias for `Result<T, DurableError>` - checkpoint operation results
790//!
791//! ```rust,no_run
792//! use durable_execution_sdk::{DurableResult, StepResult, DurableError};
793//!
794//! // Use in function signatures for clarity
795//! fn process_order(order_id: &str) -> DurableResult<String> {
796//!     Ok(format!("Processed: {}", order_id))
797//! }
798//!
799//! fn execute_step() -> StepResult<i32> {
800//!     Ok(42)
801//! }
802//! ```
803//!
804//! ## Module Organization
805//!
806//! - [`client`]: Lambda service client for checkpoint operations
807//! - [`concurrency`]: Concurrent execution types (BatchResult, ConcurrentExecutor)
808//! - [`config`]: Configuration types for all operations
809//! - [`context`]: DurableContext, operation identifiers, and logging (includes factory functions
810//!   [`custom_logger`] and [`simple_custom_logger`] for creating custom loggers)
811//! - [`docs`]: **Documentation modules** - determinism requirements and execution limits
812//!   - [`docs::determinism`]: Understanding determinism for replay-safe workflows
813//!   - [`docs::limits`]: Execution limits and constraints
814//! - [`duration`]: Duration type with convenient constructors
815//! - [`error`]: Error types, error handling, and result type aliases ([`DurableResult`], [`StepResult`], [`CheckpointResult`])
816//! - [`handlers`]: Operation handlers (step, wait, callback, etc.)
817//! - [`lambda`]: Lambda integration types (input/output)
818//! - [`operation`]: Operation types and status enums (optimized with `#[repr(u8)]` for compact memory layout)
819//! - [`replay_safe`]: Replay-safe helpers for deterministic UUIDs and timestamps
820//! - [`serdes`]: Serialization/deserialization system (includes [`custom_serdes`] factory function)
821//! - [`state`]: Execution state and checkpointing system
822//! - [`traits`]: Trait aliases for common bounds ([`DurableValue`], [`StepError`], [`StepFuture`])
823//! - [`types`]: Type-safe newtype wrappers for domain identifiers ([`OperationId`], [`ExecutionArn`], [`CallbackId`])
824
825pub mod client;
826pub mod concurrency;
827pub mod config;
828pub mod context;
829pub mod docs;
830pub mod duration;
831pub mod error;
832pub mod handlers;
833pub mod lambda;
834#[macro_use]
835pub mod macros;
836pub mod operation;
837pub mod replay_safe;
838pub mod retry_presets;
839pub mod runtime;
840pub mod serdes;
841pub mod state;
842pub mod structured_json_logger;
843pub mod summary_generators;
844pub mod termination;
845pub mod traits;
846pub mod types;
847
848// Private module for sealed trait pattern
849mod sealed;
850
851// Re-export main types at crate root
852pub use client::{
853    CheckpointResponse, DurableServiceClient, GetOperationsResponse, LambdaClientConfig,
854    LambdaDurableServiceClient, SharedDurableServiceClient,
855};
856pub use config::*;
857pub use context::{
858    custom_logger, generate_operation_id, simple_custom_logger, CustomLogger, DurableContext,
859    LogInfo, Logger, OperationIdGenerator, OperationIdentifier, ReplayAwareLogger,
860    ReplayLoggingConfig, TracingLogger, WaitForConditionConfig, WaitForConditionContext,
861};
862pub use duration::Duration;
863pub use error::{
864    AwsError, CheckpointResult, DurableError, DurableResult, ErrorObject, StepResult,
865    TerminationReason,
866};
867pub use lambda::{
868    DurableExecutionInvocationInput, DurableExecutionInvocationOutput, InitialExecutionState,
869    InvocationStatus,
870};
871pub use operation::{
872    CallbackDetails, CallbackOptions, ChainedInvokeDetails, ChainedInvokeOptions, ContextDetails,
873    ContextOptions, ExecutionDetails, Operation, OperationAction, OperationStatus, OperationType,
874    OperationUpdate, StepDetails, StepOptions, WaitDetails, WaitOptions,
875};
876pub use serdes::{custom_serdes, CustomSerDes, JsonSerDes, SerDes, SerDesContext, SerDesError};
877pub use state::{
878    create_checkpoint_queue, CheckpointBatcher, CheckpointBatcherConfig, CheckpointRequest,
879    CheckpointSender, CheckpointedResult, ExecutionState, ReplayStatus,
880};
881
882// Re-export newtype wrappers for domain identifiers, plus the `ValidationError` type from the same module
883pub use types::{CallbackId, ExecutionArn, OperationId, ValidationError};
884
885// Re-export trait aliases for common bounds
886pub use traits::{DurableValue, StepError, StepFuture};
887
888// Re-export concurrency types
889pub use concurrency::{
890    BatchItem, BatchItemStatus, BatchResult, CompletionReason, ConcurrentExecutor,
891    ExecutionCounters,
892};
893
894// Re-export handlers
895pub use handlers::{
896    all_handler, all_settled_handler, any_handler, callback_handler, child_handler, invoke_handler,
897    map_handler, parallel_handler, race_handler, step_handler, wait_cancel_handler, wait_handler,
898    Callback, StepContext,
899};
900
901// Re-export replay-safe helpers
902pub use replay_safe::{
903    timestamp_from_execution, timestamp_seconds_from_execution, uuid_from_operation,
904    uuid_string_from_operation, uuid_to_string,
905};
906
907// Re-export runtime support
908pub use runtime::run_durable_handler;
909
910// Re-export termination manager
911pub use termination::TerminationManager;
912
913// Re-export structured JSON logger
914pub use structured_json_logger::{JsonLogContext, LogLevel, StructuredJsonLogger};
915
916// Re-export the `durable_execution` macro when the `macros` feature is enabled
917#[cfg(feature = "macros")]
918pub use durable_execution_sdk_macros::durable_execution;
durable_execution_sdk/lib.rs

durable_execution_sdk/
lib.rs