blazen-core 0.1.106

Core workflow engine for Blazen - event-driven, async, pausable workflows
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
//! Shared workflow state accessible by all steps.
//!
//! [`Context`] wraps an `Arc<RwLock<ContextInner>>` so it can be cheaply
//! cloned and shared across concurrent step executions. It provides:
//!
//! - Typed key/value state storage (backed by JSON for serializability),
//!   plus raw byte storage for binary artifacts
//! - Event emission to the internal routing queue
//! - Fan-in event collection
//! - Publishing events to the external streaming channel
//! - Workflow metadata (e.g. run ID)
//! - State snapshotting and restoration for pause/resume/checkpoint

use std::collections::HashMap;
use std::sync::Arc;

use blazen_events::{AnyEvent, Event, EventEnvelope};
use serde::Serialize;
use serde::de::DeserializeOwned;
use tokio::sync::{RwLock, broadcast, mpsc};
use uuid::Uuid;

use crate::value::{BytesWrapper, StateValue};

/// Map from state key to stored value. Each value is a [`StateValue`], which
/// holds either a JSON value or a raw byte buffer.
type StateMap = HashMap<String, StateValue>;

/// Internal state behind the `Arc<RwLock<_>>`.
///
/// Every field is reached through the lock owned by [`Context`]; nothing
/// here is exposed directly to callers.
struct ContextInner {
    /// Key/value store shared across all steps. Entries are either JSON
    /// values (written by `Context::set`) or raw bytes (written by
    /// `Context::set_bytes`).
    state: StateMap,
    /// Sender side of the internal event routing channel.
    event_tx: mpsc::UnboundedSender<EventEnvelope>,
    /// Sender side of the external broadcast channel for streaming.
    stream_tx: broadcast::Sender<Box<dyn AnyEvent>>,
    /// Fan-in accumulator keyed by event type string. Each buffer holds the
    /// JSON form of events that were pushed and not yet collected, in push
    /// order.
    collected: HashMap<String, Vec<serde_json::Value>>,
    /// Arbitrary JSON metadata (e.g. `run_id`, workflow name).
    metadata: HashMap<String, serde_json::Value>,
}

/// Shared workflow context.
///
/// Cheaply clonable handle to the shared state. Every step receives a
/// `Context` and can read/write state, emit events, and publish to the
/// external stream. Cloning only clones the inner `Arc`, so all clones
/// observe the same state.
///
/// Typed state values are stored as JSON internally, enabling serialization
/// for pause/resume/checkpoint functionality. Users can still use ergonomic
/// typed accessors (`set`/`get`) as long as their types implement
/// `Serialize`/`DeserializeOwned`. Binary payloads that should not go
/// through JSON are stored with `set_bytes`/`get_bytes` instead.
#[derive(Clone)]
pub struct Context {
    /// Lock-protected state shared by every clone of this handle.
    inner: Arc<RwLock<ContextInner>>,
}

impl Context {
    // -----------------------------------------------------------------
    // Construction (crate-internal)
    // -----------------------------------------------------------------

    /// Build a fresh context bound to the supplied channels.
    ///
    /// `event_tx` feeds the internal routing queue and `stream_tx` feeds the
    /// external broadcast stream. The state map, fan-in buffers, and
    /// metadata all start out empty.
    pub(crate) fn new(
        event_tx: mpsc::UnboundedSender<EventEnvelope>,
        stream_tx: broadcast::Sender<Box<dyn AnyEvent>>,
    ) -> Self {
        let shared = ContextInner {
            event_tx,
            stream_tx,
            state: HashMap::new(),
            collected: HashMap::new(),
            metadata: HashMap::new(),
        };
        Self {
            inner: Arc::new(RwLock::new(shared)),
        }
    }

    // -----------------------------------------------------------------
    // Public state accessors
    // -----------------------------------------------------------------

    /// Store `value` under `key` as JSON.
    ///
    /// The value is converted to a JSON tree before the write lock is taken.
    /// Whatever was stored under `key` before (JSON or bytes) is replaced.
    ///
    /// # Panics
    ///
    /// Panics if the value cannot be serialized to JSON. In practice this
    /// should never happen for well-formed serde types.
    pub async fn set<T: Serialize + Send + Sync + 'static>(&self, key: &str, value: T) {
        let encoded = StateValue::Json(
            serde_json::to_value(&value).expect("Context::set: value must be JSON-serializable"),
        );
        let mut guard = self.inner.write().await;
        guard.state.insert(key.to_owned(), encoded);
    }

    /// Retrieve a typed value previously stored under `key`.
    ///
    /// The stored JSON is deserialized back into type `T`. Returns `None`
    /// if the key does not exist or the stored JSON cannot be deserialized
    /// into `T`.
    pub async fn get<T: DeserializeOwned + Send + Sync + Clone + 'static>(
        &self,
        key: &str,
    ) -> Option<T> {
        let inner = self.inner.read().await;
        inner.state.get(key).and_then(|sv| match sv {
            StateValue::Json(v) => serde_json::from_value::<T>(v.clone()).ok(),
            StateValue::Bytes(_) => None,
        })
    }

    /// Store a raw byte buffer under `key`.
    ///
    /// Intended for files, images, audio, and other binary artifacts that
    /// should not be JSON-serialized. Replaces any value already stored
    /// under `key`.
    pub async fn set_bytes(&self, key: &str, data: Vec<u8>) {
        let payload = StateValue::Bytes(BytesWrapper(data));
        let mut guard = self.inner.write().await;
        guard.state.insert(key.to_owned(), payload);
    }

    /// Fetch a copy of the raw bytes stored under `key`.
    ///
    /// Returns `None` when the key is absent or when the entry holds a JSON
    /// value instead of bytes.
    pub async fn get_bytes(&self, key: &str) -> Option<Vec<u8>> {
        let guard = self.inner.read().await;
        match guard.state.get(key)? {
            StateValue::Bytes(wrapper) => Some(wrapper.0.clone()),
            StateValue::Json(_) => None,
        }
    }

    // -----------------------------------------------------------------
    // Event emission
    // -----------------------------------------------------------------

    /// Queue an event on the internal routing channel.
    ///
    /// The event loop picks the event up and routes it to any step whose
    /// `accepts` list includes its event type.
    pub async fn send_event<E: Event + Serialize>(&self, event: E) {
        let guard = self.inner.read().await;
        let outgoing = EventEnvelope::new(Box::new(event), None);
        // A send error only means the receiver is gone (the workflow has
        // already terminated), so it is deliberately discarded.
        let _ = guard.event_tx.send(outgoing);
    }

    /// Broadcast an event on the external stream.
    ///
    /// Consumers that called [`crate::WorkflowHandler::stream_events`] will
    /// receive this event. Unlike [`send_event`](Self::send_event), the event
    /// is **not** routed through the internal step registry.
    pub async fn write_event_to_stream<E: Event + Serialize>(&self, event: E) {
        let guard = self.inner.read().await;
        let boxed = Box::new(event);
        // A send error only means nobody is subscribed right now, so it is
        // deliberately discarded.
        let _ = guard.stream_tx.send(boxed);
    }

    // -----------------------------------------------------------------
    // Fan-in collection
    // -----------------------------------------------------------------

    /// Take `expected_count` buffered events of type `E` once enough have
    /// arrived.
    ///
    /// Returns `None` while fewer than `expected_count` events of this type
    /// are buffered. Otherwise the oldest `expected_count` entries are
    /// removed from the buffer, deserialized into `E`, and returned in
    /// arrival order. Any surplus events stay buffered for a later call.
    ///
    /// Buffered entries that fail to deserialize into `E` are skipped, so
    /// the returned vector can be shorter than `expected_count`.
    ///
    /// Polling for a type that has never been pushed does not create a
    /// buffer for it, so it leaves no empty entry behind in
    /// [`snapshot_collected`](Self::snapshot_collected).
    pub async fn collect_events<E: Event + DeserializeOwned>(
        &self,
        expected_count: usize,
    ) -> Option<Vec<E>> {
        let mut inner = self.inner.write().await;
        let type_key = E::event_type().to_owned();

        // Look the buffer up rather than `entry().or_default()`: a mere poll
        // must not insert an empty buffer into the checkpointed map.
        let buffer = match inner.collected.get_mut(&type_key) {
            Some(buffer) => buffer,
            // No buffer means zero events; that only satisfies a request
            // for zero events.
            None => return (expected_count == 0).then(Vec::new),
        };
        if buffer.len() < expected_count {
            return None;
        }

        let events = buffer
            .drain(..expected_count)
            .filter_map(|json_val| serde_json::from_value::<E>(json_val).ok())
            .collect();
        Some(events)
    }

    /// Append a type-erased event to the fan-in buffer for its type.
    ///
    /// The buffer key is the event's type string (from
    /// `AnyEvent::event_type_id`) and the stored entry is the event's JSON
    /// form (from `AnyEvent::to_json`).
    pub(crate) async fn push_collected(&self, event: &dyn AnyEvent) {
        let mut guard = self.inner.write().await;
        let payload = event.to_json();
        let bucket = guard.collected.entry(event.event_type_id().to_owned()).or_default();
        bucket.push(payload);
    }

    /// Drop the whole fan-in buffer for event type `E`, if one exists.
    // NOTE(review): `dead_code` is allowed presumably because nothing in the
    // crate calls this yet -- confirm before removing the attribute.
    #[allow(dead_code)]
    pub(crate) async fn clear_collected<E: Event>(&self) {
        let mut guard = self.inner.write().await;
        let stale_key = E::event_type().to_owned();
        guard.collected.remove(&stale_key);
    }

    // -----------------------------------------------------------------
    // Snapshotting & restoration
    // -----------------------------------------------------------------

    /// Copy out the complete state map.
    ///
    /// The copy is independent of the live state, which makes it suitable
    /// for checkpointing or pausing a workflow so it can be resumed later.
    pub async fn snapshot_state(&self) -> HashMap<String, StateValue> {
        self.inner.read().await.state.clone()
    }

    /// Overwrite the complete state map with `state`.
    ///
    /// Used to restore state from a previous checkpoint. Everything stored
    /// before the call is discarded.
    pub async fn restore_state(&self, state: HashMap<String, StateValue>) {
        let mut guard = self.inner.write().await;
        guard.state = state;
    }

    /// Copy out the fan-in buffers (events in their JSON form, keyed by
    /// event type string).
    ///
    /// Useful for checkpointing fan-in state alongside the main state map.
    pub async fn snapshot_collected(&self) -> HashMap<String, Vec<serde_json::Value>> {
        self.inner.read().await.collected.clone()
    }

    /// Overwrite all fan-in buffers with `collected`.
    ///
    /// Used to restore fan-in state from a previous checkpoint. Events
    /// buffered before the call are discarded.
    pub async fn restore_collected(&self, collected: HashMap<String, Vec<serde_json::Value>>) {
        let mut guard = self.inner.write().await;
        guard.collected = collected;
    }

    /// Copy out the metadata map.
    ///
    /// Useful for checkpointing metadata alongside the main state map.
    pub async fn snapshot_metadata(&self) -> HashMap<String, serde_json::Value> {
        self.inner.read().await.metadata.clone()
    }

    /// Overwrite the metadata map with `metadata`.
    ///
    /// Used to restore metadata from a previous checkpoint. Metadata present
    /// before the call is discarded.
    pub(crate) async fn restore_metadata(&self, metadata: HashMap<String, serde_json::Value>) {
        let mut guard = self.inner.write().await;
        guard.metadata = metadata;
    }

    // -----------------------------------------------------------------
    // Metadata
    // -----------------------------------------------------------------

    /// Get the workflow run ID from metadata.
    ///
    /// # Panics
    ///
    /// Panics if the `run_id` metadata key was never set (this is always
    /// set by the workflow engine before any step executes).
    pub async fn run_id(&self) -> Uuid {
        let inner = self.inner.read().await;
        inner
            .metadata
            .get("run_id")
            .and_then(|v| v.as_str())
            .and_then(|s| Uuid::parse_str(s).ok())
            .expect("run_id must be set in workflow metadata")
    }

    /// Insert or replace the metadata entry for `key`.
    pub(crate) async fn set_metadata(&self, key: &str, value: serde_json::Value) {
        let owned_key = key.to_owned();
        let mut guard = self.inner.write().await;
        guard.metadata.insert(owned_key, value);
    }

    /// Publish the end-of-stream sentinel on the broadcast stream.
    ///
    /// The sentinel is a dynamic event of type `"blazen::StreamEnd"` with a
    /// null payload. Consumers that check for that type can use it to
    /// terminate their iteration.
    pub(crate) async fn signal_stream_end(&self) {
        let sentinel = blazen_events::DynamicEvent {
            event_type: String::from("blazen::StreamEnd"),
            data: serde_json::Value::Null,
        };
        self.write_event_to_stream(sentinel).await;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a context on scratch channels; both receivers are dropped when
    /// this helper returns.
    fn test_context() -> Context {
        let (routing_tx, _routing_rx) = mpsc::unbounded_channel();
        let (broadcast_tx, _broadcast_rx) = broadcast::channel(16);
        Context::new(routing_tx, broadcast_tx)
    }

    #[tokio::test]
    async fn set_and_get_typed_value() {
        let ctx = test_context();
        ctx.set("counter", 42_u64).await;

        let stored: Option<u64> = ctx.get("counter").await;
        assert_eq!(stored, Some(42));
    }

    #[tokio::test]
    async fn get_wrong_type_returns_none() {
        let ctx = test_context();
        ctx.set("counter", 42_u64).await;

        // A JSON number does not deserialize into a `String`, so the typed
        // lookup must come back empty.
        let as_text: Option<String> = ctx.get("counter").await;
        assert_eq!(as_text, None);
    }

    #[tokio::test]
    async fn get_missing_key_returns_none() {
        let ctx = test_context();

        let absent: Option<u64> = ctx.get("nope").await;
        assert_eq!(absent, None);
    }

    #[tokio::test]
    async fn run_id_roundtrip() {
        let ctx = test_context();
        let expected = Uuid::new_v4();
        let encoded = serde_json::Value::String(expected.to_string());
        ctx.set_metadata("run_id", encoded).await;

        assert_eq!(ctx.run_id().await, expected);
    }

    #[tokio::test]
    async fn collect_events_accumulation() {
        use blazen_events::StartEvent;

        let ctx = test_context();
        let first = StartEvent {
            data: serde_json::json!(1),
        };
        let second = StartEvent {
            data: serde_json::json!(2),
        };

        // A single buffered event is below the threshold of two.
        ctx.push_collected(&first).await;
        let early = ctx.collect_events::<StartEvent>(2).await;
        assert!(early.is_none());

        // The second push reaches the threshold; events come back in push
        // order.
        ctx.push_collected(&second).await;
        let batch = ctx.collect_events::<StartEvent>(2).await.unwrap();
        assert_eq!(batch.len(), 2);
        assert_eq!(batch[0].data, serde_json::json!(1));
        assert_eq!(batch[1].data, serde_json::json!(2));
    }

    #[tokio::test]
    async fn snapshot_and_restore_state() {
        let ctx = test_context();
        ctx.set("name", "alice".to_string()).await;
        ctx.set("count", 10_u32).await;

        // The snapshot captures both entries in their JSON form.
        let checkpoint = ctx.snapshot_state().await;
        assert_eq!(checkpoint.len(), 2);
        assert_eq!(
            checkpoint.get("name").unwrap(),
            &StateValue::Json(serde_json::json!("alice"))
        );
        assert_eq!(
            checkpoint.get("count").unwrap(),
            &StateValue::Json(serde_json::json!(10))
        );

        // Diverge from the snapshot.
        ctx.set("name", "bob".to_string()).await;
        let changed: Option<String> = ctx.get("name").await;
        assert_eq!(changed, Some("bob".to_string()));

        // Restoring brings back the values captured earlier.
        ctx.restore_state(checkpoint).await;
        let restored_name: Option<String> = ctx.get("name").await;
        let restored_count: Option<u32> = ctx.get("count").await;
        assert_eq!(restored_name, Some("alice".to_string()));
        assert_eq!(restored_count, Some(10));
    }

    #[tokio::test]
    async fn set_and_get_bytes() {
        let ctx = test_context();
        let payload = vec![0xDE, 0xAD, 0xBE, 0xEF];
        ctx.set_bytes("binary", payload.clone()).await;

        let raw = ctx.get_bytes("binary").await;
        assert_eq!(raw, Some(payload));

        // The typed accessor must not see byte entries.
        let typed: Option<String> = ctx.get("binary").await;
        assert_eq!(typed, None);
    }

    #[tokio::test]
    async fn get_bytes_returns_none_for_json() {
        let ctx = test_context();
        ctx.set("key", "value".to_string()).await;

        let raw = ctx.get_bytes("key").await;
        assert_eq!(raw, None);
    }

    #[tokio::test]
    async fn get_bytes_returns_none_for_missing_key() {
        let ctx = test_context();

        let raw = ctx.get_bytes("nope").await;
        assert_eq!(raw, None);
    }

    #[tokio::test]
    async fn snapshot_collected() {
        use blazen_events::StartEvent;

        let ctx = test_context();
        let only = StartEvent {
            data: serde_json::json!("a"),
        };
        ctx.push_collected(&only).await;

        // One event type buffered, holding exactly the one pushed event.
        let buffers = ctx.snapshot_collected().await;
        assert_eq!(buffers.len(), 1);
        let start_buffer = buffers.get("blazen::StartEvent").unwrap();
        assert_eq!(start_buffer.len(), 1);
    }
}