// bamboo-infrastructure 2026.6.2
//
// Infrastructure services and integrations for the Bamboo agent framework.

//! Merge-aware session save helper.
//!
//! Provides [`merge_save_session`], which preserves any concurrent UI edits to
//! the authoritative metadata group (`title`, `title_version`, `pinned`,
//! `metadata_version`) before writing the runtime-modified session to storage.
//! Re-reads the latest persisted copy and only takes in-memory values when the
//! caller's `metadata_version` strictly exceeds disk's.
//!
//! ## Field-by-field merge policy
//!
//! All authoritative metadata fields are grouped under `metadata_version`:
//! when `disk.metadata_version >= session.metadata_version`, the on-disk
//! `title`, `title_version`, `pinned`, `metadata_version`, and the `metadata`
//! entries named in `AUTHORITATIVE_METADATA_KEYS` overwrite the in-memory
//! values before writing (a listed key that is absent on disk is removed from
//! the in-memory copy). Authoritative writers bump `metadata_version` (and
//! `title_version` for title edits) before calling so their values survive the
//! merge; non-authoritative writers don't bump and so are overwritten by any
//! later disk changes.
//!
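//! ## Save primitives
//!
//! - **`merge_save_session`** — stateless merge+save. Still works for
//!   non-authoritative writers that hold `Arc<dyn Storage>` directly.
//! - **`LockedSessionStore::merge_save_runtime`** — per-session-locked variant
//!   that additionally serializes writes for the same session. Prefer this for
//!   server-side paths where an authoritative writer may race with a runtime
//!   save.
//! - **`LockedSessionStore::commit_metadata`** — plain save inside a per-session
//!   lock. For authoritative writers that have already performed
//!   load→mutate→bump; no merge is performed (the caller is trusted to hold the
//!   latest copy). It takes the lock itself, so it must not be called while
//!   holding a guard from `acquire_lock` for the same session.
//! - **`LockedSessionStore::update_runtime_config`** — load→mutate→save done
//!   entirely inside the per-session lock. For config-only patches that must
//!   not overwrite concurrently appended `messages`.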
//!
//! Bare [`Storage::save_session`] is reserved for first-write paths (e.g. new
//! session creation) where there is no prior on-disk copy to merge against.

use std::sync::Arc;

use bamboo_domain::session::types::Session;
use bamboo_domain::storage::Storage;
use bamboo_domain::RuntimeSessionPersistence;
use dashmap::DashMap;
use tokio::sync::{Mutex, OwnedMutexGuard};

/// Keys of `Session::metadata` that belong to the authoritative metadata group.
///
/// During a merge these entries are copied from the persisted session into the
/// in-memory one (or removed from it when absent on disk), together with
/// `title`, `title_version`, `pinned` and `metadata_version`.
const AUTHORITATIVE_METADATA_KEYS: &[&str] = &["gold_config"];

// ── LockedSessionStore ────────────────────────────────────────────────

/// Wraps a [`Storage`] implementation with per-session write serialization.
///
/// Under the hood it maintains a `DashMap<String, Arc<Mutex<()>>>` so that
/// only writes targeting the *same* session are serialised; different
/// sessions proceed concurrently.
///
/// Serialization only applies to callers that go through this type's methods
/// (or hold a guard from `acquire_lock`); code that writes to the underlying
/// storage directly is not coordinated by these locks.
pub struct LockedSessionStore {
    /// Backend that actually loads and persists sessions.
    storage: Arc<dyn Storage>,
    /// Lock table keyed by session id; each entry guards writes for one session.
    locks: Arc<DashMap<String, Arc<Mutex<()>>>>,
}

impl LockedSessionStore {
    /// Wrap an existing storage backend.
    ///
    /// The lock table starts empty; per-session locks are created lazily by
    /// [`Self::acquire_lock`].
    pub fn new(storage: Arc<dyn Storage>) -> Self {
        Self {
            storage,
            locks: Arc::new(DashMap::new()),
        }
    }

    /// Borrow the inner storage.
    ///
    /// Intended for read-only access. Writes issued through this handle are
    /// not serialised unless the caller is holding a guard obtained from
    /// [`Self::acquire_lock`] for the session being written.
    pub fn storage(&self) -> &Arc<dyn Storage> {
        &self.storage
    }

    /// Acquire a per-session serialization guard.
    ///
    /// Only writes for the **same** session are serialised; writes for
    /// different sessions can proceed concurrently.
    ///
    /// The lock is not reentrant: awaiting a second guard for the same session
    /// while the first is still alive in the same task never completes.
    ///
    /// Lock entries are created on first use and nothing in this module removes
    /// them, so the table grows with the number of distinct session ids seen.
    pub async fn acquire_lock(&self, session_id: &str) -> OwnedMutexGuard<()> {
        // The DashMap entry guard is a temporary of this statement, so the map
        // shard is released before we start waiting on the session mutex.
        let lock = self
            .locks
            .entry(session_id.to_string())
            .or_insert_with(|| Arc::new(Mutex::new(())))
            .clone();
        lock.lock_owned().await
    }

    /// Authoritative metadata commit.
    ///
    /// The caller must have already loaded the latest session, mutated the
    /// metadata fields, and bumped `metadata_version` (and `title_version` if
    /// applicable). This method acquires the per-session lock and performs a
    /// plain `storage.save_session`; no merge is attempted.
    ///
    /// The lock is taken *here*, not by the caller, so it only orders this save
    /// relative to other locked writes. It does not cover the caller's earlier
    /// load: a write that lands between that load and this call is overwritten.
    /// Callers that need an atomic load→mutate→save should hold a guard from
    /// [`Self::acquire_lock`] and save through [`Self::storage`] instead — and
    /// must not call this method while holding that guard, because the lock is
    /// not reentrant and the call would never complete.
    ///
    /// # Errors
    ///
    /// Returns whatever error `save_session` reports.
    pub async fn commit_metadata(&self, session: &Session) -> std::io::Result<()> {
        let _guard = self.acquire_lock(&session.id).await;
        self.storage.save_session(session).await
    }

    /// Runtime / non-authoritative save with per-session lock.
    ///
    /// Inside the lock: load the persisted copy once, merge the authoritative
    /// metadata group (`title`, `title_version`, `pinned`, `metadata_version`
    /// and the `metadata` entries named in `AUTHORITATIVE_METADATA_KEYS`) from
    /// disk into the in-memory copy if disk's
    /// `metadata_version >= session.metadata_version`, then save.
    ///
    /// Everything else — notably `messages` — is written exactly as passed in,
    /// so a stale snapshot still reverts concurrent appends; that case is
    /// logged as a `SHRINK` warning.
    ///
    /// Loading is best-effort: if the persisted copy cannot be read, the
    /// failure is logged and the session is saved without merging.
    ///
    /// This is the locked equivalent of [`merge_save_session`]; prefer it for
    /// server-side paths where an authoritative write may race with this save.
    ///
    /// # Errors
    ///
    /// Returns whatever error `save_session` reports.
    pub async fn merge_save_runtime(&self, session: &mut Session) -> std::io::Result<()> {
        let _guard = self.acquire_lock(&session.id).await;

        // One load feeds both the diagnostic below and the metadata merge, so
        // the (potentially large) session is read from storage only once.
        let latest = match self.storage.load_session(&session.id).await {
            Ok(found) => found,
            Err(err) => {
                tracing::warn!(
                    "[{}] merge_save_runtime: could not load persisted session ({:?}); saving without metadata merge",
                    session.id,
                    err,
                );
                None
            }
        };

        // DIAGNOSTIC: merge_save_runtime overwrites the whole `messages` array
        // (it only merges authoritative metadata, not messages). If the incoming
        // session is stale (fewer messages than what is already on disk), this save
        // silently reverts a concurrent append (e.g. a just-persisted user message).
        // Log a SHRINK warning so we can identify the stale writer.
        let existing_message_count = latest.as_ref().map(|s| s.messages.len());
        let incoming_message_count = session.messages.len();
        if existing_message_count.is_some_and(|existing| existing > incoming_message_count) {
            tracing::warn!(
                "[{}] merge_save_runtime SHRINK: disk has {:?} messages, saving {} (last_role={:?}, updated_at={}); a stale writer is reverting a concurrent append",
                session.id,
                existing_message_count,
                incoming_message_count,
                session.messages.last().map(|m| format!("{:?}", m.role)),
                session.updated_at,
            );
        } else {
            tracing::debug!(
                "[{}] merge_save_runtime: disk={:?} messages, saving {} (updated_at={})",
                session.id,
                existing_message_count,
                incoming_message_count,
                session.updated_at,
            );
        }

        if let Some(latest) = latest {
            apply_authoritative_metadata(latest, session);
        }
        self.storage.save_session(session).await
    }

    /// Apply a config-only mutation to a session without ever clobbering its
    /// `messages` (or other concurrently-written state).
    ///
    /// Unlike [`Self::merge_save_runtime`], the caller does NOT pass a session
    /// snapshot. Instead this loads the **latest** session from storage *inside*
    /// the per-session lock, applies `mutate` (intended for small config fields
    /// like `model_ref` / `reasoning_effort`), and saves. Because the load and
    /// save both happen under the lock, this write cannot revert an append made
    /// by another writer that also goes through the same per-session lock
    /// (e.g. `POST /chat` adding a user message).
    ///
    /// Returns the saved session, or `None` if it does not exist.
    ///
    /// # Errors
    ///
    /// Propagates load and save errors from the underlying storage. Nothing is
    /// written when the load fails or the session is missing.
    pub async fn update_runtime_config<F>(
        &self,
        session_id: &str,
        mutate: F,
    ) -> std::io::Result<Option<Session>>
    where
        F: FnOnce(&mut Session),
    {
        let _guard = self.acquire_lock(session_id).await;
        let Some(mut session) = self.storage.load_session(session_id).await? else {
            return Ok(None);
        };
        mutate(&mut session);
        self.storage.save_session(&session).await?;
        Ok(Some(session))
    }
}

/// Infrastructure implementation of the domain runtime-persistence port.
/// Server should assemble this as `Arc<dyn RuntimeSessionPersistence>` and must
/// not define a separate adapter layer for the same behavior.
#[async_trait::async_trait]
impl RuntimeSessionPersistence for LockedSessionStore {
    /// Delegates to [`LockedSessionStore::merge_save_runtime`].
    async fn save_runtime_session(&self, session: &mut Session) -> std::io::Result<()> {
        self.merge_save_runtime(session).await
    }
}

// ── Internal merge helper ─────────────────────────────────────────────

/// Re-read the on-disk session and, when the disk copy carries a
/// `metadata_version >= session.metadata_version`, overwrite the in-memory
/// authoritative metadata fields with the disk values.
///
/// This is the core staleness-correction: non-authoritative writers call it
/// before saving so they don't accidentally revert a concurrent UI edit.
///
/// The load is best-effort. A missing session leaves `session` untouched; a
/// load error also leaves it untouched but is logged, because the caller's
/// subsequent save may then overwrite newer metadata.
async fn merge_authoritative_metadata_into_stale(
    storage: &Arc<dyn Storage>,
    session: &mut Session,
) {
    match storage.load_session(&session.id).await {
        Ok(Some(latest)) => apply_authoritative_metadata(latest, session),
        Ok(None) => {}
        Err(err) => {
            tracing::warn!(
                "[{}] metadata merge skipped: could not load persisted session ({:?})",
                session.id,
                err,
            );
        }
    }
}

/// Copy the authoritative metadata group from `latest` into `session` when
/// `latest.metadata_version >= session.metadata_version`; otherwise do nothing.
///
/// The group is `title`, `title_version`, `pinned`, `metadata_version`, plus
/// every `metadata` entry named in `AUTHORITATIVE_METADATA_KEYS`. A listed key
/// that `latest` does not carry is removed from `session` so deletions
/// propagate as well.
fn apply_authoritative_metadata(latest: Session, session: &mut Session) {
    // The in-memory copy is strictly newer: an authoritative writer bumped the
    // version, so its values must survive.
    if latest.metadata_version < session.metadata_version {
        return;
    }

    session.title = latest.title;
    session.title_version = latest.title_version;
    session.pinned = latest.pinned;
    for key in AUTHORITATIVE_METADATA_KEYS {
        if let Some(value) = latest.metadata.get(*key) {
            session.metadata.insert((*key).to_string(), value.clone());
        } else {
            session.metadata.remove(*key);
        }
    }
    session.metadata_version = latest.metadata_version;
}

// ── Free merge-save function ──────────────────────────────────────────

/// Save a session while preserving any concurrent UI edits to the
/// authoritative metadata group.
///
/// Behaviour: if the on-disk session has `metadata_version >=
/// session.metadata_version`, the on-disk `title`, `title_version`, `pinned`,
/// `metadata_version` and the `metadata` entries named in
/// `AUTHORITATIVE_METADATA_KEYS` overwrite the in-memory values before
/// writing. `session` is updated in place, so the caller sees the merged
/// values after the call.
///
/// The pre-save load is best-effort: when there is no on-disk copy, or it
/// cannot be loaded, `session` is saved as passed in.
///
/// This is the stateless variant (no per-session lock). Prefer
/// [`LockedSessionStore::merge_save_runtime`] for server-side paths where an
/// authoritative writer may race with this save.
///
/// # Errors
///
/// Returns whatever error `save_session` reports.
pub async fn merge_save_session(
    storage: &Arc<dyn Storage>,
    session: &mut Session,
) -> std::io::Result<()> {
    merge_authoritative_metadata_into_stale(storage, session).await;
    storage.save_session(session).await
}

// ── Tests ─────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::v2::SessionStoreV2;
    use bamboo_domain::session::types::Session;

    /// Build a `SessionStoreV2` rooted in a fresh temporary directory.
    ///
    /// The `TempDir` handle is returned next to the storage so the caller can
    /// keep it in scope for as long as the storage is in use.
    async fn make_storage() -> (tempfile::TempDir, Arc<dyn Storage>) {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_path_buf();
        let backend: Arc<dyn Storage> =
            Arc::new(SessionStoreV2::new(root).await.expect("storage init"));
        (dir, backend)
    }

    /// Create a new in-memory session with the given id and the fixed model
    /// name `"test-model"`.
    fn fresh(id: &str) -> Session {
        let model = String::from("test-model");
        Session::new(id.to_owned(), model)
    }

    // ── update_runtime_config: config patches must never clobber messages ──

    #[tokio::test]
    async fn update_runtime_config_preserves_concurrently_appended_messages() {
        use bamboo_domain::session::types::Message;
        use bamboo_domain::ReasoningEffort;

        let (_temp, storage) = make_storage().await;
        let store = LockedSessionStore::new(storage.clone());
        let session_id = "cfg-preserve";

        // Persisted baseline: one user + one assistant turn.
        let mut initial = fresh(session_id);
        initial.add_message(Message::user("hello"));
        initial.add_message(Message::assistant("hi", None));
        storage.save_session(&initial).await.unwrap();

        // Simulate `POST /chat` appending a new user message to disk.
        let mut after_chat = storage.load_session(session_id).await.unwrap().unwrap();
        after_chat.add_message(Message::user("second question"));
        storage.save_session(&after_chat).await.unwrap();
        assert_eq!(after_chat.messages.len(), 3);

        // A config-only patch must load the freshest session and preserve the
        // appended message (this is the regression that broke message sending on
        // existing sessions).
        let updated = store
            .update_runtime_config(session_id, |s| {
                s.reasoning_effort = Some(ReasoningEffort::Max);
            })
            .await
            .unwrap()
            .expect("session exists");

        assert_eq!(updated.reasoning_effort, Some(ReasoningEffort::Max));
        assert_eq!(
            updated.messages.len(),
            3,
            "config patch must not revert a concurrently-appended message"
        );

        let on_disk = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(on_disk.messages.len(), 3);
        assert_eq!(on_disk.reasoning_effort, Some(ReasoningEffort::Max));
    }

    /// Patching a session id that was never persisted yields `Ok(None)`.
    #[tokio::test]
    async fn update_runtime_config_returns_none_for_missing_session() {
        use bamboo_domain::ReasoningEffort;

        let (_temp, backend) = make_storage().await;
        let locked = LockedSessionStore::new(backend);

        let outcome = locked
            .update_runtime_config("does-not-exist", |s| {
                s.reasoning_effort = Some(ReasoningEffort::Low);
            })
            .await
            .unwrap();

        assert!(outcome.is_none());
    }

    #[tokio::test]
    async fn merge_save_runtime_overwrites_messages_from_stale_snapshot() {
        // Characterization of the bug that motivated `update_runtime_config`:
        // `merge_save_runtime` writes the caller's `messages` verbatim, so a
        // stale snapshot reverts a concurrent append. Config-only writers must
        // therefore use `update_runtime_config`, never `merge_save_runtime`.
        use bamboo_domain::session::types::Message;

        let (_temp, storage) = make_storage().await;
        let store = LockedSessionStore::new(storage.clone());
        let session_id = "stale-clobber";

        // A handler loads the session (1 message) …
        let mut baseline = fresh(session_id);
        baseline.add_message(Message::user("hello"));
        storage.save_session(&baseline).await.unwrap();
        let mut stale_snapshot = storage.load_session(session_id).await.unwrap().unwrap();

        // … then `POST /chat` appends a second message to disk …
        let mut after_chat = storage.load_session(session_id).await.unwrap().unwrap();
        after_chat.add_message(Message::user("second"));
        storage.save_session(&after_chat).await.unwrap();
        assert_eq!(
            storage
                .load_session(session_id)
                .await
                .unwrap()
                .unwrap()
                .messages
                .len(),
            2
        );

        // … and the stale handler saves via merge_save_runtime -> append reverted.
        store.merge_save_runtime(&mut stale_snapshot).await.unwrap();
        let after = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(
            after.messages.len(),
            1,
            "merge_save_runtime clobbers concurrent appends — this is why config patches must use update_runtime_config"
        );
    }

    // ── Free-function merge tests (updated for metadata-group) ──────

    /// With equal `metadata_version` on both sides the persisted title wins,
    /// and the in-memory session is updated to match.
    #[tokio::test]
    async fn merge_preserves_disk_title_when_versions_equal() {
        let session_id = "merge-equal";
        let (_temp, storage) = make_storage().await;

        // Persisted copy carrying the user's title at version 0.
        let mut persisted = fresh(session_id);
        persisted.metadata_version = 0;
        persisted.title_version = 0;
        persisted.title = String::from("User Set This");
        storage.save_session(&persisted).await.unwrap();

        // Runtime copy at the same versions but with a different title.
        let mut runtime = fresh(session_id);
        runtime.metadata_version = 0;
        runtime.title_version = 0;
        runtime.title = String::from("Stale Default");
        runtime.messages = Vec::new();

        merge_save_session(&storage, &mut runtime).await.unwrap();

        let reloaded = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(reloaded.title, "User Set This");
        assert_eq!(reloaded.title_version, 0);
        assert_eq!(runtime.title, "User Set This");
    }

    /// A persisted copy with a higher `metadata_version` keeps its title and
    /// both version counters after a runtime save.
    #[tokio::test]
    async fn merge_preserves_disk_when_disk_version_higher() {
        let session_id = "merge-higher";
        let (_temp, storage) = make_storage().await;

        let mut persisted = fresh(session_id);
        persisted.metadata_version = 5;
        persisted.title_version = 3;
        persisted.title = String::from("User Title v3");
        storage.save_session(&persisted).await.unwrap();

        let mut runtime = fresh(session_id);
        runtime.metadata_version = 0;
        runtime.title_version = 1;
        runtime.title = String::from("Stale");

        merge_save_session(&storage, &mut runtime).await.unwrap();

        let reloaded = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(reloaded.title, "User Title v3");
        assert_eq!(reloaded.title_version, 3);
        assert_eq!(reloaded.metadata_version, 5);
    }

    /// `pinned` is part of the metadata group: the persisted value survives a
    /// runtime save that carries a lower `metadata_version`.
    #[tokio::test]
    async fn merge_now_preserves_disk_pinned_in_metadata_group() {
        let session_id = "pinned-merge";
        let (_temp, storage) = make_storage().await;

        let mut persisted = fresh(session_id);
        persisted.metadata_version = 2;
        persisted.pinned = true;
        storage.save_session(&persisted).await.unwrap();

        let mut runtime = fresh(session_id);
        runtime.metadata_version = 0;
        runtime.pinned = false;

        merge_save_session(&storage, &mut runtime).await.unwrap();

        let reloaded = storage.load_session(session_id).await.unwrap().unwrap();
        assert!(
            reloaded.pinned,
            "disk pinned=true should win over runtime false"
        );
        assert_eq!(reloaded.metadata_version, 2);
    }

    /// A writer that bumped `metadata_version` past the persisted value keeps
    /// its own title, versions and `pinned` flag.
    #[tokio::test]
    async fn merge_keeps_in_memory_when_session_version_higher() {
        let session_id = "merge-bumped";
        let (_temp, storage) = make_storage().await;

        let mut persisted = fresh(session_id);
        persisted.metadata_version = 3;
        persisted.title_version = 1;
        persisted.title = String::from("Old");
        storage.save_session(&persisted).await.unwrap();

        // One step ahead of disk on `metadata_version`.
        let mut authoritative = fresh(session_id);
        authoritative.metadata_version = 4;
        authoritative.title_version = 2;
        authoritative.title = String::from("New Authoritative");
        authoritative.pinned = true;

        merge_save_session(&storage, &mut authoritative)
            .await
            .unwrap();

        let reloaded = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(reloaded.title, "New Authoritative");
        assert_eq!(reloaded.title_version, 2);
        assert_eq!(reloaded.metadata_version, 4);
        assert!(reloaded.pinned);
    }

    /// The merge touches metadata only: the persisted title and version are
    /// kept while the runtime copy's messages are written through.
    #[tokio::test]
    async fn merge_keeps_runtime_messages_when_disk_only_changed_metadata() {
        use bamboo_domain::session::types::{Message, Role};

        let session_id = "merge-messages";
        let (_temp, storage) = make_storage().await;

        let mut persisted = fresh(session_id);
        persisted.metadata_version = 5;
        persisted.title_version = 2;
        persisted.title = String::from("Fresh Title");
        storage.save_session(&persisted).await.unwrap();

        // The single message the runtime copy carries.
        let kept = Message {
            role: Role::User,
            content: String::from("keep me"),
            id: String::from("msg-1"),
            created_at: chrono::Utc::now(),
            reasoning: None,
            content_parts: None,
            image_ocr: None,
            phase: None,
            tool_calls: None,
            tool_call_id: None,
            tool_success: None,
            compressed: false,
            compressed_by_event_id: None,
            never_compress: false,
            compression_level: 0,
            metadata: None,
        };

        let mut runtime = fresh(session_id);
        runtime.title = String::from("Stale");
        runtime.metadata_version = 0;
        runtime.messages = vec![kept];

        merge_save_session(&storage, &mut runtime).await.unwrap();

        let reloaded = storage.load_session(session_id).await.unwrap().unwrap();
        assert_eq!(reloaded.title, "Fresh Title");
        assert_eq!(reloaded.metadata_version, 5);
        assert_eq!(reloaded.messages.len(), 1);
        assert_eq!(reloaded.messages[0].content, "keep me");
    }

    // ── LockedSessionStore tests ────────────────────────────────────

    /// Two tasks each perform load → bump → save while holding the per-session
    /// guard; the final `metadata_version` must reflect both bumps.
    ///
    /// Despite the name, this test does not call `merge_save_runtime`: it
    /// exercises `acquire_lock` together with direct saves through `storage()`.
    #[tokio::test]
    async fn locked_merge_save_runtime_serialises_concurrent_writes() {
        let (_temp, storage) = make_storage().await;
        let store = Arc::new(LockedSessionStore::new(storage));
        let session_id = "lock-serial".to_string();

        // Seed with base version.
        let base = fresh(&session_id);
        store.storage().save_session(&base).await.unwrap();

        // Two concurrent authorised writers each bump and commit.
        // We'll simulate via clone-and-bump-then-commit.
        let store_a = store.clone();
        let store_b = store.clone();
        let sid_a = session_id.clone();
        let sid_b = session_id.clone();

        // Writer A: the guard is held across the whole load → mutate → save
        // sequence, so writer B cannot interleave with it.
        let a = tokio::spawn(async move {
            let _guard = store_a.acquire_lock(&sid_a).await;
            let mut s = store_a
                .storage()
                .load_session(&sid_a)
                .await
                .unwrap()
                .unwrap();
            s.title = "Writer A".to_string();
            s.title_version = s.title_version.saturating_add(1);
            s.metadata_version = s.metadata_version.saturating_add(1);
            s.updated_at = chrono::Utc::now();
            store_a.storage().save_session(&s).await.unwrap();
            s.title_version
        });

        // Tiny yield so A goes first.
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;

        // Writer B: same sequence as A, under the same per-session lock.
        let b = tokio::spawn(async move {
            let _guard = store_b.acquire_lock(&sid_b).await;
            let mut s = store_b
                .storage()
                .load_session(&sid_b)
                .await
                .unwrap()
                .unwrap();
            s.title = "Writer B".to_string();
            s.title_version = s.title_version.saturating_add(1);
            s.metadata_version = s.metadata_version.saturating_add(1);
            s.updated_at = chrono::Utc::now();
            store_b.storage().save_session(&s).await.unwrap();
            s.title_version
        });

        // Wait for both writers, then inspect what ended up persisted.
        let (ver_a, ver_b) = tokio::join!(a, b);
        let final_s = store
            .storage()
            .load_session(&session_id)
            .await
            .unwrap()
            .unwrap();
        // If the writers had interleaved, both would have read the same base
        // version and produced the same bumped value.
        assert!(
            ver_a.unwrap() != ver_b.unwrap(),
            "concurrent writers must produce distinct versions"
        );
        // The assertion expects exactly two increments on top of the seeded
        // session's `metadata_version`.
        assert_eq!(final_s.metadata_version, 2);
    }

    /// `commit_metadata` writes the session exactly as given: title and both
    /// version counters round-trip through storage unchanged.
    #[tokio::test]
    async fn commit_metadata_is_plain_save_inside_lock() {
        let session_id = "commit-plain";
        let (_temp, backend) = make_storage().await;
        let locked = LockedSessionStore::new(backend);

        let mut committed = fresh(session_id);
        committed.title_version = 2;
        committed.metadata_version = 1;
        committed.title = String::from("Committed");

        locked.commit_metadata(&committed).await.unwrap();

        let reloaded = locked
            .storage()
            .load_session(session_id)
            .await
            .unwrap()
            .unwrap();
        assert_eq!(reloaded.title, "Committed");
        assert_eq!(reloaded.metadata_version, 1);
        assert_eq!(reloaded.title_version, 2);
    }
}