kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! `Memory::builder()` and the runtime open path.

use std::sync::Arc;

use crate::embedder::Embedder;
use crate::error::{Error, Result};
use crate::handle::{Memory, MemoryInner};
use crate::metadata::{MetadataStore, SchemaMeta};
use crate::partition::PartitionScheme;
use crate::storage::Storage;
use crate::tenancy::TenantLocks;
use crate::tenant::TenantId;
use crate::util::{Clock, SystemClock};

/// Pre-open builder.
///
/// Collects the plugins and settings that [`Builder::open`] consumes to
/// produce a [`Memory`] handle. `storage`, `metadata`, `tenant` and `scheme`
/// must be set before `open` is called; every other field is optional.
pub struct Builder {
    /// Storage backend; `open` fails with a config error when unset.
    pub(crate) storage: Option<Box<dyn Storage>>,
    /// Metadata store; `open` fails with a config error when unset.
    pub(crate) metadata: Option<Box<dyn MetadataStore>>,
    /// Optional embedder. When `None`, `open` binds no embedder on first open.
    pub(crate) embedder: Option<Box<dyn Embedder>>,
    /// Optional `VectorIndex` override; `open` falls back to
    /// `crate::index::SqliteVecIndex` when unset.
    pub(crate) vector_index: Option<Box<dyn crate::index::VectorIndex>>,
    /// Optional `LexicalIndex` override; `open` falls back to
    /// `crate::index::Fts5Index` when unset.
    pub(crate) lexical_index: Option<Box<dyn crate::index::LexicalIndex>>,
    /// Tenant the opened handle is bound to; required by `open`.
    pub(crate) tenant: Option<TenantId>,
    /// Parsed partition scheme template; required by `open`.
    pub(crate) scheme: Option<PartitionScheme>,
    /// Audit-log actor field, if any.
    pub(crate) actor: Option<String>,
    /// Time source; `Default` installs `SystemClock`.
    pub(crate) clock: Box<dyn Clock>,
    /// Broadcast channel capacity override; `open` uses
    /// `crate::event::DEFAULT_EVENT_CAPACITY` when unset.
    pub(crate) event_capacity: Option<usize>,
}

impl Default for Builder {
    fn default() -> Self {
        Self {
            storage: None,
            metadata: None,
            embedder: None,
            vector_index: None,
            lexical_index: None,
            tenant: None,
            scheme: None,
            actor: None,
            clock: Box::new(SystemClock),
            event_capacity: None,
        }
    }
}

impl Builder {
    /// Use `s` as the storage backend.
    ///
    /// The value is boxed as a `dyn Storage`; a backend set earlier on this
    /// builder is replaced.
    #[must_use]
    pub fn storage(mut self, s: impl Storage) -> Self {
        let backend: Box<dyn Storage> = Box::new(s);
        self.storage = Some(backend);
        self
    }

    /// Use `m` as the metadata store.
    ///
    /// The value is boxed as a `dyn MetadataStore`; a store set earlier on
    /// this builder is replaced.
    #[must_use]
    pub fn metadata(mut self, m: impl MetadataStore) -> Self {
        let store: Box<dyn MetadataStore> = Box::new(m);
        self.metadata = Some(store);
        self
    }

    /// Use `e` as the embedder.
    ///
    /// The value is boxed as a `dyn Embedder`; an embedder set earlier on
    /// this builder is replaced. Configuring an embedder is optional.
    #[must_use]
    pub fn embedder(mut self, e: impl Embedder) -> Self {
        let model: Box<dyn Embedder> = Box::new(e);
        self.embedder = Some(model);
        self
    }

    /// Use an already type-erased storage backend.
    ///
    /// Intended for callers (the CLI in particular) that obtain a
    /// `Box<dyn Storage>` from a URI-driven factory; the typed
    /// [`Builder::storage`] setter would make them erase the type a second
    /// time.
    #[must_use]
    pub fn storage_boxed(mut self, s: Box<dyn Storage>) -> Self {
        let backend = Some(s);
        self.storage = backend;
        self
    }

    /// Use an already type-erased metadata store, replacing any store set
    /// earlier on this builder.
    #[must_use]
    pub fn metadata_boxed(mut self, m: Box<dyn MetadataStore>) -> Self {
        let store = Some(m);
        self.metadata = store;
        self
    }

    /// Use an already type-erased embedder, replacing any embedder set
    /// earlier on this builder.
    #[must_use]
    pub fn embedder_boxed(mut self, e: Box<dyn Embedder>) -> Self {
        let model = Some(e);
        self.embedder = model;
        self
    }

    /// Override the `VectorIndex` implementation (Plan 18 phase D).
    ///
    /// When no override is supplied, `open` uses
    /// [`crate::index::SqliteVecIndex`], which requires the metadata store
    /// to expose a SQLite pool.
    #[must_use]
    pub fn vector_index(mut self, vi: impl crate::index::VectorIndex) -> Self {
        let index: Box<dyn crate::index::VectorIndex> = Box::new(vi);
        self.vector_index = Some(index);
        self
    }

    /// Override the `LexicalIndex` implementation (Plan 18 phase D).
    ///
    /// When no override is supplied, `open` uses
    /// [`crate::index::Fts5Index`], which requires the metadata store to
    /// expose a SQLite pool.
    #[must_use]
    pub fn lexical_index(mut self, li: impl crate::index::LexicalIndex) -> Self {
        let index: Box<dyn crate::index::LexicalIndex> = Box::new(li);
        self.lexical_index = Some(index);
        self
    }

    /// Override the `VectorIndex` with an already type-erased
    /// implementation (Plan 18 phase D).
    #[must_use]
    pub fn vector_index_boxed(mut self, vi: Box<dyn crate::index::VectorIndex>) -> Self {
        let index = Some(vi);
        self.vector_index = index;
        self
    }

    /// Override the `LexicalIndex` with an already type-erased
    /// implementation (Plan 18 phase D).
    #[must_use]
    pub fn lexical_index_boxed(mut self, li: Box<dyn crate::index::LexicalIndex>) -> Self {
        let index = Some(li);
        self.lexical_index = index;
        self
    }

    /// Bind the builder to tenant `t`.
    ///
    /// A tenant is mandatory: `open` returns a config error when none has
    /// been set.
    #[must_use]
    pub fn tenant(mut self, t: TenantId) -> Self {
        let chosen = Some(t);
        self.tenant = chosen;
        self
    }

    /// Parse `template` and use it as the partition scheme.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`PartitionScheme::parse`] when the
    /// template is rejected; the builder is consumed in that case.
    pub fn partition_scheme(mut self, template: &str) -> Result<Self> {
        let parsed = PartitionScheme::parse(template)?;
        self.scheme = Some(parsed);
        Ok(self)
    }

    /// Set the actor recorded in the audit log.
    ///
    /// Accepts anything convertible into a `String`.
    #[must_use]
    pub fn actor(mut self, actor: impl Into<String>) -> Self {
        let name: String = actor.into();
        self.actor = Some(name);
        self
    }

    /// Replace the time source (intended for tests).
    ///
    /// Without this call the builder keeps the `SystemClock` installed by
    /// `Default`.
    #[must_use]
    pub fn clock(mut self, c: impl Clock + 'static) -> Self {
        let source: Box<dyn Clock> = Box::new(c);
        self.clock = source;
        self
    }

    /// Set the capacity of the broadcast channel behind `Memory::subscribe`.
    ///
    /// When never called, `open` uses `DEFAULT_EVENT_CAPACITY` (1024). A
    /// lower value bounds the memory cost per slow subscriber; a higher one
    /// reduces `Lagged` errors under load.
    #[must_use]
    pub fn event_capacity(mut self, n: usize) -> Self {
        let capacity = Some(n);
        self.event_capacity = capacity;
        self
    }

    /// Open the store. Applies migrations, validates / writes `schema_meta`.
    pub async fn open(self) -> Result<Memory> {
        let storage = self
            .storage
            .ok_or_else(|| Error::Config("storage backend required".into()))?;
        let metadata = self
            .metadata
            .ok_or_else(|| Error::Config("metadata store required".into()))?;
        let embedder = self.embedder; // optional under caller-owned-models
        let tenant = self
            .tenant
            .ok_or_else(|| Error::Config("tenant required".into()))?;
        let scheme = self
            .scheme
            .ok_or_else(|| Error::Config("partition_scheme required".into()))?;

        // ----- Capabilities pre-flight (spec § 12.11). -----
        check_storage(storage.capabilities())?;
        check_metadata(metadata.capabilities())?;
        if let Some(e) = embedder.as_ref() {
            check_embedder(e.capabilities())?;
        }

        metadata.migrate().await?;

        // Plan 18 dispatch 4 dropped the Plan 9 storage-layout v1→v2
        // migrator. The library has zero pre-Plan-9 stores in production
        // and no compat shim is owed.

        // Plan 14 phase F: the legacy `memory.inline_summary` column was
        // dropped in 0010. Pre-flight in `metadata.migrate()` refuses to drop
        // it if any rows still carry a non-NULL value — callers must drain
        // through a pre-0.1 release. The Plan 9 inline_summary_v1 migrator
        // is gone.

        // Plan 10: backfill `summary_input` rows from the legacy JSON column.
        // Idempotent — guarded by a `migration_state` row.
        crate::summary::migrate_inputs::migrate(metadata.as_ref(), self.clock.as_ref()).await?;

        // Plan 18 dispatch 4: the Plan 17 parquet→`embedding_blob` backfill
        // migrator was removed. The library has zero pre-Plan-17 stores in
        // production and no compat shim is owed; new stores write
        // `embedding_blob` inline at append time.

        // Plan 18: index virtual tables are now created idempotently on every
        // open via `MetadataStore::create_indices_if_missing(dim)`. The dim is
        // resolved below once schema_meta is read; we call this method after
        // that read.

        let now = self.clock.now_ms();
        match metadata.read_schema_meta().await? {
            None => {
                // First-open: write what we know now. If embedder is configured,
                // bind embedder_id + dims. Otherwise leave them NULL — first
                // append with a caller-provided vector will set them to
                // [`crate::embedder::CALLER_PROVIDED_EMBEDDER_ID`] + that
                // vector's length.
                let (eid, edims) = match embedder.as_ref() {
                    Some(e) => (
                        Some(e.id().to_string()),
                        Some(i64::try_from(e.dimensions()).unwrap_or(0)),
                    ),
                    None => (None, None),
                };
                metadata
                    .write_schema_meta(&SchemaMeta {
                        partition_scheme: scheme.to_string(),
                        // Plan 9: brand-new stores land at v2 directly; older
                        // stores opened later see the migrator.
                        scheme_version: 2,
                        embedder_id: eid,
                        embedder_dims: edims,
                        created_at_ms: now,
                    })
                    .await?;
            }
            Some(meta) => {
                if meta.partition_scheme != scheme.to_string() {
                    return Err(Error::PartitionSchemeMismatch {
                        expected: meta.partition_scheme,
                        got: scheme.to_string(),
                    });
                }
                match (&meta.embedder_id, embedder.as_ref()) {
                    (Some(stored), Some(e)) => {
                        let edims_stored = meta.embedder_dims.unwrap_or(0);
                        if stored == crate::embedder::CALLER_PROVIDED_EMBEDDER_ID {
                            return Err(Error::EmbedderMismatch {
                                expected: stored.clone(),
                                expected_dims: usize::try_from(edims_stored).unwrap_or(0),
                                got: e.id().to_string(),
                                got_dims: e.dimensions(),
                            });
                        }
                        if stored != e.id()
                            || edims_stored != i64::try_from(e.dimensions()).unwrap_or(0)
                        {
                            return Err(Error::EmbedderMismatch {
                                expected: stored.clone(),
                                expected_dims: usize::try_from(edims_stored).unwrap_or(0),
                                got: e.id().to_string(),
                                got_dims: e.dimensions(),
                            });
                        }
                    }
                    (Some(stored), None) => {
                        // Store has an embedder bound; caller configured none → mismatch
                        // unless the store explicitly recorded the
                        // [`crate::embedder::CALLER_PROVIDED_EMBEDDER_ID`] sentinel.
                        if stored != crate::embedder::CALLER_PROVIDED_EMBEDDER_ID {
                            return Err(Error::EmbedderMismatch {
                                expected: stored.clone(),
                                expected_dims: usize::try_from(meta.embedder_dims.unwrap_or(0))
                                    .unwrap_or(0),
                                got: "<none>".to_string(),
                                got_dims: 0,
                            });
                        }
                    }
                    (None, _) => {
                        // schema_meta has no embedder bound yet; first append
                        // (engine or caller-provided) decides.
                    }
                }
            }
        }

        let dims_hint = match (&embedder, metadata.read_schema_meta().await?) {
            (Some(e), _) => e.dimensions(),
            (None, Some(m)) => usize::try_from(m.embedder_dims.unwrap_or(0)).unwrap_or(0),
            _ => 0,
        };

        // Plan 18: bootstrap the index virtual tables now that we know the
        // embedder dim. When `dims_hint == 0` (caller-provided mode pre-first
        // append), creation is deferred to the first `append` call which knows
        // the dim from the supplied vector.
        if dims_hint > 0 {
            metadata.create_indices_if_missing(dims_hint).await?;
        }

        // Plan 18 phase D: default the vector + lexical index implementations
        // to the SQLite-backed ones, sharing the metadata store's pool. Caller
        // overrides win — Builder::vector_index / Builder::lexical_index can
        // swap in a Vectorlite / hosted-search / mocking impl.
        let pool_arc: Option<std::sync::Arc<sqlx::SqlitePool>> = metadata
            .sqlite_pool()
            .map(|p| std::sync::Arc::new(p.clone()));
        let vector_index: Box<dyn crate::index::VectorIndex> = match self.vector_index {
            Some(vi) => vi,
            None => {
                let pool = pool_arc.clone().ok_or_else(|| {
                    Error::Config(
                        "default VectorIndex requires a SQLite-backed MetadataStore; \
                         supply Builder::vector_index for non-SQLite stores"
                            .into(),
                    )
                })?;
                Box::new(crate::index::SqliteVecIndex::new(pool))
            }
        };
        let lexical_index: Box<dyn crate::index::LexicalIndex> = match self.lexical_index {
            Some(li) => li,
            None => {
                let pool = pool_arc.clone().ok_or_else(|| {
                    Error::Config(
                        "default LexicalIndex requires a SQLite-backed MetadataStore; \
                         supply Builder::lexical_index for non-SQLite stores"
                            .into(),
                    )
                })?;
                Box::new(crate::index::Fts5Index::new(pool))
            }
        };

        let event_capacity = self
            .event_capacity
            .unwrap_or(crate::event::DEFAULT_EVENT_CAPACITY);
        let (event_tx, _initial_rx) =
            tokio::sync::broadcast::channel::<crate::event::MemoryEvent>(event_capacity);

        // Plan 18 dispatch 4 dropped the legacy `IndexConfig` / cache /
        // flusher / recovery walker; the SQLite-everything refactor also
        // removed `flush_interval`, `flush_threshold`, `index_cache_capacity`,
        // and `index_root` from the Builder surface (CHANGELOG breaking).
        let _ = dims_hint;

        let inner = Arc::new(MemoryInner {
            storage,
            metadata,
            embedder,
            tenant: tenant.clone(),
            scheme,
            actor: self.actor.clone(),
            clock: self.clock,
            locks: TenantLocks::default(),
            vector_index,
            lexical_index,
            schema_meta_locked: parking_lot::Mutex::new(false),
            event_tx: event_tx.clone(),
        });

        Ok(Memory { inner })
    }
}

/// Verify that a storage plugin advertises every capability the engine
/// requires.
///
/// Capabilities are examined in the order `put`, `get`, `delete`,
/// `list_prefix`, `atomic_overwrite`; only those flagged in
/// `REQUIRED_STORAGE` are enforced.
///
/// # Errors
///
/// Returns [`Error::CapabilityMissing`] naming the first required capability
/// that `caps` does not provide.
fn check_storage(caps: crate::capabilities::StorageCapabilities) -> Result<()> {
    let required = crate::capabilities::REQUIRED_STORAGE;
    // (capability name, demanded by the engine, advertised by the plugin)
    let table = [
        ("put", required.put, caps.put),
        ("get", required.get, caps.get),
        ("delete", required.delete, caps.delete),
        ("list_prefix", required.list_prefix, caps.list_prefix),
        (
            "atomic_overwrite",
            required.atomic_overwrite,
            caps.atomic_overwrite,
        ),
    ];
    for (name, demanded, advertised) in table {
        if demanded && !advertised {
            return Err(Error::CapabilityMissing {
                plugin: crate::capabilities::Plugin::Storage.as_str(),
                required: name,
                got: advertised,
            });
        }
    }
    Ok(())
}

fn check_metadata(caps: crate::capabilities::MetadataCapabilities) -> Result<()> {
    let req = crate::capabilities::REQUIRED_METADATA;
    macro_rules! must {
        ($field:ident) => {
            if req.$field && !caps.$field {
                return Err(Error::CapabilityMissing {
                    plugin: crate::capabilities::Plugin::Metadata.as_str(),
                    required: stringify!($field),
                    got: caps.$field,
                });
            }
        };
    }
    must!(migrate);
    must!(audit_since);
    must!(links);
    Ok(())
}

fn check_embedder(caps: crate::capabilities::EmbedderCapabilities) -> Result<()> {
    let req = crate::capabilities::REQUIRED_EMBEDDER;
    if req.batched && !caps.batched {
        return Err(Error::CapabilityMissing {
            plugin: crate::capabilities::Plugin::Embedder.as_str(),
            required: "batched",
            got: caps.batched,
        });
    }
    Ok(())
}