Skip to main content

khive_storage/
types.rs

1//! Shared types used across storage capability traits.
2
3use std::fmt;
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use uuid::Uuid;
9
10use khive_types::{EdgeRelation, SubstrateKind};
11
12use crate::error::StorageError;
13
14pub type StorageResult<T> = Result<T, StorageError>;
15
16#[derive(Clone, Debug, Default, Serialize, Deserialize)]
17pub struct BatchWriteSummary {
18    pub attempted: u64,
19    pub affected: u64,
20    pub failed: u64,
21    #[serde(default, skip_serializing_if = "String::is_empty")]
22    pub first_error: String,
23}
24
25#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
26#[serde(rename_all = "snake_case")]
27pub enum DeleteMode {
28    Soft,
29    Hard,
30}
31
32// -- SQL primitives --
33
34#[derive(Clone, Debug, Serialize, Deserialize)]
35#[serde(rename_all = "snake_case")]
36pub enum SqlValue {
37    Null,
38    Bool(bool),
39    Integer(i64),
40    Float(f64),
41    Text(String),
42    Blob(Vec<u8>),
43    Json(Value),
44    Uuid(Uuid),
45    Timestamp(DateTime<Utc>),
46}
47
48#[derive(Clone, Debug, Serialize, Deserialize)]
49pub struct SqlStatement {
50    pub sql: String,
51    pub params: Vec<SqlValue>,
52    pub label: Option<String>,
53}
54
55#[derive(Clone, Debug, Serialize, Deserialize)]
56pub struct SqlColumn {
57    pub name: String,
58    pub value: SqlValue,
59}
60
61#[derive(Clone, Debug, Serialize, Deserialize)]
62pub struct SqlRow {
63    pub columns: Vec<SqlColumn>,
64}
65
66impl SqlRow {
67    pub fn get(&self, name: &str) -> Option<&SqlValue> {
68        self.columns
69            .iter()
70            .find(|c| c.name == name)
71            .map(|c| &c.value)
72    }
73}
74
75#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
76#[serde(rename_all = "snake_case")]
77pub enum SqlIsolation {
78    Default,
79    ReadCommitted,
80    RepeatableRead,
81    Serializable,
82}
83
84#[derive(Clone, Debug, Serialize, Deserialize)]
85pub struct SqlTxOptions {
86    pub read_only: bool,
87    pub isolation: SqlIsolation,
88    pub label: Option<String>,
89}
90
91impl Default for SqlTxOptions {
92    fn default() -> Self {
93        Self {
94            read_only: false,
95            isolation: SqlIsolation::Default,
96            label: None,
97        }
98    }
99}
100
101// -- Vector types --
102
103#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
104#[serde(rename_all = "snake_case")]
105pub enum VectorIndexKind {
106    Hnsw,
107    SqliteVec,
108    Flat,
109}
110
111/// Backend capability declaration for vector stores (ADR-041, ADR-044).
112///
113/// Returned by [`VectorStore::capabilities`]. Higher-level retrieval policy
114/// (hybrid search, HyDE fan-out, etc.) introspects this struct at construction
115/// time to select the optimal code path without relying on error-type matching.
116#[derive(Clone, Debug, Serialize, Deserialize)]
117pub struct VectorStoreCapabilities {
118    /// Supports metadata pre-filter pushdown into the index scan.
119    pub supports_filter: bool,
120    /// Supports batch search (multiple query vectors in one call).
121    pub supports_batch_search: bool,
122    /// Supports quantization (reduces memory; may trade recall).
123    pub supports_quantization: bool,
124    /// Supports in-place update without a delete+insert round-trip.
125    pub supports_update: bool,
126    /// Supports orphan sweep (deleting vectors with no live subject).
127    pub supports_orphan_sweep: bool,
128    /// Supports multiple named fields per subject (e.g. `entity.title` and
129    /// `entity.body` stored as separate vectors). sqlite-vec backends use a
130    /// `subject_id PRIMARY KEY` table and therefore only support one vector
131    /// per subject per namespace — this field is `false` for those backends.
132    #[serde(default)]
133    pub supports_multi_field: bool,
134    /// Maximum supported embedding dimension, or `None` if unbounded.
135    pub max_dimensions: Option<u32>,
136    /// Index algorithms available in this backend.
137    pub index_kinds: Vec<VectorIndexKind>,
138}
139
140/// A typed predicate for backend-pushable metadata filtering (ADR-041, ADR-044).
141#[derive(Clone, Debug, Default, Serialize, Deserialize)]
142pub struct VectorMetadataFilter {
143    /// Restrict to these namespaces.
144    pub namespaces: Vec<String>,
145    /// Restrict to these substrate kinds.
146    pub kinds: Vec<SubstrateKind>,
147    /// Typed property predicates (ADR-044).
148    pub property_filters: Vec<PropertyFilter>,
149}
150
151impl VectorMetadataFilter {
152    /// Returns `true` when no predicates are set (filter is a no-op).
153    pub fn is_empty(&self) -> bool {
154        self.namespaces.is_empty() && self.kinds.is_empty() && self.property_filters.is_empty()
155    }
156}
157
158/// A single typed metadata predicate (ADR-044).
159#[derive(Clone, Debug, Serialize, Deserialize)]
160pub struct PropertyFilter {
161    pub key: String,
162    pub op: PropertyOp,
163    pub value: serde_json::Value,
164}
165
166/// Comparison operators for [`PropertyFilter`] (ADR-044).
167#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
168#[serde(rename_all = "snake_case")]
169pub enum PropertyOp {
170    Eq,
171    Ne,
172    In,
173    Range,
174    Exists,
175}
176
177#[derive(Clone, Debug, Serialize, Deserialize)]
178pub struct VectorRecord {
179    pub subject_id: Uuid,
180    pub kind: SubstrateKind,
181    pub namespace: String,
182    /// Which embedding field this record represents (e.g. `"entity.body"`).
183    pub field: String,
184    #[serde(default)]
185    pub embedding_model: Option<String>,
186    /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`.
187    pub vectors: Vec<Vec<f32>>,
188    pub updated_at: DateTime<Utc>,
189}
190
191#[derive(Clone, Debug, Serialize, Deserialize)]
192pub struct VectorSearchRequest {
193    /// One or many query vectors; sqlite-vec backends enforce `query_vectors.len() == 1`.
194    pub query_vectors: Vec<Vec<f32>>,
195    pub top_k: u32,
196    pub namespace: Option<String>,
197    pub kind: Option<SubstrateKind>,
198    /// Restrict results to this embedding model. Defaults to the store's own model.
199    #[serde(default)]
200    pub embedding_model: Option<String>,
201    /// Optional metadata filter for backends that support pushdown.
202    pub filter: Option<VectorMetadataFilter>,
203    /// Backend-specific hints (opaque JSON blob, ignored by default).
204    pub backend_hints: Option<serde_json::Value>,
205}
206
207/// Configuration for an orphan-sweep pass (ADR-044).
208#[derive(Clone, Debug, Serialize, Deserialize)]
209pub struct OrphanSweepConfig {
210    /// Optional allowlist of subject IDs to check. `None` = scan all rows.
211    /// `Some(ids)` restricts the sweep to only those IDs; rows not in the list
212    /// are untouched even if orphaned (ADR-044 §5).
213    pub subject_id_allowlist: Option<Vec<Uuid>>,
214    pub namespaces: Vec<String>,
215    pub substrate_kinds: Vec<SubstrateKind>,
216    pub max_delete: u32,
217    pub dry_run: bool,
218}
219
220/// Result of an orphan-sweep pass (ADR-044).
221#[derive(Clone, Debug, Serialize, Deserialize)]
222pub struct OrphanSweepResult {
223    pub scanned: u64,
224    pub deleted: u64,
225    pub would_delete: u64,
226    pub max_delete_hit: bool,
227}
228
229// -- Sparse vector types (ADR-031) --
230
231/// A sparse vector represented as parallel indices and values arrays.
232#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
233pub struct SparseVector {
234    /// Dimension indices (must be strictly increasing).
235    pub indices: Vec<u32>,
236    /// Corresponding non-zero values (must be finite).
237    pub values: Vec<f32>,
238}
239
240#[derive(Clone, Debug, Serialize, Deserialize)]
241pub struct SparseRecord {
242    pub subject_id: Uuid,
243    pub kind: SubstrateKind,
244    pub namespace: String,
245    pub field: String,
246    pub vector: SparseVector,
247    pub updated_at: DateTime<Utc>,
248}
249
250#[derive(Clone, Debug, Serialize, Deserialize)]
251pub struct SparseSearchRequest {
252    pub query: SparseVector,
253    pub top_k: u32,
254    pub namespace: Option<String>,
255    pub kind: Option<SubstrateKind>,
256}
257
258#[derive(Clone, Debug, Serialize, Deserialize)]
259pub struct SparseSearchHit {
260    pub subject_id: Uuid,
261    pub score: khive_score::DeterministicScore,
262    pub rank: u32,
263}
264
265#[derive(Clone, Debug, Serialize, Deserialize)]
266pub struct VectorSearchHit {
267    pub subject_id: Uuid,
268    pub score: khive_score::DeterministicScore,
269    pub rank: u32,
270}
271
272#[derive(Clone, Debug, Serialize, Deserialize)]
273pub struct VectorStoreInfo {
274    pub model_name: String,
275    pub dimensions: usize,
276    pub index_kind: VectorIndexKind,
277    pub entry_count: u64,
278    pub needs_rebuild: bool,
279    pub last_rebuild_at: Option<DateTime<Utc>>,
280}
281
282// -- Text search types --
283
284#[derive(Clone, Debug, Serialize, Deserialize)]
285pub struct TextDocument {
286    pub subject_id: Uuid,
287    pub kind: SubstrateKind,
288    pub namespace: String,
289    pub title: Option<String>,
290    pub body: String,
291    pub tags: Vec<String>,
292    pub metadata: Option<Value>,
293    pub updated_at: DateTime<Utc>,
294}
295
296#[derive(Clone, Debug, Default, Serialize, Deserialize)]
297pub struct TextFilter {
298    pub ids: Vec<Uuid>,
299    pub kinds: Vec<SubstrateKind>,
300    pub namespaces: Vec<String>,
301}
302
303#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
304#[serde(rename_all = "snake_case")]
305pub enum TextQueryMode {
306    Plain,
307    Phrase,
308}
309
310#[derive(Clone, Debug, Serialize, Deserialize)]
311pub struct TextSearchRequest {
312    pub query: String,
313    pub mode: TextQueryMode,
314    pub filter: Option<TextFilter>,
315    pub top_k: u32,
316    pub snippet_chars: usize,
317}
318
319#[derive(Clone, Debug, Serialize, Deserialize)]
320pub struct TextSearchHit {
321    pub subject_id: Uuid,
322    pub score: khive_score::DeterministicScore,
323    pub rank: u32,
324    pub title: Option<String>,
325    pub snippet: Option<String>,
326}
327
328#[derive(Clone, Debug, Serialize, Deserialize)]
329pub struct TextIndexStats {
330    pub document_count: u64,
331    pub needs_rebuild: bool,
332    pub last_rebuild_at: Option<DateTime<Utc>>,
333}
334
335#[derive(Clone, Debug, Serialize, Deserialize)]
336#[serde(rename_all = "snake_case")]
337pub enum IndexRebuildScope {
338    Full,
339    Entities(Vec<Uuid>),
340}
341
342// -- Pagination --
343
344#[derive(Clone, Debug, Serialize, Deserialize)]
345pub struct PageRequest {
346    pub offset: u64,
347    pub limit: u32,
348}
349
350impl Default for PageRequest {
351    fn default() -> Self {
352        Self {
353            offset: 0,
354            limit: 50,
355        }
356    }
357}
358
359#[derive(Clone, Debug, Serialize, Deserialize)]
360pub struct Page<T> {
361    pub items: Vec<T>,
362    pub total: Option<u64>,
363}
364
365// -- Graph types --
366
367/// A type-safe link ID (wraps Uuid).
368#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
369pub struct LinkId(pub Uuid);
370
371impl From<Uuid> for LinkId {
372    fn from(u: Uuid) -> Self {
373        Self(u)
374    }
375}
376
377impl From<LinkId> for Uuid {
378    fn from(l: LinkId) -> Uuid {
379        l.0
380    }
381}
382
383impl fmt::Display for LinkId {
384    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
385        self.0.fmt(f)
386    }
387}
388
389/// A directed edge in the graph.
390#[derive(Clone, Debug, Serialize, Deserialize)]
391pub struct Edge {
392    pub id: LinkId,
393    pub namespace: String,
394    pub source_id: Uuid,
395    pub target_id: Uuid,
396    pub relation: EdgeRelation,
397    pub weight: f64,
398    pub created_at: DateTime<Utc>,
399    pub updated_at: DateTime<Utc>,
400    pub deleted_at: Option<DateTime<Utc>>,
401    pub metadata: Option<Value>,
402    pub target_backend: Option<String>,
403}
404
405#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
406#[serde(rename_all = "snake_case")]
407pub enum Direction {
408    #[default]
409    Out,
410    In,
411    Both,
412}
413
414#[derive(Clone, Debug, Default, Serialize, Deserialize)]
415pub struct TimeRange {
416    pub start: Option<DateTime<Utc>>,
417    pub end: Option<DateTime<Utc>>,
418}
419
420#[derive(Clone, Debug, Default, Serialize, Deserialize)]
421pub struct EdgeFilter {
422    pub ids: Vec<LinkId>,
423    pub source_ids: Vec<Uuid>,
424    pub target_ids: Vec<Uuid>,
425    pub relations: Vec<EdgeRelation>,
426    pub min_weight: Option<f64>,
427    pub max_weight: Option<f64>,
428    pub created_at: Option<TimeRange>,
429}
430
431#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
432#[serde(rename_all = "snake_case")]
433pub enum EdgeSortField {
434    CreatedAt,
435    Weight,
436    Relation,
437}
438
439#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
440#[serde(rename_all = "snake_case")]
441pub enum SortDirection {
442    Asc,
443    Desc,
444}
445
446#[derive(Clone, Debug, Serialize, Deserialize)]
447pub struct SortOrder<F> {
448    pub field: F,
449    pub direction: SortDirection,
450}
451
452#[derive(Clone, Debug, Serialize, Deserialize)]
453pub struct NeighborQuery {
454    pub direction: Direction,
455    pub relations: Option<Vec<EdgeRelation>>,
456    pub limit: Option<u32>,
457    pub min_weight: Option<f64>,
458}
459
460/// One neighbor returned by a graph query.
461///
462/// Field naming (#148): on the JSON wire, the node identifier is serialized as
463/// `id` (not `node_id`) so it matches the verb-wide identifier convention.
464/// Internal Rust code still uses `.node_id` on the struct.
465///
466/// Enrichment (#162): `name` and `kind` are populated by the runtime layer
467/// after the storage call returns. Storage `GraphStore` impls leave them
468/// `None`; the runtime batch-fetches the entity rows and fills them in.
469#[derive(Clone, Debug, Serialize, Deserialize)]
470pub struct NeighborHit {
471    #[serde(rename = "id")]
472    pub node_id: Uuid,
473    pub edge_id: Uuid,
474    pub relation: EdgeRelation,
475    pub weight: f64,
476    #[serde(default, skip_serializing_if = "Option::is_none")]
477    pub name: Option<String>,
478    #[serde(default, skip_serializing_if = "Option::is_none")]
479    pub kind: Option<String>,
480}
481
482#[derive(Clone, Debug, Default, Serialize, Deserialize)]
483pub struct TraversalOptions {
484    pub max_depth: usize,
485    pub direction: Direction,
486    pub relations: Option<Vec<EdgeRelation>>,
487    pub min_weight: Option<f64>,
488    pub limit: Option<u32>,
489}
490
491impl TraversalOptions {
492    pub fn new(max_depth: usize) -> Self {
493        Self {
494            max_depth,
495            ..Default::default()
496        }
497    }
498
499    pub fn with_direction(mut self, d: Direction) -> Self {
500        self.direction = d;
501        self
502    }
503}
504
505#[derive(Clone, Debug, Serialize, Deserialize)]
506pub struct TraversalRequest {
507    pub roots: Vec<Uuid>,
508    pub options: TraversalOptions,
509    pub include_roots: bool,
510}
511
512/// One node along a traversal path.
513///
514/// Field naming (#148): JSON wire serialization is `id`. Enrichment (#162):
515/// `name`/`kind` are filled by the runtime layer after the storage call.
516#[derive(Clone, Debug, Serialize, Deserialize)]
517pub struct PathNode {
518    #[serde(rename = "id")]
519    pub node_id: Uuid,
520    pub via_edge: Option<Uuid>,
521    pub depth: usize,
522    #[serde(default, skip_serializing_if = "Option::is_none")]
523    pub name: Option<String>,
524    #[serde(default, skip_serializing_if = "Option::is_none")]
525    pub kind: Option<String>,
526}
527
528#[derive(Clone, Debug, Serialize, Deserialize)]
529pub struct GraphPath {
530    pub root_id: Uuid,
531    pub nodes: Vec<PathNode>,
532    pub total_weight: f64,
533}