Skip to main content

khive_storage/
types.rs

1//! Shared types used across storage capability traits.
2
3use std::fmt;
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use uuid::Uuid;
9
10use khive_types::{EdgeRelation, SubstrateKind};
11
12use crate::error::StorageError;
13
14pub type StorageResult<T> = Result<T, StorageError>;
15
16#[derive(Clone, Debug, Default, Serialize, Deserialize)]
17pub struct BatchWriteSummary {
18    pub attempted: u64,
19    pub affected: u64,
20    pub failed: u64,
21    #[serde(default, skip_serializing_if = "String::is_empty")]
22    pub first_error: String,
23}
24
25#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
26#[serde(rename_all = "snake_case")]
27pub enum DeleteMode {
28    Soft,
29    Hard,
30}
31
32// -- SQL primitives --
33
34#[derive(Clone, Debug, Serialize, Deserialize)]
35#[serde(rename_all = "snake_case")]
36pub enum SqlValue {
37    Null,
38    Bool(bool),
39    Integer(i64),
40    Float(f64),
41    Text(String),
42    Blob(Vec<u8>),
43    Json(Value),
44    Uuid(Uuid),
45    Timestamp(DateTime<Utc>),
46}
47
48#[derive(Clone, Debug, Serialize, Deserialize)]
49pub struct SqlStatement {
50    pub sql: String,
51    pub params: Vec<SqlValue>,
52    pub label: Option<String>,
53}
54
55#[derive(Clone, Debug, Serialize, Deserialize)]
56pub struct SqlColumn {
57    pub name: String,
58    pub value: SqlValue,
59}
60
61#[derive(Clone, Debug, Serialize, Deserialize)]
62pub struct SqlRow {
63    pub columns: Vec<SqlColumn>,
64}
65
66impl SqlRow {
67    pub fn get(&self, name: &str) -> Option<&SqlValue> {
68        self.columns
69            .iter()
70            .find(|c| c.name == name)
71            .map(|c| &c.value)
72    }
73}
74
75#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
76#[serde(rename_all = "snake_case")]
77pub enum SqlIsolation {
78    Default,
79    ReadCommitted,
80    RepeatableRead,
81    Serializable,
82}
83
84#[derive(Clone, Debug, Serialize, Deserialize)]
85pub struct SqlTxOptions {
86    pub read_only: bool,
87    pub isolation: SqlIsolation,
88    pub label: Option<String>,
89}
90
91impl Default for SqlTxOptions {
92    fn default() -> Self {
93        Self {
94            read_only: false,
95            isolation: SqlIsolation::Default,
96            label: None,
97        }
98    }
99}
100
101// -- Vector types --
102
103#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
104#[serde(rename_all = "snake_case")]
105pub enum VectorIndexKind {
106    Hnsw,
107    SqliteVec,
108    Flat,
109}
110
111/// Backend capability declaration for vector stores (ADR-041, ADR-044).
112///
113/// Returned by [`VectorStore::capabilities`]. Higher-level retrieval policy
114/// (hybrid search, HyDE fan-out, etc.) introspects this struct at construction
115/// time to select the optimal code path without relying on error-type matching.
116#[derive(Clone, Debug, Serialize, Deserialize)]
117pub struct VectorStoreCapabilities {
118    /// Supports metadata pre-filter pushdown into the index scan.
119    pub supports_filter: bool,
120    /// Supports batch search (multiple query vectors in one call).
121    pub supports_batch_search: bool,
122    /// Supports quantization (reduces memory; may trade recall).
123    pub supports_quantization: bool,
124    /// Supports in-place update without a delete+insert round-trip.
125    pub supports_update: bool,
126    /// Supports orphan sweep (deleting vectors with no live subject).
127    pub supports_orphan_sweep: bool,
128    /// Supports multiple named fields per subject (e.g. `entity.title` and
129    /// `entity.body` stored as separate vectors). sqlite-vec backends use a
130    /// `subject_id PRIMARY KEY` table and therefore only support one vector
131    /// per subject per namespace — this field is `false` for those backends.
132    #[serde(default)]
133    pub supports_multi_field: bool,
134    /// Maximum supported embedding dimension, or `None` if unbounded.
135    pub max_dimensions: Option<u32>,
136    /// Index algorithms available in this backend.
137    pub index_kinds: Vec<VectorIndexKind>,
138}
139
140/// A typed predicate for backend-pushable metadata filtering (ADR-041, ADR-044).
141#[derive(Clone, Debug, Default, Serialize, Deserialize)]
142pub struct VectorMetadataFilter {
143    /// Restrict to these namespaces.
144    pub namespaces: Vec<String>,
145    /// Restrict to these substrate kinds.
146    pub kinds: Vec<SubstrateKind>,
147    /// Typed property predicates (ADR-044).
148    pub property_filters: Vec<PropertyFilter>,
149}
150
151impl VectorMetadataFilter {
152    /// Returns `true` when no predicates are set (filter is a no-op).
153    pub fn is_empty(&self) -> bool {
154        self.namespaces.is_empty() && self.kinds.is_empty() && self.property_filters.is_empty()
155    }
156}
157
158/// A single typed metadata predicate (ADR-044).
159#[derive(Clone, Debug, Serialize, Deserialize)]
160pub struct PropertyFilter {
161    pub key: String,
162    pub op: PropertyOp,
163    pub value: serde_json::Value,
164}
165
166/// Comparison operators for [`PropertyFilter`] (ADR-044).
167#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
168#[serde(rename_all = "snake_case")]
169pub enum PropertyOp {
170    Eq,
171    Ne,
172    In,
173    Range,
174    Exists,
175}
176
177#[derive(Clone, Debug, Serialize, Deserialize)]
178pub struct VectorRecord {
179    pub subject_id: Uuid,
180    pub kind: SubstrateKind,
181    pub namespace: String,
182    /// Which embedding field this record represents (e.g. `"entity.body"`).
183    pub field: String,
184    /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`.
185    pub vectors: Vec<Vec<f32>>,
186    pub updated_at: DateTime<Utc>,
187}
188
189#[derive(Clone, Debug, Serialize, Deserialize)]
190pub struct VectorSearchRequest {
191    /// One or many query vectors; sqlite-vec backends enforce `query_vectors.len() == 1`.
192    pub query_vectors: Vec<Vec<f32>>,
193    pub top_k: u32,
194    pub namespace: Option<String>,
195    pub kind: Option<SubstrateKind>,
196    /// Optional metadata filter for backends that support pushdown.
197    pub filter: Option<VectorMetadataFilter>,
198    /// Backend-specific hints (opaque JSON blob, ignored by default).
199    pub backend_hints: Option<serde_json::Value>,
200}
201
202/// Configuration for an orphan-sweep pass (ADR-044).
203#[derive(Clone, Debug, Serialize, Deserialize)]
204pub struct OrphanSweepConfig {
205    /// Optional allowlist of subject IDs to check. `None` = scan all rows.
206    /// `Some(ids)` restricts the sweep to only those IDs; rows not in the list
207    /// are untouched even if orphaned (ADR-044 §5).
208    pub subject_id_allowlist: Option<Vec<Uuid>>,
209    pub namespaces: Vec<String>,
210    pub substrate_kinds: Vec<SubstrateKind>,
211    pub max_delete: u32,
212    pub dry_run: bool,
213}
214
215/// Result of an orphan-sweep pass (ADR-044).
216#[derive(Clone, Debug, Serialize, Deserialize)]
217pub struct OrphanSweepResult {
218    pub scanned: u64,
219    pub deleted: u64,
220    pub would_delete: u64,
221    pub max_delete_hit: bool,
222}
223
224// -- Sparse vector types (ADR-031) --
225
226/// A sparse vector represented as parallel indices and values arrays.
227#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
228pub struct SparseVector {
229    /// Dimension indices (must be strictly increasing).
230    pub indices: Vec<u32>,
231    /// Corresponding non-zero values (must be finite).
232    pub values: Vec<f32>,
233}
234
235#[derive(Clone, Debug, Serialize, Deserialize)]
236pub struct SparseRecord {
237    pub subject_id: Uuid,
238    pub kind: SubstrateKind,
239    pub namespace: String,
240    pub field: String,
241    pub vector: SparseVector,
242    pub updated_at: DateTime<Utc>,
243}
244
245#[derive(Clone, Debug, Serialize, Deserialize)]
246pub struct SparseSearchRequest {
247    pub query: SparseVector,
248    pub top_k: u32,
249    pub namespace: Option<String>,
250    pub kind: Option<SubstrateKind>,
251}
252
253#[derive(Clone, Debug, Serialize, Deserialize)]
254pub struct SparseSearchHit {
255    pub subject_id: Uuid,
256    pub score: khive_score::DeterministicScore,
257    pub rank: u32,
258}
259
260#[derive(Clone, Debug, Serialize, Deserialize)]
261pub struct VectorSearchHit {
262    pub subject_id: Uuid,
263    pub score: khive_score::DeterministicScore,
264    pub rank: u32,
265}
266
267#[derive(Clone, Debug, Serialize, Deserialize)]
268pub struct VectorStoreInfo {
269    pub model_name: String,
270    pub dimensions: usize,
271    pub index_kind: VectorIndexKind,
272    pub entry_count: u64,
273    pub needs_rebuild: bool,
274    pub last_rebuild_at: Option<DateTime<Utc>>,
275}
276
277// -- Text search types --
278
279#[derive(Clone, Debug, Serialize, Deserialize)]
280pub struct TextDocument {
281    pub subject_id: Uuid,
282    pub kind: SubstrateKind,
283    pub namespace: String,
284    pub title: Option<String>,
285    pub body: String,
286    pub tags: Vec<String>,
287    pub metadata: Option<Value>,
288    pub updated_at: DateTime<Utc>,
289}
290
291#[derive(Clone, Debug, Default, Serialize, Deserialize)]
292pub struct TextFilter {
293    pub ids: Vec<Uuid>,
294    pub kinds: Vec<SubstrateKind>,
295    pub namespaces: Vec<String>,
296}
297
298#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
299#[serde(rename_all = "snake_case")]
300pub enum TextQueryMode {
301    Plain,
302    Phrase,
303}
304
305#[derive(Clone, Debug, Serialize, Deserialize)]
306pub struct TextSearchRequest {
307    pub query: String,
308    pub mode: TextQueryMode,
309    pub filter: Option<TextFilter>,
310    pub top_k: u32,
311    pub snippet_chars: usize,
312}
313
314#[derive(Clone, Debug, Serialize, Deserialize)]
315pub struct TextSearchHit {
316    pub subject_id: Uuid,
317    pub score: khive_score::DeterministicScore,
318    pub rank: u32,
319    pub title: Option<String>,
320    pub snippet: Option<String>,
321}
322
323#[derive(Clone, Debug, Serialize, Deserialize)]
324pub struct TextIndexStats {
325    pub document_count: u64,
326    pub needs_rebuild: bool,
327    pub last_rebuild_at: Option<DateTime<Utc>>,
328}
329
330#[derive(Clone, Debug, Serialize, Deserialize)]
331#[serde(rename_all = "snake_case")]
332pub enum IndexRebuildScope {
333    Full,
334    Entities(Vec<Uuid>),
335}
336
337// -- Pagination --
338
339#[derive(Clone, Debug, Serialize, Deserialize)]
340pub struct PageRequest {
341    pub offset: u64,
342    pub limit: u32,
343}
344
345impl Default for PageRequest {
346    fn default() -> Self {
347        Self {
348            offset: 0,
349            limit: 50,
350        }
351    }
352}
353
354#[derive(Clone, Debug, Serialize, Deserialize)]
355pub struct Page<T> {
356    pub items: Vec<T>,
357    pub total: Option<u64>,
358}
359
360// -- Graph types --
361
362/// A type-safe link ID (wraps Uuid).
363#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
364pub struct LinkId(pub Uuid);
365
366impl From<Uuid> for LinkId {
367    fn from(u: Uuid) -> Self {
368        Self(u)
369    }
370}
371
372impl From<LinkId> for Uuid {
373    fn from(l: LinkId) -> Uuid {
374        l.0
375    }
376}
377
378impl fmt::Display for LinkId {
379    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
380        self.0.fmt(f)
381    }
382}
383
384/// A directed edge in the graph.
385#[derive(Clone, Debug, Serialize, Deserialize)]
386pub struct Edge {
387    pub id: LinkId,
388    pub namespace: String,
389    pub source_id: Uuid,
390    pub target_id: Uuid,
391    pub relation: EdgeRelation,
392    pub weight: f64,
393    pub created_at: DateTime<Utc>,
394    pub updated_at: DateTime<Utc>,
395    pub deleted_at: Option<DateTime<Utc>>,
396    pub metadata: Option<Value>,
397    pub target_backend: Option<String>,
398}
399
400#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
401#[serde(rename_all = "snake_case")]
402pub enum Direction {
403    #[default]
404    Out,
405    In,
406    Both,
407}
408
409#[derive(Clone, Debug, Default, Serialize, Deserialize)]
410pub struct TimeRange {
411    pub start: Option<DateTime<Utc>>,
412    pub end: Option<DateTime<Utc>>,
413}
414
415#[derive(Clone, Debug, Default, Serialize, Deserialize)]
416pub struct EdgeFilter {
417    pub ids: Vec<LinkId>,
418    pub source_ids: Vec<Uuid>,
419    pub target_ids: Vec<Uuid>,
420    pub relations: Vec<EdgeRelation>,
421    pub min_weight: Option<f64>,
422    pub max_weight: Option<f64>,
423    pub created_at: Option<TimeRange>,
424}
425
426#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
427#[serde(rename_all = "snake_case")]
428pub enum EdgeSortField {
429    CreatedAt,
430    Weight,
431    Relation,
432}
433
434#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
435#[serde(rename_all = "snake_case")]
436pub enum SortDirection {
437    Asc,
438    Desc,
439}
440
441#[derive(Clone, Debug, Serialize, Deserialize)]
442pub struct SortOrder<F> {
443    pub field: F,
444    pub direction: SortDirection,
445}
446
447#[derive(Clone, Debug, Serialize, Deserialize)]
448pub struct NeighborQuery {
449    pub direction: Direction,
450    pub relations: Option<Vec<EdgeRelation>>,
451    pub limit: Option<u32>,
452    pub min_weight: Option<f64>,
453}
454
455/// One neighbor returned by a graph query.
456///
457/// Field naming (#148): on the JSON wire, the node identifier is serialized as
458/// `id` (not `node_id`) so it matches the verb-wide identifier convention.
459/// Internal Rust code still uses `.node_id` on the struct.
460///
461/// Enrichment (#162): `name` and `kind` are populated by the runtime layer
462/// after the storage call returns. Storage `GraphStore` impls leave them
463/// `None`; the runtime batch-fetches the entity rows and fills them in.
464#[derive(Clone, Debug, Serialize, Deserialize)]
465pub struct NeighborHit {
466    #[serde(rename = "id")]
467    pub node_id: Uuid,
468    pub edge_id: Uuid,
469    pub relation: EdgeRelation,
470    pub weight: f64,
471    #[serde(default, skip_serializing_if = "Option::is_none")]
472    pub name: Option<String>,
473    #[serde(default, skip_serializing_if = "Option::is_none")]
474    pub kind: Option<String>,
475}
476
477#[derive(Clone, Debug, Default, Serialize, Deserialize)]
478pub struct TraversalOptions {
479    pub max_depth: usize,
480    pub direction: Direction,
481    pub relations: Option<Vec<EdgeRelation>>,
482    pub min_weight: Option<f64>,
483    pub limit: Option<u32>,
484}
485
486impl TraversalOptions {
487    pub fn new(max_depth: usize) -> Self {
488        Self {
489            max_depth,
490            ..Default::default()
491        }
492    }
493
494    pub fn with_direction(mut self, d: Direction) -> Self {
495        self.direction = d;
496        self
497    }
498}
499
500#[derive(Clone, Debug, Serialize, Deserialize)]
501pub struct TraversalRequest {
502    pub roots: Vec<Uuid>,
503    pub options: TraversalOptions,
504    pub include_roots: bool,
505}
506
507/// One node along a traversal path.
508///
509/// Field naming (#148): JSON wire serialization is `id`. Enrichment (#162):
510/// `name`/`kind` are filled by the runtime layer after the storage call.
511#[derive(Clone, Debug, Serialize, Deserialize)]
512pub struct PathNode {
513    #[serde(rename = "id")]
514    pub node_id: Uuid,
515    pub via_edge: Option<Uuid>,
516    pub depth: usize,
517    #[serde(default, skip_serializing_if = "Option::is_none")]
518    pub name: Option<String>,
519    #[serde(default, skip_serializing_if = "Option::is_none")]
520    pub kind: Option<String>,
521}
522
523#[derive(Clone, Debug, Serialize, Deserialize)]
524pub struct GraphPath {
525    pub root_id: Uuid,
526    pub nodes: Vec<PathNode>,
527    pub total_weight: f64,
528}