rvcsi_ruvector/store.rs
//! The [`RfMemoryStore`] trait and its value objects.
//!
//! An RF-memory store keeps embeddings of [`CsiWindow`](rvcsi_core::CsiWindow)s
//! and [`CsiEvent`](rvcsi_core::CsiEvent)s plus per-room baseline embeddings,
//! and answers similarity / drift queries over them. This is a stand-in for the
//! production RuVector binding (ADR-095 FR8, D8) — see the crate docs.
7
8use serde::{Deserialize, Serialize};
9
10use rvcsi_core::{CsiEvent, CsiWindow, RvcsiError, SourceId};
11
12/// Identifier minted for each stored embedding (monotonic within a store).
13#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
14pub struct EmbeddingId(pub u64);
15
16impl EmbeddingId {
17 /// The raw integer value.
18 #[inline]
19 pub const fn value(self) -> u64 {
20 self.0
21 }
22}
23
24impl From<u64> for EmbeddingId {
25 #[inline]
26 fn from(v: u64) -> Self {
27 EmbeddingId(v)
28 }
29}
30
31/// Which kind of record an embedding came from.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33pub enum RecordKind {
34 /// Embedding of a [`CsiWindow`](rvcsi_core::CsiWindow).
35 Window,
36 /// Embedding of a [`CsiEvent`](rvcsi_core::CsiEvent).
37 Event,
38}
39
40/// One hit returned by [`RfMemoryStore::query_similar`].
41#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
42pub struct SimilarHit {
43 /// Id of the matched stored embedding.
44 pub id: EmbeddingId,
45 /// Cosine similarity to the query in `[-1.0, 1.0]`.
46 pub score: f32,
47 /// Whether the matched record was a window or an event.
48 pub kind: RecordKind,
49 /// Source the matched record came from.
50 pub source_id: SourceId,
51 /// Timestamp of the matched record (ns).
52 pub timestamp_ns: u64,
53}
54
55/// Result of a baseline-drift comparison ([`RfMemoryStore::compute_drift`]).
56#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
57pub struct DriftReport {
58 /// Room the baseline belongs to.
59 pub room: String,
60 /// Baseline version that was compared against.
61 pub baseline_version: String,
62 /// Cosine *distance* `1 - cosine_similarity(baseline, current)` in `[0.0, 2.0]`.
63 pub distance: f32,
64 /// Threshold the distance was compared against.
65 pub threshold: f32,
66 /// Whether `distance > threshold`.
67 pub exceeded: bool,
68}
69
70/// A queryable RF-memory store: append window/event embeddings, search by
71/// cosine similarity, and track per-room baseline drift.
72///
73/// Implementations are deterministic given the same sequence of operations.
74pub trait RfMemoryStore {
75 /// Store the embedding of `w`, returning its newly-minted id.
76 fn store_window(&mut self, w: &CsiWindow) -> Result<EmbeddingId, RvcsiError>;
77
78 /// Store the embedding of `e`, returning its newly-minted id.
79 fn store_event(&mut self, e: &CsiEvent) -> Result<EmbeddingId, RvcsiError>;
80
81 /// Return up to `k` stored records most similar to `query`, by descending
82 /// cosine similarity. Records whose embedding length differs from `query`
83 /// (e.g. events vs. window queries) score `0.0` and so sort last.
84 fn query_similar(&self, query: &[f32], k: usize) -> Result<Vec<SimilarHit>, RvcsiError>;
85
86 /// Set (or replace) the baseline embedding for `room` at `version`.
87 fn set_baseline(
88 &mut self,
89 room: &str,
90 version: &str,
91 embedding: Vec<f32>,
92 ) -> Result<(), RvcsiError>;
93
94 /// Compare `current` against `room`'s baseline. Returns `None` if there is
95 /// no baseline for `room`, otherwise a [`DriftReport`] with
96 /// `distance = 1 - cosine_similarity(baseline, current)` and
97 /// `exceeded = distance > threshold`.
98 fn compute_drift(
99 &self,
100 room: &str,
101 current: &[f32],
102 threshold: f32,
103 ) -> Result<Option<DriftReport>, RvcsiError>;
104
105 /// Number of stored records (windows + events; baselines are not counted).
106 fn len(&self) -> usize;
107
108 /// Whether [`RfMemoryStore::len`] is zero.
109 fn is_empty(&self) -> bool {
110 self.len() == 0
111 }
112}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// An id built from a raw integer exposes that integer and survives a
    /// JSON round trip unchanged.
    #[test]
    fn embedding_id_roundtrips() {
        let original = EmbeddingId::from(42_u64);
        assert_eq!(original.value(), 42);

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: EmbeddingId = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, original);
    }

    /// `SimilarHit` and `DriftReport` both survive a JSON round trip unchanged.
    #[test]
    fn value_objects_serde() {
        let sample_hit = SimilarHit {
            id: EmbeddingId(1),
            score: 0.9,
            kind: RecordKind::Window,
            source_id: SourceId::from("s"),
            timestamp_ns: 5,
        };
        let encoded_hit = serde_json::to_string(&sample_hit).unwrap();
        let decoded_hit: SimilarHit = serde_json::from_str(&encoded_hit).unwrap();
        assert_eq!(decoded_hit, sample_hit);

        let sample_report = DriftReport {
            room: "lab".into(),
            baseline_version: "v1".into(),
            distance: 0.1,
            threshold: 0.2,
            exceeded: false,
        };
        let encoded_report = serde_json::to_string(&sample_report).unwrap();
        let decoded_report: DriftReport = serde_json::from_str(&encoded_report).unwrap();
        assert_eq!(decoded_report, sample_report);
    }
}