Skip to main content

rvcsi_ruvector/
store.rs

1//! The [`RfMemoryStore`] trait and its value objects.
2//!
3//! An RF-memory store keeps embeddings of [`CsiWindow`](rvcsi_core::CsiWindow)s
4//! and [`CsiEvent`](rvcsi_core::CsiEvent)s plus per-room baseline embeddings,
5//! and answers similarity / drift queries over them. This is a stand-in for the
6//! production RuVector binding (ADR-095 FR8, D8) — see the crate docs.
7
8use serde::{Deserialize, Serialize};
9
10use rvcsi_core::{CsiEvent, CsiWindow, RvcsiError, SourceId};
11
/// Identifier minted for each stored embedding (monotonic within a store).
///
/// A `Copy` newtype over `u64`. The derived comparison, ordering and hashing
/// all delegate to the wrapped integer. The inner value is public (`.0`) and
/// is also reachable through [`EmbeddingId::value`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct EmbeddingId(pub u64);
15
16impl EmbeddingId {
17    /// The raw integer value.
18    #[inline]
19    pub const fn value(self) -> u64 {
20        self.0
21    }
22}
23
24impl From<u64> for EmbeddingId {
25    #[inline]
26    fn from(v: u64) -> Self {
27        EmbeddingId(v)
28    }
29}
30
/// Which kind of record an embedding came from.
///
/// Carried on every [`SimilarHit`] so callers can tell window matches apart
/// from event matches.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RecordKind {
    /// Embedding of a [`CsiWindow`](rvcsi_core::CsiWindow).
    Window,
    /// Embedding of a [`CsiEvent`](rvcsi_core::CsiEvent).
    Event,
}
39
/// One hit returned by [`RfMemoryStore::query_similar`].
///
/// Only `PartialEq` is derived (not `Eq`) because `score` is an `f32`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SimilarHit {
    /// Id of the matched stored embedding.
    pub id: EmbeddingId,
    /// Cosine similarity to the query in `[-1.0, 1.0]`.
    pub score: f32,
    /// Whether the matched record was a window or an event.
    pub kind: RecordKind,
    /// Source the matched record came from.
    pub source_id: SourceId,
    /// Timestamp of the matched record (ns).
    pub timestamp_ns: u64,
}
54
/// Result of a baseline-drift comparison ([`RfMemoryStore::compute_drift`]).
///
/// Only `PartialEq` is derived (not `Eq`) because `distance` and `threshold`
/// are `f32`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DriftReport {
    /// Room the baseline belongs to.
    pub room: String,
    /// Baseline version that was compared against.
    pub baseline_version: String,
    /// Cosine *distance* `1 - cosine_similarity(baseline, current)` in `[0.0, 2.0]`.
    pub distance: f32,
    /// Threshold the distance was compared against.
    pub threshold: f32,
    /// Whether `distance > threshold` (strict: a distance equal to the
    /// threshold does not count as exceeded).
    pub exceeded: bool,
}
69
/// A queryable RF-memory store: append window/event embeddings, search by
/// cosine similarity, and track per-room baseline drift.
///
/// Implementations are deterministic given the same sequence of operations.
///
/// Mutating operations (`store_*`, `set_baseline`) take `&mut self`; queries
/// (`query_similar`, `compute_drift`, `len`, `is_empty`) take `&self`.
pub trait RfMemoryStore {
    /// Store the embedding of `w`, returning its newly-minted id.
    ///
    /// # Errors
    ///
    /// Returns an [`RvcsiError`]; the failure conditions are defined by the
    /// implementation.
    fn store_window(&mut self, w: &CsiWindow) -> Result<EmbeddingId, RvcsiError>;

    /// Store the embedding of `e`, returning its newly-minted id.
    ///
    /// # Errors
    ///
    /// Returns an [`RvcsiError`]; the failure conditions are defined by the
    /// implementation.
    fn store_event(&mut self, e: &CsiEvent) -> Result<EmbeddingId, RvcsiError>;

    /// Return up to `k` stored records most similar to `query`, by descending
    /// cosine similarity. Records whose embedding length differs from `query`
    /// (e.g. events vs. window queries) score `0.0`, so they rank below every
    /// positively-correlated match.
    ///
    /// NOTE(review): scores span `[-1.0, 1.0]`, so a `0.0` mismatch score would
    /// still rank above negatively-correlated records rather than strictly
    /// last — confirm against the implementation.
    ///
    /// # Errors
    ///
    /// Returns an [`RvcsiError`]; the failure conditions are defined by the
    /// implementation.
    fn query_similar(&self, query: &[f32], k: usize) -> Result<Vec<SimilarHit>, RvcsiError>;

    /// Set (or replace) the baseline embedding for `room` at `version`.
    ///
    /// Takes ownership of `embedding`.
    ///
    /// # Errors
    ///
    /// Returns an [`RvcsiError`]; the failure conditions are defined by the
    /// implementation.
    fn set_baseline(
        &mut self,
        room: &str,
        version: &str,
        embedding: Vec<f32>,
    ) -> Result<(), RvcsiError>;

    /// Compare `current` against `room`'s baseline. Returns `None` if there is
    /// no baseline for `room`, otherwise a [`DriftReport`] with
    /// `distance = 1 - cosine_similarity(baseline, current)` and
    /// `exceeded = distance > threshold`.
    ///
    /// # Errors
    ///
    /// Returns an [`RvcsiError`]; the failure conditions are defined by the
    /// implementation. A missing baseline is reported as `Ok(None)`, not as an
    /// error.
    fn compute_drift(
        &self,
        room: &str,
        current: &[f32],
        threshold: f32,
    ) -> Result<Option<DriftReport>, RvcsiError>;

    /// Number of stored records (windows + events; baselines are not counted).
    fn len(&self) -> usize;

    /// Whether [`RfMemoryStore::len`] is zero.
    ///
    /// Provided method; implementors only need to supply `len`.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `value` to JSON, parses it back, and asserts that the
    /// decoded value equals the input.
    fn assert_json_roundtrip<T>(value: &T)
    where
        T: Serialize + for<'de> Deserialize<'de> + PartialEq + std::fmt::Debug,
    {
        let encoded = serde_json::to_string(value).unwrap();
        let decoded: T = serde_json::from_str(&encoded).unwrap();
        assert_eq!(&decoded, value);
    }

    /// `From<u64>`, `value()` and the serde derives agree on the wrapped integer.
    #[test]
    fn embedding_id_roundtrips() {
        let id = EmbeddingId::from(42);
        assert_eq!(id.value(), 42);
        assert_json_roundtrip(&id);
    }

    /// Both value objects survive a JSON round trip unchanged.
    #[test]
    fn value_objects_serde() {
        assert_json_roundtrip(&SimilarHit {
            id: EmbeddingId(1),
            score: 0.9,
            kind: RecordKind::Window,
            source_id: SourceId::from("s"),
            timestamp_ns: 5,
        });

        assert_json_roundtrip(&DriftReport {
            room: String::from("lab"),
            baseline_version: String::from("v1"),
            distance: 0.1,
            threshold: 0.2,
            exceeded: false,
        });
    }
}