Skip to main content

rvcsi_ruvector/
embedding.rs

1//! Deterministic, dependency-free embedding functions for RF memory records.
2//!
3//! [`window_embedding`] turns a [`CsiWindow`] into a fixed-length
4//! [`WINDOW_EMBEDDING_DIM`]-vector regardless of subcarrier count;
5//! [`event_embedding`] turns a [`CsiEvent`] into a fixed-length
6//! [`EVENT_EMBEDDING_DIM`]-vector. [`cosine_similarity`] is the comparison
7//! metric used by the [`crate::RfMemoryStore`] implementations.
8//!
9//! All functions are pure and deterministic — the same input always yields the
10//! same bytes, with no clocks, randomness, threads or floating-point
11//! reductions whose order could vary.
12
13use rvcsi_core::{CsiEvent, CsiEventKind, CsiWindow};
14
/// Length of a [`window_embedding`] vector.
///
/// Layout (all indices into the returned `Vec<f32>`):
/// * `0..32`  — `mean_amplitude` linearly resampled to 32 bins
/// * `32..64` — `phase_variance` linearly resampled to 32 bins
/// * `64`     — `motion_energy`
/// * `65`     — `presence_score`
/// * `66`     — `quality_score`
/// * `67`     — `ln(1 + frame_count)`
///
/// The whole vector is then L2-normalized (left all-zero if its norm is 0,
/// e.g. for an empty window).
///
/// The value is derived from `SUBCARRIER_BINS` (two resampled blocks plus the
/// four scalar features) so the length cannot silently drift from the bin
/// count; it evaluates to 68.
pub const WINDOW_EMBEDDING_DIM: usize = 2 * SUBCARRIER_BINS + 4;

/// Length of an [`event_embedding`] vector.
///
/// Layout:
/// * `0..10` — one-hot of [`CsiEventKind`] in declaration order (see
///   [`kind_index`])
/// * `10`    — `confidence`
/// * `11`    — `ln(1 + evidence_window_ids.len())`
///
/// Event embeddings are **not** normalized (the one-hot block already gives
/// them a stable scale).
pub const EVENT_EMBEDDING_DIM: usize = 12;

/// Number of bins each per-subcarrier vector is resampled to.
const SUBCARRIER_BINS: usize = 32;
43
/// Linearly resample `src` (length `n`) to length `m`.
///
/// Special cases, checked in this order:
/// * `n == 0` → `vec![0.0; m]`
/// * `n == 1` → `vec![src[0]; m]`
/// * `m == 0` → empty vector
/// * `m == 1` → `vec![src[0]]` (also avoids dividing by `m - 1 == 0`)
///
/// Otherwise, for each output index `j`: `pos = j * (n-1) / (m-1)`,
/// `lo = min(floor(pos), n-1)`, `frac = pos - lo`, value
/// `src[lo] * (1 - frac) + src[min(lo+1, n-1)] * frac`.
///
/// Positions are computed in `f32`. For `n` above 2^24 the cast of `n - 1`
/// can round *up*, so `floor(pos)` may land on `n`; `lo` is clamped to keep
/// the index in bounds. For every input where the clamp does not engage the
/// result is bit-identical to the unclamped formula.
fn resample_linear(src: &[f32], m: usize) -> Vec<f32> {
    let n = src.len();
    match (n, m) {
        (0, _) => return vec![0.0; m],
        (1, _) => return vec![src[0]; m],
        (_, 0) => return Vec::new(),
        // Degenerate target: just take the first sample.
        (_, 1) => return vec![src[0]],
        _ => {}
    }

    let last = n - 1;
    let span = last as f32;
    let denom = (m - 1) as f32;
    (0..m)
        .map(|j| {
            let pos = j as f32 * span / denom;
            // `pos` is finite and non-negative, so the cast cannot wrap; the
            // clamp guards against f32 rounding pushing it past the last index.
            let lo = (pos.floor() as usize).min(last);
            let frac = pos - lo as f32;
            let hi = (lo + 1).min(last);
            src[lo] * (1.0 - frac) + src[hi] * frac
        })
        .collect()
}
78
/// Euclidean (L2) length of `v`: the square root of the sum of squared
/// components, accumulated front to back in `f32`.
///
/// An empty slice has norm zero. NaN components propagate to the result.
fn l2_norm(v: &[f32]) -> f32 {
    let sum_of_squares: f32 = v.iter().map(|&component| component * component).sum();
    sum_of_squares.sqrt()
}
83
84/// In-place L2 normalization; leaves `v` unchanged if its norm is `0` or
85/// non-finite.
86fn l2_normalize(v: &mut [f32]) {
87    let norm = l2_norm(v);
88    if norm.is_finite() && norm > 0.0 {
89        for x in v.iter_mut() {
90            *x /= norm;
91        }
92    }
93}
94
95/// Build the deterministic embedding for a [`CsiWindow`].
96///
97/// The returned vector has length [`WINDOW_EMBEDDING_DIM`]; see that constant's
98/// docs for the exact bin layout. The result is L2-normalized (or all-zero for
99/// an empty window — i.e. `subcarrier_count == 0` and `frame_count == 0`).
100pub fn window_embedding(w: &CsiWindow) -> Vec<f32> {
101    let mut out = Vec::with_capacity(WINDOW_EMBEDDING_DIM);
102    out.extend(resample_linear(&w.mean_amplitude, SUBCARRIER_BINS));
103    out.extend(resample_linear(&w.phase_variance, SUBCARRIER_BINS));
104    out.push(w.motion_energy);
105    out.push(w.presence_score);
106    out.push(w.quality_score);
107    out.push((w.frame_count as f32).ln_1p());
108    debug_assert_eq!(out.len(), WINDOW_EMBEDDING_DIM);
109    l2_normalize(&mut out);
110    out
111}
112
113/// Fixed index of a [`CsiEventKind`] in the one-hot block of an event
114/// embedding — the variant declaration order in `rvcsi_core`.
115fn kind_index(k: CsiEventKind) -> usize {
116    match k {
117        CsiEventKind::PresenceStarted => 0,
118        CsiEventKind::PresenceEnded => 1,
119        CsiEventKind::MotionDetected => 2,
120        CsiEventKind::MotionSettled => 3,
121        CsiEventKind::BaselineChanged => 4,
122        CsiEventKind::SignalQualityDropped => 5,
123        CsiEventKind::DeviceDisconnected => 6,
124        CsiEventKind::BreathingCandidate => 7,
125        CsiEventKind::AnomalyDetected => 8,
126        CsiEventKind::CalibrationRequired => 9,
127    }
128}
129
130/// Build the deterministic embedding for a [`CsiEvent`].
131///
132/// The returned vector has length [`EVENT_EMBEDDING_DIM`]; see that constant's
133/// docs for the exact layout. Not normalized.
134pub fn event_embedding(e: &CsiEvent) -> Vec<f32> {
135    let mut out = vec![0.0_f32; EVENT_EMBEDDING_DIM];
136    out[kind_index(e.kind)] = 1.0;
137    out[10] = e.confidence;
138    out[11] = (e.evidence_window_ids.len() as f32).ln_1p();
139    out
140}
141
/// Cosine similarity of two equal-length vectors.
///
/// Returns `0.0` if the lengths differ, if the slices are empty, or if either
/// vector is all-zero or contains a non-finite component (NaN or infinity);
/// otherwise `dot(a, b) / (||a|| * ||b||)` clamped to `[-1.0, 1.0]`.
///
/// The dot product and both squared norms are accumulated in `f64` in a
/// single front-to-back pass. Squaring an `f32` can overflow to infinity or
/// underflow to zero even when the vector itself is perfectly representable
/// (e.g. components around `1e20` or `1e-30`); in `f32` that made such
/// vectors look non-finite or all-zero and yielded a spurious `0.0`. Every
/// finite `f32` squares to a finite, non-zero-preserving `f64`, so the wider
/// accumulator removes that failure mode while staying deterministic.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    let (mut dot, mut sq_a, mut sq_b) = (0.0_f64, 0.0_f64, 0.0_f64);
    for (&x, &y) in a.iter().zip(b.iter()) {
        let (x, y) = (f64::from(x), f64::from(y));
        dot += x * y;
        sq_a += x * x;
        sq_b += y * y;
    }

    // Zero if either norm is zero; NaN/inf if either norm is non-finite
    // (including the `inf * 0` case), so one check covers every reject path.
    let denom = sq_a.sqrt() * sq_b.sqrt();
    if !denom.is_finite() || denom == 0.0 {
        return 0.0;
    }

    (dot / denom).clamp(-1.0, 1.0) as f32
}
159
#[cfg(test)]
mod tests {
    use super::*;
    use rvcsi_core::{EventId, SessionId, SourceId, WindowId};

    /// `true` when `actual` is within `tol` (exclusive) of `expected`.
    fn close(actual: f32, expected: f32, tol: f32) -> bool {
        (actual - expected).abs() < tol
    }

    /// Fixture: a fully-populated window with 5 subcarriers and 12 frames.
    fn sample_window() -> CsiWindow {
        CsiWindow {
            window_id: WindowId(7),
            session_id: SessionId(1),
            source_id: SourceId::from("emb-test"),
            start_ns: 1_000,
            end_ns: 2_000,
            frame_count: 12,
            mean_amplitude: vec![1.0, 2.0, 3.0, 4.0, 5.0],
            phase_variance: vec![0.1, 0.2, 0.1, 0.3, 0.2],
            motion_energy: 0.42,
            presence_score: 0.8,
            quality_score: 0.9,
        }
    }

    /// Fixture: an event of the given kind with confidence 0.75 and two
    /// evidence windows.
    fn sample_event(kind: CsiEventKind) -> CsiEvent {
        let evidence = vec![WindowId(1), WindowId(2)];
        CsiEvent::new(
            EventId(3),
            kind,
            SessionId(1),
            SourceId::from("emb-test"),
            5_000,
            0.75,
            evidence,
        )
    }

    #[test]
    fn resample_edge_cases() {
        // Empty and single-sample sources.
        assert_eq!(resample_linear(&[], 4), vec![0.0; 4]);
        assert_eq!(resample_linear(&[2.5], 3), vec![2.5, 2.5, 2.5]);

        // Same-length resampling reproduces the samples.
        let same_len = resample_linear(&[0.0, 1.0, 2.0], 3);
        for (i, want) in [0.0_f32, 1.0, 2.0].iter().enumerate() {
            assert!(close(same_len[i], *want, 1e-6));
        }

        // Upsampling two points to five lies on the straight line between them.
        let line = resample_linear(&[0.0, 4.0], 5);
        assert!(close(line[2], 2.0, 1e-6));
    }

    #[test]
    fn window_embedding_is_deterministic_and_unit_length() {
        let w = sample_window();
        let first = window_embedding(&w);
        let second = window_embedding(&w);
        assert_eq!(first, second);
        assert_eq!(first.len(), WINDOW_EMBEDDING_DIM);

        let norm = l2_norm(&first);
        assert!(close(norm, 1.0, 1e-5), "norm was {norm}");
    }

    #[test]
    fn empty_window_embeds_to_zero() {
        let w = CsiWindow {
            mean_amplitude: Vec::new(),
            phase_variance: Vec::new(),
            motion_energy: 0.0,
            presence_score: 0.0,
            quality_score: 0.0,
            frame_count: 0,
            ..sample_window()
        };
        let embedding = window_embedding(&w);
        assert_eq!(embedding.len(), WINDOW_EMBEDDING_DIM);
        assert!(embedding.iter().all(|x| *x == 0.0));
    }

    #[test]
    fn window_embedding_length_independent_of_subcarrier_count() {
        let with_subcarriers = |count: usize| CsiWindow {
            mean_amplitude: vec![1.0; count],
            phase_variance: vec![0.1; count],
            ..sample_window()
        };
        let narrow = window_embedding(&with_subcarriers(56));
        let wide = window_embedding(&with_subcarriers(234));
        assert_eq!(narrow.len(), wide.len());
    }

    #[test]
    fn event_embedding_layout() {
        let motion = event_embedding(&sample_event(CsiEventKind::MotionDetected));
        assert_eq!(motion.len(), EVENT_EMBEDDING_DIM);
        assert_eq!(motion[kind_index(CsiEventKind::MotionDetected)], 1.0);

        // Exactly one hot entry in the kind block.
        let hot = motion[..10].iter().filter(|x| **x == 1.0).count();
        assert_eq!(hot, 1);

        assert!(close(motion[10], 0.75, 1e-6));
        assert!(close(motion[11], (2.0_f32).ln_1p(), 1e-6));

        // A different kind lights a different bin.
        let anomaly = event_embedding(&sample_event(CsiEventKind::AnomalyDetected));
        assert_eq!(anomaly[kind_index(CsiEventKind::AnomalyDetected)], 1.0);
        assert_ne!(motion, anomaly);
    }

    #[test]
    fn cosine_basic_identities() {
        let v = window_embedding(&sample_window());
        assert!(close(cosine_similarity(&v, &v), 1.0, 1e-5));

        let negated: Vec<f32> = v.iter().map(|x| -x).collect();
        assert!(close(cosine_similarity(&v, &negated), -1.0, 1e-5));

        // Mismatched lengths -> 0.
        assert_eq!(cosine_similarity(&v, &v[..3]), 0.0);
        // All-zero operand -> 0.
        assert_eq!(cosine_similarity(&[0.0; 4], &[1.0; 4]), 0.0);
        // Empty operands -> 0.
        assert_eq!(cosine_similarity(&[], &[]), 0.0);
    }
}