// lora_store/vector.rs

1//! First-class VECTOR value type.
2//!
3//! LoraDB VECTOR values are fixed-dimension, typed numeric coordinate
4//! collections. A `LoraVector` can be stored directly as a node or
5//! relationship property, returned through every binding, compared for
6//! equality, and used as input to the built-in vector math functions
7//! (`vector.similarity.cosine`, `vector.similarity.euclidean`,
8//! `vector_distance`, `vector_norm`, `vector_dimension_count`,
9//! `toIntegerList`, `toFloatList`).
10//!
11//! Vector indexes and approximate kNN are intentionally out of scope for
12//! this pass — exhaustive search via `ORDER BY vector.similarity.*(…)
13//! LIMIT k` works today; an index-backed variant is future work.
14
15use std::fmt;
16
/// Largest dimension (inclusive) that the `vector(...)` constructor
/// accepts; anything above this is rejected as an invalid dimension.
pub const MAX_VECTOR_DIMENSION: usize = 4_096;
19
/// The element type shared by every coordinate of a vector.
///
/// Each variant has exactly one canonical tag (`FLOAT64`, `FLOAT32`,
/// `INTEGER`, `INTEGER32`, `INTEGER16`, `INTEGER8`), which is what
/// [`as_str`](Self::as_str) returns and what bindings serialise. The
/// alternative spellings (`FLOAT`, `INT`, `INT64`, `INTEGER64`, `INT32`,
/// `INT16`, `INT8`, `SIGNED INTEGER`) are understood by
/// [`parse`](Self::parse) only and are never produced on output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum VectorCoordinateType {
    Float64,
    Float32,
    Integer64,
    Integer32,
    Integer16,
    Integer8,
}

impl VectorCoordinateType {
    /// Returns the canonical wire tag for this coordinate type (the
    /// `coordinateType` field of a tagged value). The result is always
    /// one of the six upper-case tags, regardless of which alias was
    /// used when the vector was constructed.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Float64 => "FLOAT64",
            Self::Float32 => "FLOAT32",
            Self::Integer64 => "INTEGER",
            Self::Integer32 => "INTEGER32",
            Self::Integer16 => "INTEGER16",
            Self::Integer8 => "INTEGER8",
        }
    }

    /// Resolves a user-supplied type name to its canonical variant.
    ///
    /// Matching ignores ASCII case, leading/trailing whitespace, and the
    /// amount of whitespace between words, so `SIGNED INTEGER` and
    /// `  signed   integer ` are equivalent. Returns `None` for any name
    /// that is not one of the accepted spellings.
    pub fn parse(name: &str) -> Option<Self> {
        // Normalise to single-space-separated, upper-case words.
        let mut normalized = String::with_capacity(name.len());
        for word in name.split_whitespace() {
            if !normalized.is_empty() {
                normalized.push(' ');
            }
            normalized.push_str(word);
        }
        normalized.make_ascii_uppercase();

        let resolved = match normalized.as_str() {
            // Only `FLOAT` and `FLOAT64` are public spellings of the
            // 64-bit float type. `DOUBLE` is deliberately not accepted,
            // so a typo surfaces as "unknown coordinate type" instead of
            // silently becoming FLOAT64.
            "FLOAT64" | "FLOAT" => Self::Float64,
            "FLOAT32" => Self::Float32,
            "INTEGER" | "INT" | "INT64" | "INTEGER64" | "SIGNED INTEGER" => Self::Integer64,
            "INTEGER32" | "INT32" => Self::Integer32,
            "INTEGER16" | "INT16" => Self::Integer16,
            "INTEGER8" | "INT8" => Self::Integer8,
            _ => return None,
        };
        Some(resolved)
    }

    /// Returns `true` for the floating-point coordinate types
    /// (`FLOAT32` and `FLOAT64`, the latter also spelled `FLOAT`).
    pub fn is_float(self) -> bool {
        match self {
            Self::Float64 | Self::Float32 => true,
            Self::Integer64 | Self::Integer32 | Self::Integer16 | Self::Integer8 => false,
        }
    }
}
90
91/// Internal storage for a vector. One variant per supported coordinate
92/// type; dimension is implicit in the inner `Vec`'s length.
93#[derive(Debug, Clone, PartialEq)]
94pub enum VectorValues {
95    Float64(Vec<f64>),
96    Float32(Vec<f32>),
97    Integer64(Vec<i64>),
98    Integer32(Vec<i32>),
99    Integer16(Vec<i16>),
100    Integer8(Vec<i8>),
101}
102
103impl VectorValues {
104    pub fn coordinate_type(&self) -> VectorCoordinateType {
105        match self {
106            VectorValues::Float64(_) => VectorCoordinateType::Float64,
107            VectorValues::Float32(_) => VectorCoordinateType::Float32,
108            VectorValues::Integer64(_) => VectorCoordinateType::Integer64,
109            VectorValues::Integer32(_) => VectorCoordinateType::Integer32,
110            VectorValues::Integer16(_) => VectorCoordinateType::Integer16,
111            VectorValues::Integer8(_) => VectorCoordinateType::Integer8,
112        }
113    }
114
115    pub fn len(&self) -> usize {
116        match self {
117            VectorValues::Float64(v) => v.len(),
118            VectorValues::Float32(v) => v.len(),
119            VectorValues::Integer64(v) => v.len(),
120            VectorValues::Integer32(v) => v.len(),
121            VectorValues::Integer16(v) => v.len(),
122            VectorValues::Integer8(v) => v.len(),
123        }
124    }
125
126    pub fn is_empty(&self) -> bool {
127        self.len() == 0
128    }
129
130    /// Lossless conversion of every coordinate to `f64`. Used by every
131    /// vector-math function so the implementations can share one
132    /// f32-precision accumulator irrespective of the underlying storage.
133    pub fn as_f64_vec(&self) -> Vec<f64> {
134        match self {
135            VectorValues::Float64(v) => v.clone(),
136            VectorValues::Float32(v) => v.iter().map(|x| *x as f64).collect(),
137            VectorValues::Integer64(v) => v.iter().map(|x| *x as f64).collect(),
138            VectorValues::Integer32(v) => v.iter().map(|x| *x as f64).collect(),
139            VectorValues::Integer16(v) => v.iter().map(|x| *x as f64).collect(),
140            VectorValues::Integer8(v) => v.iter().map(|x| *x as f64).collect(),
141        }
142    }
143
144    /// Convert every coordinate to `i64`, truncating fractional parts for
145    /// float-backed vectors. Matches the semantics required by
146    /// `toIntegerList(vector)`.
147    pub fn to_i64_vec(&self) -> Vec<i64> {
148        match self {
149            VectorValues::Float64(v) => v.iter().map(|x| *x as i64).collect(),
150            VectorValues::Float32(v) => v.iter().map(|x| *x as i64).collect(),
151            VectorValues::Integer64(v) => v.clone(),
152            VectorValues::Integer32(v) => v.iter().map(|x| *x as i64).collect(),
153            VectorValues::Integer16(v) => v.iter().map(|x| *x as i64).collect(),
154            VectorValues::Integer8(v) => v.iter().map(|x| *x as i64).collect(),
155        }
156    }
157}
158
159/// A first-class VECTOR value.
160#[derive(Debug, Clone, PartialEq)]
161pub struct LoraVector {
162    pub dimension: usize,
163    pub values: VectorValues,
164}
165
166impl LoraVector {
167    /// Total-order comparison key. Sorting vectors is mostly meaningful
168    /// for tie-breaking inside `ORDER BY` — the key orders first by
169    /// coordinate type tag, then by dimension, then by the coordinates
170    /// rendered as `f64` (matches `as_f64_vec`). Callers that need a
171    /// stable key for DISTINCT/grouping should use `to_key_string`.
172    pub fn coordinate_type(&self) -> VectorCoordinateType {
173        self.values.coordinate_type()
174    }
175
176    /// Canonical string form used for grouping / DISTINCT / UNION keys,
177    /// and for the fallback sort comparator. Not meant for user display.
178    pub fn to_key_string(&self) -> String {
179        let mut out = String::new();
180        out.push_str(self.coordinate_type().as_str());
181        out.push('|');
182        out.push_str(&self.dimension.to_string());
183        out.push('|');
184        let vals = self.values.as_f64_vec();
185        for (i, v) in vals.iter().enumerate() {
186            if i > 0 {
187                out.push(',');
188            }
189            // Use `{:?}` so NaN is encoded distinctly from ±Inf — mirrors
190            // the strategy used by GroupValueKey for `LoraValue::Float`.
191            out.push_str(&format!("{v:?}"));
192        }
193        out
194    }
195}
196
197impl fmt::Display for LoraVector {
198    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199        write!(f, "vector(")?;
200        f.write_str("[")?;
201        let values = self.values.as_f64_vec();
202        for (i, v) in values.iter().enumerate() {
203            if i > 0 {
204                f.write_str(", ")?;
205            }
206            if self.coordinate_type().is_float() {
207                write!(f, "{v}")?;
208            } else {
209                write!(f, "{}", *v as i64)?;
210            }
211        }
212        f.write_str("], ")?;
213        write!(
214            f,
215            "{}, {})",
216            self.dimension,
217            self.coordinate_type().as_str()
218        )
219    }
220}
221
222// ---------------------------------------------------------------------------
223// Construction
224// ---------------------------------------------------------------------------
225
226/// Error returned by [`LoraVector::try_new`]. Kept as a concrete enum so
227/// the executor can render a single-line error message without inspecting
228/// the underlying cause.
229#[derive(Debug, Clone, PartialEq)]
230pub enum VectorBuildError {
231    InvalidDimension(i64),
232    DimensionMismatch {
233        expected: usize,
234        got: usize,
235    },
236    NestedListNotAllowed,
237    NonNumericCoordinate(String),
238    NonFiniteCoordinate,
239    OutOfRange {
240        coordinate_type: VectorCoordinateType,
241        value: String,
242    },
243    UnknownCoordinateType(String),
244}
245
246impl fmt::Display for VectorBuildError {
247    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248        match self {
249            VectorBuildError::InvalidDimension(d) => {
250                write!(
251                    f,
252                    "vector dimension must be between 1 and {MAX_VECTOR_DIMENSION}, got {d}"
253                )
254            }
255            VectorBuildError::DimensionMismatch { expected, got } => write!(
256                f,
257                "vector value length {got} does not match declared dimension {expected}"
258            ),
259            VectorBuildError::NestedListNotAllowed => {
260                write!(f, "vector coordinates cannot contain nested lists")
261            }
262            VectorBuildError::NonNumericCoordinate(kind) => {
263                write!(f, "vector coordinates must be numeric, got {kind}")
264            }
265            VectorBuildError::NonFiniteCoordinate => {
266                write!(f, "vector coordinates cannot be NaN or Infinity")
267            }
268            VectorBuildError::OutOfRange {
269                coordinate_type,
270                value,
271            } => write!(
272                f,
273                "value {value} is out of range for coordinate type {}",
274                coordinate_type.as_str()
275            ),
276            VectorBuildError::UnknownCoordinateType(name) => {
277                write!(f, "unknown vector coordinate type '{name}'")
278            }
279        }
280    }
281}
282
283impl std::error::Error for VectorBuildError {}
284
/// One not-yet-coerced coordinate: either an integer or a float, exactly
/// as it arrived from the executor or a binding layer. Funnelling every
/// input through this enum keeps the coercion rules for the destination
/// coordinate type in one place.
#[derive(Debug, Clone, Copy)]
pub enum RawCoordinate {
    Int(i64),
    Float(f64),
}

impl RawCoordinate {
    /// The coordinate as an `f64`. Floats pass through untouched;
    /// integers are converted with an `as` cast.
    fn as_f64(self) -> f64 {
        match self {
            Self::Float(value) => value,
            Self::Int(value) => value as f64,
        }
    }
}
302
303impl LoraVector {
304    /// Build a vector from raw numeric coordinates, applying validation
305    /// and coordinate-type coercion. Single entry point used by both
306    /// `vector()` in Cypher and the binding-side constructors.
307    pub fn try_new(
308        raw: Vec<RawCoordinate>,
309        dimension: i64,
310        coordinate_type: VectorCoordinateType,
311    ) -> Result<Self, VectorBuildError> {
312        if dimension <= 0 || dimension as usize > MAX_VECTOR_DIMENSION {
313            return Err(VectorBuildError::InvalidDimension(dimension));
314        }
315        let dim = dimension as usize;
316        if raw.len() != dim {
317            return Err(VectorBuildError::DimensionMismatch {
318                expected: dim,
319                got: raw.len(),
320            });
321        }
322
323        for c in &raw {
324            if let RawCoordinate::Float(v) = c {
325                if !v.is_finite() {
326                    return Err(VectorBuildError::NonFiniteCoordinate);
327                }
328            }
329        }
330
331        let values = match coordinate_type {
332            VectorCoordinateType::Float64 => {
333                VectorValues::Float64(raw.iter().map(|c| c.as_f64()).collect())
334            }
335            VectorCoordinateType::Float32 => {
336                let mut out = Vec::with_capacity(dim);
337                for c in &raw {
338                    let v = c.as_f64();
339                    if v.abs() > f32::MAX as f64 {
340                        return Err(VectorBuildError::OutOfRange {
341                            coordinate_type,
342                            value: format!("{v}"),
343                        });
344                    }
345                    out.push(v as f32);
346                }
347                VectorValues::Float32(out)
348            }
349            VectorCoordinateType::Integer64 => {
350                let mut out = Vec::with_capacity(dim);
351                for c in &raw {
352                    out.push(coerce_to_int::<i64>(*c, coordinate_type)?);
353                }
354                VectorValues::Integer64(out)
355            }
356            VectorCoordinateType::Integer32 => {
357                let mut out = Vec::with_capacity(dim);
358                for c in &raw {
359                    out.push(coerce_to_int::<i32>(*c, coordinate_type)?);
360                }
361                VectorValues::Integer32(out)
362            }
363            VectorCoordinateType::Integer16 => {
364                let mut out = Vec::with_capacity(dim);
365                for c in &raw {
366                    out.push(coerce_to_int::<i16>(*c, coordinate_type)?);
367                }
368                VectorValues::Integer16(out)
369            }
370            VectorCoordinateType::Integer8 => {
371                let mut out = Vec::with_capacity(dim);
372                for c in &raw {
373                    out.push(coerce_to_int::<i8>(*c, coordinate_type)?);
374                }
375                VectorValues::Integer8(out)
376            }
377        };
378
379        Ok(LoraVector {
380            dimension: dim,
381            values,
382        })
383    }
384}
385
386/// Private helper: coerce a raw numeric coordinate into a specific signed
387/// integer target. Float inputs truncate toward zero per LoraDB vector
388/// coercion semantics; the result must fit in the target type or we
389/// raise `OutOfRange`.
390fn coerce_to_int<T>(
391    raw: RawCoordinate,
392    coordinate_type: VectorCoordinateType,
393) -> Result<T, VectorBuildError>
394where
395    T: TryFrom<i64> + Copy,
396{
397    let as_i64 = match raw {
398        RawCoordinate::Int(v) => v,
399        RawCoordinate::Float(v) => {
400            // `as i64` saturates on out-of-range floats, which would mask
401            // overflow — do the check explicitly against the range of
402            // i64 before truncating.
403            if v > i64::MAX as f64 || v < i64::MIN as f64 {
404                return Err(VectorBuildError::OutOfRange {
405                    coordinate_type,
406                    value: format!("{v}"),
407                });
408            }
409            v.trunc() as i64
410        }
411    };
412
413    T::try_from(as_i64).map_err(|_| VectorBuildError::OutOfRange {
414        coordinate_type,
415        value: as_i64.to_string(),
416    })
417}
418
419/// Parse a string-form coordinate list, e.g. `"[1.05e+00, 0.123, 5]"`.
420/// Used by `vector()` when `vectorValue` is a STRING.
421pub fn parse_string_values(input: &str) -> Result<Vec<RawCoordinate>, VectorBuildError> {
422    let trimmed = input.trim();
423    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
424        return Err(VectorBuildError::NonNumericCoordinate(
425            "string must start with '[' and end with ']'".to_string(),
426        ));
427    }
428    let body = &trimmed[1..trimmed.len() - 1];
429    if body.trim().is_empty() {
430        return Ok(Vec::new());
431    }
432
433    let mut out = Vec::new();
434    for part in body.split(',') {
435        let token = part.trim();
436        if token.is_empty() {
437            return Err(VectorBuildError::NonNumericCoordinate(
438                "empty list entry".to_string(),
439            ));
440        }
441
442        // Accept integer-looking tokens as Int so integer coordinate
443        // types never go through float truncation unnecessarily.
444        if let Ok(i) = token.parse::<i64>() {
445            out.push(RawCoordinate::Int(i));
446            continue;
447        }
448        match token.parse::<f64>() {
449            Ok(f) if f.is_finite() => out.push(RawCoordinate::Float(f)),
450            Ok(_) => return Err(VectorBuildError::NonFiniteCoordinate),
451            Err(_) => {
452                return Err(VectorBuildError::NonNumericCoordinate(format!(
453                    "cannot parse '{token}'"
454                )))
455            }
456        }
457    }
458    Ok(out)
459}
460
461// ---------------------------------------------------------------------------
462// Vector math
463// ---------------------------------------------------------------------------
464
465/// Return Some(value) if both vectors have the same dimension; None if
466/// they don't. Callers route the None branch to a query error so that
467/// `vector_distance` / `vector.similarity.*` never silently return a
468/// bogus number.
469fn check_same_dim(a: &LoraVector, b: &LoraVector) -> Option<usize> {
470    if a.dimension == b.dimension {
471        Some(a.dimension)
472    } else {
473        None
474    }
475}
476
477/// Raw cosine similarity in the range [-1, 1]. Returns `None` when
478/// either vector has zero norm, since cosine is undefined in that case.
479pub fn cosine_similarity_raw(a: &LoraVector, b: &LoraVector) -> Option<f64> {
480    check_same_dim(a, b)?;
481    // Use f32 arithmetic for LoraDB's vector similarity implementation,
482    // then widen back to f64 for the result.
483    let av: Vec<f32> = a
484        .values
485        .as_f64_vec()
486        .into_iter()
487        .map(|x| x as f32)
488        .collect();
489    let bv: Vec<f32> = b
490        .values
491        .as_f64_vec()
492        .into_iter()
493        .map(|x| x as f32)
494        .collect();
495    let mut dot = 0f32;
496    let mut na = 0f32;
497    let mut nb = 0f32;
498    for (x, y) in av.iter().zip(bv.iter()) {
499        dot += x * y;
500        na += x * x;
501        nb += y * y;
502    }
503    if na == 0.0 || nb == 0.0 {
504        return None;
505    }
506    let denom = na.sqrt() * nb.sqrt();
507    if denom == 0.0 {
508        return None;
509    }
510    Some((dot / denom) as f64)
511}
512
513/// Cosine similarity squashed into [0, 1]. Matches the documented
514/// `vector.similarity.cosine` behaviour.
515pub fn cosine_similarity_bounded(a: &LoraVector, b: &LoraVector) -> Option<f64> {
516    cosine_similarity_raw(a, b).map(|raw| ((raw + 1.0) / 2.0).clamp(0.0, 1.0))
517}
518
519/// Squared Euclidean distance (sum of squared differences). Uses f32
520/// arithmetic to match LoraDB's vector function implementation.
521pub fn euclidean_distance_squared(a: &LoraVector, b: &LoraVector) -> Option<f64> {
522    check_same_dim(a, b)?;
523    let av: Vec<f32> = a
524        .values
525        .as_f64_vec()
526        .into_iter()
527        .map(|x| x as f32)
528        .collect();
529    let bv: Vec<f32> = b
530        .values
531        .as_f64_vec()
532        .into_iter()
533        .map(|x| x as f32)
534        .collect();
535    let mut sum = 0f32;
536    for (x, y) in av.iter().zip(bv.iter()) {
537        let d = x - y;
538        sum += d * d;
539    }
540    Some(sum as f64)
541}
542
543/// Euclidean (L2) distance.
544pub fn euclidean_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
545    euclidean_distance_squared(a, b).map(f64::sqrt)
546}
547
548/// Manhattan (L1) distance.
549pub fn manhattan_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
550    check_same_dim(a, b)?;
551    let av = a.values.as_f64_vec();
552    let bv = b.values.as_f64_vec();
553    let mut sum = 0f32;
554    for (x, y) in av.iter().zip(bv.iter()) {
555        sum += ((*x as f32) - (*y as f32)).abs();
556    }
557    Some(sum as f64)
558}
559
560/// Hamming distance: count of positions where the two vectors differ.
561pub fn hamming_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
562    check_same_dim(a, b)?;
563    let av = a.values.as_f64_vec();
564    let bv = b.values.as_f64_vec();
565    let mut count = 0i64;
566    for (x, y) in av.iter().zip(bv.iter()) {
567        if (*x as f32) != (*y as f32) {
568            count += 1;
569        }
570    }
571    Some(count as f64)
572}
573
574/// Dot product (f32 arithmetic, widened back to f64).
575pub fn dot_product(a: &LoraVector, b: &LoraVector) -> Option<f64> {
576    check_same_dim(a, b)?;
577    let av = a.values.as_f64_vec();
578    let bv = b.values.as_f64_vec();
579    let mut acc = 0f32;
580    for (x, y) in av.iter().zip(bv.iter()) {
581        acc += (*x as f32) * (*y as f32);
582    }
583    Some(acc as f64)
584}
585
586/// Euclidean (L2) norm.
587pub fn euclidean_norm(v: &LoraVector) -> f64 {
588    let values = v.values.as_f64_vec();
589    let mut sum = 0f32;
590    for x in &values {
591        let x32 = *x as f32;
592        sum += x32 * x32;
593    }
594    (sum.sqrt()) as f64
595}
596
597/// Manhattan (L1) norm.
598pub fn manhattan_norm(v: &LoraVector) -> f64 {
599    let values = v.values.as_f64_vec();
600    let mut sum = 0f32;
601    for x in &values {
602        sum += (*x as f32).abs();
603    }
604    sum as f64
605}
606
607/// Similarity score derived from squared Euclidean distance: `1 / (1 +
608/// d²)`. For the documented example where `distance² == 22`, this
609/// yields `1 / 23 ≈ 0.043478`.
610pub fn euclidean_similarity(a: &LoraVector, b: &LoraVector) -> Option<f64> {
611    euclidean_distance_squared(a, b).map(|d2| 1.0 / (1.0 + d2))
612}
613
614// ---------------------------------------------------------------------------
615// Tests
616// ---------------------------------------------------------------------------
617
618#[cfg(test)]
619mod tests {
620    use super::*;
621
622    #[test]
623    fn parse_coordinate_type_accepts_aliases() {
624        assert_eq!(
625            VectorCoordinateType::parse("INTEGER"),
626            Some(VectorCoordinateType::Integer64)
627        );
628        assert_eq!(
629            VectorCoordinateType::parse("int64"),
630            Some(VectorCoordinateType::Integer64)
631        );
632        assert_eq!(
633            VectorCoordinateType::parse("signed integer"),
634            Some(VectorCoordinateType::Integer64)
635        );
636        assert_eq!(
637            VectorCoordinateType::parse("  SIGNED    INTEGER "),
638            Some(VectorCoordinateType::Integer64)
639        );
640        assert_eq!(
641            VectorCoordinateType::parse("FLOAT"),
642            Some(VectorCoordinateType::Float64)
643        );
644        assert_eq!(
645            VectorCoordinateType::parse("float32"),
646            Some(VectorCoordinateType::Float32)
647        );
648        assert_eq!(VectorCoordinateType::parse("bogus"), None);
649    }
650
651    #[test]
652    fn try_new_rejects_zero_dim() {
653        let err = LoraVector::try_new(vec![], 0, VectorCoordinateType::Float64).unwrap_err();
654        assert!(matches!(err, VectorBuildError::InvalidDimension(0)));
655    }
656
657    #[test]
658    fn try_new_rejects_over_max_dim() {
659        let err = LoraVector::try_new(
660            vec![RawCoordinate::Int(1); 1],
661            (MAX_VECTOR_DIMENSION + 1) as i64,
662            VectorCoordinateType::Float64,
663        )
664        .unwrap_err();
665        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
666    }
667
668    #[test]
669    fn try_new_rejects_dimension_mismatch() {
670        let err = LoraVector::try_new(
671            vec![RawCoordinate::Int(1)],
672            2,
673            VectorCoordinateType::Integer64,
674        )
675        .unwrap_err();
676        assert!(matches!(
677            err,
678            VectorBuildError::DimensionMismatch {
679                expected: 2,
680                got: 1
681            }
682        ));
683    }
684
685    #[test]
686    fn int8_overflow_errors() {
687        let err = LoraVector::try_new(
688            vec![RawCoordinate::Int(128)],
689            1,
690            VectorCoordinateType::Integer8,
691        )
692        .unwrap_err();
693        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
694    }
695
696    #[test]
697    fn float_to_int_truncates() {
698        let v = LoraVector::try_new(
699            vec![RawCoordinate::Float(1.9), RawCoordinate::Float(-1.9)],
700            2,
701            VectorCoordinateType::Integer64,
702        )
703        .unwrap();
704        match v.values {
705            VectorValues::Integer64(ref values) => assert_eq!(values, &[1, -1]),
706            _ => panic!("expected Integer64"),
707        }
708    }
709
710    #[test]
711    fn int_to_float_is_allowed() {
712        let v = LoraVector::try_new(
713            vec![RawCoordinate::Int(3), RawCoordinate::Int(4)],
714            2,
715            VectorCoordinateType::Float32,
716        )
717        .unwrap();
718        assert_eq!(v.values, VectorValues::Float32(vec![3.0, 4.0]));
719    }
720
721    #[test]
722    fn parse_string_values_handles_scientific() {
723        let parsed = parse_string_values("[1.05e+00, 0.123, 5]").unwrap();
724        assert_eq!(parsed.len(), 3);
725        match parsed[0] {
726            RawCoordinate::Float(f) => assert!((f - 1.05).abs() < 1e-9),
727            _ => panic!("expected float"),
728        }
729        match parsed[2] {
730            RawCoordinate::Int(i) => assert_eq!(i, 5),
731            _ => panic!("expected int"),
732        }
733    }
734
735    #[test]
736    fn cosine_similarity_is_bounded() {
737        let a = LoraVector::try_new(
738            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
739            2,
740            VectorCoordinateType::Float32,
741        )
742        .unwrap();
743        let b = LoraVector::try_new(
744            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
745            2,
746            VectorCoordinateType::Float32,
747        )
748        .unwrap();
749        let sim = cosine_similarity_bounded(&a, &b).unwrap();
750        assert!((sim - 1.0).abs() < 1e-6);
751    }
752
753    #[test]
754    fn euclidean_similarity_matches_documented_example() {
755        // Documented Euclidean similarity example:
756        // d^2 = (4-2)^2 + (5-8)^2 + (6-3)^2 = 22
757        // similarity = 1 / (1 + 22) ≈ 0.0434782
758        let a = LoraVector::try_new(
759            vec![
760                RawCoordinate::Float(4.0),
761                RawCoordinate::Float(5.0),
762                RawCoordinate::Float(6.0),
763            ],
764            3,
765            VectorCoordinateType::Float32,
766        )
767        .unwrap();
768        let b = LoraVector::try_new(
769            vec![
770                RawCoordinate::Float(2.0),
771                RawCoordinate::Float(8.0),
772                RawCoordinate::Float(3.0),
773            ],
774            3,
775            VectorCoordinateType::Float32,
776        )
777        .unwrap();
778        let sim = euclidean_similarity(&a, &b).unwrap();
779        assert!((sim - (1.0 / 23.0)).abs() < 1e-6, "got {sim}");
780    }
781
782    // ----------------------------------------------------------------------
783    // Coordinate type alias coverage
784    // ----------------------------------------------------------------------
785
786    /// Small deterministic table mapping every accepted input form to its
787    /// canonical variant. Keeps the alias list here exhaustive so adding a
788    /// new alias needs a corresponding table row.
789    #[test]
790    fn parse_coordinate_type_every_alias() {
791        use VectorCoordinateType::*;
792        let cases: &[(&str, VectorCoordinateType)] = &[
793            ("FLOAT", Float64),
794            ("Float", Float64),
795            ("float", Float64),
796            ("FLOAT64", Float64),
797            ("float64", Float64),
798            ("FLOAT32", Float32),
799            ("float32", Float32),
800            ("INTEGER", Integer64),
801            ("Integer", Integer64),
802            ("integer", Integer64),
803            ("INT", Integer64),
804            ("int", Integer64),
805            ("INT64", Integer64),
806            ("int64", Integer64),
807            ("INTEGER64", Integer64),
808            ("SIGNED INTEGER", Integer64),
809            ("signed integer", Integer64),
810            ("Signed  Integer", Integer64),
811            ("INTEGER32", Integer32),
812            ("int32", Integer32),
813            ("INT32", Integer32),
814            ("INTEGER16", Integer16),
815            ("INT16", Integer16),
816            ("int16", Integer16),
817            ("INTEGER8", Integer8),
818            ("INT8", Integer8),
819            ("int8", Integer8),
820        ];
821        for (input, expected) in cases {
822            assert_eq!(
823                VectorCoordinateType::parse(input),
824                Some(*expected),
825                "failed for input {input:?}"
826            );
827        }
828    }
829
830    #[test]
831    fn parse_coordinate_type_rejects_unsupported_aliases() {
832        for bogus in [
833            "DOUBLE",
834            "double",
835            "REAL",
836            "NUMBER",
837            "BIGINT",
838            "INT128",
839            "FLOAT128",
840            "UINT8",
841            "UNSIGNED INTEGER",
842            "BIT",
843            "",
844        ] {
845            assert_eq!(
846                VectorCoordinateType::parse(bogus),
847                None,
848                "should reject {bogus:?}"
849            );
850        }
851    }
852
853    #[test]
854    fn parse_coordinate_type_is_whitespace_tolerant() {
855        assert_eq!(
856            VectorCoordinateType::parse("\tinteger\n"),
857            Some(VectorCoordinateType::Integer64)
858        );
859        assert_eq!(
860            VectorCoordinateType::parse("   INTEGER   "),
861            Some(VectorCoordinateType::Integer64)
862        );
863    }
864
865    // ----------------------------------------------------------------------
866    // parse_string_values
867    // ----------------------------------------------------------------------
868
869    fn unwrap_float(raw: RawCoordinate) -> f64 {
870        match raw {
871            RawCoordinate::Float(f) => f,
872            RawCoordinate::Int(i) => i as f64,
873        }
874    }
875
876    fn unwrap_int(raw: RawCoordinate) -> i64 {
877        match raw {
878            RawCoordinate::Int(i) => i,
879            RawCoordinate::Float(f) => panic!("expected Int, got Float({f})"),
880        }
881    }
882
883    #[test]
884    fn parse_string_values_accepts_negatives_and_whitespace() {
885        let parsed = parse_string_values("  [ -1,  -2.5 ,   3 , -4.0e-2 ]  ").unwrap();
886        assert_eq!(unwrap_int(parsed[0]), -1);
887        assert!((unwrap_float(parsed[1]) + 2.5).abs() < 1e-9);
888        assert_eq!(unwrap_int(parsed[2]), 3);
889        assert!((unwrap_float(parsed[3]) + 0.04).abs() < 1e-12);
890    }
891
892    #[test]
893    fn parse_string_values_accepts_signed_exponents() {
894        let parsed = parse_string_values("[1e+10, 1e-10, -2.5e+3]").unwrap();
895        assert!((unwrap_float(parsed[0]) - 1e10).abs() < 1.0);
896        assert!((unwrap_float(parsed[1]) - 1e-10).abs() < 1e-20);
897        assert!((unwrap_float(parsed[2]) + 2500.0).abs() < 1e-9);
898    }
899
900    #[test]
901    fn parse_string_values_accepts_empty_brackets() {
902        let parsed = parse_string_values("[]").unwrap();
903        assert!(parsed.is_empty());
904    }
905
906    #[test]
907    fn parse_string_values_rejects_missing_brackets() {
908        assert!(parse_string_values("1, 2, 3").is_err());
909        assert!(parse_string_values("[1, 2, 3").is_err());
910        assert!(parse_string_values("1, 2, 3]").is_err());
911    }
912
913    #[test]
914    fn parse_string_values_rejects_empty_entries() {
915        assert!(parse_string_values("[1, , 3]").is_err());
916        assert!(parse_string_values("[,1,2]").is_err());
917        assert!(parse_string_values("[1,2,]").is_err());
918        assert!(parse_string_values("[ , ]").is_err());
919    }
920
921    #[test]
922    fn parse_string_values_rejects_non_numeric_tokens() {
923        assert!(parse_string_values("[1, abc, 3]").is_err());
924        assert!(parse_string_values("[true, false]").is_err());
925        assert!(parse_string_values("[\"1\", \"2\"]").is_err());
926    }
927
928    #[test]
929    fn parse_string_values_rejects_non_finite() {
930        for bad in ["[NaN]", "[Infinity]", "[-Infinity]", "[1, NaN, 3]"] {
931            assert!(parse_string_values(bad).is_err(), "should reject {bad:?}");
932        }
933    }
934
935    // ----------------------------------------------------------------------
936    // Dimension boundaries
937    // ----------------------------------------------------------------------
938
939    #[test]
940    fn try_new_accepts_exactly_max_dimension() {
941        let raw = vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION];
942        let v = LoraVector::try_new(
943            raw,
944            MAX_VECTOR_DIMENSION as i64,
945            VectorCoordinateType::Integer8,
946        )
947        .expect("4096 should be accepted");
948        assert_eq!(v.dimension, MAX_VECTOR_DIMENSION);
949    }
950
951    #[test]
952    fn try_new_rejects_max_plus_one_dimension() {
953        let err = LoraVector::try_new(
954            vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION + 1],
955            (MAX_VECTOR_DIMENSION + 1) as i64,
956            VectorCoordinateType::Integer8,
957        )
958        .unwrap_err();
959        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
960    }
961
962    #[test]
963    fn try_new_rejects_negative_dimension() {
964        let err = LoraVector::try_new(vec![], -1, VectorCoordinateType::Integer64).unwrap_err();
965        assert!(matches!(err, VectorBuildError::InvalidDimension(-1)));
966    }
967
968    // ----------------------------------------------------------------------
969    // Integer min/max boundaries and overflow
970    // ----------------------------------------------------------------------
971
972    /// Table-driven min/max test: each entry supplies the coordinate type
973    /// plus the min/max value that should fit and the just-out-of-range
974    /// values that must overflow.
975    #[test]
976    fn integer_boundaries_round_trip() {
977        let cases: &[(VectorCoordinateType, i64, i64, i64, i64)] = &[
978            // (type,                        min,                    max,                    under,            over)
979            (
980                VectorCoordinateType::Integer8,
981                i8::MIN as i64,
982                i8::MAX as i64,
983                i8::MIN as i64 - 1,
984                i8::MAX as i64 + 1,
985            ),
986            (
987                VectorCoordinateType::Integer16,
988                i16::MIN as i64,
989                i16::MAX as i64,
990                i16::MIN as i64 - 1,
991                i16::MAX as i64 + 1,
992            ),
993            (
994                VectorCoordinateType::Integer32,
995                i32::MIN as i64,
996                i32::MAX as i64,
997                i32::MIN as i64 - 1,
998                i32::MAX as i64 + 1,
999            ),
1000            (VectorCoordinateType::Integer64, i64::MIN, i64::MAX, 0, 0),
1001        ];
1002        for (ty, min, max, under, over) in cases {
1003            // min and max should succeed.
1004            LoraVector::try_new(vec![RawCoordinate::Int(*min)], 1, *ty)
1005                .unwrap_or_else(|e| panic!("{ty:?} min rejected: {e}"));
1006            LoraVector::try_new(vec![RawCoordinate::Int(*max)], 1, *ty)
1007                .unwrap_or_else(|e| panic!("{ty:?} max rejected: {e}"));
1008
1009            // Integer64 has no out-of-range at the i64 level — skip.
1010            if *ty == VectorCoordinateType::Integer64 {
1011                continue;
1012            }
1013
1014            let e = LoraVector::try_new(vec![RawCoordinate::Int(*under)], 1, *ty).unwrap_err();
1015            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
1016            let e = LoraVector::try_new(vec![RawCoordinate::Int(*over)], 1, *ty).unwrap_err();
1017            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
1018        }
1019    }
1020
1021    #[test]
1022    fn float32_overflow_errors() {
1023        // A value that fits comfortably in f64 but overflows f32's max.
1024        let huge = (f32::MAX as f64) * 10.0;
1025        let err = LoraVector::try_new(
1026            vec![RawCoordinate::Float(huge)],
1027            1,
1028            VectorCoordinateType::Float32,
1029        )
1030        .unwrap_err();
1031        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
1032    }
1033
1034    #[test]
1035    fn float_to_int_truncates_toward_zero() {
1036        // Both 1.9 and -1.9 truncate toward 0, not toward -inf.
1037        let v = LoraVector::try_new(
1038            vec![
1039                RawCoordinate::Float(1.9),
1040                RawCoordinate::Float(-1.9),
1041                RawCoordinate::Float(0.999),
1042                RawCoordinate::Float(-0.999),
1043            ],
1044            4,
1045            VectorCoordinateType::Integer8,
1046        )
1047        .unwrap();
1048        match v.values {
1049            VectorValues::Integer8(ref values) => assert_eq!(values, &[1i8, -1, 0, 0]),
1050            _ => panic!("expected Integer8"),
1051        }
1052    }
1053
1054    #[test]
1055    fn float_out_of_range_i64_errors() {
1056        // An f64 well outside i64's range must error, not saturate.
1057        let err = LoraVector::try_new(
1058            vec![RawCoordinate::Float(f64::MAX)],
1059            1,
1060            VectorCoordinateType::Integer64,
1061        )
1062        .unwrap_err();
1063        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
1064    }
1065
1066    #[test]
1067    fn non_finite_float_rejected_in_try_new() {
1068        for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
1069            let err = LoraVector::try_new(
1070                vec![RawCoordinate::Float(bad)],
1071                1,
1072                VectorCoordinateType::Float64,
1073            )
1074            .unwrap_err();
1075            assert!(matches!(err, VectorBuildError::NonFiniteCoordinate));
1076        }
1077    }
1078
1079    // ----------------------------------------------------------------------
1080    // to_key_string
1081    // ----------------------------------------------------------------------
1082
1083    #[test]
1084    fn to_key_string_distinguishes_coord_type_dim_and_values() {
1085        fn v(coord: VectorCoordinateType, vals: &[i64], dim: i64) -> LoraVector {
1086            LoraVector::try_new(
1087                vals.iter().map(|x| RawCoordinate::Int(*x)).collect(),
1088                dim,
1089                coord,
1090            )
1091            .unwrap()
1092        }
1093
1094        // Different coord types with matching values must differ.
1095        let a = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
1096        let b = v(VectorCoordinateType::Integer32, &[1, 2, 3], 3);
1097        assert_ne!(a.to_key_string(), b.to_key_string());
1098
1099        // Different dimensions differ.
1100        let c = v(VectorCoordinateType::Integer64, &[1, 2], 2);
1101        assert_ne!(a.to_key_string(), c.to_key_string());
1102
1103        // Different values differ.
1104        let d = v(VectorCoordinateType::Integer64, &[1, 2, 4], 3);
1105        assert_ne!(a.to_key_string(), d.to_key_string());
1106
1107        // Identical keys match — used by DISTINCT / grouping.
1108        let a2 = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
1109        assert_eq!(a.to_key_string(), a2.to_key_string());
1110    }
1111
1112    // ----------------------------------------------------------------------
1113    // Math spot-checks (guard against silent regressions)
1114    // ----------------------------------------------------------------------
1115
1116    #[test]
1117    fn cosine_orthogonal_is_zero_raw_and_half_bounded() {
1118        let a = LoraVector::try_new(
1119            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1120            2,
1121            VectorCoordinateType::Float32,
1122        )
1123        .unwrap();
1124        let b = LoraVector::try_new(
1125            vec![RawCoordinate::Int(0), RawCoordinate::Int(1)],
1126            2,
1127            VectorCoordinateType::Float32,
1128        )
1129        .unwrap();
1130        assert!((cosine_similarity_raw(&a, &b).unwrap()).abs() < 1e-6);
1131        assert!((cosine_similarity_bounded(&a, &b).unwrap() - 0.5).abs() < 1e-6);
1132    }
1133
1134    #[test]
1135    fn cosine_opposite_is_neg_one_raw_and_zero_bounded() {
1136        let a = LoraVector::try_new(
1137            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1138            2,
1139            VectorCoordinateType::Float32,
1140        )
1141        .unwrap();
1142        let b = LoraVector::try_new(
1143            vec![RawCoordinate::Int(-1), RawCoordinate::Int(0)],
1144            2,
1145            VectorCoordinateType::Float32,
1146        )
1147        .unwrap();
1148        assert!((cosine_similarity_raw(&a, &b).unwrap() + 1.0).abs() < 1e-6);
1149        assert!(cosine_similarity_bounded(&a, &b).unwrap().abs() < 1e-6);
1150    }
1151
1152    #[test]
1153    fn cosine_zero_vector_returns_none() {
1154        let zero = LoraVector::try_new(
1155            vec![RawCoordinate::Int(0), RawCoordinate::Int(0)],
1156            2,
1157            VectorCoordinateType::Float32,
1158        )
1159        .unwrap();
1160        let other = LoraVector::try_new(
1161            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1162            2,
1163            VectorCoordinateType::Float32,
1164        )
1165        .unwrap();
1166        assert!(cosine_similarity_raw(&zero, &other).is_none());
1167        assert!(cosine_similarity_bounded(&zero, &other).is_none());
1168    }
1169
1170    #[test]
1171    fn distance_helpers_respect_dimension_mismatch() {
1172        let a = LoraVector::try_new(
1173            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1174            2,
1175            VectorCoordinateType::Float32,
1176        )
1177        .unwrap();
1178        let b = LoraVector::try_new(
1179            vec![
1180                RawCoordinate::Int(1),
1181                RawCoordinate::Int(0),
1182                RawCoordinate::Int(0),
1183            ],
1184            3,
1185            VectorCoordinateType::Float32,
1186        )
1187        .unwrap();
1188        assert!(euclidean_distance(&a, &b).is_none());
1189        assert!(euclidean_distance_squared(&a, &b).is_none());
1190        assert!(manhattan_distance(&a, &b).is_none());
1191        assert!(hamming_distance(&a, &b).is_none());
1192        assert!(dot_product(&a, &b).is_none());
1193    }
1194
1195    #[test]
1196    fn manhattan_and_euclidean_norm_match_hand_computed() {
1197        // v = [3, 4, 0, -12] — L1 = 19, L2 = 13.
1198        let v = LoraVector::try_new(
1199            vec![
1200                RawCoordinate::Float(3.0),
1201                RawCoordinate::Float(4.0),
1202                RawCoordinate::Float(0.0),
1203                RawCoordinate::Float(-12.0),
1204            ],
1205            4,
1206            VectorCoordinateType::Float32,
1207        )
1208        .unwrap();
1209        assert!((manhattan_norm(&v) - 19.0).abs() < 1e-5);
1210        assert!((euclidean_norm(&v) - 13.0).abs() < 1e-5);
1211    }
1212
1213    #[test]
1214    fn hamming_on_float_vectors_uses_f32_comparison() {
1215        // Both vectors store values that truncate to the same f32, so
1216        // hamming should report 0 mismatches — documents the f32 rule.
1217        let a = LoraVector::try_new(
1218            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
1219            2,
1220            VectorCoordinateType::Float32,
1221        )
1222        .unwrap();
1223        let b = LoraVector::try_new(
1224            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
1225            2,
1226            VectorCoordinateType::Float64,
1227        )
1228        .unwrap();
1229        assert!((hamming_distance(&a, &b).unwrap()).abs() < 1e-9);
1230
1231        // One position differs.
1232        let c = LoraVector::try_new(
1233            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.5)],
1234            2,
1235            VectorCoordinateType::Float32,
1236        )
1237        .unwrap();
1238        assert!((hamming_distance(&a, &c).unwrap() - 1.0).abs() < 1e-9);
1239    }
1240}
lora_store/vector.rs

lora_store/
vector.rs