lora_store/
vector.rs

1//! First-class VECTOR value type.
2//!
3//! LoraDB VECTOR values are fixed-dimension, typed numeric coordinate
4//! collections. A `LoraVector` can be stored directly as a node or
5//! relationship property, returned through every binding, compared for
6//! equality, and used as input to the built-in vector math functions
7//! (`vector.similarity.cosine`, `vector.similarity.euclidean`,
8//! `vector_distance`, `vector_norm`, `vector_dimension_count`,
9//! `toIntegerList`, `toFloatList`).
10//!
11//! Vector indexes and approximate kNN are intentionally out of scope for
12//! this pass — exhaustive search via `ORDER BY vector.similarity.*(…)
13//! LIMIT k` works today; an index-backed variant is future work.
14
15use std::fmt;
16
/// Maximum dimension accepted by LoraDB's `vector(...)` constructor.
///
/// [`LoraVector::try_new`] rejects any declared dimension that is below 1
/// or above this value with [`VectorBuildError::InvalidDimension`].
pub const MAX_VECTOR_DIMENSION: usize = 4096;
19
/// Canonical coordinate type for a vector.
///
/// The external tag names (`FLOAT64`, `FLOAT32`, `INTEGER`, `INTEGER32`,
/// `INTEGER16`, `INTEGER8`) are the serialization labels used by every
/// binding. Aliases (`FLOAT`, `INT`, `INT64`, `INTEGER64`, `INT32`,
/// `INT16`, `INT8`, `SIGNED INTEGER`) resolve to these canonical variants
/// at construction time and are not reported back in output.
///
/// The derived `PartialOrd` / `Ord` follow the declaration order of the
/// variants below.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
)]
pub enum VectorCoordinateType {
    /// 64-bit float coordinates; canonical tag `FLOAT64`.
    Float64,
    /// 32-bit float coordinates; canonical tag `FLOAT32`.
    Float32,
    /// 64-bit signed integer coordinates; canonical tag `INTEGER`.
    Integer64,
    /// 32-bit signed integer coordinates; canonical tag `INTEGER32`.
    Integer32,
    /// 16-bit signed integer coordinates; canonical tag `INTEGER16`.
    Integer16,
    /// 8-bit signed integer coordinates; canonical tag `INTEGER8`.
    Integer8,
}
38
39impl VectorCoordinateType {
40    /// Canonical label emitted on the wire (tagged value `coordinateType`
41    /// field). Lowercase aliases and the multi-word `SIGNED INTEGER`
42    /// alias are accepted on input via [`parse`](Self::parse), but the
43    /// output is always one of these six tags.
44    pub fn as_str(self) -> &'static str {
45        match self {
46            VectorCoordinateType::Float64 => "FLOAT64",
47            VectorCoordinateType::Float32 => "FLOAT32",
48            VectorCoordinateType::Integer64 => "INTEGER",
49            VectorCoordinateType::Integer32 => "INTEGER32",
50            VectorCoordinateType::Integer16 => "INTEGER16",
51            VectorCoordinateType::Integer8 => "INTEGER8",
52        }
53    }
54
55    /// Parse a coordinate type from a user-supplied string. Accepts every
56    /// alias documented in `vector()` / binding helpers; returns `None`
57    /// when the name is unrecognised. Comparison is case-insensitive and
58    /// collapses runs of whitespace so `SIGNED INTEGER` and `signed
59    /// integer` both resolve.
60    pub fn parse(name: &str) -> Option<Self> {
61        let collapsed: String = name
62            .split_whitespace()
63            .collect::<Vec<_>>()
64            .join(" ")
65            .to_ascii_uppercase();
66        match collapsed.as_str() {
67            // `FLOAT` and `FLOAT64` are the two spellings the public
68            // `vector()` syntax accepts. `DOUBLE` is not part of the
69            // public surface; we reject it so typos surface as a clear
70            // "unknown coordinate type" instead of silently mapping to
71            // FLOAT64.
72            "FLOAT" | "FLOAT64" => Some(VectorCoordinateType::Float64),
73            "FLOAT32" => Some(VectorCoordinateType::Float32),
74            "INTEGER" | "INT" | "INT64" | "INTEGER64" | "SIGNED INTEGER" => {
75                Some(VectorCoordinateType::Integer64)
76            }
77            "INTEGER32" | "INT32" => Some(VectorCoordinateType::Integer32),
78            "INTEGER16" | "INT16" => Some(VectorCoordinateType::Integer16),
79            "INTEGER8" | "INT8" => Some(VectorCoordinateType::Integer8),
80            _ => None,
81        }
82    }
83
84    /// True for `FLOAT` / `FLOAT32` / `FLOAT64`.
85    pub fn is_float(self) -> bool {
86        matches!(
87            self,
88            VectorCoordinateType::Float64 | VectorCoordinateType::Float32
89        )
90    }
91}
92
/// Internal storage for a vector. One variant per supported coordinate
/// type; dimension is implicit in the inner `Vec`'s length.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorValues {
    /// `f64` coordinates (coordinate type `Float64`).
    Float64(Vec<f64>),
    /// `f32` coordinates (coordinate type `Float32`).
    Float32(Vec<f32>),
    /// `i64` coordinates (coordinate type `Integer64`).
    Integer64(Vec<i64>),
    /// `i32` coordinates (coordinate type `Integer32`).
    Integer32(Vec<i32>),
    /// `i16` coordinates (coordinate type `Integer16`).
    Integer16(Vec<i16>),
    /// `i8` coordinates (coordinate type `Integer8`).
    Integer8(Vec<i8>),
}
104
105impl VectorValues {
106    pub fn coordinate_type(&self) -> VectorCoordinateType {
107        match self {
108            VectorValues::Float64(_) => VectorCoordinateType::Float64,
109            VectorValues::Float32(_) => VectorCoordinateType::Float32,
110            VectorValues::Integer64(_) => VectorCoordinateType::Integer64,
111            VectorValues::Integer32(_) => VectorCoordinateType::Integer32,
112            VectorValues::Integer16(_) => VectorCoordinateType::Integer16,
113            VectorValues::Integer8(_) => VectorCoordinateType::Integer8,
114        }
115    }
116
117    pub fn len(&self) -> usize {
118        match self {
119            VectorValues::Float64(v) => v.len(),
120            VectorValues::Float32(v) => v.len(),
121            VectorValues::Integer64(v) => v.len(),
122            VectorValues::Integer32(v) => v.len(),
123            VectorValues::Integer16(v) => v.len(),
124            VectorValues::Integer8(v) => v.len(),
125        }
126    }
127
128    pub fn is_empty(&self) -> bool {
129        self.len() == 0
130    }
131
132    /// Lossless conversion of every coordinate to `f64`. Used by every
133    /// vector-math function so the implementations can share one
134    /// f32-precision accumulator irrespective of the underlying storage.
135    pub fn as_f64_vec(&self) -> Vec<f64> {
136        match self {
137            VectorValues::Float64(v) => v.clone(),
138            VectorValues::Float32(v) => v.iter().map(|x| *x as f64).collect(),
139            VectorValues::Integer64(v) => v.iter().map(|x| *x as f64).collect(),
140            VectorValues::Integer32(v) => v.iter().map(|x| *x as f64).collect(),
141            VectorValues::Integer16(v) => v.iter().map(|x| *x as f64).collect(),
142            VectorValues::Integer8(v) => v.iter().map(|x| *x as f64).collect(),
143        }
144    }
145
146    /// Convert every coordinate to `i64`, truncating fractional parts for
147    /// float-backed vectors. Matches the semantics required by
148    /// `toIntegerList(vector)`.
149    pub fn to_i64_vec(&self) -> Vec<i64> {
150        match self {
151            VectorValues::Float64(v) => v.iter().map(|x| *x as i64).collect(),
152            VectorValues::Float32(v) => v.iter().map(|x| *x as i64).collect(),
153            VectorValues::Integer64(v) => v.clone(),
154            VectorValues::Integer32(v) => v.iter().map(|x| *x as i64).collect(),
155            VectorValues::Integer16(v) => v.iter().map(|x| *x as i64).collect(),
156            VectorValues::Integer8(v) => v.iter().map(|x| *x as i64).collect(),
157        }
158    }
159}
160
/// A first-class VECTOR value.
///
/// NOTE(review): both fields are public and the type derives
/// `Deserialize`, so `dimension == values.len()` appears to be guaranteed
/// only for values built through `LoraVector::try_new` — confirm whether
/// other construction paths validate this.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct LoraVector {
    /// Declared dimension. `try_new` only succeeds when this equals the
    /// number of supplied coordinates.
    pub dimension: usize,
    /// Typed coordinate storage; the active variant determines the
    /// vector's coordinate type.
    pub values: VectorValues,
}
167
168impl LoraVector {
169    /// Total-order comparison key. Sorting vectors is mostly meaningful
170    /// for tie-breaking inside `ORDER BY` — the key orders first by
171    /// coordinate type tag, then by dimension, then by the coordinates
172    /// rendered as `f64` (matches `as_f64_vec`). Callers that need a
173    /// stable key for DISTINCT/grouping should use `to_key_string`.
174    pub fn coordinate_type(&self) -> VectorCoordinateType {
175        self.values.coordinate_type()
176    }
177
178    /// Canonical string form used for grouping / DISTINCT / UNION keys,
179    /// and for the fallback sort comparator. Not meant for user display.
180    pub fn to_key_string(&self) -> String {
181        let mut out = String::new();
182        out.push_str(self.coordinate_type().as_str());
183        out.push('|');
184        out.push_str(&self.dimension.to_string());
185        out.push('|');
186        let vals = self.values.as_f64_vec();
187        for (i, v) in vals.iter().enumerate() {
188            if i > 0 {
189                out.push(',');
190            }
191            // Use `{:?}` so NaN is encoded distinctly from ±Inf — mirrors
192            // the strategy used by GroupValueKey for `LoraValue::Float`.
193            out.push_str(&format!("{v:?}"));
194        }
195        out
196    }
197}
198
199impl fmt::Display for LoraVector {
200    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
201        write!(f, "vector(")?;
202        f.write_str("[")?;
203        let values = self.values.as_f64_vec();
204        for (i, v) in values.iter().enumerate() {
205            if i > 0 {
206                f.write_str(", ")?;
207            }
208            if self.coordinate_type().is_float() {
209                write!(f, "{v}")?;
210            } else {
211                write!(f, "{}", *v as i64)?;
212            }
213        }
214        f.write_str("], ")?;
215        write!(
216            f,
217            "{}, {})",
218            self.dimension,
219            self.coordinate_type().as_str()
220        )
221    }
222}
223
224// ---------------------------------------------------------------------------
225// Construction
226// ---------------------------------------------------------------------------
227
/// Error returned by [`LoraVector::try_new`] (and by
/// [`parse_string_values`] for string-form input). Kept as a concrete
/// enum so the executor can render a single-line error message without
/// inspecting the underlying cause.
#[derive(Debug, Clone, PartialEq)]
pub enum VectorBuildError {
    /// The declared dimension (payload) is below 1 or above
    /// `MAX_VECTOR_DIMENSION`.
    InvalidDimension(i64),
    /// The number of supplied coordinates (`got`) differs from the
    /// declared dimension (`expected`).
    DimensionMismatch {
        expected: usize,
        got: usize,
    },
    /// The coordinate list contained a nested list.
    NestedListNotAllowed,
    /// A coordinate was not numeric; the payload is a short description
    /// that is embedded verbatim in the rendered message.
    NonNumericCoordinate(String),
    /// A coordinate was NaN or infinite.
    NonFiniteCoordinate,
    /// A coordinate does not fit the requested coordinate type; `value`
    /// is the offending coordinate rendered as text.
    OutOfRange {
        coordinate_type: VectorCoordinateType,
        value: String,
    },
    /// The coordinate type name (payload) was not recognised.
    UnknownCoordinateType(String),
}
247
248impl fmt::Display for VectorBuildError {
249    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250        match self {
251            VectorBuildError::InvalidDimension(d) => {
252                write!(
253                    f,
254                    "vector dimension must be between 1 and {MAX_VECTOR_DIMENSION}, got {d}"
255                )
256            }
257            VectorBuildError::DimensionMismatch { expected, got } => write!(
258                f,
259                "vector value length {got} does not match declared dimension {expected}"
260            ),
261            VectorBuildError::NestedListNotAllowed => {
262                write!(f, "vector coordinates cannot contain nested lists")
263            }
264            VectorBuildError::NonNumericCoordinate(kind) => {
265                write!(f, "vector coordinates must be numeric, got {kind}")
266            }
267            VectorBuildError::NonFiniteCoordinate => {
268                write!(f, "vector coordinates cannot be NaN or Infinity")
269            }
270            VectorBuildError::OutOfRange {
271                coordinate_type,
272                value,
273            } => write!(
274                f,
275                "value {value} is out of range for coordinate type {}",
276                coordinate_type.as_str()
277            ),
278            VectorBuildError::UnknownCoordinateType(name) => {
279                write!(f, "unknown vector coordinate type '{name}'")
280            }
281        }
282    }
283}
284
// Marker impl: no method is overridden, so the trait's provided defaults
// apply and the message comes from the `Display` impl.
impl std::error::Error for VectorBuildError {}
286
/// Raw numeric input for one coordinate before it has been coerced into
/// the destination coordinate type. Executors / binding layers feed
/// values through this enum so the coercion rules live in one place.
#[derive(Debug, Clone, Copy)]
pub enum RawCoordinate {
    /// An integer input, kept as `i64` so integer coordinate types can be
    /// range-checked without passing through a float.
    Int(i64),
    /// A floating-point input.
    Float(f64),
}
295
296impl RawCoordinate {
297    fn as_f64(self) -> f64 {
298        match self {
299            RawCoordinate::Int(v) => v as f64,
300            RawCoordinate::Float(v) => v,
301        }
302    }
303}
304
305impl LoraVector {
306    /// Build a vector from raw numeric coordinates, applying validation
307    /// and coordinate-type coercion. Single entry point used by both
308    /// `vector()` in Cypher and the binding-side constructors.
309    pub fn try_new(
310        raw: Vec<RawCoordinate>,
311        dimension: i64,
312        coordinate_type: VectorCoordinateType,
313    ) -> Result<Self, VectorBuildError> {
314        if dimension <= 0 || dimension as usize > MAX_VECTOR_DIMENSION {
315            return Err(VectorBuildError::InvalidDimension(dimension));
316        }
317        let dim = dimension as usize;
318        if raw.len() != dim {
319            return Err(VectorBuildError::DimensionMismatch {
320                expected: dim,
321                got: raw.len(),
322            });
323        }
324
325        for c in &raw {
326            if let RawCoordinate::Float(v) = c {
327                if !v.is_finite() {
328                    return Err(VectorBuildError::NonFiniteCoordinate);
329                }
330            }
331        }
332
333        let values = match coordinate_type {
334            VectorCoordinateType::Float64 => {
335                VectorValues::Float64(raw.iter().map(|c| c.as_f64()).collect())
336            }
337            VectorCoordinateType::Float32 => {
338                let mut out = Vec::with_capacity(dim);
339                for c in &raw {
340                    let v = c.as_f64();
341                    if v.abs() > f32::MAX as f64 {
342                        return Err(VectorBuildError::OutOfRange {
343                            coordinate_type,
344                            value: format!("{v}"),
345                        });
346                    }
347                    out.push(v as f32);
348                }
349                VectorValues::Float32(out)
350            }
351            VectorCoordinateType::Integer64 => {
352                let mut out = Vec::with_capacity(dim);
353                for c in &raw {
354                    out.push(coerce_to_int::<i64>(*c, coordinate_type)?);
355                }
356                VectorValues::Integer64(out)
357            }
358            VectorCoordinateType::Integer32 => {
359                let mut out = Vec::with_capacity(dim);
360                for c in &raw {
361                    out.push(coerce_to_int::<i32>(*c, coordinate_type)?);
362                }
363                VectorValues::Integer32(out)
364            }
365            VectorCoordinateType::Integer16 => {
366                let mut out = Vec::with_capacity(dim);
367                for c in &raw {
368                    out.push(coerce_to_int::<i16>(*c, coordinate_type)?);
369                }
370                VectorValues::Integer16(out)
371            }
372            VectorCoordinateType::Integer8 => {
373                let mut out = Vec::with_capacity(dim);
374                for c in &raw {
375                    out.push(coerce_to_int::<i8>(*c, coordinate_type)?);
376                }
377                VectorValues::Integer8(out)
378            }
379        };
380
381        Ok(LoraVector {
382            dimension: dim,
383            values,
384        })
385    }
386}
387
388/// Private helper: coerce a raw numeric coordinate into a specific signed
389/// integer target. Float inputs truncate toward zero per LoraDB vector
390/// coercion semantics; the result must fit in the target type or we
391/// raise `OutOfRange`.
392fn coerce_to_int<T>(
393    raw: RawCoordinate,
394    coordinate_type: VectorCoordinateType,
395) -> Result<T, VectorBuildError>
396where
397    T: TryFrom<i64> + Copy,
398{
399    let as_i64 = match raw {
400        RawCoordinate::Int(v) => v,
401        RawCoordinate::Float(v) => {
402            // `as i64` saturates on out-of-range floats, which would mask
403            // overflow — do the check explicitly against the range of
404            // i64 before truncating.
405            if v > i64::MAX as f64 || v < i64::MIN as f64 {
406                return Err(VectorBuildError::OutOfRange {
407                    coordinate_type,
408                    value: format!("{v}"),
409                });
410            }
411            v.trunc() as i64
412        }
413    };
414
415    T::try_from(as_i64).map_err(|_| VectorBuildError::OutOfRange {
416        coordinate_type,
417        value: as_i64.to_string(),
418    })
419}
420
421/// Parse a string-form coordinate list, e.g. `"[1.05e+00, 0.123, 5]"`.
422/// Used by `vector()` when `vectorValue` is a STRING.
423pub fn parse_string_values(input: &str) -> Result<Vec<RawCoordinate>, VectorBuildError> {
424    let trimmed = input.trim();
425    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
426        return Err(VectorBuildError::NonNumericCoordinate(
427            "string must start with '[' and end with ']'".to_string(),
428        ));
429    }
430    let body = &trimmed[1..trimmed.len() - 1];
431    if body.trim().is_empty() {
432        return Ok(Vec::new());
433    }
434
435    let mut out = Vec::new();
436    for part in body.split(',') {
437        let token = part.trim();
438        if token.is_empty() {
439            return Err(VectorBuildError::NonNumericCoordinate(
440                "empty list entry".to_string(),
441            ));
442        }
443
444        // Accept integer-looking tokens as Int so integer coordinate
445        // types never go through float truncation unnecessarily.
446        if let Ok(i) = token.parse::<i64>() {
447            out.push(RawCoordinate::Int(i));
448            continue;
449        }
450        match token.parse::<f64>() {
451            Ok(f) if f.is_finite() => out.push(RawCoordinate::Float(f)),
452            Ok(_) => return Err(VectorBuildError::NonFiniteCoordinate),
453            Err(_) => {
454                return Err(VectorBuildError::NonNumericCoordinate(format!(
455                    "cannot parse '{token}'"
456                )))
457            }
458        }
459    }
460    Ok(out)
461}
462
463// ---------------------------------------------------------------------------
464// Vector math
465// ---------------------------------------------------------------------------
466
467/// Return Some(value) if both vectors have the same dimension; None if
468/// they don't. Callers route the None branch to a query error so that
469/// `vector_distance` / `vector.similarity.*` never silently return a
470/// bogus number.
471fn check_same_dim(a: &LoraVector, b: &LoraVector) -> Option<usize> {
472    if a.dimension == b.dimension {
473        Some(a.dimension)
474    } else {
475        None
476    }
477}
478
479/// Raw cosine similarity in the range [-1, 1]. Returns `None` when
480/// either vector has zero norm, since cosine is undefined in that case.
481pub fn cosine_similarity_raw(a: &LoraVector, b: &LoraVector) -> Option<f64> {
482    check_same_dim(a, b)?;
483    // Use f32 arithmetic for LoraDB's vector similarity implementation,
484    // then widen back to f64 for the result.
485    let av: Vec<f32> = a
486        .values
487        .as_f64_vec()
488        .into_iter()
489        .map(|x| x as f32)
490        .collect();
491    let bv: Vec<f32> = b
492        .values
493        .as_f64_vec()
494        .into_iter()
495        .map(|x| x as f32)
496        .collect();
497    let mut dot = 0f32;
498    let mut na = 0f32;
499    let mut nb = 0f32;
500    for (x, y) in av.iter().zip(bv.iter()) {
501        dot += x * y;
502        na += x * x;
503        nb += y * y;
504    }
505    if na == 0.0 || nb == 0.0 {
506        return None;
507    }
508    let denom = na.sqrt() * nb.sqrt();
509    if denom == 0.0 {
510        return None;
511    }
512    Some((dot / denom) as f64)
513}
514
515/// Cosine similarity squashed into [0, 1]. Matches the documented
516/// `vector.similarity.cosine` behaviour.
517pub fn cosine_similarity_bounded(a: &LoraVector, b: &LoraVector) -> Option<f64> {
518    cosine_similarity_raw(a, b).map(|raw| ((raw + 1.0) / 2.0).clamp(0.0, 1.0))
519}
520
521/// Squared Euclidean distance (sum of squared differences). Uses f32
522/// arithmetic to match LoraDB's vector function implementation.
523pub fn euclidean_distance_squared(a: &LoraVector, b: &LoraVector) -> Option<f64> {
524    check_same_dim(a, b)?;
525    let av: Vec<f32> = a
526        .values
527        .as_f64_vec()
528        .into_iter()
529        .map(|x| x as f32)
530        .collect();
531    let bv: Vec<f32> = b
532        .values
533        .as_f64_vec()
534        .into_iter()
535        .map(|x| x as f32)
536        .collect();
537    let mut sum = 0f32;
538    for (x, y) in av.iter().zip(bv.iter()) {
539        let d = x - y;
540        sum += d * d;
541    }
542    Some(sum as f64)
543}
544
545/// Euclidean (L2) distance.
546pub fn euclidean_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
547    euclidean_distance_squared(a, b).map(f64::sqrt)
548}
549
550/// Manhattan (L1) distance.
551pub fn manhattan_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
552    check_same_dim(a, b)?;
553    let av = a.values.as_f64_vec();
554    let bv = b.values.as_f64_vec();
555    let mut sum = 0f32;
556    for (x, y) in av.iter().zip(bv.iter()) {
557        sum += ((*x as f32) - (*y as f32)).abs();
558    }
559    Some(sum as f64)
560}
561
562/// Hamming distance: count of positions where the two vectors differ.
563pub fn hamming_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
564    check_same_dim(a, b)?;
565    let av = a.values.as_f64_vec();
566    let bv = b.values.as_f64_vec();
567    let mut count = 0i64;
568    for (x, y) in av.iter().zip(bv.iter()) {
569        if (*x as f32) != (*y as f32) {
570            count += 1;
571        }
572    }
573    Some(count as f64)
574}
575
576/// Dot product (f32 arithmetic, widened back to f64).
577pub fn dot_product(a: &LoraVector, b: &LoraVector) -> Option<f64> {
578    check_same_dim(a, b)?;
579    let av = a.values.as_f64_vec();
580    let bv = b.values.as_f64_vec();
581    let mut acc = 0f32;
582    for (x, y) in av.iter().zip(bv.iter()) {
583        acc += (*x as f32) * (*y as f32);
584    }
585    Some(acc as f64)
586}
587
588/// Euclidean (L2) norm.
589pub fn euclidean_norm(v: &LoraVector) -> f64 {
590    let values = v.values.as_f64_vec();
591    let mut sum = 0f32;
592    for x in &values {
593        let x32 = *x as f32;
594        sum += x32 * x32;
595    }
596    (sum.sqrt()) as f64
597}
598
599/// Manhattan (L1) norm.
600pub fn manhattan_norm(v: &LoraVector) -> f64 {
601    let values = v.values.as_f64_vec();
602    let mut sum = 0f32;
603    for x in &values {
604        sum += (*x as f32).abs();
605    }
606    sum as f64
607}
608
609/// Similarity score derived from squared Euclidean distance: `1 / (1 +
610/// d²)`. For the documented example where `distance² == 22`, this
611/// yields `1 / 23 ≈ 0.043478`.
612pub fn euclidean_similarity(a: &LoraVector, b: &LoraVector) -> Option<f64> {
613    euclidean_distance_squared(a, b).map(|d2| 1.0 / (1.0 + d2))
614}
615
616// ---------------------------------------------------------------------------
617// Tests
618// ---------------------------------------------------------------------------
619
620#[cfg(test)]
621mod tests {
622    use super::*;
623
624    #[test]
625    fn parse_coordinate_type_accepts_aliases() {
626        assert_eq!(
627            VectorCoordinateType::parse("INTEGER"),
628            Some(VectorCoordinateType::Integer64)
629        );
630        assert_eq!(
631            VectorCoordinateType::parse("int64"),
632            Some(VectorCoordinateType::Integer64)
633        );
634        assert_eq!(
635            VectorCoordinateType::parse("signed integer"),
636            Some(VectorCoordinateType::Integer64)
637        );
638        assert_eq!(
639            VectorCoordinateType::parse("  SIGNED    INTEGER "),
640            Some(VectorCoordinateType::Integer64)
641        );
642        assert_eq!(
643            VectorCoordinateType::parse("FLOAT"),
644            Some(VectorCoordinateType::Float64)
645        );
646        assert_eq!(
647            VectorCoordinateType::parse("float32"),
648            Some(VectorCoordinateType::Float32)
649        );
650        assert_eq!(VectorCoordinateType::parse("bogus"), None);
651    }
652
653    #[test]
654    fn try_new_rejects_zero_dim() {
655        let err = LoraVector::try_new(vec![], 0, VectorCoordinateType::Float64).unwrap_err();
656        assert!(matches!(err, VectorBuildError::InvalidDimension(0)));
657    }
658
659    #[test]
660    fn try_new_rejects_over_max_dim() {
661        let err = LoraVector::try_new(
662            vec![RawCoordinate::Int(1); 1],
663            (MAX_VECTOR_DIMENSION + 1) as i64,
664            VectorCoordinateType::Float64,
665        )
666        .unwrap_err();
667        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
668    }
669
670    #[test]
671    fn try_new_rejects_dimension_mismatch() {
672        let err = LoraVector::try_new(
673            vec![RawCoordinate::Int(1)],
674            2,
675            VectorCoordinateType::Integer64,
676        )
677        .unwrap_err();
678        assert!(matches!(
679            err,
680            VectorBuildError::DimensionMismatch {
681                expected: 2,
682                got: 1
683            }
684        ));
685    }
686
687    #[test]
688    fn int8_overflow_errors() {
689        let err = LoraVector::try_new(
690            vec![RawCoordinate::Int(128)],
691            1,
692            VectorCoordinateType::Integer8,
693        )
694        .unwrap_err();
695        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
696    }
697
698    #[test]
699    fn float_to_int_truncates() {
700        let v = LoraVector::try_new(
701            vec![RawCoordinate::Float(1.9), RawCoordinate::Float(-1.9)],
702            2,
703            VectorCoordinateType::Integer64,
704        )
705        .unwrap();
706        match v.values {
707            VectorValues::Integer64(ref values) => assert_eq!(values, &[1, -1]),
708            _ => panic!("expected Integer64"),
709        }
710    }
711
712    #[test]
713    fn int_to_float_is_allowed() {
714        let v = LoraVector::try_new(
715            vec![RawCoordinate::Int(3), RawCoordinate::Int(4)],
716            2,
717            VectorCoordinateType::Float32,
718        )
719        .unwrap();
720        assert_eq!(v.values, VectorValues::Float32(vec![3.0, 4.0]));
721    }
722
723    #[test]
724    fn parse_string_values_handles_scientific() {
725        let parsed = parse_string_values("[1.05e+00, 0.123, 5]").unwrap();
726        assert_eq!(parsed.len(), 3);
727        match parsed[0] {
728            RawCoordinate::Float(f) => assert!((f - 1.05).abs() < 1e-9),
729            _ => panic!("expected float"),
730        }
731        match parsed[2] {
732            RawCoordinate::Int(i) => assert_eq!(i, 5),
733            _ => panic!("expected int"),
734        }
735    }
736
737    #[test]
738    fn cosine_similarity_is_bounded() {
739        let a = LoraVector::try_new(
740            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
741            2,
742            VectorCoordinateType::Float32,
743        )
744        .unwrap();
745        let b = LoraVector::try_new(
746            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
747            2,
748            VectorCoordinateType::Float32,
749        )
750        .unwrap();
751        let sim = cosine_similarity_bounded(&a, &b).unwrap();
752        assert!((sim - 1.0).abs() < 1e-6);
753    }
754
755    #[test]
756    fn euclidean_similarity_matches_documented_example() {
757        // Documented Euclidean similarity example:
758        // d^2 = (4-2)^2 + (5-8)^2 + (6-3)^2 = 22
759        // similarity = 1 / (1 + 22) ≈ 0.0434782
760        let a = LoraVector::try_new(
761            vec![
762                RawCoordinate::Float(4.0),
763                RawCoordinate::Float(5.0),
764                RawCoordinate::Float(6.0),
765            ],
766            3,
767            VectorCoordinateType::Float32,
768        )
769        .unwrap();
770        let b = LoraVector::try_new(
771            vec![
772                RawCoordinate::Float(2.0),
773                RawCoordinate::Float(8.0),
774                RawCoordinate::Float(3.0),
775            ],
776            3,
777            VectorCoordinateType::Float32,
778        )
779        .unwrap();
780        let sim = euclidean_similarity(&a, &b).unwrap();
781        assert!((sim - (1.0 / 23.0)).abs() < 1e-6, "got {sim}");
782    }
783
784    // ----------------------------------------------------------------------
785    // Coordinate type alias coverage
786    // ----------------------------------------------------------------------
787
788    /// Small deterministic table mapping every accepted input form to its
789    /// canonical variant. Keeps the alias list here exhaustive so adding a
790    /// new alias needs a corresponding table row.
791    #[test]
792    fn parse_coordinate_type_every_alias() {
793        use VectorCoordinateType::*;
794        let cases: &[(&str, VectorCoordinateType)] = &[
795            ("FLOAT", Float64),
796            ("Float", Float64),
797            ("float", Float64),
798            ("FLOAT64", Float64),
799            ("float64", Float64),
800            ("FLOAT32", Float32),
801            ("float32", Float32),
802            ("INTEGER", Integer64),
803            ("Integer", Integer64),
804            ("integer", Integer64),
805            ("INT", Integer64),
806            ("int", Integer64),
807            ("INT64", Integer64),
808            ("int64", Integer64),
809            ("INTEGER64", Integer64),
810            ("SIGNED INTEGER", Integer64),
811            ("signed integer", Integer64),
812            ("Signed  Integer", Integer64),
813            ("INTEGER32", Integer32),
814            ("int32", Integer32),
815            ("INT32", Integer32),
816            ("INTEGER16", Integer16),
817            ("INT16", Integer16),
818            ("int16", Integer16),
819            ("INTEGER8", Integer8),
820            ("INT8", Integer8),
821            ("int8", Integer8),
822        ];
823        for (input, expected) in cases {
824            assert_eq!(
825                VectorCoordinateType::parse(input),
826                Some(*expected),
827                "failed for input {input:?}"
828            );
829        }
830    }
831
832    #[test]
833    fn parse_coordinate_type_rejects_unsupported_aliases() {
834        for bogus in [
835            "DOUBLE",
836            "double",
837            "REAL",
838            "NUMBER",
839            "BIGINT",
840            "INT128",
841            "FLOAT128",
842            "UINT8",
843            "UNSIGNED INTEGER",
844            "BIT",
845            "",
846        ] {
847            assert_eq!(
848                VectorCoordinateType::parse(bogus),
849                None,
850                "should reject {bogus:?}"
851            );
852        }
853    }
854
855    #[test]
856    fn parse_coordinate_type_is_whitespace_tolerant() {
857        assert_eq!(
858            VectorCoordinateType::parse("\tinteger\n"),
859            Some(VectorCoordinateType::Integer64)
860        );
861        assert_eq!(
862            VectorCoordinateType::parse("   INTEGER   "),
863            Some(VectorCoordinateType::Integer64)
864        );
865    }
866
867    // ----------------------------------------------------------------------
868    // parse_string_values
869    // ----------------------------------------------------------------------
870
871    fn unwrap_float(raw: RawCoordinate) -> f64 {
872        match raw {
873            RawCoordinate::Float(f) => f,
874            RawCoordinate::Int(i) => i as f64,
875        }
876    }
877
878    fn unwrap_int(raw: RawCoordinate) -> i64 {
879        match raw {
880            RawCoordinate::Int(i) => i,
881            RawCoordinate::Float(f) => panic!("expected Int, got Float({f})"),
882        }
883    }
884
885    #[test]
886    fn parse_string_values_accepts_negatives_and_whitespace() {
887        let parsed = parse_string_values("  [ -1,  -2.5 ,   3 , -4.0e-2 ]  ").unwrap();
888        assert_eq!(unwrap_int(parsed[0]), -1);
889        assert!((unwrap_float(parsed[1]) + 2.5).abs() < 1e-9);
890        assert_eq!(unwrap_int(parsed[2]), 3);
891        assert!((unwrap_float(parsed[3]) + 0.04).abs() < 1e-12);
892    }
893
894    #[test]
895    fn parse_string_values_accepts_signed_exponents() {
896        let parsed = parse_string_values("[1e+10, 1e-10, -2.5e+3]").unwrap();
897        assert!((unwrap_float(parsed[0]) - 1e10).abs() < 1.0);
898        assert!((unwrap_float(parsed[1]) - 1e-10).abs() < 1e-20);
899        assert!((unwrap_float(parsed[2]) + 2500.0).abs() < 1e-9);
900    }
901
902    #[test]
903    fn parse_string_values_accepts_empty_brackets() {
904        let parsed = parse_string_values("[]").unwrap();
905        assert!(parsed.is_empty());
906    }
907
908    #[test]
909    fn parse_string_values_rejects_missing_brackets() {
910        assert!(parse_string_values("1, 2, 3").is_err());
911        assert!(parse_string_values("[1, 2, 3").is_err());
912        assert!(parse_string_values("1, 2, 3]").is_err());
913    }
914
915    #[test]
916    fn parse_string_values_rejects_empty_entries() {
917        assert!(parse_string_values("[1, , 3]").is_err());
918        assert!(parse_string_values("[,1,2]").is_err());
919        assert!(parse_string_values("[1,2,]").is_err());
920        assert!(parse_string_values("[ , ]").is_err());
921    }
922
923    #[test]
924    fn parse_string_values_rejects_non_numeric_tokens() {
925        assert!(parse_string_values("[1, abc, 3]").is_err());
926        assert!(parse_string_values("[true, false]").is_err());
927        assert!(parse_string_values("[\"1\", \"2\"]").is_err());
928    }
929
930    #[test]
931    fn parse_string_values_rejects_non_finite() {
932        for bad in ["[NaN]", "[Infinity]", "[-Infinity]", "[1, NaN, 3]"] {
933            assert!(parse_string_values(bad).is_err(), "should reject {bad:?}");
934        }
935    }
936
937    // ----------------------------------------------------------------------
938    // Dimension boundaries
939    // ----------------------------------------------------------------------
940
941    #[test]
942    fn try_new_accepts_exactly_max_dimension() {
943        let raw = vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION];
944        let v = LoraVector::try_new(
945            raw,
946            MAX_VECTOR_DIMENSION as i64,
947            VectorCoordinateType::Integer8,
948        )
949        .expect("4096 should be accepted");
950        assert_eq!(v.dimension, MAX_VECTOR_DIMENSION);
951    }
952
953    #[test]
954    fn try_new_rejects_max_plus_one_dimension() {
955        let err = LoraVector::try_new(
956            vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION + 1],
957            (MAX_VECTOR_DIMENSION + 1) as i64,
958            VectorCoordinateType::Integer8,
959        )
960        .unwrap_err();
961        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
962    }
963
964    #[test]
965    fn try_new_rejects_negative_dimension() {
966        let err = LoraVector::try_new(vec![], -1, VectorCoordinateType::Integer64).unwrap_err();
967        assert!(matches!(err, VectorBuildError::InvalidDimension(-1)));
968    }
969
970    // ----------------------------------------------------------------------
971    // Integer min/max boundaries and overflow
972    // ----------------------------------------------------------------------
973
974    /// Table-driven min/max test: each entry supplies the coordinate type
975    /// plus the min/max value that should fit and the just-out-of-range
976    /// values that must overflow.
977    #[test]
978    fn integer_boundaries_round_trip() {
979        let cases: &[(VectorCoordinateType, i64, i64, i64, i64)] = &[
980            // (type,                        min,                    max,                    under,            over)
981            (
982                VectorCoordinateType::Integer8,
983                i8::MIN as i64,
984                i8::MAX as i64,
985                i8::MIN as i64 - 1,
986                i8::MAX as i64 + 1,
987            ),
988            (
989                VectorCoordinateType::Integer16,
990                i16::MIN as i64,
991                i16::MAX as i64,
992                i16::MIN as i64 - 1,
993                i16::MAX as i64 + 1,
994            ),
995            (
996                VectorCoordinateType::Integer32,
997                i32::MIN as i64,
998                i32::MAX as i64,
999                i32::MIN as i64 - 1,
1000                i32::MAX as i64 + 1,
1001            ),
1002            (VectorCoordinateType::Integer64, i64::MIN, i64::MAX, 0, 0),
1003        ];
1004        for (ty, min, max, under, over) in cases {
1005            // min and max should succeed.
1006            LoraVector::try_new(vec![RawCoordinate::Int(*min)], 1, *ty)
1007                .unwrap_or_else(|e| panic!("{ty:?} min rejected: {e}"));
1008            LoraVector::try_new(vec![RawCoordinate::Int(*max)], 1, *ty)
1009                .unwrap_or_else(|e| panic!("{ty:?} max rejected: {e}"));
1010
1011            // Integer64 has no out-of-range at the i64 level — skip.
1012            if *ty == VectorCoordinateType::Integer64 {
1013                continue;
1014            }
1015
1016            let e = LoraVector::try_new(vec![RawCoordinate::Int(*under)], 1, *ty).unwrap_err();
1017            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
1018            let e = LoraVector::try_new(vec![RawCoordinate::Int(*over)], 1, *ty).unwrap_err();
1019            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
1020        }
1021    }
1022
1023    #[test]
1024    fn float32_overflow_errors() {
1025        // A value that fits comfortably in f64 but overflows f32's max.
1026        let huge = (f32::MAX as f64) * 10.0;
1027        let err = LoraVector::try_new(
1028            vec![RawCoordinate::Float(huge)],
1029            1,
1030            VectorCoordinateType::Float32,
1031        )
1032        .unwrap_err();
1033        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
1034    }
1035
1036    #[test]
1037    fn float_to_int_truncates_toward_zero() {
1038        // Both 1.9 and -1.9 truncate toward 0, not toward -inf.
1039        let v = LoraVector::try_new(
1040            vec![
1041                RawCoordinate::Float(1.9),
1042                RawCoordinate::Float(-1.9),
1043                RawCoordinate::Float(0.999),
1044                RawCoordinate::Float(-0.999),
1045            ],
1046            4,
1047            VectorCoordinateType::Integer8,
1048        )
1049        .unwrap();
1050        match v.values {
1051            VectorValues::Integer8(ref values) => assert_eq!(values, &[1i8, -1, 0, 0]),
1052            _ => panic!("expected Integer8"),
1053        }
1054    }
1055
1056    #[test]
1057    fn float_out_of_range_i64_errors() {
1058        // An f64 well outside i64's range must error, not saturate.
1059        let err = LoraVector::try_new(
1060            vec![RawCoordinate::Float(f64::MAX)],
1061            1,
1062            VectorCoordinateType::Integer64,
1063        )
1064        .unwrap_err();
1065        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
1066    }
1067
1068    #[test]
1069    fn non_finite_float_rejected_in_try_new() {
1070        for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
1071            let err = LoraVector::try_new(
1072                vec![RawCoordinate::Float(bad)],
1073                1,
1074                VectorCoordinateType::Float64,
1075            )
1076            .unwrap_err();
1077            assert!(matches!(err, VectorBuildError::NonFiniteCoordinate));
1078        }
1079    }
1080
1081    // ----------------------------------------------------------------------
1082    // to_key_string
1083    // ----------------------------------------------------------------------
1084
1085    #[test]
1086    fn to_key_string_distinguishes_coord_type_dim_and_values() {
1087        fn v(coord: VectorCoordinateType, vals: &[i64], dim: i64) -> LoraVector {
1088            LoraVector::try_new(
1089                vals.iter().map(|x| RawCoordinate::Int(*x)).collect(),
1090                dim,
1091                coord,
1092            )
1093            .unwrap()
1094        }
1095
1096        // Different coord types with matching values must differ.
1097        let a = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
1098        let b = v(VectorCoordinateType::Integer32, &[1, 2, 3], 3);
1099        assert_ne!(a.to_key_string(), b.to_key_string());
1100
1101        // Different dimensions differ.
1102        let c = v(VectorCoordinateType::Integer64, &[1, 2], 2);
1103        assert_ne!(a.to_key_string(), c.to_key_string());
1104
1105        // Different values differ.
1106        let d = v(VectorCoordinateType::Integer64, &[1, 2, 4], 3);
1107        assert_ne!(a.to_key_string(), d.to_key_string());
1108
1109        // Identical keys match — used by DISTINCT / grouping.
1110        let a2 = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
1111        assert_eq!(a.to_key_string(), a2.to_key_string());
1112    }
1113
1114    // ----------------------------------------------------------------------
1115    // Math spot-checks (guard against silent regressions)
1116    // ----------------------------------------------------------------------
1117
1118    #[test]
1119    fn cosine_orthogonal_is_zero_raw_and_half_bounded() {
1120        let a = LoraVector::try_new(
1121            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1122            2,
1123            VectorCoordinateType::Float32,
1124        )
1125        .unwrap();
1126        let b = LoraVector::try_new(
1127            vec![RawCoordinate::Int(0), RawCoordinate::Int(1)],
1128            2,
1129            VectorCoordinateType::Float32,
1130        )
1131        .unwrap();
1132        assert!((cosine_similarity_raw(&a, &b).unwrap()).abs() < 1e-6);
1133        assert!((cosine_similarity_bounded(&a, &b).unwrap() - 0.5).abs() < 1e-6);
1134    }
1135
1136    #[test]
1137    fn cosine_opposite_is_neg_one_raw_and_zero_bounded() {
1138        let a = LoraVector::try_new(
1139            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1140            2,
1141            VectorCoordinateType::Float32,
1142        )
1143        .unwrap();
1144        let b = LoraVector::try_new(
1145            vec![RawCoordinate::Int(-1), RawCoordinate::Int(0)],
1146            2,
1147            VectorCoordinateType::Float32,
1148        )
1149        .unwrap();
1150        assert!((cosine_similarity_raw(&a, &b).unwrap() + 1.0).abs() < 1e-6);
1151        assert!(cosine_similarity_bounded(&a, &b).unwrap().abs() < 1e-6);
1152    }
1153
1154    #[test]
1155    fn cosine_zero_vector_returns_none() {
1156        let zero = LoraVector::try_new(
1157            vec![RawCoordinate::Int(0), RawCoordinate::Int(0)],
1158            2,
1159            VectorCoordinateType::Float32,
1160        )
1161        .unwrap();
1162        let other = LoraVector::try_new(
1163            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1164            2,
1165            VectorCoordinateType::Float32,
1166        )
1167        .unwrap();
1168        assert!(cosine_similarity_raw(&zero, &other).is_none());
1169        assert!(cosine_similarity_bounded(&zero, &other).is_none());
1170    }
1171
1172    #[test]
1173    fn distance_helpers_respect_dimension_mismatch() {
1174        let a = LoraVector::try_new(
1175            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
1176            2,
1177            VectorCoordinateType::Float32,
1178        )
1179        .unwrap();
1180        let b = LoraVector::try_new(
1181            vec![
1182                RawCoordinate::Int(1),
1183                RawCoordinate::Int(0),
1184                RawCoordinate::Int(0),
1185            ],
1186            3,
1187            VectorCoordinateType::Float32,
1188        )
1189        .unwrap();
1190        assert!(euclidean_distance(&a, &b).is_none());
1191        assert!(euclidean_distance_squared(&a, &b).is_none());
1192        assert!(manhattan_distance(&a, &b).is_none());
1193        assert!(hamming_distance(&a, &b).is_none());
1194        assert!(dot_product(&a, &b).is_none());
1195    }
1196
1197    #[test]
1198    fn manhattan_and_euclidean_norm_match_hand_computed() {
1199        // v = [3, 4, 0, -12] — L1 = 19, L2 = 13.
1200        let v = LoraVector::try_new(
1201            vec![
1202                RawCoordinate::Float(3.0),
1203                RawCoordinate::Float(4.0),
1204                RawCoordinate::Float(0.0),
1205                RawCoordinate::Float(-12.0),
1206            ],
1207            4,
1208            VectorCoordinateType::Float32,
1209        )
1210        .unwrap();
1211        assert!((manhattan_norm(&v) - 19.0).abs() < 1e-5);
1212        assert!((euclidean_norm(&v) - 13.0).abs() < 1e-5);
1213    }
1214
1215    #[test]
1216    fn hamming_on_float_vectors_uses_f32_comparison() {
1217        // Both vectors store values that truncate to the same f32, so
1218        // hamming should report 0 mismatches — documents the f32 rule.
1219        let a = LoraVector::try_new(
1220            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
1221            2,
1222            VectorCoordinateType::Float32,
1223        )
1224        .unwrap();
1225        let b = LoraVector::try_new(
1226            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
1227            2,
1228            VectorCoordinateType::Float64,
1229        )
1230        .unwrap();
1231        assert!((hamming_distance(&a, &b).unwrap()).abs() < 1e-9);
1232
1233        // One position differs.
1234        let c = LoraVector::try_new(
1235            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.5)],
1236            2,
1237            VectorCoordinateType::Float32,
1238        )
1239        .unwrap();
1240        assert!((hamming_distance(&a, &c).unwrap() - 1.0).abs() < 1e-9);
1241    }
1242}
lora_store/vector.rs

lora_store/
vector.rs