1use std::fmt;
16
/// Largest dimension a vector may declare. `LoraVector::try_new` rejects any
/// dimension outside `1..=MAX_VECTOR_DIMENSION`.
pub const MAX_VECTOR_DIMENSION: usize = 4096;
19
/// Element type of a vector's coordinates.
///
/// The declaration order of the variants is significant: the derived
/// `PartialOrd`/`Ord` implementations compare variants in this order.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
)]
pub enum VectorCoordinateType {
    /// 64-bit floating point (`f64`).
    Float64,
    /// 32-bit floating point (`f32`).
    Float32,
    /// 64-bit signed integer (`i64`).
    Integer64,
    /// 32-bit signed integer (`i32`).
    Integer32,
    /// 16-bit signed integer (`i16`).
    Integer16,
    /// 8-bit signed integer (`i8`).
    Integer8,
}
38
39impl VectorCoordinateType {
40 pub fn as_str(self) -> &'static str {
45 match self {
46 VectorCoordinateType::Float64 => "FLOAT64",
47 VectorCoordinateType::Float32 => "FLOAT32",
48 VectorCoordinateType::Integer64 => "INTEGER",
49 VectorCoordinateType::Integer32 => "INTEGER32",
50 VectorCoordinateType::Integer16 => "INTEGER16",
51 VectorCoordinateType::Integer8 => "INTEGER8",
52 }
53 }
54
55 pub fn parse(name: &str) -> Option<Self> {
61 let collapsed: String = name
62 .split_whitespace()
63 .collect::<Vec<_>>()
64 .join(" ")
65 .to_ascii_uppercase();
66 match collapsed.as_str() {
67 "FLOAT" | "FLOAT64" => Some(VectorCoordinateType::Float64),
73 "FLOAT32" => Some(VectorCoordinateType::Float32),
74 "INTEGER" | "INT" | "INT64" | "INTEGER64" | "SIGNED INTEGER" => {
75 Some(VectorCoordinateType::Integer64)
76 }
77 "INTEGER32" | "INT32" => Some(VectorCoordinateType::Integer32),
78 "INTEGER16" | "INT16" => Some(VectorCoordinateType::Integer16),
79 "INTEGER8" | "INT8" => Some(VectorCoordinateType::Integer8),
80 _ => None,
81 }
82 }
83
84 pub fn is_float(self) -> bool {
86 matches!(
87 self,
88 VectorCoordinateType::Float64 | VectorCoordinateType::Float32
89 )
90 }
91}
92
/// Coordinate storage for a vector: one homogeneous `Vec` per supported
/// coordinate type.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum VectorValues {
    /// Coordinates stored as `f64`.
    Float64(Vec<f64>),
    /// Coordinates stored as `f32`.
    Float32(Vec<f32>),
    /// Coordinates stored as `i64`.
    Integer64(Vec<i64>),
    /// Coordinates stored as `i32`.
    Integer32(Vec<i32>),
    /// Coordinates stored as `i16`.
    Integer16(Vec<i16>),
    /// Coordinates stored as `i8`.
    Integer8(Vec<i8>),
}
104
105impl VectorValues {
106 pub fn coordinate_type(&self) -> VectorCoordinateType {
107 match self {
108 VectorValues::Float64(_) => VectorCoordinateType::Float64,
109 VectorValues::Float32(_) => VectorCoordinateType::Float32,
110 VectorValues::Integer64(_) => VectorCoordinateType::Integer64,
111 VectorValues::Integer32(_) => VectorCoordinateType::Integer32,
112 VectorValues::Integer16(_) => VectorCoordinateType::Integer16,
113 VectorValues::Integer8(_) => VectorCoordinateType::Integer8,
114 }
115 }
116
117 pub fn len(&self) -> usize {
118 match self {
119 VectorValues::Float64(v) => v.len(),
120 VectorValues::Float32(v) => v.len(),
121 VectorValues::Integer64(v) => v.len(),
122 VectorValues::Integer32(v) => v.len(),
123 VectorValues::Integer16(v) => v.len(),
124 VectorValues::Integer8(v) => v.len(),
125 }
126 }
127
128 pub fn is_empty(&self) -> bool {
129 self.len() == 0
130 }
131
132 pub fn as_f64_vec(&self) -> Vec<f64> {
136 match self {
137 VectorValues::Float64(v) => v.clone(),
138 VectorValues::Float32(v) => v.iter().map(|x| *x as f64).collect(),
139 VectorValues::Integer64(v) => v.iter().map(|x| *x as f64).collect(),
140 VectorValues::Integer32(v) => v.iter().map(|x| *x as f64).collect(),
141 VectorValues::Integer16(v) => v.iter().map(|x| *x as f64).collect(),
142 VectorValues::Integer8(v) => v.iter().map(|x| *x as f64).collect(),
143 }
144 }
145
146 pub fn to_i64_vec(&self) -> Vec<i64> {
150 match self {
151 VectorValues::Float64(v) => v.iter().map(|x| *x as i64).collect(),
152 VectorValues::Float32(v) => v.iter().map(|x| *x as i64).collect(),
153 VectorValues::Integer64(v) => v.clone(),
154 VectorValues::Integer32(v) => v.iter().map(|x| *x as i64).collect(),
155 VectorValues::Integer16(v) => v.iter().map(|x| *x as i64).collect(),
156 VectorValues::Integer8(v) => v.iter().map(|x| *x as i64).collect(),
157 }
158 }
159}
160
/// A typed vector value: a declared dimension plus its coordinates.
///
/// `LoraVector::try_new` builds values whose `dimension` equals
/// `values.len()`. Both fields are public, so code that fills them in
/// directly (or deserializes them) is responsible for keeping them in sync.
#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct LoraVector {
    /// Declared number of coordinates.
    pub dimension: usize,
    /// The coordinates, stored in their declared coordinate type.
    pub values: VectorValues,
}
167
168impl LoraVector {
169 pub fn coordinate_type(&self) -> VectorCoordinateType {
175 self.values.coordinate_type()
176 }
177
178 pub fn to_key_string(&self) -> String {
181 let mut out = String::new();
182 out.push_str(self.coordinate_type().as_str());
183 out.push('|');
184 out.push_str(&self.dimension.to_string());
185 out.push('|');
186 let vals = self.values.as_f64_vec();
187 for (i, v) in vals.iter().enumerate() {
188 if i > 0 {
189 out.push(',');
190 }
191 out.push_str(&format!("{v:?}"));
194 }
195 out
196 }
197}
198
199impl fmt::Display for LoraVector {
200 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
201 write!(f, "vector(")?;
202 f.write_str("[")?;
203 let values = self.values.as_f64_vec();
204 for (i, v) in values.iter().enumerate() {
205 if i > 0 {
206 f.write_str(", ")?;
207 }
208 if self.coordinate_type().is_float() {
209 write!(f, "{v}")?;
210 } else {
211 write!(f, "{}", *v as i64)?;
212 }
213 }
214 f.write_str("], ")?;
215 write!(
216 f,
217 "{}, {})",
218 self.dimension,
219 self.coordinate_type().as_str()
220 )
221 }
222}
223
/// Reasons a vector cannot be built from user-supplied input.
#[derive(Debug, Clone, PartialEq)]
pub enum VectorBuildError {
    /// The declared dimension is outside `1..=MAX_VECTOR_DIMENSION`; carries
    /// the rejected value.
    InvalidDimension(i64),
    /// The number of supplied coordinates (`got`) differs from the declared
    /// dimension (`expected`).
    DimensionMismatch {
        expected: usize,
        got: usize,
    },
    /// A coordinate was itself a list. Not produced in this file —
    /// presumably raised by the caller that unpacks query values; confirm.
    NestedListNotAllowed,
    /// A coordinate was not numeric; the payload describes what was found.
    NonNumericCoordinate(String),
    /// A coordinate was NaN or infinite.
    NonFiniteCoordinate,
    /// A coordinate does not fit the requested coordinate type; `value` is
    /// the offending value rendered as text.
    OutOfRange {
        coordinate_type: VectorCoordinateType,
        value: String,
    },
    /// The coordinate type name was not recognised; carries the name.
    UnknownCoordinateType(String),
}
247
248impl fmt::Display for VectorBuildError {
249 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
250 match self {
251 VectorBuildError::InvalidDimension(d) => {
252 write!(
253 f,
254 "vector dimension must be between 1 and {MAX_VECTOR_DIMENSION}, got {d}"
255 )
256 }
257 VectorBuildError::DimensionMismatch { expected, got } => write!(
258 f,
259 "vector value length {got} does not match declared dimension {expected}"
260 ),
261 VectorBuildError::NestedListNotAllowed => {
262 write!(f, "vector coordinates cannot contain nested lists")
263 }
264 VectorBuildError::NonNumericCoordinate(kind) => {
265 write!(f, "vector coordinates must be numeric, got {kind}")
266 }
267 VectorBuildError::NonFiniteCoordinate => {
268 write!(f, "vector coordinates cannot be NaN or Infinity")
269 }
270 VectorBuildError::OutOfRange {
271 coordinate_type,
272 value,
273 } => write!(
274 f,
275 "value {value} is out of range for coordinate type {}",
276 coordinate_type.as_str()
277 ),
278 VectorBuildError::UnknownCoordinateType(name) => {
279 write!(f, "unknown vector coordinate type '{name}'")
280 }
281 }
282 }
283}
284
285impl std::error::Error for VectorBuildError {}
286
/// A single coordinate as supplied by the caller, before it is coerced to
/// the vector's coordinate type.
#[derive(Debug, Clone, Copy)]
pub enum RawCoordinate {
    /// An integer input value.
    Int(i64),
    /// A floating-point input value.
    Float(f64),
}

impl RawCoordinate {
    /// Widens the coordinate to `f64`. Integers beyond ±2^53 are rounded to
    /// the nearest representable `f64`.
    fn as_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
304
305impl LoraVector {
306 pub fn try_new(
310 raw: Vec<RawCoordinate>,
311 dimension: i64,
312 coordinate_type: VectorCoordinateType,
313 ) -> Result<Self, VectorBuildError> {
314 if dimension <= 0 || dimension as usize > MAX_VECTOR_DIMENSION {
315 return Err(VectorBuildError::InvalidDimension(dimension));
316 }
317 let dim = dimension as usize;
318 if raw.len() != dim {
319 return Err(VectorBuildError::DimensionMismatch {
320 expected: dim,
321 got: raw.len(),
322 });
323 }
324
325 for c in &raw {
326 if let RawCoordinate::Float(v) = c {
327 if !v.is_finite() {
328 return Err(VectorBuildError::NonFiniteCoordinate);
329 }
330 }
331 }
332
333 let values = match coordinate_type {
334 VectorCoordinateType::Float64 => {
335 VectorValues::Float64(raw.iter().map(|c| c.as_f64()).collect())
336 }
337 VectorCoordinateType::Float32 => {
338 let mut out = Vec::with_capacity(dim);
339 for c in &raw {
340 let v = c.as_f64();
341 if v.abs() > f32::MAX as f64 {
342 return Err(VectorBuildError::OutOfRange {
343 coordinate_type,
344 value: format!("{v}"),
345 });
346 }
347 out.push(v as f32);
348 }
349 VectorValues::Float32(out)
350 }
351 VectorCoordinateType::Integer64 => {
352 let mut out = Vec::with_capacity(dim);
353 for c in &raw {
354 out.push(coerce_to_int::<i64>(*c, coordinate_type)?);
355 }
356 VectorValues::Integer64(out)
357 }
358 VectorCoordinateType::Integer32 => {
359 let mut out = Vec::with_capacity(dim);
360 for c in &raw {
361 out.push(coerce_to_int::<i32>(*c, coordinate_type)?);
362 }
363 VectorValues::Integer32(out)
364 }
365 VectorCoordinateType::Integer16 => {
366 let mut out = Vec::with_capacity(dim);
367 for c in &raw {
368 out.push(coerce_to_int::<i16>(*c, coordinate_type)?);
369 }
370 VectorValues::Integer16(out)
371 }
372 VectorCoordinateType::Integer8 => {
373 let mut out = Vec::with_capacity(dim);
374 for c in &raw {
375 out.push(coerce_to_int::<i8>(*c, coordinate_type)?);
376 }
377 VectorValues::Integer8(out)
378 }
379 };
380
381 Ok(LoraVector {
382 dimension: dim,
383 values,
384 })
385 }
386}
387
388fn coerce_to_int<T>(
393 raw: RawCoordinate,
394 coordinate_type: VectorCoordinateType,
395) -> Result<T, VectorBuildError>
396where
397 T: TryFrom<i64> + Copy,
398{
399 let as_i64 = match raw {
400 RawCoordinate::Int(v) => v,
401 RawCoordinate::Float(v) => {
402 if v > i64::MAX as f64 || v < i64::MIN as f64 {
406 return Err(VectorBuildError::OutOfRange {
407 coordinate_type,
408 value: format!("{v}"),
409 });
410 }
411 v.trunc() as i64
412 }
413 };
414
415 T::try_from(as_i64).map_err(|_| VectorBuildError::OutOfRange {
416 coordinate_type,
417 value: as_i64.to_string(),
418 })
419}
420
421pub fn parse_string_values(input: &str) -> Result<Vec<RawCoordinate>, VectorBuildError> {
424 let trimmed = input.trim();
425 if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
426 return Err(VectorBuildError::NonNumericCoordinate(
427 "string must start with '[' and end with ']'".to_string(),
428 ));
429 }
430 let body = &trimmed[1..trimmed.len() - 1];
431 if body.trim().is_empty() {
432 return Ok(Vec::new());
433 }
434
435 let mut out = Vec::new();
436 for part in body.split(',') {
437 let token = part.trim();
438 if token.is_empty() {
439 return Err(VectorBuildError::NonNumericCoordinate(
440 "empty list entry".to_string(),
441 ));
442 }
443
444 if let Ok(i) = token.parse::<i64>() {
447 out.push(RawCoordinate::Int(i));
448 continue;
449 }
450 match token.parse::<f64>() {
451 Ok(f) if f.is_finite() => out.push(RawCoordinate::Float(f)),
452 Ok(_) => return Err(VectorBuildError::NonFiniteCoordinate),
453 Err(_) => {
454 return Err(VectorBuildError::NonNumericCoordinate(format!(
455 "cannot parse '{token}'"
456 )))
457 }
458 }
459 }
460 Ok(out)
461}
462
463fn check_same_dim(a: &LoraVector, b: &LoraVector) -> Option<usize> {
472 if a.dimension == b.dimension {
473 Some(a.dimension)
474 } else {
475 None
476 }
477}
478
479pub fn cosine_similarity_raw(a: &LoraVector, b: &LoraVector) -> Option<f64> {
482 check_same_dim(a, b)?;
483 let av: Vec<f32> = a
486 .values
487 .as_f64_vec()
488 .into_iter()
489 .map(|x| x as f32)
490 .collect();
491 let bv: Vec<f32> = b
492 .values
493 .as_f64_vec()
494 .into_iter()
495 .map(|x| x as f32)
496 .collect();
497 let mut dot = 0f32;
498 let mut na = 0f32;
499 let mut nb = 0f32;
500 for (x, y) in av.iter().zip(bv.iter()) {
501 dot += x * y;
502 na += x * x;
503 nb += y * y;
504 }
505 if na == 0.0 || nb == 0.0 {
506 return None;
507 }
508 let denom = na.sqrt() * nb.sqrt();
509 if denom == 0.0 {
510 return None;
511 }
512 Some((dot / denom) as f64)
513}
514
515pub fn cosine_similarity_bounded(a: &LoraVector, b: &LoraVector) -> Option<f64> {
518 cosine_similarity_raw(a, b).map(|raw| ((raw + 1.0) / 2.0).clamp(0.0, 1.0))
519}
520
521pub fn euclidean_distance_squared(a: &LoraVector, b: &LoraVector) -> Option<f64> {
524 check_same_dim(a, b)?;
525 let av: Vec<f32> = a
526 .values
527 .as_f64_vec()
528 .into_iter()
529 .map(|x| x as f32)
530 .collect();
531 let bv: Vec<f32> = b
532 .values
533 .as_f64_vec()
534 .into_iter()
535 .map(|x| x as f32)
536 .collect();
537 let mut sum = 0f32;
538 for (x, y) in av.iter().zip(bv.iter()) {
539 let d = x - y;
540 sum += d * d;
541 }
542 Some(sum as f64)
543}
544
545pub fn euclidean_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
547 euclidean_distance_squared(a, b).map(f64::sqrt)
548}
549
550pub fn manhattan_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
552 check_same_dim(a, b)?;
553 let av = a.values.as_f64_vec();
554 let bv = b.values.as_f64_vec();
555 let mut sum = 0f32;
556 for (x, y) in av.iter().zip(bv.iter()) {
557 sum += ((*x as f32) - (*y as f32)).abs();
558 }
559 Some(sum as f64)
560}
561
562pub fn hamming_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
564 check_same_dim(a, b)?;
565 let av = a.values.as_f64_vec();
566 let bv = b.values.as_f64_vec();
567 let mut count = 0i64;
568 for (x, y) in av.iter().zip(bv.iter()) {
569 if (*x as f32) != (*y as f32) {
570 count += 1;
571 }
572 }
573 Some(count as f64)
574}
575
576pub fn dot_product(a: &LoraVector, b: &LoraVector) -> Option<f64> {
578 check_same_dim(a, b)?;
579 let av = a.values.as_f64_vec();
580 let bv = b.values.as_f64_vec();
581 let mut acc = 0f32;
582 for (x, y) in av.iter().zip(bv.iter()) {
583 acc += (*x as f32) * (*y as f32);
584 }
585 Some(acc as f64)
586}
587
588pub fn euclidean_norm(v: &LoraVector) -> f64 {
590 let values = v.values.as_f64_vec();
591 let mut sum = 0f32;
592 for x in &values {
593 let x32 = *x as f32;
594 sum += x32 * x32;
595 }
596 (sum.sqrt()) as f64
597}
598
599pub fn manhattan_norm(v: &LoraVector) -> f64 {
601 let values = v.values.as_f64_vec();
602 let mut sum = 0f32;
603 for x in &values {
604 sum += (*x as f32).abs();
605 }
606 sum as f64
607}
608
609pub fn euclidean_similarity(a: &LoraVector, b: &LoraVector) -> Option<f64> {
613 euclidean_distance_squared(a, b).map(|d2| 1.0 / (1.0 + d2))
614}
615
// Unit tests for coordinate-type parsing, vector construction, string
// parsing, key strings and the distance / similarity helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: a handful of aliases, mixed case and padded input.
    #[test]
    fn parse_coordinate_type_accepts_aliases() {
        assert_eq!(
            VectorCoordinateType::parse("INTEGER"),
            Some(VectorCoordinateType::Integer64)
        );
        assert_eq!(
            VectorCoordinateType::parse("int64"),
            Some(VectorCoordinateType::Integer64)
        );
        assert_eq!(
            VectorCoordinateType::parse("signed integer"),
            Some(VectorCoordinateType::Integer64)
        );
        assert_eq!(
            VectorCoordinateType::parse(" SIGNED INTEGER "),
            Some(VectorCoordinateType::Integer64)
        );
        assert_eq!(
            VectorCoordinateType::parse("FLOAT"),
            Some(VectorCoordinateType::Float64)
        );
        assert_eq!(
            VectorCoordinateType::parse("float32"),
            Some(VectorCoordinateType::Float32)
        );
        assert_eq!(VectorCoordinateType::parse("bogus"), None);
    }

    // Dimension 0 is below the allowed range.
    #[test]
    fn try_new_rejects_zero_dim() {
        let err = LoraVector::try_new(vec![], 0, VectorCoordinateType::Float64).unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(0)));
    }

    // The dimension check runs before the length check, so the short `raw`
    // list does not matter here.
    #[test]
    fn try_new_rejects_over_max_dim() {
        let err = LoraVector::try_new(
            vec![RawCoordinate::Int(1); 1],
            (MAX_VECTOR_DIMENSION + 1) as i64,
            VectorCoordinateType::Float64,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
    }

    // One coordinate supplied for a declared dimension of two.
    #[test]
    fn try_new_rejects_dimension_mismatch() {
        let err = LoraVector::try_new(
            vec![RawCoordinate::Int(1)],
            2,
            VectorCoordinateType::Integer64,
        )
        .unwrap_err();
        assert!(matches!(
            err,
            VectorBuildError::DimensionMismatch {
                expected: 2,
                got: 1
            }
        ));
    }

    // 128 is one past `i8::MAX`.
    #[test]
    fn int8_overflow_errors() {
        let err = LoraVector::try_new(
            vec![RawCoordinate::Int(128)],
            1,
            VectorCoordinateType::Integer8,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    // Floats stored in an integer vector lose their fractional part.
    #[test]
    fn float_to_int_truncates() {
        let v = LoraVector::try_new(
            vec![RawCoordinate::Float(1.9), RawCoordinate::Float(-1.9)],
            2,
            VectorCoordinateType::Integer64,
        )
        .unwrap();
        match v.values {
            VectorValues::Integer64(ref values) => assert_eq!(values, &[1, -1]),
            _ => panic!("expected Integer64"),
        }
    }

    // Integer inputs may be stored in a float vector.
    #[test]
    fn int_to_float_is_allowed() {
        let v = LoraVector::try_new(
            vec![RawCoordinate::Int(3), RawCoordinate::Int(4)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert_eq!(v.values, VectorValues::Float32(vec![3.0, 4.0]));
    }

    // Scientific notation parses as a float; a bare integer stays an Int.
    #[test]
    fn parse_string_values_handles_scientific() {
        let parsed = parse_string_values("[1.05e+00, 0.123, 5]").unwrap();
        assert_eq!(parsed.len(), 3);
        match parsed[0] {
            RawCoordinate::Float(f) => assert!((f - 1.05).abs() < 1e-9),
            _ => panic!("expected float"),
        }
        match parsed[2] {
            RawCoordinate::Int(i) => assert_eq!(i, 5),
            _ => panic!("expected int"),
        }
    }

    // Identical vectors have raw cosine 1, which maps to 1 on the bounded
    // scale.
    #[test]
    fn cosine_similarity_is_bounded() {
        let a = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let sim = cosine_similarity_bounded(&a, &b).unwrap();
        assert!((sim - 1.0).abs() < 1e-6);
    }

    // Squared distance is 2² + 3² + 3² = 22, so the similarity is 1 / 23.
    #[test]
    fn euclidean_similarity_matches_documented_example() {
        let a = LoraVector::try_new(
            vec![
                RawCoordinate::Float(4.0),
                RawCoordinate::Float(5.0),
                RawCoordinate::Float(6.0),
            ],
            3,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![
                RawCoordinate::Float(2.0),
                RawCoordinate::Float(8.0),
                RawCoordinate::Float(3.0),
            ],
            3,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let sim = euclidean_similarity(&a, &b).unwrap();
        assert!((sim - (1.0 / 23.0)).abs() < 1e-6, "got {sim}");
    }

    // Table of every accepted spelling in several casings.
    #[test]
    fn parse_coordinate_type_every_alias() {
        use VectorCoordinateType::*;
        let cases: &[(&str, VectorCoordinateType)] = &[
            ("FLOAT", Float64),
            ("Float", Float64),
            ("float", Float64),
            ("FLOAT64", Float64),
            ("float64", Float64),
            ("FLOAT32", Float32),
            ("float32", Float32),
            ("INTEGER", Integer64),
            ("Integer", Integer64),
            ("integer", Integer64),
            ("INT", Integer64),
            ("int", Integer64),
            ("INT64", Integer64),
            ("int64", Integer64),
            ("INTEGER64", Integer64),
            ("SIGNED INTEGER", Integer64),
            ("signed integer", Integer64),
            ("Signed Integer", Integer64),
            ("INTEGER32", Integer32),
            ("int32", Integer32),
            ("INT32", Integer32),
            ("INTEGER16", Integer16),
            ("INT16", Integer16),
            ("int16", Integer16),
            ("INTEGER8", Integer8),
            ("INT8", Integer8),
            ("int8", Integer8),
        ];
        for (input, expected) in cases {
            assert_eq!(
                VectorCoordinateType::parse(input),
                Some(*expected),
                "failed for input {input:?}"
            );
        }
    }

    // Names that look plausible but are not in the alias table, plus the
    // empty string.
    #[test]
    fn parse_coordinate_type_rejects_unsupported_aliases() {
        for bogus in [
            "DOUBLE",
            "double",
            "REAL",
            "NUMBER",
            "BIGINT",
            "INT128",
            "FLOAT128",
            "UINT8",
            "UNSIGNED INTEGER",
            "BIT",
            "",
        ] {
            assert_eq!(
                VectorCoordinateType::parse(bogus),
                None,
                "should reject {bogus:?}"
            );
        }
    }

    // Tabs, newlines and padding around the name are ignored.
    #[test]
    fn parse_coordinate_type_is_whitespace_tolerant() {
        assert_eq!(
            VectorCoordinateType::parse("\tinteger\n"),
            Some(VectorCoordinateType::Integer64)
        );
        assert_eq!(
            VectorCoordinateType::parse("   INTEGER   "),
            Some(VectorCoordinateType::Integer64)
        );
    }

    // Helper: reads a coordinate as f64, accepting either variant.
    fn unwrap_float(raw: RawCoordinate) -> f64 {
        match raw {
            RawCoordinate::Float(f) => f,
            RawCoordinate::Int(i) => i as f64,
        }
    }

    // Helper: reads an Int coordinate and panics on a Float.
    fn unwrap_int(raw: RawCoordinate) -> i64 {
        match raw {
            RawCoordinate::Int(i) => i,
            RawCoordinate::Float(f) => panic!("expected Int, got Float({f})"),
        }
    }

    // Negative numbers and generous spacing around brackets and entries.
    #[test]
    fn parse_string_values_accepts_negatives_and_whitespace() {
        let parsed = parse_string_values("  [ -1,  -2.5 ,  3 , -4.0e-2 ]  ").unwrap();
        assert_eq!(unwrap_int(parsed[0]), -1);
        assert!((unwrap_float(parsed[1]) + 2.5).abs() < 1e-9);
        assert_eq!(unwrap_int(parsed[2]), 3);
        assert!((unwrap_float(parsed[3]) + 0.04).abs() < 1e-12);
    }

    // Exponents with explicit `+` and `-` signs.
    #[test]
    fn parse_string_values_accepts_signed_exponents() {
        let parsed = parse_string_values("[1e+10, 1e-10, -2.5e+3]").unwrap();
        assert!((unwrap_float(parsed[0]) - 1e10).abs() < 1.0);
        assert!((unwrap_float(parsed[1]) - 1e-10).abs() < 1e-20);
        assert!((unwrap_float(parsed[2]) + 2500.0).abs() < 1e-9);
    }

    // "[]" parses to an empty list rather than an error.
    #[test]
    fn parse_string_values_accepts_empty_brackets() {
        let parsed = parse_string_values("[]").unwrap();
        assert!(parsed.is_empty());
    }

    // Both brackets are required.
    #[test]
    fn parse_string_values_rejects_missing_brackets() {
        assert!(parse_string_values("1, 2, 3").is_err());
        assert!(parse_string_values("[1, 2, 3").is_err());
        assert!(parse_string_values("1, 2, 3]").is_err());
    }

    // Leading, trailing and doubled commas all leave an empty entry.
    #[test]
    fn parse_string_values_rejects_empty_entries() {
        assert!(parse_string_values("[1, , 3]").is_err());
        assert!(parse_string_values("[,1,2]").is_err());
        assert!(parse_string_values("[1,2,]").is_err());
        assert!(parse_string_values("[ , ]").is_err());
    }

    // Words, booleans and quoted numbers are not coordinates.
    #[test]
    fn parse_string_values_rejects_non_numeric_tokens() {
        assert!(parse_string_values("[1, abc, 3]").is_err());
        assert!(parse_string_values("[true, false]").is_err());
        assert!(parse_string_values("[\"1\", \"2\"]").is_err());
    }

    // NaN and the infinities parse as f64 but are rejected as non-finite.
    #[test]
    fn parse_string_values_rejects_non_finite() {
        for bad in ["[NaN]", "[Infinity]", "[-Infinity]", "[1, NaN, 3]"] {
            assert!(parse_string_values(bad).is_err(), "should reject {bad:?}");
        }
    }

    // The upper bound of the dimension range is inclusive.
    #[test]
    fn try_new_accepts_exactly_max_dimension() {
        let raw = vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION];
        let v = LoraVector::try_new(
            raw,
            MAX_VECTOR_DIMENSION as i64,
            VectorCoordinateType::Integer8,
        )
        .expect("4096 should be accepted");
        assert_eq!(v.dimension, MAX_VECTOR_DIMENSION);
    }

    // One past the maximum fails even when the coordinate count matches.
    #[test]
    fn try_new_rejects_max_plus_one_dimension() {
        let err = LoraVector::try_new(
            vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION + 1],
            (MAX_VECTOR_DIMENSION + 1) as i64,
            VectorCoordinateType::Integer8,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
    }

    // Negative dimensions are reported with their original value.
    #[test]
    fn try_new_rejects_negative_dimension() {
        let err = LoraVector::try_new(vec![], -1, VectorCoordinateType::Integer64).unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(-1)));
    }

    // For each integer width: min and max are accepted; one below min and
    // one above max are rejected. Tuple layout: (type, min, max, under, over).
    #[test]
    fn integer_boundaries_round_trip() {
        let cases: &[(VectorCoordinateType, i64, i64, i64, i64)] = &[
            (
                VectorCoordinateType::Integer8,
                i8::MIN as i64,
                i8::MAX as i64,
                i8::MIN as i64 - 1,
                i8::MAX as i64 + 1,
            ),
            (
                VectorCoordinateType::Integer16,
                i16::MIN as i64,
                i16::MAX as i64,
                i16::MIN as i64 - 1,
                i16::MAX as i64 + 1,
            ),
            (
                VectorCoordinateType::Integer32,
                i32::MIN as i64,
                i32::MAX as i64,
                i32::MIN as i64 - 1,
                i32::MAX as i64 + 1,
            ),
            (VectorCoordinateType::Integer64, i64::MIN, i64::MAX, 0, 0),
        ];
        for (ty, min, max, under, over) in cases {
            LoraVector::try_new(vec![RawCoordinate::Int(*min)], 1, *ty)
                .unwrap_or_else(|e| panic!("{ty:?} min rejected: {e}"));
            LoraVector::try_new(vec![RawCoordinate::Int(*max)], 1, *ty)
                .unwrap_or_else(|e| panic!("{ty:?} max rejected: {e}"));

            // No i64 value lies outside the Integer64 range, so its
            // under/over entries are placeholders and are skipped.
            if *ty == VectorCoordinateType::Integer64 {
                continue;
            }

            let e = LoraVector::try_new(vec![RawCoordinate::Int(*under)], 1, *ty).unwrap_err();
            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
            let e = LoraVector::try_new(vec![RawCoordinate::Int(*over)], 1, *ty).unwrap_err();
            assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
        }
    }

    // A finite f64 larger than `f32::MAX` cannot be stored as Float32.
    #[test]
    fn float32_overflow_errors() {
        let huge = (f32::MAX as f64) * 10.0;
        let err = LoraVector::try_new(
            vec![RawCoordinate::Float(huge)],
            1,
            VectorCoordinateType::Float32,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    // Truncation goes toward zero for both signs, including values whose
    // magnitude is below one.
    #[test]
    fn float_to_int_truncates_toward_zero() {
        let v = LoraVector::try_new(
            vec![
                RawCoordinate::Float(1.9),
                RawCoordinate::Float(-1.9),
                RawCoordinate::Float(0.999),
                RawCoordinate::Float(-0.999),
            ],
            4,
            VectorCoordinateType::Integer8,
        )
        .unwrap();
        match v.values {
            VectorValues::Integer8(ref values) => assert_eq!(values, &[1i8, -1, 0, 0]),
            _ => panic!("expected Integer8"),
        }
    }

    // `f64::MAX` is finite but far outside the i64 range.
    #[test]
    fn float_out_of_range_i64_errors() {
        let err = LoraVector::try_new(
            vec![RawCoordinate::Float(f64::MAX)],
            1,
            VectorCoordinateType::Integer64,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    // NaN and both infinities are rejected even for a Float64 target.
    #[test]
    fn non_finite_float_rejected_in_try_new() {
        for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
            let err = LoraVector::try_new(
                vec![RawCoordinate::Float(bad)],
                1,
                VectorCoordinateType::Float64,
            )
            .unwrap_err();
            assert!(matches!(err, VectorBuildError::NonFiniteCoordinate));
        }
    }

    // The key must change when the coordinate type, the dimension or any
    // value changes, and must be stable for equal vectors.
    #[test]
    fn to_key_string_distinguishes_coord_type_dim_and_values() {
        fn v(coord: VectorCoordinateType, vals: &[i64], dim: i64) -> LoraVector {
            LoraVector::try_new(
                vals.iter().map(|x| RawCoordinate::Int(*x)).collect(),
                dim,
                coord,
            )
            .unwrap()
        }

        // Same values and dimension, different coordinate type.
        let a = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
        let b = v(VectorCoordinateType::Integer32, &[1, 2, 3], 3);
        assert_ne!(a.to_key_string(), b.to_key_string());

        // Different dimension.
        let c = v(VectorCoordinateType::Integer64, &[1, 2], 2);
        assert_ne!(a.to_key_string(), c.to_key_string());

        // Different last value.
        let d = v(VectorCoordinateType::Integer64, &[1, 2, 4], 3);
        assert_ne!(a.to_key_string(), d.to_key_string());

        // Equal vectors give equal keys.
        let a2 = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
        assert_eq!(a.to_key_string(), a2.to_key_string());
    }

    // Orthogonal unit vectors: raw cosine 0, bounded 0.5.
    #[test]
    fn cosine_orthogonal_is_zero_raw_and_half_bounded() {
        let a = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![RawCoordinate::Int(0), RawCoordinate::Int(1)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!((cosine_similarity_raw(&a, &b).unwrap()).abs() < 1e-6);
        assert!((cosine_similarity_bounded(&a, &b).unwrap() - 0.5).abs() < 1e-6);
    }

    // Opposite unit vectors: raw cosine -1, bounded 0.
    #[test]
    fn cosine_opposite_is_neg_one_raw_and_zero_bounded() {
        let a = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![RawCoordinate::Int(-1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!((cosine_similarity_raw(&a, &b).unwrap() + 1.0).abs() < 1e-6);
        assert!(cosine_similarity_bounded(&a, &b).unwrap().abs() < 1e-6);
    }

    // A zero vector has no direction, so both cosine variants return None.
    #[test]
    fn cosine_zero_vector_returns_none() {
        let zero = LoraVector::try_new(
            vec![RawCoordinate::Int(0), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let other = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!(cosine_similarity_raw(&zero, &other).is_none());
        assert!(cosine_similarity_bounded(&zero, &other).is_none());
    }

    // Every pairwise helper returns None for vectors of different dimension.
    #[test]
    fn distance_helpers_respect_dimension_mismatch() {
        let a = LoraVector::try_new(
            vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![
                RawCoordinate::Int(1),
                RawCoordinate::Int(0),
                RawCoordinate::Int(0),
            ],
            3,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!(euclidean_distance(&a, &b).is_none());
        assert!(euclidean_distance_squared(&a, &b).is_none());
        assert!(manhattan_distance(&a, &b).is_none());
        assert!(hamming_distance(&a, &b).is_none());
        assert!(dot_product(&a, &b).is_none());
    }

    // |3| + |4| + |0| + |-12| = 19 and sqrt(9 + 16 + 0 + 144) = 13.
    #[test]
    fn manhattan_and_euclidean_norm_match_hand_computed() {
        let v = LoraVector::try_new(
            vec![
                RawCoordinate::Float(3.0),
                RawCoordinate::Float(4.0),
                RawCoordinate::Float(0.0),
                RawCoordinate::Float(-12.0),
            ],
            4,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!((manhattan_norm(&v) - 19.0).abs() < 1e-5);
        assert!((euclidean_norm(&v) - 13.0).abs() < 1e-5);
    }

    // Hamming distance compares values, not storage types: a Float32 and a
    // Float64 vector holding the same numbers are at distance 0.
    #[test]
    fn hamming_on_float_vectors_uses_f32_comparison() {
        let a = LoraVector::try_new(
            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        let b = LoraVector::try_new(
            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
            2,
            VectorCoordinateType::Float64,
        )
        .unwrap();
        assert!((hamming_distance(&a, &b).unwrap()).abs() < 1e-9);

        // One differing position gives distance 1.
        let c = LoraVector::try_new(
            vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.5)],
            2,
            VectorCoordinateType::Float32,
        )
        .unwrap();
        assert!((hamming_distance(&a, &c).unwrap() - 1.0).abs() < 1e-9);
    }
}