1use std::fmt;
16
/// Largest dimension a vector may declare. `LoraVector::try_new` rejects any
/// dimension above this value (or below 1) with
/// `VectorBuildError::InvalidDimension`.
pub const MAX_VECTOR_DIMENSION: usize = 4096;
19
/// Numeric representation shared by every coordinate of a vector.
///
/// `PartialOrd`/`Ord` are derived, so the ordering between two coordinate
/// types is the declaration order of the variants below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum VectorCoordinateType {
    /// 64-bit floating point coordinates.
    Float64,
    /// 32-bit floating point coordinates.
    Float32,
    /// 64-bit signed integer coordinates.
    Integer64,
    /// 32-bit signed integer coordinates.
    Integer32,
    /// 16-bit signed integer coordinates.
    Integer16,
    /// 8-bit signed integer coordinates.
    Integer8,
}

impl VectorCoordinateType {
    /// Returns the upper-case name used when rendering this type.
    ///
    /// Note that `Integer64` renders as plain `INTEGER`, while `Float64`
    /// renders with its width (`FLOAT64`).
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Float64 => "FLOAT64",
            Self::Float32 => "FLOAT32",
            Self::Integer64 => "INTEGER",
            Self::Integer32 => "INTEGER32",
            Self::Integer16 => "INTEGER16",
            Self::Integer8 => "INTEGER8",
        }
    }

    /// Resolves a type name or alias to a coordinate type.
    ///
    /// Matching is ASCII case-insensitive. Leading and trailing whitespace is
    /// ignored and interior whitespace runs collapse to a single space, so
    /// `"  signed   integer "` is read as `SIGNED INTEGER`. Returns `None`
    /// for any name that is not a recognised alias.
    pub fn parse(name: &str) -> Option<Self> {
        // Rebuild the name word by word so whitespace is normalised in one pass.
        let mut normalized = String::with_capacity(name.len());
        for word in name.split_whitespace() {
            if !normalized.is_empty() {
                normalized.push(' ');
            }
            normalized.push_str(word);
        }
        normalized.make_ascii_uppercase();

        let resolved = match normalized.as_str() {
            "FLOAT" | "FLOAT64" => Self::Float64,
            "FLOAT32" => Self::Float32,
            "INTEGER" | "INT" | "INT64" | "INTEGER64" | "SIGNED INTEGER" => Self::Integer64,
            "INTEGER32" | "INT32" => Self::Integer32,
            "INTEGER16" | "INT16" => Self::Integer16,
            "INTEGER8" | "INT8" => Self::Integer8,
            _ => return None,
        };
        Some(resolved)
    }

    /// Reports whether this is one of the floating point coordinate types.
    pub fn is_float(self) -> bool {
        match self {
            Self::Float64 | Self::Float32 => true,
            Self::Integer64 | Self::Integer32 | Self::Integer16 | Self::Integer8 => false,
        }
    }
}
90
91#[derive(Debug, Clone, PartialEq)]
94pub enum VectorValues {
95 Float64(Vec<f64>),
96 Float32(Vec<f32>),
97 Integer64(Vec<i64>),
98 Integer32(Vec<i32>),
99 Integer16(Vec<i16>),
100 Integer8(Vec<i8>),
101}
102
103impl VectorValues {
104 pub fn coordinate_type(&self) -> VectorCoordinateType {
105 match self {
106 VectorValues::Float64(_) => VectorCoordinateType::Float64,
107 VectorValues::Float32(_) => VectorCoordinateType::Float32,
108 VectorValues::Integer64(_) => VectorCoordinateType::Integer64,
109 VectorValues::Integer32(_) => VectorCoordinateType::Integer32,
110 VectorValues::Integer16(_) => VectorCoordinateType::Integer16,
111 VectorValues::Integer8(_) => VectorCoordinateType::Integer8,
112 }
113 }
114
115 pub fn len(&self) -> usize {
116 match self {
117 VectorValues::Float64(v) => v.len(),
118 VectorValues::Float32(v) => v.len(),
119 VectorValues::Integer64(v) => v.len(),
120 VectorValues::Integer32(v) => v.len(),
121 VectorValues::Integer16(v) => v.len(),
122 VectorValues::Integer8(v) => v.len(),
123 }
124 }
125
126 pub fn is_empty(&self) -> bool {
127 self.len() == 0
128 }
129
130 pub fn as_f64_vec(&self) -> Vec<f64> {
134 match self {
135 VectorValues::Float64(v) => v.clone(),
136 VectorValues::Float32(v) => v.iter().map(|x| *x as f64).collect(),
137 VectorValues::Integer64(v) => v.iter().map(|x| *x as f64).collect(),
138 VectorValues::Integer32(v) => v.iter().map(|x| *x as f64).collect(),
139 VectorValues::Integer16(v) => v.iter().map(|x| *x as f64).collect(),
140 VectorValues::Integer8(v) => v.iter().map(|x| *x as f64).collect(),
141 }
142 }
143
144 pub fn to_i64_vec(&self) -> Vec<i64> {
148 match self {
149 VectorValues::Float64(v) => v.iter().map(|x| *x as i64).collect(),
150 VectorValues::Float32(v) => v.iter().map(|x| *x as i64).collect(),
151 VectorValues::Integer64(v) => v.clone(),
152 VectorValues::Integer32(v) => v.iter().map(|x| *x as i64).collect(),
153 VectorValues::Integer16(v) => v.iter().map(|x| *x as i64).collect(),
154 VectorValues::Integer8(v) => v.iter().map(|x| *x as i64).collect(),
155 }
156 }
157}
158
159#[derive(Debug, Clone, PartialEq)]
161pub struct LoraVector {
162 pub dimension: usize,
163 pub values: VectorValues,
164}
165
166impl LoraVector {
167 pub fn coordinate_type(&self) -> VectorCoordinateType {
173 self.values.coordinate_type()
174 }
175
176 pub fn to_key_string(&self) -> String {
179 let mut out = String::new();
180 out.push_str(self.coordinate_type().as_str());
181 out.push('|');
182 out.push_str(&self.dimension.to_string());
183 out.push('|');
184 let vals = self.values.as_f64_vec();
185 for (i, v) in vals.iter().enumerate() {
186 if i > 0 {
187 out.push(',');
188 }
189 out.push_str(&format!("{v:?}"));
192 }
193 out
194 }
195}
196
197impl fmt::Display for LoraVector {
198 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199 write!(f, "vector(")?;
200 f.write_str("[")?;
201 let values = self.values.as_f64_vec();
202 for (i, v) in values.iter().enumerate() {
203 if i > 0 {
204 f.write_str(", ")?;
205 }
206 if self.coordinate_type().is_float() {
207 write!(f, "{v}")?;
208 } else {
209 write!(f, "{}", *v as i64)?;
210 }
211 }
212 f.write_str("], ")?;
213 write!(
214 f,
215 "{}, {})",
216 self.dimension,
217 self.coordinate_type().as_str()
218 )
219 }
220}
221
222#[derive(Debug, Clone, PartialEq)]
230pub enum VectorBuildError {
231 InvalidDimension(i64),
232 DimensionMismatch {
233 expected: usize,
234 got: usize,
235 },
236 NestedListNotAllowed,
237 NonNumericCoordinate(String),
238 NonFiniteCoordinate,
239 OutOfRange {
240 coordinate_type: VectorCoordinateType,
241 value: String,
242 },
243 UnknownCoordinateType(String),
244}
245
246impl fmt::Display for VectorBuildError {
247 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
248 match self {
249 VectorBuildError::InvalidDimension(d) => {
250 write!(
251 f,
252 "vector dimension must be between 1 and {MAX_VECTOR_DIMENSION}, got {d}"
253 )
254 }
255 VectorBuildError::DimensionMismatch { expected, got } => write!(
256 f,
257 "vector value length {got} does not match declared dimension {expected}"
258 ),
259 VectorBuildError::NestedListNotAllowed => {
260 write!(f, "vector coordinates cannot contain nested lists")
261 }
262 VectorBuildError::NonNumericCoordinate(kind) => {
263 write!(f, "vector coordinates must be numeric, got {kind}")
264 }
265 VectorBuildError::NonFiniteCoordinate => {
266 write!(f, "vector coordinates cannot be NaN or Infinity")
267 }
268 VectorBuildError::OutOfRange {
269 coordinate_type,
270 value,
271 } => write!(
272 f,
273 "value {value} is out of range for coordinate type {}",
274 coordinate_type.as_str()
275 ),
276 VectorBuildError::UnknownCoordinateType(name) => {
277 write!(f, "unknown vector coordinate type '{name}'")
278 }
279 }
280 }
281}
282
283impl std::error::Error for VectorBuildError {}
284
/// A coordinate as supplied by the caller, before it is coerced to a
/// vector's coordinate type.
#[derive(Debug, Clone, Copy)]
pub enum RawCoordinate {
    /// An integer input.
    Int(i64),
    /// A floating point input.
    Float(f64),
}

impl RawCoordinate {
    /// Returns the coordinate as an `f64`. Integers are converted with an
    /// `as` cast, which rounds magnitudes above 2^53 to the nearest `f64`.
    fn as_f64(self) -> f64 {
        match self {
            Self::Float(float) => float,
            Self::Int(int) => int as f64,
        }
    }
}
302
303impl LoraVector {
304 pub fn try_new(
308 raw: Vec<RawCoordinate>,
309 dimension: i64,
310 coordinate_type: VectorCoordinateType,
311 ) -> Result<Self, VectorBuildError> {
312 if dimension <= 0 || dimension as usize > MAX_VECTOR_DIMENSION {
313 return Err(VectorBuildError::InvalidDimension(dimension));
314 }
315 let dim = dimension as usize;
316 if raw.len() != dim {
317 return Err(VectorBuildError::DimensionMismatch {
318 expected: dim,
319 got: raw.len(),
320 });
321 }
322
323 for c in &raw {
324 if let RawCoordinate::Float(v) = c {
325 if !v.is_finite() {
326 return Err(VectorBuildError::NonFiniteCoordinate);
327 }
328 }
329 }
330
331 let values = match coordinate_type {
332 VectorCoordinateType::Float64 => {
333 VectorValues::Float64(raw.iter().map(|c| c.as_f64()).collect())
334 }
335 VectorCoordinateType::Float32 => {
336 let mut out = Vec::with_capacity(dim);
337 for c in &raw {
338 let v = c.as_f64();
339 if v.abs() > f32::MAX as f64 {
340 return Err(VectorBuildError::OutOfRange {
341 coordinate_type,
342 value: format!("{v}"),
343 });
344 }
345 out.push(v as f32);
346 }
347 VectorValues::Float32(out)
348 }
349 VectorCoordinateType::Integer64 => {
350 let mut out = Vec::with_capacity(dim);
351 for c in &raw {
352 out.push(coerce_to_int::<i64>(*c, coordinate_type)?);
353 }
354 VectorValues::Integer64(out)
355 }
356 VectorCoordinateType::Integer32 => {
357 let mut out = Vec::with_capacity(dim);
358 for c in &raw {
359 out.push(coerce_to_int::<i32>(*c, coordinate_type)?);
360 }
361 VectorValues::Integer32(out)
362 }
363 VectorCoordinateType::Integer16 => {
364 let mut out = Vec::with_capacity(dim);
365 for c in &raw {
366 out.push(coerce_to_int::<i16>(*c, coordinate_type)?);
367 }
368 VectorValues::Integer16(out)
369 }
370 VectorCoordinateType::Integer8 => {
371 let mut out = Vec::with_capacity(dim);
372 for c in &raw {
373 out.push(coerce_to_int::<i8>(*c, coordinate_type)?);
374 }
375 VectorValues::Integer8(out)
376 }
377 };
378
379 Ok(LoraVector {
380 dimension: dim,
381 values,
382 })
383 }
384}
385
386fn coerce_to_int<T>(
391 raw: RawCoordinate,
392 coordinate_type: VectorCoordinateType,
393) -> Result<T, VectorBuildError>
394where
395 T: TryFrom<i64> + Copy,
396{
397 let as_i64 = match raw {
398 RawCoordinate::Int(v) => v,
399 RawCoordinate::Float(v) => {
400 if v > i64::MAX as f64 || v < i64::MIN as f64 {
404 return Err(VectorBuildError::OutOfRange {
405 coordinate_type,
406 value: format!("{v}"),
407 });
408 }
409 v.trunc() as i64
410 }
411 };
412
413 T::try_from(as_i64).map_err(|_| VectorBuildError::OutOfRange {
414 coordinate_type,
415 value: as_i64.to_string(),
416 })
417}
418
419pub fn parse_string_values(input: &str) -> Result<Vec<RawCoordinate>, VectorBuildError> {
422 let trimmed = input.trim();
423 if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
424 return Err(VectorBuildError::NonNumericCoordinate(
425 "string must start with '[' and end with ']'".to_string(),
426 ));
427 }
428 let body = &trimmed[1..trimmed.len() - 1];
429 if body.trim().is_empty() {
430 return Ok(Vec::new());
431 }
432
433 let mut out = Vec::new();
434 for part in body.split(',') {
435 let token = part.trim();
436 if token.is_empty() {
437 return Err(VectorBuildError::NonNumericCoordinate(
438 "empty list entry".to_string(),
439 ));
440 }
441
442 if let Ok(i) = token.parse::<i64>() {
445 out.push(RawCoordinate::Int(i));
446 continue;
447 }
448 match token.parse::<f64>() {
449 Ok(f) if f.is_finite() => out.push(RawCoordinate::Float(f)),
450 Ok(_) => return Err(VectorBuildError::NonFiniteCoordinate),
451 Err(_) => {
452 return Err(VectorBuildError::NonNumericCoordinate(format!(
453 "cannot parse '{token}'"
454 )))
455 }
456 }
457 }
458 Ok(out)
459}
460
461fn check_same_dim(a: &LoraVector, b: &LoraVector) -> Option<usize> {
470 if a.dimension == b.dimension {
471 Some(a.dimension)
472 } else {
473 None
474 }
475}
476
477pub fn cosine_similarity_raw(a: &LoraVector, b: &LoraVector) -> Option<f64> {
480 check_same_dim(a, b)?;
481 let av: Vec<f32> = a
484 .values
485 .as_f64_vec()
486 .into_iter()
487 .map(|x| x as f32)
488 .collect();
489 let bv: Vec<f32> = b
490 .values
491 .as_f64_vec()
492 .into_iter()
493 .map(|x| x as f32)
494 .collect();
495 let mut dot = 0f32;
496 let mut na = 0f32;
497 let mut nb = 0f32;
498 for (x, y) in av.iter().zip(bv.iter()) {
499 dot += x * y;
500 na += x * x;
501 nb += y * y;
502 }
503 if na == 0.0 || nb == 0.0 {
504 return None;
505 }
506 let denom = na.sqrt() * nb.sqrt();
507 if denom == 0.0 {
508 return None;
509 }
510 Some((dot / denom) as f64)
511}
512
513pub fn cosine_similarity_bounded(a: &LoraVector, b: &LoraVector) -> Option<f64> {
516 cosine_similarity_raw(a, b).map(|raw| ((raw + 1.0) / 2.0).clamp(0.0, 1.0))
517}
518
519pub fn euclidean_distance_squared(a: &LoraVector, b: &LoraVector) -> Option<f64> {
522 check_same_dim(a, b)?;
523 let av: Vec<f32> = a
524 .values
525 .as_f64_vec()
526 .into_iter()
527 .map(|x| x as f32)
528 .collect();
529 let bv: Vec<f32> = b
530 .values
531 .as_f64_vec()
532 .into_iter()
533 .map(|x| x as f32)
534 .collect();
535 let mut sum = 0f32;
536 for (x, y) in av.iter().zip(bv.iter()) {
537 let d = x - y;
538 sum += d * d;
539 }
540 Some(sum as f64)
541}
542
543pub fn euclidean_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
545 euclidean_distance_squared(a, b).map(f64::sqrt)
546}
547
548pub fn manhattan_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
550 check_same_dim(a, b)?;
551 let av = a.values.as_f64_vec();
552 let bv = b.values.as_f64_vec();
553 let mut sum = 0f32;
554 for (x, y) in av.iter().zip(bv.iter()) {
555 sum += ((*x as f32) - (*y as f32)).abs();
556 }
557 Some(sum as f64)
558}
559
560pub fn hamming_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
562 check_same_dim(a, b)?;
563 let av = a.values.as_f64_vec();
564 let bv = b.values.as_f64_vec();
565 let mut count = 0i64;
566 for (x, y) in av.iter().zip(bv.iter()) {
567 if (*x as f32) != (*y as f32) {
568 count += 1;
569 }
570 }
571 Some(count as f64)
572}
573
574pub fn dot_product(a: &LoraVector, b: &LoraVector) -> Option<f64> {
576 check_same_dim(a, b)?;
577 let av = a.values.as_f64_vec();
578 let bv = b.values.as_f64_vec();
579 let mut acc = 0f32;
580 for (x, y) in av.iter().zip(bv.iter()) {
581 acc += (*x as f32) * (*y as f32);
582 }
583 Some(acc as f64)
584}
585
586pub fn euclidean_norm(v: &LoraVector) -> f64 {
588 let values = v.values.as_f64_vec();
589 let mut sum = 0f32;
590 for x in &values {
591 let x32 = *x as f32;
592 sum += x32 * x32;
593 }
594 (sum.sqrt()) as f64
595}
596
597pub fn manhattan_norm(v: &LoraVector) -> f64 {
599 let values = v.values.as_f64_vec();
600 let mut sum = 0f32;
601 for x in &values {
602 sum += (*x as f32).abs();
603 }
604 sum as f64
605}
606
607pub fn euclidean_similarity(a: &LoraVector, b: &LoraVector) -> Option<f64> {
611 euclidean_distance_squared(a, b).map(|d2| 1.0 / (1.0 + d2))
612}
613
#[cfg(test)]
mod tests {
    use super::VectorCoordinateType as Ty;
    use super::*;

    // ---- fixtures -------------------------------------------------------

    /// Wraps each value in `RawCoordinate::Int`.
    fn ints(values: &[i64]) -> Vec<RawCoordinate> {
        values.iter().copied().map(RawCoordinate::Int).collect()
    }

    /// Wraps each value in `RawCoordinate::Float`.
    fn floats(values: &[f64]) -> Vec<RawCoordinate> {
        values.iter().copied().map(RawCoordinate::Float).collect()
    }

    /// Builds a vector whose declared dimension is `raw.len()`, panicking if
    /// `try_new` rejects it.
    fn build(raw: Vec<RawCoordinate>, ty: Ty) -> LoraVector {
        let dimension = raw.len() as i64;
        LoraVector::try_new(raw, dimension, ty).unwrap()
    }

    /// Reads a coordinate as `f64`, accepting either variant.
    fn unwrap_float(raw: RawCoordinate) -> f64 {
        match raw {
            RawCoordinate::Int(i) => i as f64,
            RawCoordinate::Float(f) => f,
        }
    }

    /// Reads a coordinate as `i64`, panicking on a float.
    fn unwrap_int(raw: RawCoordinate) -> i64 {
        match raw {
            RawCoordinate::Float(f) => panic!("expected Int, got Float({f})"),
            RawCoordinate::Int(i) => i,
        }
    }

    // ---- coordinate type parsing ----------------------------------------

    #[test]
    fn parse_coordinate_type_accepts_aliases() {
        let accepted = [
            ("INTEGER", Ty::Integer64),
            ("int64", Ty::Integer64),
            ("signed integer", Ty::Integer64),
            (" SIGNED INTEGER ", Ty::Integer64),
            ("FLOAT", Ty::Float64),
            ("float32", Ty::Float32),
        ];
        for (name, expected) in accepted {
            assert_eq!(Ty::parse(name), Some(expected));
        }
        assert_eq!(Ty::parse("bogus"), None);
    }

    // ---- try_new: dimension checks --------------------------------------

    #[test]
    fn try_new_rejects_zero_dim() {
        let err = LoraVector::try_new(Vec::new(), 0, Ty::Float64).unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(0)));
    }

    #[test]
    fn try_new_rejects_over_max_dim() {
        let too_large = (MAX_VECTOR_DIMENSION + 1) as i64;
        let err = LoraVector::try_new(ints(&[1]), too_large, Ty::Float64).unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
    }

    #[test]
    fn try_new_rejects_dimension_mismatch() {
        let err = LoraVector::try_new(ints(&[1]), 2, Ty::Integer64).unwrap_err();
        assert!(matches!(
            err,
            VectorBuildError::DimensionMismatch {
                expected: 2,
                got: 1
            }
        ));
    }

    // ---- try_new: coercion ----------------------------------------------

    #[test]
    fn int8_overflow_errors() {
        let err = LoraVector::try_new(ints(&[128]), 1, Ty::Integer8).unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    #[test]
    fn float_to_int_truncates() {
        let v = build(floats(&[1.9, -1.9]), Ty::Integer64);
        match v.values {
            VectorValues::Integer64(ref values) => assert_eq!(values, &[1, -1]),
            _ => panic!("expected Integer64"),
        }
    }

    #[test]
    fn int_to_float_is_allowed() {
        let v = build(ints(&[3, 4]), Ty::Float32);
        assert_eq!(v.values, VectorValues::Float32(vec![3.0, 4.0]));
    }

    // ---- string parsing and similarity smoke tests ------------------------

    #[test]
    fn parse_string_values_handles_scientific() {
        let parsed = parse_string_values("[1.05e+00, 0.123, 5]").unwrap();
        assert_eq!(parsed.len(), 3);
        match parsed[0] {
            RawCoordinate::Float(f) => assert!((f - 1.05).abs() < 1e-9),
            _ => panic!("expected float"),
        }
        match parsed[2] {
            RawCoordinate::Int(i) => assert_eq!(i, 5),
            _ => panic!("expected int"),
        }
    }

    #[test]
    fn cosine_similarity_is_bounded() {
        let a = build(ints(&[1, 0]), Ty::Float32);
        let b = build(ints(&[1, 0]), Ty::Float32);
        let sim = cosine_similarity_bounded(&a, &b).unwrap();
        assert!((sim - 1.0).abs() < 1e-6);
    }

    #[test]
    fn euclidean_similarity_matches_documented_example() {
        // d² = (4-2)² + (5-8)² + (6-3)² = 22, so similarity = 1 / 23.
        let a = build(floats(&[4.0, 5.0, 6.0]), Ty::Float32);
        let b = build(floats(&[2.0, 8.0, 3.0]), Ty::Float32);
        let sim = euclidean_similarity(&a, &b).unwrap();
        assert!((sim - (1.0 / 23.0)).abs() < 1e-6, "got {sim}");
    }

    // ---- coordinate type parsing: exhaustive ------------------------------

    #[test]
    fn parse_coordinate_type_every_alias() {
        let cases: &[(&str, Ty)] = &[
            ("FLOAT", Ty::Float64),
            ("Float", Ty::Float64),
            ("float", Ty::Float64),
            ("FLOAT64", Ty::Float64),
            ("float64", Ty::Float64),
            ("FLOAT32", Ty::Float32),
            ("float32", Ty::Float32),
            ("INTEGER", Ty::Integer64),
            ("Integer", Ty::Integer64),
            ("integer", Ty::Integer64),
            ("INT", Ty::Integer64),
            ("int", Ty::Integer64),
            ("INT64", Ty::Integer64),
            ("int64", Ty::Integer64),
            ("INTEGER64", Ty::Integer64),
            ("SIGNED INTEGER", Ty::Integer64),
            ("signed integer", Ty::Integer64),
            ("Signed Integer", Ty::Integer64),
            ("INTEGER32", Ty::Integer32),
            ("int32", Ty::Integer32),
            ("INT32", Ty::Integer32),
            ("INTEGER16", Ty::Integer16),
            ("INT16", Ty::Integer16),
            ("int16", Ty::Integer16),
            ("INTEGER8", Ty::Integer8),
            ("INT8", Ty::Integer8),
            ("int8", Ty::Integer8),
        ];
        for (input, expected) in cases {
            assert_eq!(
                Ty::parse(input),
                Some(*expected),
                "failed for input {input:?}"
            );
        }
    }

    #[test]
    fn parse_coordinate_type_rejects_unsupported_aliases() {
        let unsupported = [
            "DOUBLE",
            "double",
            "REAL",
            "NUMBER",
            "BIGINT",
            "INT128",
            "FLOAT128",
            "UINT8",
            "UNSIGNED INTEGER",
            "BIT",
            "",
        ];
        for bogus in unsupported {
            assert_eq!(Ty::parse(bogus), None, "should reject {bogus:?}");
        }
    }

    #[test]
    fn parse_coordinate_type_is_whitespace_tolerant() {
        for padded in ["\tinteger\n", " INTEGER "] {
            assert_eq!(Ty::parse(padded), Some(Ty::Integer64));
        }
    }

    // ---- string value parsing ---------------------------------------------

    #[test]
    fn parse_string_values_accepts_negatives_and_whitespace() {
        let parsed = parse_string_values(" [ -1, -2.5 , 3 , -4.0e-2 ] ").unwrap();
        assert_eq!(unwrap_int(parsed[0]), -1);
        assert!((unwrap_float(parsed[1]) + 2.5).abs() < 1e-9);
        assert_eq!(unwrap_int(parsed[2]), 3);
        assert!((unwrap_float(parsed[3]) + 0.04).abs() < 1e-12);
    }

    #[test]
    fn parse_string_values_accepts_signed_exponents() {
        let parsed = parse_string_values("[1e+10, 1e-10, -2.5e+3]").unwrap();
        assert!((unwrap_float(parsed[0]) - 1e10).abs() < 1.0);
        assert!((unwrap_float(parsed[1]) - 1e-10).abs() < 1e-20);
        assert!((unwrap_float(parsed[2]) + 2500.0).abs() < 1e-9);
    }

    #[test]
    fn parse_string_values_accepts_empty_brackets() {
        let parsed = parse_string_values("[]").unwrap();
        assert!(parsed.is_empty());
    }

    #[test]
    fn parse_string_values_rejects_missing_brackets() {
        for input in ["1, 2, 3", "[1, 2, 3", "1, 2, 3]"] {
            assert!(parse_string_values(input).is_err());
        }
    }

    #[test]
    fn parse_string_values_rejects_empty_entries() {
        for input in ["[1, , 3]", "[,1,2]", "[1,2,]", "[ , ]"] {
            assert!(parse_string_values(input).is_err());
        }
    }

    #[test]
    fn parse_string_values_rejects_non_numeric_tokens() {
        for input in ["[1, abc, 3]", "[true, false]", "[\"1\", \"2\"]"] {
            assert!(parse_string_values(input).is_err());
        }
    }

    #[test]
    fn parse_string_values_rejects_non_finite() {
        for bad in ["[NaN]", "[Infinity]", "[-Infinity]", "[1, NaN, 3]"] {
            assert!(parse_string_values(bad).is_err(), "should reject {bad:?}");
        }
    }

    // ---- try_new: dimension boundaries -------------------------------------

    #[test]
    fn try_new_accepts_exactly_max_dimension() {
        let raw = vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION];
        let v = LoraVector::try_new(raw, MAX_VECTOR_DIMENSION as i64, Ty::Integer8)
            .expect("4096 should be accepted");
        assert_eq!(v.dimension, MAX_VECTOR_DIMENSION);
    }

    #[test]
    fn try_new_rejects_max_plus_one_dimension() {
        let oversized = MAX_VECTOR_DIMENSION + 1;
        let err = LoraVector::try_new(
            vec![RawCoordinate::Int(0); oversized],
            oversized as i64,
            Ty::Integer8,
        )
        .unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
    }

    #[test]
    fn try_new_rejects_negative_dimension() {
        let err = LoraVector::try_new(Vec::new(), -1, Ty::Integer64).unwrap_err();
        assert!(matches!(err, VectorBuildError::InvalidDimension(-1)));
    }

    // ---- try_new: value boundaries -----------------------------------------

    #[test]
    fn integer_boundaries_round_trip() {
        // (type, min, max, one below min, one above max). The last two are
        // placeholders for Integer64, where no i64 lies outside the range.
        let cases = [
            (
                Ty::Integer8,
                i64::from(i8::MIN),
                i64::from(i8::MAX),
                i64::from(i8::MIN) - 1,
                i64::from(i8::MAX) + 1,
            ),
            (
                Ty::Integer16,
                i64::from(i16::MIN),
                i64::from(i16::MAX),
                i64::from(i16::MIN) - 1,
                i64::from(i16::MAX) + 1,
            ),
            (
                Ty::Integer32,
                i64::from(i32::MIN),
                i64::from(i32::MAX),
                i64::from(i32::MIN) - 1,
                i64::from(i32::MAX) + 1,
            ),
            (Ty::Integer64, i64::MIN, i64::MAX, 0, 0),
        ];
        for (ty, min, max, under, over) in cases {
            LoraVector::try_new(ints(&[min]), 1, ty)
                .unwrap_or_else(|e| panic!("{ty:?} min rejected: {e}"));
            LoraVector::try_new(ints(&[max]), 1, ty)
                .unwrap_or_else(|e| panic!("{ty:?} max rejected: {e}"));

            if ty == Ty::Integer64 {
                continue;
            }

            for outside in [under, over] {
                let e = LoraVector::try_new(ints(&[outside]), 1, ty).unwrap_err();
                assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
            }
        }
    }

    #[test]
    fn float32_overflow_errors() {
        let huge = f64::from(f32::MAX) * 10.0;
        let err = LoraVector::try_new(floats(&[huge]), 1, Ty::Float32).unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    #[test]
    fn float_to_int_truncates_toward_zero() {
        let v = build(floats(&[1.9, -1.9, 0.999, -0.999]), Ty::Integer8);
        match v.values {
            VectorValues::Integer8(ref values) => assert_eq!(values, &[1i8, -1, 0, 0]),
            _ => panic!("expected Integer8"),
        }
    }

    #[test]
    fn float_out_of_range_i64_errors() {
        let err = LoraVector::try_new(floats(&[f64::MAX]), 1, Ty::Integer64).unwrap_err();
        assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
    }

    #[test]
    fn non_finite_float_rejected_in_try_new() {
        for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
            let err = LoraVector::try_new(floats(&[bad]), 1, Ty::Float64).unwrap_err();
            assert!(matches!(err, VectorBuildError::NonFiniteCoordinate));
        }
    }

    // ---- key strings --------------------------------------------------------

    #[test]
    fn to_key_string_distinguishes_coord_type_dim_and_values() {
        let a = build(ints(&[1, 2, 3]), Ty::Integer64);

        // Same values, different coordinate type.
        let b = build(ints(&[1, 2, 3]), Ty::Integer32);
        assert_ne!(a.to_key_string(), b.to_key_string());

        // Different dimension.
        let c = build(ints(&[1, 2]), Ty::Integer64);
        assert_ne!(a.to_key_string(), c.to_key_string());

        // Same shape, one value changed.
        let d = build(ints(&[1, 2, 4]), Ty::Integer64);
        assert_ne!(a.to_key_string(), d.to_key_string());

        // An identical vector yields an identical key.
        let a2 = build(ints(&[1, 2, 3]), Ty::Integer64);
        assert_eq!(a.to_key_string(), a2.to_key_string());
    }

    // ---- cosine similarity ---------------------------------------------------

    #[test]
    fn cosine_orthogonal_is_zero_raw_and_half_bounded() {
        let a = build(ints(&[1, 0]), Ty::Float32);
        let b = build(ints(&[0, 1]), Ty::Float32);
        assert!((cosine_similarity_raw(&a, &b).unwrap()).abs() < 1e-6);
        assert!((cosine_similarity_bounded(&a, &b).unwrap() - 0.5).abs() < 1e-6);
    }

    #[test]
    fn cosine_opposite_is_neg_one_raw_and_zero_bounded() {
        let a = build(ints(&[1, 0]), Ty::Float32);
        let b = build(ints(&[-1, 0]), Ty::Float32);
        assert!((cosine_similarity_raw(&a, &b).unwrap() + 1.0).abs() < 1e-6);
        assert!(cosine_similarity_bounded(&a, &b).unwrap().abs() < 1e-6);
    }

    #[test]
    fn cosine_zero_vector_returns_none() {
        let zero = build(ints(&[0, 0]), Ty::Float32);
        let other = build(ints(&[1, 0]), Ty::Float32);
        assert!(cosine_similarity_raw(&zero, &other).is_none());
        assert!(cosine_similarity_bounded(&zero, &other).is_none());
    }

    // ---- distances and norms ---------------------------------------------------

    #[test]
    fn distance_helpers_respect_dimension_mismatch() {
        let a = build(ints(&[1, 0]), Ty::Float32);
        let b = build(ints(&[1, 0, 0]), Ty::Float32);
        assert!(euclidean_distance(&a, &b).is_none());
        assert!(euclidean_distance_squared(&a, &b).is_none());
        assert!(manhattan_distance(&a, &b).is_none());
        assert!(hamming_distance(&a, &b).is_none());
        assert!(dot_product(&a, &b).is_none());
    }

    #[test]
    fn manhattan_and_euclidean_norm_match_hand_computed() {
        // |3| + |4| + |0| + |-12| = 19 and sqrt(9 + 16 + 0 + 144) = 13.
        let v = build(floats(&[3.0, 4.0, 0.0, -12.0]), Ty::Float32);
        assert!((manhattan_norm(&v) - 19.0).abs() < 1e-5);
        assert!((euclidean_norm(&v) - 13.0).abs() < 1e-5);
    }

    #[test]
    fn hamming_on_float_vectors_uses_f32_comparison() {
        // Equal values stored with different float widths compare equal.
        let a = build(floats(&[1.0, 2.0]), Ty::Float32);
        let b = build(floats(&[1.0, 2.0]), Ty::Float64);
        assert!((hamming_distance(&a, &b).unwrap()).abs() < 1e-9);

        // One differing position counts as distance 1.
        let c = build(floats(&[1.0, 2.5]), Ty::Float32);
        assert!((hamming_distance(&a, &c).unwrap() - 1.0).abs() < 1e-9);
    }
}