use std::fmt;
/// Largest dimension a vector may declare; `LoraVector::try_new` rejects anything above it.
pub const MAX_VECTOR_DIMENSION: usize = 4096;
/// Element type used to store the coordinates of a vector.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so the order of
/// the variants below is part of the type's behavior.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum VectorCoordinateType {
    /// 64-bit floating point coordinates.
    Float64,
    /// 32-bit floating point coordinates.
    Float32,
    /// 64-bit signed integer coordinates.
    Integer64,
    /// 32-bit signed integer coordinates.
    Integer32,
    /// 16-bit signed integer coordinates.
    Integer16,
    /// 8-bit signed integer coordinates.
    Integer8,
}

impl VectorCoordinateType {
    /// Returns the canonical upper-case name of this coordinate type.
    ///
    /// `Integer64` is spelled `"INTEGER"` (no width suffix); every name
    /// returned here is also accepted by [`VectorCoordinateType::parse`].
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Float64 => "FLOAT64",
            Self::Float32 => "FLOAT32",
            Self::Integer64 => "INTEGER",
            Self::Integer32 => "INTEGER32",
            Self::Integer16 => "INTEGER16",
            Self::Integer8 => "INTEGER8",
        }
    }

    /// Parses a coordinate type name, ignoring ASCII case and surrounding or
    /// repeated whitespace.
    ///
    /// Returns `None` when the normalised name is not one of the known
    /// spellings or aliases.
    pub fn parse(name: &str) -> Option<Self> {
        // Normalise to single-space-separated, upper-case words so that
        // multi-word aliases such as "signed   integer" match.
        let mut normalised = String::with_capacity(name.len());
        for word in name.split_whitespace() {
            if !normalised.is_empty() {
                normalised.push(' ');
            }
            normalised.push_str(word);
        }
        normalised.make_ascii_uppercase();

        let parsed = match normalised.as_str() {
            "FLOAT" | "FLOAT64" => Self::Float64,
            "FLOAT32" => Self::Float32,
            "INTEGER" | "INT" | "INT64" | "INTEGER64" | "SIGNED INTEGER" => Self::Integer64,
            "INTEGER32" | "INT32" => Self::Integer32,
            "INTEGER16" | "INT16" => Self::Integer16,
            "INTEGER8" | "INT8" => Self::Integer8,
            _ => return None,
        };
        Some(parsed)
    }

    /// Returns `true` for the floating point coordinate types.
    pub fn is_float(self) -> bool {
        match self {
            Self::Float64 | Self::Float32 => true,
            Self::Integer64 | Self::Integer32 | Self::Integer16 | Self::Integer8 => false,
        }
    }
}
/// Typed storage for a vector's coordinates: one variant per
/// `VectorCoordinateType`, each holding a `Vec` of the matching primitive.
#[derive(Debug, Clone, PartialEq)]
pub enum VectorValues {
/// Coordinates stored as `f64`.
Float64(Vec<f64>),
/// Coordinates stored as `f32`.
Float32(Vec<f32>),
/// Coordinates stored as `i64`.
Integer64(Vec<i64>),
/// Coordinates stored as `i32`.
Integer32(Vec<i32>),
/// Coordinates stored as `i16`.
Integer16(Vec<i16>),
/// Coordinates stored as `i8`.
Integer8(Vec<i8>),
}
impl VectorValues {
    /// Coordinate type corresponding to the active variant.
    pub fn coordinate_type(&self) -> VectorCoordinateType {
        match self {
            Self::Float64(_) => VectorCoordinateType::Float64,
            Self::Float32(_) => VectorCoordinateType::Float32,
            Self::Integer64(_) => VectorCoordinateType::Integer64,
            Self::Integer32(_) => VectorCoordinateType::Integer32,
            Self::Integer16(_) => VectorCoordinateType::Integer16,
            Self::Integer8(_) => VectorCoordinateType::Integer8,
        }
    }

    /// Number of stored coordinates.
    pub fn len(&self) -> usize {
        match self {
            Self::Float64(xs) => xs.len(),
            Self::Float32(xs) => xs.len(),
            Self::Integer64(xs) => xs.len(),
            Self::Integer32(xs) => xs.len(),
            Self::Integer16(xs) => xs.len(),
            Self::Integer8(xs) => xs.len(),
        }
    }

    /// Returns `true` when no coordinates are stored.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns a freshly allocated copy of the coordinates widened to `f64`.
    ///
    /// All conversions are exact except `i64` -> `f64`, which rounds values
    /// that `f64` cannot represent.
    pub fn as_f64_vec(&self) -> Vec<f64> {
        match self {
            Self::Float64(xs) => xs.to_vec(),
            Self::Float32(xs) => xs.iter().copied().map(f64::from).collect(),
            // No `From<i64> for f64`: this widening may round.
            Self::Integer64(xs) => xs.iter().map(|&x| x as f64).collect(),
            Self::Integer32(xs) => xs.iter().copied().map(f64::from).collect(),
            Self::Integer16(xs) => xs.iter().copied().map(f64::from).collect(),
            Self::Integer8(xs) => xs.iter().copied().map(f64::from).collect(),
        }
    }

    /// Returns a freshly allocated copy of the coordinates as `i64`.
    ///
    /// Floating point coordinates go through an `as i64` cast (fraction
    /// dropped, out-of-range values saturate); integer coordinates widen
    /// exactly.
    pub fn to_i64_vec(&self) -> Vec<i64> {
        match self {
            Self::Float64(xs) => xs.iter().map(|&x| x as i64).collect(),
            Self::Float32(xs) => xs.iter().map(|&x| x as i64).collect(),
            Self::Integer64(xs) => xs.to_vec(),
            Self::Integer32(xs) => xs.iter().copied().map(i64::from).collect(),
            Self::Integer16(xs) => xs.iter().copied().map(i64::from).collect(),
            Self::Integer8(xs) => xs.iter().copied().map(i64::from).collect(),
        }
    }
}
/// A vector value: a declared dimension plus typed coordinate storage.
///
/// `LoraVector::try_new` only builds instances whose `dimension` equals the
/// number of stored coordinates; because both fields are public, values
/// assembled by hand are not checked.
#[derive(Debug, Clone, PartialEq)]
pub struct LoraVector {
/// Declared number of coordinates.
pub dimension: usize,
/// The coordinates, stored in their declared coordinate type.
pub values: VectorValues,
}
impl LoraVector {
pub fn coordinate_type(&self) -> VectorCoordinateType {
self.values.coordinate_type()
}
pub fn to_key_string(&self) -> String {
let mut out = String::new();
out.push_str(self.coordinate_type().as_str());
out.push('|');
out.push_str(&self.dimension.to_string());
out.push('|');
let vals = self.values.as_f64_vec();
for (i, v) in vals.iter().enumerate() {
if i > 0 {
out.push(',');
}
out.push_str(&format!("{v:?}"));
}
out
}
}
impl fmt::Display for LoraVector {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "vector(")?;
f.write_str("[")?;
let values = self.values.as_f64_vec();
for (i, v) in values.iter().enumerate() {
if i > 0 {
f.write_str(", ")?;
}
if self.coordinate_type().is_float() {
write!(f, "{v}")?;
} else {
write!(f, "{}", *v as i64)?;
}
}
f.write_str("], ")?;
write!(
f,
"{}, {})",
self.dimension,
self.coordinate_type().as_str()
)
}
}
/// Reasons why building a vector (or parsing its coordinates) can fail.
#[derive(Debug, Clone, PartialEq)]
pub enum VectorBuildError {
/// The declared dimension (carried as given) is outside the accepted range.
InvalidDimension(i64),
/// The number of supplied coordinates differs from the declared dimension.
DimensionMismatch {
/// The declared dimension.
expected: usize,
/// The number of coordinates actually supplied.
got: usize,
},
/// The coordinates contained a nested list.
NestedListNotAllowed,
/// A coordinate was not numeric; the payload describes what was found.
NonNumericCoordinate(String),
/// A coordinate was NaN or infinite.
NonFiniteCoordinate,
/// A coordinate does not fit in the requested coordinate type.
OutOfRange {
/// The coordinate type the value was being converted to.
coordinate_type: VectorCoordinateType,
/// The offending value, rendered as text.
value: String,
},
/// The coordinate type name (carried as given) was not recognised.
UnknownCoordinateType(String),
}
impl fmt::Display for VectorBuildError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
VectorBuildError::InvalidDimension(d) => {
write!(
f,
"vector dimension must be between 1 and {MAX_VECTOR_DIMENSION}, got {d}"
)
}
VectorBuildError::DimensionMismatch { expected, got } => write!(
f,
"vector value length {got} does not match declared dimension {expected}"
),
VectorBuildError::NestedListNotAllowed => {
write!(f, "vector coordinates cannot contain nested lists")
}
VectorBuildError::NonNumericCoordinate(kind) => {
write!(f, "vector coordinates must be numeric, got {kind}")
}
VectorBuildError::NonFiniteCoordinate => {
write!(f, "vector coordinates cannot be NaN or Infinity")
}
VectorBuildError::OutOfRange {
coordinate_type,
value,
} => write!(
f,
"value {value} is out of range for coordinate type {}",
coordinate_type.as_str()
),
VectorBuildError::UnknownCoordinateType(name) => {
write!(f, "unknown vector coordinate type '{name}'")
}
}
}
}
// Marks `VectorBuildError` as a standard error type; the trait's default methods are used as-is.
impl std::error::Error for VectorBuildError {}
/// A single untyped coordinate as supplied by the caller, before it is
/// converted to the vector's coordinate type.
#[derive(Debug, Clone, Copy)]
pub enum RawCoordinate {
    /// An integer input.
    Int(i64),
    /// A floating point input.
    Float(f64),
}

impl RawCoordinate {
    /// Returns the coordinate as `f64`.
    ///
    /// Floats are returned unchanged; integers are converted with `as f64`,
    /// which rounds values `f64` cannot represent exactly.
    fn as_f64(self) -> f64 {
        match self {
            Self::Float(value) => value,
            Self::Int(whole) => whole as f64,
        }
    }
}
impl LoraVector {
    /// Builds a vector of `coordinate_type` from raw coordinates.
    ///
    /// Checks run in this order, and the first failure is returned:
    ///
    /// 1. `dimension` must lie in `1..=MAX_VECTOR_DIMENSION`.
    /// 2. `raw.len()` must equal `dimension`.
    /// 3. No floating point coordinate may be NaN or infinite.
    /// 4. Every coordinate must fit in `coordinate_type`.
    ///
    /// Floating point inputs converted to an integer type are truncated
    /// toward zero; integer inputs converted to a float type are widened
    /// with `as`.
    ///
    /// # Errors
    ///
    /// * [`VectorBuildError::InvalidDimension`] for check 1.
    /// * [`VectorBuildError::DimensionMismatch`] for check 2.
    /// * [`VectorBuildError::NonFiniteCoordinate`] for check 3.
    /// * [`VectorBuildError::OutOfRange`] for check 4.
    pub fn try_new(
        raw: Vec<RawCoordinate>,
        dimension: i64,
        coordinate_type: VectorCoordinateType,
    ) -> Result<Self, VectorBuildError> {
        // Compare in the i64 domain. Casting `dimension` to `usize` first
        // truncates on 32-bit targets, where e.g. 2^32 + 1 would wrap to 1
        // and slip past the upper bound.
        if dimension <= 0 || dimension > MAX_VECTOR_DIMENSION as i64 {
            return Err(VectorBuildError::InvalidDimension(dimension));
        }
        // Lossless: the value is now known to be within 1..=MAX_VECTOR_DIMENSION.
        let dim = dimension as usize;

        if raw.len() != dim {
            return Err(VectorBuildError::DimensionMismatch {
                expected: dim,
                got: raw.len(),
            });
        }

        // Reject NaN / infinity up front so that this error takes precedence
        // over any range error, regardless of coordinate position.
        let has_non_finite = raw
            .iter()
            .any(|c| matches!(c, RawCoordinate::Float(v) if !v.is_finite()));
        if has_non_finite {
            return Err(VectorBuildError::NonFiniteCoordinate);
        }

        let values = match coordinate_type {
            VectorCoordinateType::Float64 => {
                VectorValues::Float64(raw.iter().map(|c| c.as_f64()).collect())
            }
            VectorCoordinateType::Float32 => {
                let converted = raw
                    .iter()
                    .map(|c| {
                        let v = c.as_f64();
                        // Anything beyond f32::MAX in magnitude would become
                        // infinite when narrowed.
                        if v.abs() > f32::MAX as f64 {
                            Err(VectorBuildError::OutOfRange {
                                coordinate_type,
                                value: format!("{v}"),
                            })
                        } else {
                            Ok(v as f32)
                        }
                    })
                    .collect::<Result<Vec<f32>, _>>()?;
                VectorValues::Float32(converted)
            }
            VectorCoordinateType::Integer64 => VectorValues::Integer64(
                raw.iter()
                    .map(|c| coerce_to_int::<i64>(*c, coordinate_type))
                    .collect::<Result<Vec<_>, _>>()?,
            ),
            VectorCoordinateType::Integer32 => VectorValues::Integer32(
                raw.iter()
                    .map(|c| coerce_to_int::<i32>(*c, coordinate_type))
                    .collect::<Result<Vec<_>, _>>()?,
            ),
            VectorCoordinateType::Integer16 => VectorValues::Integer16(
                raw.iter()
                    .map(|c| coerce_to_int::<i16>(*c, coordinate_type))
                    .collect::<Result<Vec<_>, _>>()?,
            ),
            VectorCoordinateType::Integer8 => VectorValues::Integer8(
                raw.iter()
                    .map(|c| coerce_to_int::<i8>(*c, coordinate_type))
                    .collect::<Result<Vec<_>, _>>()?,
            ),
        };

        Ok(LoraVector {
            dimension: dim,
            values,
        })
    }
}
/// Converts one raw coordinate to the integer type `T`.
///
/// Integer inputs are range-checked against `T`. Floating point inputs are
/// truncated toward zero, checked against the `i64` range, and then
/// range-checked against `T`.
///
/// # Errors
///
/// * [`VectorBuildError::NonFiniteCoordinate`] if a float input is NaN or
///   infinite (an `as i64` cast would otherwise turn NaN into `0`).
/// * [`VectorBuildError::OutOfRange`] (tagged with `coordinate_type`) if the
///   value does not fit in `i64` or in `T`.
fn coerce_to_int<T>(
    raw: RawCoordinate,
    coordinate_type: VectorCoordinateType,
) -> Result<T, VectorBuildError>
where
    T: TryFrom<i64> + Copy,
{
    // 2^63 is the first value above the i64 range. `i64::MAX as f64` rounds
    // up to exactly this number, so the upper bound must be exclusive: a
    // float equal to 2^63 would otherwise pass the check and be silently
    // saturated to i64::MAX by the cast below.
    const I64_UPPER_EXCLUSIVE: f64 = 9_223_372_036_854_775_808.0;
    // -2^63 is i64::MIN itself and is exactly representable, so it is in range.
    const I64_LOWER_INCLUSIVE: f64 = -9_223_372_036_854_775_808.0;

    let as_i64 = match raw {
        RawCoordinate::Int(v) => v,
        RawCoordinate::Float(v) => {
            if !v.is_finite() {
                return Err(VectorBuildError::NonFiniteCoordinate);
            }
            let truncated = v.trunc();
            if truncated < I64_LOWER_INCLUSIVE || truncated >= I64_UPPER_EXCLUSIVE {
                return Err(VectorBuildError::OutOfRange {
                    coordinate_type,
                    value: format!("{v}"),
                });
            }
            // In range by the check above, so the cast is exact.
            truncated as i64
        }
    };
    T::try_from(as_i64).map_err(|_| VectorBuildError::OutOfRange {
        coordinate_type,
        value: as_i64.to_string(),
    })
}
/// Parses a textual coordinate list such as `"[1, 2.5, -3e2]"`.
///
/// Surrounding whitespace is ignored; the remaining text must be wrapped in
/// `[` and `]`. Entries are comma separated and individually trimmed. An
/// entry that parses as `i64` becomes [`RawCoordinate::Int`]; otherwise it
/// must parse as a finite `f64` and becomes [`RawCoordinate::Float`]. A
/// blank body (`"[]"`, `"[  ]"`) yields an empty list.
///
/// # Errors
///
/// * [`VectorBuildError::NonNumericCoordinate`] when the brackets are
///   missing, an entry is empty, or an entry is not a number.
/// * [`VectorBuildError::NonFiniteCoordinate`] when an entry parses to NaN
///   or infinity.
pub fn parse_string_values(input: &str) -> Result<Vec<RawCoordinate>, VectorBuildError> {
    let body = input
        .trim()
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .ok_or_else(|| {
            VectorBuildError::NonNumericCoordinate(
                "string must start with '[' and end with ']'".to_string(),
            )
        })?;

    if body.trim().is_empty() {
        return Ok(Vec::new());
    }

    // Collecting into `Result` stops at the first failing entry.
    body.split(',')
        .map(|part| {
            let token = part.trim();
            if token.is_empty() {
                return Err(VectorBuildError::NonNumericCoordinate(
                    "empty list entry".to_string(),
                ));
            }
            // Prefer the integer reading so whole numbers stay exact.
            if let Ok(whole) = token.parse::<i64>() {
                return Ok(RawCoordinate::Int(whole));
            }
            match token.parse::<f64>() {
                Ok(value) if value.is_finite() => Ok(RawCoordinate::Float(value)),
                Ok(_) => Err(VectorBuildError::NonFiniteCoordinate),
                Err(_) => Err(VectorBuildError::NonNumericCoordinate(format!(
                    "cannot parse '{token}'"
                ))),
            }
        })
        .collect()
}
/// Returns the shared dimension when both vectors declare the same one,
/// `None` otherwise. Only the `dimension` fields are compared.
fn check_same_dim(a: &LoraVector, b: &LoraVector) -> Option<usize> {
    let shared = a.dimension;
    if shared != b.dimension {
        return None;
    }
    Some(shared)
}
/// Cosine similarity of `a` and `b`.
///
/// Coordinates are narrowed to `f32` and all arithmetic is carried out in
/// `f32`; only the final quotient is widened to `f64`.
///
/// Returns `None` when the dimensions differ, when either squared norm is
/// zero, or when the product of the norms is zero.
pub fn cosine_similarity_raw(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    check_same_dim(a, b)?;
    let lhs = a.values.as_f64_vec();
    let rhs = b.values.as_f64_vec();

    // One left-to-right pass accumulating the dot product and both squared norms.
    let (dot, norm_a_sq, norm_b_sq) = lhs.iter().zip(rhs.iter()).fold(
        (0f32, 0f32, 0f32),
        |(dot, norm_a_sq, norm_b_sq), (&x, &y)| {
            let (x, y) = (x as f32, y as f32);
            (dot + x * y, norm_a_sq + x * x, norm_b_sq + y * y)
        },
    );

    if norm_a_sq == 0.0 || norm_b_sq == 0.0 {
        return None;
    }
    let denom = norm_a_sq.sqrt() * norm_b_sq.sqrt();
    if denom == 0.0 {
        None
    } else {
        Some(f64::from(dot / denom))
    }
}
/// Cosine similarity mapped to `[0, 1]` as `(raw + 1) / 2`, then clamped.
///
/// Returns `None` exactly when [`cosine_similarity_raw`] does.
pub fn cosine_similarity_bounded(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    let raw = cosine_similarity_raw(a, b)?;
    let shifted = (raw + 1.0) / 2.0;
    Some(shifted.clamp(0.0, 1.0))
}
/// Squared Euclidean distance between `a` and `b`.
///
/// Coordinates are narrowed to `f32` and summed in `f32`; the total is
/// widened to `f64`. Returns `None` when the dimensions differ.
pub fn euclidean_distance_squared(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    check_same_dim(a, b)?;
    let lhs = a.values.as_f64_vec();
    let rhs = b.values.as_f64_vec();
    let total = lhs
        .iter()
        .zip(rhs.iter())
        .fold(0f32, |acc, (&x, &y)| {
            let diff = x as f32 - y as f32;
            acc + diff * diff
        });
    Some(f64::from(total))
}
/// Euclidean distance: the square root of [`euclidean_distance_squared`].
///
/// Returns `None` when the dimensions differ.
pub fn euclidean_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    let squared = euclidean_distance_squared(a, b)?;
    Some(squared.sqrt())
}
/// Manhattan (L1) distance between `a` and `b`.
///
/// Differences are computed and summed in `f32`; the total is widened to
/// `f64`. Returns `None` when the dimensions differ.
pub fn manhattan_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    check_same_dim(a, b)?;
    let lhs = a.values.as_f64_vec();
    let rhs = b.values.as_f64_vec();
    let total = lhs
        .iter()
        .zip(rhs.iter())
        .fold(0f32, |acc, (&x, &y)| acc + (x as f32 - y as f32).abs());
    Some(f64::from(total))
}
/// Hamming distance: the number of positions at which `a` and `b` differ.
///
/// Coordinates are compared after narrowing to `f32`. Returns `None` when
/// the dimensions differ.
pub fn hamming_distance(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    check_same_dim(a, b)?;
    let lhs = a.values.as_f64_vec();
    let rhs = b.values.as_f64_vec();
    let differing = lhs
        .iter()
        .zip(rhs.iter())
        .filter(|(x, y)| (**x as f32) != (**y as f32))
        .count();
    Some(differing as f64)
}
/// Dot product of `a` and `b`.
///
/// Products are computed and summed in `f32`; the total is widened to
/// `f64`. Returns `None` when the dimensions differ.
pub fn dot_product(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    check_same_dim(a, b)?;
    let lhs = a.values.as_f64_vec();
    let rhs = b.values.as_f64_vec();
    let total = lhs
        .iter()
        .zip(rhs.iter())
        .fold(0f32, |acc, (&x, &y)| acc + (x as f32) * (y as f32));
    Some(f64::from(total))
}
/// Euclidean (L2) norm of `v`.
///
/// Squares are summed and the root is taken in `f32`; the result is widened
/// to `f64`.
pub fn euclidean_norm(v: &LoraVector) -> f64 {
    let sum_of_squares = v
        .values
        .as_f64_vec()
        .iter()
        .fold(0f32, |acc, &x| {
            let narrowed = x as f32;
            acc + narrowed * narrowed
        });
    f64::from(sum_of_squares.sqrt())
}
/// Manhattan (L1) norm of `v`: the sum of absolute coordinate values.
///
/// The sum is accumulated in `f32` and widened to `f64`.
pub fn manhattan_norm(v: &LoraVector) -> f64 {
    let total = v
        .values
        .as_f64_vec()
        .iter()
        .fold(0f32, |acc, &x| acc + (x as f32).abs());
    f64::from(total)
}
/// Euclidean similarity, defined as `1 / (1 + d²)` where `d²` is
/// [`euclidean_distance_squared`].
///
/// Returns `None` when the dimensions differ.
pub fn euclidean_similarity(a: &LoraVector, b: &LoraVector) -> Option<f64> {
    let d2 = euclidean_distance_squared(a, b)?;
    Some(1.0 / (1.0 + d2))
}
// Unit tests for coordinate-type parsing, vector construction, string
// parsing, key encoding, and the distance / similarity helpers.
#[cfg(test)]
mod tests {
use super::*;
// A sample of aliases, in mixed case and with stray whitespace, resolves to the expected type.
#[test]
fn parse_coordinate_type_accepts_aliases() {
assert_eq!(
VectorCoordinateType::parse("INTEGER"),
Some(VectorCoordinateType::Integer64)
);
assert_eq!(
VectorCoordinateType::parse("int64"),
Some(VectorCoordinateType::Integer64)
);
assert_eq!(
VectorCoordinateType::parse("signed integer"),
Some(VectorCoordinateType::Integer64)
);
assert_eq!(
VectorCoordinateType::parse(" SIGNED INTEGER "),
Some(VectorCoordinateType::Integer64)
);
assert_eq!(
VectorCoordinateType::parse("FLOAT"),
Some(VectorCoordinateType::Float64)
);
assert_eq!(
VectorCoordinateType::parse("float32"),
Some(VectorCoordinateType::Float32)
);
assert_eq!(VectorCoordinateType::parse("bogus"), None);
}
// Dimension 0 is rejected and the error carries the offending value.
#[test]
fn try_new_rejects_zero_dim() {
let err = LoraVector::try_new(vec![], 0, VectorCoordinateType::Float64).unwrap_err();
assert!(matches!(err, VectorBuildError::InvalidDimension(0)));
}
// The dimension bound is checked before the length comparison: one value with
// an oversized dimension reports InvalidDimension, not DimensionMismatch.
#[test]
fn try_new_rejects_over_max_dim() {
let err = LoraVector::try_new(
vec![RawCoordinate::Int(1); 1],
(MAX_VECTOR_DIMENSION + 1) as i64,
VectorCoordinateType::Float64,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
}
// A value count that differs from the declared dimension reports both numbers.
#[test]
fn try_new_rejects_dimension_mismatch() {
let err = LoraVector::try_new(
vec![RawCoordinate::Int(1)],
2,
VectorCoordinateType::Integer64,
)
.unwrap_err();
assert!(matches!(
err,
VectorBuildError::DimensionMismatch {
expected: 2,
got: 1
}
));
}
// 128 is one past i8::MAX and must be rejected for Integer8.
#[test]
fn int8_overflow_errors() {
let err = LoraVector::try_new(
vec![RawCoordinate::Int(128)],
1,
VectorCoordinateType::Integer8,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
}
// Float inputs stored as Integer64 lose their fractional part (toward zero).
#[test]
fn float_to_int_truncates() {
let v = LoraVector::try_new(
vec![RawCoordinate::Float(1.9), RawCoordinate::Float(-1.9)],
2,
VectorCoordinateType::Integer64,
)
.unwrap();
match v.values {
VectorValues::Integer64(ref values) => assert_eq!(values, &[1, -1]),
_ => panic!("expected Integer64"),
}
}
// Integer inputs may be stored in a float vector.
#[test]
fn int_to_float_is_allowed() {
let v = LoraVector::try_new(
vec![RawCoordinate::Int(3), RawCoordinate::Int(4)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
assert_eq!(v.values, VectorValues::Float32(vec![3.0, 4.0]));
}
// Scientific notation parses as Float; a bare integer stays Int.
#[test]
fn parse_string_values_handles_scientific() {
let parsed = parse_string_values("[1.05e+00, 0.123, 5]").unwrap();
assert_eq!(parsed.len(), 3);
match parsed[0] {
RawCoordinate::Float(f) => assert!((f - 1.05).abs() < 1e-9),
_ => panic!("expected float"),
}
match parsed[2] {
RawCoordinate::Int(i) => assert_eq!(i, 5),
_ => panic!("expected int"),
}
}
// Identical vectors have bounded cosine similarity 1.
#[test]
fn cosine_similarity_is_bounded() {
let a = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let sim = cosine_similarity_bounded(&a, &b).unwrap();
assert!((sim - 1.0).abs() < 1e-6);
}
// Squared distance between (4,5,6) and (2,8,3) is 4 + 9 + 9 = 22, so the similarity is 1/23.
#[test]
fn euclidean_similarity_matches_documented_example() {
let a = LoraVector::try_new(
vec![
RawCoordinate::Float(4.0),
RawCoordinate::Float(5.0),
RawCoordinate::Float(6.0),
],
3,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![
RawCoordinate::Float(2.0),
RawCoordinate::Float(8.0),
RawCoordinate::Float(3.0),
],
3,
VectorCoordinateType::Float32,
)
.unwrap();
let sim = euclidean_similarity(&a, &b).unwrap();
assert!((sim - (1.0 / 23.0)).abs() < 1e-6, "got {sim}");
}
// Table-driven check of every accepted spelling, in several letter cases.
#[test]
fn parse_coordinate_type_every_alias() {
use VectorCoordinateType::*;
let cases: &[(&str, VectorCoordinateType)] = &[
("FLOAT", Float64),
("Float", Float64),
("float", Float64),
("FLOAT64", Float64),
("float64", Float64),
("FLOAT32", Float32),
("float32", Float32),
("INTEGER", Integer64),
("Integer", Integer64),
("integer", Integer64),
("INT", Integer64),
("int", Integer64),
("INT64", Integer64),
("int64", Integer64),
("INTEGER64", Integer64),
("SIGNED INTEGER", Integer64),
("signed integer", Integer64),
("Signed Integer", Integer64),
("INTEGER32", Integer32),
("int32", Integer32),
("INT32", Integer32),
("INTEGER16", Integer16),
("INT16", Integer16),
("int16", Integer16),
("INTEGER8", Integer8),
("INT8", Integer8),
("int8", Integer8),
];
for (input, expected) in cases {
assert_eq!(
VectorCoordinateType::parse(input),
Some(*expected),
"failed for input {input:?}"
);
}
}
// Plausible-looking names that are not supported, and the empty string, yield None.
#[test]
fn parse_coordinate_type_rejects_unsupported_aliases() {
for bogus in [
"DOUBLE",
"double",
"REAL",
"NUMBER",
"BIGINT",
"INT128",
"FLOAT128",
"UINT8",
"UNSIGNED INTEGER",
"BIT",
"",
] {
assert_eq!(
VectorCoordinateType::parse(bogus),
None,
"should reject {bogus:?}"
);
}
}
// Leading and trailing tabs, newlines and spaces are ignored.
#[test]
fn parse_coordinate_type_is_whitespace_tolerant() {
assert_eq!(
VectorCoordinateType::parse("\tinteger\n"),
Some(VectorCoordinateType::Integer64)
);
assert_eq!(
VectorCoordinateType::parse(" INTEGER "),
Some(VectorCoordinateType::Integer64)
);
}
// Helper: numeric value of a raw coordinate as f64, whichever variant it is.
fn unwrap_float(raw: RawCoordinate) -> f64 {
match raw {
RawCoordinate::Float(f) => f,
RawCoordinate::Int(i) => i as f64,
}
}
// Helper: the integer payload; panics if the coordinate was parsed as a float.
fn unwrap_int(raw: RawCoordinate) -> i64 {
match raw {
RawCoordinate::Int(i) => i,
RawCoordinate::Float(f) => panic!("expected Int, got Float({f})"),
}
}
// Negative numbers and padding around brackets and entries are accepted.
#[test]
fn parse_string_values_accepts_negatives_and_whitespace() {
let parsed = parse_string_values(" [ -1, -2.5 , 3 , -4.0e-2 ] ").unwrap();
assert_eq!(unwrap_int(parsed[0]), -1);
assert!((unwrap_float(parsed[1]) + 2.5).abs() < 1e-9);
assert_eq!(unwrap_int(parsed[2]), 3);
assert!((unwrap_float(parsed[3]) + 0.04).abs() < 1e-12);
}
// Exponents with an explicit '+' or '-' sign parse correctly.
#[test]
fn parse_string_values_accepts_signed_exponents() {
let parsed = parse_string_values("[1e+10, 1e-10, -2.5e+3]").unwrap();
assert!((unwrap_float(parsed[0]) - 1e10).abs() < 1.0);
assert!((unwrap_float(parsed[1]) - 1e-10).abs() < 1e-20);
assert!((unwrap_float(parsed[2]) + 2500.0).abs() < 1e-9);
}
// "[]" is a valid, empty coordinate list.
#[test]
fn parse_string_values_accepts_empty_brackets() {
let parsed = parse_string_values("[]").unwrap();
assert!(parsed.is_empty());
}
// Both the opening and the closing bracket are required.
#[test]
fn parse_string_values_rejects_missing_brackets() {
assert!(parse_string_values("1, 2, 3").is_err());
assert!(parse_string_values("[1, 2, 3").is_err());
assert!(parse_string_values("1, 2, 3]").is_err());
}
// Empty entries (leading, trailing, doubled or lone commas) are errors.
#[test]
fn parse_string_values_rejects_empty_entries() {
assert!(parse_string_values("[1, , 3]").is_err());
assert!(parse_string_values("[,1,2]").is_err());
assert!(parse_string_values("[1,2,]").is_err());
assert!(parse_string_values("[ , ]").is_err());
}
// Words, booleans and quoted numbers are not coordinates.
#[test]
fn parse_string_values_rejects_non_numeric_tokens() {
assert!(parse_string_values("[1, abc, 3]").is_err());
assert!(parse_string_values("[true, false]").is_err());
assert!(parse_string_values("[\"1\", \"2\"]").is_err());
}
// NaN and infinities are rejected wherever they appear in the list.
#[test]
fn parse_string_values_rejects_non_finite() {
for bad in ["[NaN]", "[Infinity]", "[-Infinity]", "[1, NaN, 3]"] {
assert!(parse_string_values(bad).is_err(), "should reject {bad:?}");
}
}
// The upper bound is inclusive: exactly MAX_VECTOR_DIMENSION coordinates are accepted.
#[test]
fn try_new_accepts_exactly_max_dimension() {
let raw = vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION];
let v = LoraVector::try_new(
raw,
MAX_VECTOR_DIMENSION as i64,
VectorCoordinateType::Integer8,
)
.expect("4096 should be accepted");
assert_eq!(v.dimension, MAX_VECTOR_DIMENSION);
}
// One past the maximum is rejected even when the value count matches the dimension.
#[test]
fn try_new_rejects_max_plus_one_dimension() {
let err = LoraVector::try_new(
vec![RawCoordinate::Int(0); MAX_VECTOR_DIMENSION + 1],
(MAX_VECTOR_DIMENSION + 1) as i64,
VectorCoordinateType::Integer8,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::InvalidDimension(_)));
}
// Negative dimensions are rejected and echoed back in the error.
#[test]
fn try_new_rejects_negative_dimension() {
let err = LoraVector::try_new(vec![], -1, VectorCoordinateType::Integer64).unwrap_err();
assert!(matches!(err, VectorBuildError::InvalidDimension(-1)));
}
// For each integer width, MIN and MAX are accepted and the values just outside are rejected.
#[test]
fn integer_boundaries_round_trip() {
let cases: &[(VectorCoordinateType, i64, i64, i64, i64)] = &[
(
VectorCoordinateType::Integer8,
i8::MIN as i64,
i8::MAX as i64,
i8::MIN as i64 - 1,
i8::MAX as i64 + 1,
),
(
VectorCoordinateType::Integer16,
i16::MIN as i64,
i16::MAX as i64,
i16::MIN as i64 - 1,
i16::MAX as i64 + 1,
),
(
VectorCoordinateType::Integer32,
i32::MIN as i64,
i32::MAX as i64,
i32::MIN as i64 - 1,
i32::MAX as i64 + 1,
),
(VectorCoordinateType::Integer64, i64::MIN, i64::MAX, 0, 0),
];
for (ty, min, max, under, over) in cases {
LoraVector::try_new(vec![RawCoordinate::Int(*min)], 1, *ty)
.unwrap_or_else(|e| panic!("{ty:?} min rejected: {e}"));
LoraVector::try_new(vec![RawCoordinate::Int(*max)], 1, *ty)
.unwrap_or_else(|e| panic!("{ty:?} max rejected: {e}"));
// No i64 input can fall outside the Integer64 range, so the under/over
// placeholders for that row are skipped.
if *ty == VectorCoordinateType::Integer64 {
continue;
}
let e = LoraVector::try_new(vec![RawCoordinate::Int(*under)], 1, *ty).unwrap_err();
assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
let e = LoraVector::try_new(vec![RawCoordinate::Int(*over)], 1, *ty).unwrap_err();
assert!(matches!(e, VectorBuildError::OutOfRange { .. }));
}
}
// A magnitude beyond f32::MAX cannot be stored in a Float32 vector.
#[test]
fn float32_overflow_errors() {
let huge = (f32::MAX as f64) * 10.0;
let err = LoraVector::try_new(
vec![RawCoordinate::Float(huge)],
1,
VectorCoordinateType::Float32,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
}
// Truncation is toward zero for both signs, including values in (-1, 1).
#[test]
fn float_to_int_truncates_toward_zero() {
let v = LoraVector::try_new(
vec![
RawCoordinate::Float(1.9),
RawCoordinate::Float(-1.9),
RawCoordinate::Float(0.999),
RawCoordinate::Float(-0.999),
],
4,
VectorCoordinateType::Integer8,
)
.unwrap();
match v.values {
VectorValues::Integer8(ref values) => assert_eq!(values, &[1i8, -1, 0, 0]),
_ => panic!("expected Integer8"),
}
}
// A finite float far beyond the i64 range is rejected for Integer64.
#[test]
fn float_out_of_range_i64_errors() {
let err = LoraVector::try_new(
vec![RawCoordinate::Float(f64::MAX)],
1,
VectorCoordinateType::Integer64,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::OutOfRange { .. }));
}
// NaN and both infinities are rejected at construction time.
#[test]
fn non_finite_float_rejected_in_try_new() {
for bad in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
let err = LoraVector::try_new(
vec![RawCoordinate::Float(bad)],
1,
VectorCoordinateType::Float64,
)
.unwrap_err();
assert!(matches!(err, VectorBuildError::NonFiniteCoordinate));
}
}
// Keys differ when the coordinate type, dimension, or any value differs, and match for equal vectors.
#[test]
fn to_key_string_distinguishes_coord_type_dim_and_values() {
fn v(coord: VectorCoordinateType, vals: &[i64], dim: i64) -> LoraVector {
LoraVector::try_new(
vals.iter().map(|x| RawCoordinate::Int(*x)).collect(),
dim,
coord,
)
.unwrap()
}
let a = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
let b = v(VectorCoordinateType::Integer32, &[1, 2, 3], 3);
assert_ne!(a.to_key_string(), b.to_key_string());
let c = v(VectorCoordinateType::Integer64, &[1, 2], 2);
assert_ne!(a.to_key_string(), c.to_key_string());
let d = v(VectorCoordinateType::Integer64, &[1, 2, 4], 3);
assert_ne!(a.to_key_string(), d.to_key_string());
let a2 = v(VectorCoordinateType::Integer64, &[1, 2, 3], 3);
assert_eq!(a.to_key_string(), a2.to_key_string());
}
// Orthogonal unit vectors: raw cosine 0, bounded cosine 0.5.
#[test]
fn cosine_orthogonal_is_zero_raw_and_half_bounded() {
let a = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![RawCoordinate::Int(0), RawCoordinate::Int(1)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
assert!((cosine_similarity_raw(&a, &b).unwrap()).abs() < 1e-6);
assert!((cosine_similarity_bounded(&a, &b).unwrap() - 0.5).abs() < 1e-6);
}
// Opposite unit vectors: raw cosine -1, bounded cosine 0.
#[test]
fn cosine_opposite_is_neg_one_raw_and_zero_bounded() {
let a = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![RawCoordinate::Int(-1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
assert!((cosine_similarity_raw(&a, &b).unwrap() + 1.0).abs() < 1e-6);
assert!(cosine_similarity_bounded(&a, &b).unwrap().abs() < 1e-6);
}
// Cosine similarity is undefined for a zero vector, so both variants return None.
#[test]
fn cosine_zero_vector_returns_none() {
let zero = LoraVector::try_new(
vec![RawCoordinate::Int(0), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let other = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
assert!(cosine_similarity_raw(&zero, &other).is_none());
assert!(cosine_similarity_bounded(&zero, &other).is_none());
}
// Every two-vector helper returns None when the dimensions differ.
#[test]
fn distance_helpers_respect_dimension_mismatch() {
let a = LoraVector::try_new(
vec![RawCoordinate::Int(1), RawCoordinate::Int(0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![
RawCoordinate::Int(1),
RawCoordinate::Int(0),
RawCoordinate::Int(0),
],
3,
VectorCoordinateType::Float32,
)
.unwrap();
assert!(euclidean_distance(&a, &b).is_none());
assert!(euclidean_distance_squared(&a, &b).is_none());
assert!(manhattan_distance(&a, &b).is_none());
assert!(hamming_distance(&a, &b).is_none());
assert!(dot_product(&a, &b).is_none());
}
// For (3, 4, 0, -12): L1 norm = 3 + 4 + 0 + 12 = 19, L2 norm = sqrt(9 + 16 + 144) = 13.
#[test]
fn manhattan_and_euclidean_norm_match_hand_computed() {
let v = LoraVector::try_new(
vec![
RawCoordinate::Float(3.0),
RawCoordinate::Float(4.0),
RawCoordinate::Float(0.0),
RawCoordinate::Float(-12.0),
],
4,
VectorCoordinateType::Float32,
)
.unwrap();
assert!((manhattan_norm(&v) - 19.0).abs() < 1e-5);
assert!((euclidean_norm(&v) - 13.0).abs() < 1e-5);
}
// A Float32 and a Float64 vector with equal values are at distance 0; one differing coordinate gives 1.
#[test]
fn hamming_on_float_vectors_uses_f32_comparison() {
let a = LoraVector::try_new(
vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
let b = LoraVector::try_new(
vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.0)],
2,
VectorCoordinateType::Float64,
)
.unwrap();
assert!((hamming_distance(&a, &b).unwrap()).abs() < 1e-9);
let c = LoraVector::try_new(
vec![RawCoordinate::Float(1.0), RawCoordinate::Float(2.5)],
2,
VectorCoordinateType::Float32,
)
.unwrap();
assert!((hamming_distance(&a, &c).unwrap() - 1.0).abs() < 1e-9);
}
}