#![forbid(unsafe_code)]
#![doc = include_str!("../README.md")]
use core::fmt;
use std::error::Error;
// Generates a public tuple struct that wraps a `String`.
//
// Attributes (including doc comments) written before the identifier are
// forwarded onto the generated struct. The generated type receives:
// - inherent `new` and `as_str` methods,
// - `AsRef<str>`, `From<String>` and `From<&str>` impls,
// - a `Display` impl that prints the wrapped text.
macro_rules! string_newtype {
    ($(#[$meta:meta])* $name:ident) => {
        $(#[$meta])*
        #[derive(Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
        pub struct $name(String);

        impl $name {
            /// Builds the value from anything convertible into a `String`.
            ///
            /// The text is stored as given; no validation is performed.
            pub fn new(value: impl Into<String>) -> Self {
                Self(value.into())
            }

            /// Borrows the wrapped text.
            pub fn as_str(&self) -> &str {
                &self.0
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.as_str()
            }
        }

        impl From<String> for $name {
            fn from(value: String) -> Self {
                Self::new(value)
            }
        }

        impl From<&str> for $name {
            fn from(value: &str) -> Self {
                Self::new(value)
            }
        }

        impl fmt::Display for $name {
            fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                // `pad` applies the caller's width, fill, alignment and
                // precision exactly like `str`'s own `Display`; a bare
                // `write_str` would silently drop those flags. Plain `{}`
                // output is unaffected.
                formatter.pad(self.as_str())
            }
        }
    };
}
string_newtype! {
/// String-backed identifier type; `VectorRecord` stores one as its `id`.
VectorId
}
string_newtype! {
/// String-backed newtype holding the name of a vector collection.
VectorCollectionName
}
/// A dimension count, stored as a plain `usize`.
///
/// The `Default` value wraps `0`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct VectorDimension(usize);

impl VectorDimension {
    /// Wraps `value` without any validation (zero is accepted).
    pub const fn new(value: usize) -> Self {
        Self(value)
    }

    /// Returns the wrapped count.
    pub const fn value(self) -> usize {
        self.0
    }
}

impl fmt::Display for VectorDimension {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to `usize`'s `Display` so caller-supplied flags (width,
        // fill, alignment, zero-padding) are honoured. Re-formatting through
        // `write!(.., "{}", ..)` would discard them. Plain `{}` output is
        // unchanged.
        fmt::Display::fmt(&self.0, formatter)
    }
}
/// An owned list of `f32` components.
///
/// The `Default` value holds no components.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Embedding(Vec<f32>);

impl Embedding {
    /// Takes ownership of `values` as-is; the vector may be empty.
    pub fn new(values: Vec<f32>) -> Self {
        Embedding(values)
    }

    /// Borrows the components as a slice.
    pub fn values(&self) -> &[f32] {
        self.0.as_slice()
    }

    /// Reports the number of stored components as a [`VectorDimension`].
    pub fn dimension(&self) -> VectorDimension {
        let component_count = self.values().len();
        VectorDimension::new(component_count)
    }

    /// Returns `true` when no components are stored.
    pub fn is_empty(&self) -> bool {
        self.values().is_empty()
    }
}
/// Error carrying an expected dimension alongside the one actually found.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct InvalidDimensionError {
    expected: VectorDimension,
    actual: VectorDimension,
}

impl InvalidDimensionError {
    /// Pairs the `expected` dimension with the `actual` one.
    ///
    /// The two values are stored as given; they are not compared here.
    pub const fn new(expected: VectorDimension, actual: VectorDimension) -> Self {
        Self { expected, actual }
    }

    /// The dimension that was expected.
    pub const fn expected(self) -> VectorDimension {
        let Self { expected, .. } = self;
        expected
    }

    /// The dimension that was actually found.
    pub const fn actual(self) -> VectorDimension {
        let Self { actual, .. } = self;
        actual
    }
}

impl fmt::Display for InvalidDimensionError {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { expected, actual } = self;
        write!(
            formatter,
            "vector dimension mismatch: expected {}, got {}",
            expected, actual
        )
    }
}

// No underlying cause is exposed, so the default `Error` methods suffice.
impl Error for InvalidDimensionError {}
/// Names a similarity metric.
///
/// This type is only a label: no distance computation is attached to it here.
/// The derived ordering follows declaration order, and the `Default` value is
/// [`SimilarityMetric::Unknown`].
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum SimilarityMetric {
    /// Rendered as `"cosine"`.
    Cosine,
    /// Rendered as `"dot-product"`.
    DotProduct,
    /// Rendered as `"euclidean"`.
    Euclidean,
    /// Rendered as `"manhattan"`.
    Manhattan,
    /// Rendered as `"hamming"`.
    Hamming,
    /// Rendered as `"unknown"`; this is the default variant.
    #[default]
    Unknown,
}

impl SimilarityMetric {
    /// Returns the lowercase textual name of the metric.
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Cosine => "cosine",
            Self::DotProduct => "dot-product",
            Self::Euclidean => "euclidean",
            Self::Manhattan => "manhattan",
            Self::Hamming => "hamming",
            Self::Unknown => "unknown",
        }
    }
}

impl fmt::Display for SimilarityMetric {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `pad` honours width, fill, alignment and precision requested by the
        // caller (useful for aligned, table-style output); `write_str` would
        // ignore them. Plain `{}` output is identical to `as_str()`.
        formatter.pad(self.as_str())
    }
}
/// An ordered list of `(key, value)` string pairs.
///
/// Pairs are kept in insertion order, and keys are not de-duplicated.
/// Equality therefore depends on both content and order.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct VectorMetadata {
    entries: Vec<(String, String)>,
}

impl VectorMetadata {
    /// Creates metadata with no entries.
    pub const fn new() -> Self {
        Self {
            entries: Vec::new(),
        }
    }

    /// Appends one `(key, value)` pair and hands the metadata back.
    ///
    /// An existing pair with the same key is left in place; the new pair is
    /// simply added after it.
    pub fn with_entry(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
        let pair = (key.into(), value.into());
        self.entries.push(pair);
        self
    }

    /// Borrows all pairs in insertion order.
    pub fn entries(&self) -> &[(String, String)] {
        self.entries.as_slice()
    }
}
/// A vector entry: an id, its embedding, and optional descriptive fields.
#[derive(Clone, Debug, PartialEq)]
pub struct VectorRecord {
    id: VectorId,
    embedding: Embedding,
    // Only ever set by `with_dimension`, after it matched the embedding.
    dimension: Option<VectorDimension>,
    similarity_metric: Option<SimilarityMetric>,
    metadata: VectorMetadata,
}

impl VectorRecord {
    /// Creates a record with no declared dimension, no similarity metric and
    /// empty metadata.
    pub fn new(id: VectorId, embedding: Embedding) -> Self {
        Self {
            id,
            embedding,
            metadata: VectorMetadata::new(),
            dimension: None,
            similarity_metric: None,
        }
    }

    /// Declares the record's dimension after checking it against the embedding.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidDimensionError`] (expected = `dimension`, actual = the
    /// embedding's own dimension) when the two differ. The record is consumed
    /// in that case.
    pub fn with_dimension(
        mut self,
        dimension: VectorDimension,
    ) -> Result<Self, InvalidDimensionError> {
        let actual = self.embedding.dimension();
        if dimension == actual {
            self.dimension = Some(dimension);
            Ok(self)
        } else {
            Err(InvalidDimensionError::new(dimension, actual))
        }
    }

    /// Sets the similarity metric, replacing any previously set one.
    pub const fn with_similarity_metric(mut self, similarity_metric: SimilarityMetric) -> Self {
        self.similarity_metric = Some(similarity_metric);
        self
    }

    /// Replaces the record's metadata wholesale with `metadata`.
    pub fn with_metadata(self, metadata: VectorMetadata) -> Self {
        Self { metadata, ..self }
    }

    /// Borrows the record's id.
    pub const fn id(&self) -> &VectorId {
        &self.id
    }

    /// Borrows the record's embedding.
    pub const fn embedding(&self) -> &Embedding {
        &self.embedding
    }

    /// The declared dimension, or `None` if `with_dimension` never succeeded.
    pub const fn dimension(&self) -> Option<VectorDimension> {
        self.dimension
    }

    /// The similarity metric, or `None` if none was set.
    pub const fn similarity_metric(&self) -> Option<SimilarityMetric> {
        self.similarity_metric
    }

    /// Borrows the record's metadata.
    pub const fn metadata(&self) -> &VectorMetadata {
        &self.metadata
    }
}
#[cfg(test)]
mod tests {
    use super::{
        Embedding, InvalidDimensionError, SimilarityMetric, VectorCollectionName, VectorDimension,
        VectorId, VectorMetadata, VectorRecord,
    };

    /// String newtypes expose their text, and an embedding reports its length
    /// as its dimension.
    #[test]
    fn constructs_vector_labels_and_embedding() {
        let embedding = Embedding::new(vec![0.1, 0.2, 0.3]);
        let collection = VectorCollectionName::new("reviews");
        let id = VectorId::new("review_embedding");

        assert_eq!(id.to_string(), "review_embedding");
        assert_eq!(collection.as_ref(), "reviews");
        assert_eq!(embedding.dimension(), VectorDimension::new(3));
    }

    /// A matching dimension is accepted and the remaining builder steps stick.
    #[test]
    fn validates_vector_dimensions() -> Result<(), InvalidDimensionError> {
        let metadata = VectorMetadata::new().with_entry("source", "review");
        let unchecked =
            VectorRecord::new(VectorId::new("review_1"), Embedding::new(vec![1.0, 0.0]));
        let checked = unchecked.with_dimension(VectorDimension::new(2))?;
        let record = checked
            .with_similarity_metric(SimilarityMetric::Cosine)
            .with_metadata(metadata);

        assert_eq!(record.dimension(), Some(VectorDimension::new(2)));
        assert_eq!(record.similarity_metric(), Some(SimilarityMetric::Cosine));
        assert_eq!(record.metadata().entries().len(), 1);
        assert_eq!(SimilarityMetric::DotProduct.to_string(), "dot-product");
        Ok(())
    }

    /// Declaring a dimension that differs from the embedding's yields an error
    /// carrying both values.
    #[test]
    fn rejects_dimension_mismatches() {
        let expected_error =
            InvalidDimensionError::new(VectorDimension::new(3), VectorDimension::new(2));

        let outcome = VectorRecord::new(VectorId::new("review_1"), Embedding::new(vec![1.0, 0.0]))
            .with_dimension(VectorDimension::new(3));

        assert_eq!(outcome, Err(expected_error));
    }
}