use serde::{Deserialize, Serialize};
use uuid::Uuid;
use std::collections::HashMap;
use crate::vector::Vector;
use crate::multivector::MultiVector;
/// Sparse vector stored as two parallel lists: `indices[k]` is paired with `values[k]`.
///
/// Both fields are public and the type itself performs no validation, so the lists
/// are not guaranteed to have equal length, nor are the indices guaranteed to be
/// unique or sorted.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct SparseVector {
/// Position of each stored entry.
pub indices: Vec<u32>,
/// Value of each stored entry, matched positionally with `indices`.
pub values: Vec<f32>,
}
impl SparseVector {
    /// Builds a sparse vector from parallel `indices` / `values` lists.
    ///
    /// The lists are stored exactly as given; no length, ordering or uniqueness
    /// check is performed.
    pub fn new(indices: Vec<u32>, values: Vec<f32>) -> Self {
        Self { indices, values }
    }

    /// Dot product of `self` and `other` over the indices present in both.
    ///
    /// Entries are read pairwise from `indices` and `values`; if the two lists of
    /// a vector differ in length, the surplus entries of the longer list are
    /// ignored. An index that occurs several times in one vector contributes the
    /// sum of its values, on either side, so `a.dot(&b)` and `b.dot(&a)` describe
    /// the same quantity.
    ///
    /// Returns `0.0` when either vector has no indices.
    pub fn dot(&self, other: &SparseVector) -> f32 {
        // Nothing can overlap, so skip building the lookup table altogether.
        if self.indices.is_empty() || other.indices.is_empty() {
            return 0.0;
        }

        // Accumulate instead of overwrite: a plain `collect()` would keep only the
        // last value of a repeated index in `other`, while repeated indices in
        // `self` are all summed by the loop below, making the result depend on
        // which operand is on which side.
        let mut other_map: HashMap<u32, f32> = HashMap::with_capacity(other.indices.len());
        for (&idx, &val) in other.indices.iter().zip(other.values.iter()) {
            *other_map.entry(idx).or_insert(0.0) += val;
        }

        // Explicit fold from 0.0 in `self` order keeps the summation order fixed.
        self.indices
            .iter()
            .zip(self.values.iter())
            .filter_map(|(idx, &val)| other_map.get(idx).map(|&other_val| val * other_val))
            .fold(0.0f32, |acc, term| acc + term)
    }

    /// Returns `true` when the vector holds no indices.
    pub fn is_empty(&self) -> bool {
        self.indices.is_empty()
    }
}
/// Vector payload that is either one [`Vector`] or one [`MultiVector`].
///
/// `#[serde(untagged)]` means no variant tag is written; when deserializing,
/// serde tries the variants in declaration order (`Single`, then `Multi`), so
/// the order of the variants below is part of the wire behavior.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum VectorData {
/// Holds one `Vector`.
Single(Vector),
/// Holds one `MultiVector`.
Multi(MultiVector),
}
impl VectorData {
pub fn dim(&self) -> usize {
match self {
VectorData::Single(v) => v.dim(),
VectorData::Multi(mv) => mv.dim(),
}
}
pub fn is_multi(&self) -> bool {
matches!(self, VectorData::Multi(_))
}
pub fn as_single(&self) -> Vector {
match self {
VectorData::Single(v) => v.clone(),
VectorData::Multi(mv) => mv.to_single_vector(),
}
}
pub fn as_slice(&self) -> &[f32] {
match self {
VectorData::Single(v) => v.as_slice(),
VectorData::Multi(mv) => mv.vectors().first().map(|v| v.as_slice()).unwrap_or(&[]),
}
}
}
impl From<Vector> for VectorData {
fn from(v: Vector) -> Self {
VectorData::Single(v)
}
}
impl From<MultiVector> for VectorData {
fn from(mv: MultiVector) -> Self {
VectorData::Multi(mv)
}
}
/// A stored point: an identifier plus its vector data and an optional JSON payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Point {
/// Identifier of the point.
pub id: PointId,
/// Version counter; falls back to `0` (the `u64` default) when the field is
/// absent from deserialized input.
#[serde(default)]
pub version: u64,
/// Primary vector. The key `vectors` is accepted as an alias when deserializing.
#[serde(alias = "vectors")]
pub vector: Vector,
/// Optional multivector; left out of serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub multivector: Option<MultiVector>,
/// Sparse vectors keyed by name; defaults to an empty map when absent from
/// input and is left out of serialized output when empty.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub sparse_vectors: HashMap<String, SparseVector>,
/// Optional arbitrary JSON value attached to the point.
pub payload: Option<serde_json::Value>,
}
/// Identifier of a point: a string, a UUID, or an unsigned 64-bit integer.
///
/// Serialized without a variant tag (`#[serde(untagged)]`), i.e. as the bare
/// inner value.
// NOTE(review): serde tries untagged variants in declaration order, so any string
// input matches `String` before `Uuid` is attempted — confirm that UUID-looking
// strings are meant to deserialize as `PointId::String`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(untagged)]
pub enum PointId {
/// Free-form string identifier.
String(String),
/// UUID identifier.
Uuid(Uuid),
/// Unsigned integer identifier.
Integer(u64),
}
impl std::fmt::Display for PointId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PointId::String(s) => write!(f, "{}", s),
PointId::Uuid(u) => write!(f, "{}", u),
PointId::Integer(i) => write!(f, "{}", i),
}
}
}
impl From<String> for PointId {
fn from(s: String) -> Self {
PointId::String(s)
}
}
impl From<u64> for PointId {
fn from(i: u64) -> Self {
PointId::Integer(i)
}
}
impl From<Uuid> for PointId {
fn from(u: Uuid) -> Self {
PointId::Uuid(u)
}
}
impl Point {
    /// Creates a point from a vector and an optional payload.
    ///
    /// The point starts at version `0` with no multivector and an empty
    /// sparse-vector map.
    #[inline]
    #[must_use]
    pub fn new(id: PointId, vector: Vector, payload: Option<serde_json::Value>) -> Self {
        let version = 0;
        let multivector = None;
        let sparse_vectors = HashMap::new();
        Self {
            id,
            version,
            vector,
            multivector,
            sparse_vectors,
            payload,
        }
    }

    /// Creates a point from a multivector.
    ///
    /// The `vector` field is filled with `multivector.to_single_vector()` and the
    /// multivector itself is kept alongside it. Version is `0` and there are no
    /// sparse vectors.
    #[inline]
    #[must_use]
    pub fn new_multi(id: PointId, multivector: MultiVector, payload: Option<serde_json::Value>) -> Self {
        // Derive the single vector before the multivector is moved into the point.
        let derived = multivector.to_single_vector();
        let mut point = Self::new(id, derived, payload);
        point.multivector = Some(multivector);
        point
    }

    /// Creates a point that carries only sparse vectors.
    ///
    /// The `vector` field is set to a one-element placeholder `[0.0]`; there is no
    /// multivector and the version is `0`.
    #[inline]
    #[must_use]
    pub fn new_sparse(id: PointId, sparse_vectors: HashMap<String, SparseVector>, payload: Option<serde_json::Value>) -> Self {
        let placeholder = Vector::new(vec![0.0]);
        let mut point = Self::new(id, placeholder, payload);
        point.sparse_vectors = sparse_vectors;
        point
    }

    /// Stores `sparse` under `name`, replacing any sparse vector already
    /// registered under that name.
    pub fn add_sparse_vector(&mut self, name: String, sparse: SparseVector) {
        // The previously stored vector, if any, is intentionally discarded.
        let _replaced = self.sparse_vectors.insert(name, sparse);
    }

    /// Looks up the sparse vector stored under `name`, if there is one.
    pub fn get_sparse_vector(&self, name: &str) -> Option<&SparseVector> {
        let registry = &self.sparse_vectors;
        registry.get(name)
    }

    /// Returns `true` when a multivector is attached to this point.
    #[inline]
    pub fn has_multivector(&self) -> bool {
        self.get_multivector().is_some()
    }

    /// Borrows the attached multivector, or `None` when there is none.
    #[inline]
    pub fn get_multivector(&self) -> Option<&MultiVector> {
        self.multivector.as_ref()
    }

    /// Builder-style setter: returns the point with its payload replaced by
    /// `payload`.
    #[inline]
    #[must_use]
    pub fn with_payload(self, payload: serde_json::Value) -> Self {
        let mut point = self;
        point.payload = Some(payload);
        point
    }

    /// Builder-style setter: returns the point with `multivector` attached.
    ///
    /// Only the `multivector` field changes; the existing `vector` field is left
    /// as it was.
    #[inline]
    #[must_use]
    pub fn with_multivector(self, multivector: MultiVector) -> Self {
        let mut point = self;
        point.multivector = Some(multivector);
        point
    }
}