use std::collections::HashMap;
use async_trait::async_trait;
use cognis_core::schemars::{self, JsonSchema};
use serde::{Deserialize, Serialize};
use cognis_core::Result;
mod in_memory;
pub use in_memory::InMemoryVectorStore;
#[cfg(feature = "vectorstore-chroma")]
pub mod chroma;
#[cfg(feature = "vectorstore-chroma")]
pub use chroma::{ChromaBuilder, ChromaProvider};
#[cfg(feature = "vectorstore-qdrant")]
pub mod qdrant;
#[cfg(feature = "vectorstore-qdrant")]
pub use qdrant::{QdrantBuilder, QdrantProvider};
#[cfg(feature = "vectorstore-pinecone")]
pub mod pinecone;
#[cfg(feature = "vectorstore-pinecone")]
pub use pinecone::{PineconeBuilder, PineconeProvider};
#[cfg(feature = "vectorstore-weaviate")]
pub mod weaviate;
#[cfg(feature = "vectorstore-weaviate")]
pub use weaviate::{WeaviateBuilder, WeaviateProvider};
#[cfg(feature = "vectorstore-faiss")]
pub mod faiss;
#[cfg(feature = "vectorstore-faiss")]
pub use faiss::{
FaissConfig, FaissIndex, FaissIndexType, FaissMetric, FaissVectorStore, FlatIndex, HNSWIndex,
IVFFlatIndex,
};
/// Metadata filter made of per-key conditions.
///
/// Every condition in every map must hold for a record to match (see
/// [`Filter::matches`]). Each map is omitted from the serialized form when it
/// is empty and defaults to empty when absent on deserialization.
#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Filter {
/// Keys whose metadata value must be equal to the given JSON value.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub equals: HashMap<String, serde_json::Value>,
/// Keys whose metadata value must be equal to one of the listed JSON values.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub r#in: HashMap<String, Vec<serde_json::Value>>,
/// Keys whose metadata value, read as an `f64`, must be greater than or
/// equal to the given bound.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub gte: HashMap<String, f64>,
/// Keys whose metadata value, read as an `f64`, must be less than or equal
/// to the given bound.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub lte: HashMap<String, f64>,
}
impl Filter {
    /// Creates a filter with no conditions.
    pub fn new() -> Self {
        Default::default()
    }

    /// Requires the metadata value under `key` to equal `value`.
    ///
    /// A later call with the same key replaces the earlier condition.
    pub fn equals(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
        let (field, expected) = (key.into(), value.into());
        self.equals.insert(field, expected);
        self
    }

    /// Requires the metadata value under `key` to equal one of `values`.
    ///
    /// A later call with the same key replaces the earlier list.
    pub fn one_of<I, V>(mut self, key: impl Into<String>, values: I) -> Self
    where
        I: IntoIterator<Item = V>,
        V: Into<serde_json::Value>,
    {
        let field: String = key.into();
        let allowed: Vec<serde_json::Value> = values.into_iter().map(Into::into).collect();
        self.r#in.insert(field, allowed);
        self
    }

    /// Requires the numeric metadata value under `key` to be at least `n`.
    pub fn gte(mut self, key: impl Into<String>, n: f64) -> Self {
        let field: String = key.into();
        self.gte.insert(field, n);
        self
    }

    /// Requires the numeric metadata value under `key` to be at most `n`.
    pub fn lte(mut self, key: impl Into<String>, n: f64) -> Self {
        let field: String = key.into();
        self.lte.insert(field, n);
        self
    }

    /// Returns `true` when the filter carries no conditions at all.
    pub fn is_empty(&self) -> bool {
        let no_exact_conditions = self.equals.is_empty() && self.r#in.is_empty();
        let no_range_conditions = self.gte.is_empty() && self.lte.is_empty();
        no_exact_conditions && no_range_conditions
    }

    /// Checks whether `metadata` satisfies every condition of this filter.
    ///
    /// A key that is missing from `metadata` fails its condition. For the
    /// `gte`/`lte` bounds, a value that `as_f64` cannot read as a number also
    /// fails. A filter without conditions matches any metadata.
    pub fn matches(&self, metadata: &HashMap<String, serde_json::Value>) -> bool {
        let equals_hold = self
            .equals
            .iter()
            .all(|(field, expected)| metadata.get(field) == Some(expected));

        // `&&` short-circuits, so later groups are only evaluated while every
        // earlier group has passed.
        equals_hold
            && self.r#in.iter().all(|(field, allowed)| {
                matches!(metadata.get(field), Some(actual) if allowed.contains(actual))
            })
            && self.gte.iter().all(|(field, lower)| {
                let number = metadata.get(field).and_then(serde_json::Value::as_f64);
                matches!(number, Some(n) if n >= *lower)
            })
            && self.lte.iter().all(|(field, upper)| {
                let number = metadata.get(field).and_then(serde_json::Value::as_f64);
                matches!(number, Some(n) if n <= *upper)
            })
    }
}
/// One hit returned by a vector-store search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
/// Identifier of the matched entry.
pub id: String,
/// Text of the matched entry.
pub text: String,
/// Score reported for this hit.
// NOTE(review): whether a higher or a lower score means "more similar" is
// not established in this file — presumably backend-specific; confirm.
pub score: f32,
/// Metadata attached to the matched entry; this is what
/// [`Filter::matches`] is evaluated against.
pub metadata: HashMap<String, serde_json::Value>,
}
/// Common asynchronous interface implemented by the vector-store backends.
#[async_trait]
pub trait VectorStore: Send + Sync {
    /// Adds `texts` to the store, optionally with one metadata map per text.
    ///
    /// NOTE(review): the returned strings are presumably the ids assigned to
    /// the new entries, and `metadata` presumably runs parallel to `texts` —
    /// confirm against the implementations.
    async fn add_texts(
        &mut self,
        texts: Vec<String>,
        metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
    ) -> Result<Vec<String>>;

    /// Adds caller-supplied `vectors` together with their `texts` and optional
    /// metadata.
    ///
    /// NOTE(review): the three collections are presumably index-aligned and
    /// the returned strings presumably the new ids — confirm against the
    /// implementations.
    async fn add_vectors(
        &mut self,
        vectors: Vec<Vec<f32>>,
        texts: Vec<String>,
        metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
    ) -> Result<Vec<String>>;

    /// Searches the store with a text `query`, asking for `k` results.
    async fn similarity_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>>;

    /// Searches the store with a ready-made `query_vector`, asking for `k`
    /// results.
    async fn similarity_search_by_vector(
        &self,
        query_vector: Vec<f32>,
        k: usize,
    ) -> Result<Vec<SearchResult>>;

    /// Searches with a text `query` and keeps only hits whose metadata
    /// satisfies `filter`, returning at most `k` of them.
    ///
    /// This default implementation filters on the client side: it requests
    /// `4 * k` candidates (saturating) from [`VectorStore::similarity_search`]
    /// and drops the ones that do not match. It can therefore return fewer
    /// than `k` hits even when more matching entries exist in the store. An
    /// empty filter is forwarded to the plain search unchanged.
    async fn similarity_search_with_filter(
        &self,
        query: &str,
        k: usize,
        filter: &Filter,
    ) -> Result<Vec<SearchResult>> {
        if filter.is_empty() {
            return self.similarity_search(query, k).await;
        }

        // Over-fetch so that enough candidates are left once non-matching
        // hits have been removed.
        let fetch_count = k.saturating_mul(4);
        let mut hits = self.similarity_search(query, fetch_count).await?;

        // `retain` keeps the original ordering of the surviving hits.
        hits.retain(|hit| filter.matches(&hit.metadata));
        hits.truncate(k);
        Ok(hits)
    }

    /// Deletes the entries identified by `ids`.
    async fn delete(&mut self, ids: Vec<String>) -> Result<()>;

    /// Returns the number of entries, as reported by the implementation.
    fn len(&self) -> usize;

    /// Returns `true` when [`VectorStore::len`] reports zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}