1use std::collections::HashMap;
4
5use async_trait::async_trait;
6use cognis_core::schemars::{self, JsonSchema};
7use serde::{Deserialize, Serialize};
8
9use cognis_core::Result;
10
11pub mod dedup;
12pub use dedup::{normalized_fingerprint, DedupVectorStore};
13
14mod in_memory;
15pub use in_memory::InMemoryVectorStore;
16
17#[cfg(feature = "vectorstore-chroma")]
18pub mod chroma;
19#[cfg(feature = "vectorstore-chroma")]
20pub use chroma::{ChromaBuilder, ChromaProvider};
21
22#[cfg(feature = "vectorstore-qdrant")]
23pub mod qdrant;
24#[cfg(feature = "vectorstore-qdrant")]
25pub use qdrant::{QdrantBuilder, QdrantProvider};
26
27#[cfg(feature = "vectorstore-pinecone")]
28pub mod pinecone;
29#[cfg(feature = "vectorstore-pinecone")]
30pub use pinecone::{PineconeBuilder, PineconeProvider};
31
32#[cfg(feature = "vectorstore-weaviate")]
33pub mod weaviate;
34#[cfg(feature = "vectorstore-weaviate")]
35pub use weaviate::{WeaviateBuilder, WeaviateProvider};
36
37#[cfg(feature = "vectorstore-faiss")]
38pub mod faiss;
39#[cfg(feature = "vectorstore-faiss")]
40pub use faiss::{
41 FaissConfig, FaissIndex, FaissIndexType, FaissMetric, FaissVectorStore, FlatIndex, HNSWIndex,
42 IVFFlatIndex,
43};
44
/// Metadata predicate evaluated by [`Filter::matches`].
///
/// Every condition held in the four maps must be satisfied for a metadata map
/// to match; a filter with no conditions matches everything. Each map is keyed
/// by the metadata field name it constrains.
///
/// On the serde side, a map that is absent from the input deserializes as
/// empty (`default`), and an empty map is left out of the serialized output
/// (`skip_serializing_if`).
#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Filter {
    /// Field -> value the metadata entry must be equal to.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub equals: HashMap<String, serde_json::Value>,
    /// Field -> list of accepted values; the metadata entry must equal at
    /// least one of them. Written as a raw identifier because `in` is a
    /// Rust keyword.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub r#in: HashMap<String, Vec<serde_json::Value>>,
    /// Field -> inclusive lower bound, compared against the metadata entry
    /// read as an `f64`.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub gte: HashMap<String, f64>,
    /// Field -> inclusive upper bound, compared against the metadata entry
    /// read as an `f64`.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub lte: HashMap<String, f64>,
}
67
68impl Filter {
69 pub fn new() -> Self {
71 Self::default()
72 }
73
74 pub fn equals(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
76 self.equals.insert(key.into(), value.into());
77 self
78 }
79
80 pub fn one_of<I, V>(mut self, key: impl Into<String>, values: I) -> Self
82 where
83 I: IntoIterator<Item = V>,
84 V: Into<serde_json::Value>,
85 {
86 self.r#in
87 .insert(key.into(), values.into_iter().map(Into::into).collect());
88 self
89 }
90
91 pub fn gte(mut self, key: impl Into<String>, n: f64) -> Self {
93 self.gte.insert(key.into(), n);
94 self
95 }
96
97 pub fn lte(mut self, key: impl Into<String>, n: f64) -> Self {
99 self.lte.insert(key.into(), n);
100 self
101 }
102
103 pub fn is_empty(&self) -> bool {
105 self.equals.is_empty() && self.r#in.is_empty() && self.gte.is_empty() && self.lte.is_empty()
106 }
107
108 pub fn matches(&self, metadata: &HashMap<String, serde_json::Value>) -> bool {
110 for (k, v) in &self.equals {
111 match metadata.get(k) {
112 Some(actual) if actual == v => {}
113 _ => return false,
114 }
115 }
116 for (k, allowed) in &self.r#in {
117 match metadata.get(k) {
118 Some(actual) if allowed.iter().any(|v| v == actual) => {}
119 _ => return false,
120 }
121 }
122 for (k, lo) in &self.gte {
123 match metadata.get(k).and_then(|v| v.as_f64()) {
124 Some(n) if n >= *lo => {}
125 _ => return false,
126 }
127 }
128 for (k, hi) in &self.lte {
129 match metadata.get(k).and_then(|v| v.as_f64()) {
130 Some(n) if n <= *hi => {}
131 _ => return false,
132 }
133 }
134 true
135 }
136}
137
/// One hit returned by the search methods of [`VectorStore`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResult {
    /// Identifier of the matched entry.
    /// NOTE(review): presumably the same id space that `VectorStore::delete`
    /// accepts — confirm against implementors.
    pub id: String,
    /// Text associated with the matched entry.
    pub text: String,
    /// Score assigned to this hit by the store.
    /// NOTE(review): scale and direction (higher-is-better vs. distance) are
    /// not visible here — confirm per backend.
    pub score: f32,
    /// Metadata attached to the entry; this is what
    /// `similarity_search_with_filter` tests against a [`Filter`].
    pub metadata: HashMap<String, serde_json::Value>,
}
151
152#[async_trait]
155pub trait VectorStore: Send + Sync {
156 async fn add_texts(
159 &mut self,
160 texts: Vec<String>,
161 metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
162 ) -> Result<Vec<String>>;
163
164 async fn add_vectors(
167 &mut self,
168 vectors: Vec<Vec<f32>>,
169 texts: Vec<String>,
170 metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
171 ) -> Result<Vec<String>>;
172
173 async fn similarity_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>>;
175
176 async fn similarity_search_by_vector(
178 &self,
179 query_vector: Vec<f32>,
180 k: usize,
181 ) -> Result<Vec<SearchResult>>;
182
183 async fn similarity_search_with_filter(
189 &self,
190 query: &str,
191 k: usize,
192 filter: &Filter,
193 ) -> Result<Vec<SearchResult>> {
194 if filter.is_empty() {
195 return self.similarity_search(query, k).await;
196 }
197 let candidates = self.similarity_search(query, k.saturating_mul(4)).await?;
198 Ok(candidates
199 .into_iter()
200 .filter(|r| filter.matches(&r.metadata))
201 .take(k)
202 .collect())
203 }
204
205 async fn delete(&mut self, ids: Vec<String>) -> Result<()>;
207
208 fn len(&self) -> usize;
210
211 fn is_empty(&self) -> bool {
213 self.len() == 0
214 }
215}