cognis_rag/vectorstore/
mod.rs1use std::collections::HashMap;
4
5use async_trait::async_trait;
6use cognis_core::schemars::{self, JsonSchema};
7use serde::{Deserialize, Serialize};
8
9use cognis_core::Result;
10
11mod in_memory;
12pub use in_memory::InMemoryVectorStore;
13
14#[cfg(feature = "vectorstore-chroma")]
15pub mod chroma;
16#[cfg(feature = "vectorstore-chroma")]
17pub use chroma::{ChromaBuilder, ChromaProvider};
18
19#[cfg(feature = "vectorstore-qdrant")]
20pub mod qdrant;
21#[cfg(feature = "vectorstore-qdrant")]
22pub use qdrant::{QdrantBuilder, QdrantProvider};
23
24#[cfg(feature = "vectorstore-pinecone")]
25pub mod pinecone;
26#[cfg(feature = "vectorstore-pinecone")]
27pub use pinecone::{PineconeBuilder, PineconeProvider};
28
29#[cfg(feature = "vectorstore-weaviate")]
30pub mod weaviate;
31#[cfg(feature = "vectorstore-weaviate")]
32pub use weaviate::{WeaviateBuilder, WeaviateProvider};
33
34#[cfg(feature = "vectorstore-faiss")]
35pub mod faiss;
36#[cfg(feature = "vectorstore-faiss")]
37pub use faiss::{
38 FaissConfig, FaissIndex, FaissIndexType, FaissMetric, FaissVectorStore, FlatIndex, HNSWIndex,
39 IVFFlatIndex,
40};
41
42#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
47pub struct Filter {
48 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
52 pub equals: HashMap<String, serde_json::Value>,
53 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
56 pub r#in: HashMap<String, Vec<serde_json::Value>>,
57 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
59 pub gte: HashMap<String, f64>,
60 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
62 pub lte: HashMap<String, f64>,
63}
64
65impl Filter {
66 pub fn new() -> Self {
68 Self::default()
69 }
70
71 pub fn equals(mut self, key: impl Into<String>, value: impl Into<serde_json::Value>) -> Self {
73 self.equals.insert(key.into(), value.into());
74 self
75 }
76
77 pub fn one_of<I, V>(mut self, key: impl Into<String>, values: I) -> Self
79 where
80 I: IntoIterator<Item = V>,
81 V: Into<serde_json::Value>,
82 {
83 self.r#in
84 .insert(key.into(), values.into_iter().map(Into::into).collect());
85 self
86 }
87
88 pub fn gte(mut self, key: impl Into<String>, n: f64) -> Self {
90 self.gte.insert(key.into(), n);
91 self
92 }
93
94 pub fn lte(mut self, key: impl Into<String>, n: f64) -> Self {
96 self.lte.insert(key.into(), n);
97 self
98 }
99
100 pub fn is_empty(&self) -> bool {
102 self.equals.is_empty() && self.r#in.is_empty() && self.gte.is_empty() && self.lte.is_empty()
103 }
104
105 pub fn matches(&self, metadata: &HashMap<String, serde_json::Value>) -> bool {
107 for (k, v) in &self.equals {
108 match metadata.get(k) {
109 Some(actual) if actual == v => {}
110 _ => return false,
111 }
112 }
113 for (k, allowed) in &self.r#in {
114 match metadata.get(k) {
115 Some(actual) if allowed.iter().any(|v| v == actual) => {}
116 _ => return false,
117 }
118 }
119 for (k, lo) in &self.gte {
120 match metadata.get(k).and_then(|v| v.as_f64()) {
121 Some(n) if n >= *lo => {}
122 _ => return false,
123 }
124 }
125 for (k, hi) in &self.lte {
126 match metadata.get(k).and_then(|v| v.as_f64()) {
127 Some(n) if n <= *hi => {}
128 _ => return false,
129 }
130 }
131 true
132 }
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize)]
137pub struct SearchResult {
138 pub id: String,
140 pub text: String,
142 pub score: f32,
145 pub metadata: HashMap<String, serde_json::Value>,
147}
148
149#[async_trait]
152pub trait VectorStore: Send + Sync {
153 async fn add_texts(
156 &mut self,
157 texts: Vec<String>,
158 metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
159 ) -> Result<Vec<String>>;
160
161 async fn add_vectors(
164 &mut self,
165 vectors: Vec<Vec<f32>>,
166 texts: Vec<String>,
167 metadata: Option<Vec<HashMap<String, serde_json::Value>>>,
168 ) -> Result<Vec<String>>;
169
170 async fn similarity_search(&self, query: &str, k: usize) -> Result<Vec<SearchResult>>;
172
173 async fn similarity_search_by_vector(
175 &self,
176 query_vector: Vec<f32>,
177 k: usize,
178 ) -> Result<Vec<SearchResult>>;
179
180 async fn similarity_search_with_filter(
186 &self,
187 query: &str,
188 k: usize,
189 filter: &Filter,
190 ) -> Result<Vec<SearchResult>> {
191 if filter.is_empty() {
192 return self.similarity_search(query, k).await;
193 }
194 let candidates = self.similarity_search(query, k.saturating_mul(4)).await?;
195 Ok(candidates
196 .into_iter()
197 .filter(|r| filter.matches(&r.metadata))
198 .take(k)
199 .collect())
200 }
201
202 async fn delete(&mut self, ids: Vec<String>) -> Result<()>;
204
205 fn len(&self) -> usize;
207
208 fn is_empty(&self) -> bool {
210 self.len() == 0
211 }
212}