//! Vector storage primitives and a small retrieval pipeline.
//!
//! This file declares the [`Document`] type, the [`VectorStore`] trait, and
//! [`RagPipeline`], which combines an `EmbeddingProvider` from `cognate_core`
//! with a [`VectorStore`]. [`MemoryVectorStore`] is re-exported from the
//! [`memory`] module.
#![warn(missing_docs)]
/// Module that defines [`MemoryVectorStore`].
pub mod memory;
pub use memory::MemoryVectorStore;
use async_trait::async_trait;
use cognate_core::{EmbeddingProvider, Error};
use serde::{Deserialize, Serialize};
/// An embedding vector: a list of `f32` components.
pub type Vector = Vec<f32>;
/// A piece of text held in a [`VectorStore`], with its metadata and an
/// optional embedding.
///
/// The type is serializable and deserializable through `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Document {
/// Identifier of the document.
pub id: String,
/// Text content of the document.
pub content: String,
/// Arbitrary JSON metadata attached to the document.
pub metadata: serde_json::Value,
/// Embedding vector associated with this document, or `None` if none has
/// been attached.
pub embedding: Option<Vector>,
}
/// Storage backend that accepts [`Document`]s and answers vector queries.
///
/// Implementors must be `Send + Sync`. The methods are `async` and are made
/// usable in a trait through the `async_trait` attribute.
#[async_trait]
pub trait VectorStore: Send + Sync {
/// Adds `docs` to the store.
///
/// # Errors
///
/// Returns the implementation's boxed error when the documents cannot be
/// added.
async fn add_documents(
&self,
docs: Vec<Document>,
) -> Result<(), Box<dyn std::error::Error + Send + Sync>>;
/// Searches the store using `query_vector` and returns matching documents.
///
/// NOTE(review): `limit` is presumably the maximum number of results and
/// the ordering is presumably by similarity to `query_vector`; the trait
/// itself does not enforce either, so confirm against each implementation.
///
/// # Errors
///
/// Returns the implementation's boxed error when the search fails.
async fn search(
&self,
query_vector: Vector,
limit: usize,
) -> Result<Vec<Document>, Box<dyn std::error::Error + Send + Sync>>;
}
/// Pairs an embedder `E` with a store `V`.
///
/// The methods that use the pair (`new`, `ingest`, `retrieve`) are available
/// when `E` implements `EmbeddingProvider` and `V` implements [`VectorStore`].
pub struct RagPipeline<E, V> {
// Turns text into embedding vectors.
embedder: E,
// Receives the embedded documents and serves vector searches.
store: V,
}
impl<E: EmbeddingProvider, V: VectorStore> RagPipeline<E, V> {
pub fn new(embedder: E, store: V) -> Self {
Self { embedder, store }
}
pub async fn ingest(
&self,
texts: Vec<String>,
metadata: Vec<serde_json::Value>,
) -> cognate_core::Result<()> {
if texts.len() != metadata.len() {
return Err(Error::InvalidRequest(
"texts and metadata must have the same length".to_string(),
));
}
let embeddings = self
.embedder
.embed(texts.clone())
.await
.map_err(|e| Error::VectorStore(e.to_string()))?;
let docs: Vec<Document> = texts
.into_iter()
.zip(embeddings)
.zip(metadata)
.enumerate()
.map(|(i, ((content, emb), meta))| Document {
id: i.to_string(),
content,
metadata: meta,
embedding: Some(emb),
})
.collect();
self.store
.add_documents(docs)
.await
.map_err(|e| Error::VectorStore(e.to_string()))
}
pub async fn retrieve(
&self,
query: &str,
limit: usize,
) -> cognate_core::Result<Vec<Document>> {
let mut embeddings = self
.embedder
.embed(vec![query.to_string()])
.await
.map_err(|e| Error::VectorStore(e.to_string()))?;
let query_vec = if embeddings.is_empty() {
return Err(Error::VectorStore(
"embedding provider returned no vectors".to_string(),
));
} else {
embeddings.remove(0)
};
self.store
.search(query_vec, limit)
.await
.map_err(|e| Error::VectorStore(e.to_string()))
}
}