use anyhow::Result;
use async_trait::async_trait;
use std::sync::Arc;
use crate::librarian::catalog::Catalog;
#[cfg(feature = "server-stack")]
use crate::retrieval::qdrant::QdrantWrap;
/// Which vector store implementation holds artifact embeddings.
///
/// The value is chosen by `ArtifactBackend::resolve`, which consults an
/// environment variable, then the project's config file, then a
/// compile-time default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ArtifactBackend {
/// Qdrant-backed store. Selected by the name `qdrant`; this is the default
/// when the crate is built with the `server-stack` feature.
Qdrant,
/// sqlite-vec-backed store. Selected by any of `sqlite-vec`, `sqlite_vec`,
/// `sqlite` or `local`; this is the default when `server-stack` is disabled.
SqliteVec,
}
impl ArtifactBackend {
    /// Decides which artifact vector backend to use.
    ///
    /// Sources are consulted in this order, and the first one that yields a
    /// recognized backend name wins:
    ///
    /// 1. The `CODESCOUT_ARTIFACT_BACKEND` environment variable.
    /// 2. The string at `librarian.vector_backend` inside
    ///    `<project_path>/.codescout/project.toml` (only when `project_path`
    ///    is `Some`).
    /// 3. The compile-time default: `Qdrant` when the `server-stack` feature
    ///    is enabled, `SqliteVec` otherwise.
    ///
    /// A source that is missing, unreadable, not valid TOML, or holds an
    /// unrecognized name is skipped silently rather than reported.
    pub fn resolve(project_path: Option<&str>) -> Self {
        // Highest priority: explicit override from the environment.
        let from_env = std::env::var("CODESCOUT_ARTIFACT_BACKEND")
            .ok()
            .and_then(|raw| Self::parse(&raw));
        if let Some(backend) = from_env {
            return backend;
        }

        // Next: per-project configuration. Every failure along the way
        // (no file, bad TOML, missing key, non-string value) yields `None`.
        let from_config = project_path.and_then(|root| {
            let config_path = std::path::Path::new(root)
                .join(".codescout")
                .join("project.toml");
            let text = std::fs::read_to_string(&config_path).ok()?;
            let document = toml::from_str::<toml::Value>(&text).ok()?;
            let name = document
                .get("librarian")?
                .get("vector_backend")?
                .as_str()?;
            Self::parse(name)
        });
        if let Some(backend) = from_config {
            return backend;
        }

        // Fallback: whatever this build was compiled for.
        #[cfg(feature = "server-stack")]
        {
            ArtifactBackend::Qdrant
        }
        #[cfg(not(feature = "server-stack"))]
        {
            ArtifactBackend::SqliteVec
        }
    }

    /// Maps a user-supplied backend name to a variant.
    ///
    /// Matching ignores surrounding whitespace and ASCII case. Returns `None`
    /// for any name that is not one of the known spellings.
    fn parse(s: &str) -> Option<Self> {
        let normalized = s.trim().to_ascii_lowercase();
        let backend = match normalized.as_str() {
            "qdrant" => Self::Qdrant,
            "sqlite-vec" | "sqlite_vec" | "sqlite" | "local" => Self::SqliteVec,
            _ => return None,
        };
        Some(backend)
    }
}
/// Backend-agnostic storage for artifact embedding vectors, addressed by
/// artifact id.
///
/// Implementors must be `Send + Sync`. Methods are async via `async_trait`.
#[async_trait]
pub trait ArtifactVectorStore: Send + Sync {
/// Stores `vector` for the artifact `id`, associated with `project_id`.
///
/// Backends are free to ignore `project_id`; the sqlite-vec store in this
/// file does.
async fn upsert(&self, project_id: &str, id: &str, vector: &[f32]) -> Result<()>;
/// Removes the vector stored for artifact `id`.
///
/// NOTE(review): the visible implementations do not fail when `id` is
/// absent in the in-memory case; confirm the same holds for other backends.
async fn delete(&self, id: &str) -> Result<()>;
/// Returns the ids of up to `k` stored vectors closest to `query`.
///
/// When `project_id` is `Some`, it is handed to the backend as a filter;
/// the sqlite-vec store in this file ignores it. The sqlite-vec and
/// in-memory implementations return ids best match first; presumably the
/// Qdrant one does too — TODO confirm.
async fn knn(&self, project_id: Option<&str>, query: &[f32], k: usize) -> Result<Vec<String>>;
}
/// Artifact vector store backed by a Qdrant collection.
///
/// Only compiled when the `server-stack` feature is enabled.
#[cfg(feature = "server-stack")]
pub struct QdrantArtifactStore {
// Handle used for every Qdrant call made by this store.
qdrant: QdrantWrap,
// Name of the collection that holds the artifact vectors.
collection: String,
// Set once `ensure_artifacts_collection` has succeeded, so that the
// collection setup is not repeated on every upsert.
ensured: tokio::sync::OnceCell<()>,
}
#[cfg(feature = "server-stack")]
impl QdrantArtifactStore {
    /// Builds a store that keeps artifact vectors in `collection`.
    ///
    /// No request is made to Qdrant here; the collection is set up lazily on
    /// the first upsert.
    pub fn new(qdrant: QdrantWrap, collection: impl Into<String>) -> Self {
        let collection: String = collection.into();
        let ensured = tokio::sync::OnceCell::new();
        Self {
            qdrant,
            collection,
            ensured,
        }
    }

    /// Makes sure the collection has been set up for vectors of size `dim`.
    ///
    /// The setup call runs until it succeeds once; after that this is a
    /// no-op, so the `dim` of later calls is not looked at again. A failed
    /// setup leaves the cell empty and the error is returned to the caller.
    async fn ensure(&self, dim: u64) -> Result<()> {
        self.ensured
            .get_or_try_init(|| {
                self.qdrant
                    .ensure_artifacts_collection(&self.collection, dim)
            })
            .await?;
        Ok(())
    }
}
#[cfg(feature = "server-stack")]
#[async_trait]
impl ArtifactVectorStore for QdrantArtifactStore {
    /// Writes `vector` for `id` into the collection, setting the collection
    /// up first if that has not happened yet.
    ///
    /// # Errors
    ///
    /// Fails for an empty `vector`, and propagates any error from the
    /// collection setup or from the upsert itself.
    async fn upsert(&self, project_id: &str, id: &str, vector: &[f32]) -> Result<()> {
        let dim = vector.len();
        if dim == 0 {
            anyhow::bail!("artifact embedding dim is 0 (embedder returned an empty vector)");
        }

        // The vector length decides the dimension the collection is set up with.
        self.ensure(dim as u64).await?;

        let payload = vector.to_vec();
        self.qdrant
            .artifact_upsert(&self.collection, project_id, id, payload)
            .await
    }

    /// Deletes the point for `id`. When the collection does not exist there
    /// is nothing to delete and the call succeeds without touching it.
    async fn delete(&self, id: &str) -> Result<()> {
        let collection_present = self.qdrant.collection_exists(&self.collection).await?;
        if collection_present {
            self.qdrant.artifact_delete(&self.collection, id).await
        } else {
            Ok(())
        }
    }

    /// Looks up the ids of up to `k` points near `query`, passing
    /// `project_id` through to Qdrant. A missing collection yields an empty
    /// result instead of an error.
    async fn knn(&self, project_id: Option<&str>, query: &[f32], k: usize) -> Result<Vec<String>> {
        let collection_present = self.qdrant.collection_exists(&self.collection).await?;
        if collection_present {
            let needle = query.to_vec();
            self.qdrant
                .artifact_knn_ids(&self.collection, project_id, needle, k)
                .await
        } else {
            Ok(vec![])
        }
    }
}
/// Artifact vector store that keeps embeddings in the `artifact_vec` table of
/// the catalog's SQLite connection.
pub struct SqliteVecArtifactStore {
// Shared catalog; the mutex is taken for the duration of each operation.
catalog: Arc<parking_lot::Mutex<Catalog>>,
}
impl SqliteVecArtifactStore {
pub fn new(catalog: Arc<parking_lot::Mutex<Catalog>>) -> Self {
Self { catalog }
}
}
#[async_trait]
impl ArtifactVectorStore for SqliteVecArtifactStore {
    /// Writes the embedding for `id` through the indexer's
    /// `write_embeddings` helper. `_project_id` is not used by this backend.
    async fn upsert(&self, _project_id: &str, id: &str, vector: &[f32]) -> Result<()> {
        let batch = [(id.to_string(), vector.to_vec())];
        let guard = self.catalog.lock();
        crate::librarian::indexer::write_embeddings(&guard, &batch)
    }

    /// Deletes the `artifact_vec` row whose id equals `id`. The number of
    /// rows removed is not inspected, so a missing id is not an error.
    async fn delete(&self, id: &str) -> Result<()> {
        let guard = self.catalog.lock();
        let _rows_removed = guard.conn.execute(
            "DELETE FROM artifact_vec WHERE id = ?1",
            rusqlite::params![id],
        )?;
        Ok(())
    }

    /// Returns up to `k` ids from `artifact_vec`, ordered by ascending
    /// distance to `query`. `_project_id` is not used by this backend.
    async fn knn(&self, _project_id: Option<&str>, query: &[f32], k: usize) -> Result<Vec<String>> {
        // The query is bound as a blob of little-endian f32 values.
        let mut blob: Vec<u8> = Vec::with_capacity(std::mem::size_of_val(query));
        for component in query {
            blob.extend_from_slice(&component.to_le_bytes());
        }

        let guard = self.catalog.lock();
        let mut stmt = guard.conn.prepare(
            "SELECT id FROM artifact_vec WHERE embedding MATCH vec_f32(?1) ORDER BY distance LIMIT ?2",
        )?;
        let rows = stmt.query_map(rusqlite::params![blob, k as i64], |row| {
            row.get::<_, String>(0)
        })?;

        // Stop at the first row that fails to decode and surface that error.
        let mut ids = Vec::new();
        for row in rows {
            ids.push(row?);
        }
        Ok(ids)
    }
}
#[cfg(test)]
pub mod test_support {
    use super::*;
    use std::collections::HashMap;

    /// In-memory `ArtifactVectorStore` for tests.
    ///
    /// Points live in a map from artifact id to `(project_id, vector)`, and
    /// nearest-neighbour search is a brute-force cosine-similarity scan.
    #[derive(Default)]
    pub struct InMemoryArtifactStore {
        points: parking_lot::Mutex<HashMap<String, (String, Vec<f32>)>>,
    }

    /// Cosine similarity of `a` and `b`.
    ///
    /// Returns `0.0` when either vector has zero norm, so the division is
    /// never by zero. The dot product only covers the common prefix of the
    /// two slices (`zip` stops at the shorter one), while each norm is taken
    /// over the whole slice.
    fn cosine(a: &[f32], b: &[f32]) -> f32 {
        let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
        let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
        let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
        if na == 0.0 || nb == 0.0 {
            0.0
        } else {
            dot / (na * nb)
        }
    }

    #[async_trait]
    impl ArtifactVectorStore for InMemoryArtifactStore {
        /// Inserts the point, replacing any previous entry with the same `id`.
        async fn upsert(&self, project_id: &str, id: &str, vector: &[f32]) -> Result<()> {
            self.points
                .lock()
                .insert(id.to_string(), (project_id.to_string(), vector.to_vec()));
            Ok(())
        }

        /// Removes the point for `id`; an unknown id is not an error.
        async fn delete(&self, id: &str) -> Result<()> {
            self.points.lock().remove(id);
            Ok(())
        }

        /// Returns up to `k` ids ordered by descending cosine similarity to
        /// `query`, restricted to `project_id` when it is `Some`.
        ///
        /// The ordering is deterministic: equal scores are ordered by
        /// ascending id instead of by `HashMap` iteration order, and a NaN
        /// score is ranked last.
        async fn knn(
            &self,
            project_id: Option<&str>,
            query: &[f32],
            k: usize,
        ) -> Result<Vec<String>> {
            let pts = self.points.lock();
            let mut scored: Vec<(String, f32)> = pts
                .iter()
                .filter(|(_, (pid, _))| project_id.is_none_or(|p| p == pid))
                .map(|(id, (_, v))| {
                    let score = cosine(query, v);
                    // A NaN similarity cannot be ranked; treat it as the worst match.
                    let score = if score.is_nan() {
                        f32::NEG_INFINITY
                    } else {
                        score
                    };
                    (id.clone(), score)
                })
                .collect();
            // `total_cmp` is a total order, which `sort_by` requires; the id
            // tie-break makes the result independent of map iteration order.
            scored.sort_by(|a, b| b.1.total_cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
            Ok(scored.into_iter().take(k).map(|(id, _)| id).collect())
        }
    }
}
#[cfg(test)]
mod backend_tests {
    use super::test_support::InMemoryArtifactStore;
    use super::*;

    /// Every accepted spelling maps to its backend, surrounding whitespace
    /// and case are ignored, and unknown names are rejected.
    #[test]
    fn parse_recognizes_known_backends() {
        let cases = [
            ("qdrant", Some(ArtifactBackend::Qdrant)),
            ("sqlite-vec", Some(ArtifactBackend::SqliteVec)),
            (" SQLite ", Some(ArtifactBackend::SqliteVec)),
            ("local", Some(ArtifactBackend::SqliteVec)),
            ("nonsense", None),
        ];
        for (input, expected) in cases {
            // Pairing the input with the result makes a failure show which case broke.
            assert_eq!((input, ArtifactBackend::parse(input)), (input, expected));
        }
    }

    /// A project filter only returns that project's ids; no filter returns all.
    #[tokio::test]
    async fn knn_filters_by_project_id() {
        let store = InMemoryArtifactStore::default();
        for (project, id) in [("p1", "a"), ("p2", "b")] {
            store.upsert(project, id, &[1.0, 0.0]).await.unwrap();
        }

        let only_p1 = store.knn(Some("p1"), &[1.0, 0.0], 10).await.unwrap();
        assert_eq!(only_p1, vec!["a".to_string()]);

        let mut everything = store.knn(None, &[1.0, 0.0], 10).await.unwrap();
        everything.sort();
        assert_eq!(everything, vec!["a".to_string(), "b".to_string()]);
    }

    /// Deleting the same id twice succeeds both times and leaves the store empty.
    #[tokio::test]
    async fn delete_is_idempotent() {
        let store = InMemoryArtifactStore::default();
        store.upsert("p", "a", &[1.0]).await.unwrap();
        for _ in 0..2 {
            store.delete("a").await.unwrap();
        }

        let remaining = store.knn(None, &[1.0], 10).await.unwrap();
        assert!(remaining.is_empty());
    }
}