brainwires_storage/databases/traits.rs
1//! Unified database traits for the Brainwires storage layer.
2//!
3//! Two traits define the database capabilities:
4//!
5//! - [`StorageBackend`] — generic CRUD + vector search for domain stores
6//! (conversations, messages, tasks, plans, etc.)
7//! - [`VectorDatabase`] — RAG-style embedding storage with hybrid search
8//! for the codebase indexing subsystem
9//!
10//! A single database struct (e.g. `LanceDatabase`, `PostgresDatabase`) can
11//! implement **both** traits, sharing one connection pool.
12
13use anyhow::Result;
14
15use super::types::{FieldDef, Filter, Record, ScoredRecord};
16
17// Re-export core types so consumers can use `databases::traits::*`.
18pub use brainwires_core::{ChunkMetadata, DatabaseStats, SearchResult};
19
20// ── StorageBackend ──────────────────────────────────────────────────────
21
22/// Backend-agnostic storage operations.
23///
24/// Domain stores ([`MessageStore`](crate::stores::message_store::MessageStore), etc.)
25/// are generic over this trait so they can work with any supported database.
26#[async_trait::async_trait]
27pub trait StorageBackend: Send + Sync {
28 /// Ensure a table exists with the given schema.
29 ///
30 /// Implementations should be idempotent — calling this on an existing table
31 /// is a no-op (or verifies compatibility).
32 async fn ensure_table(&self, table_name: &str, schema: &[FieldDef]) -> Result<()>;
33
34 /// Insert one or more records into a table.
35 async fn insert(&self, table_name: &str, records: Vec<Record>) -> Result<()>;
36
37 /// Query records matching an optional filter.
38 ///
39 /// Pass `None` for `filter` to return all rows (up to `limit`).
40 async fn query(
41 &self,
42 table_name: &str,
43 filter: Option<&Filter>,
44 limit: Option<usize>,
45 ) -> Result<Vec<Record>>;
46
47 /// Delete records matching a filter.
48 async fn delete(&self, table_name: &str, filter: &Filter) -> Result<()>;
49
50 /// Count records matching an optional filter.
51 async fn count(&self, table_name: &str, filter: Option<&Filter>) -> Result<usize> {
52 // Default implementation: count via query.
53 Ok(self.query(table_name, filter, None).await?.len())
54 }
55
56 /// Vector similarity search.
57 ///
58 /// Returns up to `limit` records ordered by descending similarity to `vector`.
59 /// An optional `filter` narrows the candidates before ranking.
60 async fn vector_search(
61 &self,
62 table_name: &str,
63 vector_column: &str,
64 vector: Vec<f32>,
65 limit: usize,
66 filter: Option<&Filter>,
67 ) -> Result<Vec<ScoredRecord>>;
68}
69
70// ── VectorDatabase ──────────────────────────────────────────────────────
71
72/// Trait for vector database operations used by the RAG subsystem.
73///
74/// Implementations handle connection management, BM25 keyword indexing, and
75/// hybrid search fusion internally.
76#[async_trait::async_trait]
77pub trait VectorDatabase: Send + Sync {
78 /// Initialize the database and create collections if needed.
79 async fn initialize(&self, dimension: usize) -> Result<()>;
80
81 /// Store embeddings with metadata.
82 ///
83 /// `root_path` is the normalized root of the indexed project — used for
84 /// per-project BM25 isolation.
85 async fn store_embeddings(
86 &self,
87 embeddings: Vec<Vec<f32>>,
88 metadata: Vec<ChunkMetadata>,
89 contents: Vec<String>,
90 root_path: &str,
91 ) -> Result<usize>;
92
93 /// Search for similar vectors.
94 #[allow(clippy::too_many_arguments)]
95 async fn search(
96 &self,
97 query_vector: Vec<f32>,
98 query_text: &str,
99 limit: usize,
100 min_score: f32,
101 project: Option<String>,
102 root_path: Option<String>,
103 hybrid: bool,
104 ) -> Result<Vec<SearchResult>>;
105
106 /// Search with additional filters (extensions, languages, path patterns).
107 #[allow(clippy::too_many_arguments)]
108 async fn search_filtered(
109 &self,
110 query_vector: Vec<f32>,
111 query_text: &str,
112 limit: usize,
113 min_score: f32,
114 project: Option<String>,
115 root_path: Option<String>,
116 hybrid: bool,
117 file_extensions: Vec<String>,
118 languages: Vec<String>,
119 path_patterns: Vec<String>,
120 ) -> Result<Vec<SearchResult>>;
121
122 /// Delete embeddings for a specific file.
123 async fn delete_by_file(&self, file_path: &str) -> Result<usize>;
124
125 /// Clear all embeddings.
126 async fn clear(&self) -> Result<()>;
127
128 /// Get statistics about the stored data.
129 async fn get_statistics(&self) -> Result<DatabaseStats>;
130
131 /// Flush/save changes to disk.
132 async fn flush(&self) -> Result<()>;
133
134 /// Count embeddings for a specific root path.
135 async fn count_by_root_path(&self, root_path: &str) -> Result<usize>;
136
137 /// Get unique file paths indexed for a specific root path.
138 async fn get_indexed_files(&self, root_path: &str) -> Result<Vec<String>>;
139
140 /// Search and return results together with their embedding vectors.
141 ///
142 /// Used by the spectral diversity reranker which needs the raw embeddings
143 /// to compute pairwise similarities. The default implementation delegates
144 /// to [`search`](VectorDatabase::search) and returns empty embedding vectors.
145 #[allow(clippy::too_many_arguments)]
146 async fn search_with_embeddings(
147 &self,
148 query_vector: Vec<f32>,
149 query_text: &str,
150 limit: usize,
151 min_score: f32,
152 project: Option<String>,
153 root_path: Option<String>,
154 hybrid: bool,
155 ) -> Result<(Vec<SearchResult>, Vec<Vec<f32>>)> {
156 let results = self
157 .search(
158 query_vector,
159 query_text,
160 limit,
161 min_score,
162 project,
163 root_path,
164 hybrid,
165 )
166 .await?;
167 let empty_embeddings = vec![Vec::new(); results.len()];
168 Ok((results, empty_embeddings))
169 }
170}