use std::collections::HashMap;
use std::path::Path;
use crate::{embed::Embedder, error::Result, vector_store::VecInfo};
/// Parameters for a full-text search, assembled builder-style.
///
/// Start with [`FtsQuery::new`] and chain the setters; each setter consumes the
/// query and returns the updated value.
#[derive(Debug, Clone)]
pub struct FtsQuery<'a> {
    /// Query text, borrowed from the caller.
    pub query: &'a str,
    /// Requested result limit; [`FtsQuery::new`] initialises it to 10.
    pub limit: usize,
    /// Optional path prefix; `None` until [`FtsQuery::path_prefix`] is called.
    /// NOTE(review): presumably restricts hits to paths under this prefix — confirm
    /// against the store implementation.
    pub path_prefix: Option<&'a Path>,
}

impl<'a> FtsQuery<'a> {
    /// Creates a query for `query` with `limit = 10` and no path prefix.
    #[must_use]
    pub fn new(query: &'a str) -> Self {
        Self {
            query,
            limit: 10,
            path_prefix: None,
        }
    }

    /// Sets the result limit to `n` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn limit(mut self, n: usize) -> Self {
        self.limit = n;
        self
    }

    /// Sets the path prefix to `p` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn path_prefix(mut self, p: &'a Path) -> Self {
        self.path_prefix = Some(p);
        self
    }
}
/// Parameters for a similarity search driven by an [`Embedder`].
///
/// Start with [`VectorQuery::new`] and chain the setters; each setter consumes the
/// query and returns the updated value.
pub struct VectorQuery<'a> {
    /// Query text, borrowed from the caller.
    pub query: &'a str,
    /// Embedder supplied by the caller.
    /// NOTE(review): presumably used to embed `query` before the lookup — confirm
    /// against the store implementation.
    pub embedder: &'a dyn Embedder,
    /// Requested result limit; [`VectorQuery::new`] initialises it to 10.
    pub limit: usize,
    /// Optional path prefix; `None` until [`VectorQuery::path_prefix`] is called.
    pub path_prefix: Option<&'a Path>,
}

impl<'a> VectorQuery<'a> {
    /// Creates a query for `query` using `embedder`, with `limit = 10` and no
    /// path prefix.
    #[must_use]
    pub fn new(query: &'a str, embedder: &'a dyn Embedder) -> Self {
        Self {
            query,
            embedder,
            limit: 10,
            path_prefix: None,
        }
    }

    /// Sets the result limit to `n` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn limit(mut self, n: usize) -> Self {
        self.limit = n;
        self
    }

    /// Sets the path prefix to `p` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn path_prefix(mut self, p: &'a Path) -> Self {
        self.path_prefix = Some(p);
        self
    }
}

impl std::fmt::Debug for VectorQuery<'_> {
    /// Formats `query`, `limit` and `path_prefix`. The `embedder` field is not
    /// printed; `finish_non_exhaustive` signals the omission with a trailing `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("VectorQuery")
            .field("query", &self.query)
            .field("limit", &self.limit)
            .field("path_prefix", &self.path_prefix)
            .finish_non_exhaustive()
    }
}
/// Parameters for a hybrid search, assembled builder-style.
///
/// Start with [`HybridQuery::new`] and chain the setters; each setter consumes the
/// query and returns the updated value.
pub struct HybridQuery<'a> {
    /// Query text, borrowed from the caller.
    pub query: &'a str,
    /// Optional embedder; `None` until [`HybridQuery::embedder`] is called.
    /// NOTE(review): how a missing embedder is handled is decided by the search
    /// implementation, which is not visible here.
    pub embedder: Option<&'a dyn Embedder>,
    /// Requested result limit; [`HybridQuery::new`] initialises it to 10.
    pub limit: usize,
    /// Optional path prefix; `None` until [`HybridQuery::path_prefix`] is called.
    pub path_prefix: Option<&'a Path>,
    /// Fusion constant, 60.0 by default.
    /// NOTE(review): name suggests the `k` of reciprocal rank fusion — confirm.
    pub rrf_k: f64,
    /// Weight for the full-text side, 1.0 by default (presumed meaning — confirm).
    pub weight_fts: f64,
    /// Weight for the semantic side, 1.0 by default (presumed meaning — confirm).
    pub weight_sem: f64,
}

impl<'a> HybridQuery<'a> {
    /// Creates a query for `query` with no embedder, `limit = 10`, no path prefix,
    /// `rrf_k = 60.0` and both weights at `1.0`.
    #[must_use]
    pub fn new(query: &'a str) -> Self {
        Self {
            query,
            embedder: None,
            limit: 10,
            path_prefix: None,
            rrf_k: 60.0,
            weight_fts: 1.0,
            weight_sem: 1.0,
        }
    }

    /// Attaches the embedder `e` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn embedder(mut self, e: &'a dyn Embedder) -> Self {
        self.embedder = Some(e);
        self
    }

    /// Sets the result limit to `n` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn limit(mut self, n: usize) -> Self {
        self.limit = n;
        self
    }

    /// Sets the path prefix to `p` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn path_prefix(mut self, p: &'a Path) -> Self {
        self.path_prefix = Some(p);
        self
    }

    /// Sets `rrf_k` to `k` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn rrf_k(mut self, k: f64) -> Self {
        self.rrf_k = k;
        self
    }

    /// Sets `weight_fts` to `w` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn weight_fts(mut self, w: f64) -> Self {
        self.weight_fts = w;
        self
    }

    /// Sets `weight_sem` to `w` and returns the updated query.
    #[must_use = "builder setters return the updated query; the receiver is consumed"]
    pub fn weight_sem(mut self, w: f64) -> Self {
        self.weight_sem = w;
        self
    }
}

impl std::fmt::Debug for HybridQuery<'_> {
    /// Formats every field except `embedder`; `finish_non_exhaustive` signals the
    /// omission with a trailing `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("HybridQuery")
            .field("query", &self.query)
            .field("limit", &self.limit)
            .field("path_prefix", &self.path_prefix)
            .field("rrf_k", &self.rrf_k)
            .field("weight_fts", &self.weight_fts)
            .field("weight_sem", &self.weight_sem)
            .finish_non_exhaustive()
    }
}
/// A document record: an id, its body text, its path, and optional chunks.
///
/// Derives serde `Serialize`/`Deserialize`, so it can round-trip through any
/// serde data format.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Document {
/// Numeric identifier of the document.
pub id: i64,
/// Body text of the document.
pub body: String,
/// Path associated with the document, stored as a string.
pub path: String,
/// Optional chunk list. Left out of serialized output when `None`, and
/// defaulted to `None` when the field is absent on deserialization.
/// NOTE(review): the tuple looks like (line_start, line_end, text), mirroring
/// `ChunkHit` — confirm with the code that produces it.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub chunks: Option<Vec<(usize, usize, String)>>,
}
/// A scored chunk of text identified by a line range.
///
/// Carried inside `FileSearchResult::chunks`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ChunkHit {
/// First line of the chunk.
/// NOTE(review): 0- vs 1-based numbering is not visible here — confirm.
pub line_start: usize,
/// Last line of the chunk.
/// NOTE(review): inclusive vs exclusive is not visible here — confirm.
pub line_end: usize,
/// Text of the chunk.
pub text: String,
/// Score assigned to this chunk; scale and ordering depend on the search that
/// produced it.
pub score: f64,
}
/// One search result entry: an id, a path, a score, and its chunk hits.
///
/// This is the element type returned by the `RetrieveStore` search methods.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct FileSearchResult {
/// Numeric identifier of the result.
/// NOTE(review): presumably the matching `Document::id` — confirm.
pub id: i64,
/// Path of the result, stored as a string.
pub path: String,
/// Score for the entry as a whole; how it relates to the per-chunk scores is
/// decided by the search implementation.
pub score: f64,
/// Chunk hits attached to this entry.
pub chunks: Vec<ChunkHit>,
}
/// Storage and search interface for files, documents, and their embeddings.
///
/// Implementors must be `Send + Sync`. Every method takes `&self` and returns
/// the crate's `Result`. Only `search_hybrid` has a default body; all other
/// methods must be supplied by the implementor.
pub trait RetrieveStore: Send + Sync {
/// Returns a map from string keys to `i64` values.
/// NOTE(review): presumably file path -> recorded mtime — confirm.
fn file_mtimes(&self) -> Result<HashMap<String, i64>>;
/// Inserts or updates the record for `path` with the given `mtime`.
fn upsert_file(&self, path: &str, mtime: i64) -> Result<()>;
/// Removes the record for `path`.
fn remove_file(&self, path: &str) -> Result<()>;
/// Returns the number of file records.
fn file_count(&self) -> Result<u64>;
/// Inserts or updates `doc`.
fn upsert_document(&self, doc: &Document) -> Result<()>;
/// Removes the document with the given `id`.
fn remove_document(&self, id: i64) -> Result<()>;
/// Rebuilds the full-text-search data (details are implementation-defined).
fn rebuild_fts(&self) -> Result<()>;
/// Returns the ids of the stored documents.
fn document_ids(&self) -> Result<Vec<i64>>;
/// Returns the number of stored documents.
fn document_count(&self) -> Result<u64>;
/// Runs `embedder` over whatever is pending and reports progress through
/// `on_progress`.
/// NOTE(review): the callback arguments look like (done, total) and the return
/// value like the number of items embedded — confirm with an implementation.
fn embed_pending(
&self,
embedder: &dyn Embedder,
on_progress: &dyn Fn(usize, usize),
) -> Result<usize>;
/// Returns the store's `VecInfo` (defined in `crate::vector_store`).
fn vec_info(&self) -> Result<VecInfo>;
/// Runs the full-text search described by `q`.
fn search_fts(&self, q: &FtsQuery<'_>) -> Result<Vec<FileSearchResult>>;
/// Runs the similarity search described by `q`.
fn search_similar(&self, q: &VectorQuery<'_>) -> Result<Vec<FileSearchResult>>;
/// Runs the hybrid search described by `q`.
///
/// The default implementation delegates to `crate::db::default_hybrid`,
/// passing this store and the query; implementors may override it.
fn search_hybrid(&self, q: &HybridQuery<'_>) -> Result<Vec<FileSearchResult>> {
crate::db::default_hybrid(self, q)
}
}