use serde::Deserialize;
/// Top-level configuration, populated through serde deserialization.
///
/// Fields carrying `#[serde(default = "...")]` may be omitted from the input
/// and are then filled by the named function. Every other field has no
/// default and must be present in the input.
#[derive(Debug, Clone, Deserialize)]
pub struct Config {
    /// Required. NOTE(review): meaning is not visible in this file — presumably
    /// the minimum number of agreeing retrievers; confirm with the consumer.
    pub quorum_threshold: usize,
    /// Required. NOTE(review): presumably the number of results to keep — confirm.
    pub top_k: usize,
    /// Required. NOTE(review): presumably a similarity cutoff used when
    /// clustering; the valid range is not established here — confirm.
    pub cluster_threshold: f32,
    /// Required list of retriever definitions, one [`RetrieverConfig`] per entry.
    pub retrievers: Vec<RetrieverConfig>,
    /// Required Ollama settings (URL and model names); see [`OllamaConfig`].
    pub ollama: OllamaConfig,
    /// Corpus directory; `"data/corpus"` (via `default_corpus_dir`) when omitted.
    #[serde(default = "default_corpus_dir")]
    pub corpus_dir: String,
    /// Cache directory; `"data/cache"` (via `default_cache_dir`) when omitted.
    #[serde(default = "default_cache_dir")]
    pub cache_dir: String,
    /// `60.0` (via `default_rrf_k`) when omitted.
    /// NOTE(review): name suggests the reciprocal-rank-fusion constant — confirm.
    #[serde(default = "default_rrf_k")]
    pub rrf_k: f32,
    /// `8` (via `default_embed_batch`) when omitted.
    /// NOTE(review): presumably the number of texts per embedding request — confirm.
    #[serde(default = "default_embed_batch")]
    pub embed_batch: usize,
    /// `0.7` (via `default_rank_alpha`) when omitted.
    /// NOTE(review): presumably a weight in the ranking score — confirm.
    #[serde(default = "default_rank_alpha")]
    pub rank_alpha: f32,
    /// `0.3` (via `default_rank_beta`) when omitted.
    /// NOTE(review): presumably the complementary ranking weight — confirm.
    #[serde(default = "default_rank_beta")]
    pub rank_beta: f32,
    /// `5` (via `default_max_context_clusters`) when omitted.
    /// NOTE(review): presumably caps the clusters passed on as context — confirm.
    #[serde(default = "default_max_context_clusters")]
    pub max_context_clusters: usize,
}
/// Settings for a single retriever entry, populated through serde deserialization.
#[derive(Debug, Clone, Deserialize)]
pub struct RetrieverConfig {
    /// Required. Selects which [`RetrieverType`] variant this entry describes.
    pub retriever_type: RetrieverType,
    /// Required. NOTE(review): the unit (characters, tokens, ...) is not
    /// established in this file — confirm with the chunking code.
    pub chunk_size: usize,
    /// Optional; falls back to `usize::default()` (0) when omitted.
    /// NOTE(review): presumably expressed in the same unit as `chunk_size` — confirm.
    #[serde(default)]
    pub overlap: usize,
}
/// Kind of retriever selected by a retriever configuration entry.
///
/// With `rename_all = "lowercase"` the variants deserialize from the strings
/// `"dense"` and `"bm25"`.
///
/// `Eq` and `Hash` are derived alongside `PartialEq`: the enum carries no
/// data, so equality is total, and the extra impls let values be used in
/// `Eq`-bounded contexts and as `HashMap`/`HashSet` keys.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum RetrieverType {
    /// Deserialized from `"dense"`.
    Dense,
    /// Deserialized from `"bm25"`.
    Bm25,
}
/// Ollama-related settings, populated through serde deserialization.
#[derive(Debug, Clone, Deserialize)]
pub struct OllamaConfig {
    /// Required. NOTE(review): presumably the base URL of the Ollama server;
    /// the expected format is not established in this file — confirm.
    pub url: String,
    /// Required. NOTE(review): presumably the generation model name — confirm.
    pub model: String,
    /// Embedding model name; `"nomic-embed-text"` (via `default_embed_model`)
    /// when omitted.
    #[serde(default = "default_embed_model")]
    pub embed_model: String,
}
/// Serde fallback for `Config::corpus_dir`, used when the input omits that key.
fn default_corpus_dir() -> String {
    String::from("data/corpus")
}
/// Serde fallback for `Config::cache_dir`, used when the input omits that key.
fn default_cache_dir() -> String {
    String::from("data/cache")
}
/// Serde fallback for `Config::rrf_k`, used when the input omits that key.
fn default_rrf_k() -> f32 {
    const RRF_K: f32 = 60.0;
    RRF_K
}
/// Serde fallback for `Config::embed_batch`, used when the input omits that key.
fn default_embed_batch() -> usize {
    const EMBED_BATCH: usize = 8;
    EMBED_BATCH
}
/// Serde fallback for `Config::rank_alpha`, used when the input omits that key.
fn default_rank_alpha() -> f32 {
    const RANK_ALPHA: f32 = 0.7;
    RANK_ALPHA
}
/// Serde fallback for `Config::rank_beta`, used when the input omits that key.
fn default_rank_beta() -> f32 {
    const RANK_BETA: f32 = 0.3;
    RANK_BETA
}
/// Serde fallback for `Config::max_context_clusters`, used when the input
/// omits that key.
fn default_max_context_clusters() -> usize {
    const MAX_CONTEXT_CLUSTERS: usize = 5;
    MAX_CONTEXT_CLUSTERS
}
/// Serde fallback for `OllamaConfig::embed_model`, used when the input omits
/// that key.
fn default_embed_model() -> String {
    String::from("nomic-embed-text")
}