use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::config::{
ChunkerConfig, FastembedEmbedderConfig, FramerConfig, RuntimeConfig, SourceConfig,
};
/// A single gold-standard entry: a query string paired with a document id.
///
/// Both serializable and deserializable, so it can appear in the bake-off
/// config (see [`GoldQueriesSpec`]) and be echoed back in [`BakeoffResults`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GoldQuery {
    /// The query text.
    pub query: String,
    /// Document id associated with this query — presumably the document a
    /// correct retrieval should return; confirm against the scoring code.
    pub gold_doc_id: String,
}
/// The `matrix` section of a bake-off config: lists of embedder and chunker
/// configurations.
///
/// NOTE(review): the name suggests every chunker is paired with every
/// embedder, but the combination logic is not in this file — confirm.
#[derive(Debug, Clone, Deserialize)]
pub struct MatrixConfig {
    /// Embedder configurations to include.
    pub embedders: Vec<FastembedEmbedderConfig>,
    /// Chunker configurations to include.
    pub chunkers: Vec<ChunkerConfig>,
}
/// Legacy single-target form of the config (the `target:` key).
///
/// [`BakeoffConfig::effective_targets`] converts this into a single
/// [`BakeoffTargetEntry::Postgres`] entry.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffTargetConfig {
    /// Presumably the name of the environment variable that holds the DSN —
    /// the lookup itself happens outside this file.
    pub dsn_env: String,
    /// Read from the `schema` key in the config file.
    #[serde(rename = "schema")]
    pub schema_name: String,
}
/// One entry of the multi-backend `targets:` list.
///
/// Internally tagged: each entry carries a `type` field whose value selects
/// the variant, using the snake_case variant name (`postgres`, `mariadb`,
/// `sqlite`, `clickhouse`). The remaining fields of the entry are
/// deserialized into that variant's payload struct.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum BakeoffTargetEntry {
    /// `type: postgres`
    Postgres(BakeoffPostgresTarget),
    /// `type: mariadb`
    Mariadb(BakeoffMariadbTarget),
    /// `type: sqlite`
    Sqlite(BakeoffSqliteTarget),
    /// `type: clickhouse`
    Clickhouse(BakeoffClickhouseTarget),
}
/// Payload of a `type: postgres` target entry.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffPostgresTarget {
    /// Presumably the name of the environment variable that holds the DSN —
    /// the lookup itself happens outside this file.
    pub dsn_env: String,
    /// Read from the `database` key in the config file.
    #[serde(rename = "database")]
    pub database_name: String,
    /// Vector metric name; falls back to `"cosine"` when the key is absent.
    /// The set of accepted values is not defined in this file.
    #[serde(default = "default_vector_metric")]
    pub vector_metric: String,
}
/// Payload of a `type: mariadb` target entry.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffMariadbTarget {
    /// Presumably the name of the environment variable that holds the DSN —
    /// the lookup itself happens outside this file.
    pub dsn_env: String,
    /// Read from the `database` key in the config file.
    #[serde(rename = "database")]
    pub database_name: String,
}
/// Payload of a `type: sqlite` target entry.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffSqliteTarget {
    /// Presumably the name of the environment variable that holds the DSN —
    /// the lookup itself happens outside this file.
    pub dsn_env: String,
    /// Read from the `database` key; optional, falling back to the
    /// placeholder string `"ignored"` when the key is absent.
    // NOTE(review): the placeholder suggests the SQLite backend does not use
    // this value — confirm against the backend implementation.
    #[serde(rename = "database", default = "default_sqlite_db_name")]
    pub database_name: String,
}
/// Serde fallback for the SQLite target's `database` key: the placeholder
/// string `"ignored"`.
fn default_sqlite_db_name() -> String {
    String::from("ignored")
}
/// Default vector metric name, `"cosine"`.
///
/// Used as the serde fallback for the Postgres target's `vector_metric` and
/// when a legacy `target:` is promoted to a Postgres entry.
fn default_vector_metric() -> String {
    String::from("cosine")
}
/// Payload of a `type: clickhouse` target entry.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffClickhouseTarget {
    /// Presumably the name of the environment variable that holds the DSN —
    /// the lookup itself happens outside this file.
    pub dsn_env: String,
    /// Read from the `database` key in the config file.
    #[serde(rename = "database")]
    pub database_name: String,
    /// Optional engine setting; `None` when the key is absent. How the value
    /// is interpreted is decided outside this file.
    #[serde(default)]
    pub engine: Option<String>,
}
impl BakeoffTargetEntry {
pub fn dsn_env(&self) -> &str {
match self {
Self::Postgres(t) => &t.dsn_env,
Self::Mariadb(t) => &t.dsn_env,
Self::Sqlite(t) => &t.dsn_env,
Self::Clickhouse(t) => &t.dsn_env,
}
}
pub fn database_name(&self) -> &str {
match self {
Self::Postgres(t) => &t.database_name,
Self::Mariadb(t) => &t.database_name,
Self::Sqlite(t) => &t.database_name,
Self::Clickhouse(t) => &t.database_name,
}
}
pub fn backend_name(&self) -> &'static str {
match self {
Self::Postgres(_) => "postgres",
Self::Mariadb(_) => "mariadb",
Self::Sqlite(_) => "sqlite",
Self::Clickhouse(_) => "clickhouse",
}
}
}
/// The `scoring` section of a bake-off config. Every field is optional and
/// falls back to the same value that [`ScoringConfig::default`] produces.
#[derive(Debug, Clone, Deserialize)]
pub struct ScoringConfig {
    /// List of `k` cutoffs; defaults to `[1, 3, 5]`. Presumably the cutoffs
    /// at which rank-based metrics are evaluated — confirm against the scorer.
    #[serde(default = "default_k")]
    pub k: Vec<usize>,
    /// Defaults to `true`. Presumably toggles a mean-reciprocal-rank metric.
    #[serde(default = "default_include_mrr")]
    pub include_mrr: bool,
    /// Defaults to `5`. Presumably the number of hits fetched per query.
    #[serde(default = "default_top_k")]
    pub top_k: usize,
}
/// Default for `ScoringConfig::k`: the cutoffs `[1, 3, 5]`.
fn default_k() -> Vec<usize> {
    [1, 3, 5].to_vec()
}
/// Default for `ScoringConfig::include_mrr`: enabled.
fn default_include_mrr() -> bool {
    true
}
/// Default for `ScoringConfig::top_k`: `5`.
fn default_top_k() -> usize {
    5
}
/// Used when the whole `scoring` section is omitted from the config.
impl Default for ScoringConfig {
    /// Builds the config from the same helper functions that serde uses for
    /// each individual field, so a missing section and an empty section
    /// yield identical values.
    fn default() -> Self {
        let k = default_k();
        let include_mrr = default_include_mrr();
        let top_k = default_top_k();
        Self {
            k,
            include_mrr,
            top_k,
        }
    }
}
/// The `gold_queries` value of a bake-off config, accepted in two shapes.
///
/// Untagged: serde tries the variants in declaration order, so a sequence of
/// [`GoldQuery`] entries becomes [`GoldQueriesSpec::Inline`] and a plain
/// string becomes [`GoldQueriesSpec::Path`].
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum GoldQueriesSpec {
    /// Gold queries written directly in the config file.
    Inline(Vec<GoldQuery>),
    /// A string — presumably a path to a file containing the gold queries;
    /// the loading code is not in this file.
    Path(String),
}
/// Top-level bake-off configuration as deserialized from the config file.
///
/// Exactly one of `target` (legacy) and `targets` (multi-backend) must be
/// set; use [`BakeoffConfig::effective_targets`] instead of reading either
/// field directly.
#[derive(Debug, Clone, Deserialize)]
pub struct BakeoffConfig {
    /// Name of this bake-off.
    pub name: String,
    /// Source configuration (type defined in `crate::config`).
    pub source: SourceConfig,
    /// Optional framer configuration; `None` when the key is absent.
    #[serde(default)]
    pub framer: Option<FramerConfig>,
    /// Gold queries, either inline or as a path string.
    pub gold_queries: GoldQueriesSpec,
    /// Embedder and chunker lists.
    pub matrix: MatrixConfig,
    /// Legacy single-target form; `None` when the key is absent.
    #[serde(default)]
    pub target: Option<BakeoffTargetConfig>,
    /// Multi-backend target list; empty when the key is absent.
    #[serde(default)]
    pub targets: Vec<BakeoffTargetEntry>,
    /// Scoring options; falls back to `ScoringConfig::default()` when the
    /// section is absent.
    #[serde(default)]
    pub scoring: ScoringConfig,
    /// Optional output directory; `None` when the key is absent.
    #[serde(default)]
    pub output_dir: Option<String>,
    /// Optional runtime configuration; `None` when the key is absent.
    #[serde(default)]
    pub runtime: Option<RuntimeConfig>,
}
impl BakeoffConfig {
    /// Resolves the legacy `target` and the multi-backend `targets` fields
    /// into one list of target entries.
    ///
    /// * Only `targets` set: returns a clone of that list.
    /// * Only `target` set: returns a one-element list holding a Postgres
    ///   entry whose `database_name` is the legacy `schema_name` and whose
    ///   `vector_metric` is the default metric.
    ///
    /// # Errors
    ///
    /// Returns an error when both `target` and a non-empty `targets` are
    /// present, and when neither is.
    pub fn effective_targets(&self) -> anyhow::Result<Vec<BakeoffTargetEntry>> {
        let has_multi = !self.targets.is_empty();
        match self.target.as_ref() {
            // Both forms present: refuse to guess which one the user meant.
            Some(_) if has_multi => Err(anyhow::anyhow!(
                "bakeoff YAML has BOTH 'target:' (legacy single-PG) and 'targets:' \
                 (multi-backend) — set exactly one."
            )),
            // Legacy form only: promote it to a single Postgres entry.
            Some(legacy) => {
                let promoted = BakeoffPostgresTarget {
                    dsn_env: legacy.dsn_env.clone(),
                    database_name: legacy.schema_name.clone(),
                    vector_metric: default_vector_metric(),
                };
                Ok(vec![BakeoffTargetEntry::Postgres(promoted)])
            }
            // Multi-backend form only: hand back an owned copy.
            None if has_multi => Ok(self.targets.clone()),
            // Nothing configured at all.
            None => Err(anyhow::anyhow!(
                "bakeoff YAML must set either 'target:' (legacy single-PG) or \
                 'targets:' (multi-backend)."
            )),
        }
    }
}
/// Serialized result record for one combination of backend, chunker and
/// embedder.
// NOTE(review): `#[serde(default)]` only influences deserialization, and this
// type derives `Serialize` alone in this file, so those attributes appear to
// have no effect here — confirm whether `Deserialize` was intended.
#[derive(Debug, Clone, Serialize)]
pub struct ComboResult {
    /// Backend name for this combination.
    #[serde(default)]
    pub backend: String,
    /// Key identifying the chunker.
    pub chunker_key: String,
    /// Key identifying the embedder.
    pub embedder_key: String,
    /// Label for the chunker.
    pub chunker_label: String,
    /// Label for the embedder.
    pub embedder_label: String,
    /// Table name associated with this combination.
    pub table: String,
    /// Chunk count recorded for ingest.
    pub ingest_chunks: i64,
    /// Ingest wall time; seconds, going by the field name.
    pub ingest_wall_seconds: f64,
    /// Mean query wall time; milliseconds, going by the field name.
    #[serde(default)]
    pub query_wall_ms_mean: f64,
    /// Embedding time during ingest; seconds, going by the field name.
    #[serde(default)]
    pub ingest_embed_seconds: f64,
    /// Aggregate values keyed by name; the key set is produced elsewhere.
    pub aggregate: BTreeMap<String, f64>,
    /// One result record per query.
    pub per_query: Vec<PerQueryResult>,
}
/// Serialized result record for a single query within one combination.
#[derive(Debug, Clone, Serialize)]
pub struct PerQueryResult {
    /// The query text.
    pub query: String,
    /// The document id paired with the query.
    pub gold_doc_id: String,
    /// The hits recorded for this query.
    pub top_k: Vec<TopKHit>,
    /// Score values keyed by name. Flattened: each entry is serialized as a
    /// sibling of `query`, `gold_doc_id` and `top_k` rather than under a
    /// nested `scores` object.
    #[serde(flatten)]
    pub scores: BTreeMap<String, f64>,
}
/// One hit in a query's recorded result list.
#[derive(Debug, Clone, Serialize)]
pub struct TopKHit {
    /// Id of the document the hit belongs to.
    pub doc_id: String,
    /// Sequence number of the hit — presumably the chunk's position within
    /// its document; confirm against the ingest code.
    pub seq_num: i32,
}
/// Serialized top-level results document for a whole bake-off run.
// NOTE(review): `#[serde(default)]` only influences deserialization, and this
// type derives `Serialize` alone in this file, so that attribute appears to
// have no effect here — confirm whether `Deserialize` was intended.
#[derive(Debug, Clone, Serialize)]
pub struct BakeoffResults {
    /// Name of the run.
    pub run_name: String,
    /// Start time of the run as a string; the format is chosen by the
    /// producer, which is not in this file.
    pub started_at: String,
    /// Label describing the corpus.
    pub corpus_label: String,
    /// Query count recorded for the run.
    pub n_queries: usize,
    /// Combination count recorded for the run.
    pub n_combos: usize,
    /// One result record per combination.
    pub combos: Vec<ComboResult>,
    /// The gold queries recorded alongside the results.
    pub gold_queries: Vec<GoldQuery>,
    /// Query embedding time keyed by embedder; seconds, going by the field
    /// name.
    #[serde(default)]
    pub query_embed_seconds_by_embedder: BTreeMap<String, f64>,
}