use std::collections::BTreeMap;
#[cfg(feature = "embedding-novelty")]
use futures::stream::{self, StreamExt, TryStreamExt};
#[cfg(feature = "embedding-novelty")]
use rig::embeddings::EmbeddingModel;
use serde::{Deserialize, Serialize};
#[cfg(feature = "embedding-novelty")]
use tracing::instrument;
use crate::dataset::Qrels;
#[cfg(feature = "embedding-novelty")]
use crate::error::{Error, Result};
use crate::report::{QueryDelta, ReportDiff};
/// Weights that control how metric deltas and candidate-document signals are
/// combined into knowledge-gain scores.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeGainConfig {
/// Per-metric weights keyed by metric name. When the map is empty every
/// metric is weighted 1.0; otherwise only the metrics listed here are scored.
pub metric_weights: BTreeMap<String, f64>,
/// Multiplier applied to a candidate document's relevance gain.
/// Falls back to 1.0 when the field is missing from serialized input.
#[serde(default = "default_document_relevance_weight")]
pub document_relevance_weight: f64,
/// Multiplier applied to a candidate document's novelty value.
/// Falls back to 0.0 when the field is missing from serialized input.
#[serde(default)]
pub novelty_weight: f64,
}
impl Default for KnowledgeGainConfig {
fn default() -> Self {
Self::new()
}
}
impl KnowledgeGainConfig {
    /// Creates a configuration with no per-metric weights, a document
    /// relevance weight of 1.0 and a novelty weight of 0.0.
    #[must_use]
    pub fn new() -> Self {
        let metric_weights = BTreeMap::new();
        Self {
            metric_weights,
            document_relevance_weight: 1.0,
            novelty_weight: 0.0,
        }
    }

    /// Registers (or replaces) the weight for `metric`.
    ///
    /// The weight is sanitized first: negative and non-finite values become 0.0.
    #[must_use]
    pub fn with_metric_weight(mut self, metric: impl Into<String>, weight: f64) -> Self {
        let key: String = metric.into();
        let sanitized = clean_weight(weight);
        self.metric_weights.insert(key, sanitized);
        self
    }

    /// Sets the multiplier for candidate relevance gain, sanitized so that
    /// negative and non-finite values become 0.0.
    #[must_use]
    pub fn with_document_relevance_weight(self, weight: f64) -> Self {
        Self {
            document_relevance_weight: clean_weight(weight),
            ..self
        }
    }

    /// Sets the multiplier for candidate novelty, sanitized so that negative
    /// and non-finite values become 0.0.
    #[must_use]
    pub fn with_novelty_weight(self, weight: f64) -> Self {
        Self {
            novelty_weight: clean_weight(weight),
            ..self
        }
    }

    /// Looks up the effective weight for `metric`.
    ///
    /// With no configured weights every metric counts with weight 1.0. Once
    /// any weight is configured, unlisted metrics yield `None`, and listed
    /// ones are sanitized again because the map is publicly writable.
    fn weight_for(&self, metric: &str) -> Option<f64> {
        if self.metric_weights.is_empty() {
            return Some(1.0);
        }
        let stored = self.metric_weights.get(metric)?;
        Some(clean_weight(*stored))
    }
}
/// Serde fallback for `KnowledgeGainConfig::document_relevance_weight`.
fn default_document_relevance_weight() -> f64 {
    const DEFAULT_WEIGHT: f64 = 1.0;
    DEFAULT_WEIGHT
}
/// Sanitizes a weight: NaN and infinities collapse to 0.0, and finite values
/// are floored at 0.0 so a weight can never be negative.
fn clean_weight(weight: f64) -> f64 {
    if weight.is_nan() || weight.is_infinite() {
        return 0.0;
    }
    weight.max(0.0)
}
/// Contribution of a single metric to the overall knowledge-gain score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricGain {
/// Metric name, copied from the diff row.
pub metric: String,
/// Effective weight applied to this metric.
pub weight: f64,
/// Metric delta taken from the diff row.
pub delta: f64,
/// `weight * delta`.
pub contribution: f64,
/// Winner count copied from the diff row.
pub winners: usize,
/// Loser count copied from the diff row.
pub losers: usize,
/// Unchanged count copied from the diff row.
pub unchanged: usize,
}
/// Aggregated gain for one query across all scored metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QueryGain {
/// Identifier of the query.
pub query_id: String,
/// Weighted average of the query's per-metric deltas (0.0 when `weight` is not positive).
pub score: f64,
/// Sum of the weights of the metrics that reported a change for this query.
pub weight: f64,
}
/// A candidate document to be ranked, optionally carrying a novelty value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateDocumentGainInput {
/// Identifier of the candidate document.
pub doc_id: String,
/// Optional novelty value; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub novelty: Option<f64>,
}
/// A candidate document together with the text chunks that are embedded to
/// estimate its novelty.
#[cfg(feature = "embedding-novelty")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateNoveltyInput {
/// Identifier of the candidate document.
pub doc_id: String,
/// Text chunks belonging to the document.
pub chunks: Vec<String>,
}
#[cfg(feature = "embedding-novelty")]
impl CandidateNoveltyInput {
    /// Builds an input from a document id and any iterator of owned chunks.
    #[must_use]
    pub fn new(doc_id: impl Into<String>, chunks: impl IntoIterator<Item = String>) -> Self {
        let doc_id: String = doc_id.into();
        let mut collected: Vec<String> = Vec::new();
        collected.extend(chunks);
        Self {
            doc_id,
            chunks: collected,
        }
    }
}
/// Scores candidate novelty by embedding chunks with an embedding model and
/// comparing them against reference embeddings.
#[cfg(feature = "embedding-novelty")]
pub struct EmbeddingNoveltyAdapter<M> {
// Embedding model used for every embedding request.
model: M,
// Maximum number of embedding batches awaited concurrently (kept >= 1).
concurrency: usize,
}
#[cfg(feature = "embedding-novelty")]
impl<M> EmbeddingNoveltyAdapter<M>
where
    M: EmbeddingModel,
{
    /// Wraps `model` with a concurrency of 1, i.e. batches are embedded one at a time.
    #[must_use]
    pub fn new(model: M) -> Self {
        Self {
            model,
            concurrency: 1,
        }
    }

    /// Sets how many embedding batches may be in flight at once.
    /// Values below 1 are raised to 1.
    #[must_use]
    pub fn with_concurrency(mut self, concurrency: usize) -> Self {
        self.concurrency = concurrency.max(1);
        self
    }

    /// Embeds the reference chunks and every candidate's chunks, then assigns
    /// each candidate a novelty value in `[0, 1]`.
    ///
    /// The returned vector has one entry per candidate, in input order.
    ///
    /// # Errors
    ///
    /// Returns an error when the embedding model fails or when it returns a
    /// different number of embeddings than texts submitted.
    #[instrument(level = "debug", skip(self, candidates, reference_chunks), fields(candidates = candidates.len(), reference_chunks = reference_chunks.len(), concurrency = self.concurrency, max_batch = M::MAX_DOCUMENTS))]
    pub async fn score_candidates(
        &self,
        candidates: &[CandidateNoveltyInput],
        reference_chunks: &[String],
    ) -> Result<Vec<CandidateDocumentGainInput>> {
        let reference_embeddings = self.embed_batched(reference_chunks.to_vec()).await?;

        // Flatten all candidate chunks into one list so batching is shared
        // across candidates; `offsets` remembers each candidate's sub-range.
        let total_chunks: usize = candidates
            .iter()
            .map(|candidate| candidate.chunks.len())
            .sum();
        let mut offsets: Vec<(usize, usize)> = Vec::with_capacity(candidates.len());
        let mut flat_chunks: Vec<String> = Vec::with_capacity(total_chunks);
        for candidate in candidates {
            let start = flat_chunks.len();
            flat_chunks.extend(candidate.chunks.iter().cloned());
            let end = flat_chunks.len();
            offsets.push((start, end));
        }

        let candidate_embeddings = self.embed_batched(flat_chunks).await?;

        let mut scored = Vec::with_capacity(candidates.len());
        for (candidate, (start, end)) in candidates.iter().zip(offsets) {
            // `embed_batched` guarantees one embedding per chunk, so the range
            // is always in bounds; `get` merely avoids a panicking index.
            let slice = candidate_embeddings.get(start..end).unwrap_or(&[]);
            let novelty = compute_novelty(slice, &reference_embeddings);
            scored.push(
                CandidateDocumentGainInput::new(candidate.doc_id.clone()).with_novelty(novelty),
            );
        }
        Ok(scored)
    }

    /// Embeds `texts` in batches of at most `M::MAX_DOCUMENTS`, awaiting up to
    /// `self.concurrency` batches at a time, and returns the embeddings in
    /// input order.
    ///
    /// # Errors
    ///
    /// Propagates model errors, and reports an error when a batch comes back
    /// with a different number of embeddings than texts, since that would
    /// silently misalign every later embedding with its source chunk.
    #[instrument(level = "trace", skip(self, texts), fields(total = texts.len(), concurrency = self.concurrency, max_batch = M::MAX_DOCUMENTS))]
    async fn embed_batched(&self, texts: Vec<String>) -> Result<Vec<Vec<f64>>> {
        if texts.is_empty() {
            return Ok(Vec::new());
        }
        let batch_size = M::MAX_DOCUMENTS.max(1);
        let total = texts.len();

        // Move the owned strings into batches without cloning them.
        let mut batches: Vec<Vec<String>> = Vec::new();
        let mut iter = texts.into_iter();
        loop {
            let batch: Vec<String> = iter.by_ref().take(batch_size).collect();
            if batch.is_empty() {
                break;
            }
            batches.push(batch);
        }

        let concurrency = self.concurrency.max(1);
        let model = &self.model;
        // `buffered` preserves batch order, so concatenating the results keeps
        // embeddings aligned with the input texts.
        let batch_results: Vec<Vec<Vec<f64>>> =
            stream::iter(batches.into_iter().map(|batch| async move {
                let expected = batch.len();
                let embeddings = model.embed_texts(batch).await?;
                if embeddings.len() != expected {
                    return Err::<Vec<Vec<f64>>, Error>(Error::from(
                        rig::embeddings::EmbeddingError::ProviderError(format!(
                            "embedding model returned {} embeddings for {expected} texts",
                            embeddings.len()
                        )),
                    ));
                }
                Ok::<Vec<Vec<f64>>, Error>(
                    embeddings
                        .into_iter()
                        .map(|embedding| embedding.vec)
                        .collect(),
                )
            }))
            .buffered(concurrency)
            .try_collect()
            .await?;

        let mut out: Vec<Vec<f64>> = Vec::with_capacity(total);
        for batch in batch_results {
            out.extend(batch);
        }
        Ok(out)
    }
}
/// Averages, over the candidate embeddings, one minus the best cosine
/// similarity against any reference embedding (similarity clamped to `[0, 1]`).
///
/// Returns 0.0 when there are no candidate embeddings and 1.0 when there are
/// candidates but no reference embeddings.
#[cfg(feature = "embedding-novelty")]
fn compute_novelty(candidate_embeddings: &[Vec<f64>], reference_embeddings: &[Vec<f64>]) -> f64 {
    match (
        candidate_embeddings.is_empty(),
        reference_embeddings.is_empty(),
    ) {
        (true, _) => return 0.0,
        (false, true) => return 1.0,
        (false, false) => {}
    }
    let novelty_sum = candidate_embeddings
        .iter()
        .fold(0.0_f64, |running, chunk| {
            // Best match starts at 0.0, so negative similarities never count.
            let closest = reference_embeddings
                .iter()
                .fold(0.0_f64, |best, reference| best.max(cosine(chunk, reference)));
            running + (1.0 - closest.clamp(0.0, 1.0))
        });
    novelty_sum / candidate_embeddings.len() as f64
}
impl CandidateDocumentGainInput {
    /// Creates an input for `doc_id` with no novelty value attached.
    #[must_use]
    pub fn new(doc_id: impl Into<String>) -> Self {
        let doc_id: String = doc_id.into();
        Self {
            doc_id,
            novelty: None,
        }
    }

    /// Attaches a novelty value, clamped to `[0, 1]`; non-finite values become 0.0.
    #[must_use]
    pub fn with_novelty(self, novelty: f64) -> Self {
        let sanitized = clean_novelty(novelty);
        Self {
            novelty: Some(sanitized),
            ..self
        }
    }
}
/// How much a single query contributes to a candidate document's relevance gain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateQueryGain {
/// Identifier of the query.
pub query_id: String,
/// Grade of the candidate document for this query, as reported by the qrels.
pub grade: u8,
/// Gain score of the query.
pub query_gain: f64,
/// `query_gain * grade`.
pub contribution: f64,
}
/// Ranking result for one candidate document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CandidateDocumentGain {
/// Identifier of the candidate document.
pub doc_id: String,
/// `weighted_relevance_gain + weighted_novelty_gain`.
pub score: f64,
/// Sum of the per-query contributions listed in `query_gains`.
pub relevance_gain: f64,
/// `relevance_gain` scaled by the configured document relevance weight.
pub weighted_relevance_gain: f64,
/// Novelty (0.0 when absent) scaled by the configured novelty weight.
pub weighted_novelty_gain: f64,
/// Novelty value used for scoring, if one was supplied; omitted from serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub novelty: Option<f64>,
/// Per-query breakdown of the relevance gain; omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub query_gains: Vec<CandidateQueryGain>,
}
/// Knowledge-gain summary derived from a report diff.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KnowledgeGainReport {
/// Weighted average of the scored metric deltas (0.0 when no weight was accumulated).
pub score: f64,
/// Sum of the weights of all scored metrics.
pub total_weight: f64,
/// One entry per scored metric.
pub metrics: Vec<MetricGain>,
/// Per-query gains; omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub queries: Vec<QueryGain>,
/// Ranked candidate documents; omitted from serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub candidate_documents: Vec<CandidateDocumentGain>,
}
impl KnowledgeGainReport {
#[must_use]
pub fn from_diff(diff: &ReportDiff, config: &KnowledgeGainConfig) -> Self {
let mut total_weight = 0.0;
let mut total_contribution = 0.0;
let mut metrics = Vec::new();
let mut query_accumulator: BTreeMap<String, (f64, f64)> = BTreeMap::new();
for row in &diff.rows {
let Some(delta) = row.delta else {
continue;
};
let Some(weight) = config.weight_for(&row.metric) else {
continue;
};
if weight == 0.0 {
continue;
}
let contribution = weight * delta;
total_weight += weight;
total_contribution += contribution;
metrics.push(MetricGain {
metric: row.metric.clone(),
weight,
delta,
contribution,
winners: row.winners,
losers: row.losers,
unchanged: row.unchanged,
});
accumulate_query_gains(&mut query_accumulator, &row.query_changes, weight);
}
let score = if total_weight > 0.0 {
total_contribution / total_weight
} else {
0.0
};
let mut queries = query_accumulator
.into_iter()
.map(|(query_id, (contribution, weight))| QueryGain {
query_id,
score: if weight > 0.0 {
contribution / weight
} else {
0.0
},
weight,
})
.collect::<Vec<_>>();
queries.sort_by(|left, right| {
right
.score
.abs()
.total_cmp(&left.score.abs())
.then_with(|| left.query_id.cmp(&right.query_id))
});
Self {
score,
total_weight,
metrics,
queries,
candidate_documents: Vec::new(),
}
}
#[must_use]
pub fn with_candidate_documents(
mut self,
qrels: &Qrels,
candidates: &[CandidateDocumentGainInput],
config: &KnowledgeGainConfig,
) -> Self {
self.candidate_documents = self.rank_candidate_documents(qrels, candidates, config);
self
}
#[must_use]
pub fn rank_candidate_documents(
&self,
qrels: &Qrels,
candidates: &[CandidateDocumentGainInput],
config: &KnowledgeGainConfig,
) -> Vec<CandidateDocumentGain> {
let query_scores = self
.queries
.iter()
.map(|query| (query.query_id.as_str(), query.score))
.collect::<BTreeMap<_, _>>();
let mut ranked = candidates
.iter()
.map(|candidate| rank_candidate_document(candidate, qrels, &query_scores, config))
.collect::<Vec<_>>();
ranked.sort_by(|left, right| {
right
.score
.total_cmp(&left.score)
.then_with(|| left.doc_id.cmp(&right.doc_id))
});
ranked
}
#[must_use]
pub fn to_markdown(&self) -> String {
let mut out = String::new();
out.push_str(&format!("knowledge_gain={:.4}\n\n", self.score));
out.push_str("| metric | weight | delta | contribution | win | lose | same |\n");
out.push_str("|---|---:|---:|---:|---:|---:|---:|\n");
for metric in &self.metrics {
out.push_str(&format!(
"| {} | {:.4} | {:+.4} | {:+.4} | {} | {} | {} |\n",
metric.metric,
metric.weight,
metric.delta,
metric.contribution,
metric.winners,
metric.losers,
metric.unchanged
));
}
if !self.queries.is_empty() {
out.push_str("\n| query | gain | weight |\n");
out.push_str("|---|---:|---:|\n");
for query in &self.queries {
out.push_str(&format!(
"| {} | {:+.4} | {:.4} |\n",
query.query_id, query.score, query.weight
));
}
}
if !self.candidate_documents.is_empty() {
out.push_str(
"\n| candidate_doc | score | weighted_relevance | weighted_novelty | novelty |\n",
);
out.push_str("|---|---:|---:|---:|---:|\n");
for candidate in &self.candidate_documents {
let novelty = candidate
.novelty
.map(|value| format!("{value:.4}"))
.unwrap_or_else(|| "-".to_string());
out.push_str(&format!(
"| {} | {:+.4} | {:+.4} | {:+.4} | {} |\n",
candidate.doc_id,
candidate.score,
candidate.weighted_relevance_gain,
candidate.weighted_novelty_gain,
novelty
));
}
}
out
}
}
fn rank_candidate_document(
candidate: &CandidateDocumentGainInput,
qrels: &Qrels,
query_scores: &BTreeMap<&str, f64>,
config: &KnowledgeGainConfig,
) -> CandidateDocumentGain {
let mut relevance_gain = 0.0;
let mut query_gains = Vec::new();
for query in &qrels.queries {
let grade = query.grade(&candidate.doc_id);
if grade == 0 {
continue;
}
let Some(query_gain) = query_scores.get(query.query_id.as_str()).copied() else {
continue;
};
let contribution = query_gain * f64::from(grade);
relevance_gain += contribution;
query_gains.push(CandidateQueryGain {
query_id: query.query_id.clone(),
grade,
query_gain,
contribution,
});
}
let weighted_relevance_gain = relevance_gain * config.document_relevance_weight;
let novelty = candidate.novelty.map(clean_novelty);
let weighted_novelty_gain = novelty.unwrap_or(0.0) * config.novelty_weight;
CandidateDocumentGain {
doc_id: candidate.doc_id.clone(),
score: weighted_relevance_gain + weighted_novelty_gain,
relevance_gain,
weighted_relevance_gain,
weighted_novelty_gain,
novelty,
query_gains,
}
}
/// Sanitizes a novelty value: NaN and infinities collapse to 0.0, and finite
/// values are clamped into `[0, 1]`.
fn clean_novelty(novelty: f64) -> f64 {
    if novelty.is_nan() || novelty.is_infinite() {
        return 0.0;
    }
    novelty.clamp(0.0, 1.0)
}
#[cfg(feature = "embedding-novelty")]
/// Cosine similarity of two vectors.
///
/// Returns 0.0 when the vectors are empty, differ in length, or either one
/// has zero magnitude.
fn cosine(left: &[f64], right: &[f64]) -> f64 {
    if left.is_empty() || left.len() != right.len() {
        return 0.0;
    }
    // Single pass accumulating the dot product and both squared norms.
    let (dot, left_sq, right_sq) = left.iter().zip(right).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, left_sq, right_sq), (a, b)| (dot + a * b, left_sq + a * a, right_sq + b * b),
    );
    if left_sq == 0.0 || right_sq == 0.0 {
        return 0.0;
    }
    dot / (left_sq.sqrt() * right_sq.sqrt())
}
/// Folds one metric's per-query deltas into `accumulator`.
///
/// Each entry holds `(sum of delta * weight, sum of weight)` for a query id;
/// queries seen for the first time start from `(0.0, 0.0)`.
fn accumulate_query_gains(
    accumulator: &mut BTreeMap<String, (f64, f64)>,
    changes: &[QueryDelta],
    weight: f64,
) {
    for change in changes {
        let (weighted_delta_sum, weight_sum) =
            accumulator.entry(change.query_id.clone()).or_default();
        *weighted_delta_sum += change.delta * weight;
        *weight_sum += weight;
    }
}
// Unit tests for knowledge-gain scoring, candidate ranking and, behind the
// `embedding-novelty` feature, the embedding-based novelty adapter.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::panic, clippy::indexing_slicing)]
mod tests {
use super::*;
use crate::dataset::GoldQuery;
use crate::report::{MetricDelta, QueryDelta};
#[cfg(feature = "embedding-novelty")]
use rig::embeddings::{Embedding, EmbeddingError};
// With no configured weights every metric counts equally, so the score is the
// plain mean of the deltas: (0.5 + 1.0) / 2 = 0.75.
#[test]
fn default_config_averages_all_comparable_metrics() {
let diff = ReportDiff {
rows: vec![
row("recall@3", 0.5, vec![query("q1", 0.5)]),
row("mrr", 1.0, vec![query("q1", 1.0)]),
],
};
let gain = KnowledgeGainReport::from_diff(&diff, &KnowledgeGainConfig::default());
assert!((gain.score - 0.75).abs() < 1e-9);
assert_eq!(gain.metrics.len(), 2);
assert_eq!(gain.queries[0].query_id, "q1");
assert!((gain.queries[0].score - 0.75).abs() < 1e-9);
}
// Once any metric weight is configured, metrics that are not listed are skipped.
#[test]
fn weighted_config_includes_only_named_metrics() {
let diff = ReportDiff {
rows: vec![
row("recall@3", 0.5, vec![query("q1", 0.5)]),
row("mrr", 1.0, vec![query("q1", 1.0)]),
],
};
let config = KnowledgeGainConfig::new().with_metric_weight("recall@3", 2.0);
let gain = KnowledgeGainReport::from_diff(&diff, &config);
assert_eq!(gain.metrics.len(), 1);
assert!((gain.score - 0.5).abs() < 1e-9);
assert!((gain.total_weight - 2.0).abs() < 1e-9);
}
// Candidate score = query gain * grade * relevance weight + novelty * novelty weight.
// d1: 1.0 * 2 + 0.2 * 0.5 = 2.1; d2: 0.25 * 1 + 1.0 * 0.5 = 0.75.
#[test]
fn candidate_document_ranking_uses_query_gain_and_novelty() {
let diff = ReportDiff {
rows: vec![row(
"recall@3",
1.0,
vec![query("q1", 1.0), query("q2", 0.25)],
)],
};
let qrels = qrels();
let config = KnowledgeGainConfig::new()
.with_metric_weight("recall@3", 1.0)
.with_document_relevance_weight(1.0)
.with_novelty_weight(0.5);
let gain = KnowledgeGainReport::from_diff(&diff, &config).with_candidate_documents(
&qrels,
&[
CandidateDocumentGainInput::new("d1").with_novelty(0.2),
CandidateDocumentGainInput::new("d2").with_novelty(1.0),
CandidateDocumentGainInput::new("d3").with_novelty(0.1),
],
&config,
);
let d1 = &gain.candidate_documents[0];
assert_eq!(d1.doc_id, "d1");
assert!((d1.relevance_gain - 2.0).abs() < 1e-9);
assert!((d1.weighted_relevance_gain - 2.0).abs() < 1e-9);
assert!((d1.weighted_novelty_gain - 0.1).abs() < 1e-9);
assert!((d1.score - 2.1).abs() < 1e-9);
let d2 = &gain.candidate_documents[1];
assert_eq!(d2.doc_id, "d2");
assert!((d2.relevance_gain - 0.25).abs() < 1e-9);
assert!((d2.weighted_relevance_gain - 0.25).abs() < 1e-9);
assert!((d2.weighted_novelty_gain - 0.5).abs() < 1e-9);
assert!((d2.score - 0.75).abs() < 1e-9);
}
// Regressions produce a negative score: (-0.5 + -0.25) / 2 = -0.375.
#[test]
fn negative_diff_produces_negative_knowledge_gain() {
let diff = ReportDiff {
rows: vec![
row("recall@3", -0.5, vec![query("q1", -0.5)]),
row("mrr", -0.25, vec![query("q1", -0.25)]),
],
};
let gain = KnowledgeGainReport::from_diff(&diff, &KnowledgeGainConfig::default());
assert!(gain.score < 0.0);
assert!((gain.score + 0.375).abs() < 1e-9);
assert!(gain.queries[0].score < 0.0);
}
// A chunk identical to the reference has novelty 0; an orthogonal one has novelty 1.
#[cfg(feature = "embedding-novelty")]
#[tokio::test]
async fn embedding_novelty_adapter_scores_candidate_chunks() {
let adapter = EmbeddingNoveltyAdapter::new(FakeEmbeddingModel::<32>::new());
let scored = adapter
.score_candidates(
&[
CandidateNoveltyInput::new("same", vec!["alpha".to_string()]),
CandidateNoveltyInput::new("new", vec!["beta".to_string()]),
],
&["alpha".to_string()],
)
.await
.unwrap();
let same = scored
.iter()
.find(|candidate| candidate.doc_id == "same")
.unwrap();
let new_doc = scored
.iter()
.find(|candidate| candidate.doc_id == "new")
.unwrap();
assert_eq!(same.novelty, Some(0.0));
assert_eq!(new_doc.novelty, Some(1.0));
}
// "mid" has cosine 0.6 with "alpha" (novelty 0.4); a candidate without chunks
// scores 0; "multi" averages novelty 0 (alpha) and 1 (beta) to 0.5.
#[cfg(feature = "embedding-novelty")]
#[tokio::test]
async fn embedding_novelty_adapter_handles_partial_similarity_and_empties() {
let adapter = EmbeddingNoveltyAdapter::new(FakeEmbeddingModel::<32>::new());
let scored = adapter
.score_candidates(
&[
CandidateNoveltyInput::new("partial", vec!["mid".to_string()]),
CandidateNoveltyInput::new("empty_candidate", Vec::<String>::new()),
CandidateNoveltyInput::new(
"multi",
vec!["alpha".to_string(), "beta".to_string()],
),
],
&["alpha".to_string()],
)
.await
.unwrap();
let by_id = |id: &str| -> CandidateDocumentGainInput {
scored
.iter()
.find(|candidate| candidate.doc_id == id)
.cloned()
.unwrap()
};
let partial = by_id("partial").novelty.unwrap();
assert!((partial - 0.4).abs() < 1e-9, "got {partial}");
assert_eq!(by_id("empty_candidate").novelty, Some(0.0));
let multi = by_id("multi").novelty.unwrap();
assert!((multi - 0.5).abs() < 1e-9, "got {multi}");
}
// With no reference chunks every non-empty candidate is fully novel.
#[cfg(feature = "embedding-novelty")]
#[tokio::test]
async fn embedding_novelty_adapter_empty_reference_yields_full_novelty() {
let adapter = EmbeddingNoveltyAdapter::new(FakeEmbeddingModel::<32>::new());
let scored = adapter
.score_candidates(
&[CandidateNoveltyInput::new(
"isolated",
vec!["alpha".to_string()],
)],
&[],
)
.await
.unwrap();
assert_eq!(scored[0].novelty, Some(1.0));
}
// With a batch limit of 2, five reference chunks need 3 calls and four
// candidate chunks need 2 calls, for 5 calls in total.
#[cfg(feature = "embedding-novelty")]
#[tokio::test]
async fn embedding_novelty_adapter_respects_max_documents() {
let model = FakeEmbeddingModel::<2>::new();
let counter = model.call_counter();
let adapter = EmbeddingNoveltyAdapter::new(model).with_concurrency(2);
let reference: Vec<String> = (0..5).map(|i| format!("ref{i}")).collect();
let candidate_chunks: Vec<String> = (0..4).map(|i| format!("c{i}")).collect();
let candidates = vec![CandidateNoveltyInput::new("doc", candidate_chunks)];
let _ = adapter
.score_candidates(&candidates, &reference)
.await
.unwrap();
let calls = counter.lock().unwrap();
assert_eq!(*calls, 5, "expected 5 batched calls, observed {}", *calls);
}
// A failing model surfaces as an error whose message mentions "embedding".
#[cfg(feature = "embedding-novelty")]
#[tokio::test]
async fn embedding_novelty_adapter_propagates_provider_errors() {
let adapter =
EmbeddingNoveltyAdapter::new(FakeEmbeddingModel::<32>::new().failing("provider boom"));
let result = adapter
.score_candidates(
&[CandidateNoveltyInput::new("doc", vec!["alpha".to_string()])],
&["alpha".to_string()],
)
.await;
let err = result.unwrap_err();
let message = err.to_string();
assert!(
message.contains("embedding"),
"expected typed embedding error, got: {message}"
);
}
// Builds a metric row whose baseline mean is 0.0, so the current mean equals the delta.
fn row(metric: &str, delta: f64, query_changes: Vec<QueryDelta>) -> MetricDelta {
MetricDelta {
metric: metric.to_string(),
current_mean: delta,
baseline_mean: Some(0.0),
delta: Some(delta),
winners: 1,
losers: 0,
unchanged: 0,
query_changes,
current_ci: None,
baseline_ci: None,
}
}
// Builds a per-query delta against a baseline of 0.0.
fn query(query_id: &str, delta: f64) -> QueryDelta {
QueryDelta {
query_id: query_id.to_string(),
current: delta,
baseline: 0.0,
delta,
}
}
// Two gold queries: q1 grades d1 with 2, q2 grades d2 with 1.
fn qrels() -> Qrels {
Qrels {
queries: vec![
GoldQuery {
query_id: "q1".to_string(),
query: "one".to_string(),
relevant_docs: BTreeMap::from([("d1".to_string(), 2u8)])
.into_iter()
.collect(),
reference_answer: None,
},
GoldQuery {
query_id: "q2".to_string(),
query: "two".to_string(),
relevant_docs: BTreeMap::from([("d2".to_string(), 1u8)])
.into_iter()
.collect(),
reference_answer: None,
},
],
}
}
#[cfg(feature = "embedding-novelty")]
use std::sync::{Arc, Mutex};
// Deterministic embedding model for tests: counts successful calls, can be
// configured to fail, and rejects batches larger than `MAX`.
#[cfg(feature = "embedding-novelty")]
#[derive(Clone)]
struct FakeEmbeddingModel<const MAX: usize> {
calls: Arc<Mutex<usize>>,
failure: Option<String>,
}
#[cfg(feature = "embedding-novelty")]
impl<const MAX: usize> FakeEmbeddingModel<MAX> {
// Creates a model with a zeroed call counter that never fails.
fn new() -> Self {
Self {
calls: Arc::new(Mutex::new(0)),
failure: None,
}
}
// Makes every `embed_texts` call fail with a provider error carrying `message`.
fn failing(mut self, message: impl Into<String>) -> Self {
self.failure = Some(message.into());
self
}
// Returns a shared handle to the call counter.
fn call_counter(&self) -> Arc<Mutex<usize>> {
Arc::clone(&self.calls)
}
}
#[cfg(feature = "embedding-novelty")]
impl<const MAX: usize> EmbeddingModel for FakeEmbeddingModel<MAX> {
const MAX_DOCUMENTS: usize = MAX;
type Client = ();
fn make(_client: &Self::Client, _model: impl Into<String>, _dims: Option<usize>) -> Self {
Self::new()
}
fn ndims(&self) -> usize {
2
}
async fn embed_texts(
&self,
texts: impl IntoIterator<Item = String> + rig::wasm_compat::WasmCompatSend,
) -> std::result::Result<Vec<Embedding>, EmbeddingError> {
let texts: Vec<String> = texts.into_iter().collect();
// The configured failure takes precedence and is not counted as a call.
if let Some(message) = self.failure.as_ref() {
return Err(EmbeddingError::ProviderError(message.clone()));
}
// Oversized batches are rejected so missing batching shows up as an error.
if texts.len() > MAX {
return Err(EmbeddingError::ProviderError(format!(
"fake model batch overflow: {} > {MAX}",
texts.len()
)));
}
// Scope the guard so the lock is released before building the result.
{
let mut calls = self.calls.lock().unwrap();
*calls = calls.saturating_add(1);
}
Ok(texts
.into_iter()
.map(|document| Embedding {
vec: fake_vector(&document),
document,
})
.collect())
}
}
// Fixed 2-d vectors: "alpha" and "beta" are orthogonal unit vectors, "mid" has
// cosine 0.6 with "alpha", and any other text maps to the zero vector.
#[cfg(feature = "embedding-novelty")]
fn fake_vector(text: &str) -> Vec<f64> {
match text {
"alpha" => vec![1.0, 0.0],
"beta" => vec![0.0, 1.0],
"mid" => vec![0.6, 0.8],
_ => vec![0.0, 0.0],
}
}
}