mod matchers;
use bumpalo::Bump;
use jiff::Timestamp;
use serde::Deserialize;
use serde_inline_default::serde_inline_default;
use crate::model::{Entity, SearchEntity};
pub(crate) mod comparers;
pub(crate) mod extractors;
pub(crate) mod logic_v1;
pub(crate) mod name_based;
pub(crate) mod name_qualified;
pub(crate) mod replacers;
pub(crate) mod validators;
/// Scoring algorithm that can be selected by name.
///
/// Variants deserialize from their kebab-case spelling:
/// `"name-based"`, `"name-qualified"` and `"logic-v1"`.
#[derive(Clone, Copy, Debug, Default, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum Algorithm {
    /// Deserialized from `"name-based"`.
    NameBased,
    /// Deserialized from `"name-qualified"`.
    NameQualified,
    /// Deserialized from `"logic-v1"`; this is the `Default` variant.
    #[default]
    LogicV1,
}
/// A complete scoring strategy comparing a search entity with a candidate entity.
///
/// Both items are associated functions without a `self` receiver.
pub trait MatchingAlgorithm {
/// Returns the static name identifying this algorithm.
fn name() -> &'static str;
/// Scores the entity `rhs` against the search entity `lhs`.
///
/// `bump` is a `bumpalo` arena made available to the implementation and
/// `cutoff` is a score bound passed through to it.
///
/// Returns an `f64` together with a list of `(&'static str, f64)` entries.
/// NOTE(review): presumably the overall score and a per-feature breakdown —
/// confirm against the implementors.
fn score(bump: &Bump, lhs: &SearchEntity, rhs: &Entity, cutoff: f64) -> (f64, Vec<(&'static str, f64)>);
}
/// A single scoring feature evaluated for one search entity / entity pair.
///
/// Implementors must be `Send + Sync`. `run_features` evaluates features through
/// `&dyn Feature` and combines each returned score with a per-feature weight.
pub trait Feature<'e>: Send + Sync {
/// Returns the static name of this feature; `run_features` records it next
/// to the computed score.
fn name(&self) -> &'static str;
/// Computes this feature's score for `lhs` versus `rhs`.
///
/// `bump` is a `bumpalo` arena made available to the implementation.
/// NOTE(review): the expected value range of the result is not visible
/// here — confirm against the implementors.
fn score_feature(&self, bump: &Bump, lhs: &'e SearchEntity, rhs: &'e Entity) -> f64;
}
/// Evaluates `features` in order and accumulates their weighted scores onto `init`.
///
/// For every evaluated feature the pair `(feature name, unweighted score)` is
/// appended to `results`, and `score * weight` is added to the running total.
///
/// A feature with a negative weight is skipped (neither evaluated nor recorded)
/// whenever the running total at that point is below `cutoff`.
///
/// Returns the final running total.
fn run_features<'e>(
    bump: &Bump,
    lhs: &'e SearchEntity,
    rhs: &'e Entity,
    cutoff: f64,
    init: f64,
    features: &[(&dyn Feature<'e>, f64)],
    results: &mut Vec<(&'static str, f64)>,
) -> f64 {
    let mut total = init;

    for &(feature, weight) in features {
        // A penalty can only lower a total that is already under the cutoff,
        // so its evaluation is skipped entirely.
        let is_penalty = weight < 0.0;
        if total < cutoff && is_penalty {
            continue;
        }

        let value = feature.score_feature(bump, lhs, rhs);
        results.push((feature.name(), value));
        tracing::debug!(feature = feature.name(), score = value, "computed feature score");

        total += value * weight;
    }

    total
}
/// Parameters controlling a matching run.
///
/// The struct is deserializable with serde; fields annotated with
/// `serde_inline_default` take the given value when missing from the input.
///
/// NOTE(review): the derived `Default` impl uses each field type's own default
/// (e.g. `0` for `limit`, `0.0` for `threshold` and `cutoff`), not the serde
/// inline defaults — confirm callers of `MatchParams::default()` expect that.
#[serde_inline_default]
#[derive(Clone, Debug, Default, Deserialize)]
pub struct MatchParams {
/// Never read from the deserialized input (`skip_deserializing`); it starts
/// out empty and has to be assigned by the caller.
#[serde(skip_deserializing)]
pub scope: String,
/// Defaults to `5` when absent from the input.
/// NOTE(review): presumably the maximum number of results — confirm.
#[serde_inline_default(5)]
pub limit: usize,
/// Ignored by serde (`skip`), so it is `0` after deserialization unless the
/// caller sets it. `candidate_limit` multiplies it with `limit`.
#[serde(skip)]
pub candidate_factor: usize,
/// Defaults to `0.7` when absent from the input.
/// NOTE(review): presumably the score at which a result counts as a match — confirm.
#[serde_inline_default(0.7)]
pub threshold: f64,
/// Defaults to `0.5` when absent from the input.
/// NOTE(review): presumably the lower score bound handed to the scoring code — confirm.
#[serde_inline_default(0.5)]
pub cutoff: f64,
/// Algorithm to use; defaults to `Algorithm::LogicV1` when absent from the input.
#[serde_inline_default(Algorithm::LogicV1)]
pub algorithm: Algorithm,
/// Optional list of topics; `None` when absent from the input.
pub topics: Option<Vec<String>>,
/// Defaults to an empty list when absent from the input.
/// NOTE(review): presumably dataset names to restrict the search to — confirm.
#[serde(default)]
pub include_dataset: Vec<String>,
/// Defaults to an empty list when absent from the input.
/// NOTE(review): presumably dataset names to leave out of the search — confirm.
#[serde(default)]
pub exclude_dataset: Vec<String>,
/// Optional timestamp; `None` when absent from the input.
/// NOTE(review): presumably limits results to entities changed after it — confirm.
pub changed_since: Option<Timestamp>,
/// Defaults to an empty list when absent from the input.
/// NOTE(review): presumably schema names to leave out of the search — confirm.
#[serde(default)]
pub exclude_schema: Vec<String>,
}
impl MatchParams {
    /// Computes the candidate limit derived from `limit` and `candidate_factor`.
    ///
    /// The result is `limit * candidate_factor`, clamped to the inclusive range
    /// `20..=9999`.
    ///
    /// The multiplication saturates at `usize::MAX` rather than overflowing:
    /// `limit` is deserialized from input, and a plain `*` would panic in
    /// overflow-checked builds or wrap around in release builds, where the
    /// wrapped value could land at the lower bound instead of the upper one.
    pub fn candidate_limit(&self) -> usize {
        self.limit
            .saturating_mul(self.candidate_factor)
            .clamp(20, 9999)
    }
}