use serde::{Deserialize, Serialize};
/// Tunable settings for fuzzy matching.
///
/// Ready-made values are provided by the preset constructors on this type
/// (`strict`, `balanced`, `permissive`, and the multi-field variants).
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FuzzyMatchConfig {
    /// Score threshold; the built-in presets use values from 0.70 to 0.95.
    ///
    /// NOTE(review): presumably the minimum similarity score required for a
    /// match — confirm against the code that consumes this config.
    pub threshold: f64,
    /// Weight given to the Levenshtein component. In every built-in preset it
    /// sums to 1.0 together with `jaro_winkler_weight`.
    pub levenshtein_weight: f64,
    /// Weight given to the Jaro-Winkler component. In every built-in preset it
    /// sums to 1.0 together with `levenshtein_weight`.
    pub jaro_winkler_weight: f64,
    /// Alias toggle; enabled by all built-in presets.
    ///
    /// NOTE(review): how aliases are applied is not visible in this file.
    pub use_aliases: bool,
    /// Ecosystem-rules toggle; enabled by all built-in presets.
    ///
    /// NOTE(review): the rules themselves are not visible in this file.
    pub use_ecosystem_rules: bool,
    /// Candidate limit; the built-in presets use 100, 500 and 1000.
    pub max_candidates: usize,
    /// Optional per-field weights. `None` in the basic presets; a missing key
    /// deserializes to `None`.
    #[serde(default)]
    pub field_weights: Option<MultiFieldWeights>,
}
/// Per-field weights and penalties used for multi-field scoring.
///
/// The six field weights (`name`, `version`, `ecosystem`, `licenses`,
/// `supplier`, `group`) are the ones summed by `is_normalized` and rescaled by
/// `normalize`; the penalty settings are not part of that sum.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct MultiFieldWeights {
    /// Weight of the name field.
    pub name: f64,
    /// Weight of the version field.
    pub version: f64,
    /// Weight of the ecosystem field.
    pub ecosystem: f64,
    /// Weight of the licenses field.
    pub licenses: f64,
    /// Weight of the supplier field.
    pub supplier: f64,
    /// Weight of the group field.
    pub group: f64,
    /// Ecosystem mismatch penalty. Deserializes to `0.0` when the key is
    /// absent; the non-legacy presets store it as a negative number.
    ///
    /// NOTE(review): the sign convention differs from the version penalties
    /// (stored as positive numbers) — confirm how the scorer applies each.
    #[serde(default)]
    pub ecosystem_mismatch_penalty: f64,
    /// Version-divergence toggle. Deserializes to `true` when the key is absent.
    #[serde(default = "default_true")]
    pub version_divergence_enabled: bool,
    /// Major-version penalty. Deserializes to `0.10` when the key is absent.
    #[serde(default = "default_version_major_penalty")]
    pub version_major_penalty: f64,
    /// Minor-version penalty. Deserializes to `0.02` when the key is absent.
    #[serde(default = "default_version_minor_penalty")]
    pub version_minor_penalty: f64,
}
/// Serde default helper that always yields `true`
/// (used for `MultiFieldWeights::version_divergence_enabled`).
const fn default_true() -> bool {
    true
}
/// Serde default for `MultiFieldWeights::version_major_penalty`: `0.10`.
const fn default_version_major_penalty() -> f64 {
    0.10
}
/// Serde default for `MultiFieldWeights::version_minor_penalty`: `0.02`.
const fn default_version_minor_penalty() -> f64 {
    0.02
}
impl MultiFieldWeights {
    /// Preset that places 80% of the field weight on the name.
    ///
    /// The six field weights sum to 1.0. Penalties match [`Self::balanced`].
    #[must_use]
    pub const fn name_focused() -> Self {
        Self {
            name: 0.80,
            version: 0.05,
            ecosystem: 0.10,
            licenses: 0.03,
            supplier: 0.01,
            group: 0.01,
            ecosystem_mismatch_penalty: -0.15,
            version_divergence_enabled: true,
            version_major_penalty: 0.10,
            version_minor_penalty: 0.02,
        }
    }

    /// Preset with 60% of the field weight on the name and the remainder
    /// spread over the other five fields (sum 1.0).
    #[must_use]
    pub const fn balanced() -> Self {
        Self {
            name: 0.60,
            version: 0.10,
            ecosystem: 0.15,
            licenses: 0.08,
            supplier: 0.04,
            group: 0.03,
            ecosystem_mismatch_penalty: -0.15,
            version_divergence_enabled: true,
            version_major_penalty: 0.10,
            version_minor_penalty: 0.02,
        }
    }

    /// Preset that raises the version and ecosystem weights to 0.20 each and
    /// uses larger penalty magnitudes than [`Self::balanced`] (sum of field
    /// weights is still 1.0).
    #[must_use]
    pub const fn security_focused() -> Self {
        Self {
            name: 0.50,
            version: 0.20,
            ecosystem: 0.20,
            licenses: 0.05,
            supplier: 0.03,
            group: 0.02,
            ecosystem_mismatch_penalty: -0.25,
            version_divergence_enabled: true,
            version_major_penalty: 0.15,
            version_minor_penalty: 0.03,
        }
    }

    /// Preset with the same field weights as [`Self::balanced`] but with every
    /// penalty set to zero and version divergence switched off.
    #[must_use]
    pub const fn legacy() -> Self {
        Self {
            name: 0.60,
            version: 0.10,
            ecosystem: 0.15,
            licenses: 0.08,
            supplier: 0.04,
            group: 0.03,
            ecosystem_mismatch_penalty: 0.0,
            version_divergence_enabled: false,
            version_major_penalty: 0.0,
            version_minor_penalty: 0.0,
        }
    }

    /// Sum of the six field weights; the penalty settings are not included.
    fn field_weight_sum(&self) -> f64 {
        self.name + self.version + self.ecosystem + self.licenses + self.supplier + self.group
    }

    /// Returns `true` when the six field weights sum to 1.0 within a
    /// tolerance of 0.001.
    #[must_use]
    pub fn is_normalized(&self) -> bool {
        (self.field_weight_sum() - 1.0).abs() < 0.001
    }

    /// Rescales the six field weights in place so that they sum to 1.0.
    ///
    /// Penalty settings are left untouched. The call is a no-op when the
    /// current sum is zero, negative, NaN, or infinite, because no meaningful
    /// scale factor exists in those cases.
    pub fn normalize(&mut self) {
        let sum = self.field_weight_sum();
        // Dividing by an infinite sum would zero the finite weights and turn
        // the infinite one into NaN, so leave the weights untouched instead.
        if !(sum.is_finite() && sum > 0.0) {
            return;
        }
        for weight in [
            &mut self.name,
            &mut self.version,
            &mut self.ecosystem,
            &mut self.licenses,
            &mut self.supplier,
            &mut self.group,
        ] {
            *weight /= sum;
        }
    }
}
impl Default for MultiFieldWeights {
fn default() -> Self {
Self::balanced()
}
}
impl FuzzyMatchConfig {
    /// Shared constructor for the basic presets: alias and ecosystem rules are
    /// switched on and no multi-field weights are attached.
    const fn basic_preset(
        threshold: f64,
        levenshtein_weight: f64,
        jaro_winkler_weight: f64,
        max_candidates: usize,
    ) -> Self {
        Self {
            threshold,
            levenshtein_weight,
            jaro_winkler_weight,
            use_aliases: true,
            use_ecosystem_rules: true,
            max_candidates,
            field_weights: None,
        }
    }

    /// Preset with threshold 0.95, equal algorithm weights (0.5 / 0.5) and at
    /// most 100 candidates.
    #[must_use]
    pub const fn strict() -> Self {
        Self::basic_preset(0.95, 0.5, 0.5, 100)
    }

    /// Preset with threshold 0.85, algorithm weights 0.4 / 0.6 and at most
    /// 500 candidates.
    #[must_use]
    pub const fn balanced() -> Self {
        Self::basic_preset(0.85, 0.4, 0.6, 500)
    }

    /// Preset with threshold 0.70, algorithm weights 0.3 / 0.7 and at most
    /// 1000 candidates.
    #[must_use]
    pub const fn permissive() -> Self {
        Self::basic_preset(0.70, 0.3, 0.7, 1000)
    }

    /// Returns this configuration with `field_weights` set to `Some(weights)`.
    #[must_use]
    pub const fn with_multi_field(self, weights: MultiFieldWeights) -> Self {
        let mut updated = self;
        updated.field_weights = Some(weights);
        updated
    }

    /// Returns this configuration with `threshold` replaced; the value is
    /// stored as given, without validation.
    #[must_use]
    pub const fn with_threshold(self, threshold: f64) -> Self {
        Self { threshold, ..self }
    }

    /// [`Self::strict`] combined with the security-focused field weights.
    #[must_use]
    pub const fn strict_multi_field() -> Self {
        let weights = MultiFieldWeights::security_focused();
        Self::strict().with_multi_field(weights)
    }

    /// [`Self::balanced`] combined with the balanced field weights.
    #[must_use]
    pub const fn balanced_multi_field() -> Self {
        let weights = MultiFieldWeights::balanced();
        Self::balanced().with_multi_field(weights)
    }

    /// Looks up a preset by name, ignoring case.
    ///
    /// Accepted names are `strict`, `balanced`, `permissive`, `strict-multi`,
    /// `balanced-multi` and `security-focused`; the hyphenated names may also
    /// be written with an underscore. Any other name yields `None`.
    #[must_use]
    pub fn from_preset(name: &str) -> Option<Self> {
        let key = name.to_lowercase();
        let preset = match key.as_str() {
            "strict" => Self::strict(),
            "balanced" => Self::balanced(),
            "permissive" => Self::permissive(),
            "strict-multi" | "strict_multi" => Self::strict_multi_field(),
            "balanced-multi" | "balanced_multi" => Self::balanced_multi_field(),
            "security-focused" | "security_focused" => Self::security_focused(),
            _ => return None,
        };
        Some(preset)
    }

    /// [`Self::strict`] with the threshold lowered to 0.85 and the
    /// security-focused field weights attached.
    #[must_use]
    pub fn security_focused() -> Self {
        Self::strict()
            .with_threshold(0.85)
            .with_multi_field(MultiFieldWeights::security_focused())
    }
}
impl Default for FuzzyMatchConfig {
fn default() -> Self {
Self::balanced()
}
}
/// Settings for cross-ecosystem handling.
///
/// NOTE(review): the consumer of these settings is not visible in this file;
/// the field descriptions below state only what the presets establish.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct CrossEcosystemConfig {
    /// Master switch; `true` by default, `false` in the `disabled` preset.
    pub enabled: bool,
    /// Score floor; presets use 0.70 to 0.90.
    ///
    /// NOTE(review): presumably the minimum score for a cross-ecosystem
    /// match — confirm against the caller.
    pub min_score: f64,
    /// Score penalty; presets use 0.05 to 0.15.
    ///
    /// NOTE(review): how the penalty is applied is not visible here.
    pub score_penalty: f64,
    /// Candidate limit; presets use 5, 10 and 20.
    pub max_candidates: usize,
    /// Verified-only flag; set only by the `strict` preset.
    pub verified_only: bool,
}
impl Default for CrossEcosystemConfig {
fn default() -> Self {
Self {
enabled: true,
min_score: 0.80,
score_penalty: 0.10,
max_candidates: 10,
verified_only: false,
}
}
}
impl CrossEcosystemConfig {
    /// Shared constructor for the presets that keep the feature enabled.
    const fn enabled_preset(
        min_score: f64,
        score_penalty: f64,
        max_candidates: usize,
        verified_only: bool,
    ) -> Self {
        Self {
            enabled: true,
            min_score,
            score_penalty,
            max_candidates,
            verified_only,
        }
    }

    /// The default configuration with `enabled` switched off; every other
    /// field keeps its `Default` value.
    #[must_use]
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            ..Self::default()
        }
    }

    /// Preset with `min_score` 0.90, `score_penalty` 0.15, at most
    /// 5 candidates, and `verified_only` set.
    #[must_use]
    pub const fn strict() -> Self {
        Self::enabled_preset(0.90, 0.15, 5, true)
    }

    /// Preset with `min_score` 0.70, `score_penalty` 0.05, at most
    /// 20 candidates, and `verified_only` cleared.
    #[must_use]
    pub const fn permissive() -> Self {
        Self::enabled_preset(0.70, 0.05, 20, false)
    }
}