use std::collections::HashMap;
use panproto_gat::Name;
use panproto_mig::align::{Anchor, StrategyTag};
use panproto_schema::Schema;
use crate::Lens;
use crate::protolens::{Protolens, ProtolensChain};
/// One step of a candidate chain together with the evidence that explains it.
///
/// Values are produced by `step_to_candidate`, which pairs a protolens step
/// with the alignment anchor that matched it, if any.
#[derive(Clone, Debug)]
pub struct CandidateStep {
/// Name of the underlying protolens step, rendered as a string.
pub kind: String,
/// Explanation text: the matched anchor's explanation, or a
/// `structural: …` description when no anchor matched the step.
pub explanation: String,
/// Confidence of the matched anchor; `1.0` when no anchor matched.
pub confidence: f64,
/// Strategy of the matched anchor; `None` when no anchor matched.
pub strategy: Option<StrategyTag>,
}
/// A proposed lens together with the figures used to rank it.
///
/// See [`LensCandidate::score`] for how `quality`, `coverage` and the step
/// confidences are combined.
#[derive(Debug)]
pub struct LensCandidate {
/// Protolens chain proposed by this candidate.
pub chain: ProtolensChain,
/// Lens paired with `chain`. In this file's tests it is obtained from
/// `chain.instantiate(..)`; NOTE(review): confirm the builder does the same.
pub lens: Lens,
/// Quality term; enters the score with weight 1.
pub quality: f64,
/// Coverage term; enters the score with weight 0.5.
/// Presumably produced by `coverage_ratio` — TODO confirm against the builder.
pub coverage: f64,
/// Anchors attached to this candidate. NOTE(review): not read anywhere in
/// this file; presumably the anchors the candidate was seeded from — confirm.
pub seed_anchors: Vec<Anchor>,
/// Per-step annotations; their mean confidence enters the score with weight 0.2.
pub steps: Vec<CandidateStep>,
/// Strategy tags recorded for this candidate.
/// Presumably the output of `strategies_used` — TODO confirm against the builder.
pub strategies_used: Vec<StrategyTag>,
}
impl LensCandidate {
    /// Ranking score of this candidate.
    ///
    /// Computed as `quality + 0.5 * coverage + 0.2 * mean_confidence`, where
    /// `mean_confidence` is the arithmetic mean of the step confidences. A
    /// candidate without steps is treated as having mean confidence `1.0`.
    ///
    /// The result is not normalised: with all inputs in `[0, 1]` it ranges
    /// over `[0, 1.7]`.
    #[must_use]
    pub fn score(&self) -> f64 {
        let step_total = self.steps.len();
        // The step count is converted to `f64` only to take the mean.
        #[allow(clippy::cast_precision_loss)]
        let mean_confidence = match step_total {
            0 => 1.0,
            n => self.steps.iter().map(|step| step.confidence).sum::<f64>() / n as f64,
        };
        // Fused multiply-adds: first quality + 0.5 * coverage, then the
        // confidence term on top of that.
        let quality_and_coverage = 0.5f64.mul_add(self.coverage, self.quality);
        0.2f64.mul_add(mean_confidence, quality_and_coverage)
    }
}
/// Fraction of vertices accounted for by a match between two schemas.
///
/// Returns `matched / max(|src|, |tgt|, 1)` clamped to `[0.0, 1.0]`, where
/// `|src|` and `|tgt|` are the vertex counts of the two schemas. The floor of
/// `1` on the denominator keeps the division defined for empty schemas.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `matched` exceeds the
/// denominator. Release builds clamp the result to `1.0` instead.
#[must_use]
pub fn coverage_ratio(src: &Schema, tgt: &Schema, matched: usize) -> f64 {
    let src_vertices = src.vertex_count();
    let tgt_vertices = tgt.vertex_count();
    let denom = src_vertices.max(tgt_vertices).max(1);
    debug_assert!(
        matched <= denom,
        "matched ({matched}) exceeds max(|src|, |tgt|) = {denom}; \
         candidate builder produced a nonsensical vertex_map"
    );
    // Counts are converted to `f64` only to form the ratio.
    #[allow(clippy::cast_precision_loss)]
    let ratio = matched as f64 / denom as f64;
    ratio.clamp(0.0, 1.0)
}
/// Annotates every step of `chain` with the anchor that best explains it.
///
/// The anchors are first reduced to two lookup tables, one keyed by source
/// name and one keyed by target name, each keeping only the best anchor per
/// key. Every step of the chain is then converted with `step_to_candidate`
/// using those tables (plus the raw anchor slice for its fallback scans).
///
/// "Best" is decided by, in order:
///
/// 1. higher `confidence` (compared with `f64::total_cmp`),
/// 2. higher strategy priority (see the table in the body),
/// 3. lexicographically smaller `(tgt, src, explanation)`.
///
/// The third rule makes the per-key winner independent of the order of
/// `anchors`: two anchors that tie on all three rules are equal in every field.
///
/// Returns one [`CandidateStep`] per step of `chain`, in chain order.
#[must_use]
pub fn enrich_steps(chain: &ProtolensChain, anchors: &[Anchor]) -> Vec<CandidateStep> {
    // Rank used only to break confidence ties; a larger value is preferred.
    let priority = |tag: StrategyTag| -> u8 {
        match tag {
            StrategyTag::UserHint => 100,
            StrategyTag::Exact => 90,
            StrategyTag::EdgeLabel => 85,
            StrategyTag::ExactSuffix => 80,
            StrategyTag::Alias => 70,
            StrategyTag::TypeSignature => 60,
            StrategyTag::WrapUnwrap => 55,
            StrategyTag::TokenSimilarity => 50,
            StrategyTag::DescriptionSimilarity => 45,
            StrategyTag::Coerce => 40,
            StrategyTag::Neighborhood => 35,
            StrategyTag::WlRefinement => 32,
            StrategyTag::Structural => 30,
            StrategyTag::Llm => 20,
        }
    };

    // `true` when `a` should replace `b` as the representative for a key.
    let better = |a: &Anchor, b: &Anchor| -> bool {
        a.confidence
            .total_cmp(&b.confidence)
            .then_with(|| priority(a.strategy).cmp(&priority(b.strategy)))
            // Operands are swapped on purpose: the lexicographically
            // *smaller* key wins. Comparing `tgt` alone cannot separate
            // anchors that share a target, which is every contender in the
            // target-keyed table, so `src` and `explanation` are included.
            .then_with(|| {
                let key_a = (a.tgt.as_str(), a.src.as_str(), a.explanation.as_str());
                let key_b = (b.tgt.as_str(), b.src.as_str(), b.explanation.as_str());
                key_b.cmp(&key_a)
            })
            .is_gt()
    };

    // Groups the anchors by `key_fn` and keeps the best anchor of each group.
    let fold_by = |key_fn: fn(&Anchor) -> &str| -> HashMap<&str, &Anchor> {
        let mut best: HashMap<&str, &Anchor> = HashMap::new();
        for anchor in anchors {
            let key = key_fn(anchor);
            best.entry(key)
                .and_modify(|existing| {
                    if better(anchor, existing) {
                        *existing = anchor;
                    }
                })
                .or_insert(anchor);
        }
        best
    };

    let by_src: HashMap<&str, &Anchor> = fold_by(|a| a.src.as_str());
    let by_tgt: HashMap<&str, &Anchor> = fold_by(|a| a.tgt.as_str());

    chain
        .steps
        .iter()
        .map(|step| step_to_candidate(step, &by_src, &by_tgt, anchors))
        .collect()
}
/// Converts one protolens step into a [`CandidateStep`].
///
/// The step's transform decides which table is consulted:
///
/// * renames look up the old name in `by_src`, then the new name in `by_tgt`;
/// * additions (sorts and ops) look up the added name in `by_tgt`;
/// * drops (sorts and ops) look up the dropped name in `by_src`;
/// * sort coercions look up the sort name in `by_src`, then `by_tgt`, then
///   take the first `Coerce`-strategy anchor in `anchors` whose source or
///   target contains the sort name, and finally the first `Coerce`-strategy
///   anchor of any name;
/// * every other transform matches no anchor.
///
/// When an anchor is found, its explanation, confidence and strategy are
/// copied into the result. Otherwise the step gets a structural explanation,
/// confidence `1.0` and no strategy.
fn step_to_candidate<'a>(
    step: &Protolens,
    by_src: &HashMap<&str, &'a Anchor>,
    by_tgt: &HashMap<&str, &'a Anchor>,
    anchors: &'a [Anchor],
) -> CandidateStep {
    use panproto_gat::TheoryTransform;

    let kind = step.name.to_string();

    let hit: Option<&Anchor> = match &step.target.transform {
        TheoryTransform::DropSort(name) | TheoryTransform::DropOp(name) => {
            by_src.get(name.as_ref()).copied()
        }
        TheoryTransform::AddOp(op) => by_tgt.get(op.name.as_ref()).copied(),
        TheoryTransform::AddSort { sort, .. }
        | TheoryTransform::AddSortWithDefault { sort, .. } => {
            by_tgt.get(sort.name.as_ref()).copied()
        }
        TheoryTransform::RenameSort { old, new } | TheoryTransform::RenameOp { old, new } => {
            by_src
                .get(old.as_ref())
                .copied()
                .or_else(|| by_tgt.get(new.as_ref()).copied())
        }
        TheoryTransform::CoerceSort { sort_name, .. } => {
            let key = sort_name.as_ref();
            by_src
                .get(key)
                .copied()
                .or_else(|| by_tgt.get(key).copied())
                // No exact key: accept a Coerce anchor that mentions the sort.
                .or_else(|| {
                    anchors
                        .iter()
                        .filter(|a| a.strategy == StrategyTag::Coerce)
                        .find(|a| a.src.as_str().contains(key) || a.tgt.as_str().contains(key))
                })
                // Last resort: any Coerce anchor at all.
                .or_else(|| anchors.iter().find(|a| a.strategy == StrategyTag::Coerce))
        }
        _ => None,
    };

    let (explanation, confidence, strategy) = match hit {
        Some(anchor) => (
            anchor.explanation.clone(),
            anchor.confidence,
            Some(anchor.strategy),
        ),
        None => (structural_explanation(step), 1.0, None),
    };

    CandidateStep {
        kind,
        explanation,
        confidence,
        strategy,
    }
}
/// Describes a step purely from its transform, for steps no anchor explains.
///
/// Every result starts with `structural: `. Add, drop, rename and coerce
/// transforms get a dedicated sentence naming the affected sort or op; any
/// other transform is rendered with its `Debug` representation.
fn structural_explanation(step: &Protolens) -> String {
    use panproto_gat::TheoryTransform;

    let transform = &step.target.transform;
    match transform {
        TheoryTransform::RenameSort { old, new } => {
            format!("structural: renamed sort `{old}` → `{new}`")
        }
        TheoryTransform::RenameOp { old, new } => {
            format!("structural: renamed op `{old}` → `{new}`")
        }
        TheoryTransform::DropSort(name) => format!("structural: dropped sort `{name}`"),
        TheoryTransform::DropOp(name) => format!("structural: dropped op `{name}`"),
        TheoryTransform::AddOp(op) => format!("structural: added op `{}`", op.name),
        TheoryTransform::AddSort { sort, .. }
        | TheoryTransform::AddSortWithDefault { sort, .. } => {
            format!("structural: added sort `{}`", sort.name)
        }
        TheoryTransform::CoerceSort {
            sort_name,
            target_kind,
            coercion_class,
            ..
        } => format!(
            "structural: coerce sort `{sort_name}` to `{target_kind:?}` ({coercion_class:?})"
        ),
        other => format!("structural: {other:?}"),
    }
}
/// Lists the distinct strategies appearing in `anchors`.
///
/// Each strategy is reported once, in the order of its first occurrence.
#[must_use]
pub fn strategies_used(anchors: &[Anchor]) -> Vec<StrategyTag> {
    let mut seen = std::collections::HashSet::new();
    anchors
        .iter()
        .map(|anchor| anchor.strategy)
        // `insert` returns `true` only the first time a tag is seen.
        .filter(|tag| seen.insert(*tag))
        .collect()
}
/// Number of entries in `vertex_map`.
///
/// Each entry is one key/value pair of names, so this is the number of mapped
/// vertices. Presumably the value passed as `matched` to `coverage_ratio` —
/// TODO confirm against the caller.
#[must_use]
pub fn matched_count(vertex_map: &HashMap<Name, Name>) -> usize {
vertex_map.len()
}
// Unit tests for scoring, coverage and step enrichment.
// `unwrap` is allowed here because a failing fixture should abort the test.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use panproto_gat::Name;
use panproto_mig::align::{Anchor, StrategyTag};
// Builds an `Anchor` from plain values so the tests stay compact.
fn mk_anchor(src: &str, tgt: &str, conf: f64, tag: StrategyTag, explanation: &str) -> Anchor {
Anchor {
src: Name::from(src),
tgt: Name::from(tgt),
confidence: conf,
strategy: tag,
explanation: explanation.to_owned(),
}
}
// A one-vertex schema compared with itself: matching the single vertex gives
// full coverage, matching nothing gives zero.
#[test]
fn coverage_ratio_identity() {
let proto = panproto_schema::Protocol {
name: "test".into(),
schema_theory: "ThTest".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["object".into()],
constraint_sorts: vec![],
..panproto_schema::Protocol::default()
};
let s = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
assert!((coverage_ratio(&s, &s, 1) - 1.0).abs() < 1e-9);
assert!((coverage_ratio(&s, &s, 0) - 0.0).abs() < 1e-9);
}
// With schemas of 1 and 3 vertices the denominator is the larger count, so
// one match yields 1/3 and three matches yield exactly 1.0.
#[test]
fn coverage_ratio_asymmetric_clamps_below_one() {
let proto = panproto_schema::Protocol {
name: "test".into(),
schema_theory: "ThTest".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["object".into()],
constraint_sorts: vec![],
..panproto_schema::Protocol::default()
};
let small = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
let large = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.vertex("b", "object", None::<&str>)
.unwrap()
.vertex("c", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
let r = coverage_ratio(&small, &large, 1);
assert!((r - (1.0 / 3.0)).abs() < 1e-9, "1/3 expected, got {r}");
let full = coverage_ratio(&small, &large, 3);
assert!(
(full - 1.0).abs() < 1e-9,
"max match over max denominator must land at 1.0, got {full}"
);
}
// Pins the score formula: quality + 0.5 * coverage + 0.2 * mean confidence.
#[test]
fn score_combines_quality_coverage_confidence() {
use crate::protolens::ProtolensChain;
let proto = panproto_schema::Protocol {
name: "test".into(),
schema_theory: "ThTest".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["object".into()],
constraint_sorts: vec![],
..panproto_schema::Protocol::default()
};
let s = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
let chain = ProtolensChain::new(vec![]);
let lens = chain.instantiate(&s, &proto).unwrap();
// No steps: mean confidence defaults to 1.0, so 0.8 + 0.3 + 0.2 = 1.3.
let cand = LensCandidate {
chain,
lens,
quality: 0.8,
coverage: 0.6,
seed_anchors: vec![],
steps: vec![],
strategies_used: vec![],
};
assert!(
(cand.score() - 1.3).abs() < 1e-9,
"expected 1.3, got {}",
cand.score()
);
let steps = vec![
CandidateStep {
kind: "rename_sort".into(),
explanation: "x".into(),
confidence: 0.4,
strategy: None,
},
CandidateStep {
kind: "rename_sort".into(),
explanation: "y".into(),
confidence: 0.6,
strategy: None,
},
];
let chain2 = ProtolensChain::new(vec![]);
let lens2 = chain2.instantiate(&s, &proto).unwrap();
// Mean confidence (0.4 + 0.6) / 2 = 0.5, so 0.5 + 0.0 + 0.1 = 0.6.
let cand2 = LensCandidate {
chain: chain2,
lens: lens2,
quality: 0.5,
coverage: 0.0,
seed_anchors: vec![],
steps,
strategies_used: vec![],
};
assert!(
(cand2.score() - 0.6).abs() < 1e-9,
"expected 0.6, got {}",
cand2.score()
);
}
// An over-large `matched` trips the debug assertion inside `coverage_ratio`,
// so the out-of-range input is only exercised when debug assertions are off.
#[test]
fn coverage_ratio_clamps_out_of_range() {
let proto = panproto_schema::Protocol {
name: "test".into(),
schema_theory: "ThTest".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["object".into()],
constraint_sorts: vec![],
..panproto_schema::Protocol::default()
};
let s = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.vertex("b", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
if cfg!(debug_assertions) {
assert!(coverage_ratio(&s, &s, 2) <= 1.0);
} else {
let r = coverage_ratio(&s, &s, 99);
assert!((0.0..=1.0).contains(&r), "clamp failed: got {r}");
}
}
// For a rename the source-keyed table is consulted first, so the anchor
// keyed by "foo" is used even though the best anchor for target "bar" has a
// higher confidence.
#[test]
fn enrich_steps_prefers_src_match_over_tgt_on_exact_rename() {
let step = crate::protolens::elementary::rename_sort(Name::from("foo"), Name::from("bar"));
let chain = crate::protolens::ProtolensChain::new(vec![step]);
let anchors = vec![
mk_anchor("foo", "bar", 0.95, StrategyTag::Alias, "src-alias"),
mk_anchor("elsewhere", "bar", 0.99, StrategyTag::UserHint, "tgt-hint"),
];
let steps = enrich_steps(&chain, &anchors);
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].explanation, "src-alias",
"expected src-keyed anchor to win over tgt-keyed anchor; got {steps:?}",
);
}
// A coercion step whose sort name equals an anchor's source name picks up
// that anchor's explanation.
#[test]
fn enrich_steps_correlates_coerce_sort_via_sort_name() {
use panproto_expr::Expr;
use panproto_gat::{CoercionClass, ValueKind};
let step = crate::protolens::elementary::sort_coerce(
Name::from("integer"),
ValueKind::Str,
Expr::Lit(panproto_expr::Literal::Int(0)),
Some(Expr::Lit(panproto_expr::Literal::Int(0))),
CoercionClass::Retraction,
);
let chain = crate::protolens::ProtolensChain::new(vec![step]);
let anchors = vec![mk_anchor(
"integer",
"string",
0.9,
StrategyTag::Coerce,
"coerce-int-to-str",
)];
let steps = enrich_steps(&chain, &anchors);
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].explanation, "coerce-int-to-str",
"CoerceSort step must look up by sort_name, not fall to structural"
);
}
// The structural text for a coercion must mention both the operation and
// the sort it applies to.
#[test]
fn structural_explanation_handles_coerce_sort() {
use panproto_expr::Expr;
use panproto_gat::{CoercionClass, ValueKind};
let step = crate::protolens::elementary::sort_coerce(
Name::from("myint"),
ValueKind::Str,
Expr::Lit(panproto_expr::Literal::Int(0)),
Some(Expr::Lit(panproto_expr::Literal::Int(0))),
CoercionClass::Retraction,
);
let explanation = structural_explanation(&step);
assert!(
explanation.contains("coerce") && explanation.contains("myint"),
"coerce_sort structural explanation should name the sort; got: {explanation}"
);
}
// Neither anchor name equals or contains "integer", so only the last-resort
// scan for any `Coerce`-strategy anchor can produce the match.
#[test]
fn enrich_steps_coerce_sort_falls_back_to_strategy_scan() {
use panproto_expr::Expr;
use panproto_gat::{CoercionClass, ValueKind};
let step = crate::protolens::elementary::sort_coerce(
Name::from("integer"),
ValueKind::Str,
Expr::Lit(panproto_expr::Literal::Int(0)),
Some(Expr::Lit(panproto_expr::Literal::Int(0))),
CoercionClass::Retraction,
);
let chain = crate::protolens::ProtolensChain::new(vec![step]);
let anchors = vec![mk_anchor(
"r.n",
"r.s",
0.9,
StrategyTag::Coerce,
"int→str via r.n/r.s",
)];
let steps = enrich_steps(&chain, &anchors);
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].explanation, "int→str via r.n/r.s",
"Coerce strategy scan must find vertex-id-keyed anchors"
);
assert_eq!(steps[0].strategy, Some(StrategyTag::Coerce));
}
// Two anchors with equal confidence for the same names: the result must not
// depend on their order, and the `Exact` strategy outranks `Alias`.
#[test]
fn enrich_steps_deterministic_under_anchor_permutations() {
let step = crate::protolens::elementary::rename_sort(Name::from("foo"), Name::from("bar"));
let chain = crate::protolens::ProtolensChain::new(vec![step]);
let exact = mk_anchor("foo", "bar", 0.8, StrategyTag::Exact, "exact-match");
let alias = mk_anchor("foo", "bar", 0.8, StrategyTag::Alias, "alias-match");
let a = enrich_steps(&chain, &[exact.clone(), alias.clone()]);
let b = enrich_steps(&chain, &[alias, exact]);
assert_eq!(a[0].explanation, b[0].explanation);
assert_eq!(a[0].explanation, "exact-match");
}
// Checks dominance over a 5-value grid in each input: a strictly higher
// quality with no lower coverage or confidence must give a strictly higher
// score.
#[test]
fn score_monotonic_in_quality_coverage_confidence() {
use crate::protolens::ProtolensChain;
let proto = panproto_schema::Protocol {
name: "t".into(),
schema_theory: "ThTest".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["object".into()],
constraint_sorts: vec![],
..panproto_schema::Protocol::default()
};
let s = panproto_schema::SchemaBuilder::new(&proto)
.vertex("a", "object", None::<&str>)
.unwrap()
.build()
.unwrap();
// Candidate with a single step so `conf` is exactly the mean confidence.
let mk = |q: f64, c: f64, conf: f64| -> LensCandidate {
let chain = ProtolensChain::new(vec![]);
let lens = chain.instantiate(&s, &proto).unwrap();
LensCandidate {
chain,
lens,
quality: q,
coverage: c,
seed_anchors: vec![],
steps: vec![CandidateStep {
kind: "k".into(),
explanation: "e".into(),
confidence: conf,
strategy: None,
}],
strategies_used: vec![],
}
};
let xs = [0.0_f64, 0.25, 0.5, 0.75, 1.0];
for &q1 in &xs {
for &c1 in &xs {
for &f1 in &xs {
for &q2 in &xs {
for &c2 in &xs {
for &f2 in &xs {
if q1 > q2 && c1 >= c2 && f1 >= f2 {
let s1 = mk(q1, c1, f1).score();
let s2 = mk(q2, c2, f2).score();
assert!(
s1 > s2,
"dominance violated: q=({q1},{q2}) c=({c1},{c2}) \
f=({f1},{f2}) → score=({s1},{s2})"
);
}
}
}
}
}
}
}
}
// Repeated strategies are reported once, in first-seen order.
#[test]
fn strategies_used_dedups() {
let anchors = vec![
mk_anchor("a", "A", 1.0, StrategyTag::Exact, "exact"),
mk_anchor("b", "B", 0.9, StrategyTag::Alias, "alias"),
mk_anchor("c", "C", 0.8, StrategyTag::Alias, "alias-2"),
];
let used = strategies_used(&anchors);
assert_eq!(used, vec![StrategyTag::Exact, StrategyTag::Alias]);
}
}