use std::collections::HashMap;
use std::sync::Arc;
use panproto_gat::{Name, Theory, TheoryEndofunctor, TheoryMorphism, TheoryTransform, factorize};
use panproto_inst::value::Value;
use panproto_mig::align::{self, AliasDict, Anchor, CoerceAnchor, default_alias_dict};
use panproto_mig::hom_search::{
DomainConstraints, FoundMorphism, SearchOptions, find_best_morphism,
find_best_morphism_constrained, find_morphisms, find_morphisms_constrained,
};
use panproto_schema::{Protocol, Schema};
use crate::Lens;
use crate::coercion_laws::CoercionSampleRegistry;
use crate::error::LensError;
use crate::protolens::{Protolens, ProtolensChain, elementary};
/// Controls how aggressively automatic lens generation applies its heuristic
/// alignment strategies.
///
/// Variants are listed from most conservative to most permissive. The inherent
/// methods on this type report which strategies and thresholds each level
/// selects. Serialized in `snake_case` (`"strict"`, `"balanced"`, ...).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Stringency {
/// Every optional-strategy predicate on this type returns `false` and every
/// `*_threshold` method returns `1.0`.
Strict,
/// The default level: enables the alias dictionary, token similarity,
/// description similarity and relaxed edge-name pruning.
#[default]
Balanced,
/// Everything `Balanced` enables, plus neighborhood propagation, WL
/// refinement, wrap/unwrap anchors, type-signature anchors, the overlap
/// retry by default, and spans.
Lenient,
/// Everything `Lenient` enables, plus structural anchors and coercion
/// proposals, with the lowest thresholds of all levels.
Exploratory,
}
impl Stringency {
    /// Whether token-similarity anchors are generated (every level except `Strict`).
    #[must_use]
    pub const fn uses_token_similarity(self) -> bool {
        !matches!(self, Self::Strict)
    }

    /// Whether description-similarity anchors are generated (every level except `Strict`).
    #[must_use]
    pub const fn uses_description_similarity(self) -> bool {
        !matches!(self, Self::Strict)
    }

    /// Threshold handed to the description-similarity strategy.
    #[must_use]
    pub const fn description_similarity_threshold(self) -> f64 {
        match self {
            Self::Strict => 1.0,
            Self::Balanced => 0.55,
            Self::Lenient => 0.45,
            Self::Exploratory => 0.35,
        }
    }

    /// Whether neighborhood propagation runs after the seed strategies
    /// (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn uses_neighborhood_propagation(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Threshold handed to the neighborhood-propagation strategy.
    ///
    /// Levels that do not use propagation report `1.0`. The match is
    /// exhaustive on purpose so that adding a variant forces a decision here.
    #[must_use]
    pub const fn neighborhood_threshold(self) -> f64 {
        match self {
            Self::Strict | Self::Balanced => 1.0,
            Self::Lenient => 0.6,
            Self::Exploratory => 0.45,
        }
    }

    /// Whether WL-refinement anchors are generated (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn uses_wl_refinement(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Iteration count handed to the WL-refinement strategy.
    ///
    /// Exhaustive on purpose so that adding a variant forces a decision here.
    #[must_use]
    pub const fn wl_iterations(self) -> usize {
        match self {
            Self::Strict | Self::Balanced | Self::Lenient => 2,
            Self::Exploratory => 3,
        }
    }

    /// Whether the alias dictionary is consulted (every level except `Strict`).
    #[must_use]
    pub const fn uses_alias_dict(self) -> bool {
        !matches!(self, Self::Strict)
    }

    /// Whether wrap/unwrap anchors are generated (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn uses_wrap_unwrap(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Whether type-signature anchors are generated (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn uses_type_signature(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Whether structural anchors are generated (`Exploratory` only).
    #[must_use]
    pub const fn uses_structural(self) -> bool {
        matches!(self, Self::Exploratory)
    }

    /// Whether coercion proposals are generated (`Exploratory` only).
    #[must_use]
    pub const fn uses_coerce(self) -> bool {
        matches!(self, Self::Exploratory)
    }

    /// Threshold handed to the structural strategy.
    ///
    /// Levels that do not use the structural strategy report `1.0`. Exhaustive
    /// on purpose so that adding a variant forces a decision here.
    #[must_use]
    pub const fn structural_threshold(self) -> f64 {
        match self {
            Self::Strict | Self::Balanced | Self::Lenient => 1.0,
            Self::Exploratory => 0.40,
        }
    }

    /// Whether the overlap-seeded retry is switched on regardless of the
    /// caller's `try_overlap` setting (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn default_try_overlap(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Whether spans are allowed, i.e. source vertices may be excluded from
    /// the search domain (`Lenient` and `Exploratory` only).
    #[must_use]
    pub const fn allow_spans(self) -> bool {
        matches!(self, Self::Lenient | Self::Exploratory)
    }

    /// Threshold handed to the token-similarity strategy.
    #[must_use]
    pub const fn token_similarity_threshold(self) -> f64 {
        match self {
            Self::Strict => 1.0,
            Self::Balanced => 0.75,
            Self::Lenient => 0.55,
            Self::Exploratory => 0.40,
        }
    }

    /// Threshold handed to the type-signature strategy.
    #[must_use]
    pub const fn type_signature_threshold(self) -> f64 {
        match self {
            Self::Strict | Self::Balanced => 1.0,
            Self::Lenient => 0.75,
            Self::Exploratory => 0.50,
        }
    }

    /// Whether the search's edge-name pruning is relaxed (every level except `Strict`).
    #[must_use]
    pub const fn relax_edge_name_pruning(self) -> bool {
        !matches!(self, Self::Strict)
    }

    /// Stable lowercase label for this level; `Display` writes the same text.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Strict => "strict",
            Self::Balanced => "balanced",
            Self::Lenient => "lenient",
            Self::Exploratory => "exploratory",
        }
    }
}
impl std::fmt::Display for Stringency {
    /// Writes the same lowercase label that [`Stringency::as_str`] returns.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label: &'static str = self.as_str();
        f.write_str(label)
    }
}
/// Outcome of automatic lens generation.
pub struct AutoLensResult {
/// Protolens chain derived from the chosen alignment.
pub chain: ProtolensChain,
/// Lens obtained by instantiating `chain` against the source schema.
pub lens: Lens,
/// Quality score reported by the morphism search for the chosen alignment.
pub alignment_quality: f64,
/// Anchors that seeded the search: user hints first (when the hinted entry
/// point is used), followed by the strategy-generated anchors.
pub seed_anchors: Vec<Anchor>,
/// Coercion proposals produced by the strategy pass; empty unless the
/// configured stringency enables coercion.
pub coerce_proposals: Vec<CoerceAnchor>,
}
/// Configuration for automatic lens generation.
#[derive(Debug, Clone)]
pub struct AutoLensConfig {
/// Default values keyed by name.
// NOTE(review): not read by any code visible in this part of the file —
// presumably consumed elsewhere; confirm before documenting further.
pub defaults: HashMap<Name, Value>,
/// Base options for the morphism search; they are cloned and then adjusted
/// (stringency flags, seed anchors) before each search.
pub search_opts: SearchOptions,
/// Whether to retry the search with overlap-discovered seeds when the first
/// attempt finds nothing or scores below the quality floor. Forced on when
/// the stringency's `default_try_overlap()` is true.
pub try_overlap: bool,
/// How aggressively heuristic alignment strategies are applied.
pub stringency: Stringency,
/// Alias dictionary consulted by the alias strategy when the stringency
/// enables it.
pub alias_dict: AliasDict,
/// When `Some`, coercion proposals are law-checked against this registry's
/// samples and failing proposals are discarded; when `None`, proposals are
/// used unfiltered.
pub coercion_law_registry: Option<CoercionSampleRegistry>,
/// Policy applied by the law-check filter to proposals it cannot check.
pub filter_options: FilterOptions,
}
impl Default for AutoLensConfig {
    /// Default configuration: no default values, default search options,
    /// overlap retry off, default stringency, the stock alias dictionary,
    /// no coercion law registry, and default filter options.
    fn default() -> Self {
        let stringency = Stringency::default();
        let alias_dict = default_alias_dict();
        let search_opts = SearchOptions::default();
        let filter_options = FilterOptions::default();
        Self {
            stringency,
            alias_dict,
            search_opts,
            filter_options,
            defaults: HashMap::new(),
            try_overlap: false,
            coercion_law_registry: None,
        }
    }
}
/// What the law-check filter does with a coercion proposal it cannot check,
/// either because its witness is not in the default witness library or
/// because no samples are registered for the witness's source kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum UnknownSamplesPolicy {
/// Keep the uncheckable proposal (the default).
#[default]
Keep,
/// Drop the uncheckable proposal and record the reason.
Drop,
}
/// Options for the coercion-proposal law-check filter.
#[derive(Debug, Clone, Copy, Default)]
pub struct FilterOptions {
/// Policy for proposals that cannot be law-checked.
pub unknown: UnknownSamplesPolicy,
}
impl FilterOptions {
/// Builds filter options that apply `unknown` to proposals that cannot be
/// law-checked.
#[must_use]
pub const fn with_unknown(unknown: UnknownSamplesPolicy) -> Self {
Self { unknown }
}
}
/// Law-checks `proposals` against `registry` using the default
/// [`FilterOptions`] (uncheckable proposals are kept).
///
/// Returns `(kept, dropped)`, where each dropped proposal is paired with the
/// reason it was rejected.
#[must_use]
pub fn filter_coerce_proposals_by_law_check(
proposals: Vec<CoerceAnchor>,
registry: &CoercionSampleRegistry,
) -> (Vec<CoerceAnchor>, Vec<(CoerceAnchor, String)>) {
filter_coerce_proposals_by_law_check_with_policy(proposals, registry, FilterOptions::default())
}
/// Partitions coercion proposals into those that pass the backward-law check
/// and those that do not.
///
/// For each proposal the witness is looked up by name in the default witness
/// library and the registry is asked for samples of the witness's source
/// kind. If either lookup comes back empty the proposal cannot be checked and
/// `options.unknown` decides whether it is kept or dropped. Otherwise the
/// proposal is kept only when `check_witness_backward_law` reports no failure.
///
/// Returns `(kept, dropped)`; every dropped proposal carries the reason.
/// Input order is preserved within each list.
#[must_use]
pub fn filter_coerce_proposals_by_law_check_with_policy(
    proposals: Vec<CoerceAnchor>,
    registry: &CoercionSampleRegistry,
    options: FilterOptions,
) -> (Vec<CoerceAnchor>, Vec<(CoerceAnchor, String)>) {
    let library = panproto_mig::coerce::default_witness_library();
    let drop_unknown = matches!(options.unknown, UnknownSamplesPolicy::Drop);

    let mut kept = Vec::with_capacity(proposals.len());
    let mut dropped = Vec::new();

    for proposal in proposals {
        // `Some(reason)` means "reject this proposal", `None` means "keep it".
        let rejection: Option<String> = match library.witness_by_name(&proposal.witness_name) {
            None => drop_unknown.then(|| "witness not found in default library".to_owned()),
            Some(witness) => {
                let samples = registry.samples_for(witness.source_kind);
                if samples.is_empty() {
                    drop_unknown.then(|| {
                        format!(
                            "no samples registered for source kind {:?}",
                            witness.source_kind
                        )
                    })
                } else {
                    check_witness_backward_law(witness, samples)
                }
            }
        };

        match rejection {
            Some(reason) => dropped.push((proposal, reason)),
            None => kept.push(proposal),
        }
    }

    (kept, dropped)
}
/// Checks that applying a witness's forward expression and then its inverse
/// returns every sample unchanged.
///
/// Samples are processed in order and the first failure is reported as
/// `Some(reason)`: a forward evaluation error, a missing inverse expression,
/// a missing inverse parameter, an inverse evaluation error, or a round-trip
/// result that differs from the sample. Returns `None` when every sample
/// round-trips, which includes the case where `samples` is empty.
fn check_witness_backward_law(
    witness: &panproto_mig::coerce::SortLensWitness,
    samples: &[panproto_expr::Literal],
) -> Option<String> {
    use panproto_expr::{Env, EvalConfig, eval};

    let eval_config = EvalConfig::default();

    samples.iter().find_map(|sample| {
        let forward_env = Env::new().extend(Arc::clone(&witness.forward_param), sample.clone());
        let image = match eval(&witness.forward, &forward_env, &eval_config) {
            Ok(value) => value,
            Err(e) => return Some(format!("forward eval failed on {sample:?}: {e}")),
        };

        // The inverse is inspected only after a successful forward step, so a
        // forward failure is always reported first.
        let Some(inverse_expr) = witness.inverse.as_ref() else {
            return Some(format!(
                "witness declares {:?} without an inverse",
                witness.class
            ));
        };
        let Some(inverse_param) = witness.inverse_param.as_ref() else {
            return Some("witness has inverse expression but no inverse_param".to_owned());
        };

        let inverse_env = Env::new().extend(Arc::clone(inverse_param), image);
        match eval(inverse_expr, &inverse_env, &eval_config) {
            Err(e) => Some(format!("inverse eval failed on {sample:?}: {e}")),
            Ok(round_trip) if &round_trip != sample => Some(format!(
                "round-trip mismatch on {sample:?}: got {round_trip:?}"
            )),
            Ok(_) => None,
        }
    })
}
/// Public, documentation-hidden wrapper around the private `run_strategies`,
/// so tests outside this module can inspect the generated anchors and
/// coercion proposals.
#[doc(hidden)]
#[must_use]
pub fn run_strategies_for_tests(
src: &Schema,
tgt: &Schema,
config: &AutoLensConfig,
) -> (Vec<Anchor>, Vec<CoerceAnchor>) {
run_strategies(src, tgt, config)
}
/// Runs every alignment strategy enabled by `config.stringency` and collects
/// the anchors they propose.
///
/// Exact, suffix and edge-label anchors are always produced. The remaining
/// strategies run only when the stringency enables them, in a fixed order.
/// Coercion proposals, when enabled, are law-checked if a registry is
/// configured, and the surviving proposals also contribute their anchors.
/// The anchor list is then adjusted by required sets, and neighborhood
/// propagation, when enabled, is seeded from the resolved anchors.
///
/// Returns `(anchors, coerce_proposals)`.
fn run_strategies(
    src: &Schema,
    tgt: &Schema,
    config: &AutoLensConfig,
) -> (Vec<Anchor>, Vec<CoerceAnchor>) {
    let level = config.stringency;
    let mut anchors: Vec<Anchor> = Vec::new();

    // Strategies that run at every stringency level.
    anchors.extend(align::exact_anchors(src, tgt));
    anchors.extend(align::suffix_anchors(src, tgt));
    anchors.extend(align::edge_label_anchors(src, tgt));

    // Stringency-gated strategies; anchors are appended in this order.
    if level.uses_alias_dict() {
        anchors.extend(align::alias_anchors(src, tgt, &config.alias_dict));
    }
    if level.uses_token_similarity() {
        anchors.extend(align::token_anchors(
            src,
            tgt,
            level.token_similarity_threshold(),
        ));
    }
    if level.uses_description_similarity() {
        anchors.extend(align::description_anchors(
            src,
            tgt,
            level.description_similarity_threshold(),
        ));
    }
    if level.uses_wrap_unwrap() {
        anchors.extend(align::wrap_unwrap_anchors(src, tgt));
    }
    if level.uses_type_signature() {
        anchors.extend(align::type_signature_anchors(
            src,
            tgt,
            level.type_signature_threshold(),
        ));
    }
    if level.uses_structural() {
        anchors.extend(align::structural_anchors(
            src,
            tgt,
            level.structural_threshold(),
        ));
    }
    if level.uses_wl_refinement() {
        anchors.extend(align::wl_anchors(src, tgt, level.wl_iterations()));
    }

    let coerce_proposals: Vec<CoerceAnchor> = if level.uses_coerce() {
        let library = panproto_mig::coerce::default_witness_library();
        let raw_proposals = align::coerce_anchors(src, tgt, &library);
        // Only proposals that pass the law check (when one is configured)
        // contribute anchors; the rejected ones are discarded here.
        let vetted = match config.coercion_law_registry.as_ref() {
            Some(registry) => {
                filter_coerce_proposals_by_law_check_with_policy(
                    raw_proposals,
                    registry,
                    config.filter_options,
                )
                .0
            }
            None => raw_proposals,
        };
        anchors.extend(vetted.iter().map(|ca| ca.anchor.clone()));
        vetted
    } else {
        Vec::new()
    };

    align::adjust_anchors_by_required_sets(&mut anchors, src, tgt);

    if level.uses_neighborhood_propagation() {
        let seeds = align::resolve_anchors(&anchors, false);
        let propagated =
            align::neighborhood_anchors(src, tgt, &seeds, level.neighborhood_threshold());
        anchors.extend(propagated);
    }

    (anchors, coerce_proposals)
}
/// Adds each `additional` pair to `opts.initial` unless the source vertex
/// already has an entry; existing entries are never overwritten.
fn merge_seed_anchors(opts: &mut SearchOptions, additional: &HashMap<Name, Name>) {
    additional.iter().for_each(|(src_v, tgt_v)| {
        opts.initial
            .entry(src_v.clone())
            .or_insert_with(|| tgt_v.clone());
    });
}
/// Turns on `opts.relax_edge_name_pruning` when the stringency asks for it.
/// The flag is never turned off here, so a caller-supplied `true` survives.
const fn apply_stringency_search_opts(opts: &mut SearchOptions, stringency: Stringency) {
    if !stringency.relax_edge_name_pruning() {
        return;
    }
    opts.relax_edge_name_pruning = true;
}
/// Lists, sorted by name, the source vertices whose kind does not occur on
/// any target vertex.
fn sources_without_compatible_targets(src: &Schema, tgt: &Schema) -> Vec<Name> {
    let kinds_in_target: std::collections::HashSet<&str> = tgt
        .vertices
        .values()
        .map(|vertex| vertex.kind.as_str())
        .collect();

    let mut unmatched: Vec<Name> = Vec::new();
    for (id, vertex) in &src.vertices {
        if !kinds_in_target.contains(vertex.kind.as_str()) {
            unmatched.push(id.clone());
        }
    }
    unmatched.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
    unmatched
}
/// Lists, sorted by name, the source vertices for which no target vertex
/// passes `naturality_feasible` under the given anchors.
fn sources_without_naturality_compatible_targets(
    src: &Schema,
    tgt: &Schema,
    anchors: &HashMap<Name, Name>,
    strict_edge_names: bool,
) -> Vec<Name> {
    // Built once and shared by every (source, target) feasibility probe.
    let edge_index = build_target_edge_index(tgt);

    let mut infeasible: Vec<Name> = Vec::new();
    for s in src.vertices.keys() {
        let has_candidate = tgt.vertices.keys().any(|t| {
            naturality_feasible(src, s, tgt, t, anchors, strict_edge_names, &edge_index)
        });
        if !has_candidate {
            infeasible.push(s.clone());
        }
    }
    infeasible.sort_by(|lhs, rhs| lhs.as_str().cmp(rhs.as_str()));
    infeasible
}
/// Indexes the target schema's outgoing edges as
/// `vertex id -> edge kind -> edges of that kind`.
///
/// Every target vertex gets an entry, even when it has no outgoing edges.
fn build_target_edge_index<'a>(
    tgt: &'a Schema,
) -> rustc_hash::FxHashMap<&'a Name, rustc_hash::FxHashMap<&'a Name, Vec<&'a panproto_schema::Edge>>>
{
    type EdgesByKind<'e> = rustc_hash::FxHashMap<&'e Name, Vec<&'e panproto_schema::Edge>>;

    let mut index: rustc_hash::FxHashMap<&'a Name, EdgesByKind<'a>> =
        rustc_hash::FxHashMap::default();
    for vertex_id in tgt.vertices.keys() {
        // Create the (possibly empty) bucket first, then fill it in place.
        let by_kind = index.entry(vertex_id).or_default();
        for edge in tgt.outgoing_edges(vertex_id.as_str()) {
            by_kind.entry(&edge.kind).or_default().push(edge);
        }
    }
    index
}
/// Decides whether source vertex `s` could map to target vertex `t`.
///
/// The two vertices must have compatible kinds, and every outgoing edge of
/// `s` needs at least one outgoing edge of `t` with the same kind that
/// (a) has a compatible label when `strict_edge_names` is set and
/// (b) leads to a child consistent with `anchors`.
fn naturality_feasible<'a>(
    src: &Schema,
    s: &Name,
    tgt: &Schema,
    t: &Name,
    anchors: &HashMap<Name, Name>,
    strict_edge_names: bool,
    target_edge_index: &rustc_hash::FxHashMap<
        &'a Name,
        rustc_hash::FxHashMap<&'a Name, Vec<&'a panproto_schema::Edge>>,
    >,
) -> bool {
    if !align::kinds_compatible(src, s, tgt, t) {
        return false;
    }

    // A target missing from the index is treated as having no outgoing edges.
    let no_edges: rustc_hash::FxHashMap<&'a Name, Vec<&'a panproto_schema::Edge>> =
        rustc_hash::FxHashMap::default();
    let by_kind = target_edge_index.get(t).unwrap_or(&no_edges);

    for se in src.outgoing_edges(s.as_str()).iter() {
        let satisfiable = by_kind.get(&se.kind).is_some_and(|candidates| {
            candidates.iter().any(|te| {
                if strict_edge_names && !edge_labels_compatible(se, te) {
                    return false;
                }
                child_target_respects_anchor(src, se, tgt, te, anchors)
            })
        });
        if !satisfiable {
            return false;
        }
    }
    true
}
/// Two edge labels are compatible when either edge is unnamed or both names
/// are equal.
fn edge_labels_compatible(se: &panproto_schema::Edge, te: &panproto_schema::Edge) -> bool {
    if let (Some(src_name), Some(tgt_name)) = (&se.name, &te.name) {
        src_name == tgt_name
    } else {
        true
    }
}
/// Checks that target edge `te` leads somewhere acceptable for source edge `se`.
///
/// When the source edge's child is anchored, the target edge must end exactly
/// at the anchored vertex. Otherwise the two children only need compatible
/// kinds.
fn child_target_respects_anchor(
    src: &Schema,
    se: &panproto_schema::Edge,
    tgt: &Schema,
    te: &panproto_schema::Edge,
    anchors: &HashMap<Name, Name>,
) -> bool {
    match anchors.get(&se.tgt) {
        Some(anchored) => &te.tgt == anchored,
        None => align::kinds_compatible(src, &se.tgt, tgt, &te.tgt),
    }
}
pub fn auto_generate(
src: &Schema,
tgt: &Schema,
protocol: &Protocol,
config: &AutoLensConfig,
) -> Result<AutoLensResult, LensError> {
let (seed_anchors, coerce_proposals) = run_strategies(src, tgt, config);
let resolved = align::resolve_anchors(&seed_anchors, config.search_opts.monic);
let mut search_opts = config.search_opts.clone();
apply_stringency_search_opts(&mut search_opts, config.stringency);
merge_seed_anchors(&mut search_opts, &resolved);
let mut effective = config.clone();
if config.stringency.default_try_overlap() {
effective.try_overlap = true;
}
let span_constraints = span_exclusions_at_lenient(
src,
tgt,
config.stringency,
&resolved,
!search_opts.relax_edge_name_pruning,
);
let result = run_search(
src,
tgt,
protocol,
&effective,
&search_opts,
span_constraints.as_ref(),
DEFAULT_QUALITY_FLOOR,
)?;
Ok(AutoLensResult {
chain: result.chain,
lens: result.lens,
alignment_quality: result.alignment_quality,
seed_anchors,
coerce_proposals,
})
}
/// Quality below which `run_search` attempts the overlap-seeded retry (when
/// overlap is enabled); used whenever the caller supplies no threshold.
const DEFAULT_QUALITY_FLOOR: f64 = 0.5;
/// Internal result of `run_search`: the chosen alignment turned into a chain
/// and lens, together with its quality score.
struct SearchResult {
/// Protolens chain derived from the chosen alignment.
chain: ProtolensChain,
/// Lens instantiated from `chain`, with field transforms attached.
lens: Lens,
/// Quality reported by the search for the chosen alignment.
alignment_quality: f64,
}
/// Searches for the best morphism and turns it into a chain and lens.
///
/// The search is constrained when `domain_constraints` is `Some`. If
/// `config.try_overlap` is set and the first attempt finds nothing or scores
/// below `quality_floor`, the search is retried with additional seeds from
/// overlap discovery. Existing seeds are never overwritten, and the retry
/// result replaces the first one only when its quality is strictly higher.
///
/// # Errors
///
/// Returns a [`LensError`] when no morphism is found, or when building the
/// protolens chain or instantiating the lens fails.
fn run_search(
    src: &Schema,
    tgt: &Schema,
    protocol: &Protocol,
    config: &AutoLensConfig,
    search_opts: &SearchOptions,
    domain_constraints: Option<&DomainConstraints>,
    quality_floor: f64,
) -> Result<SearchResult, LensError> {
    let search = |opts: &SearchOptions| -> Option<FoundMorphism> {
        match domain_constraints {
            Some(dc) => find_best_morphism_constrained(src, tgt, opts, dc),
            None => find_best_morphism(src, tgt, opts),
        }
    };

    let mut best = search(search_opts);

    let below_floor = best.as_ref().is_none_or(|a| a.quality < quality_floor);
    if config.try_overlap && below_floor {
        let overlap = panproto_mig::discover_overlap(src, tgt);
        if !overlap.vertex_pairs.is_empty() {
            let mut overlap_opts = search_opts.clone();
            for (src_id, tgt_id) in &overlap.vertex_pairs {
                overlap_opts
                    .initial
                    .entry(src_id.clone())
                    .or_insert_with(|| tgt_id.clone());
            }
            if let Some(retry) = search(&overlap_opts) {
                let improves = best.as_ref().is_none_or(|a| retry.quality > a.quality);
                if improves {
                    best = Some(retry);
                }
            }
        }
    }

    let Some(alignment) = best else {
        return Err(LensError::ProtolensError(
            "no morphism found between schemas".into(),
        ));
    };

    let alignment_quality = alignment.quality;
    let emit_spans = config.stringency.allow_spans();
    let chain = protolens_from_alignment_mode(&alignment, src, tgt, emit_spans)?;
    let mut lens = chain.instantiate(src, protocol)?;
    lens.compiled.field_transforms = derive_field_transforms(&chain, src, tgt);

    Ok(SearchResult {
        chain,
        lens,
        alignment_quality,
    })
}
pub fn auto_generate_with_hints(
src: &Schema,
tgt: &Schema,
protocol: &Protocol,
config: &AutoLensConfig,
anchors: &HashMap<Name, Name>,
domain_constraints: &DomainConstraints,
quality_threshold: Option<f64>,
) -> Result<AutoLensResult, LensError> {
let quality_floor = match quality_threshold {
None => DEFAULT_QUALITY_FLOOR,
Some(x) if x.is_nan() => {
return Err(LensError::ProtolensError(
"quality_threshold must not be NaN".into(),
));
}
Some(x) => x.clamp(0.0, 1.0),
};
let (strategy_anchors, coerce_proposals) = run_strategies(src, tgt, config);
let resolved_strategy = align::resolve_anchors(&strategy_anchors, config.search_opts.monic);
let mut search_opts = config.search_opts.clone();
apply_stringency_search_opts(&mut search_opts, config.stringency);
for (src_v, tgt_v) in anchors {
search_opts.initial.insert(src_v.clone(), tgt_v.clone());
}
merge_seed_anchors(&mut search_opts, &resolved_strategy);
let mut effective = config.clone();
if config.stringency.default_try_overlap() {
effective.try_overlap = true;
}
let mut merged_domain = domain_constraints.clone();
let mut feasibility_anchors: HashMap<Name, Name> = resolved_strategy.clone();
for (s, t) in anchors {
feasibility_anchors.insert(s.clone(), t.clone());
}
if let Some(span) = span_exclusions_at_lenient(
src,
tgt,
config.stringency,
&feasibility_anchors,
!search_opts.relax_edge_name_pruning,
) {
for src_v in span.excluded_sources {
if anchors.contains_key(&src_v) {
continue;
}
merged_domain.excluded_sources.insert(src_v);
}
}
let result = run_search(
src,
tgt,
protocol,
&effective,
&search_opts,
Some(&merged_domain),
quality_floor,
)?;
let mut combined = Vec::with_capacity(strategy_anchors.len() + anchors.len());
let mut user_pairs: Vec<(&Name, &Name)> = anchors.iter().collect();
user_pairs.sort_by(|a, b| a.0.as_str().cmp(b.0.as_str()));
for (src_v, tgt_v) in user_pairs {
combined.push(Anchor {
src: src_v.clone(),
tgt: tgt_v.clone(),
confidence: 1.0,
strategy: align::StrategyTag::UserHint,
explanation: format!("user hint: {} ↔ {}", src_v.as_str(), tgt_v.as_str()),
});
}
combined.extend(strategy_anchors);
Ok(AutoLensResult {
chain: result.chain,
lens: result.lens,
alignment_quality: result.alignment_quality,
seed_anchors: combined,
coerce_proposals,
})
}
/// Converts a found alignment into a protolens chain with span emission
/// disabled; shorthand for [`protolens_from_alignment_mode`] with
/// `emit_spans = false`.
///
/// # Errors
///
/// Propagates any error from [`protolens_from_alignment_mode`].
pub fn protolens_from_alignment(
alignment: &FoundMorphism,
src: &Schema,
tgt: &Schema,
) -> Result<ProtolensChain, LensError> {
protolens_from_alignment_mode(alignment, src, tgt, false)
}
/// Converts a found alignment into a protolens chain.
///
/// Both schemas are turned into implicit theories, the alignment becomes a
/// theory morphism between them (with `emit_spans` controlling whether
/// unmapped source sorts and ops are left out), the morphism is factorized,
/// and each factorization step is translated into a protolens.
///
/// # Errors
///
/// Returns [`LensError::ProtolensError`] when factorization fails, and
/// propagates the first error from translating a factorization step.
pub fn protolens_from_alignment_mode(
    alignment: &FoundMorphism,
    src: &Schema,
    tgt: &Schema,
    emit_spans: bool,
) -> Result<ProtolensChain, LensError> {
    let src_theory = schema_to_implicit_theory(src);
    let tgt_theory = schema_to_implicit_theory(tgt);
    let morphism = alignment_to_theory_morphism_mode(alignment, src, tgt, emit_spans);

    let factorization = match factorize(&morphism, &src_theory, &tgt_theory) {
        Ok(factorization) => factorization,
        Err(e) => {
            return Err(LensError::ProtolensError(format!(
                "factorization failed: {e}"
            )));
        }
    };

    // Stops at the first step that cannot be expressed as a protolens.
    let steps = factorization
        .steps
        .iter()
        .map(|endofunctor| endofunctor_to_protolens(endofunctor))
        .collect::<Result<Vec<_>, LensError>>()?;

    Ok(ProtolensChain::new(steps))
}
fn derive_field_transforms(
chain: &ProtolensChain,
src: &Schema,
_tgt: &Schema,
) -> std::collections::HashMap<Name, Vec<panproto_inst::FieldTransform>> {
use panproto_gat::TheoryTransform;
use panproto_inst::FieldTransform;
let mut transforms: std::collections::HashMap<Name, Vec<FieldTransform>> =
std::collections::HashMap::new();
for step in &chain.steps {
match &step.target.transform {
TheoryTransform::RenameOp { old, new } => {
for vid in src.vertices.keys() {
let has_edge = src
.outgoing_edges(vid)
.iter()
.any(|e| e.name.as_deref() == Some(old.as_ref()));
if has_edge {
transforms.entry(vid.clone()).or_default().push(
FieldTransform::RenameField {
old_key: old.to_string(),
new_key: new.to_string(),
},
);
}
}
}
TheoryTransform::DropOp(name) => {
for vid in src.vertices.keys() {
let has_edge = src
.outgoing_edges(vid)
.iter()
.any(|e| e.name.as_deref() == Some(name.as_ref()));
if has_edge {
transforms.entry(vid.clone()).or_default().push(
FieldTransform::DropField {
key: name.to_string(),
},
);
}
}
}
TheoryTransform::AddDirectedEquation(deq) => {
let Some(key) = (match &deq.lhs {
panproto_gat::Term::App { op, .. } => Some(op.to_string()),
panproto_gat::Term::Var(_)
| panproto_gat::Term::Case { .. }
| panproto_gat::Term::Hole { .. }
| panproto_gat::Term::Let { .. } => None,
}) else {
continue;
};
for vid in src.vertices.keys() {
let has_edge = src
.outgoing_edges(vid)
.iter()
.any(|e| e.name.as_deref() == Some(key.as_str()));
if !has_edge {
continue;
}
transforms
.entry(vid.clone())
.or_default()
.push(FieldTransform::ApplyExpr {
key: key.clone(),
expr: deq.impl_term.clone(),
inverse: deq.inverse.clone(),
coercion_class: deq.coercion_class,
});
}
}
TheoryTransform::CoerceSort {
sort_name,
coercion_expr,
inverse_expr,
coercion_class,
..
} => {
for vid in src.vertices.keys() {
if src.vertex(vid).is_some_and(|v| *v.kind == **sort_name) {
transforms.entry(vid.clone()).or_default().push(
FieldTransform::ApplyExpr {
key: "__value__".to_string(),
expr: coercion_expr.clone(),
inverse: inverse_expr.clone(),
coercion_class: *coercion_class,
},
);
}
}
}
_ => {} }
}
transforms
}
/// Module-local shorthand for `crate::protolens::schema_to_implicit_theory`.
fn schema_to_implicit_theory(schema: &Schema) -> Theory {
crate::protolens::schema_to_implicit_theory(schema)
}
/// Generates up to `top_n` ranked lens candidates from `src` to `tgt`.
///
/// `top_n` is raised to at least 1. Strategy anchors seed the search and the
/// stringency-driven span exclusions constrain it.
///
/// # Errors
///
/// Returns a [`LensError`] when the search yields no morphism or none of the
/// morphisms can be realized as a protolens.
pub fn auto_generate_candidates(
    src: &Schema,
    tgt: &Schema,
    protocol: &Protocol,
    config: &AutoLensConfig,
    top_n: usize,
) -> Result<Vec<crate::candidate::LensCandidate>, LensError> {
    let wanted = top_n.max(1);
    let stringency = config.stringency;

    let (seed_anchors, _unused_coerce_proposals) = run_strategies(src, tgt, config);
    let resolved = align::resolve_anchors(&seed_anchors, config.search_opts.monic);

    let search_opts = {
        let mut opts = config.search_opts.clone();
        apply_stringency_search_opts(&mut opts, stringency);
        merge_seed_anchors(&mut opts, &resolved);
        opts.max_results = wanted;
        opts
    };

    let strict_edge_names = !search_opts.relax_edge_name_pruning;
    let span_constraints =
        span_exclusions_at_lenient(src, tgt, stringency, &resolved, strict_edge_names);

    candidates_from_search(
        src,
        tgt,
        protocol,
        &search_opts,
        span_constraints.as_ref(),
        &seed_anchors,
        wanted,
        stringency.allow_spans(),
    )
}
/// Computes which source vertices to exclude from the search domain when the
/// stringency allows spans.
///
/// Returns `None` when spans are not allowed or nothing needs excluding.
/// Without anchors, a source vertex is excluded when its kind occurs nowhere
/// in the target; with anchors, when no target vertex is naturality-feasible
/// for it.
fn span_exclusions_at_lenient(
    src: &Schema,
    tgt: &Schema,
    stringency: Stringency,
    anchors: &HashMap<Name, Name>,
    strict_edge_names: bool,
) -> Option<DomainConstraints> {
    if !stringency.allow_spans() {
        return None;
    }

    let unmappable = if !anchors.is_empty() {
        sources_without_naturality_compatible_targets(src, tgt, anchors, strict_edge_names)
    } else {
        sources_without_compatible_targets(src, tgt)
    };

    (!unmappable.is_empty()).then(|| {
        let mut constraints = DomainConstraints::default();
        constraints.excluded_sources.extend(unmappable);
        constraints
    })
}
pub fn auto_generate_candidates_with_hints(
src: &Schema,
tgt: &Schema,
protocol: &Protocol,
config: &AutoLensConfig,
anchors: &HashMap<Name, Name>,
domain_constraints: &DomainConstraints,
top_n: usize,
) -> Result<Vec<crate::candidate::LensCandidate>, LensError> {
let n = top_n.max(1);
let (strategy_anchors, _coerce_proposals) = run_strategies(src, tgt, config);
let resolved_strategy = align::resolve_anchors(&strategy_anchors, config.search_opts.monic);
let mut search_opts = config.search_opts.clone();
apply_stringency_search_opts(&mut search_opts, config.stringency);
for (src_v, tgt_v) in anchors {
search_opts.initial.insert(src_v.clone(), tgt_v.clone());
}
merge_seed_anchors(&mut search_opts, &resolved_strategy);
search_opts.max_results = n;
let mut merged_domain = domain_constraints.clone();
let mut feasibility_anchors: HashMap<Name, Name> = resolved_strategy.clone();
for (s, t) in anchors {
feasibility_anchors.insert(s.clone(), t.clone());
}
if let Some(span) = span_exclusions_at_lenient(
src,
tgt,
config.stringency,
&feasibility_anchors,
!search_opts.relax_edge_name_pruning,
) {
for src_v in span.excluded_sources {
if anchors.contains_key(&src_v) {
continue;
}
merged_domain.excluded_sources.insert(src_v);
}
}
let mut combined = Vec::with_capacity(strategy_anchors.len() + anchors.len());
let mut user_pairs: Vec<(&Name, &Name)> = anchors.iter().collect();
user_pairs.sort_by(|a, b| a.0.as_str().cmp(b.0.as_str()));
for (src_v, tgt_v) in user_pairs {
combined.push(Anchor {
src: src_v.clone(),
tgt: tgt_v.clone(),
confidence: 1.0,
strategy: align::StrategyTag::UserHint,
explanation: format!("user hint: {} ↔ {}", src_v.as_str(), tgt_v.as_str()),
});
}
combined.extend(strategy_anchors);
candidates_from_search(
src,
tgt,
protocol,
&search_opts,
Some(&merged_domain),
&combined,
n,
config.stringency.allow_spans(),
)
}
/// Concatenates the chain's step names, each followed by `|`, into one string
/// (used as a deterministic tie-breaker when ranking candidates).
fn chain_step_names(chain: &ProtolensChain) -> String {
    chain.steps.iter().fold(String::new(), |mut joined, step| {
        joined.push_str(step.name.as_str());
        joined.push('|');
        joined
    })
}
/// Enumerates morphisms and turns them into at most `n` ranked candidates.
///
/// Morphisms that cannot be realized as a protolens are skipped, and
/// collection stops once `2 * n` candidates exist. Candidates are ranked by
/// descending score, then by fewer chain steps, then by the concatenated step
/// names, and the list is cut to `n`.
///
/// # Errors
///
/// Returns [`LensError::ProtolensError`] when the search finds no morphism,
/// or when every morphism fails to convert (the last failure is quoted).
#[allow(clippy::too_many_arguments)]
fn candidates_from_search(
    src: &Schema,
    tgt: &Schema,
    protocol: &Protocol,
    search_opts: &SearchOptions,
    domain_constraints: Option<&DomainConstraints>,
    seed_anchors: &[Anchor],
    n: usize,
    emit_spans: bool,
) -> Result<Vec<crate::candidate::LensCandidate>, LensError> {
    let morphisms = match domain_constraints {
        Some(dc) => find_morphisms_constrained(src, tgt, search_opts, dc),
        None => find_morphisms(src, tgt, search_opts),
    };
    if morphisms.is_empty() {
        return Err(LensError::ProtolensError(
            "no morphism found between schemas".into(),
        ));
    }

    // Collect a little more than requested so ranking has something to choose from.
    let collect_limit = n.saturating_mul(2);
    let mut candidates = Vec::with_capacity(morphisms.len());
    let mut last_failure: Option<LensError> = None;

    for morphism in morphisms {
        match candidate_from_morphism(src, tgt, protocol, &morphism, seed_anchors, emit_spans) {
            Err(e) => last_failure = Some(e),
            Ok(candidate) => {
                candidates.push(candidate);
                if candidates.len() >= collect_limit {
                    break;
                }
            }
        }
    }

    if candidates.is_empty() {
        let error = match last_failure {
            Some(e) => LensError::ProtolensError(format!(
                "no morphism could be realized as a protolens; \
                 last factorization failure: {e}"
            )),
            None => {
                LensError::ProtolensError("no morphism could be realized as a protolens".into())
            }
        };
        return Err(error);
    }

    candidates.sort_by(|lhs, rhs| {
        rhs.score()
            .total_cmp(&lhs.score())
            .then_with(|| lhs.chain.steps.len().cmp(&rhs.chain.steps.len()))
            .then_with(|| chain_step_names(&lhs.chain).cmp(&chain_step_names(&rhs.chain)))
    });
    candidates.truncate(n);
    Ok(candidates)
}
fn candidate_from_morphism(
src: &Schema,
tgt: &Schema,
protocol: &Protocol,
morphism: &FoundMorphism,
seed_anchors: &[Anchor],
emit_spans: bool,
) -> Result<crate::candidate::LensCandidate, LensError> {
let chain = protolens_from_alignment_mode(morphism, src, tgt, emit_spans)?;
let mut lens = chain.instantiate(src, protocol)?;
lens.compiled.field_transforms = derive_field_transforms(&chain, src, tgt);
let coverage = crate::candidate::coverage_ratio(
src,
tgt,
crate::candidate::matched_count(&morphism.vertex_map),
);
let steps = crate::candidate::enrich_steps(&chain, seed_anchors);
let strategies_used = crate::candidate::strategies_used(seed_anchors);
Ok(crate::candidate::LensCandidate {
chain,
lens,
quality: morphism.quality,
coverage,
seed_anchors: seed_anchors.to_vec(),
steps,
strategies_used,
})
}
/// Test-only shorthand for `alignment_to_theory_morphism_mode` with
/// `emit_spans = false`.
#[cfg(test)]
fn alignment_to_theory_morphism(
found: &FoundMorphism,
src: &Schema,
tgt: &Schema,
) -> TheoryMorphism {
alignment_to_theory_morphism_mode(found, src, tgt, false)
}
/// Turns a found alignment into a theory morphism between the schemas'
/// implicit theories.
///
/// Sorts are mapped by vertex kind and operations by edge kind. Pairs are
/// visited in sorted order and the first mapping seen for a kind wins. When
/// `emit_spans` is false, every source sort and operation still unmapped is
/// mapped to itself; when true, they stay unmapped.
fn alignment_to_theory_morphism_mode(
    found: &FoundMorphism,
    src: &Schema,
    tgt: &Schema,
    emit_spans: bool,
) -> TheoryMorphism {
    // Sort map: source vertex kind -> target vertex kind.
    let mut vertex_pairs: Vec<(&Name, &Name)> = found.vertex_map.iter().collect();
    vertex_pairs.sort_by(|lhs, rhs| lhs.0.as_str().cmp(rhs.0.as_str()));

    let mut sort_map: HashMap<Arc<str>, Arc<str>> = HashMap::new();
    for (src_id, tgt_id) in vertex_pairs {
        let (Some(src_v), Some(tgt_v)) = (src.vertices.get(src_id), tgt.vertices.get(tgt_id))
        else {
            // Pairs that name a vertex missing from either schema are ignored.
            continue;
        };
        sort_map
            .entry(Arc::from(src_v.kind.as_str()))
            .or_insert_with(|| Arc::from(tgt_v.kind.as_str()));
    }

    // Op map: source edge kind -> target edge kind.
    let mut edge_pairs: Vec<(&panproto_schema::Edge, &panproto_schema::Edge)> =
        found.edge_map.iter().collect();
    edge_pairs.sort_by(|lhs, rhs| {
        let (a, b) = (lhs.0, rhs.0);
        a.src
            .as_str()
            .cmp(b.src.as_str())
            .then_with(|| a.tgt.as_str().cmp(b.tgt.as_str()))
            .then_with(|| a.kind.as_str().cmp(b.kind.as_str()))
    });

    let mut op_map: HashMap<Arc<str>, Arc<str>> = HashMap::new();
    for (src_edge, tgt_edge) in edge_pairs {
        op_map
            .entry(Arc::from(src_edge.kind.as_str()))
            .or_insert_with(|| Arc::from(tgt_edge.kind.as_str()));
    }

    if !emit_spans {
        // Complete both maps with identity entries for anything still unmapped.
        let src_theory = crate::protolens::schema_to_implicit_theory(src);
        for sort in &src_theory.sorts {
            sort_map
                .entry(Arc::clone(&sort.name))
                .or_insert_with(|| Arc::clone(&sort.name));
        }
        for op in &src_theory.ops {
            op_map
                .entry(Arc::clone(&op.name))
                .or_insert_with(|| Arc::clone(&op.name));
        }
    }

    TheoryMorphism::new(
        "auto_morphism",
        "src_implicit",
        "tgt_implicit",
        sort_map,
        op_map,
    )
}
/// Translates one factorization step into the corresponding elementary
/// protolens.
///
/// # Errors
///
/// Returns [`LensError::ProtolensError`] for transforms with no protolens
/// counterpart here: `AddOp` without inputs, `MergeSorts`, and the variants
/// that are not expected to appear in a factorization (`Identity`, `Compose`,
/// `RenameEdgeName`, `AddEdge`, `DropEdge`, `ScopedTransform`, and the
/// enrichment transforms).
fn endofunctor_to_protolens(endofunctor: &TheoryEndofunctor) -> Result<Protolens, LensError> {
    // Shared constructor for the fixed-message rejections below.
    let reject = |message: &str| -> Result<Protolens, LensError> {
        Err(LensError::ProtolensError(message.into()))
    };

    let protolens = match &endofunctor.transform {
        TheoryTransform::AddSort { sort, vertex_kind } => {
            let kind = match vertex_kind.as_ref() {
                Some(explicit) => Arc::clone(explicit),
                None => sort.default_vertex_kind(),
            };
            elementary::add_sort(Name::from(&*sort.name), Name::from(&*kind), Value::Null)
        }
        TheoryTransform::AddSortWithDefault {
            sort,
            vertex_kind,
            default_expr,
        } => {
            let kind = match vertex_kind.as_ref() {
                Some(explicit) => Arc::clone(explicit),
                None => sort.default_vertex_kind(),
            };
            elementary::add_sort_with_default(
                Name::from(&*sort.name),
                Name::from(&*kind),
                default_expr.clone(),
            )
        }
        TheoryTransform::DropSort(name) => elementary::drop_sort(Name::from(&**name)),
        TheoryTransform::RenameSort { old, new } => {
            elementary::rename_sort(Name::from(&**old), Name::from(&**new))
        }
        TheoryTransform::AddOp(op) => {
            // The elementary add_op needs a source sort, taken from the first input.
            let Some((_, input_sort, _)) = op.inputs.first() else {
                return Err(LensError::ProtolensError(format!(
                    "AddOp '{}' has no inputs; elementary add_op requires a source sort. \
                     Supply an explicit input sort or route constants through AddSortWithDefault.",
                    op.name
                )));
            };
            elementary::add_op(
                Name::from(&*op.name),
                Name::from(input_sort.head().as_ref()),
                Name::from(op.output.head().as_ref()),
                Name::from(&*op.name),
            )
        }
        TheoryTransform::DropOp(name) => elementary::drop_op(Name::from(&**name)),
        TheoryTransform::RenameOp { old, new } => {
            elementary::rename_op(Name::from(&**old), Name::from(&**new))
        }
        TheoryTransform::AddEquation(eq) => elementary::add_equation(eq.clone()),
        TheoryTransform::DropEquation(name) => elementary::drop_equation(Name::from(&**name)),
        TheoryTransform::Pullback(morphism) => elementary::pullback(morphism.clone()),
        TheoryTransform::AddDirectedEquation(deq) => elementary::directed_eq(deq.clone()),
        TheoryTransform::DropDirectedEquation(name) => {
            elementary::drop_directed_eq(Name::from(&**name))
        }
        TheoryTransform::CoerceSort {
            sort_name,
            target_kind,
            coercion_expr,
            inverse_expr,
            coercion_class,
        } => elementary::sort_coerce(
            Name::from(&**sort_name),
            *target_kind,
            coercion_expr.clone(),
            inverse_expr.clone(),
            *coercion_class,
        ),
        TheoryTransform::MergeSorts { .. } => {
            return reject("merge transforms not yet supported as protolenses");
        }
        TheoryTransform::Identity => {
            return reject("unexpected Identity in factorization");
        }
        TheoryTransform::Compose(_, _) => {
            return reject("unexpected Compose in factorization");
        }
        TheoryTransform::RenameEdgeName { .. } => {
            return reject("unexpected RenameEdgeName in factorization (user-constructed only)");
        }
        TheoryTransform::AddEdge { .. } => {
            return reject("unexpected AddEdge in factorization (user-constructed only)");
        }
        TheoryTransform::DropEdge { .. } => {
            return reject("unexpected DropEdge in factorization (user-constructed only)");
        }
        TheoryTransform::ScopedTransform { .. } => {
            return reject("unexpected ScopedTransform in factorization (user-constructed only)");
        }
        TheoryTransform::StripEnrichment(_) | TheoryTransform::AddEnrichment { .. } => {
            return reject(
                "unexpected enrichment transform in factorization (user-constructed only)",
            );
        }
    };

    Ok(protolens)
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use panproto_gat::Sort;
use panproto_schema::{Protocol, SchemaBuilder};
#[test]
fn stringency_as_str_matches_display_and_serde() {
for s in [
Stringency::Strict,
Stringency::Balanced,
Stringency::Lenient,
Stringency::Exploratory,
] {
let as_str = s.as_str();
let display = format!("{s}");
let serde = serde_json::to_string(&s).expect("serialize stringency");
assert_eq!(as_str, display, "Display disagrees with as_str");
assert_eq!(
format!("\"{as_str}\""),
serde,
"serde wire format disagrees with as_str",
);
}
assert_eq!(Stringency::Strict.as_str(), "strict");
assert_eq!(Stringency::Balanced.as_str(), "balanced");
assert_eq!(Stringency::Lenient.as_str(), "lenient");
assert_eq!(Stringency::Exploratory.as_str(), "exploratory");
}
/// Builds the protocol fixture used by most tests here: schema theory
/// `ThGraph`, instance theory `ThWType`, the object kinds `record`, `string`,
/// `boolean` and `array`, and empty edge rules / constraint sorts. Every other
/// field is taken from `Protocol::default()`.
fn test_protocol() -> Protocol {
    Protocol {
        name: "test".into(),
        schema_theory: "ThGraph".into(),
        instance_theory: "ThWType".into(),
        edge_rules: Vec::new(),
        obj_kinds: ["record", "string", "boolean", "array"]
            .iter()
            .map(|kind| (*kind).into())
            .collect(),
        constraint_sorts: Vec::new(),
        ..Protocol::default()
    }
}
/// Builds the "v1" fixture schema: a `post` record with two `prop` edges, one
/// to the string vertex `post.text` (edge name `text`) and one to the boolean
/// vertex `post.done` (edge name `done`).
///
/// Panics if any builder step fails, since every step is unwrapped.
fn schema_v1(protocol: &Protocol) -> Schema {
SchemaBuilder::new(protocol)
.vertex("post", "record", None::<&str>)
.unwrap()
.vertex("post.text", "string", None::<&str>)
.unwrap()
.vertex("post.done", "boolean", None::<&str>)
.unwrap()
.edge("post", "post.text", "prop", Some("text"))
.unwrap()
.edge("post", "post.done", "prop", Some("done"))
.unwrap()
.build()
.unwrap()
}
/// Builds the "v2" fixture schema: like `schema_v1`, but the boolean
/// `post.done` vertex is replaced by a string vertex `post.status` (edge name
/// `status`).
///
/// Panics if any builder step fails, since every step is unwrapped.
fn schema_v2(protocol: &Protocol) -> Schema {
SchemaBuilder::new(protocol)
.vertex("post", "record", None::<&str>)
.unwrap()
.vertex("post.text", "string", None::<&str>)
.unwrap()
.vertex("post.status", "string", None::<&str>)
.unwrap()
.edge("post", "post.text", "prop", Some("text"))
.unwrap()
.edge("post", "post.status", "prop", Some("status"))
.unwrap()
.build()
.unwrap()
}
/// Builds a `post` record with two string properties, `post.text` (edge name
/// `text`) and `post.createdAt` (edge name `createdAt`), both attached by
/// `prop` edges.
///
/// Panics if any builder step fails, since every step is unwrapped.
fn schema_post_with_created(protocol: &Protocol) -> Schema {
SchemaBuilder::new(protocol)
.vertex("post", "record", None::<&str>)
.unwrap()
.vertex("post.text", "string", None::<&str>)
.unwrap()
.vertex("post.createdAt", "string", None::<&str>)
.unwrap()
.edge("post", "post.text", "prop", Some("text"))
.unwrap()
.edge("post", "post.createdAt", "prop", Some("createdAt"))
.unwrap()
.build()
.unwrap()
}
/// Builds a `message` record with two string properties, `message.body` (edge
/// name `body`) and `message.sentAt` (edge name `sentAt`), both attached by
/// `prop` edges. It has the same shape as `schema_post_with_created` but with
/// different vertex and edge names.
///
/// Panics if any builder step fails, since every step is unwrapped.
fn schema_message_with_sent(protocol: &Protocol) -> Schema {
SchemaBuilder::new(protocol)
.vertex("message", "record", None::<&str>)
.unwrap()
.vertex("message.body", "string", None::<&str>)
.unwrap()
.vertex("message.sentAt", "string", None::<&str>)
.unwrap()
.edge("message", "message.body", "prop", Some("body"))
.unwrap()
.edge("message", "message.sentAt", "prop", Some("sentAt"))
.unwrap()
.build()
.unwrap()
}
/// Runs `auto_generate` from the post schema to the message schema at
/// `Strict` and `Balanced`. `Balanced` must succeed with positive alignment
/// quality and a `post` -> `message` seed anchor; if `Strict` also succeeds,
/// its quality must not exceed the `Balanced` quality.
#[test]
fn balanced_finds_alias_aligned_morphism_when_strict_cannot() {
    let protocol = test_protocol();
    let source = schema_post_with_created(&protocol);
    let target = schema_message_with_sent(&protocol);

    // Strict runs first and is allowed to fail; its outcome is inspected last.
    let strict_cfg = AutoLensConfig {
        stringency: Stringency::Strict,
        ..Default::default()
    };
    let strict_outcome = auto_generate(&source, &target, &protocol, &strict_cfg);

    let balanced_cfg = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let balanced_outcome = auto_generate(&source, &target, &protocol, &balanced_cfg).unwrap();
    assert!(
        balanced_outcome.alignment_quality > 0.0,
        "Balanced should find a non-trivial alignment"
    );

    let mut names: Vec<(String, String)> = Vec::new();
    for anchor in &balanced_outcome.seed_anchors {
        names.push((anchor.src.as_str().to_owned(), anchor.tgt.as_str().to_owned()));
    }
    let seeds_post_to_message = names
        .iter()
        .any(|(from, to)| from == "post" && to == "message");
    assert!(
        seeds_post_to_message,
        "alias strategy should seed post ↔ message anchor; got {names:?}"
    );

    if let Ok(strict_ok) = strict_outcome {
        assert!(
            strict_ok.alignment_quality <= balanced_outcome.alignment_quality,
            "Strict should not outperform Balanced on this case"
        );
    }
}
/// Requests up to five candidates at `Balanced` and checks that the list is
/// non-empty, ordered by non-increasing `score()`, and that every step of
/// every candidate carries a non-empty explanation.
#[test]
fn auto_generate_candidates_returns_ranked_non_empty_list() {
    let protocol = test_protocol();
    let source = schema_post_with_created(&protocol);
    let target = schema_message_with_sent(&protocol);
    let config = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let ranked = match auto_generate_candidates(&source, &target, &protocol, &config, 5) {
        Ok(list) => list,
        Err(e) => panic!("expected candidates: {e}"),
    };
    assert!(!ranked.is_empty(), "candidates must be non-empty");
    // Each adjacent pair must be in descending score order.
    for neighbours in ranked.windows(2) {
        let (earlier, later) = (&neighbours[0], &neighbours[1]);
        assert!(
            earlier.score() >= later.score(),
            "candidates must be sorted by descending composite score"
        );
    }
    for cand in &ranked {
        let has_blank_explanation = cand.steps.iter().any(|step| step.explanation.is_empty());
        assert!(
            !has_blank_explanation,
            "every step needs an explanation; got {:?}",
            cand.steps
        );
    }
}
/// Requests a single candidate at `Balanced` and checks that its `coverage`
/// lies in `(0, 1]` and that `strategies_used` is non-empty.
#[test]
fn auto_generate_candidates_reports_coverage_and_strategies() {
    let protocol = test_protocol();
    let source = schema_post_with_created(&protocol);
    let target = schema_message_with_sent(&protocol);
    let config = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let ranked = match auto_generate_candidates(&source, &target, &protocol, &config, 1) {
        Ok(list) => list,
        Err(e) => panic!("candidates: {e}"),
    };
    let best = &ranked[0];
    let coverage = best.coverage;
    assert!(
        coverage > 0.0 && coverage <= 1.0,
        "coverage must be in (0, 1]: {}",
        coverage
    );
    let used_any_strategy = !best.strategies_used.is_empty();
    assert!(
        used_any_strategy,
        "Balanced tier should engage at least one strategy"
    );
}
/// Expects `auto_generate_candidates` to return an error when the source and
/// target schemas each hold a single vertex of a different kind (`alpha` vs
/// `beta`), searched at `Strict` stringency with `monic: true`.
#[test]
fn auto_generate_candidates_errors_when_no_morphism() {
use panproto_mig::hom_search::SearchOptions;
// Dedicated protocol whose only object kinds are the two disjoint ones.
let protocol = Protocol {
name: "test".into(),
schema_theory: "ThGraph".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["alpha".into(), "beta".into()],
constraint_sorts: vec![],
..Protocol::default()
};
// Source: one `alpha` vertex, no edges.
let src = SchemaBuilder::new(&protocol)
.vertex("x", "alpha", None::<&str>)
.unwrap()
.build()
.unwrap();
// Target: one `beta` vertex, no edges.
let tgt = SchemaBuilder::new(&protocol)
.vertex("y", "beta", None::<&str>)
.unwrap()
.build()
.unwrap();
let config = AutoLensConfig {
stringency: Stringency::Strict,
search_opts: SearchOptions {
monic: true,
..Default::default()
},
..Default::default()
};
let res = auto_generate_candidates(&src, &tgt, &protocol, &config, 1);
assert!(
res.is_err(),
"expected no-morphism error between disjoint-kind schemas, got {res:?}"
);
}
/// Expects `endofunctor_to_protolens` to fail for an `AddOp` transform whose
/// operation has an empty `inputs` list, and requires the error text to
/// contain both "no inputs" and the operation name "constant".
#[test]
fn endofunctor_to_protolens_rejects_add_op_with_no_inputs() {
use panproto_gat::{Operation, TheoryEndofunctor, TheoryTransform};
use std::sync::Arc;
let endo = TheoryEndofunctor {
name: Arc::from("add_op_constant"),
precondition: panproto_gat::TheoryConstraint::Unconstrained,
transform: TheoryTransform::AddOp(Operation {
name: Arc::from("constant"),
// The empty input list is the condition under test.
inputs: Vec::new(),
output: panproto_gat::SortExpr::Name(Arc::from("int")),
}),
};
let err = endofunctor_to_protolens(&endo)
.expect_err("AddOp with empty inputs must error, not synthesize 'unknown'");
let msg = format!("{err}");
assert!(
msg.contains("no inputs") && msg.contains("constant"),
"error must name the op and the reason; got: {msg}"
);
}
/// Converts an `AddSortWithDefault` endofunctor into a protolens and checks
/// that the protolens' target transform is still `AddSortWithDefault` with a
/// `default_expr` equal to the one supplied (the integer literal 42).
#[test]
fn endofunctor_to_protolens_preserves_add_sort_default_expr() {
use panproto_expr::Expr;
use panproto_gat::{Sort, TheoryEndofunctor, TheoryTransform};
use std::sync::Arc;
let expr = Expr::Lit(panproto_expr::Literal::Int(42));
let endo = TheoryEndofunctor {
name: Arc::from("add_counter"),
precondition: panproto_gat::TheoryConstraint::Unconstrained,
transform: TheoryTransform::AddSortWithDefault {
sort: Sort::simple(Arc::from("counter")),
vertex_kind: Some(Arc::from("integer")),
// Cloned so the original stays available for the comparison below.
default_expr: expr.clone(),
},
};
let protolens =
endofunctor_to_protolens(&endo).expect("AddSortWithDefault must produce a protolens");
match &protolens.target.transform {
TheoryTransform::AddSortWithDefault { default_expr, .. } => {
assert_eq!(
default_expr, &expr,
"default_expr must be forwarded verbatim, not replaced with Value::Null"
);
}
// Any other variant means the transform was rewritten on the way through.
other => panic!("expected AddSortWithDefault in target transform, got {other:?}"),
}
}
/// Passes `Some(f64::NAN)` as the final (quality threshold) argument of
/// `auto_generate_with_hints` and expects an error whose text contains "NaN".
#[test]
fn auto_generate_with_hints_rejects_nan_quality_threshold() {
    let protocol = test_protocol();
    let source = schema_v1(&protocol);
    let target = schema_v2(&protocol);
    let config = AutoLensConfig::default();
    let no_hints = HashMap::new();
    let no_constraints = DomainConstraints::default();
    let outcome = auto_generate_with_hints(
        &source,
        &target,
        &protocol,
        &config,
        &no_hints,
        &no_constraints,
        Some(f64::NAN),
    );
    let err = match outcome {
        Err(e) => e,
        Ok(_) => panic!("NaN quality_threshold must be rejected, but got Ok"),
    };
    let rendered = err.to_string();
    assert!(
        rendered.contains("NaN"),
        "error must mention NaN; got: {err}"
    );
}
/// Converts a `CoerceSort` endofunctor into a protolens and checks that every
/// field of the transform (sort name, target kind, forward expression,
/// inverse expression, coercion class) appears unchanged in the protolens'
/// target transform.
#[test]
fn endofunctor_to_protolens_roundtrips_coerce_sort() {
use panproto_expr::{Expr, Literal};
use panproto_gat::{CoercionClass, TheoryEndofunctor, TheoryTransform, ValueKind};
use std::sync::Arc;
// Distinct literals so a swapped forward/inverse pair would be detected.
let fwd = Expr::Lit(Literal::Int(1));
let inv = Expr::Lit(Literal::Int(-1));
let endo = TheoryEndofunctor {
name: Arc::from("coerce_counter_to_float"),
precondition: panproto_gat::TheoryConstraint::HasSort(Arc::from("counter")),
transform: TheoryTransform::CoerceSort {
sort_name: Arc::from("counter"),
target_kind: ValueKind::Float,
coercion_expr: fwd.clone(),
inverse_expr: Some(inv.clone()),
coercion_class: CoercionClass::Retraction,
},
};
let protolens =
endofunctor_to_protolens(&endo).expect("CoerceSort must produce a protolens");
match &protolens.target.transform {
TheoryTransform::CoerceSort {
sort_name,
target_kind,
coercion_expr,
inverse_expr,
coercion_class,
} => {
assert_eq!(&**sort_name, "counter");
assert_eq!(*target_kind, ValueKind::Float);
assert_eq!(coercion_expr, &fwd);
assert_eq!(inverse_expr, &Some(inv));
assert_eq!(*coercion_class, CoercionClass::Retraction);
}
other => panic!("expected CoerceSort target transform, got {other:?}"),
}
}
/// Builds two `FoundMorphism` values with the same identity vertex and edge
/// maps but with the vertex pairs inserted in opposite orders, converts both
/// with `alignment_to_theory_morphism_mode(.., false)`, and checks that the
/// resulting `sort_map` and `op_map` hold the same entries.
#[test]
fn alignment_to_theory_morphism_is_deterministic_across_hash_iterations() {
use panproto_mig::FoundMorphism;
use panproto_schema::Edge;
let protocol = test_protocol();
// Record `r` with two string properties `r.a` and `r.b`.
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.a", "string", None::<&str>)
.unwrap()
.vertex("r.b", "string", None::<&str>)
.unwrap()
.edge("r", "r.a", "prop", Some("a"))
.unwrap()
.edge("r", "r.b", "prop", Some("b"))
.unwrap()
.build()
.unwrap();
// The target is the same schema, so the alignment below is an identity map.
let tgt = src.clone();
// Builds a morphism whose vertex map is filled in the order of `pairs`;
// the edge map always sends each of the two `prop` edges to itself.
let mk = |pairs: &[(&str, &str)]| -> FoundMorphism {
let mut vm = HashMap::new();
for (a, b) in pairs {
vm.insert(Name::from(*a), Name::from(*b));
}
let mut em = HashMap::new();
let e1 = Edge {
src: Name::from("r"),
tgt: Name::from("r.a"),
kind: Name::from("prop"),
name: Some(Name::from("a")),
};
let e2 = Edge {
src: Name::from("r"),
tgt: Name::from("r.b"),
kind: Name::from("prop"),
name: Some(Name::from("b")),
};
em.insert(e1.clone(), e1);
em.insert(e2.clone(), e2);
FoundMorphism {
vertex_map: vm,
edge_map: em,
quality: 1.0,
}
};
// Same pairs, opposite insertion order.
let fm_a = mk(&[("r", "r"), ("r.a", "r.a"), ("r.b", "r.b")]);
let fm_b = mk(&[("r.b", "r.b"), ("r.a", "r.a"), ("r", "r")]);
let ma = alignment_to_theory_morphism_mode(&fm_a, &src, &tgt, false);
let mb = alignment_to_theory_morphism_mode(&fm_b, &src, &tgt, false);
// Flattens a map into a sorted list so the comparison ignores map ordering.
let to_sorted = |m: &HashMap<Arc<str>, Arc<str>>| -> Vec<(String, String)> {
let mut out: Vec<_> = m
.iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
out.sort();
out
};
assert_eq!(
to_sorted(&ma.sort_map),
to_sorted(&mb.sort_map),
"sort_map must not depend on vertex_map insertion order"
);
assert_eq!(
to_sorted(&ma.op_map),
to_sorted(&mb.op_map),
"op_map must not depend on edge_map insertion order"
);
}
/// The source has a record `r` with a boolean property `r.flag`; the target
/// has only the record `r`. At `Lenient`, `auto_generate` must succeed and its
/// chain must contain a `DropSort("boolean")` step. If `Strict` happens to
/// succeed as well, none of its steps may be `DropSort("boolean")`.
#[test]
fn lenient_span_search_drops_orphan_source_sorts() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.flag", "boolean", None::<&str>)
.unwrap()
.edge("r", "r.flag", "prop", Some("flag"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.build()
.unwrap();
let strict = AutoLensConfig {
stringency: Stringency::Strict,
..Default::default()
};
// Strict may fail here; its result is only inspected when it is `Ok`.
let strict_res = auto_generate(&src, &tgt, &protocol, &strict);
let lenient = AutoLensConfig {
stringency: Stringency::Lenient,
..Default::default()
};
let lenient_res = auto_generate(&src, &tgt, &protocol, &lenient)
.unwrap_or_else(|e| panic!("Lenient should find a span: {e}"));
let has_boolean_drop = lenient_res.chain.steps.iter().any(|step| {
matches!(
&step.target.transform,
TheoryTransform::DropSort(name) if &**name == "boolean"
)
});
// On failure, the message lists the step names of the Lenient chain.
assert!(
has_boolean_drop,
"Lenient span should emit DropSort(boolean); chain: {:?}",
lenient_res
.chain
.steps
.iter()
.map(|s| s.name.to_string())
.collect::<Vec<_>>()
);
if let Ok(r) = strict_res {
assert!(
r.chain.steps.iter().all(|step| !matches!(
&step.target.transform,
TheoryTransform::DropSort(name) if &**name == "boolean"
)),
"Strict must not emit a drop step (drops require span search)"
);
}
}
/// Checks that `token_similarity_threshold` never increases when moving from
/// `Strict` through `Balanced` and `Lenient` to `Exploratory`, and that the
/// alias dictionary is used at every tier except `Strict`.
#[test]
fn stringency_thresholds_form_monotone_ladder() {
    // Thresholds listed from the strictest tier to the most permissive one.
    let ladder = [
        Stringency::Strict.token_similarity_threshold(),
        Stringency::Balanced.token_similarity_threshold(),
        Stringency::Lenient.token_similarity_threshold(),
        Stringency::Exploratory.token_similarity_threshold(),
    ];
    for rung in ladder.windows(2) {
        assert!(rung[0] >= rung[1]);
    }
    assert!(!Stringency::Strict.uses_alias_dict());
    for tier in [
        Stringency::Balanced,
        Stringency::Lenient,
        Stringency::Exploratory,
    ] {
        assert!(tier.uses_alias_dict());
    }
}
/// Runs `auto_generate` with the same schema as source and target under the
/// default config; the call must succeed and yield either an empty chain or a
/// positive alignment quality.
#[test]
fn auto_generate_between_same_schemas() {
    let protocol = test_protocol();
    let schema = schema_v1(&protocol);
    let defaults = AutoLensConfig::default();
    let outcome = auto_generate(&schema, &schema, &protocol, &defaults).unwrap();
    assert!(outcome.chain.is_empty() || outcome.alignment_quality > 0.0);
}
/// Runs `auto_generate` from `schema_v1` to `schema_v2` under the default
/// config. Success must produce a non-empty chain with positive quality;
/// failure is accepted only if the error text mentions "morphism".
#[test]
fn auto_generate_between_different_schemas() {
    let protocol = test_protocol();
    let source = schema_v1(&protocol);
    let target = schema_v2(&protocol);
    let defaults = AutoLensConfig::default();
    match auto_generate(&source, &target, &protocol, &defaults) {
        Err(failure) => {
            assert!(failure.to_string().contains("morphism"));
        }
        Ok(found) => {
            assert!(!found.chain.is_empty());
            assert!(found.alignment_quality > 0.0);
        }
    }
}
/// Feeds an identity alignment over `schema_v1` to
/// `alignment_to_theory_morphism` and checks that every sort of the source's
/// implicit theory has an entry in the morphism's `sort_map`.
#[test]
fn alignment_to_morphism_preserves_kinds() {
    let protocol = test_protocol();
    let source = schema_v1(&protocol);
    let target = schema_v1(&protocol);
    // Identity alignment: every vertex and edge maps to itself.
    let identity = FoundMorphism {
        vertex_map: source
            .vertices
            .keys()
            .map(|vertex| (vertex.clone(), vertex.clone()))
            .collect(),
        edge_map: source
            .edges
            .keys()
            .map(|edge| (edge.clone(), edge.clone()))
            .collect(),
        quality: 1.0,
    };
    let morphism = alignment_to_theory_morphism(&identity, &source, &target);
    let implicit = schema_to_implicit_theory(&source);
    let every_sort_mapped = implicit
        .sorts
        .iter()
        .all(|sort| morphism.sort_map.contains_key(&sort.name));
    assert!(every_sort_mapped);
}
/// Builds a protolens chain from an identity alignment of `schema_v1` onto
/// itself and checks that the chain has at most one step.
#[test]
fn protolens_from_identity_alignment() {
    let protocol = test_protocol();
    let schema = schema_v1(&protocol);
    // Identity alignment: every vertex and edge maps to itself.
    let identity = FoundMorphism {
        vertex_map: schema
            .vertices
            .keys()
            .map(|vertex| (vertex.clone(), vertex.clone()))
            .collect(),
        edge_map: schema
            .edges
            .keys()
            .map(|edge| (edge.clone(), edge.clone()))
            .collect(),
        quality: 1.0,
    };
    let chain = protolens_from_alignment(&identity, &schema, &schema).unwrap();
    let step_count = chain.len();
    assert!(step_count <= 1);
}
/// Converts an `AddSort` endofunctor and checks that the resulting protolens
/// name contains "add_sort".
#[test]
fn endofunctor_to_protolens_add_sort() {
    let transform = TheoryTransform::AddSort {
        sort: Sort::simple("tags"),
        vertex_kind: None,
    };
    let endo = TheoryEndofunctor {
        name: Arc::from("add_tags"),
        precondition: panproto_gat::TheoryConstraint::Unconstrained,
        transform,
    };
    let protolens = endofunctor_to_protolens(&endo).unwrap();
    assert!(protolens.name.contains("add_sort"));
}
/// Converts a `DropSort` endofunctor and checks that the resulting protolens
/// name contains "drop_sort" and that the protolens reports itself as lossy.
#[test]
fn endofunctor_to_protolens_drop_sort() {
    let dropped: Arc<str> = Arc::from("foo");
    let endo = TheoryEndofunctor {
        name: Arc::from("drop_foo"),
        precondition: panproto_gat::TheoryConstraint::HasSort(Arc::clone(&dropped)),
        transform: TheoryTransform::DropSort(dropped),
    };
    let protolens = endofunctor_to_protolens(&endo).unwrap();
    assert!(protolens.name.contains("drop_sort"));
    assert!(!protolens.is_lossless());
}
/// Converts a `RenameSort` endofunctor and checks that the resulting
/// protolens reports itself as lossless.
#[test]
fn endofunctor_to_protolens_rename() {
    let transform = TheoryTransform::RenameSort {
        old: Arc::from("old"),
        new: Arc::from("new"),
    };
    let endo = TheoryEndofunctor {
        name: Arc::from("rename"),
        precondition: panproto_gat::TheoryConstraint::HasSort(Arc::from("old")),
        transform,
    };
    let protolens = endofunctor_to_protolens(&endo).unwrap();
    assert!(protolens.is_lossless());
}
/// Expects `endofunctor_to_protolens` to return an error for an endofunctor
/// whose transform is `TheoryTransform::Identity`.
#[test]
fn endofunctor_to_protolens_rejects_identity() {
    let identity = TheoryEndofunctor {
        name: Arc::from("id"),
        precondition: panproto_gat::TheoryConstraint::Unconstrained,
        transform: TheoryTransform::Identity,
    };
    let outcome = endofunctor_to_protolens(&identity);
    assert!(outcome.is_err());
}
/// Converts two `CoerceSort` endofunctors that act on the same sort `n` but
/// target different value kinds (`Str` and `Float`). The resulting protolens
/// names must differ, and must contain "str" and "float" respectively.
#[test]
fn endofunctor_to_protolens_coerce_sort_tags_target_kind_in_name() {
use panproto_expr::{BuiltinOp, Expr};
// Variable name shared by both coercion expressions.
let v: Arc<str> = Arc::from("v");
let to_str = TheoryEndofunctor {
name: Arc::from("coerce_n_str"),
precondition: panproto_gat::TheoryConstraint::HasSort(Arc::from("n")),
transform: TheoryTransform::CoerceSort {
sort_name: Arc::from("n"),
target_kind: panproto_gat::ValueKind::Str,
coercion_expr: Expr::Builtin(BuiltinOp::IntToStr, vec![Expr::Var(Arc::clone(&v))]),
inverse_expr: None,
coercion_class: panproto_gat::CoercionClass::Retraction,
},
};
let to_float = TheoryEndofunctor {
name: Arc::from("coerce_n_float"),
precondition: panproto_gat::TheoryConstraint::HasSort(Arc::from("n")),
transform: TheoryTransform::CoerceSort {
sort_name: Arc::from("n"),
target_kind: panproto_gat::ValueKind::Float,
coercion_expr: Expr::Builtin(BuiltinOp::IntToFloat, vec![Expr::Var(v)]),
inverse_expr: None,
coercion_class: panproto_gat::CoercionClass::Retraction,
},
};
let p1 = endofunctor_to_protolens(&to_str).unwrap();
let p2 = endofunctor_to_protolens(&to_float).unwrap();
// Same source sort, different target kinds: the names must not collide.
assert_ne!(
p1.name, p2.name,
"CoerceSort protolens names must distinguish target kinds"
);
assert!(
p1.name.as_str().contains("str"),
"expected target kind in name; got {}",
p1.name
);
assert!(
p2.name.as_str().contains("float"),
"expected target kind in name; got {}",
p2.name
);
}
/// Checks that `uses_coerce` is false for `Strict`, `Balanced` and `Lenient`
/// and true for `Exploratory`.
#[test]
fn uses_coerce_is_exploratory_only() {
    for tier in [
        Stringency::Strict,
        Stringency::Balanced,
        Stringency::Lenient,
    ] {
        assert!(!tier.uses_coerce());
    }
    assert!(Stringency::Exploratory.uses_coerce());
}
/// Calls `sources_without_compatible_targets` on a source whose three vertices
/// (`zeta`, `aardvark`, `mango`) have kinds absent from the target, and
/// expects the returned names in the order `aardvark`, `mango`, `zeta`.
#[test]
fn sources_without_compatible_targets_is_sorted() {
let protocol = Protocol {
name: "test".into(),
schema_theory: "ThGraph".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec![
"record".into(),
"alpha".into(),
"beta".into(),
"gamma".into(),
],
constraint_sorts: vec![],
..Protocol::default()
};
// Vertices are deliberately declared out of alphabetical order.
let src = SchemaBuilder::new(&protocol)
.vertex("zeta", "alpha", None::<&str>)
.unwrap()
.vertex("aardvark", "beta", None::<&str>)
.unwrap()
.vertex("mango", "gamma", None::<&str>)
.unwrap()
.build()
.unwrap();
// The target has only a `record` vertex, so no source kind has a counterpart.
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.build()
.unwrap();
let out = sources_without_compatible_targets(&src, &tgt);
let names: Vec<&str> = out.iter().map(panproto_gat::Name::as_str).collect();
assert_eq!(
names,
vec!["aardvark", "mango", "zeta"],
"HashMap iteration order leaked into output"
);
}
/// The source has two string properties (`r.keep`, `r.extra`); the target has
/// only `r.keep`. At `Lenient`, `auto_generate` must succeed and its chain
/// must not contain a `DropSort("string")` step.
#[test]
fn lenient_partial_kind_coverage_keeps_sort() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.keep", "string", None::<&str>)
.unwrap()
.vertex("r.extra", "string", None::<&str>)
.unwrap()
.edge("r", "r.keep", "prop", Some("keep"))
.unwrap()
.edge("r", "r.extra", "prop", Some("extra"))
.unwrap()
.build()
.unwrap();
// The target lacks `r.extra` but still has one `string` vertex.
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.keep", "string", None::<&str>)
.unwrap()
.edge("r", "r.keep", "prop", Some("keep"))
.unwrap()
.build()
.unwrap();
let cfg = AutoLensConfig {
stringency: Stringency::Lenient,
..Default::default()
};
let result = auto_generate(&src, &tgt, &protocol, &cfg).unwrap();
let dropped_string = result.chain.steps.iter().any(|step| {
matches!(
&step.target.transform,
TheoryTransform::DropSort(name) if &**name == "string"
)
});
// On failure, the message lists the step names of the generated chain.
assert!(
!dropped_string,
"Lenient must not drop the `string` sort when at least one \
target vertex has that kind; chain: {:?}",
result
.chain
.steps
.iter()
.map(|s| s.name.to_string())
.collect::<Vec<_>>()
);
}
/// Calls `auto_generate_candidates` twice with identical inputs and checks
/// that both runs yield the same sequence of (score, chain step names)
/// fingerprints.
#[test]
fn auto_generate_candidates_ordering_is_stable_on_ties() {
    /// Renders each candidate as `<score to 6 decimals>:<chain step names>`.
    fn fingerprint(cands: &[crate::candidate::LensCandidate]) -> Vec<String> {
        let mut rendered = Vec::new();
        for cand in cands {
            rendered.push(format!(
                "{:.6}:{}",
                cand.score(),
                chain_step_names(&cand.chain)
            ));
        }
        rendered
    }

    let protocol = test_protocol();
    let source = schema_post_with_created(&protocol);
    let target = schema_message_with_sent(&protocol);
    let config = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let first_run = auto_generate_candidates(&source, &target, &protocol, &config, 5)
        .expect("candidates should exist");
    let second_run = auto_generate_candidates(&source, &target, &protocol, &config, 5)
        .expect("candidates should exist");
    assert_eq!(
        fingerprint(&first_run),
        fingerprint(&second_run),
        "candidate ordering is not deterministic"
    );
}
/// Uses schemas that differ only in the kind of `r.n` (`integer` in the
/// source, `string` in the target) and calls `auto_generate_with_hints` at two
/// tiers. When the `Balanced` call succeeds, `coerce_proposals` must be empty;
/// when the `Exploratory` call succeeds, it must contain a proposal whose
/// `witness_name` is "int_to_str".
///
/// NOTE(review): both checks sit behind `if let Ok(..)`, so a failing call
/// passes silently. Confirm that this is intended.
#[test]
fn exploratory_surfaces_coerce_proposals_on_result() {
let protocol = Protocol {
name: "test".into(),
schema_theory: "ThGraph".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["record".into(), "integer".into(), "string".into()],
constraint_sorts: vec![],
..Protocol::default()
};
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.n", "integer", None::<&str>)
.unwrap()
.edge("r", "r.n", "prop", Some("n"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.n", "string", None::<&str>)
.unwrap()
.edge("r", "r.n", "prop", Some("n"))
.unwrap()
.build()
.unwrap();
let balanced = AutoLensConfig {
stringency: Stringency::Balanced,
..Default::default()
};
// No user hints and default domain constraints for both calls.
let hints = std::collections::HashMap::new();
let dc = panproto_mig::hom_search::DomainConstraints::default();
if let Ok(res) =
auto_generate_with_hints(&src, &tgt, &protocol, &balanced, &hints, &dc, None)
{
assert!(
res.coerce_proposals.is_empty(),
"Balanced must not populate coerce_proposals"
);
}
let exploratory = AutoLensConfig {
stringency: Stringency::Exploratory,
..Default::default()
};
if let Ok(res) =
auto_generate_with_hints(&src, &tgt, &protocol, &exploratory, &hints, &dc, None)
{
assert!(
res.coerce_proposals
.iter()
.any(|p| p.witness_name == "int_to_str"),
"Exploratory should expose int_to_str in coerce_proposals; got {:?}",
res.coerce_proposals
.iter()
.map(|p| p.witness_name.as_str())
.collect::<Vec<_>>()
);
}
}
/// Checks that `Exploratory` is the only tier for which `uses_coerce` returns
/// true.
#[test]
fn stringency_uses_coerce_only_at_exploratory() {
    let expectations = [
        (Stringency::Strict, false),
        (Stringency::Balanced, false),
        (Stringency::Lenient, false),
        (Stringency::Exploratory, true),
    ];
    for (tier, expected) in expectations {
        assert!(tier.uses_coerce() == expected);
    }
}
/// Pins the `Display` token of every `Stringency` tier and checks that it
/// equals the tier's JSON wire form with the surrounding quotes removed.
#[test]
fn stringency_display_matches_serde_tokens() {
    assert_eq!(Stringency::Strict.to_string(), "strict");
    assert_eq!(Stringency::Balanced.to_string(), "balanced");
    assert_eq!(Stringency::Lenient.to_string(), "lenient");
    assert_eq!(Stringency::Exploratory.to_string(), "exploratory");
    for tier in [
        Stringency::Strict,
        Stringency::Balanced,
        Stringency::Lenient,
        Stringency::Exploratory,
    ] {
        let wire = serde_json::to_string(&tier).expect("serde");
        // Compare against `Display` itself (not `as_str`), so the assertion
        // checks what its name and failure message say it checks.
        assert_eq!(
            wire.trim_matches('"'),
            tier.to_string(),
            "Display must match serde output"
        );
    }
}
/// Runs `auto_generate` once to obtain a reference result, then 100 more
/// times, and checks on every run that the chain's step names and the
/// alignment quality (within 1e-12) match the reference.
#[test]
fn auto_generate_is_deterministic_across_runs() {
    /// Collects the step names of a result's chain, in chain order.
    fn names_of(result: &AutoLensResult) -> Vec<String> {
        let mut collected = Vec::new();
        for step in &result.chain.steps {
            collected.push(step.name.to_string());
        }
        collected
    }

    let protocol = test_protocol();
    let source = schema_post_with_created(&protocol);
    let target = schema_message_with_sent(&protocol);
    let config = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let reference = auto_generate(&source, &target, &protocol, &config).unwrap();
    let reference_steps = names_of(&reference);
    for i in 0..100 {
        let rerun = auto_generate(&source, &target, &protocol, &config).unwrap();
        assert_eq!(
            names_of(&rerun),
            reference_steps,
            "step order drift at iter {i}"
        );
        let quality_gap = (rerun.alignment_quality - reference.alignment_quality).abs();
        assert!(quality_gap < 1e-12, "quality drift at iter {i}",);
    }
}
/// Calls `auto_generate_candidates` with `top_n = 0` and expects exactly one
/// candidate back.
#[test]
fn auto_generate_candidates_top_n_zero_returns_one() {
    let protocol = test_protocol();
    let src = schema_post_with_created(&protocol);
    let tgt = schema_message_with_sent(&protocol);
    let config = AutoLensConfig {
        stringency: Stringency::Balanced,
        ..Default::default()
    };
    let c = auto_generate_candidates(&src, &tgt, &protocol, &config, 0)
        .unwrap_or_else(|e| panic!("candidates: {e}"));
    // `len() == 1` already implies non-empty, so a single equality check is
    // enough.
    assert_eq!(
        c.len(),
        1,
        "top_n=0 must yield exactly one candidate, got {}",
        c.len()
    );
}
/// Builds a chain with `protolens_from_alignment_mode(.., true)` from an
/// identity alignment of `schema_v1` onto itself and checks that no step is a
/// `DropSort` or `DropOp`.
#[test]
fn alignment_to_theory_morphism_emit_spans_full_coverage_no_drops() {
use panproto_mig::FoundMorphism;
let protocol = test_protocol();
let s = schema_v1(&protocol);
// Identity alignment: every vertex and edge maps to itself.
let alignment = FoundMorphism {
vertex_map: s.vertices.keys().map(|k| (k.clone(), k.clone())).collect(),
edge_map: s.edges.keys().map(|e| (e.clone(), e.clone())).collect(),
quality: 1.0,
};
let chain = protolens_from_alignment_mode(&alignment, &s, &s, true)
.expect("span-mode chain on identity alignment");
// On failure, the message lists the step names of the chain.
assert!(
chain.steps.iter().all(|step| !matches!(
&step.target.transform,
TheoryTransform::DropSort(_) | TheoryTransform::DropOp(_)
)),
"emit_spans=true with complete vertex_map must not drop any sort/op; got {:?}",
chain
.steps
.iter()
.map(|s| s.name.to_string())
.collect::<Vec<_>>()
);
}
/// Serializes every `Stringency` tier to JSON and back, checking that the
/// decoded value equals the original and that the unquoted wire form equals
/// the tier's `Display` output.
#[test]
fn stringency_serde_round_trip() {
    let tiers = [
        Stringency::Strict,
        Stringency::Balanced,
        Stringency::Lenient,
        Stringency::Exploratory,
    ];
    for tier in tiers {
        let encoded = serde_json::to_string(&tier).expect("serialize");
        let decoded: Stringency = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(decoded, tier, "round-trip drift for {tier:?}");
        let unquoted = encoded.trim_matches('"');
        assert_eq!(unquoted, tier.to_string());
    }
}
/// Same scenario as the `auto_generate_with_hints` coerce test, but through
/// the plain `auto_generate` entry point. The schemas differ only in the kind
/// of `r.n` (`integer` vs `string`). When the `Exploratory` call succeeds,
/// `coerce_proposals` must contain a proposal with `witness_name`
/// "int_to_str"; when the `Balanced` call succeeds, it must be empty.
///
/// NOTE(review): both checks sit behind `if let Ok(..)`, so a failing call
/// passes silently. Confirm that this is intended.
#[test]
fn auto_generate_surfaces_coerce_proposals_at_exploratory() {
let protocol = Protocol {
name: "test".into(),
schema_theory: "ThGraph".into(),
instance_theory: "ThWType".into(),
edge_rules: vec![],
obj_kinds: vec!["record".into(), "integer".into(), "string".into()],
constraint_sorts: vec![],
..Protocol::default()
};
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.n", "integer", None::<&str>)
.unwrap()
.edge("r", "r.n", "prop", Some("n"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.n", "string", None::<&str>)
.unwrap()
.edge("r", "r.n", "prop", Some("n"))
.unwrap()
.build()
.unwrap();
let cfg = AutoLensConfig {
stringency: Stringency::Exploratory,
..Default::default()
};
if let Ok(res) = auto_generate(&src, &tgt, &protocol, &cfg) {
assert!(
res.coerce_proposals
.iter()
.any(|p| p.witness_name == "int_to_str"),
"auto_generate at Exploratory must expose int_to_str in coerce_proposals"
);
}
let balanced = AutoLensConfig {
stringency: Stringency::Balanced,
..Default::default()
};
if let Ok(res) = auto_generate(&src, &tgt, &protocol, &balanced) {
assert!(
res.coerce_proposals.is_empty(),
"Balanced must not populate coerce_proposals via auto_generate"
);
}
}
/// Builds a `LensCandidate` with quality 1.0, coverage 1.0 and a single step
/// of confidence 1.0 over an empty chain, and pins its `score()` to 1.7
/// (within 1e-9).
#[test]
fn score_weights_are_pinned() {
    use crate::candidate::{CandidateStep, LensCandidate};
    let protocol = test_protocol();
    let schema = schema_v1(&protocol);
    let empty_chain = crate::protolens::ProtolensChain::new(vec![]);
    let lens = empty_chain.instantiate(&schema, &protocol).unwrap();
    let only_step = CandidateStep {
        kind: "k".into(),
        explanation: "e".into(),
        confidence: 1.0,
        strategy: None,
    };
    let candidate = LensCandidate {
        chain: empty_chain,
        lens,
        quality: 1.0,
        coverage: 1.0,
        seed_anchors: vec![],
        steps: vec![only_step],
        strategies_used: vec![],
    };
    let drift = (candidate.score() - 1.7).abs();
    assert!(
        drift < 1e-9,
        "weight drift: expected 1.7, got {}",
        candidate.score()
    );
}
/// Checks that `span_exclusions_at_lenient` auto-excludes the boolean vertex
/// `r.flag` (which has no counterpart in the target), and that a merge which
/// skips user-anchored sources leaves `r.flag` out of `excluded_sources`.
///
/// NOTE(review): the merge is re-implemented inline here; this test never
/// calls `auto_generate_with_hints` itself. Confirm that the production merge
/// follows the same rule.
#[test]
fn auto_generate_with_hints_user_anchor_overrides_span_exclusion() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.flag", "boolean", None::<&str>)
.unwrap()
.edge("r", "r.flag", "prop", Some("flag"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.build()
.unwrap();
let span =
span_exclusions_at_lenient(&src, &tgt, Stringency::Lenient, &HashMap::new(), false)
.expect("Lenient should auto-exclude r.flag");
assert!(
span.excluded_sources.contains(&Name::from("r.flag")),
"span auto-exclusion must name r.flag"
);
// User hint that anchors the auto-excluded source vertex.
let mut anchors: HashMap<Name, Name> = HashMap::new();
anchors.insert(Name::from("r.flag"), Name::from("r"));
// Merge the auto-exclusions, skipping every source the user anchored.
let mut merged = DomainConstraints::default();
for src_v in span.excluded_sources {
if anchors.contains_key(&src_v) {
continue;
}
merged.excluded_sources.insert(src_v);
}
assert!(
!merged.excluded_sources.contains(&Name::from("r.flag")),
"user-hinted source must not end up in excluded_sources"
);
}
/// Calls `auto_generate_with_hints` at `Lenient` with a caller-supplied
/// `DomainConstraints` that excludes the source vertex `r.extra`, and expects
/// the call to succeed.
///
/// NOTE(review): only success is asserted; the result is discarded, so nothing
/// here verifies that the caller's exclusion was actually honoured.
#[test]
fn auto_generate_with_hints_preserves_caller_excluded_sources() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.keep", "string", None::<&str>)
.unwrap()
.vertex("r.extra", "string", None::<&str>)
.unwrap()
.edge("r", "r.keep", "prop", Some("keep"))
.unwrap()
.edge("r", "r.extra", "prop", Some("extra"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.keep", "string", None::<&str>)
.unwrap()
.edge("r", "r.keep", "prop", Some("keep"))
.unwrap()
.build()
.unwrap();
// Caller-provided exclusion of the source vertex missing from the target.
let mut dc = DomainConstraints::default();
dc.excluded_sources.insert(Name::from("r.extra"));
let cfg = AutoLensConfig {
stringency: Stringency::Lenient,
..Default::default()
};
let res = auto_generate_with_hints(&src, &tgt, &protocol, &cfg, &HashMap::new(), &dc, None)
.unwrap_or_else(|e| panic!("expected success: {e}"));
// The result is deliberately unused; reaching this line is the assertion.
let _ = res; }
/// Candidate-API counterpart of the user-anchor override test. It checks that
/// `span_exclusions_at_lenient` auto-excludes `r.flag`, that an inline merge
/// skipping user-anchored sources leaves `r.flag` out of `excluded_sources`,
/// and finally calls `auto_generate_candidates_with_hints` with that anchor.
///
/// NOTE(review): the final call's result is discarded, so the test only shows
/// that the call does not panic.
#[test]
fn auto_generate_candidates_with_hints_user_anchor_overrides_span_exclusion() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.vertex("r.flag", "boolean", None::<&str>)
.unwrap()
.edge("r", "r.flag", "prop", Some("flag"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("r", "record", None::<&str>)
.unwrap()
.build()
.unwrap();
let span =
span_exclusions_at_lenient(&src, &tgt, Stringency::Lenient, &HashMap::new(), false)
.expect("Lenient should auto-exclude r.flag");
assert!(
span.excluded_sources.contains(&Name::from("r.flag")),
"span auto-exclusion must name r.flag"
);
// User hint that anchors the auto-excluded source vertex.
let mut anchors: HashMap<Name, Name> = HashMap::new();
anchors.insert(Name::from("r.flag"), Name::from("r"));
let cfg = AutoLensConfig {
stringency: Stringency::Lenient,
..Default::default()
};
// Merge the auto-exclusions, skipping every source the user anchored.
let mut merged = DomainConstraints::default();
for src_v in span.excluded_sources {
if anchors.contains_key(&src_v) {
continue;
}
merged.excluded_sources.insert(src_v);
}
assert!(
!merged.excluded_sources.contains(&Name::from("r.flag")),
"user-hinted source must not end up in excluded_sources"
);
// Exercise the real entry point with the anchor; either outcome is accepted.
let _ = auto_generate_candidates_with_hints(
&src,
&tgt,
&protocol,
&cfg,
&anchors,
&DomainConstraints::default(),
1,
);
}
/// Contrasts the two exclusion predicates on a source record `post` whose only
/// outgoing edge has kind `blob`, against a target record whose only outgoing
/// edge has kind `prop`. `sources_without_compatible_targets` must not list
/// `post`, while `sources_without_naturality_compatible_targets` must.
#[test]
fn naturality_feasibility_excludes_sources_with_unmatchable_outgoing_edges() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("post", "record", None::<&str>)
.unwrap()
.vertex("post.media", "array", None::<&str>)
.unwrap()
.edge("post", "post.media", "blob", Some("media"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("doc", "record", None::<&str>)
.unwrap()
.vertex("doc.title", "string", None::<&str>)
.unwrap()
.edge("doc", "doc.title", "prop", Some("title"))
.unwrap()
.build()
.unwrap();
// No user anchors in this scenario.
let anchors: HashMap<Name, Name> = HashMap::new();
let kind_only = sources_without_compatible_targets(&src, &tgt);
assert!(
!kind_only.contains(&Name::from("post")),
"kind-only predicate retains `post` because target also has a record kind",
);
let naturality = sources_without_naturality_compatible_targets(
&src, &tgt, &anchors, false,
);
assert!(
naturality.contains(&Name::from("post")),
"naturality predicate must exclude `post`: no target record has a `blob` \
outgoing edge, so no naturality-consistent mapping exists (got {naturality:?})",
);
}
/// The source has two records: `recA` with a `prop` edge and `recB` with a
/// `blob` edge; the target has one record `rec` with a `prop` edge. With
/// `recA` and `recA.name` anchored to the target,
/// `sources_without_naturality_compatible_targets` must list `recB` and must
/// not list `recA`.
#[test]
fn stronger_predicate_unblocks_csp_when_kind_only_retains_unmatchable_sources() {
let protocol = test_protocol();
let src = SchemaBuilder::new(&protocol)
.vertex("recA", "record", None::<&str>)
.unwrap()
.vertex("recA.name", "string", None::<&str>)
.unwrap()
.vertex("recB", "record", None::<&str>)
.unwrap()
.vertex("recB.media", "array", None::<&str>)
.unwrap()
.edge("recA", "recA.name", "prop", Some("name"))
.unwrap()
.edge("recB", "recB.media", "blob", Some("media"))
.unwrap()
.build()
.unwrap();
let tgt = SchemaBuilder::new(&protocol)
.vertex("rec", "record", None::<&str>)
.unwrap()
.vertex("rec.name", "string", None::<&str>)
.unwrap()
.edge("rec", "rec.name", "prop", Some("name"))
.unwrap()
.build()
.unwrap();
// Anchor the matchable record and its property onto the target.
let mut anchors: HashMap<Name, Name> = HashMap::new();
anchors.insert(Name::from("recA"), Name::from("rec"));
anchors.insert(Name::from("recA.name"), Name::from("rec.name"));
let naturality = sources_without_naturality_compatible_targets(&src, &tgt, &anchors, false);
assert!(
naturality.contains(&Name::from("recB")),
"recB must be excluded: its `blob` outgoing edge has no target counterpart \
(got {naturality:?})",
);
assert!(
!naturality.contains(&Name::from("recA")),
"recA must survive: its outgoing `prop` edge aligns with the target's \
outgoing `prop` edge on the anchored `rec` vertex (got {naturality:?})",
);
}
/// Test fixture: builds a [`CoerceAnchor`] for the given witness name.
///
/// Everything except the witness name is fixed: the anchor maps `src_v` to
/// `tgt_v` with confidence `0.5` under the `Coerce` strategy tag, and the
/// witness class is `Retraction`. The witness name is also embedded in the
/// anchor's explanation text.
fn synthetic_coerce_anchor(witness_name: &str) -> CoerceAnchor {
    let anchor = Anchor {
        src: Name::from("src_v"),
        tgt: Name::from("tgt_v"),
        confidence: 0.5,
        strategy: panproto_mig::align::StrategyTag::Coerce,
        explanation: format!("synthetic coerce anchor for {witness_name}"),
    };
    CoerceAnchor {
        anchor,
        witness_name: String::from(witness_name),
        witness_class: panproto_gat::CoercionClass::Retraction,
    }
}
/// With the default sample registry, a proposal whose witness is `int_to_str`
/// must be kept by the law-check filter, and nothing may be dropped.
#[test]
fn filter_coerce_proposals_keeps_honest_witness() {
    let registry = CoercionSampleRegistry::with_defaults();
    let proposals = vec![synthetic_coerce_anchor("int_to_str")];
    // The registry is passed by shared reference.
    let (kept, dropped) = filter_coerce_proposals_by_law_check(proposals, &registry);
    assert_eq!(kept.len(), 1);
    assert!(dropped.is_empty());
}
/// With the default sample registry and `UnknownSamplesPolicy::Drop`, a
/// proposal naming the witness `no_such_witness` must end up in the dropped
/// list, leaving the kept list empty.
#[test]
fn filter_coerce_proposals_drops_unknown_witness_under_drop_policy() {
    let registry = CoercionSampleRegistry::with_defaults();
    let proposals = vec![synthetic_coerce_anchor("no_such_witness")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check_with_policy(
        proposals,
        &registry,
        FilterOptions::with_unknown(UnknownSamplesPolicy::Drop),
    );
    assert!(kept.is_empty());
    assert_eq!(dropped.len(), 1);
}
/// With the default sample registry and `UnknownSamplesPolicy::Keep`, a
/// proposal naming the witness `no_such_witness` must be kept, and nothing
/// may be dropped.
#[test]
fn filter_coerce_proposals_keeps_unknown_witness_under_keep_policy() {
    let registry = CoercionSampleRegistry::with_defaults();
    let proposals = vec![synthetic_coerce_anchor("no_such_witness")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check_with_policy(
        proposals,
        &registry,
        FilterOptions::with_unknown(UnknownSamplesPolicy::Keep),
    );
    assert_eq!(kept.len(), 1);
    assert!(dropped.is_empty());
}
/// With a registry built by `CoercionSampleRegistry::new()` (no samples are
/// registered by this test) and `UnknownSamplesPolicy::Keep`, the `int_to_str`
/// proposal must be kept, and nothing may be dropped.
#[test]
fn filter_coerce_proposals_keeps_missing_samples_under_keep_policy() {
    let registry = CoercionSampleRegistry::new();
    let proposals = vec![synthetic_coerce_anchor("int_to_str")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check_with_policy(
        proposals,
        &registry,
        FilterOptions::with_unknown(UnknownSamplesPolicy::Keep),
    );
    assert_eq!(kept.len(), 1);
    assert!(dropped.is_empty());
}
/// With a registry built by `CoercionSampleRegistry::new()` (no samples are
/// registered by this test) and `UnknownSamplesPolicy::Drop`, the `int_to_str`
/// proposal must be dropped, leaving the kept list empty.
#[test]
fn filter_coerce_proposals_drops_missing_samples_under_drop_policy() {
    let registry = CoercionSampleRegistry::new();
    let proposals = vec![synthetic_coerce_anchor("int_to_str")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check_with_policy(
        proposals,
        &registry,
        FilterOptions::with_unknown(UnknownSamplesPolicy::Drop),
    );
    assert!(kept.is_empty());
    assert_eq!(dropped.len(), 1);
}
/// The policy-free entry point `filter_coerce_proposals_by_law_check` must
/// keep a proposal naming the witness `no_such_witness` when run against the
/// default sample registry, and must drop nothing.
#[test]
fn filter_coerce_proposals_default_entry_point_uses_keep_policy() {
    let registry = CoercionSampleRegistry::with_defaults();
    let proposals = vec![synthetic_coerce_anchor("no_such_witness")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check(proposals, &registry);
    assert_eq!(kept.len(), 1);
    assert!(dropped.is_empty());
}
/// Registers a string literal as the only sample for `ValueKind::Int`, then
/// expects the `int_to_str` proposal to be dropped by the law-check filter.
#[test]
fn filter_coerce_proposals_drops_on_law_violation() {
    let mut registry = CoercionSampleRegistry::new();
    // Deliberately mismatched sample: a `Str` literal filed under the `Int` kind.
    registry.register(
        panproto_gat::ValueKind::Int,
        vec![panproto_expr::Literal::Str("not an int".to_owned())],
    );
    let proposals = vec![synthetic_coerce_anchor("int_to_str")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check(proposals, &registry);
    assert!(kept.is_empty());
    assert_eq!(dropped.len(), 1);
}
/// With a registry built by `CoercionSampleRegistry::new()` (no samples are
/// registered by this test), the policy-free entry point must return the
/// single `int_to_str` proposal as kept and drop nothing.
#[test]
fn filter_coerce_proposals_no_registry_path_is_identity() {
    let registry = CoercionSampleRegistry::new();
    let proposals = vec![synthetic_coerce_anchor("int_to_str")];
    let (kept, dropped) = filter_coerce_proposals_by_law_check(proposals, &registry);
    assert_eq!(kept.len(), 1);
    assert!(dropped.is_empty());
}
/// A source vertex with no outgoing edges must not be reported by the
/// naturality-aware predicate when the target contains a vertex of the same
/// kind (here: `string`).
#[test]
fn leaf_source_is_kept_when_kind_compatible_target_exists() {
    let protocol = test_protocol();

    // Source: one `string` vertex and no edges.
    let leaf_only = SchemaBuilder::new(&protocol)
        .vertex("leaf", "string", None::<&str>)
        .unwrap()
        .build()
        .unwrap();

    // Target: a record with a `string` field reached through a `prop` edge.
    let titled_doc = SchemaBuilder::new(&protocol)
        .vertex("doc", "record", None::<&str>)
        .unwrap()
        .vertex("doc.title", "string", None::<&str>)
        .unwrap()
        .edge("doc", "doc.title", "prop", Some("title"))
        .unwrap()
        .build()
        .unwrap();

    let no_anchors = HashMap::<Name, Name>::new();
    let excluded =
        sources_without_naturality_compatible_targets(&leaf_only, &titled_doc, &no_anchors, false);

    let leaf_excluded = excluded.contains(&Name::from("leaf"));
    assert!(
        !leaf_excluded,
        "leaf source with zero outgoing edges must not be excluded when a \
         kind-compatible target exists (got {excluded:?})",
    );
}
/// A source vertex with no outgoing edges must be reported by the
/// naturality-aware predicate when the target has no vertex of its kind: the
/// source is a lone `string`, while the target holds only a `record` and an
/// `array`.
#[test]
fn leaf_source_is_excluded_when_no_kind_compatible_target_exists() {
    let protocol = test_protocol();

    // Source: one `string` vertex and no edges.
    let leaf_only = SchemaBuilder::new(&protocol)
        .vertex("leaf", "string", None::<&str>)
        .unwrap()
        .build()
        .unwrap();

    // Target: two edge-less vertices, neither of kind `string`.
    let stringless_target = SchemaBuilder::new(&protocol)
        .vertex("doc", "record", None::<&str>)
        .unwrap()
        .vertex("items", "array", None::<&str>)
        .unwrap()
        .build()
        .unwrap();

    let no_anchors = HashMap::<Name, Name>::new();
    let excluded = sources_without_naturality_compatible_targets(
        &leaf_only,
        &stringless_target,
        &no_anchors,
        false,
    );

    let leaf_excluded = excluded.contains(&Name::from("leaf"));
    assert!(
        leaf_excluded,
        "leaf source must be excluded when no target vertex has a compatible kind \
         (got {excluded:?})",
    );
}
/// Runs the naturality-aware predicate on a pair of structurally identical
/// schemas, each made of 30 records with one `string` field reached through a
/// `prop` edge, and checks that no source vertex is reported.
#[test]
fn sources_without_naturality_compatible_targets_scales_on_larger_schemas() {
    let protocol = test_protocol();
    let size: usize = 30;

    // Source schema: `src_rec_{i}` --prop(name)--> `src_rec_{i}.name`.
    let src = (0..size)
        .fold(SchemaBuilder::new(&protocol), |builder, i| {
            let record = format!("src_rec_{i}");
            let field = format!("src_rec_{i}.name");
            builder
                .vertex(&record, "record", None::<&str>)
                .unwrap()
                .vertex(&field, "string", None::<&str>)
                .unwrap()
                .edge(&record, &field, "prop", Some("name"))
                .unwrap()
        })
        .build()
        .unwrap();

    // Target schema: same shape under the `tgt_rec_` prefix.
    let tgt = (0..size)
        .fold(SchemaBuilder::new(&protocol), |builder, i| {
            let record = format!("tgt_rec_{i}");
            let field = format!("tgt_rec_{i}.name");
            builder
                .vertex(&record, "record", None::<&str>)
                .unwrap()
                .vertex(&field, "string", None::<&str>)
                .unwrap()
                .edge(&record, &field, "prop", Some("name"))
                .unwrap()
        })
        .build()
        .unwrap();

    let no_anchors = HashMap::<Name, Name>::new();
    let excluded = sources_without_naturality_compatible_targets(&src, &tgt, &no_anchors, false);

    assert!(
        excluded.is_empty(),
        "no source should be excluded on a symmetric schema pair; got {excluded:?}",
    );
}
}