use std::collections::BTreeMap;
use brk_types::TreeNode;
use super::{
find_common_prefix, find_common_suffix, get_node_fields, get_shortest_leaf_name,
normalize_prefix,
};
use crate::{PatternBaseResult, PatternField, PatternMode, StructuralPattern, build_child_path};
/// Naming analysis of a single branch node, i.e. one concrete instance of a
/// structural pattern.
///
/// Produced by `analyze_instance` from the names reported by the node's
/// children and later refined by `collect_instance_analyses`.
#[derive(Debug, Clone)]
struct InstanceAnalysis {
/// Name fragment shared by all children: the common prefix with trailing
/// `_` trimmed, or the common suffix with leading `_` trimmed. Empty when
/// the children share neither.
base: String,
/// For each child field name, the part of the child's name that is left
/// once the shared fragment is removed (an empty string when the child's
/// name equals `base`). When no shared fragment exists, this holds the
/// child's full name.
field_parts: BTreeMap<String, String>,
/// `true` when the children share a common prefix (each field appends its
/// part after `base`); `false` when they share a common suffix instead.
/// Also `true` in the fallback case where nothing is shared.
is_suffix_mode: bool,
/// Marks an instance that should not take part in mode detection:
/// `determine_pattern_mode` filters these out before voting.
has_outlier: bool,
}
/// Determines the naming mode of every structural pattern found in `tree`.
///
/// The work happens in three steps:
/// 1. Walk the tree bottom-up, recording a [`PatternBaseResult`] per branch
///    path and one `InstanceAnalysis` per pattern instance.
/// 2. Assign a preliminary mode to each pattern, then let
///    `fill_mixed_empty_field_parts` patch field parts that were left empty
///    (that pass consults the preliminary modes via `is_templated`).
/// 3. Rebuild the per-pattern analyses from the patched base results and
///    assign the final mode.
///
/// Patterns without any recorded instance keep their existing `mode`.
///
/// Returns the map from node path to its base result.
pub fn analyze_pattern_modes(
    tree: &TreeNode,
    patterns: &mut [StructuralPattern],
    pattern_lookup: &BTreeMap<Vec<PatternField>, String>,
) -> BTreeMap<String, PatternBaseResult> {
    let mut first_pass: BTreeMap<String, Vec<InstanceAnalysis>> = BTreeMap::new();
    let mut bases_by_path: BTreeMap<String, PatternBaseResult> = BTreeMap::new();
    let mut pattern_by_path: BTreeMap<String, String> = BTreeMap::new();

    collect_instance_analyses(
        tree,
        "",
        pattern_lookup,
        &mut first_pass,
        &mut bases_by_path,
        &mut pattern_by_path,
    );

    // Preliminary modes: needed by the fill pass below.
    for pattern in patterns.iter_mut() {
        let Some(instances) = first_pass.get(&pattern.name) else {
            continue;
        };
        pattern.mode = determine_pattern_mode(instances, &pattern.fields);
    }

    fill_mixed_empty_field_parts(tree, "", pattern_lookup, patterns, &mut bases_by_path);

    // Re-derive the analyses from the (possibly patched) base results.
    let mut second_pass: BTreeMap<String, Vec<InstanceAnalysis>> = BTreeMap::new();
    for (path, pattern_name) in &pattern_by_path {
        let Some(result) = bases_by_path.get(path) else {
            continue;
        };
        let refreshed = InstanceAnalysis {
            base: result.base.clone(),
            field_parts: result.field_parts.clone(),
            is_suffix_mode: result.is_suffix_mode,
            has_outlier: result.has_outlier,
        };
        second_pass
            .entry(pattern_name.clone())
            .or_default()
            .push(refreshed);
    }

    // Final modes.
    for pattern in patterns.iter_mut() {
        let Some(instances) = second_pass.get(&pattern.name) else {
            continue;
        };
        pattern.mode = determine_pattern_mode(instances, &pattern.fields);
    }

    bases_by_path
}
/// Post-order pass that fills in empty field parts on nodes whose parts are
/// a mix of empty and non-empty values.
///
/// For such a node, every child whose part is empty is reconsidered when it
/// is either a leaf or a branch whose pattern is templated. The replacement
/// part is the child's shortest leaf name with the `"<base>_"` prefix
/// removed; it is accepted only when non-empty and when it contains the
/// field name (ignoring the field name's leading underscores).
///
/// Nodes without an entry in `node_bases`, and nodes whose parts are all
/// empty or all non-empty, are left untouched.
fn fill_mixed_empty_field_parts(
    node: &TreeNode,
    path: &str,
    pattern_lookup: &BTreeMap<Vec<PatternField>, String>,
    patterns: &[StructuralPattern],
    node_bases: &mut BTreeMap<String, PatternBaseResult>,
) {
    let children = match node {
        TreeNode::Branch(children) => children,
        TreeNode::Leaf(_) => return,
    };

    // Descendants first, so deeper nodes are patched before their parents.
    for (name, child) in children {
        let child_path = build_child_path(path, name);
        fill_mixed_empty_field_parts(child, &child_path, pattern_lookup, patterns, node_bases);
    }

    let Some(current) = node_bases.get(path) else {
        return;
    };

    // Only a genuine mix of empty and non-empty parts is of interest.
    let empty_count = current
        .field_parts
        .values()
        .filter(|part| part.is_empty())
        .count();
    if empty_count == 0 || empty_count == current.field_parts.len() {
        return;
    }

    let prefix = format!("{}_", current.base);
    let mut pending: Vec<(String, String)> = Vec::new();

    for (name, child) in children {
        let currently_empty = current
            .field_parts
            .get(name.as_str())
            .is_some_and(|part| part.is_empty());
        if !currently_empty {
            continue;
        }

        // Leaves always qualify; branches only when their pattern is templated.
        let eligible = match child {
            TreeNode::Leaf(_) => true,
            TreeNode::Branch(grandchildren) => {
                let shape = get_node_fields(grandchildren, pattern_lookup);
                pattern_lookup
                    .get(&shape)
                    .and_then(|pattern_name| patterns.iter().find(|p| &p.name == pattern_name))
                    .is_some_and(|p| p.is_templated())
            }
        };
        if !eligible {
            continue;
        }

        let Some(leaf_name) = get_shortest_leaf_name(child) else {
            continue;
        };
        let Some(remainder) = leaf_name.strip_prefix(&prefix) else {
            continue;
        };
        let bare_name = name.trim_start_matches('_');
        if !remainder.is_empty() && remainder.contains(bare_name) {
            pending.push((name.clone(), remainder.to_string()));
        }
    }

    if pending.is_empty() {
        return;
    }
    // The entry was found above, so this lookup succeeds.
    if let Some(target) = node_bases.get_mut(path) {
        target.field_parts.extend(pending);
    }
}
/// Recursively analyzes `node` and returns the name it contributes to its
/// parent's analysis.
///
/// * A leaf contributes its own name.
/// * A branch contributes the `base` computed from its children, or, when
///   the branch was flagged as an outlier, its shortest leaf name (falling
///   back to the base if none is available). A branch none of whose
///   children contributed a name yields `None` and records nothing.
///
/// Side effects for every analyzed branch:
/// * `node_bases[path]` receives the branch's [`PatternBaseResult`];
/// * when the branch's fields match an entry of `pattern_lookup`,
///   `path_to_pattern[path]` is set and the analysis is appended to
///   `all_analyses[pattern_name]`.
fn collect_instance_analyses(
    node: &TreeNode,
    path: &str,
    pattern_lookup: &BTreeMap<Vec<PatternField>, String>,
    all_analyses: &mut BTreeMap<String, Vec<InstanceAnalysis>>,
    node_bases: &mut BTreeMap<String, PatternBaseResult>,
    path_to_pattern: &mut BTreeMap<String, String>,
) -> Option<String> {
    let children = match node {
        TreeNode::Leaf(leaf) => return Some(leaf.name().to_string()),
        TreeNode::Branch(children) => children,
    };

    // Gather the name each child contributes, keyed by field name.
    let mut child_bases: BTreeMap<String, String> = BTreeMap::new();
    for (name, child) in children {
        let child_path = build_child_path(path, name);
        let contributed = collect_instance_analyses(
            child,
            &child_path,
            pattern_lookup,
            all_analyses,
            node_bases,
            path_to_pattern,
        );
        if let Some(base) = contributed {
            child_bases.insert(name.clone(), base);
        }
    }
    if child_bases.is_empty() {
        return None;
    }

    let mut analysis = analyze_instance(&child_bases);

    // Several children that all collapsed to the same base cannot be told
    // apart; try to recover a distinguishing part from their leaf names.
    let indistinguishable = analysis.field_parts.len() > 1
        && analysis.field_parts.values().all(|part| part.is_empty());
    if indistinguishable {
        let prefix = format!("{}_", analysis.base);
        let mut recovered: Vec<(String, String)> = Vec::new();

        for (name, child) in children {
            let is_empty_part = analysis
                .field_parts
                .get(name)
                .is_some_and(|part| part.is_empty());
            if !is_empty_part {
                continue;
            }
            let Some(leaf_name) = get_shortest_leaf_name(child) else {
                continue;
            };
            let Some(remainder) = leaf_name.strip_prefix(&prefix) else {
                continue;
            };
            if !remainder.is_empty() && remainder.starts_with(name.trim_start_matches('_')) {
                recovered.push((name.clone(), remainder.to_string()));
            }
        }

        if recovered.is_empty() {
            // Nothing recovered: if every child field also has the same
            // type, flag the instance as an outlier.
            let shape = get_node_fields(children, pattern_lookup);
            let uniform_type = shape
                .windows(2)
                .all(|pair| pair[0].rust_type == pair[1].rust_type);
            if uniform_type {
                analysis.has_outlier = true;
            }
        } else {
            analysis.field_parts.extend(recovered);
        }
    }

    node_bases.insert(
        path.to_string(),
        PatternBaseResult {
            base: analysis.base.clone(),
            has_outlier: analysis.has_outlier,
            is_suffix_mode: analysis.is_suffix_mode,
            field_parts: analysis.field_parts.clone(),
        },
    );

    let shape = get_node_fields(children, pattern_lookup);
    if let Some(pattern_name) = pattern_lookup.get(&shape) {
        path_to_pattern.insert(path.to_string(), pattern_name.clone());
        all_analyses
            .entry(pattern_name.clone())
            .or_default()
            .push(analysis.clone());
    }

    let contributed = if analysis.has_outlier {
        get_shortest_leaf_name(node).unwrap_or(analysis.base)
    } else {
        analysis.base
    };
    Some(contributed)
}
/// Tries to express the differing field parts of `majority` as a template
/// with a single `{disc}` placeholder.
///
/// Needs at least two instances. The suffix-discriminator strategy is tried
/// first; the embedded-discriminator strategy is the fallback.
fn try_detect_template(
    majority: &[&InstanceAnalysis],
    fields: &[PatternField],
) -> Option<PatternMode> {
    if majority.len() < 2 {
        return None;
    }
    try_suffix_disc(majority, fields).or_else(|| try_embedded_disc(majority, fields))
}
/// Detects a discriminator embedded somewhere inside the field parts.
///
/// The shortest non-empty part of the first instance is taken as that
/// instance's discriminator value. Templates are built by replacing its
/// first occurrence in each of the first instance's parts with `{disc}`.
/// The result is accepted only if every instance's parts are reproduced
/// exactly by substituting that instance's own discriminator value.
///
/// Returns `None` when the second instance's discriminator is empty or equal
/// to the first's, when any field part is missing, or when any instance
/// fails the reconstruction check.
///
/// Indexes `majority[0]` and `majority[1]`; the caller must pass at least
/// two instances.
fn try_embedded_disc(
    majority: &[&InstanceAnalysis],
    fields: &[PatternField],
) -> Option<PatternMode> {
    let reference = &majority[0];
    let other = &majority[1];

    // Shortest non-empty part of the reference instance (first one on ties).
    let (disc_name, disc_value) = fields
        .iter()
        .filter_map(|f| {
            let part = reference.field_parts.get(&f.name)?;
            (!part.is_empty()).then_some((&f.name, part))
        })
        .min_by_key(|(_, part)| part.len())?;

    // `disc_value` is non-empty by construction; the other one must be too,
    // and the two must actually differ for this to be a discriminator.
    let other_value = other.field_parts.get(disc_name)?;
    if other_value.is_empty() || other_value == disc_value {
        return None;
    }

    let templates: BTreeMap<String, String> = fields
        .iter()
        .map(|f| {
            let part = reference.field_parts.get(&f.name)?;
            Some((
                f.name.clone(),
                part.replacen(disc_value.as_str(), "{disc}", 1),
            ))
        })
        .collect::<Option<_>>()?;

    // Every instance must round-trip through the templates.
    for inst in majority {
        let disc = inst.field_parts.get(disc_name)?;
        for f in fields {
            let actual = inst.field_parts.get(&f.name)?;
            let template = templates.get(&f.name)?;
            if *actual != template.replace("{disc}", disc) {
                return None;
            }
        }
    }

    Some(PatternMode::Templated { templates })
}
/// Detects a discriminator that is appended to the end of each field part.
///
/// The first instance is the reference. Each of its non-empty parts becomes
/// the template `"<part>{disc}"`; empty parts start out as empty templates.
/// For every other instance, the discriminator is whatever follows the
/// reference's part in the first field whose reference part is non-empty.
/// Each field must then satisfy one of:
/// * reference part non-empty: the instance part equals
///   `reference part + discriminator`;
/// * reference part empty and instance part empty: nothing to check;
/// * reference part empty and instance part equal to the discriminator: the
///   field's template becomes `"{disc}"`.
///
/// Anything else, or any missing field part, yields `None`.
///
/// Indexes `majority[0]`; the caller must pass a non-empty slice.
fn try_suffix_disc(
    majority: &[&InstanceAnalysis],
    fields: &[PatternField],
) -> Option<PatternMode> {
    let reference = &majority[0];

    // Anchor: first field with a non-empty part in the reference instance.
    let anchor_name = &fields
        .iter()
        .find(|f| {
            reference
                .field_parts
                .get(&f.name)
                .is_some_and(|part| !part.is_empty())
        })?
        .name;
    let anchor_part = reference.field_parts.get(anchor_name)?;

    let mut templates: BTreeMap<String, String> = BTreeMap::new();
    for f in fields {
        let part = reference.field_parts.get(&f.name)?;
        let template = if part.is_empty() {
            String::new()
        } else {
            format!("{part}{{disc}}")
        };
        templates.insert(f.name.clone(), template);
    }

    for inst in &majority[1..] {
        let disc = inst
            .field_parts
            .get(anchor_name)?
            .strip_prefix(anchor_part.as_str())?;

        for f in fields {
            let ref_part = reference.field_parts.get(&f.name)?;
            let inst_part = inst.field_parts.get(&f.name)?;

            match (ref_part.is_empty(), inst_part.is_empty()) {
                (true, true) => {}
                (true, false) => {
                    // The whole part must be the discriminator itself.
                    if inst_part.as_str() != disc {
                        return None;
                    }
                    templates.insert(f.name.clone(), "{disc}".to_string());
                }
                (false, _) => {
                    // Must be exactly `ref_part` followed by `disc`.
                    if inst_part.strip_prefix(ref_part.as_str()) != Some(disc) {
                        return None;
                    }
                }
            }
        }
    }

    Some(PatternMode::Templated { templates })
}
/// Derives the shared base and the per-field parts from the names reported
/// by a node's children (`child_bases` maps field name to child name).
///
/// Three outcomes, tried in order:
/// 1. **Common prefix** — suffix mode. `base` is the prefix without trailing
///    `_`; a field's part is its name with the prefix stripped, or empty
///    when the name equals `base`.
/// 2. **Common suffix** — prefix mode. `base` is the suffix without leading
///    `_`; a field's part is what precedes the suffix, passed through
///    `normalize_prefix` (empty if the suffix cannot be stripped).
/// 3. **Neither** — an outlier: empty `base`, each part is the child's full
///    name, and `has_outlier` is set.
fn analyze_instance(child_bases: &BTreeMap<String, String>) -> InstanceAnalysis {
    let bases: Vec<&str> = child_bases.values().map(|s| s.as_str()).collect();

    if let Some(common_prefix) = find_common_prefix(&bases) {
        let base = common_prefix.trim_end_matches('_').to_string();
        let field_parts = child_bases
            .iter()
            .map(|(name, full)| {
                let relative = if *full == base {
                    String::new()
                } else {
                    full.strip_prefix(&common_prefix).unwrap_or(full).to_string()
                };
                (name.clone(), relative)
            })
            .collect();
        return InstanceAnalysis {
            base,
            field_parts,
            is_suffix_mode: true,
            has_outlier: false,
        };
    }

    if let Some(common_suffix) = find_common_suffix(&bases) {
        let base = common_suffix.trim_start_matches('_').to_string();
        let field_parts = child_bases
            .iter()
            .map(|(name, full)| {
                let leading = full
                    .strip_suffix(&common_suffix)
                    .map(normalize_prefix)
                    .unwrap_or_default();
                (name.clone(), leading)
            })
            .collect();
        return InstanceAnalysis {
            base,
            field_parts,
            is_suffix_mode: false,
            has_outlier: false,
        };
    }

    // Nothing shared: keep the full names and flag the instance.
    InstanceAnalysis {
        base: String::new(),
        field_parts: child_bases.clone(),
        is_suffix_mode: true,
        has_outlier: true,
    }
}
/// Chooses the naming mode for a pattern from the analyses of its instances.
///
/// * Outlier instances are discarded; with none left the result is `None`.
/// * The remaining instances vote between suffix and prefix mode (suffix
///   wins ties) and only the winning side is kept.
/// * The first kept instance must have a part for every field in `fields`.
/// * If all kept instances have identical parts, those parts are returned as
///   `Suffix` or `Prefix`.
/// * Otherwise, in suffix mode a template is attempted; in prefix mode the
///   result is `None`.
fn determine_pattern_mode(
    analyses: &[InstanceAnalysis],
    fields: &[PatternField],
) -> Option<PatternMode> {
    // An empty `analyses` slice also ends up here with no candidates.
    let candidates: Vec<&InstanceAnalysis> =
        analyses.iter().filter(|a| !a.has_outlier).collect();
    if candidates.is_empty() {
        return None;
    }

    let suffix_votes = candidates.iter().filter(|a| a.is_suffix_mode).count();
    let is_suffix = suffix_votes * 2 >= candidates.len();

    let majority: Vec<&InstanceAnalysis> = candidates
        .into_iter()
        .filter(|a| a.is_suffix_mode == is_suffix)
        .collect();
    let reference = majority.first()?;

    let covers_all_fields = fields
        .iter()
        .all(|f| reference.field_parts.contains_key(&f.name));
    if !covers_all_fields {
        return None;
    }

    let unanimous = majority
        .iter()
        .all(|a| a.field_parts == reference.field_parts);

    match (unanimous, is_suffix) {
        (true, true) => Some(PatternMode::Suffix {
            relatives: reference.field_parts.clone(),
        }),
        (true, false) => Some(PatternMode::Prefix {
            prefixes: reference.field_parts.clone(),
        }),
        (false, true) => try_detect_template(&majority, fields),
        (false, false) => None,
    }
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    use super::*;

    // ---------------------------------------------------------------
    // Fixture helpers
    // ---------------------------------------------------------------

    /// Builds a `PatternField` with no indexes and no type parameter.
    fn field(name: &str, rust_type: &str, json_type: &str) -> PatternField {
        PatternField {
            name: name.to_string(),
            rust_type: rust_type.to_string(),
            json_type: json_type.to_string(),
            indexes: BTreeSet::new(),
            type_param: None,
        }
    }

    /// Builds fields that all share the given Rust/JSON types.
    fn typed_fields(names: &[&str], rust_type: &str, json_type: &str) -> Vec<PatternField> {
        names
            .iter()
            .map(|name| field(name, rust_type, json_type))
            .collect()
    }

    /// Builds fields with the placeholder types `"T"` / `"n"`.
    fn placeholder_fields(names: &[&str]) -> Vec<PatternField> {
        typed_fields(names, "T", "n")
    }

    /// Turns `(key, value)` string pairs into an owned map.
    fn string_map(pairs: &[(&str, &str)]) -> BTreeMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Reads a map entry as `Option<&str>` to keep assertions short.
    fn lookup<'a>(map: &'a BTreeMap<String, String>, key: &str) -> Option<&'a str> {
        map.get(key).map(String::as_str)
    }

    /// Builds an `InstanceAnalysis` with every flag spelled out.
    fn instance(
        base: &str,
        parts: &[(&str, &str)],
        is_suffix_mode: bool,
        has_outlier: bool,
    ) -> InstanceAnalysis {
        InstanceAnalysis {
            base: base.to_string(),
            field_parts: string_map(parts),
            is_suffix_mode,
            has_outlier,
        }
    }

    /// Builds a regular (non-outlier) suffix-mode instance.
    fn suffix_instance(base: &str, parts: &[(&str, &str)]) -> InstanceAnalysis {
        instance(base, parts, true, false)
    }

    /// Asserts that `mode` is `Some(Templated)` and returns its templates.
    fn expect_templated(mode: Option<PatternMode>) -> BTreeMap<String, String> {
        assert!(mode.is_some());
        match mode.unwrap() {
            PatternMode::Templated { templates } => templates,
            other => panic!("Expected Templated, got {:?}", other),
        }
    }

    // ---------------------------------------------------------------
    // analyze_instance
    // ---------------------------------------------------------------

    /// Children sharing a common prefix are analyzed in suffix mode.
    #[test]
    fn test_analyze_instance_suffix_mode() {
        let child_bases = string_map(&[
            ("max", "lth_cost_basis_max"),
            ("min", "lth_cost_basis_min"),
            ("percentiles", "lth_cost_basis"),
        ]);

        let analysis = analyze_instance(&child_bases);

        assert!(analysis.is_suffix_mode);
        assert_eq!(analysis.base, "lth_cost_basis");
        assert_eq!(lookup(&analysis.field_parts, "max"), Some("max"));
        assert_eq!(lookup(&analysis.field_parts, "min"), Some("min"));
        assert_eq!(lookup(&analysis.field_parts, "percentiles"), Some(""));
    }

    /// Children sharing a common suffix are analyzed in prefix mode.
    #[test]
    fn test_analyze_instance_prefix_mode() {
        let child_bases = string_map(&[
            ("_1y", "1y_lump_sum_stack"),
            ("_1m", "1m_lump_sum_stack"),
            ("_1w", "1w_lump_sum_stack"),
        ]);

        let analysis = analyze_instance(&child_bases);

        assert!(!analysis.is_suffix_mode);
        assert_eq!(analysis.base, "lump_sum_stack");
        assert_eq!(lookup(&analysis.field_parts, "_1y"), Some("1y_"));
        assert_eq!(lookup(&analysis.field_parts, "_1m"), Some("1m_"));
        assert_eq!(lookup(&analysis.field_parts, "_1w"), Some("1w_"));
    }

    /// Same as the suffix-mode case, with a base that has no cohort prefix.
    #[test]
    fn test_analyze_instance_root_suffix() {
        let child_bases = string_map(&[
            ("max", "cost_basis_max"),
            ("min", "cost_basis_min"),
            ("percentiles", "cost_basis"),
        ]);

        let analysis = analyze_instance(&child_bases);

        assert!(analysis.is_suffix_mode);
        assert_eq!(analysis.base, "cost_basis");
        assert_eq!(lookup(&analysis.field_parts, "max"), Some("max"));
        assert_eq!(lookup(&analysis.field_parts, "min"), Some("min"));
        assert_eq!(lookup(&analysis.field_parts, "percentiles"), Some(""));
    }

    // ---------------------------------------------------------------
    // determine_pattern_mode: plain suffix / prefix
    // ---------------------------------------------------------------

    /// Three suffix-mode instances outvote a single prefix-mode instance.
    #[test]
    fn test_determine_pattern_mode_majority_voting() {
        let fields = typed_fields(&["max", "min", "percentiles"], "TestType", "number");

        let suffix_parts = [("max", "max"), ("min", "min"), ("percentiles", "")];
        let analyses = vec![
            suffix_instance("lth_cost_basis", &suffix_parts),
            suffix_instance("sth_cost_basis", &suffix_parts),
            suffix_instance("utxo_cost_basis", &suffix_parts),
            instance(
                "cost_basis",
                &[("max", "max_"), ("min", "min_"), ("percentiles", "")],
                false,
                false,
            ),
        ];

        let mode = determine_pattern_mode(&analyses, &fields);

        assert!(mode.is_some());
        match mode.unwrap() {
            PatternMode::Suffix { relatives } => {
                assert_eq!(lookup(&relatives, "max"), Some("max"));
                assert_eq!(lookup(&relatives, "min"), Some("min"));
                assert_eq!(lookup(&relatives, "percentiles"), Some(""));
            }
            PatternMode::Prefix { .. } => panic!("Expected suffix mode, got prefix mode"),
            PatternMode::Templated { .. } => panic!("Expected suffix mode, got templated mode"),
        }
    }

    /// Instances with identical parts yield a plain suffix mode.
    #[test]
    fn test_determine_pattern_mode_all_same() {
        let fields = typed_fields(&["max", "min"], "TestType", "number");

        let shared_parts = [("max", "max"), ("min", "min")];
        let analyses = vec![
            suffix_instance("series_a", &shared_parts),
            suffix_instance("series_b", &shared_parts),
        ];

        let mode = determine_pattern_mode(&analyses, &fields);

        assert!(mode.is_some());
        match mode.unwrap() {
            PatternMode::Suffix { relatives } => {
                assert_eq!(lookup(&relatives, "max"), Some("max"));
                assert_eq!(lookup(&relatives, "min"), Some("min"));
            }
            PatternMode::Prefix { .. } => panic!("Expected suffix mode"),
            PatternMode::Templated { .. } => panic!("Expected suffix mode, got templated"),
        }
    }

    // ---------------------------------------------------------------
    // determine_pattern_mode: templated
    // ---------------------------------------------------------------

    /// A discriminator in the middle of the parts (`pct99` / `pct1`).
    #[test]
    fn test_embedded_disc_percentile_bands() {
        let fields = placeholder_fields(&["bps", "price", "ratio"]);

        let pct99 = suffix_instance(
            "realized_price",
            &[
                ("bps", "ratio_pct99_bps"),
                ("price", "pct99"),
                ("ratio", "ratio_pct99"),
            ],
        );
        let pct1 = suffix_instance(
            "realized_price",
            &[
                ("bps", "ratio_pct1_bps"),
                ("price", "pct1"),
                ("ratio", "ratio_pct1"),
            ],
        );

        let templates = expect_templated(determine_pattern_mode(&[pct99, pct1], &fields));

        assert_eq!(lookup(&templates, "bps"), Some("ratio_{disc}_bps"));
        assert_eq!(lookup(&templates, "price"), Some("{disc}"));
        assert_eq!(lookup(&templates, "ratio"), Some("ratio_{disc}"));
    }

    /// A discriminator appended to every part (`` vs `_4y`).
    #[test]
    fn test_suffix_disc_period_windows() {
        let fields = placeholder_fields(&["p1sd", "sd", "zscore"]);

        let all_time = suffix_instance(
            "realized_price",
            &[
                ("p1sd", "p1sd"),
                ("sd", "ratio_sd"),
                ("zscore", "ratio_zscore"),
            ],
        );
        let four_year = suffix_instance(
            "realized_price",
            &[
                ("p1sd", "p1sd_4y"),
                ("sd", "ratio_sd_4y"),
                ("zscore", "ratio_zscore_4y"),
            ],
        );

        let templates = expect_templated(determine_pattern_mode(&[all_time, four_year], &fields));

        assert_eq!(lookup(&templates, "p1sd"), Some("p1sd{disc}"));
        assert_eq!(lookup(&templates, "sd"), Some("ratio_sd{disc}"));
        assert_eq!(lookup(&templates, "zscore"), Some("ratio_zscore{disc}"));
    }

    /// A part that is empty in every instance keeps an empty template.
    #[test]
    fn test_suffix_disc_with_empty_fields() {
        let fields = placeholder_fields(&["band", "sd"]);

        let all_time = suffix_instance("price", &[("band", ""), ("sd", "ratio_sd")]);
        let four_year = suffix_instance("price", &[("band", ""), ("sd", "ratio_sd_4y")]);

        let templates = expect_templated(determine_pattern_mode(&[all_time, four_year], &fields));

        assert_eq!(lookup(&templates, "band"), Some(""));
        assert_eq!(lookup(&templates, "sd"), Some("ratio_sd{disc}"));
    }

    /// A part that is empty in the reference but equals the discriminator in
    /// another instance becomes the bare `{disc}` template.
    #[test]
    fn test_suffix_disc_empty_to_nonempty() {
        let fields = placeholder_fields(&["all", "sth"]);

        let regular = suffix_instance("supply", &[("all", ""), ("sth", "sth_")]);
        let profitability = suffix_instance(
            "utxos_in_profit",
            &[("all", "supply"), ("sth", "sth_supply")],
        );

        let templates =
            expect_templated(determine_pattern_mode(&[regular, profitability], &fields));

        assert_eq!(lookup(&templates, "all"), Some("{disc}"));
        assert_eq!(lookup(&templates, "sth"), Some("sth_{disc}"));
    }

    // ---------------------------------------------------------------
    // determine_pattern_mode: outliers and rejections
    // ---------------------------------------------------------------

    /// An outlier instance is ignored; the remaining one decides the mode.
    #[test]
    fn test_outlier_rejects_pattern() {
        let fields = placeholder_fields(&["ratio", "value"]);

        let normal = suffix_instance("series", &[("ratio", "ratio"), ("value", "value")]);
        let outlier = instance(
            "",
            &[("ratio", "asopr"), ("value", "adj_value")],
            true,
            true,
        );

        let mode = determine_pattern_mode(&[normal, outlier], &fields);

        assert!(mode.is_some(), "Outlier should be filtered out, leaving a valid pattern from non-outlier instances");
    }

    /// Unrelated parts that fit no template give no mode at all.
    #[test]
    fn test_unanimity_rejects_disagreeing_instances() {
        let fields = placeholder_fields(&["a", "b"]);

        let inst1 = suffix_instance("x", &[("a", "foo"), ("b", "bar")]);
        let inst2 = suffix_instance("y", &[("a", "baz"), ("b", "qux")]);

        let mode = determine_pattern_mode(&[inst1, inst2], &fields);

        assert!(mode.is_none(), "Should be non-parameterizable when no pattern detected");
    }

    /// A single non-outlier instance with all-empty parts yields a suffix
    /// mode whose relatives are all empty.
    #[test]
    fn test_all_empty_different_types_uses_identity() {
        let fields = vec![field("absolute", "TypeA", "n"), field("rate", "TypeB", "n")];

        let inst = suffix_instance("supply_delta", &[("absolute", ""), ("rate", "")]);

        let mode = determine_pattern_mode(&[inst], &fields);

        assert!(mode.is_some());
        match mode.unwrap() {
            PatternMode::Suffix { relatives } => {
                assert_eq!(lookup(&relatives, "absolute"), Some(""), "absolute should be identity");
                assert_eq!(lookup(&relatives, "rate"), Some(""), "rate should be identity");
            }
            other => panic!("Expected Suffix with identity, got {:?}", other),
        }
    }

    /// Children that all report the same name produce all-empty parts.
    ///
    /// Note: despite its name, this test only covers `analyze_instance`. The
    /// outlier flag for this situation is set in `collect_instance_analyses`
    /// and is not asserted here.
    #[test]
    fn test_all_empty_same_type_marks_outlier() {
        let child_bases = string_map(&[
            ("all", "realized_price"),
            ("_4y", "realized_price"),
            ("_2y", "realized_price"),
            ("_1y", "realized_price"),
        ]);

        let analysis = analyze_instance(&child_bases);

        assert_eq!(analysis.base, "realized_price");
        assert!(
            analysis.field_parts.values().all(|v| v.is_empty()),
            "All field_parts should be empty when children return same base"
        );
    }

    /// A pattern whose only instance is an outlier has no mode.
    #[test]
    fn test_non_parameterizable_cascade() {
        let fields = placeholder_fields(&["a"]);

        let inst = instance("", &[("a", "standalone_name")], true, true);

        let mode = determine_pattern_mode(&[inst], &fields);

        assert!(mode.is_none(), "Pattern with outlier should be non-parameterizable");
    }

    // ---------------------------------------------------------------
    // StructuralPattern::extract_disc_from_instance
    // ---------------------------------------------------------------

    /// The discriminator is recovered from concrete parts, including the
    /// empty discriminator of the un-suffixed instance.
    #[test]
    fn test_extract_disc_from_instance() {
        use crate::StructuralPattern;

        let pattern = StructuralPattern {
            name: "TestPattern".into(),
            fields: placeholder_fields(&["_0sd", "p1sd", "sd"]),
            mode: Some(PatternMode::Templated {
                templates: string_map(&[
                    ("_0sd", "0sd{disc}"),
                    ("p1sd", "p1sd{disc}"),
                    ("sd", "ratio_sd{disc}"),
                ]),
            }),
            is_generic: false,
        };

        let field_parts_4y = string_map(&[
            ("_0sd", "0sd_4y"),
            ("p1sd", "p1sd_4y"),
            ("sd", "ratio_sd_4y"),
        ]);
        let disc = pattern.extract_disc_from_instance(&field_parts_4y);
        assert_eq!(disc, Some("4y".to_string()));

        let field_parts_all = string_map(&[
            ("_0sd", "0sd"),
            ("p1sd", "p1sd"),
            ("sd", "ratio_sd"),
        ]);
        let disc = pattern.extract_disc_from_instance(&field_parts_all);
        assert_eq!(disc, Some(String::new()));
    }

    // ---------------------------------------------------------------
    // Mixed empty / non-empty parts
    // ---------------------------------------------------------------

    /// `analyze_instance` alone leaves the parts of children that reported
    /// the bare base empty, next to non-empty siblings.
    #[test]
    fn test_mixed_empty_fills_with_longer_suffix() {
        let child_bases = string_map(&[
            ("cap", "utxos_realized_cap"),
            ("loss", "utxos"),
            ("mvrv", "utxos_mvrv"),
            ("price", "utxos_realized_price"),
            ("supply", "utxos"),
        ]);

        let analysis = analyze_instance(&child_bases);

        assert_eq!(analysis.base, "utxos");
        assert_eq!(lookup(&analysis.field_parts, "loss"), Some(""));
        assert_eq!(lookup(&analysis.field_parts, "supply"), Some(""));
        assert_eq!(lookup(&analysis.field_parts, "cap"), Some("realized_cap"));
        assert_eq!(lookup(&analysis.field_parts, "mvrv"), Some("mvrv"));
        assert_eq!(lookup(&analysis.field_parts, "price"), Some("realized_price"));
    }

    /// A nested branch (`loss` with `base` / `negative` leaves) contributes
    /// its own base, so the parent sees `realized_loss` as the part.
    #[test]
    fn test_loss_with_neg_suffix_has_correct_field_parts() {
        use brk_types::{SeriesLeaf, SeriesLeafWithSchema, TreeNode};

        fn leaf(name: &str) -> TreeNode {
            TreeNode::Leaf(SeriesLeafWithSchema::new(
                SeriesLeaf::new(name.into(), "f32".into(), std::collections::BTreeSet::new()),
                serde_json::Value::Null,
            ))
        }

        let loss_branch = TreeNode::Branch(
            [
                ("base".into(), leaf("utxos_realized_loss")),
                ("negative".into(), leaf("utxos_realized_loss_neg")),
            ]
            .into_iter()
            .collect(),
        );
        let parent = TreeNode::Branch(
            [
                ("cap".into(), leaf("utxos_realized_cap")),
                ("loss".into(), loss_branch),
                ("mvrv".into(), leaf("utxos_mvrv")),
            ]
            .into_iter()
            .collect(),
        );

        let mut all_analyses = BTreeMap::new();
        let mut node_bases = BTreeMap::new();
        let mut path_to_pattern = BTreeMap::new();
        let pattern_lookup = BTreeMap::new();

        collect_instance_analyses(
            &parent,
            "test",
            &pattern_lookup,
            &mut all_analyses,
            &mut node_bases,
            &mut path_to_pattern,
        );

        let result = node_bases.get("test").expect("should have node_bases entry");
        assert_eq!(result.base, "utxos");
        assert!(!result.has_outlier);
        assert_eq!(lookup(&result.field_parts, "cap"), Some("realized_cap"));
        assert_eq!(lookup(&result.field_parts, "mvrv"), Some("mvrv"));
        assert_eq!(lookup(&result.field_parts, "loss"), Some("realized_loss"));
    }
}