1use std::collections::{BTreeMap, BTreeSet};
7
8use brk_types::{Index, TreeNode, extract_json_type};
9use indexmap::IndexMap;
10
11use crate::{IndexSetPattern, PatternField, child_type_name};
12
13use super::{find_common_prefix, find_common_suffix, normalize_prefix};
14
15pub(super) fn get_shortest_leaf_name(node: &TreeNode) -> Option<String> {
20 match node {
21 TreeNode::Leaf(leaf) => Some(leaf.name().to_string()),
22 TreeNode::Branch(children) => children
23 .values()
24 .filter_map(get_shortest_leaf_name)
25 .min_by_key(|name| name.len()),
26 }
27}
28
29pub fn get_node_fields(
32 children: &IndexMap<String, TreeNode>,
33 pattern_lookup: &BTreeMap<Vec<PatternField>, String>,
34) -> Vec<PatternField> {
35 let mut fields: Vec<PatternField> = children
36 .iter()
37 .map(|(name, node)| {
38 let (rust_type, json_type, indexes) = match node {
39 TreeNode::Leaf(leaf) => (
40 leaf.kind().to_string(),
41 extract_json_type(&leaf.schema),
42 leaf.indexes().clone(),
43 ),
44 TreeNode::Branch(grandchildren) => {
45 let child_fields = get_node_fields(grandchildren, pattern_lookup);
46 let pattern_name = pattern_lookup
47 .get(&child_fields)
48 .cloned()
49 .unwrap_or_else(|| "Unknown".to_string());
50 (pattern_name.clone(), pattern_name, BTreeSet::new())
51 }
52 };
53 PatternField {
54 name: name.clone(),
55 rust_type,
56 json_type,
57 indexes,
58 type_param: None,
59 }
60 })
61 .collect();
62 fields.sort_by(|a, b| a.name.cmp(&b.name));
64 fields
65}
66
67pub fn detect_index_patterns(tree: &TreeNode) -> Vec<IndexSetPattern> {
69 let mut unique_index_sets: BTreeSet<BTreeSet<Index>> = BTreeSet::new();
70 collect_index_sets_from_tree(tree, &mut unique_index_sets);
71
72 let mut sorted_sets: Vec<_> = unique_index_sets
74 .into_iter()
75 .filter(|indexes| !indexes.is_empty())
76 .collect();
77 sorted_sets.sort_by(|a, b| {
78 b.len()
79 .cmp(&a.len())
80 .then_with(|| a.iter().next().cmp(&b.iter().next()))
81 });
82
83 sorted_sets
85 .into_iter()
86 .enumerate()
87 .map(|(i, indexes)| IndexSetPattern {
88 name: format!("SeriesPattern{}", i + 1),
89 indexes,
90 })
91 .collect()
92}
93
94fn collect_index_sets_from_tree(
95 node: &TreeNode,
96 unique_index_sets: &mut BTreeSet<BTreeSet<Index>>,
97) {
98 match node {
99 TreeNode::Leaf(leaf) => {
100 unique_index_sets.insert(leaf.indexes().clone());
101 }
102 TreeNode::Branch(children) => {
103 for child in children.values() {
104 collect_index_sets_from_tree(child, unique_index_sets);
105 }
106 }
107 }
108}
109
110#[derive(Debug, Clone)]
112pub struct PatternBaseResult {
113 pub base: String,
115 pub has_outlier: bool,
118 pub is_suffix_mode: bool,
121 pub field_parts: BTreeMap<String, String>,
124}
125
126impl PatternBaseResult {
127 pub fn force_inline() -> Self {
130 Self {
131 base: String::new(),
132 has_outlier: true,
133 is_suffix_mode: true,
134 field_parts: BTreeMap::new(),
135 }
136 }
137
138 pub fn empty() -> Self {
141 Self {
142 base: String::new(),
143 has_outlier: false,
144 is_suffix_mode: true,
145 field_parts: BTreeMap::new(),
146 }
147 }
148}
149
150pub fn get_pattern_instance_base(node: &TreeNode) -> PatternBaseResult {
160 let child_names = get_direct_children_for_analysis(node);
161 if child_names.is_empty() {
162 return PatternBaseResult::empty();
163 }
164
165 if let Some(result) = try_find_base(&child_names, false) {
167 return PatternBaseResult {
168 base: result.base,
169 has_outlier: result.has_outlier,
170 is_suffix_mode: result.is_suffix_mode,
171 field_parts: result.field_parts,
172 };
173 }
174
175 if child_names.len() > 2 {
177 for i in 0..child_names.len() {
178 let filtered: Vec<_> = child_names
179 .iter()
180 .enumerate()
181 .filter(|(j, _)| *j != i)
182 .map(|(_, v)| v.clone())
183 .collect();
184
185 if let Some(result) = try_find_base(&filtered, true) {
186 return PatternBaseResult {
187 base: result.base,
188 has_outlier: true,
189 is_suffix_mode: result.is_suffix_mode,
190 field_parts: result.field_parts,
191 };
192 }
193 }
194 }
195
196 PatternBaseResult::empty()
199}
200
201struct FindBaseResult {
203 base: String,
204 has_outlier: bool,
205 is_suffix_mode: bool,
206 field_parts: BTreeMap<String, String>,
207}
208
209fn try_find_base(
212 child_names: &[(String, String)],
213 is_outlier_attempt: bool,
214) -> Option<FindBaseResult> {
215 let leaf_names: Vec<&str> = child_names.iter().map(|(_, n)| n.as_str()).collect();
216
217 if let Some(prefix) = find_common_prefix(&leaf_names) {
219 let base = prefix.trim_end_matches('_').to_string();
220 let mut field_parts = BTreeMap::new();
221 for (field_name, leaf_name) in child_names {
222 let suffix = if leaf_name == &base {
224 String::new()
225 } else {
226 leaf_name
227 .strip_prefix(&prefix)
228 .unwrap_or(leaf_name)
229 .to_string()
230 };
231 field_parts.insert(field_name.clone(), suffix);
232 }
233 return Some(FindBaseResult {
234 base,
235 has_outlier: is_outlier_attempt,
236 is_suffix_mode: true,
237 field_parts,
238 });
239 }
240
241 if let Some(suffix) = find_common_suffix(&leaf_names) {
243 let base = suffix.trim_start_matches('_').to_string();
244 let mut field_parts = BTreeMap::new();
245 for (field_name, leaf_name) in child_names {
246 let prefix_part = leaf_name
248 .strip_suffix(&suffix)
249 .map(normalize_prefix)
250 .unwrap_or_default();
251 field_parts.insert(field_name.clone(), prefix_part);
252 }
253 return Some(FindBaseResult {
254 base,
255 has_outlier: is_outlier_attempt,
256 is_suffix_mode: false,
257 field_parts,
258 });
259 }
260
261 None
262}
263
264fn get_direct_children_for_analysis(node: &TreeNode) -> Vec<(String, String)> {
269 match node {
270 TreeNode::Leaf(leaf) => vec![(leaf.name().to_string(), leaf.name().to_string())],
271 TreeNode::Branch(children) => children
272 .iter()
273 .filter_map(|(field_name, child)| {
274 get_shortest_leaf_name(child).map(|leaf_name| (field_name.clone(), leaf_name))
275 })
276 .collect(),
277 }
278}
279
280pub fn infer_accumulated_name(parent_acc: &str, field_name: &str, descendant_leaf: &str) -> String {
282 if let Some(pos) = descendant_leaf.find(field_name) {
283 if pos == 0 {
284 return field_name.to_string();
285 }
286 if pos > 0 && descendant_leaf.chars().nth(pos - 1) == Some('_') {
287 return if parent_acc.is_empty() {
288 field_name.to_string()
289 } else {
290 format!("{}_{}", parent_acc, field_name)
291 };
292 }
293 }
294
295 if parent_acc.is_empty() {
296 field_name.to_string()
297 } else {
298 format!("{}_{}", parent_acc, field_name)
299 }
300}
301
302pub fn get_fields_with_child_info(
304 children: &IndexMap<String, TreeNode>,
305 parent_name: &str,
306 pattern_lookup: &BTreeMap<Vec<PatternField>, String>,
307) -> Vec<(PatternField, Option<Vec<PatternField>>)> {
308 children
309 .iter()
310 .map(|(name, node)| {
311 let (rust_type, json_type, indexes, child_fields) = match node {
312 TreeNode::Leaf(leaf) => (
313 leaf.kind().to_string(),
314 extract_json_type(&leaf.schema),
315 leaf.indexes().clone(),
316 None,
317 ),
318 TreeNode::Branch(grandchildren) => {
319 let child_fields = get_node_fields(grandchildren, pattern_lookup);
320 let pattern_name = pattern_lookup
321 .get(&child_fields)
322 .cloned()
323 .unwrap_or_else(|| child_type_name(parent_name, name));
324 (
325 pattern_name.clone(),
326 pattern_name,
327 BTreeSet::new(),
328 Some(child_fields),
329 )
330 }
331 };
332 (
333 PatternField {
334 name: name.clone(),
335 rust_type,
336 json_type,
337 indexes,
338 type_param: None,
339 },
340 child_fields,
341 )
342 })
343 .collect()
344}
345
346#[cfg(test)]
347mod tests {
348 use super::*;
349 use brk_types::{SeriesLeaf, SeriesLeafWithSchema, TreeNode};
350
351 fn make_leaf(name: &str) -> TreeNode {
352 let leaf = SeriesLeaf {
353 name: name.to_string(),
354 kind: "TestType".to_string(),
355 indexes: BTreeSet::new(),
356 };
357 TreeNode::Leaf(SeriesLeafWithSchema::new(leaf, serde_json::json!({})))
358 }
359
360 fn make_branch(children: Vec<(&str, TreeNode)>) -> TreeNode {
361 let map: IndexMap<String, TreeNode> = children
362 .into_iter()
363 .map(|(k, v)| (k.to_string(), v))
364 .collect();
365 TreeNode::Branch(map)
366 }
367
368 #[test]
369 fn test_get_pattern_instance_base_with_base_field() {
370 let tree = make_branch(vec![
372 (
373 "base",
374 make_branch(vec![("day1", make_leaf("block_vbytes"))]),
375 ),
376 (
377 "average",
378 make_branch(vec![("day1", make_leaf("block_vbytes_average"))]),
379 ),
380 (
381 "sum",
382 make_branch(vec![("day1", make_leaf("block_vbytes_sum"))]),
383 ),
384 ]);
385
386 let result = get_pattern_instance_base(&tree);
387 assert_eq!(result.base, "block_vbytes");
388 assert!(!result.has_outlier);
389 }
390
391 #[test]
392 fn test_get_pattern_instance_base_without_base_field() {
393 let tree = make_branch(vec![
395 (
396 "average",
397 make_branch(vec![("day1", make_leaf("block_weight_average"))]),
398 ),
399 (
400 "sum",
401 make_branch(vec![("day1", make_leaf("block_weight_sum"))]),
402 ),
403 (
404 "cumulative",
405 make_branch(vec![("day1", make_leaf("block_weight_cumulative"))]),
406 ),
407 (
408 "max",
409 make_branch(vec![("day1", make_leaf("block_weight_max"))]),
410 ),
411 (
412 "min",
413 make_branch(vec![("day1", make_leaf("block_weight_min"))]),
414 ),
415 ]);
416
417 let result = get_pattern_instance_base(&tree);
418 assert_eq!(result.base, "block_weight");
419 assert!(!result.has_outlier);
420 }
421
422 #[test]
423 fn test_get_pattern_instance_base_with_duplicate_base_field() {
424 let tree = make_branch(vec![
427 (
428 "base",
429 make_branch(vec![("day1", make_leaf("block_weight_average"))]),
430 ),
431 (
432 "average",
433 make_branch(vec![("day1", make_leaf("block_weight_average"))]),
434 ),
435 (
436 "sum",
437 make_branch(vec![("day1", make_leaf("block_weight_sum"))]),
438 ),
439 ]);
440
441 let result = get_pattern_instance_base(&tree);
442 assert_eq!(result.base, "block_weight");
444 assert!(!result.has_outlier);
445 }
446
447 #[test]
448 fn test_get_pattern_instance_base_with_mismatched_base_name() {
449 let tree = make_branch(vec![
453 ("base", make_leaf("weight")), ("average", make_leaf("block_weight_average")),
455 ("sum", make_leaf("block_weight_sum")),
456 ("cumulative", make_leaf("block_weight_cumulative")),
457 ("max", make_leaf("block_weight_max")),
458 ("min", make_leaf("block_weight_min")),
459 ]);
460
461 let result = get_pattern_instance_base(&tree);
462 assert_eq!(result.base, "block_weight");
464 assert!(result.has_outlier); }
466
467 #[test]
468 fn test_get_pattern_instance_base_root_level_no_common_pattern() {
469 let tree = make_branch(vec![
473 ("alpha", make_leaf("foo_series")),
474 ("beta", make_leaf("bar_value")),
475 ("gamma", make_leaf("baz_count")),
476 ]);
477
478 let result = get_pattern_instance_base(&tree);
479 assert_eq!(result.base, "");
481 assert!(!result.has_outlier);
482 }
483
484 #[test]
485 fn test_get_pattern_instance_base_two_children_no_pattern() {
486 let tree = make_branch(vec![
488 ("foo", make_leaf("alpha")),
489 ("bar", make_leaf("beta")),
490 ]);
491
492 let result = get_pattern_instance_base(&tree);
493 assert_eq!(result.base, "");
494 assert!(!result.has_outlier);
495 }
496
497 #[test]
498 fn test_get_pattern_instance_base_with_outlier_excluded() {
499 let tree = make_branch(vec![
503 ("adjustedSopr", make_leaf("adjusted_sopr")),
504 ("sopr", make_leaf("sopr")),
505 ("asopr", make_leaf("asopr")),
506 ]);
507
508 let result = get_pattern_instance_base(&tree);
509 assert_eq!(result.base, "sopr");
511 assert!(result.has_outlier); }
513
514 #[test]
515 fn test_get_pattern_instance_base_suffix_mode_price_ago() {
516 let tree = make_branch(vec![
519 ("_24h", make_leaf("price_24h_ago")),
520 ("_1w", make_leaf("price_1w_ago")),
521 ("_1m", make_leaf("price_1m_ago")),
522 ("_10y", make_leaf("price_10y_ago")),
523 ]);
524
525 let result = get_pattern_instance_base(&tree);
526 assert_eq!(result.base, "price");
527 assert!(result.is_suffix_mode); assert!(!result.has_outlier);
529 }
530
531 #[test]
532 fn test_get_pattern_instance_base_prefix_mode_price_returns() {
533 let tree = make_branch(vec![
536 ("_24h", make_leaf("24h_price_returns")),
537 ("_1w", make_leaf("1w_price_returns")),
538 ("_1m", make_leaf("1m_price_returns")),
539 ("_10y", make_leaf("10y_price_returns")),
540 ]);
541
542 let result = get_pattern_instance_base(&tree);
543 assert_eq!(result.base, "price_returns");
544 assert!(!result.is_suffix_mode); assert!(!result.has_outlier);
546 }
547
548 #[test]
549 fn test_mode_detection_distinguishes_similar_structures() {
550 let suffix_tree = make_branch(vec![
555 ("_1y", make_leaf("lump_sum_1y")),
556 ("_2y", make_leaf("lump_sum_2y")),
557 ("_5y", make_leaf("lump_sum_5y")),
558 ]);
559 let suffix_result = get_pattern_instance_base(&suffix_tree);
560 assert_eq!(suffix_result.base, "lump_sum");
561 assert!(suffix_result.is_suffix_mode);
562
563 let prefix_tree = make_branch(vec![
565 ("_1y", make_leaf("1y_returns")),
566 ("_2y", make_leaf("2y_returns")),
567 ("_5y", make_leaf("5y_returns")),
568 ]);
569 let prefix_result = get_pattern_instance_base(&prefix_tree);
570 assert_eq!(prefix_result.base, "returns");
571 assert!(!prefix_result.is_suffix_mode);
572 }
573}