concision_core/utils/
patterns.rs

1/*
2    Appellation: similarity <module>
3    Contrib: @FL03
4*/
5use num_traits::{Float, FromPrimitive};
6
7/// Calculate similarity between two patterns
8pub fn calculate_pattern_similarity<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> T
9where
10    T: core::iter::Sum + Float + FromPrimitive,
11{
12    if pattern1.len() != pattern2.len() {
13        return T::zero();
14    }
15
16    let mut total_diff = T::zero();
17    for i in 0..pattern1.len() {
18        for j in 0..3 {
19            total_diff = total_diff + (pattern1[i][j] - pattern2[i][j]).abs();
20        }
21    }
22
23    // Convert difference to similarity (1.0 = identical, 0.0 = completely different)
24    let max_diff = T::from_usize(pattern1.len() * 3).unwrap();
25    T::one() - (total_diff / max_diff)
26}
27/// Extract common patterns from historical sequences
28pub fn extract_patterns<T>(history: &[Vec<[T; 3]>], min_length: usize) -> Vec<Vec<[T; 3]>>
29where
30    T: core::iter::Sum + Float + FromPrimitive,
31{
32    let mut common_patterns = Vec::new();
33    let mut pattern_scores = Vec::new();
34
35    // Skip if not enough history
36    if history.len() < 2 {
37        return common_patterns;
38    }
39
40    // For each sequence in history
41    for i in 0..history.len() {
42        let seq = &history[i];
43
44        // Skip sequences that are too short
45        if seq.len() < min_length {
46            continue;
47        }
48
49        // For each possible pattern start position
50        for start in 0..=(seq.len() - min_length) {
51            // For each possible pattern length
52            for len in min_length..=(seq.len() - start) {
53                let pattern = &seq[start..start + len];
54
55                // Calculate how many times this pattern appears in other sequences
56                let mut occurrence_count = 0;
57                let mut similarity_score = T::zero();
58
59                // Check other sequences
60                for j in 0..history.len() {
61                    if i == j {
62                        continue; // Skip self
63                    }
64
65                    let other_seq = &history[j];
66                    if other_seq.len() < pattern.len() {
67                        continue;
68                    }
69
70                    // Sliding window comparison
71                    for k in 0..=other_seq.len() - pattern.len() {
72                        let window = &other_seq[k..k + pattern.len()];
73
74                        // Calculate similarity
75                        let similarity = calculate_pattern_similarity(pattern, window);
76                        if similarity > T::from_f32(0.8).unwrap() {
77                            occurrence_count += 1;
78                            similarity_score = similarity_score + similarity;
79                            break; // Count only once per sequence
80                        }
81                    }
82                }
83
84                // If pattern occurs in multiple sequences, consider it significant
85                if occurrence_count >= history.len() / 3 {
86                    // Clone pattern to owned data
87                    let owned_pattern = pattern.to_vec();
88                    common_patterns.push(owned_pattern);
89                    pattern_scores.push(similarity_score);
90                }
91            }
92        }
93    }
94
95    // Sort patterns by score and remove duplicates
96    // (taking only the top N distinct patterns)
97    let mut unique_patterns = Vec::new();
98    let mut indices: Vec<usize> = (0..common_patterns.len()).collect();
99    indices.sort_by(|&i, &j| {
100        pattern_scores[j]
101            .partial_cmp(&pattern_scores[i])
102            .unwrap_or(std::cmp::Ordering::Equal)
103    });
104
105    for &idx in indices.iter().take(5) {
106        // Take top 5 patterns
107        let pattern = &common_patterns[idx];
108        if !unique_patterns
109            .iter()
110            .any(|p: &Vec<[T; 3]>| is_similar_pattern(p, pattern))
111        {
112            unique_patterns.push(pattern.clone());
113        }
114    }
115
116    unique_patterns
117}
118
119/// Check if two patterns are similar enough to be considered duplicates
120pub fn is_similar_pattern<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> bool
121where
122    T: core::iter::Sum + Float + FromPrimitive,
123{
124    if pattern1.len() != pattern2.len() {
125        return false;
126    }
127
128    calculate_pattern_similarity(pattern1, pattern2) > T::from_f32(0.9).unwrap()
129}