concision_utils/utils/
patterns.rs

1/*
2    Appellation: similarity <module>
3    Contrib: @FL03
4*/
5#[cfg(feature = "alloc")]
6use alloc::vec::Vec;
7use num_traits::{Float, FromPrimitive};
8
9/// Calculate similarity between two patterns
10pub fn calculate_pattern_similarity<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> T
11where
12    T: core::iter::Sum + Float + FromPrimitive,
13{
14    if pattern1.len() != pattern2.len() {
15        return T::zero();
16    }
17
18    let mut total_diff = T::zero();
19    for i in 0..pattern1.len() {
20        for j in 0..3 {
21            total_diff = total_diff + (pattern1[i][j] - pattern2[i][j]).abs();
22        }
23    }
24
25    // Convert difference to similarity (1.0 = identical, 0.0 = completely different)
26    let max_diff = T::from_usize(pattern1.len() * 3).unwrap();
27    T::one() - (total_diff / max_diff)
28}
29/// Check if two patterns are similar enough to be considered duplicates
30pub fn is_similar_pattern<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> bool
31where
32    T: core::iter::Sum + Float + FromPrimitive,
33{
34    if pattern1.len() != pattern2.len() {
35        return false;
36    }
37
38    calculate_pattern_similarity(pattern1, pattern2) > T::from_f32(0.9).unwrap()
39}
40
#[cfg(feature = "alloc")]
/// Extract common patterns from historical sequences.
///
/// Every contiguous sub-slice of a sequence that holds at least `min_length` entries is a
/// candidate pattern. A candidate "occurs" in another sequence when some window of the same
/// length in that sequence has a [`calculate_pattern_similarity`] score strictly above `0.8`;
/// each other sequence is counted at most once, and the score of its first matching window is
/// added to the candidate's total.
///
/// A candidate is kept when it occurs in at least `max(1, history.len() / 3)` other sequences.
/// Kept candidates are ranked by total score (highest first) and up to five mutually distinct
/// patterns, as judged by [`is_similar_pattern`], are returned as owned vectors.
///
/// Returns an empty vector when `history` holds fewer than two sequences. A `min_length` of
/// `0` is treated as `1`, because an empty pattern carries no information.
pub fn extract_patterns<T>(history: &[Vec<[T; 3]>], min_length: usize) -> Vec<Vec<[T; 3]>>
where
    T: core::iter::Sum + Float + FromPrimitive,
{
    // Upper bound on the number of distinct patterns returned.
    const MAX_PATTERNS: usize = 5;

    // Skip if not enough history
    if history.len() < 2 {
        return Vec::new();
    }

    // Empty candidates are meaningless and would also make `windows` panic below.
    let min_length = min_length.max(1);
    // A "common" pattern has to show up in at least one sequence besides its own; without
    // the lower bound a two-sequence history would accept every candidate unconditionally.
    let min_occurrences = (history.len() / 3).max(1);
    let match_threshold = T::from_f32(0.8).unwrap();

    // Candidates borrow from `history`; only the patterns finally returned are cloned.
    let mut candidates: Vec<(&[[T; 3]], T)> = Vec::new();

    for (i, seq) in history.iter().enumerate() {
        // Skip sequences that are too short
        if seq.len() < min_length {
            continue;
        }

        // Enumerate every sub-slice `seq[start..end]` of at least `min_length` entries.
        for start in 0..=(seq.len() - min_length) {
            for end in (start + min_length)..=seq.len() {
                let pattern = &seq[start..end];

                let mut occurrence_count = 0usize;
                let mut similarity_score = T::zero();

                for (j, other_seq) in history.iter().enumerate() {
                    if i == j {
                        continue; // Skip self
                    }

                    // `windows` yields nothing when `other_seq` is shorter than the pattern;
                    // `find` stops at the first match so each sequence counts only once.
                    let first_match = other_seq
                        .windows(pattern.len())
                        .map(|window| calculate_pattern_similarity(pattern, window))
                        .find(|&similarity| similarity > match_threshold);

                    if let Some(similarity) = first_match {
                        occurrence_count += 1;
                        similarity_score = similarity_score + similarity;
                    }
                }

                if occurrence_count >= min_occurrences {
                    candidates.push((pattern, similarity_score));
                }
            }
        }
    }

    // Rank by descending score; the stable sort keeps discovery order among equal scores.
    candidates.sort_by(|a, b| {
        b.1.partial_cmp(&a.1)
            .unwrap_or(core::cmp::Ordering::Equal)
    });

    // Deduplicate while filling, so near-identical top candidates cannot crowd out other
    // distinct patterns that rank further down the list.
    let mut unique_patterns: Vec<Vec<[T; 3]>> = Vec::new();
    for (pattern, _) in candidates {
        if unique_patterns.len() == MAX_PATTERNS {
            break;
        }
        let already_kept = unique_patterns
            .iter()
            .any(|kept| is_similar_pattern(kept.as_slice(), pattern));
        if !already_kept {
            unique_patterns.push(pattern.to_vec());
        }
    }

    unique_patterns
}