concision_utils/utils/
patterns.rs

1/*
2    Appellation: similarity <module>
3    Contrib: @FL03
4*/
5#[cfg(feature = "alloc")]
6use alloc::vec::Vec;
7use num_traits::{Float, FromPrimitive};
8
9/// Calculate similarity between two patterns
10pub fn calculate_pattern_similarity<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> T
11where
12    T: core::iter::Sum + Float + FromPrimitive,
13{
14    if pattern1.len() != pattern2.len() {
15        return T::zero();
16    }
17
18    let mut total_diff = T::zero();
19    for i in 0..pattern1.len() {
20        for j in 0..3 {
21            total_diff = total_diff + (pattern1[i][j] - pattern2[i][j]).abs();
22        }
23    }
24
25    // Convert difference to similarity (1.0 = identical, 0.0 = completely different)
26    let max_diff = T::from_usize(pattern1.len() * 3).unwrap();
27    T::one() - (total_diff / max_diff)
28}
29/// Check if two patterns are similar enough to be considered duplicates
30pub fn is_similar_pattern<T>(pattern1: &[[T; 3]], pattern2: &[[T; 3]]) -> bool
31where
32    T: core::iter::Sum + Float + FromPrimitive,
33{
34    if pattern1.len() != pattern2.len() {
35        return false;
36    }
37
38    calculate_pattern_similarity(pattern1, pattern2) > T::from_f32(0.9).unwrap()
39}
40
/// Extract common patterns from historical sequences.
///
/// Every contiguous sub-sequence of at least `min_length` elements in each sequence of
/// `history` is treated as a candidate. A candidate *occurs* in another sequence when some
/// equally long window of that sequence has a similarity (see
/// [`calculate_pattern_similarity`]) strictly greater than `0.8`; at most one occurrence is
/// counted per sequence. The candidate's score is the sum of those matching similarities.
///
/// A candidate is kept when it occurs in at least `max(1, history.len() / 3)` other
/// sequences. The kept candidates are ranked by descending score and the first five that
/// are mutually distinct according to [`is_similar_pattern`] are returned as owned vectors.
///
/// # Arguments
/// * `history` - the sequences to mine; fewer than two sequences yields an empty result.
/// * `min_length` - the shortest pattern to consider; `0` is treated as `1` because an
///   empty pattern carries no information.
///
/// # Panics
/// Panics if the similarity thresholds cannot be converted into `T`.
#[cfg(feature = "alloc")]
pub fn extract_patterns<T>(history: &[Vec<[T; 3]>], min_length: usize) -> Vec<Vec<[T; 3]>>
where
    T: core::iter::Sum + Float + FromPrimitive,
{
    // Maximum number of distinct patterns handed back to the caller.
    const MAX_PATTERNS: usize = 5;

    // A single sequence has nothing to be compared against.
    if history.len() < 2 {
        return Vec::new();
    }

    // Never generate zero-length patterns (this also keeps `windows` below well-defined).
    let min_length = min_length.max(1);
    // Similarity above which a window counts as an occurrence of the candidate.
    let match_threshold =
        T::from_f32(0.8).expect("match threshold must be representable in the float type");
    // `history.len() / 3` alone is zero for short histories, which would accept candidates
    // that never matched anything; always demand at least one real occurrence.
    let min_occurrences = (history.len() / 3).max(1);

    // Candidates are kept as (score, borrowed slice); only the final winners are cloned.
    let mut candidates: Vec<(T, &[[T; 3]])> = Vec::new();

    for (i, seq) in history.iter().enumerate() {
        // Skip sequences that are too short to contain any pattern.
        if seq.len() < min_length {
            continue;
        }

        // Enumerate every start position and every admissible length from that position.
        for start in 0..=(seq.len() - min_length) {
            for len in min_length..=(seq.len() - start) {
                let pattern = &seq[start..start + len];

                let mut occurrence_count = 0usize;
                let mut similarity_score = T::zero();

                for (j, other_seq) in history.iter().enumerate() {
                    if i == j {
                        continue; // never compare a sequence with itself
                    }

                    // Sliding-window comparison; `windows` yields nothing when `other_seq`
                    // is shorter than the pattern. Only the first match per sequence counts.
                    let first_match = other_seq
                        .windows(len)
                        .map(|window| calculate_pattern_similarity(pattern, window))
                        .find(|&similarity| similarity > match_threshold);

                    if let Some(similarity) = first_match {
                        occurrence_count += 1;
                        similarity_score = similarity_score + similarity;
                    }
                }

                if occurrence_count >= min_occurrences {
                    candidates.push((similarity_score, pattern));
                }
            }
        }
    }

    // Highest score first. The sort is stable, so ties keep their discovery order.
    candidates.sort_by(|a, b| {
        b.0.partial_cmp(&a.0)
            .unwrap_or(core::cmp::Ordering::Equal)
    });

    // Walk the ranking and keep the first `MAX_PATTERNS` mutually distinct patterns.
    // De-duplicating while walking (rather than after truncating to five candidates)
    // means near-identical top entries cannot crowd out other distinct patterns.
    let mut unique_patterns: Vec<Vec<[T; 3]>> = Vec::new();
    for &(_, pattern) in &candidates {
        if unique_patterns.len() == MAX_PATTERNS {
            break;
        }
        if !unique_patterns
            .iter()
            .any(|kept| is_similar_pattern(kept, pattern))
        {
            unique_patterns.push(pattern.to_vec());
        }
    }

    unique_patterns
}