1use scirs2_core::ndarray::{s, Array1, Array2};
8use scirs2_core::numeric::{Float, FromPrimitive};
9use std::fmt::Debug;
10
11use super::utils::{calculate_entropy, euclidean_distance_subsequence, gaussian_breakpoints};
12use crate::error::{Result, TimeSeriesError};
13
14#[derive(Debug, Clone)]
16pub struct TemporalPatternFeatures<F> {
17 pub motifs: Vec<MotifInfo<F>>,
19 pub discord_scores: Array1<F>,
21 pub sax_symbols: Vec<char>,
23 pub shapelets: Vec<ShapeletInfo<F>>,
25}
26
27impl<F> Default for TemporalPatternFeatures<F>
28where
29 F: Float + FromPrimitive,
30{
31 fn default() -> Self {
32 Self {
33 motifs: Vec::new(),
34 discord_scores: Array1::zeros(0),
35 sax_symbols: Vec::new(),
36 shapelets: Vec::new(),
37 }
38 }
39}
40
41#[derive(Debug, Clone)]
43pub struct MotifInfo<F> {
44 pub length: usize,
46 pub positions: Vec<usize>,
48 pub frequency: usize,
50 pub avg_distance: F,
52}
53
54impl<F> Default for MotifInfo<F>
55where
56 F: Float + FromPrimitive,
57{
58 fn default() -> Self {
59 Self {
60 length: 0,
61 positions: Vec::new(),
62 frequency: 0,
63 avg_distance: F::zero(),
64 }
65 }
66}
67
68#[derive(Debug, Clone)]
70pub struct ShapeletInfo<F> {
71 pub pattern: Array1<F>,
73 pub position: usize,
75 pub length: usize,
77 pub information_gain: F,
79}
80
81impl<F> Default for ShapeletInfo<F>
82where
83 F: Float + FromPrimitive,
84{
85 fn default() -> Self {
86 Self {
87 pattern: Array1::zeros(0),
88 position: 0,
89 length: 0,
90 information_gain: F::zero(),
91 }
92 }
93}
94
95#[allow(dead_code)]
101pub fn calculate_temporal_pattern_features<F>(
102 ts: &Array1<F>,
103 motif_length: Option<usize>,
104 max_motifs: usize,
105 k_neighbors: usize,
106 sax_word_length: usize,
107 sax_alphabet_size: usize,
108 detect_patterns: bool,
109) -> Result<TemporalPatternFeatures<F>>
110where
111 F: Float + FromPrimitive + Debug + Clone + scirs2_core::ndarray::ScalarOperand,
112{
113 if !detect_patterns {
114 return Ok(TemporalPatternFeatures::default());
115 }
116
117 let actual_motif_length = motif_length.unwrap_or(ts.len() / 10).max(3);
118
119 let motifs = discover_motifs(ts, actual_motif_length, max_motifs)?;
121
122 let discord_scores = calculate_discord_scores(ts, actual_motif_length, k_neighbors)?;
124
125 let sax_symbols = time_series_to_sax(ts, sax_word_length, sax_alphabet_size)?;
127
128 let shapelets = Vec::new();
131
132 Ok(TemporalPatternFeatures {
133 motifs,
134 discord_scores,
135 sax_symbols,
136 shapelets,
137 })
138}
139
140#[allow(dead_code)]
159pub fn discover_motifs<F>(
160 ts: &Array1<F>,
161 motif_length: usize,
162 max_motifs: usize,
163) -> Result<Vec<MotifInfo<F>>>
164where
165 F: Float + FromPrimitive + Debug + Clone,
166{
167 let n = ts.len();
168 if n < motif_length * 2 {
169 return Err(TimeSeriesError::FeatureExtractionError(
170 "Time series too short for motif discovery".to_string(),
171 ));
172 }
173
174 let num_subsequences = n - motif_length + 1;
175 let mut distances = Array2::zeros((num_subsequences, num_subsequences));
176
177 for i in 0..num_subsequences {
179 for j in (i + 1)..num_subsequences {
180 let dist = euclidean_distance_subsequence(ts, i, j, motif_length);
181 distances[[i, j]] = dist;
182 distances[[j, i]] = dist;
183 }
184 }
185
186 let mut motifs = Vec::new();
187 let mut used_indices = vec![false; num_subsequences];
188
189 for _ in 0..max_motifs {
190 let mut min_dist = F::infinity();
191 let mut best_pair = (0, 0);
192
193 for i in 0..num_subsequences {
195 if used_indices[i] {
196 continue;
197 }
198 for j in (i + motif_length)..num_subsequences {
199 if used_indices[j] {
200 continue;
201 }
202 if distances[[i, j]] < min_dist {
203 min_dist = distances[[i, j]];
204 best_pair = (i, j);
205 }
206 }
207 }
208
209 if min_dist.is_infinite() {
210 break;
211 }
212
213 let threshold = min_dist * F::from(1.5).expect("Failed to convert constant to float");
215 let mut positions = vec![best_pair.0, best_pair.1];
216
217 for k in 0..num_subsequences {
218 if used_indices[k] || k == best_pair.0 || k == best_pair.1 {
219 continue;
220 }
221
222 let dist_to_first = distances[[best_pair.0, k]];
223 let dist_to_second = distances[[best_pair.1, k]];
224
225 if dist_to_first <= threshold || dist_to_second <= threshold {
226 positions.push(k);
227 }
228 }
229
230 for &pos in &positions {
232 for offset in 0..motif_length {
233 if pos + offset < used_indices.len() {
234 used_indices[pos + offset] = true;
235 }
236 }
237 }
238
239 let mut total_dist = F::zero();
241 let mut count = 0;
242 for i in 0..positions.len() {
243 for j in (i + 1)..positions.len() {
244 total_dist = total_dist + distances[[positions[i], positions[j]]];
245 count += 1;
246 }
247 }
248
249 let avg_distance = if count > 0 {
250 total_dist / F::from(count).expect("Failed to convert to float")
251 } else {
252 F::zero()
253 };
254
255 motifs.push(MotifInfo {
256 length: motif_length,
257 frequency: positions.len(),
258 positions,
259 avg_distance,
260 });
261 }
262
263 Ok(motifs)
264}
265
266#[allow(dead_code)]
285pub fn calculate_discord_scores<F>(
286 ts: &Array1<F>,
287 discord_length: usize,
288 k_neighbors: usize,
289) -> Result<Array1<F>>
290where
291 F: Float + FromPrimitive + Debug + Clone,
292{
293 let n = ts.len();
294 if n < discord_length * 2 {
295 return Err(TimeSeriesError::FeatureExtractionError(
296 "Time series too short for discord detection".to_string(),
297 ));
298 }
299
300 let num_subsequences = n - discord_length + 1;
301 let mut discord_scores = Array1::zeros(num_subsequences);
302
303 for i in 0..num_subsequences {
304 let mut distances = Vec::new();
305
306 for j in 0..num_subsequences {
308 if (i as i32 - j as i32).abs() < discord_length as i32 {
309 continue; }
311
312 let dist = euclidean_distance_subsequence(ts, i, j, discord_length);
313 distances.push(dist);
314 }
315
316 distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
318
319 if distances.len() >= k_neighbors {
320 discord_scores[i] = distances[k_neighbors - 1];
322 } else if !distances.is_empty() {
323 discord_scores[i] = distances[distances.len() - 1];
324 }
325 }
326
327 Ok(discord_scores)
328}
329
330#[allow(dead_code)]
349pub fn time_series_to_sax<F>(
350 ts: &Array1<F>,
351 word_length: usize,
352 alphabet_size: usize,
353) -> Result<Vec<char>>
354where
355 F: Float + FromPrimitive + Debug + Clone,
356{
357 let n = ts.len();
358 if n < word_length {
359 return Err(TimeSeriesError::FeatureExtractionError(
360 "Time series too short for SAX conversion".to_string(),
361 ));
362 }
363
364 if !(2..=26).contains(&alphabet_size) {
365 return Err(TimeSeriesError::FeatureExtractionError(
366 "Alphabet _size must be between 2 and 26".to_string(),
367 ));
368 }
369
370 let mean = ts.sum() / F::from(n).expect("Failed to convert to float");
372 let variance = ts.mapv(|x| (x - mean) * (x - mean)).sum()
373 / F::from(n).expect("Failed to convert to float");
374 let std_dev = variance.sqrt();
375
376 let normalized = if std_dev > F::zero() {
377 ts.mapv(|x| (x - mean) / std_dev)
378 } else {
379 Array1::zeros(n)
380 };
381
382 let segment_size = n / word_length;
384 let mut paa = Array1::zeros(word_length);
385
386 for i in 0..word_length {
387 let start = i * segment_size;
388 let end = if i == word_length - 1 {
389 n
390 } else {
391 (i + 1) * segment_size
392 };
393
394 let segment_sum = normalized.slice(s![start..end]).sum();
395 let segment_len = end - start;
396 paa[i] = segment_sum / F::from(segment_len).expect("Failed to convert to float");
397 }
398
399 let breakpoints = gaussian_breakpoints(alphabet_size);
401 let mut sax_symbols = Vec::with_capacity(word_length);
402
403 for &value in paa.iter() {
404 let symbol_index = breakpoints
405 .iter()
406 .position(|&bp| value.to_f64().unwrap_or(0.0) <= bp)
407 .unwrap_or(alphabet_size - 1);
408
409 let symbol = (b'a' + symbol_index as u8) as char;
410 sax_symbols.push(symbol);
411 }
412
413 Ok(sax_symbols)
414}
415
416#[allow(dead_code)]
437pub fn extractshapelets<F>(
438 ts_class1: &[Array1<F>],
439 ts_class2: &[Array1<F>],
440 min_length: usize,
441 max_length: usize,
442 maxshapelets: usize,
443) -> Result<Vec<ShapeletInfo<F>>>
444where
445 F: Float + FromPrimitive + Debug + Clone,
446{
447 if ts_class1.is_empty() || ts_class2.is_empty() {
448 return Err(TimeSeriesError::FeatureExtractionError(
449 "Need at least one time series from each class".to_string(),
450 ));
451 }
452
453 let mut all_candidates = Vec::new();
454
455 for ts in ts_class1.iter() {
457 for length in min_length..=max_length.min(ts.len() / 2) {
458 for start in 0..=(ts.len() - length) {
459 let shapelet = ts.slice(s![start..start + length]).to_owned();
460
461 let info_gain =
463 calculateshapelet_information_gain(&shapelet, ts_class1, ts_class2)?;
464
465 all_candidates.push(ShapeletInfo {
466 pattern: shapelet,
467 position: start,
468 length,
469 information_gain: info_gain,
470 });
471 }
472 }
473 }
474
475 all_candidates.sort_by(|a, b| {
477 b.information_gain
478 .partial_cmp(&a.information_gain)
479 .unwrap_or(std::cmp::Ordering::Equal)
480 });
481
482 all_candidates.truncate(maxshapelets);
483 Ok(all_candidates)
484}
485
486#[allow(dead_code)]
492fn calculateshapelet_information_gain<F>(
493 shapelet: &Array1<F>,
494 ts_class1: &[Array1<F>],
495 ts_class2: &[Array1<F>],
496) -> Result<F>
497where
498 F: Float + FromPrimitive + Debug + Clone,
499{
500 let total_count = ts_class1.len() + ts_class2.len();
501 let class1_count = ts_class1.len();
502 let class2_count = ts_class2.len();
503
504 if total_count == 0 {
505 return Ok(F::zero());
506 }
507
508 let p1 = class1_count as f64 / total_count as f64;
510 let p2 = class2_count as f64 / total_count as f64;
511 let original_entropy = if p1 > 0.0 && p2 > 0.0 {
512 -(p1 * p1.ln() + p2 * p2.ln())
513 } else {
514 0.0
515 };
516
517 let mut distances_class1 = Vec::new();
519 let mut distances_class2 = Vec::new();
520
521 for ts in ts_class1 {
522 let min_dist = find_min_distance_toshapelet(ts, shapelet);
523 distances_class1.push(min_dist);
524 }
525
526 for ts in ts_class2 {
527 let min_dist = find_min_distance_toshapelet(ts, shapelet);
528 distances_class2.push(min_dist);
529 }
530
531 let mut all_distances: Vec<F> = distances_class1
533 .iter()
534 .cloned()
535 .chain(distances_class2.iter().cloned())
536 .collect();
537 all_distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
538
539 let mut best_info_gain = F::zero();
540
541 for &threshold in &all_distances {
542 let left_class1 = distances_class1.iter().filter(|&&d| d <= threshold).count();
544 let left_class2 = distances_class2.iter().filter(|&&d| d <= threshold).count();
545 let right_class1 = class1_count - left_class1;
546 let right_class2 = class2_count - left_class2;
547
548 let left_total = left_class1 + left_class2;
549 let right_total = right_class1 + right_class2;
550
551 if left_total == 0 || right_total == 0 {
552 continue;
553 }
554
555 let left_entropy = calculate_entropy(left_class1, left_class2);
557 let right_entropy = calculate_entropy(right_class1, right_class2);
558
559 let weighted_entropy = (left_total as f64 / total_count as f64) * left_entropy
560 + (right_total as f64 / total_count as f64) * right_entropy;
561
562 let info_gain = original_entropy - weighted_entropy;
563
564 if F::from(info_gain).expect("Failed to convert to float") > best_info_gain {
565 best_info_gain = F::from(info_gain).expect("Failed to convert to float");
566 }
567 }
568
569 Ok(best_info_gain)
570}
571
572#[allow(dead_code)]
574fn find_min_distance_toshapelet<F>(ts: &Array1<F>, shapelet: &Array1<F>) -> F
575where
576 F: Float + FromPrimitive,
577{
578 let shapelet_len = shapelet.len();
579 let ts_len = ts.len();
580
581 if ts_len < shapelet_len {
582 return F::infinity();
583 }
584
585 let mut min_distance = F::infinity();
586
587 for start in 0..=(ts_len - shapelet_len) {
588 let mut sum = F::zero();
589 for i in 0..shapelet_len {
590 let diff = ts[start + i] - shapelet[i];
591 sum = sum + diff * diff;
592 }
593 let distance = sum.sqrt();
594
595 if distance < min_distance {
596 min_distance = distance;
597 }
598 }
599
600 min_distance
601}
602
603#[allow(dead_code)]
605pub fn calculate_distance_matrix<F>(ts: &Array1<F>, subsequence_length: usize) -> Result<Array2<F>>
606where
607 F: Float + FromPrimitive + Debug + Clone,
608{
609 let n = ts.len();
610 if n < subsequence_length {
611 return Err(TimeSeriesError::FeatureExtractionError(
612 "Time series too short for distance matrix calculation".to_string(),
613 ));
614 }
615
616 let num_subsequences = n - subsequence_length + 1;
617 let mut distances = Array2::zeros((num_subsequences, num_subsequences));
618
619 for i in 0..num_subsequences {
620 for j in (i + 1)..num_subsequences {
621 let dist = euclidean_distance_subsequence(ts, i, j, subsequence_length);
622 distances[[i, j]] = dist;
623 distances[[j, i]] = dist;
624 }
625 }
626
627 Ok(distances)
628}
629
630#[allow(dead_code)]
632pub fn find_nearest_neighbors<F>(distance_matrix: &Array2<F>, k: usize) -> Result<Vec<Vec<usize>>>
633where
634 F: Float + FromPrimitive + Debug + Clone,
635{
636 let n = distance_matrix.nrows();
637 let mut neighbors = Vec::with_capacity(n);
638
639 for i in 0..n {
640 let mut distances_with_indices: Vec<(F, usize)> = (0..n)
641 .filter(|&j| j != i)
642 .map(|j| (distance_matrix[[i, j]], j))
643 .collect();
644
645 distances_with_indices
646 .sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
647
648 let nearest_k: Vec<usize> = distances_with_indices
649 .into_iter()
650 .take(k)
651 .map(|(_, idx)| idx)
652 .collect();
653
654 neighbors.push(nearest_k);
655 }
656
657 Ok(neighbors)
658}
659
660#[allow(dead_code)]
662pub fn calculate_local_intrinsic_dimensionality<F>(
663 distance_matrix: &Array2<F>,
664 k: usize,
665) -> Result<Vec<F>>
666where
667 F: Float + FromPrimitive + Debug + Clone,
668{
669 let n = distance_matrix.nrows();
670 let mut lid_values = Vec::with_capacity(n);
671
672 for i in 0..n {
673 let mut distances: Vec<F> = (0..n)
674 .filter(|&j| j != i)
675 .map(|j| distance_matrix[[i, j]])
676 .collect();
677
678 distances.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
679
680 if distances.len() < k || distances[k - 1] == F::zero() {
681 lid_values.push(F::zero());
682 continue;
683 }
684
685 let mut sum = F::zero();
687 for j in 0..k {
688 if distances[j] > F::zero() && distances[k - 1] > F::zero() {
689 sum = sum + (distances[k - 1] / distances[j]).ln();
690 }
691 }
692
693 let lid = F::from(k).expect("Failed to convert to float") / sum;
694 lid_values.push(lid);
695 }
696
697 Ok(lid_values)
698}