// advanced_patterns/advanced_patterns.rs

//! # Advanced Iterator Patterns - Complex Data Processing Workflows
//!
//! ## Overview
//!
//! This example demonstrates sophisticated iterator patterns and complex data processing
//! workflows using Train Station's tensor iterator system. It showcases advanced
//! functional programming techniques, data transformation pipelines, and real-world
//! processing scenarios.
//!
//! ## Learning Objectives
//!
//! - Master complex iterator chains and transformations
//! - Learn advanced functional programming patterns
//! - Understand data processing pipeline design
//! - Explore real-world tensor processing scenarios
//!
//! ## Prerequisites
//!
//! - Understanding of basic iterator concepts (see element_iteration.rs)
//! - Familiarity with functional programming patterns
//! - Knowledge of tensor operations and gradient tracking
//! - Experience with data processing workflows
//!
//! ## Key Concepts Demonstrated
//!
//! - **Pipeline Processing**: Multi-stage data transformation workflows
//! - **Conditional Processing**: Dynamic filtering and transformation based on data
//! - **Batch Operations**: Efficient processing of large datasets
//! - **Error Handling**: Robust processing with fallback strategies
//! - **Performance Optimization**: Memory-efficient processing patterns
//!
//! ## Example Code Structure
//!
//! 1. **Data Pipeline Processing**: Multi-stage transformation workflows
//! 2. **Conditional Processing**: Dynamic filtering and transformation
//! 3. **Batch Operations**: Efficient large-scale processing
//! 4. **Real-world Scenarios**: Practical data processing applications
//!
//! ## Expected Output
//!
//! The example will demonstrate complex data processing workflows, showing
//! how to build sophisticated transformation pipelines using iterator patterns
//! while maintaining performance and gradient tracking capabilities.
//!
//! ## Performance Notes
//!
//! - Pipeline processing minimizes memory allocations
//! - Conditional processing avoids unnecessary computations
//! - Batch operations leverage SIMD optimizations
//! - Lazy evaluation patterns improve memory efficiency

use train_station::Tensor;

54/// Main example function demonstrating advanced iterator patterns
55///
56/// This function showcases sophisticated data processing workflows
57/// using complex iterator chains and transformation pipelines.
58fn main() -> Result<(), Box<dyn std::error::Error>> {
59    println!("Starting Advanced Iterator Patterns Example");
60
61    demonstrate_data_pipeline()?;
62    demonstrate_conditional_processing()?;
63    demonstrate_batch_operations()?;
64    demonstrate_real_world_scenarios()?;
65
66    println!("Advanced Iterator Patterns Example completed successfully!");
67    Ok(())
68}
69
70/// Demonstrate multi-stage data processing pipeline
71///
72/// Shows how to build sophisticated transformation workflows using
73/// iterator chains for data preprocessing and feature engineering.
74fn demonstrate_data_pipeline() -> Result<(), Box<dyn std::error::Error>> {
75    println!("\n--- Data Processing Pipeline ---");
76
77    // Simulate raw sensor data with noise
78    let raw_data: Vec<f32> = (0..20)
79        .map(|i| {
80            let base = i as f32 * 0.5;
81            let noise = (i % 3) as f32 * 0.1;
82            base + noise
83        })
84        .collect();
85
86    let tensor = Tensor::from_slice(&raw_data, vec![20])?;
87    println!("Raw sensor data: {:?}", tensor.data());
88
89    // Multi-stage processing pipeline
90    println!("\nProcessing pipeline:");
91    println!("1. Normalize data (z-score)");
92    println!("2. Apply smoothing filter");
93    println!("3. Detect outliers");
94    println!("4. Apply feature scaling");
95
96    // Stage 1: Normalization
97    let mean = tensor.mean().value();
98    let std = tensor.std().value();
99    let normalized: Tensor = tensor
100        .iter()
101        .map(|elem| elem.sub_scalar(mean).div_scalar(std))
102        .collect();
103    println!(
104        "  Normalized (mean={:.3}, std={:.3}): {:?}",
105        mean,
106        std,
107        normalized.data()
108    );
109
110    // Stage 2: Smoothing (simple moving average)
111    let smoothed: Tensor = normalized
112        .iter()
113        .enumerate()
114        .map(|(i, elem)| {
115            if i == 0 || i == normalized.size() - 1 {
116                elem.clone()
117            } else {
118                // Simple 3-point average
119                let prev = normalized.element_view(i - 1);
120                let next = normalized.element_view(i + 1);
121                elem.add_tensor(&prev).add_tensor(&next).div_scalar(3.0)
122            }
123        })
124        .collect();
125    println!("  Smoothed: {:?}", smoothed.data());
126
127    // Stage 3: Outlier detection and removal
128    let outlier_threshold = 2.0;
129    let cleaned: Tensor = smoothed
130        .iter()
131        .filter(|elem| elem.value().abs() < outlier_threshold)
132        .collect();
133    println!(
134        "  Outliers removed (threshold={}): {:?}",
135        outlier_threshold,
136        cleaned.data()
137    );
138
139    // Stage 4: Feature scaling to [0, 1] range
140    let min_val = cleaned
141        .iter()
142        .map(|e| e.value())
143        .fold(f32::INFINITY, f32::min);
144    let max_val = cleaned
145        .iter()
146        .map(|e| e.value())
147        .fold(f32::NEG_INFINITY, f32::max);
148    let scaled: Tensor = cleaned
149        .iter()
150        .map(|elem| elem.sub_scalar(min_val).div_scalar(max_val - min_val))
151        .collect();
152    println!("  Scaled to [0,1]: {:?}", scaled.data());
153
154    Ok(())
155}
156
157/// Demonstrate conditional processing patterns
158///
159/// Shows how to implement dynamic filtering and transformation
160/// based on data characteristics and conditions.
161fn demonstrate_conditional_processing() -> Result<(), Box<dyn std::error::Error>> {
162    println!("\n--- Conditional Processing ---");
163
164    // Create data with mixed characteristics
165    let data = vec![1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0];
166    let tensor = Tensor::from_slice(&data, vec![10])?;
167    println!("Input data: {:?}", tensor.data());
168
169    // Conditional transformation based on sign
170    println!("\nConditional transformation (positive/negative handling):");
171    let processed: Tensor = tensor
172        .iter()
173        .map(|elem| {
174            let val = elem.value();
175            if val > 0.0 {
176                elem.pow_scalar(2.0) // Square positive values
177            } else {
178                elem.mul_scalar(-1.0).sqrt() // Square root of absolute negative values
179            }
180        })
181        .collect();
182    println!("  Processed: {:?}", processed.data());
183
184    // Adaptive filtering based on local statistics
185    println!("\nAdaptive filtering (remove values > 2 std from local mean):");
186    let window_size = 3;
187    let adaptive_filtered: Tensor = tensor
188        .iter()
189        .enumerate()
190        .filter(|(i, elem)| {
191            let start = i.saturating_sub(window_size / 2);
192            let end = (i + window_size / 2 + 1).min(tensor.size());
193
194            // Calculate local mean and std
195            let local_values: Vec<f32> = (start..end)
196                .map(|j| tensor.element_view(j).value())
197                .collect();
198
199            let local_mean = local_values.iter().sum::<f32>() / local_values.len() as f32;
200            let local_variance = local_values
201                .iter()
202                .map(|v| (v - local_mean).powi(2))
203                .sum::<f32>()
204                / local_values.len() as f32;
205            let local_std = local_variance.sqrt();
206
207            let threshold = local_mean + 2.0 * local_std;
208            elem.value() <= threshold
209        })
210        .map(|(_, elem)| elem)
211        .collect();
212    println!("  Adaptive filtered: {:?}", adaptive_filtered.data());
213
214    // Multi-condition processing
215    println!("\nMulti-condition processing:");
216    let multi_processed: Tensor = tensor
217        .iter()
218        .map(|elem| {
219            let val = elem.value();
220            match () {
221                _ if val > 5.0 => elem.mul_scalar(2.0), // Double large values
222                _ if val < -5.0 => elem.div_scalar(2.0), // Halve small values
223                _ if val.abs() < 2.0 => elem.add_scalar(1.0), // Add 1 to small values
224                _ => elem.clone(),                      // Keep others unchanged
225            }
226        })
227        .collect();
228    println!("  Multi-condition: {:?}", multi_processed.data());
229
230    Ok(())
231}
232
233/// Demonstrate batch processing operations
234///
235/// Shows efficient processing of large datasets using iterator
236/// patterns and batch operations for performance optimization.
237fn demonstrate_batch_operations() -> Result<(), Box<dyn std::error::Error>> {
238    println!("\n--- Batch Operations ---");
239
240    // Create a larger dataset for batch processing
241    let size = 100;
242    let data: Vec<f32> = (0..size)
243        .map(|i| {
244            let x = i as f32 / size as f32;
245            x * x + 0.1 * (i % 7) as f32 // Quadratic with some noise
246        })
247        .collect();
248
249    let tensor = Tensor::from_slice(&data, vec![size])?;
250    println!("Dataset size: {}", tensor.size());
251
252    // Batch processing with windowing
253    println!("\nBatch processing with sliding windows:");
254    let batch_size = 10;
255    let batches: Vec<Tensor> = tensor
256        .iter()
257        .collect::<Vec<_>>()
258        .chunks(batch_size)
259        .map(|chunk| {
260            // Process each batch independently
261            chunk
262                .iter()
263                .map(|elem| elem.pow_scalar(2.0).add_scalar(1.0))
264                .collect()
265        })
266        .collect();
267
268    println!(
269        "  Processed {} batches of size {}",
270        batches.len(),
271        batch_size
272    );
273    for (i, batch) in batches.iter().enumerate() {
274        println!(
275            "    Batch {}: mean={:.3}, std={:.3}",
276            i,
277            batch.mean().value(),
278            batch.std().value()
279        );
280    }
281
282    // Parallel-like processing with stride
283    println!("\nStrided processing (every nth element):");
284    let stride = 5;
285    let strided: Tensor = tensor
286        .iter()
287        .enumerate()
288        .filter(|(i, _)| i % stride == 0)
289        .map(|(_, elem)| elem)
290        .collect();
291    println!("  Strided (every {}th): {:?}", stride, strided.data());
292
293    // Hierarchical processing
294    println!("\nHierarchical processing (coarse to fine):");
295    let coarse: Tensor = tensor
296        .iter()
297        .enumerate()
298        .filter(|(i, _)| i % 4 == 0) // Take every 4th element
299        .map(|(_, elem)| elem)
300        .collect();
301
302    let fine: Tensor = tensor
303        .iter()
304        .enumerate()
305        .filter(|(i, _)| i % 4 != 0) // Take the rest
306        .map(|(_, elem)| elem)
307        .collect();
308
309    println!("  Coarse (every 4th): {:?}", coarse.data());
310    println!("  Fine (rest): {:?}", fine.data());
311
312    // Combine coarse and fine with different processing
313    let combined: Tensor = coarse
314        .iter()
315        .map(|elem| elem.mul_scalar(2.0)) // Scale coarse
316        .chain(fine.iter().map(|elem| elem.div_scalar(2.0))) // Scale fine
317        .collect();
318    println!("  Combined: {:?}", combined.data());
319
320    Ok(())
321}
322
323/// Demonstrate real-world processing scenarios
324///
325/// Shows practical applications of iterator patterns for
326/// common data processing tasks in machine learning and analytics.
327fn demonstrate_real_world_scenarios() -> Result<(), Box<dyn std::error::Error>> {
328    println!("\n--- Real-world Scenarios ---");
329
330    // Scenario 1: Time series analysis
331    println!("\nScenario 1: Time Series Analysis");
332    let time_series: Vec<f32> = (0..24)
333        .map(|hour| {
334            let base = 20.0 + 10.0 * (hour as f32 * std::f32::consts::PI / 12.0).sin();
335            base + (hour % 3) as f32 * 2.0 // Add some noise
336        })
337        .collect();
338
339    let series = Tensor::from_slice(&time_series, vec![24])?;
340    println!("  Time series (24 hours): {:?}", series.data());
341
342    // Calculate moving average
343    let window_size = 3;
344    let moving_avg: Tensor = series
345        .iter()
346        .enumerate()
347        .map(|(i, _)| {
348            let start = i.saturating_sub(window_size / 2);
349            let end = (i + window_size / 2 + 1).min(series.size());
350            let window = series.iter_range(start, end);
351            window.fold(0.0, |acc, elem| acc + elem.value()) / (end - start) as f32
352        })
353        .map(|val| Tensor::from_slice(&[val], vec![1]).unwrap())
354        .collect();
355    println!(
356        "  Moving average (window={}): {:?}",
357        window_size,
358        moving_avg.data()
359    );
360
361    // Scenario 2: Feature engineering
362    println!("\nScenario 2: Feature Engineering");
363    let features = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0], vec![5])?;
364    println!("  Original features: {:?}", features.data());
365
366    // Create polynomial features
367    let poly_features: Tensor = features
368        .iter()
369        .flat_map(|elem| {
370            vec![
371                elem.clone(),         // x^1
372                elem.pow_scalar(2.0), // x^2
373                elem.pow_scalar(3.0), // x^3
374            ]
375        })
376        .collect();
377    println!(
378        "  Polynomial features (x, x^2, x^3): {:?}",
379        poly_features.data()
380    );
381
382    // Scenario 3: Data augmentation
383    println!("\nScenario 3: Data Augmentation");
384    let original = Tensor::from_slice(&[1.0, 2.0, 3.0], vec![3])?;
385    println!("  Original data: {:?}", original.data());
386
387    // Augment with noise and scaling
388    let augmented: Tensor = original
389        .iter()
390        .flat_map(|elem| {
391            vec![
392                elem.clone(),         // Original
393                elem.add_scalar(0.1), // Add noise
394                elem.sub_scalar(0.1), // Subtract noise
395                elem.mul_scalar(1.1), // Scale up
396                elem.mul_scalar(0.9), // Scale down
397            ]
398        })
399        .collect();
400    println!("  Augmented data: {:?}", augmented.data());
401
402    // Scenario 4: Statistical analysis
403    println!("\nScenario 4: Statistical Analysis");
404    let sample_data = Tensor::from_slice(&[1.1, 2.3, 1.8, 2.1, 1.9, 2.0, 1.7, 2.2], vec![8])?;
405    println!("  Sample data: {:?}", sample_data.data());
406
407    // Calculate various statistics
408    let mean = sample_data.mean().value();
409    let std = sample_data.std().value();
410    let min = sample_data
411        .iter()
412        .map(|e| e.value())
413        .fold(f32::INFINITY, f32::min);
414    let max = sample_data
415        .iter()
416        .map(|e| e.value())
417        .fold(f32::NEG_INFINITY, f32::max);
418
419    // Z-score normalization
420    let z_scores: Tensor = sample_data
421        .iter()
422        .map(|elem| elem.sub_scalar(mean).div_scalar(std))
423        .collect();
424
425    println!(
426        "  Statistics: mean={:.3}, std={:.3}, min={:.3}, max={:.3}",
427        mean, std, min, max
428    );
429    println!("  Z-scores: {:?}", z_scores.data());
430
431    Ok(())
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Test a simple map-collect pipeline: every element is doubled.
    #[test]
    fn test_data_pipeline() {
        let tensor = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0], vec![4]).unwrap();
        let normalized: Tensor = tensor.iter().map(|elem| elem.mul_scalar(2.0)).collect();

        assert_eq!(normalized.data(), &[2.0, 4.0, 6.0, 8.0]);
    }

    /// Test sign-conditional transformation: positives are doubled,
    /// negatives are replaced by their absolute value.
    #[test]
    fn test_conditional_processing() {
        let tensor = Tensor::from_slice(&[1.0, -2.0, 3.0], vec![3]).unwrap();
        let processed: Tensor = tensor
            .iter()
            .map(|elem| {
                if elem.value() > 0.0 {
                    elem.mul_scalar(2.0)
                } else {
                    elem.abs()
                }
            })
            .collect();

        assert_eq!(processed.data(), &[2.0, 2.0, 6.0]);
    }

    /// Test strided selection: only even-indexed elements survive.
    #[test]
    fn test_batch_operations() {
        let tensor = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0], vec![4]).unwrap();
        let strided: Tensor = tensor
            .iter()
            .enumerate()
            .filter(|(i, _)| i % 2 == 0)
            .map(|(_, elem)| elem)
            .collect();

        assert_eq!(strided.data(), &[1.0, 3.0]);
    }
}