batch_operations_demo/
batch_operations_demo.rs

1//! Batch operations demonstration
2//!
3//! This example demonstrates the batch processing utilities and selective cache management
4//! features of the scirs2-datasets caching system.
5
6use scirs2_datasets::{BatchOperations, CacheManager};
7use std::time::Duration;
8
9#[allow(dead_code)]
10fn main() {
11    println!("=== Batch Operations Demonstration ===\n");
12
13    // Create a cache manager for demonstration
14    let cache_manager = CacheManager::new().expect("Failed to create cache manager");
15
16    println!("=== Setting up Batch Operations Manager =====");
17    let batch_ops = BatchOperations::new(cache_manager)
18        .with_parallel(false) // Use sequential for deterministic demo output
19        .with_retry_config(2, Duration::from_millis(500));
20
21    println!("Batch operations manager configured:");
22    println!("  - Parallel processing: disabled (for demo)");
23    println!("  - Max retries: 2");
24    println!("  - Retry delay: 500ms");
25
26    // Demonstrate cache setup with sample data
27    println!("\n=== Sample Data Setup ========================");
28    setup_sample_cachedata(&batch_ops);
29
30    // Demonstrate batch statistics
31    println!("\n=== Cache Statistics ==========================");
32    demonstrate_cache_statistics(&batch_ops);
33
34    // Demonstrate batch processing
35    println!("\n=== Batch Processing =========================");
36    demonstrate_batch_processing(&batch_ops);
37
38    // Demonstrate selective cleanup
39    println!("\n=== Selective Cache Cleanup ==================");
40    demonstrate_selective_cleanup(&batch_ops);
41
42    // Show final cache state
43    println!("\n=== Final Cache State =========================");
44    show_final_cache_state(&batch_ops);
45
46    // Performance considerations
47    println!("\n=== Performance Considerations ================");
48    demonstrate_performance_features();
49
50    println!("\n=== Batch Operations Demo Complete ===========");
51}
52
53#[allow(dead_code)]
54fn setup_sample_cachedata(batch_ops: &BatchOperations) {
55    println!("Creating sample cached datasets...");
56
57    // Create various types of sample data
58    let sample_datasets = [
59        ("iris_processed.csv", create_csvdata()),
60        ("experiment_001.json", create_jsondata()),
61        ("temp_file_001.tmp", create_binarydata(100)),
62        ("temp_file_002.tmp", create_binarydata(200)),
63        ("largedataset.dat", create_binarydata(1024)),
64        ("model_weights.bin", create_binarydata(512)),
65        ("results_summary.txt", createtextdata()),
66    ];
67
68    for (name, data) in sample_datasets {
69        if let Err(e) = batch_ops.write_cached(name, &data) {
70            println!("  Warning: Failed to cache {name}: {e}");
71        } else {
72            println!("  ✓ Cached {name} ({} bytes)", data.len());
73        }
74    }
75}
76
77#[allow(dead_code)]
78fn demonstrate_cache_statistics(batch_ops: &BatchOperations) {
79    match batch_ops.get_cache_statistics() {
80        Ok(result) => {
81            println!("{}", result.summary());
82            println!("Cache analysis:");
83            println!("  - Files processed: {}", result.success_count);
84            println!("  - Total cache size: {}", formatbytes(result.total_bytes));
85            println!(
86                "  - Analysis time: {:.2}ms",
87                result.elapsed_time.as_millis()
88            );
89
90            if result.failure_count > 0 {
91                println!("  - Failed files: {}", result.failure_count);
92                for (file, error) in &result.failures {
93                    println!("    • {file}: {error}");
94                }
95            }
96        }
97        Err(e) => println!("Failed to get cache statistics: {e}"),
98    }
99}
100
101#[allow(dead_code)]
102fn demonstrate_batch_processing(batch_ops: &BatchOperations) {
103    println!("Processing multiple cached files in batch...");
104
105    // Get list of cached files
106    let cached_files = batch_ops.list_cached_files().unwrap_or_default();
107
108    if cached_files.is_empty() {
109        println!("No cached files found for processing");
110        return;
111    }
112
113    println!("Found {} files to process", cached_files.len());
114
115    // Example 1: Validate file sizes
116    println!("\n1. File Size Validation:");
117    let result = batch_ops.batch_process(&cached_files, |name, data| {
118        if data.len() < 10 {
119            Err(format!("File {name} too small ({} bytes)", data.len()))
120        } else {
121            Ok(data.len())
122        }
123    });
124
125    println!("   {}", result.summary());
126    if result.failure_count > 0 {
127        for (file, error) in &result.failures {
128            println!("   ⚠ {file}: {error}");
129        }
130    }
131
132    // Example 2: Content type detection
133    println!("\n2. Content Type Detection:");
134    let result = batch_ops.batch_process(&cached_files, |name, data| {
135        let content_type = detect_content_type(name, data);
136        println!("   {name} -> {content_type}");
137        Ok::<String, String>(content_type)
138    });
139
140    println!("   {}", result.summary());
141
142    // Example 3: Data integrity check
143    println!("\n3. Data Integrity Check:");
144    let result = batch_ops.batch_process(&cached_files, |name, data| {
145        // Simple check: ensure data is not all zeros
146        let all_zeros = data.iter().all(|&b| b == 0);
147        if all_zeros && data.len() > 100 {
148            Err("Suspicious: large file with all zeros".to_string())
149        } else {
150            let checksum = data.iter().map(|&b| b as u32).sum::<u32>();
151            println!("   {name} checksum: {checksum}");
152            Ok(checksum)
153        }
154    });
155
156    println!("   {}", result.summary());
157}
158
159#[allow(dead_code)]
160fn demonstrate_selective_cleanup(batch_ops: &BatchOperations) {
161    println!("Demonstrating selective cache cleanup...");
162
163    // Show current cache state
164    let initial_stats = batch_ops.get_cache_statistics().unwrap();
165    println!(
166        "Before cleanup: {} files, {}",
167        initial_stats.success_count,
168        formatbytes(initial_stats.total_bytes)
169    );
170
171    // Example 1: Clean up temporary files
172    println!("\n1. Cleaning up temporary files (*.tmp):");
173    match batch_ops.selective_cleanup(&["*.tmp"], None) {
174        Ok(result) => {
175            println!("   {}", result.summary());
176            if result.success_count > 0 {
177                println!("   Removed {} temporary files", result.success_count);
178            }
179        }
180        Err(e) => println!("   Failed: {e}"),
181    }
182
183    // Example 2: Clean up old files (demo with 0 days to show functionality)
184    println!("\n2. Age-based cleanup (files older than 0 days - for demo):");
185    match batch_ops.selective_cleanup(&["*"], Some(0)) {
186        Ok(result) => {
187            println!("   {}", result.summary());
188            println!("   (Note: Using 0 days for demonstration - all files are 'old')");
189        }
190        Err(e) => println!("   Failed: {e}"),
191    }
192
193    // Show final cache state
194    let final_stats = batch_ops.get_cache_statistics().unwrap_or_default();
195    println!(
196        "\nAfter cleanup: {} files, {}",
197        final_stats.success_count,
198        formatbytes(final_stats.total_bytes)
199    );
200
201    let freed_space = initial_stats
202        .total_bytes
203        .saturating_sub(final_stats.total_bytes);
204    if freed_space > 0 {
205        println!("Space freed: {}", formatbytes(freed_space));
206    }
207}
208
209#[allow(dead_code)]
210fn show_final_cache_state(batch_ops: &BatchOperations) {
211    println!("Final cache contents:");
212
213    match batch_ops.list_cached_files() {
214        Ok(files) => {
215            if files.is_empty() {
216                println!("  Cache is empty");
217            } else {
218                for file in files {
219                    if let Ok(data) = batch_ops.read_cached(&file) {
220                        println!("  {file} ({} bytes)", data.len());
221                    }
222                }
223            }
224        }
225        Err(e) => println!("  Failed to list files: {e}"),
226    }
227
228    // Print detailed cache report
229    if let Err(e) = batch_ops.print_cache_report() {
230        println!("Failed to generate cache report: {e}");
231    }
232}
233
/// Print a static summary of performance knobs, cleanup patterns, and
/// common use cases for the batch-operations API. Pure console output;
/// an empty entry in the table renders as a blank separator line.
#[allow(dead_code)]
fn demonstrate_performance_features() {
    let lines = [
        "Performance and configuration options:",
        "",
        "**Parallel vs Sequential Processing:**",
        "- Parallel: Faster for I/O-bound operations, multiple files",
        "- Sequential: Better for CPU-bound operations, deterministic order",
        "- Configure with: batch_ops.with_parallel(true/false)",
        "",
        "**Retry Configuration:**",
        "- Configurable retry count and delay for robust operations",
        "- Useful for network downloads or temporary file locks",
        "- Configure with: batch_ops.with_retry_config(max_retries, delay)",
        "",
        "**Selective Cleanup Patterns:**",
        "- Glob patterns: *.tmp, *.cache, dataset_*",
        "- Age-based cleanup: Remove files older than N days",
        "- Pattern examples:",
        "  • '*.tmp' - all temporary files",
        "  • 'old_*' - files starting with 'old_'",
        "  • '*_backup' - files ending with '_backup'",
        "",
        "**Use Cases:**",
        "- **Batch Downloads**: Download multiple datasets efficiently",
        "- **Data Validation**: Verify integrity of multiple cached files",
        "- **Cleanup Operations**: Remove outdated or temporary files",
        "- **Data Processing**: Apply transformations to multiple datasets",
        "- **Cache Maintenance**: Monitor and manage cache size and content",
    ];

    for line in lines {
        println!("{line}");
    }
}
263
264// Helper functions for creating sample data
265
/// Sample iris-style CSV content used to populate the demo cache.
///
/// Returns the CSV as raw bytes: a header row plus three data rows.
#[allow(dead_code)]
fn create_csvdata() -> Vec<u8> {
    // concat! joins the rows at compile time; equivalent to the usual
    // backslash-continued string literal.
    let csv = concat!(
        "sepal_length,sepal_width,petal_length,petal_width,species\n",
        "5.1,3.5,1.4,0.2,setosa\n",
        "4.9,3.0,1.4,0.2,setosa\n",
        "4.7,3.2,1.3,0.2,setosa\n",
    );
    csv.as_bytes().to_vec()
}
275
/// Sample JSON experiment record used to populate the demo cache.
#[allow(dead_code)]
fn create_jsondata() -> Vec<u8> {
    let json = r#"{"experiment_id": "001", "results": {"accuracy": 0.95, "precision": 0.92}, "timestamp": "2024-01-01T12:00:00Z"}"#;
    Vec::from(json.as_bytes())
}
281
/// Build `size` bytes of deterministic sample data: a repeating
/// 0..=255 byte ramp.
#[allow(dead_code)]
fn create_binarydata(size: usize) -> Vec<u8> {
    let mut data = Vec::with_capacity(size);
    for index in 0..size {
        data.push((index % 256) as u8);
    }
    data
}
286
/// Sample plain-text results summary used to populate the demo cache.
#[allow(dead_code)]
fn createtextdata() -> Vec<u8> {
    // Build the report line by line; each entry is terminated with '\n'.
    let mut text = String::new();
    for line in [
        "Experimental Results Summary",
        "============================",
        "Total samples: 1000",
        "Success rate: 95.2%",
        "Processing time: 12.3s",
    ] {
        text.push_str(line);
        text.push('\n');
    }
    text.into_bytes()
}
297
/// Guess a MIME-style content type for a cached file from its name,
/// falling back to a byte-level ASCII sniff for unknown extensions.
///
/// Note: an empty byte slice sniffs as ASCII text (vacuous `all`),
/// matching the original behaviour.
#[allow(dead_code)]
fn detect_content_type(name: &str, data: &[u8]) -> String {
    // Known extensions map directly to a fixed content type.
    let by_extension = [
        (".csv", "text/csv"),
        (".json", "application/json"),
        (".txt", "text/plain"),
    ]
    .iter()
    .find(|(ext, _)| name.ends_with(ext))
    .map(|&(_, mime)| mime);

    let mime = match by_extension {
        Some(known) => known,
        // Unknown extension: fall back to sniffing the bytes.
        None if data.iter().all(|b| b.is_ascii()) => "text/plain (detected)",
        None => "application/octet-stream",
    };
    mime.to_string()
}
312
/// Render a byte count as a human-readable string with a B, KB, MB,
/// or GB unit (binary multiples, one decimal place above bytes).
#[allow(dead_code)]
fn formatbytes(bytes: u64) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;
    const GB: f64 = MB * 1024.0;

    let value = bytes as f64;
    match value {
        v if v < KB => format!("{v} B"),
        v if v < MB => format!("{:.1} KB", v / KB),
        v if v < GB => format!("{:.1} MB", v / MB),
        v => format!("{:.1} GB", v / GB),
    }
}