scirs2_io/lib.rs
#![allow(deprecated)]
#![allow(clippy::redundant_closure)]
#![allow(clippy::clone_on_copy)]
#![allow(clippy::unnecessary_lazy_evaluations)]
#![allow(clippy::option_map_or_none)]
#![allow(clippy::redundant_pattern_matching)]
#![allow(clippy::field_reassign_with_default)]
#![allow(clippy::new_without_default)]
#![allow(clippy::new_ret_no_self)]
#![allow(clippy::unwrap_or_default)]
#![allow(clippy::needless_range_loop)]
#![allow(clippy::single_match)]
#![allow(clippy::needless_borrow)]
#![allow(clippy::manual_clamp)]
#![allow(clippy::vec_init_then_push)]
#![allow(clippy::empty_line_after_outer_attr)]
#![allow(clippy::duplicate_attributes)]
#![allow(clippy::only_used_in_recursion)]
#![allow(clippy::manual_ok_or)]
#![allow(clippy::repeat_once)]
#![allow(clippy::redundant_guards)]
#![allow(clippy::single_match_else)]
#![allow(clippy::manual_map)]
#![allow(clippy::iter_repeat_n)]
#![allow(clippy::option_if_let_else)]
#![allow(clippy::option_as_ref_cloned)]
#![allow(clippy::manual_ok_err)]
#![allow(clippy::manual_repeat_n)]
#![allow(clippy::match_single_binding)]
#![allow(clippy::unnecessary_map_or)]
//! # SciRS2 IO - Scientific Data Input/Output
//!
//! **scirs2-io** provides comprehensive file I/O capabilities for scientific computing,
//! supporting MATLAB, NetCDF, HDF5, CSV, WAV, image formats, and more, with streaming,
//! compression, async support, and database connectivity.
//!
//! ## 🎯 Key Features
//!
//! - **SciPy Compatibility**: Similar to `scipy.io` for MATLAB, WAV, ARFF files
//! - **Multiple Formats**: MATLAB (.mat), NetCDF, HDF5, CSV, WAV, images (PNG, JPEG, TIFF)
//! - **Matrix Market**: Sparse matrix exchange format
//! - **Streaming I/O**: Memory-efficient reading/writing of large datasets
//! - **Compression**: GZIP, ZSTD, LZ4, BZIP2 for data compression
//! - **Async I/O**: Non-blocking operations with tokio
//! - **Database**: SQL/NoSQL connectivity (PostgreSQL, MongoDB, InfluxDB)
//!
//! ## 📦 Module Overview
//!
//! | SciRS2 Module | SciPy Equivalent | Description |
//! |---------------|------------------|-------------|
//! | `matlab` | `scipy.io.loadmat`, `savemat` | MATLAB .mat file I/O |
//! | `wavfile` | `scipy.io.wavfile` | WAV audio file I/O |
//! | `netcdf` | `scipy.io.netcdf` | NetCDF scientific data format |
//! | `matrix_market` | `scipy.io.mmread`, `mmwrite` | Matrix Market sparse format |
//! | `csv` | - | CSV with type conversion |
//! | `image` | - | PNG, JPEG, BMP, TIFF image I/O |
//!
//! ## 🚀 Quick Start
//!
//! ```toml
//! [dependencies]
//! scirs2-io = "0.1.0-rc.2"
//! ```
//!
//! ```rust,no_run
//! use scirs2_io::csv::{read_csv, CsvReaderConfig};
//!
//! // Read CSV file
//! let config = CsvReaderConfig {
//!     has_header: true,
//!     delimiter: ',',
//!     ..Default::default()
//! };
//! let (headers, data) = read_csv("data.csv", Some(config)).unwrap();
//! ```
//!
//! ## 🔒 Version: 0.1.0-rc.2 (October 03, 2025)
//!
//! ## Modules
//!
//! - `arff`: Support for ARFF (Attribute-Relation File Format) files
//! - `compression`: Utilities for data compression and decompression
//! - `csv`: Support for CSV (Comma-Separated Values) files
//! - `image`: Support for image file formats (PNG, JPEG, BMP, TIFF)
//! - `matlab`: Support for MATLAB (.mat) files
//! - `matrix_market`: Support for Matrix Market sparse and dense matrix files
//! - `netcdf`: Support for NetCDF scientific data files
//! - `serialize`: Utilities for data serialization and deserialization
//! - `validation`: Utilities for data validation and integrity checking
//! - `wavfile`: Support for WAV audio files
//! - `error`: Error types for the IO module
//! - `fortran`: Support for Fortran unformatted files

#![warn(missing_docs)]
// Allow specific Clippy warnings with justifications
#![allow(clippy::manual_div_ceil)] // Manual div_ceil implementation for compatibility with Rust versions without div_ceil
#![allow(clippy::should_implement_trait)] // from_str methods are used consistently across modules
#![allow(clippy::type_complexity)] // Complex type is necessary for format validators

/// Advanced Mode Coordinator - Unified Intelligence for I/O Operations
///
/// Provides the highest level of intelligent I/O processing by coordinating multiple advanced systems:
/// - Neural adaptive optimization with reinforcement learning
/// - Quantum-inspired parallel processing with superposition algorithms
/// - GPU acceleration with multi-backend support
/// - Advanced memory management and resource allocation
/// - Real-time performance monitoring and self-optimization
/// - Meta-learning for cross-domain adaptation
/// - Emergent behavior detection and autonomous system improvement
pub mod advanced_coordinator;
/// Support for ARFF (Attribute-Relation File Format) files
pub mod arff;
/// Enhanced algorithms for Advanced Mode
///
/// Provides advanced algorithmic enhancements for the Advanced coordinator:
/// - Advanced pattern recognition with deep learning capabilities
/// - Multi-scale feature extraction and analysis
/// - Emergent pattern detection and meta-pattern recognition
/// - Sophisticated optimization recommendation systems
/// - Self-improving algorithmic components with adaptive learning
pub mod enhanced_algorithms;

/// Async I/O support for streaming capabilities
///
/// Provides asynchronous I/O interfaces for non-blocking processing of large datasets:
/// - Async file reading and writing with tokio
/// - Asynchronous stream processing with backpressure
/// - Concurrent processing with configurable concurrency levels
/// - Network I/O support for remote data access
/// - Cancellation support for long-running operations
/// - Real-time progress monitoring for async operations
#[cfg(feature = "async")]
pub mod async_io;
/// Data compression module
///
/// Provides utilities for compressing and decompressing scientific data:
/// - Lossless compression algorithms (GZIP, ZSTD, LZ4, BZIP2)
/// - Array compression with metadata preservation
/// - Chunked compression for large datasets
/// - Compression level configuration
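///
/// # Examples
///
/// A minimal sketch of round-tripping a byte buffer. The `compress_data`,
/// `decompress_data`, and `CompressionAlgorithm` names are illustrative and
/// may differ from the module's actual API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the compression module for the exact API.
/// use scirs2_io::compression::{compress_data, decompress_data, CompressionAlgorithm};
///
/// // Compress raw bytes with ZSTD at the default level, then verify the round trip.
/// let raw: Vec<u8> = (0..10_000u32).flat_map(|i| i.to_le_bytes()).collect();
/// let compressed = compress_data(&raw, CompressionAlgorithm::Zstd, None)?;
/// let restored = decompress_data(&compressed, CompressionAlgorithm::Zstd)?;
/// assert_eq!(raw, restored);
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```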
pub mod compression;
/// CSV (Comma-Separated Values) file format module
///
/// Provides functionality for reading and writing CSV files with various options:
/// - Basic CSV reading and writing
/// - Type conversion and automatic type detection
/// - Missing value handling with customizable options
/// - Memory-efficient processing of large files using chunked reading
/// - Support for specialized data types (date, time, complex numbers)
/// - Column-based operations with flexible configuration
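///
/// # Examples
///
/// Reading a headerless, semicolon-delimited file with the same `read_csv` /
/// `CsvReaderConfig` API shown in the crate-level quick start:
///
/// ```rust,no_run
/// use scirs2_io::csv::{read_csv, CsvReaderConfig};
///
/// // Configure the reader for a headerless, semicolon-delimited file.
/// let config = CsvReaderConfig {
///     has_header: false,
///     delimiter: ';',
///     ..Default::default()
/// };
/// let (headers, _data) = read_csv("measurements.csv", Some(config)).unwrap();
/// println!("Parsed {} columns", headers.len());
/// ```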
pub mod csv;
/// Database connectivity
///
/// Provides interfaces for database operations:
/// - Support for SQL databases (PostgreSQL, MySQL, SQLite)
/// - NoSQL database support (MongoDB, Redis, Cassandra)
/// - Time series databases (InfluxDB)
/// - Query builder and ORM-like features
/// - Bulk loading and export capabilities
/// - Integration with scientific data formats
pub mod database;
/// Distributed I/O processing
///
/// Provides infrastructure for distributed processing of large datasets:
/// - Distributed file reading with partitioning strategies
/// - Parallel writing with merge capabilities
/// - Distributed array operations
/// - Load balancing and fault tolerance
/// - Progress tracking for distributed operations
pub mod distributed;
/// Error types for the IO module
pub mod error;
/// Domain-specific file formats
///
/// Provides specialized support for scientific file formats:
/// - Bioinformatics: FASTA, FASTQ, SAM/BAM, VCF
/// - Geospatial: GeoTIFF, Shapefile, GeoJSON, KML
/// - Astronomical: FITS, VOTable
pub mod formats;
/// Fortran unformatted file format module
///
/// Provides functionality for reading and writing Fortran unformatted files:
/// - Sequential, direct, and stream access modes
/// - Support for different endianness and record marker sizes
/// - Automatic format detection
/// - Arrays stored in column-major order (Fortran convention)
/// - Support for all common Fortran data types
pub mod fortran;
/// GPU-accelerated I/O operations
///
/// Provides GPU-accelerated implementations of I/O operations using the scirs2-core GPU abstraction:
/// - Multi-backend GPU support (CUDA, Metal, OpenCL)
/// - GPU-accelerated compression and decompression
/// - GPU-accelerated data type conversions and checksum computation
/// - GPU-accelerated matrix operations for file I/O
/// - Advanced GPU memory management with pooling
/// - Performance monitoring, intelligent backend selection, and workload optimization
/// - Automatic fallback to CPU when GPU is not available
#[cfg(feature = "gpu")]
pub mod gpu;
/// Harwell-Boeing sparse matrix format module
///
/// Provides functionality for reading and writing Harwell-Boeing sparse matrix files:
/// - Support for real and complex matrices
/// - Different matrix symmetry types (general, symmetric, hermitian, skew-symmetric)
/// - Pattern matrices (structure only, no values)
/// - Conversion to/from column-compressed sparse (CCS) format
/// - Integration with ndarray for efficient matrix operations
pub mod harwell_boeing;
/// HDF5 file format module
///
/// Provides functionality for reading and writing HDF5 (Hierarchical Data Format) files:
/// - Reading and writing HDF5 groups and datasets
/// - Support for attributes on groups and datasets
/// - Multiple datatypes (integers, floats, strings, compound types)
/// - Chunking and compression options
/// - Integration with ndarray for efficient array operations
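///
/// # Examples
///
/// A minimal sketch of writing a dataset and reading it back. The file type
/// and method names here are illustrative, not the module's confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the hdf5 module for the exact API.
/// use scirs2_io::hdf5::H5File;
/// use scirs2_core::ndarray::Array2;
///
/// // Create a file, write a 2-D dataset under a group path, then read it back.
/// let data = Array2::from_shape_fn((4, 4), |(i, j)| (i * 4 + j) as f64);
/// let mut file = H5File::create("example.h5")?;
/// file.create_dataset("measurements/temperature", &data)?;
/// let restored: Array2<f64> = file.read_dataset("measurements/temperature")?;
/// assert_eq!(data, restored);
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```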
pub mod hdf5;
/// IDL (Interactive Data Language) save file format module
///
/// Provides functionality for reading and writing IDL save files (.sav):
/// - Support for all standard IDL data types
/// - Arrays, strings, structures, and complex numbers
/// - Automatic endianness detection and handling
/// - Compatible with IDL 8.0 format
/// - Commonly used in astronomy and remote sensing
pub mod idl;
/// Image file format module
///
/// Provides functionality for reading and writing common image formats:
/// - Reading and writing PNG, JPEG, BMP, and TIFF images
/// - Metadata extraction and manipulation
/// - Conversion between different image formats
/// - Basic image processing operations
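///
/// # Examples
///
/// A minimal sketch of converting between formats. The `read_image` /
/// `write_image` names are illustrative, not the module's confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the image module for the exact API.
/// use scirs2_io::image::{read_image, write_image};
///
/// // Load a PNG and re-encode it as JPEG; the format is inferred from the extension.
/// let (img, metadata) = read_image("input.png")?;
/// println!("Loaded {}x{} image", metadata.width, metadata.height);
/// write_image("output.jpg", &img, None)?;
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```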
pub mod image;
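/// Support for MATLAB (.mat) files
///
/// Provides functionality for reading and writing MATLAB .mat files,
/// analogous to `scipy.io.loadmat` / `scipy.io.savemat`.
///
/// # Examples
///
/// A minimal sketch. The `loadmat` name mirrors the SciPy equivalent listed
/// in the module overview and is illustrative here:
///
/// ```rust,ignore
/// // NOTE: illustrative name; check the matlab module for the exact API.
/// use scirs2_io::matlab::loadmat;
///
/// // Load all variables stored in a .mat file into a name -> value map.
/// let vars = loadmat("experiment.mat")?;
/// for name in vars.keys() {
///     println!("variable: {name}");
/// }
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```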
pub mod matlab;
/// Matrix Market file format module
///
/// Provides functionality for reading and writing Matrix Market files:
/// - Support for sparse matrix coordinate format (COO)
/// - Support for dense array format
/// - Real, complex, integer, and pattern data types
/// - Different matrix symmetry types (general, symmetric, hermitian, skew-symmetric)
/// - Integration with ndarray for efficient matrix operations
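///
/// # Examples
///
/// A minimal sketch of reading a sparse matrix in coordinate form. The
/// function name and field accesses are illustrative, not the module's
/// confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the matrix_market module for the exact API.
/// use scirs2_io::matrix_market::read_matrix_market;
///
/// // Read a .mtx file; entries come back as COO (row, column, value) triples.
/// let matrix = read_matrix_market("graph.mtx")?;
/// println!("{} x {} matrix with {} stored entries",
///          matrix.rows, matrix.cols, matrix.entries.len());
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```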
pub mod matrix_market;
/// Advanced metadata management
///
/// Provides comprehensive metadata handling across different file formats:
/// - Unified metadata interface for all formats
/// - Metadata validation with schemas
/// - Processing history tracking
/// - Format conversion between JSON, YAML, TOML
/// - Format-specific extensions
/// - Standard metadata keys for scientific data
pub mod metadata;
/// Machine learning framework compatibility
///
/// Provides conversion utilities and interfaces for ML frameworks:
/// - Support for PyTorch, TensorFlow, ONNX, SafeTensors formats
/// - Model and tensor serialization/deserialization
/// - Data type conversions between frameworks
/// - Dataset utilities for ML pipelines
/// - Seamless integration with ndarray
pub mod ml_framework;
/// Memory-mapped file I/O
///
/// Provides memory-mapped file operations for efficient handling of large arrays:
/// - Memory-mapped arrays for minimal memory usage
/// - Read-only and read-write access modes
/// - Support for multi-dimensional arrays
/// - Cross-platform compatibility (Unix and Windows)
/// - Type-safe operations with generic numeric types
///
/// # Examples
///
/// ```rust,no_run
/// use scirs2_io::mmap::{MmapArray, create_mmap_array};
/// use scirs2_core::ndarray::Array2;
///
/// // Create a large array file
/// let data = Array2::from_shape_fn((1000, 1000), |(i, j)| (i + j) as f64);
/// let file_path = "large_array.bin";
///
/// // Write array to file
/// create_mmap_array(file_path, &data)?;
///
/// // Memory-map the array for reading
/// let mmap_array: MmapArray<f64> = MmapArray::open(file_path)?;
/// let shape = mmap_array.shape()?;
/// let array_view = mmap_array.as_array_view(&shape)?;
///
/// // Access data without loading entire file into memory
/// let slice = mmap_array.as_slice()?;
/// let value = slice[500 * 1000 + 500]; // Access element at (500, 500)
/// println!("Value at (500, 500): {}", value);
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```
pub mod mmap;
/// NetCDF file format module
///
/// Provides functionality for reading and writing NetCDF files:
/// - Reading and writing NetCDF3 files
/// - Support for dimensions, variables, and attributes
/// - Conversion between NetCDF and ndarray data structures
/// - Memory-efficient access to large datasets
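///
/// # Examples
///
/// A minimal sketch of opening a file and listing its variables. The
/// `NetCDFFile` name and its methods are illustrative, not the module's
/// confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the netcdf module for the exact API.
/// use scirs2_io::netcdf::NetCDFFile;
///
/// // Open an existing NetCDF3 file read-only and inspect its contents.
/// let file = NetCDFFile::open("climate.nc")?;
/// for name in file.variable_names() {
///     println!("variable: {name}");
/// }
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```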
pub mod netcdf;
/// Network I/O and cloud storage integration
///
/// Provides functionality for reading and writing files over network protocols
/// and integrating with cloud storage services:
/// - HTTP/HTTPS file download and upload with progress tracking
/// - Cloud storage integration (AWS S3, Google Cloud Storage, Azure Blob Storage)
/// - Streaming I/O for efficient handling of large files over the network
/// - Authentication and secure credential management
/// - Retry logic and error recovery for network operations
/// - Local caching for offline access and performance optimization
///
/// # Examples
///
/// ```rust,no_run
/// use scirs2_io::network::NetworkClient;
///
/// // Create a network client for downloading files
/// let client = NetworkClient::new();
/// println!("Network client created for file operations");
/// ```
pub mod network;
/// Neural-adaptive I/O optimization
///
/// Provides AI-driven adaptive optimization for I/O operations:
/// - Machine learning-based performance optimization
/// - Dynamic parameter adaptation based on system metrics
/// - Neural network-driven decision making for resource allocation
/// - Real-time performance feedback and learning
/// - High-performance processing with adaptive algorithms
/// - SIMD-accelerated neural inference for low-latency decisions
pub mod neural_adaptive_io;
/// Out-of-core processing for terabyte-scale datasets
///
/// Provides infrastructure for processing datasets too large for memory:
/// - Memory-mapped arrays with virtual memory management
/// - Chunked processing with configurable chunk sizes
/// - Disk-based algorithms for sorting and aggregation
/// - Virtual arrays combining multiple data sources
/// - Sliding window iterators for streaming operations
pub mod out_of_core;
/// Apache Parquet columnar file format module
///
/// Provides functionality for reading and writing Apache Parquet files:
/// - Efficient columnar storage for large datasets
/// - Multiple compression codecs (Snappy, Gzip, LZ4, ZSTD, Brotli)
/// - Schema inference and validation
/// - Column projection for selective reading
/// - Memory-efficient chunked reading for large files
/// - Integration with Apache Arrow for high-performance I/O
/// - Python interoperability (Pandas, Polars, PyArrow compatible)
///
/// # Examples
///
/// ```rust,no_run
/// use scirs2_io::parquet::{read_parquet, write_parquet, ParquetWriteOptions};
/// use scirs2_core::ndarray::Array1;
///
/// // Write data to Parquet
/// let data = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
/// write_parquet("data.parquet", &data, Default::default())?;
///
/// // Read data from Parquet
/// let loaded = read_parquet("data.parquet")?;
/// println!("Loaded {} rows", loaded.num_rows());
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```
#[cfg(feature = "parquet")]
pub mod parquet;
/// Data pipeline APIs
///
/// Provides a flexible framework for building data processing pipelines:
/// - Composable pipeline stages for reading, transforming, and writing data
/// - Multiple execution strategies (sequential, parallel, streaming, async)
/// - Built-in transformations (normalization, encoding, aggregation)
/// - Error handling and recovery mechanisms
/// - Progress tracking and monitoring
/// - Caching and checkpointing for long-running pipelines
pub mod pipeline;
/// Quantum-inspired I/O processing algorithms
///
/// Provides quantum-inspired algorithms for high-performance I/O:
/// - Quantum superposition for parallel processing paths
/// - Quantum entanglement for correlated data operations
/// - Quantum annealing for parameter optimization
/// - Quantum interference patterns for data compression
/// - Quantum tunneling for barrier-free processing
/// - Quantum measurement for adaptive decision making
pub mod quantum_inspired_io;
/// Real-time data streaming protocols
///
/// Provides infrastructure for real-time data streaming and processing:
/// - WebSocket and Server-Sent Events support
/// - gRPC and MQTT streaming protocols
/// - Backpressure handling and flow control
/// - Stream transformations and filtering
/// - Multi-stream synchronization
/// - Time series buffering and aggregation
#[cfg(feature = "async")]
pub mod realtime;
/// Data serialization utilities
///
/// Provides functionality for serializing and deserializing scientific data:
/// - Binary, JSON, and MessagePack serialization formats
/// - Array serialization with metadata
/// - Structured data serialization
/// - Sparse matrix serialization
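///
/// # Examples
///
/// A minimal sketch of round-tripping an array. The `serialize_array`,
/// `deserialize_array`, and `SerializationFormat` names are illustrative,
/// not the module's confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the serialize module for the exact API.
/// use scirs2_io::serialize::{serialize_array, deserialize_array, SerializationFormat};
/// use scirs2_core::ndarray::Array1;
///
/// // Write an array as JSON, then read it back and compare.
/// let data = Array1::from_vec(vec![1.0, 2.0, 3.0]);
/// serialize_array("data.json", &data, SerializationFormat::Json)?;
/// let restored: Array1<f64> = deserialize_array("data.json", SerializationFormat::Json)?;
/// assert_eq!(data, restored);
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```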
pub mod serialize;
/// SIMD-accelerated I/O operations
///
/// Provides SIMD-optimized implementations of common I/O operations:
/// - Data type conversions with SIMD
/// - Audio normalization and processing
/// - CSV parsing acceleration
/// - Compression utilities with SIMD
/// - Checksum calculations
pub mod simd_io;
/// Comprehensive sparse matrix format support
///
/// Provides unified support for common sparse matrix formats:
/// - COO (Coordinate), CSR (Compressed Sparse Row), and CSC (Compressed Sparse Column) formats
/// - Efficient format conversion algorithms
/// - Matrix operations (addition, multiplication, transpose)
/// - I/O support with Matrix Market integration
/// - Performance-optimized algorithms for large sparse matrices
/// - Memory-efficient sparse data handling
///
/// # Examples
///
/// ```rust,no_run
/// use scirs2_io::sparse::SparseMatrix;
/// use scirs2_core::ndarray::Array2;
///
/// // Create a sparse matrix from a dense array
/// let dense = Array2::from_shape_vec((3, 3), vec![
///     1.0_f64, 0.0_f64, 2.0_f64,
///     0.0_f64, 3.0_f64, 0.0_f64,
///     4.0_f64, 0.0_f64, 5.0_f64
/// ]).unwrap();
///
/// let mut sparse = SparseMatrix::from_dense_2d(&dense, 0.0_f64)?;
/// println!("Sparse matrix: {} non-zeros", sparse.nnz());
///
/// // Convert to different formats
/// let _csr = sparse.to_csr()?;
/// let _csc = sparse.to_csc()?;
///
/// // Save to file
/// sparse.save_matrix_market("matrix.mtx")?;
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```
pub mod sparse;
/// Streaming and iterator interfaces for large data handling
///
/// Provides memory-efficient streaming interfaces for processing large datasets:
/// - Chunked reading for processing files in configurable chunks
/// - Iterator-based APIs for seamless integration with Rust's iterator ecosystem
/// - Streaming CSV processing with header support
/// - Memory-efficient processing without loading entire files
/// - Performance monitoring and statistics tracking
///
/// # Examples
///
/// ```rust,no_run
/// use scirs2_io::streaming::{StreamingConfig, process_file_chunked};
///
/// // Process a large file in chunks
/// let config = StreamingConfig::default().chunk_size(64 * 1024);
///
/// let (result, stats) = process_file_chunked("large_file.dat", config, |chunk_data, chunk_id| {
///     println!("Processing chunk {}: {} bytes", chunk_id, chunk_data.len());
///     Ok(())
/// })?;
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```
pub mod streaming;
/// Thread pool for parallel I/O operations
///
/// Provides a high-performance thread pool optimized for I/O operations:
/// - Separate thread pools for I/O-bound and CPU-bound tasks
/// - Work stealing for load balancing
/// - Performance monitoring and statistics
/// - Configurable thread counts and queue sizes
/// - Global thread pool for convenience
pub mod thread_pool;
/// Data validation and integrity checking module
///
/// Provides functionality for validating data integrity through checksums,
/// format validation, and other verification methods:
/// - File integrity validation with multiple checksum algorithms (CRC32, SHA256, BLAKE3)
/// - Format-specific validation for scientific data formats
/// - Directory manifests for data validation
/// - Integrity metadata for tracking data provenance
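///
/// # Examples
///
/// A minimal sketch of computing a file checksum. The `calculate_checksum`
/// and `ChecksumAlgorithm` names are illustrative, not the module's
/// confirmed API:
///
/// ```rust,ignore
/// // NOTE: illustrative names; check the validation module for the exact API.
/// use scirs2_io::validation::{calculate_checksum, ChecksumAlgorithm};
///
/// // Compute a SHA-256 digest for a data file; compare it against a recorded
/// // value to detect corruption or tampering.
/// let digest = calculate_checksum("results.csv", ChecksumAlgorithm::Sha256)?;
/// println!("sha256: {digest}");
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```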
pub mod validation;
/// Visualization tool integration
///
/// Provides interfaces for integrating with visualization libraries:
/// - Export to multiple visualization formats (Plotly, Matplotlib, Gnuplot, Vega-Lite)
/// - Fluent API for building plots
/// - Support for various plot types (line, scatter, histogram, heatmap)
/// - Quick plotting functions for common use cases
/// - Configurable styling and theming
pub mod visualization;
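/// Support for WAV audio files
///
/// Provides functionality for reading and writing WAV audio files,
/// analogous to `scipy.io.wavfile`.
///
/// # Examples
///
/// A minimal sketch. The `read` name mirrors `scipy.io.wavfile.read` and is
/// illustrative here:
///
/// ```rust,ignore
/// // NOTE: illustrative name; check the wavfile module for the exact API.
/// use scirs2_io::wavfile;
///
/// // Read a WAV file, obtaining the sample rate and the raw samples.
/// let (sample_rate, samples) = wavfile::read("tone.wav")?;
/// println!("{} samples at {} Hz", samples.len(), sample_rate);
/// # Ok::<(), scirs2_io::error::IoError>(())
/// ```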
pub mod wavfile;
/// Workflow automation tools
///
/// Provides a framework for building automated data processing workflows:
/// - Task definition and dependency management
/// - Workflow scheduling and execution
/// - Resource management and allocation
/// - Retry policies and error handling
/// - Progress monitoring and notifications
/// - Common workflow templates (ETL, batch processing)
pub mod workflow;
/// Zero-copy I/O optimizations
///
/// Provides zero-copy implementations for various I/O operations:
/// - Memory-mapped file access
/// - Zero-copy array views
/// - CSV parsing without allocation
/// - Binary data reading without copying
/// - Minimized memory allocations for large datasets
pub mod zero_copy;

// Re-export commonly used functionality
pub use advanced_coordinator::{
    AdaptiveImprovements, AdvancedCoordinator, AdvancedStatistics, IntelligenceLevel,
    PerformanceIntelligenceStats, ProcessingResult, QualityMetrics, StrategyType,
};
pub use enhanced_algorithms::{
    AdvancedPatternAnalysis, AdvancedPatternRecognizer, DataCharacteristics, EmergentPattern,
    MetaPattern, OptimizationRecommendation, SynergyType,
};