sklears_utils/file_io.rs

//! File I/O utilities for efficient data handling in machine learning workflows
//!
//! This module provides utilities for efficient file reading/writing, compression,
//! format conversion, streaming I/O operations, and data serialization.

use crate::{UtilsError, UtilsResult};
use scirs2_core::ndarray::{Array1, Array2};
use std::collections::HashMap;
use std::fs::{File, OpenOptions};
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;

// ===== EFFICIENT FILE OPERATIONS =====

/// Efficient file reader with buffering and memory management
pub struct EfficientFileReader {
    reader: BufReader<File>,
    #[allow(dead_code)]
    buffer_size: usize,
}

impl EfficientFileReader {
    /// Create a new efficient file reader
    pub fn new<P: AsRef<Path>>(path: P, buffer_size: Option<usize>) -> UtilsResult<Self> {
        let file = File::open(path)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to open file: {e}")))?;

        let buffer_size = buffer_size.unwrap_or(8192);
        let reader = BufReader::with_capacity(buffer_size, file);

        Ok(Self {
            reader,
            buffer_size,
        })
    }

    /// Read lines efficiently with an iterator
    pub fn read_lines(&mut self) -> impl Iterator<Item = UtilsResult<String>> + '_ {
        std::io::BufRead::lines(&mut self.reader).map(|line| {
            line.map_err(|e| UtilsError::InvalidParameter(format!("Failed to read line: {e}")))
        })
    }

    /// Read a single chunk of up to `size` bytes
    pub fn read_chunk(&mut self, size: usize) -> UtilsResult<Vec<u8>> {
        let mut buffer = vec![0u8; size];
        let bytes_read = self
            .reader
            .read(&mut buffer)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read chunk: {e}")))?;

        buffer.truncate(bytes_read);
        Ok(buffer)
    }

    /// Read all content efficiently
    pub fn read_all(&mut self) -> UtilsResult<Vec<u8>> {
        let mut content = Vec::new();
        self.reader
            .read_to_end(&mut content)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read file: {e}")))?;
        Ok(content)
    }

    /// Read a single line of delimited numerical data as a 1D array
    pub fn read_array1(&mut self, delimiter: &str) -> UtilsResult<Array1<f64>> {
        let mut line = String::new();
        self.reader
            .read_line(&mut line)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read line: {e}")))?;

        let values: Result<Vec<f64>, _> = line
            .trim()
            .split(delimiter)
            .filter(|s| !s.is_empty())
            .map(|s| s.parse::<f64>())
            .collect();

        let values = values
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to parse numbers: {e}")))?;

        Ok(Array1::from_vec(values))
    }

    /// Read delimited 2D numerical data, one row per line
    pub fn read_array2(&mut self, delimiter: &str) -> UtilsResult<Array2<f64>> {
        let mut rows = Vec::new();
        let mut line = String::new();

        // Propagate read errors instead of silently stopping at the first failure.
        while self
            .reader
            .read_line(&mut line)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read line: {e}")))?
            > 0
        {
            if line.trim().is_empty() {
                line.clear();
                continue;
            }

            let values: Result<Vec<f64>, _> = line
                .trim()
                .split(delimiter)
                .filter(|s| !s.is_empty())
                .map(|s| s.parse::<f64>())
                .collect();

            let values = values.map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to parse numbers: {e}"))
            })?;

            rows.push(values);
            line.clear();
        }

        if rows.is_empty() {
            return Err(UtilsError::EmptyInput);
        }

        let ncols = rows[0].len();
        let nrows = rows.len();

        // Verify all rows have the same length
        for row in &rows {
            if row.len() != ncols {
                return Err(UtilsError::ShapeMismatch {
                    expected: vec![ncols],
                    actual: vec![row.len()],
                });
            }
        }

        let flat: Vec<f64> = rows.into_iter().flatten().collect();
        Array2::from_shape_vec((nrows, ncols), flat)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to create array: {e}")))
    }
}
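
// Usage sketch (added for illustration, gated behind `cfg(test)` so it does not
// affect the library build): loading a delimited numeric text file into an
// `Array2<f64>` with a 64 KiB read buffer. The path and delimiter are whatever
// the caller supplies; nothing here is part of the original public API.
#[cfg(test)]
#[allow(dead_code)]
fn example_read_matrix<P: AsRef<Path>>(path: P) -> UtilsResult<Array2<f64>> {
    let mut reader = EfficientFileReader::new(path, Some(64 * 1024))?;
    // One delimited row per line; ragged rows surface as `ShapeMismatch`.
    reader.read_array2(",")
}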

/// Efficient file writer with buffering
pub struct EfficientFileWriter {
    writer: BufWriter<File>,
}

impl EfficientFileWriter {
    /// Create a new efficient file writer (truncating any existing file)
    pub fn new<P: AsRef<Path>>(path: P, buffer_size: Option<usize>) -> UtilsResult<Self> {
        let file = OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(path)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to create file: {e}")))?;

        let buffer_size = buffer_size.unwrap_or(8192);
        let writer = BufWriter::with_capacity(buffer_size, file);

        Ok(Self { writer })
    }

    /// Open a writer that appends to a file, creating it if it does not exist
    pub fn append<P: AsRef<Path>>(path: P, buffer_size: Option<usize>) -> UtilsResult<Self> {
        let file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(path)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to open file: {e}")))?;

        let buffer_size = buffer_size.unwrap_or(8192);
        let writer = BufWriter::with_capacity(buffer_size, file);

        Ok(Self { writer })
    }

    /// Write raw bytes through the buffered writer
    pub fn write_data(&mut self, data: &[u8]) -> UtilsResult<()> {
        self.writer
            .write_all(data)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write data: {e}")))?;
        Ok(())
    }

    /// Write string lines
    pub fn write_lines<I>(&mut self, lines: I) -> UtilsResult<()>
    where
        I: IntoIterator<Item = String>,
    {
        for line in lines {
            writeln!(self.writer, "{line}")
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write line: {e}")))?;
        }
        Ok(())
    }

    /// Write a 1D array as a single delimited line
    pub fn write_array1(&mut self, array: &Array1<f64>, delimiter: &str) -> UtilsResult<()> {
        let line = array
            .iter()
            .map(|x| x.to_string())
            .collect::<Vec<_>>()
            .join(delimiter);

        writeln!(self.writer, "{line}")
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write array: {e}")))?;
        Ok(())
    }

    /// Write a 2D array, one delimited line per row
    pub fn write_array2(&mut self, array: &Array2<f64>, delimiter: &str) -> UtilsResult<()> {
        for row in array.outer_iter() {
            let line = row
                .iter()
                .map(|x| x.to_string())
                .collect::<Vec<_>>()
                .join(delimiter);

            writeln!(self.writer, "{line}")
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write row: {e}")))?;
        }
        Ok(())
    }

    /// Flush the buffer
    pub fn flush(&mut self) -> UtilsResult<()> {
        self.writer
            .flush()
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to flush: {e}")))?;
        Ok(())
    }
}
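
// Usage sketch (illustrative only, `cfg(test)` gated): writing a 2-D array as
// comma-delimited text and flushing explicitly. Dropping the writer would also
// flush, but any error raised at that point would be silently discarded.
#[cfg(test)]
#[allow(dead_code)]
fn example_write_matrix<P: AsRef<Path>>(path: P, data: &Array2<f64>) -> UtilsResult<()> {
    let mut writer = EfficientFileWriter::new(path, None)?;
    writer.write_array2(data, ",")?;
    writer.flush()
}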

// ===== COMPRESSION UTILITIES =====

/// Simple compression utilities using built-in algorithms
pub struct CompressionUtils;

impl CompressionUtils {
    /// Compress data using flate2 (gzip-compatible)
    #[cfg(feature = "compression")]
    pub fn compress_gzip(data: &[u8]) -> UtilsResult<Vec<u8>> {
        use flate2::{write::GzEncoder, Compression};

        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder
            .write_all(data)
            .map_err(|e| UtilsError::InvalidParameter(format!("Compression failed: {e}")))?;

        encoder
            .finish()
            .map_err(|e| UtilsError::InvalidParameter(format!("Compression failed: {e}")))
    }

    /// Decompress gzip data
    #[cfg(feature = "compression")]
    pub fn decompress_gzip(data: &[u8]) -> UtilsResult<Vec<u8>> {
        use flate2::read::GzDecoder;

        let mut decoder = GzDecoder::new(data);
        let mut decompressed = Vec::new();
        decoder
            .read_to_end(&mut decompressed)
            .map_err(|e| UtilsError::InvalidParameter(format!("Decompression failed: {e}")))?;

        Ok(decompressed)
    }

    /// Simple run-length encoding for sparse data
    pub fn run_length_encode(data: &[u8]) -> Vec<(u8, usize)> {
        if data.is_empty() {
            return Vec::new();
        }

        let mut result = Vec::new();
        let mut current_value = data[0];
        let mut count = 1;

        for &byte in &data[1..] {
            if byte == current_value {
                count += 1;
            } else {
                result.push((current_value, count));
                current_value = byte;
                count = 1;
            }
        }
        result.push((current_value, count));
        result
    }

    /// Decode run-length encoded data
    pub fn run_length_decode(encoded: &[(u8, usize)]) -> Vec<u8> {
        let mut result = Vec::new();
        for &(value, count) in encoded {
            result.extend(std::iter::repeat(value).take(count));
        }
        result
    }
}
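
// Round-trip sketch for the run-length encoder (illustrative only). RLE pays
// off on inputs with long runs of repeated bytes, such as sparse masks; for
// general payloads the gzip helpers behind the `compression` feature are the
// more appropriate choice.
#[cfg(test)]
#[allow(dead_code)]
fn example_rle_roundtrip() {
    let sparse = [0u8, 0, 0, 0, 7, 7, 0, 0];
    let encoded = CompressionUtils::run_length_encode(&sparse);
    assert_eq!(encoded, vec![(0u8, 4), (7u8, 2), (0u8, 2)]);
    let decoded = CompressionUtils::run_length_decode(&encoded);
    assert_eq!(decoded, sparse.to_vec());
}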

// ===== STREAMING I/O =====

/// Streaming data processor for large files
pub struct StreamProcessor<R: Read> {
    reader: R,
    chunk_size: usize,
}

impl<R: Read> StreamProcessor<R> {
    /// Create a new stream processor
    pub fn new(reader: R, chunk_size: usize) -> Self {
        Self { reader, chunk_size }
    }

    /// Process data in chunks with a callback function
    pub fn process_chunks<F>(&mut self, mut processor: F) -> UtilsResult<()>
    where
        F: FnMut(&[u8]) -> UtilsResult<()>,
    {
        let mut buffer = vec![0u8; self.chunk_size];

        loop {
            let bytes_read = self
                .reader
                .read(&mut buffer)
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read chunk: {e}")))?;

            if bytes_read == 0 {
                break;
            }

            processor(&buffer[..bytes_read])?;
        }

        Ok(())
    }

    /// Process lines from a text stream
    pub fn process_lines<F>(&mut self, mut processor: F) -> UtilsResult<()>
    where
        F: FnMut(&str) -> UtilsResult<()>,
        R: BufRead,
    {
        let mut line = String::new();

        loop {
            line.clear();
            let bytes_read = self
                .reader
                .read_line(&mut line)
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to read line: {e}")))?;

            if bytes_read == 0 {
                break;
            }

            processor(line.trim_end())?;
        }

        Ok(())
    }
}
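
// Usage sketch for `StreamProcessor` (illustrative only): counting bytes in
// fixed-size chunks without loading the whole source into memory. Any `Read`
// implementor works; a `BufReader<File>` is the typical choice for large files.
#[cfg(test)]
#[allow(dead_code)]
fn example_count_bytes<R: Read>(source: R) -> UtilsResult<usize> {
    let mut total = 0usize;
    let mut processor = StreamProcessor::new(source, 4096);
    processor.process_chunks(|chunk| {
        total += chunk.len();
        Ok(())
    })?;
    Ok(total)
}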

// ===== FORMAT CONVERSION =====

/// Format conversion utilities
pub struct FormatConverter;

impl FormatConverter {
    /// Convert CSV to structured data
    pub fn csv_to_arrays<P: AsRef<Path>>(
        path: P,
        delimiter: char,
        has_header: bool,
    ) -> UtilsResult<(Option<Vec<String>>, Array2<f64>)> {
        let mut reader = EfficientFileReader::new(path, None)?;
        let mut lines = reader.read_lines();

        let header = if has_header {
            if let Some(line_result) = lines.next() {
                let line = line_result?;
                Some(
                    line.split(delimiter)
                        .map(|s| s.trim().to_string())
                        .collect(),
                )
            } else {
                return Err(UtilsError::EmptyInput);
            }
        } else {
            None
        };

        let mut rows = Vec::new();
        for line_result in lines {
            let line = line_result?;
            if line.trim().is_empty() {
                continue;
            }

            let values: Result<Vec<f64>, _> = line
                .split(delimiter)
                .map(|s| s.trim().parse::<f64>())
                .collect();

            let values = values.map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to parse CSV values: {e}"))
            })?;

            rows.push(values);
        }

        if rows.is_empty() {
            return Err(UtilsError::EmptyInput);
        }

        let ncols = rows[0].len();
        let nrows = rows.len();

        for row in &rows {
            if row.len() != ncols {
                return Err(UtilsError::ShapeMismatch {
                    expected: vec![ncols],
                    actual: vec![row.len()],
                });
            }
        }

        let flat: Vec<f64> = rows.into_iter().flatten().collect();
        let array = Array2::from_shape_vec((nrows, ncols), flat)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to create array: {e}")))?;

        Ok((header, array))
    }

    /// Convert arrays to CSV format
    pub fn arrays_to_csv<P: AsRef<Path>>(
        path: P,
        data: &Array2<f64>,
        header: Option<&[String]>,
        delimiter: char,
    ) -> UtilsResult<()> {
        let mut writer = EfficientFileWriter::new(path, None)?;

        if let Some(header) = header {
            let header_line = header.join(&delimiter.to_string());
            writeln!(writer.writer, "{header_line}").map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to write header: {e}"))
            })?;
        }

        for row in data.outer_iter() {
            let line = row
                .iter()
                .map(|x| x.to_string())
                .collect::<Vec<_>>()
                .join(&delimiter.to_string());

            writeln!(writer.writer, "{line}")
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write row: {e}")))?;
        }

        writer.flush()?;
        Ok(())
    }

    /// Parse a JSON object string into a map of keys to JSON values
    pub fn json_to_map(json_str: &str) -> UtilsResult<HashMap<String, serde_json::Value>> {
        serde_json::from_str(json_str)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to parse JSON: {e}")))
    }

    /// Convert a map to a pretty-printed JSON string
    pub fn map_to_json(map: &HashMap<String, serde_json::Value>) -> UtilsResult<String> {
        serde_json::to_string_pretty(map)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to serialize JSON: {e}")))
    }

    /// Convert YAML string to map
    #[cfg(feature = "yaml")]
    pub fn yaml_to_map(yaml_str: &str) -> UtilsResult<HashMap<String, serde_json::Value>> {
        let yaml_value: serde_yaml::Value = serde_yaml::from_str(yaml_str)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to parse YAML: {e}")))?;

        // Convert YAML value to JSON value for consistency
        let json_str = serde_json::to_string(&yaml_value).map_err(|e| {
            UtilsError::InvalidParameter(format!("Failed to convert YAML to JSON: {e}"))
        })?;

        Self::json_to_map(&json_str)
    }

    /// Convert map to YAML string
    #[cfg(feature = "yaml")]
    pub fn map_to_yaml(map: &HashMap<String, serde_json::Value>) -> UtilsResult<String> {
        serde_yaml::to_string(map)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to serialize YAML: {e}")))
    }

    /// Convert TOML string to map
    #[cfg(feature = "toml_support")]
    pub fn toml_to_map(toml_str: &str) -> UtilsResult<HashMap<String, serde_json::Value>> {
        let toml_value: toml::Value = toml::from_str(toml_str)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to parse TOML: {e}")))?;

        // Convert TOML value to JSON value for consistency
        let json_str = serde_json::to_string(&toml_value).map_err(|e| {
            UtilsError::InvalidParameter(format!("Failed to convert TOML to JSON: {e}"))
        })?;

        Self::json_to_map(&json_str)
    }

    /// Convert map to TOML string
    #[cfg(feature = "toml_support")]
    pub fn map_to_toml(map: &HashMap<String, serde_json::Value>) -> UtilsResult<String> {
        // Convert JSON values to TOML-compatible values
        let toml_value = serde_json::from_str::<toml::Value>(&serde_json::to_string(map)?)
            .map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to convert to TOML value: {e}"))
            })?;

        toml::to_string_pretty(&toml_value)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to serialize TOML: {e}")))
    }

    /// Convert XML string to simplified map structure
    #[cfg(feature = "xml")]
    pub fn xml_to_simple_map(xml_str: &str) -> UtilsResult<HashMap<String, String>> {
        use quick_xml::escape::unescape;
        use quick_xml::events::Event;
        use quick_xml::Reader;

        let mut reader = Reader::from_str(xml_str);
        reader.config_mut().trim_text(true);

        let mut result = HashMap::new();
        let mut current_element = String::new();
        let mut buf = Vec::new();

        loop {
            match reader.read_event_into(&mut buf) {
                Ok(Event::Start(ref e)) => {
                    current_element = String::from_utf8_lossy(e.name().as_ref()).to_string();
                }
                Ok(Event::Text(e)) => {
                    if !current_element.is_empty() {
                        let raw_text = e.decode().map_err(|e| {
                            UtilsError::InvalidParameter(format!(
                                "Failed to decode XML text: {}",
                                e
                            ))
                        })?;
                        let text = unescape(&raw_text).map_err(|e| {
                            UtilsError::InvalidParameter(format!(
                                "Failed to unescape XML text: {}",
                                e
                            ))
                        })?;
                        result.insert(current_element.clone(), text.into_owned());
                    }
                }
                Ok(Event::End(_)) => {
                    current_element.clear();
                }
                Ok(Event::Eof) => break,
                Err(e) => {
                    return Err(UtilsError::InvalidParameter(format!(
                        "Failed to parse XML: {}",
                        e
                    )));
                }
                _ => {}
            }
            buf.clear();
        }

        Ok(result)
    }

    /// Convert map to simple XML structure
    #[cfg(feature = "xml")]
    pub fn simple_map_to_xml(
        map: &HashMap<String, String>,
        root_name: &str,
    ) -> UtilsResult<String> {
        use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
        use quick_xml::Writer;
        use std::io::Cursor;

        let mut writer = Writer::new(Cursor::new(Vec::new()));

        // Write XML declaration
        writer
            .write_event(Event::Decl(quick_xml::events::BytesDecl::new(
                "1.0",
                Some("UTF-8"),
                None,
            )))
            .map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to write XML declaration: {e}"))
            })?;

        // Write root element start
        writer
            .write_event(Event::Start(BytesStart::new(root_name)))
            .map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to write root element: {e}"))
            })?;

        // Write map entries
        for (key, value) in map {
            writer
                .write_event(Event::Start(BytesStart::new(key)))
                .map_err(|e| {
                    UtilsError::InvalidParameter(format!("Failed to write element start: {e}"))
                })?;

            writer
                .write_event(Event::Text(BytesText::new(value)))
                .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write text: {e}")))?;

            writer
                .write_event(Event::End(BytesEnd::new(key)))
                .map_err(|e| {
                    UtilsError::InvalidParameter(format!("Failed to write element end: {e}"))
                })?;
        }

        // Write root element end
        writer
            .write_event(Event::End(BytesEnd::new(root_name)))
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to write root end: {e}")))?;

        let result = writer.into_inner().into_inner();
        String::from_utf8(result).map_err(|e| {
            UtilsError::InvalidParameter(format!("Failed to convert XML to string: {e}"))
        })
    }

    /// Parse a JSON object of named 2D numeric arrays into `Array2<f64>` values
    pub fn json_to_arrays(json_str: &str) -> UtilsResult<HashMap<String, Array2<f64>>> {
        let data: HashMap<String, Vec<Vec<f64>>> = serde_json::from_str(json_str).map_err(|e| {
            UtilsError::InvalidParameter(format!("Failed to parse JSON arrays: {e}"))
        })?;

        let mut result = HashMap::new();
        for (key, matrix) in data {
            if matrix.is_empty() {
                continue;
            }

            let nrows = matrix.len();
            let ncols = matrix[0].len();

            // Verify all rows have the same length
            for row in matrix.iter() {
                if row.len() != ncols {
                    return Err(UtilsError::ShapeMismatch {
                        expected: vec![ncols],
                        actual: vec![row.len()],
                    });
                }
            }

            let flat: Vec<f64> = matrix.into_iter().flatten().collect();
            let array = Array2::from_shape_vec((nrows, ncols), flat).map_err(|e| {
                UtilsError::InvalidParameter(format!("Failed to create array: {e}"))
            })?;

            result.insert(key, array);
        }

        Ok(result)
    }

    /// Convert arrays to JSON format for ML data
    pub fn arrays_to_json(arrays: &HashMap<String, &Array2<f64>>) -> UtilsResult<String> {
        let mut data = HashMap::new();

        for (key, array) in arrays {
            let matrix: Vec<Vec<f64>> = array.outer_iter().map(|row| row.to_vec()).collect();
            data.insert(key.clone(), matrix);
        }

        serde_json::to_string_pretty(&data).map_err(|e| {
            UtilsError::InvalidParameter(format!("Failed to serialize arrays to JSON: {e}"))
        })
    }
}
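
// Usage sketch (illustrative only): loading a headered CSV and re-serializing
// the matrix in the JSON layout produced by `arrays_to_json`. The "features"
// key is a placeholder chosen for this example, not a convention of the module.
#[cfg(test)]
#[allow(dead_code)]
fn example_csv_to_json<P: AsRef<Path>>(path: P) -> UtilsResult<String> {
    let (_header, matrix) = FormatConverter::csv_to_arrays(path, ',', true)?;
    let mut arrays = HashMap::new();
    arrays.insert("features".to_string(), &matrix);
    FormatConverter::arrays_to_json(&arrays)
}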

// ===== DATA SERIALIZATION =====

/// Serialization utilities for ML data structures
pub struct SerializationUtils;

impl SerializationUtils {
    /// Serialize array to binary format
    pub fn serialize_array2(array: &Array2<f64>) -> UtilsResult<Vec<u8>> {
        let shape = array.shape();
        let mut data = Vec::new();

        // Write shape information
        data.extend_from_slice(&(shape[0] as u64).to_le_bytes());
        data.extend_from_slice(&(shape[1] as u64).to_le_bytes());

        // Write array data
        for &value in array.iter() {
            data.extend_from_slice(&value.to_le_bytes());
        }

        Ok(data)
    }

    /// Deserialize array from binary format
    pub fn deserialize_array2(data: &[u8]) -> UtilsResult<Array2<f64>> {
        if data.len() < 16 {
            return Err(UtilsError::InvalidParameter(
                "Insufficient data for array header".to_string(),
            ));
        }

        // Read shape information
        let nrows = u64::from_le_bytes([
            data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7],
        ]) as usize;

        let ncols = u64::from_le_bytes([
            data[8], data[9], data[10], data[11], data[12], data[13], data[14], data[15],
        ]) as usize;

        let expected_len = 16 + nrows * ncols * 8;
        if data.len() != expected_len {
            return Err(UtilsError::InvalidParameter(format!(
                "Data length mismatch: expected {}, got {}",
                expected_len,
                data.len()
            )));
        }

        // Read array data
        let mut values = Vec::with_capacity(nrows * ncols);
        for i in 0..(nrows * ncols) {
            let start = 16 + i * 8;
            let bytes = [
                data[start],
                data[start + 1],
                data[start + 2],
                data[start + 3],
                data[start + 4],
                data[start + 5],
                data[start + 6],
                data[start + 7],
            ];
            values.push(f64::from_le_bytes(bytes));
        }

        Array2::from_shape_vec((nrows, ncols), values)
            .map_err(|e| UtilsError::InvalidParameter(format!("Failed to create array: {e}")))
    }

    /// Serialize to file
    pub fn serialize_to_file<P: AsRef<Path>>(path: P, array: &Array2<f64>) -> UtilsResult<()> {
        let data = Self::serialize_array2(array)?;
        let mut writer = EfficientFileWriter::new(path, None)?;
        writer.write_data(&data)?;
        writer.flush()?;
        Ok(())
    }

    /// Deserialize from file
    pub fn deserialize_from_file<P: AsRef<Path>>(path: P) -> UtilsResult<Array2<f64>> {
        let mut reader = EfficientFileReader::new(path, None)?;
        let data = reader.read_all()?;
        Self::deserialize_array2(&data)
    }
}
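
// Round-trip sketch for the binary serializer (illustrative only). The format
// is two little-endian u64 shape fields (16 bytes) followed by the f64 values
// in row-major order, so a (rows, cols) array occupies 16 + rows * cols * 8 bytes.
#[cfg(test)]
#[allow(dead_code)]
fn example_binary_roundtrip(array: &Array2<f64>) -> UtilsResult<bool> {
    let bytes = SerializationUtils::serialize_array2(array)?;
    let restored = SerializationUtils::deserialize_array2(&bytes)?;
    Ok(restored == *array)
}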

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use tempfile::NamedTempFile;

    #[test]
    fn test_efficient_file_reader_writer() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();

        // Write test data
        let mut writer = EfficientFileWriter::new(path, None).unwrap();
        writer
            .write_lines(vec!["line1".to_string(), "line2".to_string()])
            .unwrap();
        writer.flush().unwrap();
        drop(writer);

        // Read test data
        let mut reader = EfficientFileReader::new(path, None).unwrap();
        let lines: Result<Vec<_>, _> = reader.read_lines().collect();
        let lines = lines.unwrap();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "line1");
        assert_eq!(lines[1], "line2");
    }

    #[test]
    fn test_array_io() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();

        // Create test array
        let original = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();

        // Write as CSV
        FormatConverter::arrays_to_csv(path, &original, None, ',').unwrap();

        // Read back as CSV
        let (header, loaded) = FormatConverter::csv_to_arrays(path, ',', false).unwrap();
        assert!(header.is_none());
        assert_eq!(original.shape(), loaded.shape());
        assert!((original - loaded).mapv(f64::abs).sum() < 1e-10);
    }

    #[test]
    fn test_serialization() {
        let original = Array2::from_shape_vec((2, 3), vec![1.1, 2.2, 3.3, 4.4, 5.5, 6.6]).unwrap();

        // Serialize and deserialize
        let serialized = SerializationUtils::serialize_array2(&original).unwrap();
        let deserialized = SerializationUtils::deserialize_array2(&serialized).unwrap();

        assert_eq!(original.shape(), deserialized.shape());
        assert!((original - deserialized).mapv(f64::abs).sum() < 1e-10);
    }

    #[test]
    fn test_compression() {
        let data = b"Hello, World! This is a test string for compression.";

        let encoded = CompressionUtils::run_length_encode(data);
        let decoded = CompressionUtils::run_length_decode(&encoded);

        assert_eq!(data.to_vec(), decoded);
    }

    #[test]
    fn test_stream_processor() {
        let data = b"chunk1chunk2chunk3";
        let cursor = Cursor::new(data);
        let mut processor = StreamProcessor::new(cursor, 6);

        let mut chunks = Vec::new();
        processor
            .process_chunks(|chunk| {
                chunks.push(chunk.to_vec());
                Ok(())
            })
            .unwrap();

        assert_eq!(chunks.len(), 3);
        assert_eq!(&chunks[0], b"chunk1");
        assert_eq!(&chunks[1], b"chunk2");
        assert_eq!(&chunks[2], b"chunk3");
    }

    #[test]
    fn test_format_conversion() {
        let temp_file = NamedTempFile::new().unwrap();
        let path = temp_file.path();

        // Create CSV content with a header row and numeric data rows
        std::fs::write(path, "age,score\n25,95.5\n30,87.2").unwrap();

        let (header, data) = FormatConverter::csv_to_arrays(path, ',', true).unwrap();

        assert!(header.is_some());
        let header = header.unwrap();
        assert_eq!(header, vec!["age", "score"]);

        assert_eq!(data.shape(), &[2, 2]);
        assert!((data[[0, 0]] - 25.0).abs() < 1e-10);
        assert!((data[[0, 1]] - 95.5).abs() < 1e-10);
        assert!((data[[1, 0]] - 30.0).abs() < 1e-10);
        assert!((data[[1, 1]] - 87.2).abs() < 1e-10);
    }

    #[test]
    fn test_enhanced_json_arrays() {
        // Test JSON to arrays conversion
        let json_data = r#"
        {
            "features": [[1.0, 2.0], [3.0, 4.0]],
            "targets": [[5.0], [6.0]]
        }"#;

        let arrays = FormatConverter::json_to_arrays(json_data).unwrap();

        assert_eq!(arrays.len(), 2);
        assert!(arrays.contains_key("features"));
        assert!(arrays.contains_key("targets"));

        let features = &arrays["features"];
        assert_eq!(features.shape(), &[2, 2]);
        assert!((features[[0, 0]] - 1.0).abs() < 1e-10);
        assert!((features[[1, 1]] - 4.0).abs() < 1e-10);

        // Test arrays to JSON conversion
        let mut array_refs = HashMap::new();
        array_refs.insert("features".to_string(), features);

        let json_result = FormatConverter::arrays_to_json(&array_refs).unwrap();
        assert!(json_result.contains("features"));
        assert!(json_result.contains("1.0"));
    }

    #[test]
    #[cfg(feature = "yaml")]
    fn test_yaml_conversion() {
        let yaml_data = r#"
        name: test
        value: 42
        nested:
          key: "hello"
        "#;

        let map = FormatConverter::yaml_to_map(yaml_data).unwrap();
        assert!(map.contains_key("name"));
        assert!(map.contains_key("value"));

        let yaml_result = FormatConverter::map_to_yaml(&map).unwrap();
        assert!(yaml_result.contains("name"));
        assert!(yaml_result.contains("42"));
    }

    #[test]
    #[cfg(feature = "toml_support")]
    fn test_toml_conversion() {
        let toml_data = r#"
        name = "test"
        value = 42

        [nested]
        key = "hello"
        "#;

        let map = FormatConverter::toml_to_map(toml_data).unwrap();
        assert!(map.contains_key("name"));
        assert!(map.contains_key("value"));

        let toml_result = FormatConverter::map_to_toml(&map).unwrap();
        assert!(toml_result.contains("name"));
        assert!(toml_result.contains("42"));
    }

    #[test]
    #[cfg(feature = "xml")]
    fn test_xml_conversion() {
        let xml_data = r#"<?xml version="1.0" encoding="UTF-8"?>
        <root>
            <name>test</name>
            <value>42</value>
        </root>"#;

        let map = FormatConverter::xml_to_simple_map(xml_data).unwrap();
        assert!(map.contains_key("name"));
        assert!(map.contains_key("value"));
        assert_eq!(map["name"], "test");
        assert_eq!(map["value"], "42");

        let xml_result = FormatConverter::simple_map_to_xml(&map, "root").unwrap();
        assert!(xml_result.contains("<name>test</name>"));
        assert!(xml_result.contains("<value>42</value>"));
    }
}