// easy_rs/easy_reader.rs

use anyhow::{anyhow, Context, Result};
use chrono::{DateTime, Utc};
use flate2::read::GzDecoder;
use ndarray::{s, Array2};
use std::fs::File;
use std::io::{BufRead, BufReader, Read};
use std::path::Path;
8
/// Field separator used when reading `.easy` files: columns are tab-separated.
const DELIMITER: u8 = b'\t';

/// Floating-point type used for every parsed sample value.
pub type Float = f64;
11
12/// Struct representing a reader for EEG data stored in `.easy` files.
13///
14/// This struct is responsible for parsing and storing the data from a `.easy` file,
15/// which may include EEG signals, accelerometer data, and associated markers. The struct
16/// loads the data from `.easy` and `.easy.gz` and optional `.info` files, provides methods for
17/// accessing the data, and tracks relevant metadata about the file, including the
18/// start date and number of channels.
19#[derive(Debug)]
20#[allow(dead_code)]
21pub struct EasyReader {
22    /// scale used to divide raw eeg values
23    scale: Float,
24
25    verbose: bool,
26
27    /// Path to the `.easy` file being read.
28    ///
29    /// This is the full path to the `.easy` file that contains the EEG and accelerometer data.
30    /// The file is parsed to extract the signals and metadata.
31    filepath: String,
32
33    /// Base name of the file without the extension.
34    ///
35    /// This is derived from the `filepath` and excludes the extension (e.g., `.easy` or `.easy.gz`).
36    /// It is used for naming related files like the `.info` file.
37    basename: String,
38
39    /// The extension of the file (either "easy" or "easy.gz").
40    ///
41    /// This is used to identify the file type and determine how to process it.
42    extension: String,
43
44    /// Root of the file name (file path without extension).
45    ///
46    /// Used to construct the path for the associated `.info` file.
47    filenameroot: String,
48
49    /// Path to the associated `.info` file.
50    ///
51    /// If available, this file provides information about the electrode names and other metadata.
52    infofilepath: String,
53
54    /// Flag indicating whether accelerometer data is present.
55    ///
56    /// This flag is set to `true` if accelerometer data is found in the `.easy` file or the `.info` file.
57    acc_data: bool,
58
59    /// List of electrode names.
60    ///
61    /// If the `.info` file is available, this field will contain the names of the EEG channels (electrodes).
62    /// If the `.info` file is not present, this will be populated with default channel names.
63    electrodes: Vec<String>,
64
65    /// Number of EEG channels.
66    ///
67    /// This represents the number of electrodes in the dataset (excluding accelerometer data).
68    /// It is determined from the `.info` file or the `.easy` file.
69    num_channels: Option<usize>,
70
71    /// Start date of the EEG recording.
72    ///
73    /// This date is extracted from the first timestamp in the `.easy` file. It represents the
74    /// time when the EEG recording began.
75    eegstartdate: Option<String>,
76
77    /// Array representing the time vector of the dataset in seconds.
78    ///
79    /// This array contains the time of each sample relative to the start of the recording.
80    np_time: Option<Array2<Float>>,
81
82    /// 2D array of EEG data.
83    ///
84    /// This is a 2D array where each row represents an EEG sample, and each column represents
85    /// an individual channel (electrode). The data is in microvolts (uV).
86    np_eeg: Option<Array2<Float>>,
87
88    /// 2D array of stimulus data (optional).
89    ///
90    /// If present, this array contains stimulus information related to the EEG recording. It is typically used
91    /// for event-marking or stimulus presentation data, but it may not always be available.
92    np_stim: Option<Array2<Float>>,
93
94    /// 2D array of accelerometer data.
95    ///
96    /// If accelerometer data is available, this array will contain the 3-axis accelerometer readings for each sample.
97    /// The data represents the X, Y, and Z axes of the accelerometer. The array has shape `(num_samples, 3)`.
98    np_acc: Option<Array2<Float>>,
99
100    /// Array of markers associated with the EEG data.
101    ///
102    /// This array holds marker values that can represent events, triggers, or annotations
103    /// in the EEG signal. Markers are typically used to mark specific moments in time during the recording.
104    np_markers: Option<Array2<Float>>,
105
106    /// Log of the events related to the processing of the `.easy` file.
107    ///
108    /// This is a collection of strings that logs important events, like the creation of the `EasyReader` instance
109    /// and when key steps in the file processing were completed. This can be useful for debugging and tracking processing.
110    log: Vec<String>,
111}
112
113impl EasyReader {
114    /// Initializes a new `EasyReader` instance from the given file path.
115    pub fn new(filepath: &str, scale: Float, verbose: bool) -> Result<Self> {
116        if verbose {
117            println!("Initializing in file path: {}", filepath);
118        }
119
120        let extension;
121        let (filenameroot, basename) = if filepath.ends_with(".easy.gz") {
122            extension = "easy.gz".to_string();
123            let filenameroot = filepath.trim_end_matches(".gz");
124            let basename = Path::new(filepath)
125                .file_name()
126                .unwrap()
127                .to_str()
128                .unwrap()
129                .trim_end_matches(".gz")
130                .to_string();
131            (filenameroot.to_string(), basename)
132        } else if filepath.ends_with(".easy") {
133            extension = "easy".to_string();
134            let filenameroot = filepath.trim_end_matches(".easy");
135            let basename = Path::new(filepath)
136                .file_name()
137                .unwrap()
138                .to_str()
139                .unwrap()
140                .trim_end_matches(".easy")
141                .to_string();
142            (filenameroot.to_string(), basename)
143        } else {
144            return Err(anyhow!("ERROR: Proposed file has wrong extension."));
145        };
146
147        let infofilepath = format!("{}.info", filenameroot);
148
149        let mut reader = EasyReader {
150            scale,
151            verbose,
152            filepath: filepath.to_string(),
153            basename,
154            extension,
155            filenameroot,
156            infofilepath,
157            acc_data: false,
158            electrodes: Vec::new(),
159            num_channels: None,
160            eegstartdate: None,
161            np_time: None,
162            np_eeg: None,
163            np_stim: None,
164            np_acc: None,
165            np_markers: None,
166            log: vec![format!("capsule created: {}", Utc::now())],
167        };
168
169        // Try to read the info file
170        reader.get_info()?;
171
172        Ok(reader)
173    }
174
175    /// Reads and processes the `.info` file for metadata about channels and accelerometer data.
176    fn get_info(&mut self) -> Result<()> {
177        let file = File::open(&self.infofilepath);
178
179        match file {
180            Ok(file) => {
181                let reader = BufReader::new(file);
182                let mut electrodes = Vec::new();
183                let mut acc_data = false;
184
185                for line in reader.lines() {
186                    let line = line.unwrap();
187                    if line.contains("Channel ") {
188                        let electrode = line.split_whitespace().last().unwrap().to_string();
189                        electrodes.push(electrode);
190                    }
191                    if line.contains("Accelerometer data: ") {
192                        acc_data = true;
193                    }
194                }
195
196                self.electrodes = electrodes;
197                self.acc_data = acc_data;
198                self.num_channels = Some(self.electrodes.len());
199
200                Ok(())
201            }
202            Err(_) => {
203                // If no info file is found, read the .easy file to determine the number of channels
204                self.read_easy_file_for_channels()
205            }
206        }
207    }
208
209    /// Reads the `.easy` file to determine the number of channels based on the file structure.
210    fn read_easy_file_for_channels(&mut self) -> Result<()> {
211        let reader = self.get_file_reader(&self.filepath)?;
212
213        let mut rdr = csv::ReaderBuilder::new()
214            .delimiter(DELIMITER)
215            .has_headers(false)
216            .from_reader(reader);
217
218        // Read the first 5 lines to determine number of columns
219        let mut header = rdr.records().take(5);
220        let first_record = header.next().unwrap().unwrap();
221
222        let num_columns = first_record.len();
223
224        let num_channels = if [13, 25, 37].contains(&num_columns) {
225            num_columns - 5
226        } else if [10, 22, 34].contains(&num_columns) {
227            num_columns - 2
228        } else {
229            return Err(anyhow!("Number of columns mismatch with expected values."));
230        };
231
232        self.num_channels = Some(num_channels);
233        self.electrodes = (1..=num_channels).map(|x| format!("Ch{}", x)).collect();
234        Ok(())
235    }
236
237    /// Reads and processes raw EEG and accelerometer data from the `.easy` file.
238    ///
239    /// This method reads the `.easy` file (or the data section of it), converts the EEG data
240    /// into microvolts (uV), and extracts time, accelerometer, and marker data. It stores the
241    /// resulting data in the struct's fields (e.g., `np_eeg`, `np_time`, `np_acc`, `np_markers`).
242    /// It also logs key processing steps and ensures that the number of channels is consistent
243    /// with the data found in the file.
244    ///
245    /// # Returns
246    ///
247    /// - `Ok(())` if the data was successfully read and processed.
248    /// - `Err(String)` if there was an error reading or processing the file data. The error
249    ///   string provides details about the failure (e.g., column mismatches or data format issues).
250    ///
251    /// # Details
252    ///
253    /// - The function expects the `.easy` file to have the following general format:
254    ///   EEG data followed by accelerometer data (if available), markers, and timestamps.
255    /// - The EEG data is divided by channels, and the accelerometer data (if present) consists
256    ///   of three columns representing X, Y, and Z axes.
257
258    pub fn parse_data(&mut self) -> Result<()> {
259        let reader = self.get_file_reader(&self.filepath)?;
260        let mut rdr = csv::ReaderBuilder::new()
261            .delimiter(DELIMITER)
262            .has_headers(false)
263            .from_reader(reader);
264
265        let mut records = rdr.records();
266        let first_record = records.next().unwrap().unwrap();
267
268        if self.verbose {
269            println!("first_record - {first_record:?}");
270        }
271
272        let num_columns = first_record.len();
273
274        let num_channels = if [13, 25, 37].contains(&num_columns) {
275            num_columns - 5
276        } else if [10, 22, 34].contains(&num_columns) {
277            num_columns - 2
278        } else {
279            return Err(anyhow!("Number of columns mismatch with expected values."));
280        };
281
282        // Handle timestamp
283        let timestamp = first_record[first_record.len() - 1].parse::<u64>().unwrap();
284        if let Some(start_date) = DateTime::from_timestamp((timestamp / 1000) as i64, 0) {
285            self.eegstartdate = Some(start_date.format("%Y-%m-%d %H:%M:%S").to_string());
286        }
287
288        if self.verbose {
289            println!("Number of channels detected: {}", num_channels);
290            println!(
291                "First sample recorded: {}",
292                self.eegstartdate.clone().unwrap()
293            );
294        }
295
296        // Read the rest of the file into numpy-like data
297        let mut eeg_data = Vec::new();
298        let mut acc_data = Vec::new();
299        let mut markers = Vec::new();
300
301        for record in records {
302            let record = record.unwrap();
303            let eeg_values: Vec<Float> = record
304                .iter()
305                .take(num_channels)
306                .map(|x| x.parse::<Float>().unwrap())
307                .map(|f| f / self.scale)
308                .collect();
309            let acc_values: Vec<Float> = record
310                .iter()
311                .skip(num_channels)
312                .take(3)
313                .map(|x| x.parse::<Float>().unwrap())
314                .collect();
315            let marker_value: Float = record[num_channels + 3].parse().unwrap();
316
317            eeg_data.push(eeg_values);
318            acc_data.push(acc_values);
319            markers.push(marker_value);
320        }
321
322        self.np_eeg = Some(
323            Array2::from_shape_vec(
324                (eeg_data.len(), num_channels),
325                eeg_data.into_iter().flatten().collect(),
326            )
327            .unwrap(),
328        );
329        self.np_acc = Some(
330            Array2::from_shape_vec(
331                (acc_data.len(), 3),
332                acc_data.into_iter().flatten().collect(),
333            )
334            .unwrap(),
335        );
336        self.np_markers = Some(Array2::from_shape_vec((markers.len(), 1), markers).unwrap());
337
338        Ok(())
339    }
340
341    /// Reads and processes raw EEG and accelerometer data from the `.easy` file in a streaming manner.
342    ///
343    /// This function reads the `.easy` file in chunks and processes each chunk as it is read. This approach
344    /// helps to minimize memory usage when dealing with large files by avoiding the need to load the entire
345    /// file into memory at once.
346    ///
347    /// The function uses a callback (`process_chunk`) to handle each chunk of data. The callback is invoked
348    /// after processing each chunk, and it receives the following data:
349    /// - `eeg_chunk`: A `Vec<Vec<f32>>` representing a chunk of EEG data (one row per sample, one column per channel).
350    /// - `acc_chunk`: A `Vec<Vec<f32>>` representing a chunk of accelerometer data (three values per sample: X, Y, Z).
351    /// - `markers_chunk`: A `Vec<f32>` representing the marker data for each sample in the chunk.
352    ///
353    /// The chunk size can be customized by passing a `chunk_size` value (in number of rows). If no chunk size
354    /// is provided, the default chunk size will be `1000` rows.
355    ///
356    /// # Parameters:
357    /// - `chunk_size`: An optional parameter specifying the number of rows to process per chunk. If `None`
358    ///   is provided, the default chunk size will be `1000`.
359    /// - `process_chunk`: A callback function that takes three arguments: `eeg_chunk`, `acc_chunk`, and
360    ///   `markers_chunk`. This function will be called once a chunk is read and parsed.
361    ///
362    /// # Returns:
363    /// - `Ok(())` if the data was successfully read and processed.
364    /// - `Err(String)` if there was an error
365    pub fn stream<F>(&mut self, chunk_size: Option<usize>, mut process_chunk: F) -> Result<()>
366    where
367        F: FnMut(Vec<Vec<Float>>, Vec<Vec<Float>>, Vec<Float>), // Callback to process each chunk of data
368    {
369        let chunk_size = match chunk_size {
370            Some(chunk_size) => chunk_size,
371            None => 1000,
372        };
373        let reader = self.get_file_reader(&self.filepath)?;
374        let mut rdr = csv::ReaderBuilder::new()
375            .delimiter(DELIMITER)
376            .has_headers(false)
377            .from_reader(reader);
378
379        let mut records = rdr.records();
380        let first_record = records.next().unwrap().unwrap();
381
382        let num_columns = first_record.len();
383        let num_channels = if [13, 25, 37].contains(&num_columns) {
384            num_columns - 5
385        } else if [10, 22, 34].contains(&num_columns) {
386            num_columns - 2
387        } else {
388            return Err(anyhow!("Number of columns mismatch with expected values."));
389        };
390
391        // Handle timestamp
392        let timestamp = first_record[first_record.len() - 1].parse::<u64>().unwrap();
393        if let Some(start_date) = DateTime::from_timestamp((timestamp / 1000) as i64, 0) {
394            self.eegstartdate = Some(start_date.format("%Y-%m-%d %H:%M:%S").to_string());
395        }
396
397        if self.verbose {
398            println!(
399                "First sample recorded: {}",
400                self.eegstartdate.clone().unwrap()
401            );
402        }
403
404        // Process the records in chunks
405        let mut eeg_chunk = Vec::new();
406        let mut acc_chunk = Vec::new();
407        let mut markers_chunk = Vec::new();
408
409        for record in records {
410            let record = record.unwrap();
411
412            // Process EEG data (channels)
413            let eeg_values: Vec<Float> = record
414                .iter()
415                .take(num_channels)
416                .map(|x| x.parse::<Float>().unwrap())
417                .map(|f| f / self.scale)
418                .collect();
419            eeg_chunk.push(eeg_values);
420
421            // Process accelerometer data (3 axes)
422            let acc_values: Vec<Float> = record
423                .iter()
424                .skip(num_channels)
425                .take(3)
426                .map(|x| x.parse::<Float>().unwrap())
427                .collect();
428            acc_chunk.push(acc_values);
429
430            // Process marker data
431            let marker_value: Float = record[num_channels + 3].parse().unwrap();
432            markers_chunk.push(marker_value);
433
434            // Once a chunk is ready, call the callback to process the chunk
435            if eeg_chunk.len() >= chunk_size {
436                // Process every 1000 rows as a chunk
437                process_chunk(eeg_chunk.clone(), acc_chunk.clone(), markers_chunk.clone());
438                // Clear the chunk data after processing
439                eeg_chunk.clear();
440                acc_chunk.clear();
441                markers_chunk.clear();
442            }
443        }
444
445        // Process any remaining data in the chunk
446        if !eeg_chunk.is_empty() {
447            process_chunk(eeg_chunk, acc_chunk, markers_chunk);
448        }
449
450        Ok(())
451    }
452
453    /// Helper function to get a reader for the file, whether it's gzipped or not.
454    fn get_file_reader(&self, filepath: &str) -> Result<Box<dyn Read>> {
455        if filepath.ends_with(".gz") {
456            let file = File::open(filepath).map_err(|e| anyhow!(e.to_string()))?;
457            let decoder = GzDecoder::new(file);
458            Ok(Box::new(decoder))
459        } else {
460            let file = File::open(filepath).map_err(|e| anyhow!(e.to_string()))?;
461            Ok(Box::new(file))
462        }
463    }
464
465    /// Prints a summary of the `EasyReader` instance, displaying important metadata and previews of data.
466    ///
467    /// This function outputs the file path, base name, extension, number of channels, EEG start date,
468    /// and any log entries related to the processing steps. It also prints the first few rows of the EEG,
469    /// accelerometer, and markers data, if available. This method avoids printing the entire datasets.
470    pub fn print_summary(&self) {
471        // Print metadata
472        println!("File Path: {}", self.filepath);
473        println!("Base Name: {}", self.basename);
474        println!("Extension: {}", self.extension);
475
476        match &self.num_channels {
477            Some(channels) => println!("Number of Channels: {}", channels),
478            None => println!("Number of Channels: Not available"),
479        }
480
481        match &self.eegstartdate {
482            Some(start_date) => println!("EEG Start Date: {}", start_date),
483            None => println!("EEG Start Date: Not available"),
484        }
485
486        // Print a preview of EEG data (first 5 samples)
487        match &self.np_eeg {
488            Some(eeg) => {
489                let total_samples = eeg.shape()[0];
490                println!("\nEEG Data (First 5 of {total_samples} Samples):");
491                let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
492                let preview: Vec<Vec<Float>> = eeg
493                    .slice(s![..preview_count, ..]) // Get the first `preview_count` rows and all columns
494                    .axis_iter(ndarray::Axis(0)) // Iterate over rows
495                    .map(|row| row.to_owned().to_vec()) // Convert each row into a Vec<Float>
496                    .collect(); // Collect all rows into a Vec<Vec<Float>>
497
498                for (i, row) in preview.iter().enumerate() {
499                    println!("Sample {}: {:?}", i + 1, row);
500                }
501                println!(
502                    "Showing {} out of {} EEG samples.",
503                    preview_count, total_samples
504                );
505            }
506            None => println!("EEG Data: Not available"),
507        }
508
509        // Print a preview of accelerometer data (first 5 samples if available)
510        match &self.np_acc {
511            Some(acc) => {
512                let total_samples = acc.shape()[0];
513                println!("\nAccelerometer Data (First 5 of {total_samples} Samples):");
514                let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
515                let preview: Vec<Vec<Float>> = acc
516                    .slice(s![..preview_count, ..]) // Get the first `preview_count` rows and all columns
517                    .axis_iter(ndarray::Axis(0)) // Iterate over rows
518                    .map(|row| row.to_owned().to_vec()) // Convert each row into a Vec<f32>
519                    .collect(); // Collect all rows into a Vec<Vec<Float>>
520
521                for (i, row) in preview.iter().enumerate() {
522                    println!("Sample {}: {:?}", i + 1, row);
523                }
524            }
525            None => println!("Accelerometer Data: Not available"),
526        }
527
528        // Print a preview of markers (first 5 samples if available)
529        match &self.np_markers {
530            Some(markers) => {
531                let total_samples = markers.shape()[0];
532                println!("\nMarkers Data (First 5 of {total_samples} Samples):");
533                let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
534                let (preview, _) = markers
535                    .slice(s![..preview_count, ..]) // Get the first `preview_count` elements
536                    .to_owned() // Copy the values from the slice
537                    .into_raw_vec_and_offset(); // Convert it into a Vec<Float>
538
539                for (i, marker) in preview.iter().enumerate() {
540                    println!("Marker {}: {}", i + 1, marker);
541                }
542            }
543            None => println!("Markers Data: Not available"),
544        }
545
546        // Print log entries
547        println!("\nLog Entries:");
548        for entry in &self.log {
549            println!("- {}", entry);
550        }
551    }
552}