easy_rs/easy_reader.rs
1use anyhow::{anyhow, Result};
2use chrono::{DateTime, Utc};
3use flate2::read::GzDecoder;
4use ndarray::{s, Array2};
5use std::fs::File;
6use std::io::{BufRead, BufReader, Read};
7use std::path::Path;
8
/// Field separator passed to the CSV reader when reading `.easy` files (a tab).
const DELIMITER: u8 = b'\t';
/// Floating-point type used for all numeric sample data in this module.
pub type Float = f64;
11
/// Reader for EEG recordings stored in `.easy` files.
///
/// An instance is created from the path of a `.easy` or `.easy.gz` file. On construction
/// the companion `.info` file is consulted for channel metadata when it can be opened;
/// otherwise the first row of the data file is used to work out the number of channels.
/// The sample data itself (EEG, accelerometer, markers) is only loaded when the parsing
/// methods are called, and is stored in the `np_*` fields.
#[derive(Debug)]
// Several fields are stored for reference only and are never read by the code visible in
// this file, hence the blanket allowance.
#[allow(dead_code)]
pub struct EasyReader {
    /// Divisor applied to every raw EEG value while parsing (`raw / scale`).
    ///
    /// Accelerometer and marker values are not scaled.
    scale: Float,

    /// When `true`, progress information is printed to stdout during construction and parsing.
    verbose: bool,

    /// Path to the `.easy` / `.easy.gz` file being read, exactly as given to the constructor.
    ///
    /// This is the file that is opened whenever sample data is parsed.
    filepath: String,

    /// Base name of the file without the extension.
    ///
    /// This is derived from the file-name component of `filepath` and is meant to exclude
    /// the extension (e.g., `.easy` or `.easy.gz`). It is shown by `print_summary`.
    basename: String,

    /// The extension of the file (either "easy" or "easy.gz").
    ///
    /// Paths with any other ending are rejected by the constructor.
    extension: String,

    /// Root of the file name (file path without extension).
    ///
    /// Used to construct the path for the associated `.info` file.
    filenameroot: String,

    /// Path to the associated `.info` file (`filenameroot` followed by `.info`).
    ///
    /// If this file can be opened, it provides the electrode names and accelerometer flag.
    infofilepath: String,

    /// Flag indicating whether accelerometer data is present.
    ///
    /// Starts as `false` and is filled in from the available metadata during construction.
    acc_data: bool,

    /// List of electrode names.
    ///
    /// If the `.info` file is available, this holds the last whitespace-separated token of
    /// every line containing `Channel `. Otherwise it is populated with default names
    /// (`Ch1`, `Ch2`, ...).
    electrodes: Vec<String>,

    /// Number of EEG channels.
    ///
    /// Equals the number of electrode names found in the `.info` file, or is derived from
    /// the column count of the `.easy` file when no `.info` file can be opened.
    num_channels: Option<usize>,

    /// Start date of the EEG recording, formatted as `%Y-%m-%d %H:%M:%S`.
    ///
    /// Extracted from the last column of the first row of the `.easy` file, which is parsed
    /// as a timestamp in milliseconds. `None` until data has been parsed.
    eegstartdate: Option<String>,

    /// Array representing the time vector of the dataset in seconds.
    ///
    /// NOTE(review): nothing in the code visible in this file assigns this field; it is
    /// initialised to `None` by the constructor.
    np_time: Option<Array2<Float>>,

    /// 2D array of EEG data.
    ///
    /// Each row is one sample and each column one channel (electrode). Values are the raw
    /// file values divided by `scale` (presumably yielding microvolts; confirm against the
    /// `scale` passed by callers).
    np_eeg: Option<Array2<Float>>,

    /// 2D array of stimulus data (optional).
    ///
    /// NOTE(review): nothing in the code visible in this file assigns this field; it is
    /// initialised to `None` by the constructor.
    np_stim: Option<Array2<Float>>,

    /// 2D array of accelerometer data.
    ///
    /// When populated, the array has shape `(num_samples, 3)`, one column per accelerometer
    /// value read from the file (presumably the X, Y and Z axes).
    np_acc: Option<Array2<Float>>,

    /// Array of markers associated with the EEG data.
    ///
    /// Shape `(num_samples, 1)`: one marker value per sample, read from the column that
    /// follows the EEG (and accelerometer) columns.
    np_markers: Option<Array2<Float>>,

    /// Log of events related to the processing of the `.easy` file.
    ///
    /// The constructor adds a single entry recording the creation time; entries are printed
    /// by `print_summary`.
    log: Vec<String>,
}
112
113impl EasyReader {
114 /// Initializes a new `EasyReader` instance from the given file path.
115 pub fn new(filepath: &str, scale: Float, verbose: bool) -> Result<Self> {
116 if verbose {
117 println!("Initializing in file path: {}", filepath);
118 }
119
120 let extension;
121 let (filenameroot, basename) = if filepath.ends_with(".easy.gz") {
122 extension = "easy.gz".to_string();
123 let filenameroot = filepath.trim_end_matches(".gz");
124 let basename = Path::new(filepath)
125 .file_name()
126 .unwrap()
127 .to_str()
128 .unwrap()
129 .trim_end_matches(".gz")
130 .to_string();
131 (filenameroot.to_string(), basename)
132 } else if filepath.ends_with(".easy") {
133 extension = "easy".to_string();
134 let filenameroot = filepath.trim_end_matches(".easy");
135 let basename = Path::new(filepath)
136 .file_name()
137 .unwrap()
138 .to_str()
139 .unwrap()
140 .trim_end_matches(".easy")
141 .to_string();
142 (filenameroot.to_string(), basename)
143 } else {
144 return Err(anyhow!("ERROR: Proposed file has wrong extension."));
145 };
146
147 let infofilepath = format!("{}.info", filenameroot);
148
149 let mut reader = EasyReader {
150 scale,
151 verbose,
152 filepath: filepath.to_string(),
153 basename,
154 extension,
155 filenameroot,
156 infofilepath,
157 acc_data: false,
158 electrodes: Vec::new(),
159 num_channels: None,
160 eegstartdate: None,
161 np_time: None,
162 np_eeg: None,
163 np_stim: None,
164 np_acc: None,
165 np_markers: None,
166 log: vec![format!("capsule created: {}", Utc::now())],
167 };
168
169 // Try to read the info file
170 reader.get_info()?;
171
172 Ok(reader)
173 }
174
175 /// Reads and processes the `.info` file for metadata about channels and accelerometer data.
176 fn get_info(&mut self) -> Result<()> {
177 let file = File::open(&self.infofilepath);
178
179 match file {
180 Ok(file) => {
181 let reader = BufReader::new(file);
182 let mut electrodes = Vec::new();
183 let mut acc_data = false;
184
185 for line in reader.lines() {
186 let line = line.unwrap();
187 if line.contains("Channel ") {
188 let electrode = line.split_whitespace().last().unwrap().to_string();
189 electrodes.push(electrode);
190 }
191 if line.contains("Accelerometer data: ") {
192 acc_data = true;
193 }
194 }
195
196 self.electrodes = electrodes;
197 self.acc_data = acc_data;
198 self.num_channels = Some(self.electrodes.len());
199
200 Ok(())
201 }
202 Err(_) => {
203 // If no info file is found, read the .easy file to determine the number of channels
204 self.read_easy_file_for_channels()
205 }
206 }
207 }
208
209 /// Reads the `.easy` file to determine the number of channels based on the file structure.
210 fn read_easy_file_for_channels(&mut self) -> Result<()> {
211 let reader = self.get_file_reader(&self.filepath)?;
212
213 let mut rdr = csv::ReaderBuilder::new()
214 .delimiter(DELIMITER)
215 .has_headers(false)
216 .from_reader(reader);
217
218 // Read the first 5 lines to determine number of columns
219 let mut header = rdr.records().take(5);
220 let first_record = header.next().unwrap().unwrap();
221
222 let num_columns = first_record.len();
223
224 let num_channels = if [13, 25, 37].contains(&num_columns) {
225 num_columns - 5
226 } else if [10, 22, 34].contains(&num_columns) {
227 num_columns - 2
228 } else {
229 return Err(anyhow!("Number of columns mismatch with expected values."));
230 };
231
232 self.num_channels = Some(num_channels);
233 self.electrodes = (1..=num_channels).map(|x| format!("Ch{}", x)).collect();
234 Ok(())
235 }
236
237 /// Reads and processes raw EEG and accelerometer data from the `.easy` file.
238 ///
239 /// This method reads the `.easy` file (or the data section of it), converts the EEG data
240 /// into microvolts (uV), and extracts time, accelerometer, and marker data. It stores the
241 /// resulting data in the struct's fields (e.g., `np_eeg`, `np_time`, `np_acc`, `np_markers`).
242 /// It also logs key processing steps and ensures that the number of channels is consistent
243 /// with the data found in the file.
244 ///
245 /// # Returns
246 ///
247 /// - `Ok(())` if the data was successfully read and processed.
248 /// - `Err(String)` if there was an error reading or processing the file data. The error
249 /// string provides details about the failure (e.g., column mismatches or data format issues).
250 ///
251 /// # Details
252 ///
253 /// - The function expects the `.easy` file to have the following general format:
254 /// EEG data followed by accelerometer data (if available), markers, and timestamps.
255 /// - The EEG data is divided by channels, and the accelerometer data (if present) consists
256 /// of three columns representing X, Y, and Z axes.
257
258 pub fn parse_data(&mut self) -> Result<()> {
259 let reader = self.get_file_reader(&self.filepath)?;
260 let mut rdr = csv::ReaderBuilder::new()
261 .delimiter(DELIMITER)
262 .has_headers(false)
263 .from_reader(reader);
264
265 let mut records = rdr.records();
266 let first_record = records.next().unwrap().unwrap();
267
268 if self.verbose {
269 println!("first_record - {first_record:?}");
270 }
271
272 let num_columns = first_record.len();
273
274 let num_channels = if [13, 25, 37].contains(&num_columns) {
275 num_columns - 5
276 } else if [10, 22, 34].contains(&num_columns) {
277 num_columns - 2
278 } else {
279 return Err(anyhow!("Number of columns mismatch with expected values."));
280 };
281
282 // Handle timestamp
283 let timestamp = first_record[first_record.len() - 1].parse::<u64>().unwrap();
284 if let Some(start_date) = DateTime::from_timestamp((timestamp / 1000) as i64, 0) {
285 self.eegstartdate = Some(start_date.format("%Y-%m-%d %H:%M:%S").to_string());
286 }
287
288 if self.verbose {
289 println!("Number of channels detected: {}", num_channels);
290 println!(
291 "First sample recorded: {}",
292 self.eegstartdate.clone().unwrap()
293 );
294 }
295
296 // Read the rest of the file into numpy-like data
297 let mut eeg_data = Vec::new();
298 let mut acc_data = Vec::new();
299 let mut markers = Vec::new();
300
301 for record in records {
302 let record = record.unwrap();
303 let eeg_values: Vec<Float> = record
304 .iter()
305 .take(num_channels)
306 .map(|x| x.parse::<Float>().unwrap())
307 .map(|f| f / self.scale)
308 .collect();
309 let acc_values: Vec<Float> = record
310 .iter()
311 .skip(num_channels)
312 .take(3)
313 .map(|x| x.parse::<Float>().unwrap())
314 .collect();
315 let marker_value: Float = record[num_channels + 3].parse().unwrap();
316
317 eeg_data.push(eeg_values);
318 acc_data.push(acc_values);
319 markers.push(marker_value);
320 }
321
322 self.np_eeg = Some(
323 Array2::from_shape_vec(
324 (eeg_data.len(), num_channels),
325 eeg_data.into_iter().flatten().collect(),
326 )
327 .unwrap(),
328 );
329 self.np_acc = Some(
330 Array2::from_shape_vec(
331 (acc_data.len(), 3),
332 acc_data.into_iter().flatten().collect(),
333 )
334 .unwrap(),
335 );
336 self.np_markers = Some(Array2::from_shape_vec((markers.len(), 1), markers).unwrap());
337
338 Ok(())
339 }
340
341 /// Reads and processes raw EEG and accelerometer data from the `.easy` file in a streaming manner.
342 ///
343 /// This function reads the `.easy` file in chunks and processes each chunk as it is read. This approach
344 /// helps to minimize memory usage when dealing with large files by avoiding the need to load the entire
345 /// file into memory at once.
346 ///
347 /// The function uses a callback (`process_chunk`) to handle each chunk of data. The callback is invoked
348 /// after processing each chunk, and it receives the following data:
349 /// - `eeg_chunk`: A `Vec<Vec<f32>>` representing a chunk of EEG data (one row per sample, one column per channel).
350 /// - `acc_chunk`: A `Vec<Vec<f32>>` representing a chunk of accelerometer data (three values per sample: X, Y, Z).
351 /// - `markers_chunk`: A `Vec<f32>` representing the marker data for each sample in the chunk.
352 ///
353 /// The chunk size can be customized by passing a `chunk_size` value (in number of rows). If no chunk size
354 /// is provided, the default chunk size will be `1000` rows.
355 ///
356 /// # Parameters:
357 /// - `chunk_size`: An optional parameter specifying the number of rows to process per chunk. If `None`
358 /// is provided, the default chunk size will be `1000`.
359 /// - `process_chunk`: A callback function that takes three arguments: `eeg_chunk`, `acc_chunk`, and
360 /// `markers_chunk`. This function will be called once a chunk is read and parsed.
361 ///
362 /// # Returns:
363 /// - `Ok(())` if the data was successfully read and processed.
364 /// - `Err(String)` if there was an error
365 pub fn stream<F>(&mut self, chunk_size: Option<usize>, mut process_chunk: F) -> Result<()>
366 where
367 F: FnMut(Vec<Vec<Float>>, Vec<Vec<Float>>, Vec<Float>), // Callback to process each chunk of data
368 {
369 let chunk_size = match chunk_size {
370 Some(chunk_size) => chunk_size,
371 None => 1000,
372 };
373 let reader = self.get_file_reader(&self.filepath)?;
374 let mut rdr = csv::ReaderBuilder::new()
375 .delimiter(DELIMITER)
376 .has_headers(false)
377 .from_reader(reader);
378
379 let mut records = rdr.records();
380 let first_record = records.next().unwrap().unwrap();
381
382 let num_columns = first_record.len();
383 let num_channels = if [13, 25, 37].contains(&num_columns) {
384 num_columns - 5
385 } else if [10, 22, 34].contains(&num_columns) {
386 num_columns - 2
387 } else {
388 return Err(anyhow!("Number of columns mismatch with expected values."));
389 };
390
391 // Handle timestamp
392 let timestamp = first_record[first_record.len() - 1].parse::<u64>().unwrap();
393 if let Some(start_date) = DateTime::from_timestamp((timestamp / 1000) as i64, 0) {
394 self.eegstartdate = Some(start_date.format("%Y-%m-%d %H:%M:%S").to_string());
395 }
396
397 if self.verbose {
398 println!(
399 "First sample recorded: {}",
400 self.eegstartdate.clone().unwrap()
401 );
402 }
403
404 // Process the records in chunks
405 let mut eeg_chunk = Vec::new();
406 let mut acc_chunk = Vec::new();
407 let mut markers_chunk = Vec::new();
408
409 for record in records {
410 let record = record.unwrap();
411
412 // Process EEG data (channels)
413 let eeg_values: Vec<Float> = record
414 .iter()
415 .take(num_channels)
416 .map(|x| x.parse::<Float>().unwrap())
417 .map(|f| f / self.scale)
418 .collect();
419 eeg_chunk.push(eeg_values);
420
421 // Process accelerometer data (3 axes)
422 let acc_values: Vec<Float> = record
423 .iter()
424 .skip(num_channels)
425 .take(3)
426 .map(|x| x.parse::<Float>().unwrap())
427 .collect();
428 acc_chunk.push(acc_values);
429
430 // Process marker data
431 let marker_value: Float = record[num_channels + 3].parse().unwrap();
432 markers_chunk.push(marker_value);
433
434 // Once a chunk is ready, call the callback to process the chunk
435 if eeg_chunk.len() >= chunk_size {
436 // Process every 1000 rows as a chunk
437 process_chunk(eeg_chunk.clone(), acc_chunk.clone(), markers_chunk.clone());
438 // Clear the chunk data after processing
439 eeg_chunk.clear();
440 acc_chunk.clear();
441 markers_chunk.clear();
442 }
443 }
444
445 // Process any remaining data in the chunk
446 if !eeg_chunk.is_empty() {
447 process_chunk(eeg_chunk, acc_chunk, markers_chunk);
448 }
449
450 Ok(())
451 }
452
453 /// Helper function to get a reader for the file, whether it's gzipped or not.
454 fn get_file_reader(&self, filepath: &str) -> Result<Box<dyn Read>> {
455 if filepath.ends_with(".gz") {
456 let file = File::open(filepath).map_err(|e| anyhow!(e.to_string()))?;
457 let decoder = GzDecoder::new(file);
458 Ok(Box::new(decoder))
459 } else {
460 let file = File::open(filepath).map_err(|e| anyhow!(e.to_string()))?;
461 Ok(Box::new(file))
462 }
463 }
464
465 /// Prints a summary of the `EasyReader` instance, displaying important metadata and previews of data.
466 ///
467 /// This function outputs the file path, base name, extension, number of channels, EEG start date,
468 /// and any log entries related to the processing steps. It also prints the first few rows of the EEG,
469 /// accelerometer, and markers data, if available. This method avoids printing the entire datasets.
470 pub fn print_summary(&self) {
471 // Print metadata
472 println!("File Path: {}", self.filepath);
473 println!("Base Name: {}", self.basename);
474 println!("Extension: {}", self.extension);
475
476 match &self.num_channels {
477 Some(channels) => println!("Number of Channels: {}", channels),
478 None => println!("Number of Channels: Not available"),
479 }
480
481 match &self.eegstartdate {
482 Some(start_date) => println!("EEG Start Date: {}", start_date),
483 None => println!("EEG Start Date: Not available"),
484 }
485
486 // Print a preview of EEG data (first 5 samples)
487 match &self.np_eeg {
488 Some(eeg) => {
489 let total_samples = eeg.shape()[0];
490 println!("\nEEG Data (First 5 of {total_samples} Samples):");
491 let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
492 let preview: Vec<Vec<Float>> = eeg
493 .slice(s![..preview_count, ..]) // Get the first `preview_count` rows and all columns
494 .axis_iter(ndarray::Axis(0)) // Iterate over rows
495 .map(|row| row.to_owned().to_vec()) // Convert each row into a Vec<Float>
496 .collect(); // Collect all rows into a Vec<Vec<Float>>
497
498 for (i, row) in preview.iter().enumerate() {
499 println!("Sample {}: {:?}", i + 1, row);
500 }
501 println!(
502 "Showing {} out of {} EEG samples.",
503 preview_count, total_samples
504 );
505 }
506 None => println!("EEG Data: Not available"),
507 }
508
509 // Print a preview of accelerometer data (first 5 samples if available)
510 match &self.np_acc {
511 Some(acc) => {
512 let total_samples = acc.shape()[0];
513 println!("\nAccelerometer Data (First 5 of {total_samples} Samples):");
514 let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
515 let preview: Vec<Vec<Float>> = acc
516 .slice(s![..preview_count, ..]) // Get the first `preview_count` rows and all columns
517 .axis_iter(ndarray::Axis(0)) // Iterate over rows
518 .map(|row| row.to_owned().to_vec()) // Convert each row into a Vec<f32>
519 .collect(); // Collect all rows into a Vec<Vec<Float>>
520
521 for (i, row) in preview.iter().enumerate() {
522 println!("Sample {}: {:?}", i + 1, row);
523 }
524 }
525 None => println!("Accelerometer Data: Not available"),
526 }
527
528 // Print a preview of markers (first 5 samples if available)
529 match &self.np_markers {
530 Some(markers) => {
531 let total_samples = markers.shape()[0];
532 println!("\nMarkers Data (First 5 of {total_samples} Samples):");
533 let preview_count = total_samples.min(5); // Preview the first 5 samples or total samples if less than 5
534 let (preview, _) = markers
535 .slice(s![..preview_count, ..]) // Get the first `preview_count` elements
536 .to_owned() // Copy the values from the slice
537 .into_raw_vec_and_offset(); // Convert it into a Vec<Float>
538
539 for (i, marker) in preview.iter().enumerate() {
540 println!("Marker {}: {}", i + 1, marker);
541 }
542 }
543 None => println!("Markers Data: Not available"),
544 }
545
546 // Print log entries
547 println!("\nLog Entries:");
548 for entry in &self.log {
549 println!("- {}", entry);
550 }
551 }
552}