Skip to main content

exg/edf/
mod.rs

1//! EDF/EDF+ file reader.
2//!
3//! Implements reading of `.edf` EEG recordings compatible with
4//! [MNE-Python](https://mne.tools) `mne.io.read_raw_edf()`.
5//!
6//! # Quick start
7//! ```no_run
8//! use exg::edf::open_raw_edf;
9//!
10//! let raw = open_raw_edf("data/recording.edf").unwrap();
11//! println!("{} channels @ {} Hz", raw.header.num_signals, raw.header.sample_rate);
12//! let data = raw.read_all_data().unwrap();  // [C, T] f32, physical units
13//! ```
14//!
15//! # EDF Format Reference
16//!
17//! The European Data Format (EDF) stores continuous polygraphic recordings.
18//! Each file has a fixed-size header followed by data records containing
19//! interleaved 16-bit integer samples for all channels.
20//!
21//! EDF+ extends EDF with annotations stored in special "EDF Annotations"
22//! channels. This reader parses EDF+ annotations and exposes them via
23//! [`RawEdf::annotations`].
24
25use anyhow::{bail, Context, Result};
26use ndarray::Array2;
27use std::io::{Read, Seek, SeekFrom};
28use std::path::Path;
29
30// ── Header structures ─────────────────────────────────────────────────────────
31
32/// Parsed EDF/EDF+ file header.
33#[derive(Debug, Clone)]
34pub struct EdfHeader {
35    /// Version of data format (usually "0").
36    pub version: String,
37    /// Local patient identification.
38    pub patient_id: String,
39    /// Local recording identification.
40    pub recording_id: String,
41    /// Start date as string (dd.mm.yy).
42    pub start_date: String,
43    /// Start time as string (hh.mm.ss).
44    pub start_time: String,
45    /// Number of bytes in the header.
46    pub header_bytes: usize,
47    /// Reserved field (contains "EDF+C" or "EDF+D" for EDF+).
48    pub reserved: String,
49    /// Number of data records in the file.
50    pub num_records: usize,
51    /// Duration of each data record in seconds.
52    pub record_duration: f64,
53    /// Number of signals (channels) in the file.
54    pub num_signals: usize,
55    /// Per-signal information.
56    pub signals: Vec<SignalHeader>,
57    /// Maximum sampling rate across all signals (the effective sfreq).
58    pub sample_rate: f32,
59    /// Whether this is an EDF+ file.
60    pub is_edfplus: bool,
61}
62
63/// Per-signal (channel) header information.
64#[derive(Debug, Clone)]
65pub struct SignalHeader {
66    /// Channel label (e.g. "EEG FP1-REF").
67    pub label: String,
68    /// Transducer type.
69    pub transducer: String,
70    /// Physical dimension (e.g. "uV").
71    pub physical_dimension: String,
72    /// Physical minimum.
73    pub physical_min: f64,
74    /// Physical maximum.
75    pub physical_max: f64,
76    /// Digital minimum.
77    pub digital_min: f64,
78    /// Digital maximum.
79    pub digital_max: f64,
80    /// Prefiltering string.
81    pub prefiltering: String,
82    /// Number of samples per data record for this signal.
83    pub samples_per_record: usize,
84    /// Reserved field.
85    pub reserved: String,
86}
87
88impl SignalHeader {
89    /// Compute the calibration factor: physical_range / digital_range.
90    pub fn cal(&self) -> f64 {
91        let phys_range = self.physical_max - self.physical_min;
92        let dig_range = self.digital_max - self.digital_min;
93        if dig_range == 0.0 { 1.0 } else { phys_range / dig_range }
94    }
95
96    /// Compute the offset for converting digital to physical values.
97    pub fn offset(&self) -> f64 {
98        self.physical_min - self.digital_min * self.cal()
99    }
100
101    /// Get the unit scaling factor (to convert to volts).
102    pub fn unit_scale(&self) -> f64 {
103        let dim = self.physical_dimension.trim().to_lowercase();
104        // Allow μ (greek mu), µ (micro symbol), u prefix
105        if dim == "uv" || dim == "µv" || dim == "\u{03bc}v" {
106            1e-6
107        } else if dim == "mv" {
108            1e-3
109        } else {
110            1.0 // "v" or unknown unit — no scaling
111        }
112    }
113}
114
115/// An EDF+ annotation event.
116#[derive(Debug, Clone)]
117pub struct EdfAnnotation {
118    /// Onset time in seconds from the start of the recording.
119    pub onset: f64,
120    /// Duration in seconds (0 if not specified).
121    pub duration: f64,
122    /// Annotation text.
123    pub description: String,
124}
125
126// ── Raw EDF structure ─────────────────────────────────────────────────────────
127
128/// A loaded EDF/EDF+ file.
129pub struct RawEdf {
130    /// Parsed header.
131    pub header: EdfHeader,
132    /// Path to the file (for lazy reading).
133    pub path: std::path::PathBuf,
134    /// Parsed EDF+ annotations (empty for plain EDF).
135    pub annotations: Vec<EdfAnnotation>,
136}
137
138/// Open an EDF/EDF+ file and parse the header.
139///
140/// # Arguments
141/// * `path` — Path to the `.edf` file.
142///
143/// # Returns
144/// A [`RawEdf`] struct with the parsed header. Use [`RawEdf::read_all_data()`]
145/// to read the signal data.
146pub fn open_raw_edf<P: AsRef<Path>>(path: P) -> Result<RawEdf> {
147    let path = path.as_ref().to_path_buf();
148    let mut file = std::fs::File::open(&path)
149        .with_context(|| format!("opening EDF file: {}", path.display()))?;
150
151    let header = read_header(&mut file)?;
152
153    // Read annotations if EDF+
154    let annotations = if header.is_edfplus {
155        read_annotations(&mut file, &header)?
156    } else {
157        vec![]
158    };
159
160    Ok(RawEdf { header, path, annotations })
161}
162
163impl RawEdf {
164    /// Read all signal data from the EDF file.
165    ///
166    /// Returns an `Array2<f32>` of shape `[C, T]` where:
167    /// * `C` = number of non-annotation channels
168    /// * `T` = total samples (num_records × samples_per_record for the max-rate channel)
169    ///
170    /// Data is converted to physical units (scaled by cal + offset).
171    /// Annotation channels (labeled "EDF Annotations") are excluded.
172    pub fn read_all_data(&self) -> Result<Array2<f32>> {
173        let mut file = std::fs::File::open(&self.path)?;
174        read_data(&mut file, &self.header)
175    }
176
177    /// Channel names (excluding annotation channels).
178    pub fn channel_names(&self) -> Vec<String> {
179        self.header.signals.iter()
180            .filter(|s| !is_annotation_channel(&s.label))
181            .map(|s| s.label.clone())
182            .collect()
183    }
184
185    /// Number of non-annotation channels.
186    pub fn num_channels(&self) -> usize {
187        self.header.signals.iter()
188            .filter(|s| !is_annotation_channel(&s.label))
189            .count()
190    }
191
192    /// Total number of samples per channel at the maximum sample rate.
193    pub fn num_samples(&self) -> usize {
194        let max_spr = self.header.signals.iter()
195            .filter(|s| !is_annotation_channel(&s.label))
196            .map(|s| s.samples_per_record)
197            .max()
198            .unwrap_or(0);
199        self.header.num_records * max_spr
200    }
201}
202
203fn is_annotation_channel(label: &str) -> bool {
204    let l = label.trim().to_lowercase();
205    l.contains("edf annotation") || l.contains("edf+annotation")
206        || l == "edf annotations" || l == "bdf annotations"
207}
208
209// ── Header parsing ────────────────────────────────────────────────────────────
210
211fn read_fixed_str<R: Read>(reader: &mut R, n: usize) -> Result<String> {
212    let mut buf = vec![0u8; n];
213    reader.read_exact(&mut buf)?;
214    // EDF uses ASCII/Latin-1
215    Ok(String::from_utf8_lossy(&buf).trim_end().to_string())
216}
217
218fn read_fixed_f64<R: Read>(reader: &mut R, n: usize) -> Result<f64> {
219    let s = read_fixed_str(reader, n)?;
220    let s = s.replace(',', ".");
221    s.trim().parse::<f64>()
222        .with_context(|| format!("parsing float from EDF header: '{s}'"))
223}
224
225fn read_fixed_usize<R: Read>(reader: &mut R, n: usize) -> Result<usize> {
226    let s = read_fixed_str(reader, n)?;
227    s.trim().parse::<usize>()
228        .with_context(|| format!("parsing int from EDF header: '{s}'"))
229}
230
231fn read_header<R: Read>(reader: &mut R) -> Result<EdfHeader> {
232    let version = read_fixed_str(reader, 8)?;
233    let patient_id = read_fixed_str(reader, 80)?;
234    let recording_id = read_fixed_str(reader, 80)?;
235    let start_date = read_fixed_str(reader, 8)?;
236    let start_time = read_fixed_str(reader, 8)?;
237    let header_bytes = read_fixed_usize(reader, 8)?;
238    let reserved = read_fixed_str(reader, 44)?;
239    let num_records = read_fixed_usize(reader, 8)?;
240    let record_duration = read_fixed_f64(reader, 8)?;
241    let num_signals = read_fixed_usize(reader, 4)?;
242
243    let is_edfplus = reserved.contains("EDF+");
244
245    // Read per-signal headers (each field is num_signals × field_width)
246    let mut labels = Vec::with_capacity(num_signals);
247    for _ in 0..num_signals {
248        labels.push(read_fixed_str(reader, 16)?);
249    }
250
251    let mut transducers = Vec::with_capacity(num_signals);
252    for _ in 0..num_signals {
253        transducers.push(read_fixed_str(reader, 80)?);
254    }
255
256    let mut phys_dims = Vec::with_capacity(num_signals);
257    for _ in 0..num_signals {
258        phys_dims.push(read_fixed_str(reader, 8)?);
259    }
260
261    let mut phys_mins = Vec::with_capacity(num_signals);
262    for _ in 0..num_signals {
263        phys_mins.push(read_fixed_f64(reader, 8)?);
264    }
265
266    let mut phys_maxs = Vec::with_capacity(num_signals);
267    for _ in 0..num_signals {
268        phys_maxs.push(read_fixed_f64(reader, 8)?);
269    }
270
271    let mut dig_mins = Vec::with_capacity(num_signals);
272    for _ in 0..num_signals {
273        dig_mins.push(read_fixed_f64(reader, 8)?);
274    }
275
276    let mut dig_maxs = Vec::with_capacity(num_signals);
277    for _ in 0..num_signals {
278        dig_maxs.push(read_fixed_f64(reader, 8)?);
279    }
280
281    let mut prefilterings = Vec::with_capacity(num_signals);
282    for _ in 0..num_signals {
283        prefilterings.push(read_fixed_str(reader, 80)?);
284    }
285
286    let mut samples_per_record = Vec::with_capacity(num_signals);
287    for _ in 0..num_signals {
288        samples_per_record.push(read_fixed_usize(reader, 8)?);
289    }
290
291    // Reserved per signal (32 bytes each)
292    let mut sig_reserved = Vec::with_capacity(num_signals);
293    for _ in 0..num_signals {
294        sig_reserved.push(read_fixed_str(reader, 32)?);
295    }
296
297    let mut signals = Vec::with_capacity(num_signals);
298    for i in 0..num_signals {
299        signals.push(SignalHeader {
300            label: labels[i].clone(),
301            transducer: transducers[i].clone(),
302            physical_dimension: phys_dims[i].clone(),
303            physical_min: phys_mins[i],
304            physical_max: phys_maxs[i],
305            digital_min: dig_mins[i],
306            digital_max: dig_maxs[i],
307            prefiltering: prefilterings[i].clone(),
308            samples_per_record: samples_per_record[i],
309            reserved: sig_reserved[i].clone(),
310        });
311    }
312
313    // Compute the maximum sample rate
314    let max_spr = signals.iter()
315        .filter(|s| !is_annotation_channel(&s.label))
316        .map(|s| s.samples_per_record)
317        .max()
318        .unwrap_or(1);
319    let sample_rate = if record_duration > 0.0 {
320        max_spr as f32 / record_duration as f32
321    } else {
322        max_spr as f32
323    };
324
325    Ok(EdfHeader {
326        version,
327        patient_id,
328        recording_id,
329        start_date,
330        start_time,
331        header_bytes,
332        reserved,
333        num_records,
334        record_duration,
335        num_signals,
336        signals,
337        sample_rate,
338        is_edfplus,
339    })
340}
341
342// ── Data reading ──────────────────────────────────────────────────────────────
343
344fn read_data<R: Read + Seek>(reader: &mut R, header: &EdfHeader) -> Result<Array2<f32>> {
345    reader.seek(SeekFrom::Start(header.header_bytes as u64))?;
346
347    // Identify non-annotation signal indices
348    let sig_indices: Vec<usize> = (0..header.num_signals)
349        .filter(|&i| !is_annotation_channel(&header.signals[i].label))
350        .collect();
351
352    if sig_indices.is_empty() {
353        bail!("No non-annotation channels found in EDF file");
354    }
355
356    let max_spr = sig_indices.iter()
357        .map(|&i| header.signals[i].samples_per_record)
358        .max()
359        .unwrap_or(1);
360
361    let n_ch = sig_indices.len();
362    let n_total = header.num_records * max_spr;
363    let mut data = Array2::<f32>::zeros((n_ch, n_total));
364
365    // Total samples in one data record across all signals
366    let record_samples: usize = header.signals.iter()
367        .map(|s| s.samples_per_record)
368        .sum();
369
370    // Read record by record
371    let mut record_buf = vec![0i16; record_samples];
372    let mut byte_buf = vec![0u8; record_samples * 2];
373
374    for rec in 0..header.num_records {
375        reader.read_exact(&mut byte_buf)?;
376        // Convert bytes to i16 (little-endian)
377        for (i, chunk) in byte_buf.chunks_exact(2).enumerate() {
378            record_buf[i] = i16::from_le_bytes([chunk[0], chunk[1]]);
379        }
380
381        // Extract each non-annotation channel
382        let mut offset = 0;
383        let mut out_ch = 0;
384        for sig_idx in 0..header.num_signals {
385            let spr = header.signals[sig_idx].samples_per_record;
386
387            if sig_indices.contains(&sig_idx) {
388                let sig = &header.signals[sig_idx];
389                let cal = sig.cal();
390                let off = sig.offset();
391                let unit = sig.unit_scale();
392
393                let dst_start = rec * max_spr;
394
395                if spr == max_spr {
396                    // Same sample rate as max → direct copy
397                    for j in 0..spr {
398                        let digital = record_buf[offset + j] as f64;
399                        let physical = (digital * cal + off) * unit;
400                        data[[out_ch, dst_start + j]] = physical as f32;
401                    }
402                } else {
403                    // Different sample rate → simple nearest-neighbor upsampling
404                    // (MNE uses scipy resample for mixed rates, but for LUNA
405                    // all channels typically have the same rate)
406                    for j in 0..max_spr {
407                        let src_j = (j * spr) / max_spr;
408                        let digital = record_buf[offset + src_j] as f64;
409                        let physical = (digital * cal + off) * unit;
410                        data[[out_ch, dst_start + j]] = physical as f32;
411                    }
412                }
413                out_ch += 1;
414            }
415
416            offset += spr;
417        }
418    }
419
420    Ok(data)
421}
422
423// ── EDF+ Annotations parsing ─────────────────────────────────────────────────
424
425fn read_annotations<R: Read + Seek>(reader: &mut R, header: &EdfHeader) -> Result<Vec<EdfAnnotation>> {
426    reader.seek(SeekFrom::Start(header.header_bytes as u64))?;
427
428    let tal_indices: Vec<usize> = (0..header.num_signals)
429        .filter(|&i| is_annotation_channel(&header.signals[i].label))
430        .collect();
431
432    if tal_indices.is_empty() {
433        return Ok(vec![]);
434    }
435
436    let record_samples: usize = header.signals.iter()
437        .map(|s| s.samples_per_record)
438        .sum();
439    let record_bytes = record_samples * 2;
440
441    let mut annotations = Vec::new();
442    let mut record_buf = vec![0u8; record_bytes];
443
444    for _rec in 0..header.num_records {
445        reader.read_exact(&mut record_buf)?;
446
447        // Extract TAL bytes for each annotation channel
448        for &tal_idx in &tal_indices {
449            let mut byte_offset = 0;
450            for i in 0..tal_idx {
451                byte_offset += header.signals[i].samples_per_record * 2;
452            }
453            let tal_bytes = header.signals[tal_idx].samples_per_record * 2;
454            let tal_data = &record_buf[byte_offset..byte_offset + tal_bytes];
455
456            // Parse TAL (Time-stamped Annotation List)
457            // Format: +onset\x15duration\x14annotation\x14\x00
458            let tal_str = String::from_utf8_lossy(tal_data);
459            for entry in tal_str.split('\x00') {
460                if entry.is_empty() { continue; }
461                parse_tal_entry(entry, &mut annotations);
462            }
463        }
464    }
465
466    Ok(annotations)
467}
468
469fn parse_tal_entry(entry: &str, annotations: &mut Vec<EdfAnnotation>) {
470    // TAL entry format: +onset[\x15duration]\x14annotation[\x14annotation...]
471    let parts: Vec<&str> = entry.split('\x14').collect();
472    if parts.is_empty() { return; }
473
474    let time_part = parts[0];
475    if time_part.is_empty() { return; }
476
477    // Parse onset and optional duration separated by \x15
478    let (onset_str, dur_str) = if let Some(pos) = time_part.find('\x15') {
479        (&time_part[..pos], &time_part[pos+1..])
480    } else {
481        (time_part, "")
482    };
483
484    let onset = match onset_str.replace('+', "").trim().parse::<f64>() {
485        Ok(v) => v,
486        Err(_) => return,
487    };
488
489    let duration = if dur_str.is_empty() {
490        0.0
491    } else {
492        dur_str.parse::<f64>().unwrap_or(0.0)
493    };
494
495    for &desc in &parts[1..] {
496        if desc.is_empty() { continue; }
497        annotations.push(EdfAnnotation {
498            onset,
499            duration,
500            description: desc.to_string(),
501        });
502    }
503}
504
505#[cfg(test)]
506mod tests {
507    use super::*;
508
509    #[test]
510    fn test_signal_header_cal_offset() {
511        let sig = SignalHeader {
512            label: "EEG FP1".into(),
513            transducer: String::new(),
514            physical_dimension: "uV".into(),
515            physical_min: -3200.0,
516            physical_max: 3200.0,
517            digital_min: -32768.0,
518            digital_max: 32767.0,
519            prefiltering: String::new(),
520            samples_per_record: 256,
521            reserved: String::new(),
522        };
523        let cal = sig.cal();
524        // physical_range = 6400, digital_range = 65535
525        approx::assert_abs_diff_eq!(cal, 6400.0 / 65535.0, epsilon = 1e-6);
526    }
527
528    #[test]
529    fn test_unit_scale() {
530        let mut sig = SignalHeader {
531            label: String::new(), transducer: String::new(),
532            physical_dimension: "uV".into(),
533            physical_min: 0.0, physical_max: 0.0,
534            digital_min: 0.0, digital_max: 0.0,
535            prefiltering: String::new(), samples_per_record: 0,
536            reserved: String::new(),
537        };
538        assert_eq!(sig.unit_scale(), 1e-6);
539        sig.physical_dimension = "mV".into();
540        assert_eq!(sig.unit_scale(), 1e-3);
541        sig.physical_dimension = "V".into();
542        assert_eq!(sig.unit_scale(), 1.0);
543    }
544
545    #[test]
546    fn test_annotation_channel_detection() {
547        assert!(is_annotation_channel("EDF Annotations"));
548        assert!(is_annotation_channel("EDF Annotations "));
549        assert!(!is_annotation_channel("EEG FP1-REF"));
550    }
551
552    #[test]
553    fn test_parse_tal_entry() {
554        let mut anns = Vec::new();
555        parse_tal_entry("+0.0\x15\x14", &mut anns);
556        // The time-keeping annotation (no description) produces nothing
557        assert_eq!(anns.len(), 0);
558
559        parse_tal_entry("+1.5\x152.0\x14Seizure onset\x14", &mut anns);
560        assert_eq!(anns.len(), 1);
561        approx::assert_abs_diff_eq!(anns[0].onset, 1.5, epsilon = 1e-9);
562        approx::assert_abs_diff_eq!(anns[0].duration, 2.0, epsilon = 1e-9);
563        assert_eq!(anns[0].description, "Seizure onset");
564    }
565}