Skip to main content

opentfraw/
reader.rs

1use std::collections::HashMap;
2use std::io::{Read, Seek, SeekFrom};
3
4use crate::error::{Error, Result};
5use crate::error_log::ErrorEntry;
6use crate::generic_data::{GenericDataHeader, GenericRecord, GenericValue};
7use crate::header::FileHeader;
8use crate::raw_file_info::RawFileInfo;
9use crate::run_header::RunHeader;
10use crate::scan_data::{
11    read_flat_peaks, read_scan_srm_v66, search_v63_transition, Peak, ScanDataPacket,
12};
13use crate::scan_event::{ScanEvent, ScanEventPreamble};
14use crate::scan_index::ScanIndexEntry;
15use crate::seq_row::SeqRow;
16
17/// Low-level binary reading helpers.
18pub(crate) struct BinaryReader<R> {
19    inner: R,
20    pos: u64,
21}
22
23impl<R: Read + Seek> BinaryReader<R> {
24    pub fn new(inner: R) -> Self {
25        Self { inner, pos: 0 }
26    }
27
28    pub fn into_inner(self) -> R {
29        self.inner
30    }
31
32    #[allow(dead_code)]
33    pub(crate) fn position(&self) -> u64 {
34        self.pos
35    }
36
37    pub fn seek_to(&mut self, offset: u64) -> Result<()> {
38        self.inner.seek(SeekFrom::Start(offset))?;
39        self.pos = offset;
40        Ok(())
41    }
42
43    pub fn read_bytes(&mut self, n: usize) -> Result<Vec<u8>> {
44        let mut buf = vec![0u8; n];
45        self.inner.read_exact(&mut buf).map_err(|e| {
46            if e.kind() == std::io::ErrorKind::UnexpectedEof {
47                Error::UnexpectedEof {
48                    offset: self.pos,
49                    needed: n,
50                }
51            } else {
52                Error::Io(e)
53            }
54        })?;
55        self.pos += n as u64;
56        Ok(buf)
57    }
58
59    pub fn read_bytes_into(&mut self, buf: &mut [u8]) -> Result<()> {
60        let n = buf.len();
61        self.inner.read_exact(buf).map_err(|e| {
62            if e.kind() == std::io::ErrorKind::UnexpectedEof {
63                Error::UnexpectedEof {
64                    offset: self.pos,
65                    needed: n,
66                }
67            } else {
68                Error::Io(e)
69            }
70        })?;
71        self.pos += n as u64;
72        Ok(())
73    }
74
75    pub fn skip(&mut self, n: usize) -> Result<()> {
76        self.inner.seek(SeekFrom::Current(n as i64))?;
77        self.pos += n as u64;
78        Ok(())
79    }
80
81    pub fn length(&mut self) -> Result<u64> {
82        let cur = self.pos;
83        let end = self.inner.seek(SeekFrom::End(0))?;
84        self.inner.seek(SeekFrom::Start(cur))?;
85        self.pos = cur;
86        Ok(end)
87    }
88
89    pub fn read_u8(&mut self) -> Result<u8> {
90        let mut buf = [0u8; 1];
91        self.read_bytes_into(&mut buf)?;
92        Ok(buf[0])
93    }
94
95    pub fn read_u16(&mut self) -> Result<u16> {
96        let mut buf = [0u8; 2];
97        self.read_bytes_into(&mut buf)?;
98        Ok(u16::from_le_bytes(buf))
99    }
100
101    pub fn read_i16(&mut self) -> Result<i16> {
102        let mut buf = [0u8; 2];
103        self.read_bytes_into(&mut buf)?;
104        Ok(i16::from_le_bytes(buf))
105    }
106
107    pub fn read_u32(&mut self) -> Result<u32> {
108        let mut buf = [0u8; 4];
109        self.read_bytes_into(&mut buf)?;
110        Ok(u32::from_le_bytes(buf))
111    }
112
113    pub fn read_i32(&mut self) -> Result<i32> {
114        let mut buf = [0u8; 4];
115        self.read_bytes_into(&mut buf)?;
116        Ok(i32::from_le_bytes(buf))
117    }
118
119    pub fn read_u64(&mut self) -> Result<u64> {
120        let mut buf = [0u8; 8];
121        self.read_bytes_into(&mut buf)?;
122        Ok(u64::from_le_bytes(buf))
123    }
124
125    pub fn read_f32(&mut self) -> Result<f32> {
126        let mut buf = [0u8; 4];
127        self.read_bytes_into(&mut buf)?;
128        Ok(f32::from_le_bytes(buf))
129    }
130
131    pub fn read_f64(&mut self) -> Result<f64> {
132        let mut buf = [0u8; 8];
133        self.read_bytes_into(&mut buf)?;
134        Ok(f64::from_le_bytes(buf))
135    }
136
137    pub fn read_i8(&mut self) -> Result<i8> {
138        let mut buf = [0u8; 1];
139        self.read_bytes_into(&mut buf)?;
140        Ok(buf[0] as i8)
141    }
142
143    /// Read a fixed-width UTF-16-LE string of `byte_len` bytes, stripping null padding.
144    pub fn read_utf16_fixed(&mut self, byte_len: usize) -> Result<String> {
145        let pos = self.pos;
146        let raw = self.read_bytes(byte_len)?;
147        if byte_len % 2 != 0 {
148            return Err(Error::InvalidUtf16(pos));
149        }
150        let units: Vec<u16> = raw
151            .chunks_exact(2)
152            .map(|c| u16::from_le_bytes([c[0], c[1]]))
153            .collect();
154        // Find null terminator
155        let end = units.iter().position(|&u| u == 0).unwrap_or(units.len());
156        String::from_utf16(&units[..end]).map_err(|_| Error::InvalidUtf16(pos))
157    }
158
159    /// Read a PascalStringWin32: UInt32 char count, then that many UTF-16-LE code units.
160    pub fn read_pascal_string(&mut self) -> Result<String> {
161        let pos = self.pos;
162        let char_count = self.read_u32()? as usize;
163        if char_count == 0 {
164            return Ok(String::new());
165        }
166        let byte_len = char_count.checked_mul(2).ok_or(Error::InvalidUtf16(pos))?;
167        let raw = self.read_bytes(byte_len)?;
168        let units: Vec<u16> = raw
169            .chunks_exact(2)
170            .map(|c| u16::from_le_bytes([c[0], c[1]]))
171            .collect();
172        // Strip trailing nulls
173        let end = units.iter().position(|&u| u == 0).unwrap_or(units.len());
174        String::from_utf16(&units[..end]).map_err(|_| Error::InvalidUtf16(pos))
175    }
176
177    /// Read a Windows FILETIME and return Unix timestamp as f64 seconds.
178    pub fn read_windows_filetime(&mut self) -> Result<f64> {
179        let ft = self.read_u64()?;
180        if ft == 0 {
181            return Ok(0.0);
182        }
183        Ok((ft as f64 / 10_000_000.0) - 11_644_473_600.0)
184    }
185}
186
/// A parsed Thermo Fisher RAW file.
///
/// All fields are populated once by [`RawFileReader::open`]; the struct does
/// not keep the source, so methods that read scan data take it as an argument.
pub struct RawFileReader {
    /// File header; its `version` drives every version-dependent layout below.
    pub header: FileHeader,
    /// Sequence row read directly after the file header.
    pub seq_row: SeqRow,
    /// RawFileInfo block, including the preamble with `data_addr` and the
    /// per-controller `run_header_addrs`.
    pub raw_file_info: RawFileInfo,
    /// RunHeader of the controller selected as the MS controller at open time.
    pub run_header: RunHeader,
    /// One entry per scan, read from `run_header.scan_index_addr`.
    pub scan_index: Vec<ScanIndexEntry>,
    /// Scan events read from the stream at `run_header.scan_trailer_addr`.
    pub scan_events: Vec<ScanEvent>,
    /// Schema of the per-scan parameter records (empty field list before v64
    /// or when no header was located).
    pub scan_parameters_header: GenericDataHeader,
    /// Per-scan parameter records (empty before v64 or when no header was located).
    pub scan_parameters: Vec<GenericRecord>,
    /// Error-log entries read from `run_header.error_log_addr`.
    pub error_log: Vec<ErrorEntry>,
    // Instrument log uses same structure
    /// Schema of the instrument-log records (empty field list before v64).
    pub inst_log_header: GenericDataHeader,
    /// Instrument-log records (empty before v64).
    pub inst_log: Vec<GenericRecord>,
    /// Raw file version from the header.
    pub version: u32,
    /// Number of scans.
    pub num_scans: u32,
    /// Data stream base address (for computing absolute scan offsets).
    pub data_addr: u64,
    /// True if scan data uses flat-peak format (TSQ/SRM) instead of PacketHeader.
    pub flat_peaks: bool,
    /// Detected scan-data encoding (the format used by [`Self::read_scan_peaks`]).
    pub scan_format: crate::scan_format::ScanDataFormat,
    /// Detected device family (informational).
    pub device_family: crate::device::DeviceFamily,
    /// Canonical instrument model name if one was detected in the file's
    /// metadata region (e.g. `"Orbitrap Fusion Lumos"`). `None` means only
    /// the coarse family could be inferred.
    pub instrument_model: Option<&'static str>,
    /// For SRM (flat-peak) files: maps scan_event index → Q1 precursor mass (m/z).
    ///
    /// Populated at open time by scanning the method/transition table stored
    /// in the pre-scan-data header region. Empty for non-SRM instruments.
    pub srm_q1_by_event: HashMap<u16, f64>,
    /// For SRM (flat-peak) files: maps scan_event index → Q3 isolation window pairs (lo, hi) in m/z.
    ///
    /// Populated at open time by reading the Q3 window table from the first scan record
    /// of each unique scan event class. Empty for non-SRM instruments.
    pub srm_q3_windows: HashMap<u16, Vec<(f32, f32)>>,
    /// For SRM (flat-peak) files: maps scan_event index → collision energy (eV).
    ///
    /// Populated from the v63 transition table at open time.  For v66 files the
    /// collision energy is read from per-scan parameters instead, so this map is
    /// empty for v66/TSQ Altis files.
    pub srm_ce_by_event: HashMap<u16, f64>,
}
234
235// ─── Multi-controller metadata ───────────────────────────────────────────────
236
/// Controller type codes as used in Thermo RAW files.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControllerType {
    Ms,
    Analog,
    Adc,
    Pda,
    Uv,
    Other,
}

impl ControllerType {
    /// Guess the controller type from a RunHeader's trailer and segment counts.
    ///
    /// A controller with any scan-event trailer entries (`ntrailer > 0`) or
    /// more than one segment (`nsegs > 1`) is classified as `Ms`. Everything
    /// else is reported as `Other`: the individual non-MS kinds (UV, analog,
    /// PDA) cannot be told apart without parsing the InstID/method block.
    fn from_nsegs_ntrailer(ntrailer: u32, nsegs: u32) -> Self {
        match (ntrailer, nsegs) {
            // No trailer and at most one segment: not recognisably MS.
            (0, 0..=1) => Self::Other,
            _ => Self::Ms,
        }
    }
}
261
/// Minimal metadata about one controller in a multi-controller RAW file.
///
/// Produced by [`RawFileReader::controllers`], one value per non-zero
/// RunHeader address.
#[derive(Debug, Clone)]
pub struct ControllerInfo {
    /// Zero-based controller index (position in `run_header_addrs`).
    /// Zero addresses are skipped but still counted, so indices may have gaps.
    pub index: usize,
    /// File offset to this controller's RunHeader.
    pub run_header_addr: u64,
    /// Whether this controller is the primary MS controller.
    /// NOTE(review): set by a per-controller heuristic, so more than one
    /// controller could be flagged — confirm against real multi-MS files.
    pub is_ms_controller: bool,
    /// Inferred controller type.
    pub controller_type: ControllerType,
    /// First scan number.
    pub first_scan: u32,
    /// Last scan number.
    pub last_scan: u32,
    /// Acquisition start time (minutes).
    pub start_time: f64,
    /// Acquisition end time (minutes).
    pub end_time: f64,
}
282
283impl RawFileReader {
284    /// Open and parse a RAW file from a reader.
285    pub fn open<R: Read + Seek>(source: R) -> Result<Self> {
286        let mut r = BinaryReader::new(source);
287
288        // 1. FileHeader
289        let header = FileHeader::read(&mut r)?;
290        let version = header.version;
291
292        // 2. SeqRow
293        let seq_row = SeqRow::read(&mut r, version)?;
294
295        // 3. ASInfo (read and discard preamble + string)
296        let _as_preamble = r.read_bytes(24)?; // ASInfoPreamble: 24 bytes
297        let _as_text = r.read_pascal_string()?;
298
299        // 4. RawFileInfo
300        let raw_file_info = RawFileInfo::read(&mut r, version)?;
301
302        // 5. Extract addresses
303        let data_addr = raw_file_info.preamble.data_addr;
304
305        // 6. Select the MS controller RunHeader.
306        // Multi-controller files (e.g. UV + MS) have one RunHeader per controller.
307        // The MS controller has ntrailer > 0 (v64+) or first_scan <= last_scan with
308        // nsegs > 0 (v63 and earlier). We iterate all addresses and pick the best.
309        let run_header = {
310            let addrs = &raw_file_info.preamble.run_header_addrs;
311            let mut chosen = None;
312            for &addr in addrs {
313                if addr == 0 {
314                    continue;
315                }
316                r.seek_to(addr)?;
317                let rh = RunHeader::read(&mut r, version)?;
318                // Heuristic for identifying the MS controller:
319                // 1. For v64+: ntrailer > 0 (scan events present) - catches most instruments.
320                // 2. For all versions: RunHeader.data_addr == preamble.data_addr - the MS
321                //    controller's scan data begins at the same address the preamble declares.
322                //    This catches TSQ/triple-quad instruments where ntrailer=0 (no scan events).
323                // 3. Pre-v64 fallback: valid scan range with nsegs > 0.
324                let is_ms = if version >= 64 {
325                    rh.ntrailer > 0 || rh.data_addr == data_addr
326                } else {
327                    rh.sample_info.last_scan_number >= rh.sample_info.first_scan_number
328                        && rh.nsegs > 0
329                };
330                if is_ms {
331                    chosen = Some(rh);
332                    break;
333                }
334            }
335            // Fall back to first address if no MS controller found
336            match chosen {
337                Some(rh) => rh,
338                None => {
339                    r.seek_to(addrs[0])?;
340                    RunHeader::read(&mut r, version)?
341                }
342            }
343        };
344
345        let first_scan = run_header.sample_info.first_scan_number;
346        let last_scan = run_header.sample_info.last_scan_number;
347
348        let num_scans = if last_scan >= first_scan {
349            last_scan - first_scan + 1
350        } else {
351            0
352        };
353
354        // 7. Scan index
355        r.seek_to(run_header.scan_index_addr)?;
356        let mut scan_index = Vec::with_capacity(num_scans as usize);
357        for _ in 0..num_scans {
358            scan_index.push(ScanIndexEntry::read(&mut r, version)?);
359        }
360
361        // 8. Scan event trailer
362        r.seek_to(run_header.scan_trailer_addr)?;
363        let n_events = if version >= 64 {
364            // v64+: first u32 is a preamble (not count); use ntrailer from RunHeader
365            let _preamble = r.read_u32()?;
366            run_header.ntrailer
367        } else {
368            r.read_u32()?
369        };
370        // For v66, compute per-event body sizes from the stream's address range.
371        // The scan event stream spans [scan_trailer_addr+4 .. scan_params_addr).
372        // Each event = preamble (136 bytes) + body.
373        //
374        // Simple instruments (Q Exactive, Exploris): all events are identical in
375        // size so stream_bytes divides evenly by n_events.
376        //
377        // Tribrid instruments (Eclipse, Fusion Lumos): primary (MS1) scans and
378        // dependent (MS2+) scans have different body layouts:
379        //   Primary event:   232 bytes total (preamble 136 + body 96)
380        //   Dependent event: 344 bytes total (preamble 136 + body 208)
381        // Confirmed empirically across Orbitrap Eclipse (EThcD) and Fusion Lumos
382        // (DIA, MS3) files.
383        let preamble_size = ScanEventPreamble::size_for_version(version);
384        let (v66_body_primary, v66_body_dependent): (usize, usize) =
385            if version >= 66 && n_events > 0 {
386                let stream_bytes = run_header
387                    .scan_params_addr
388                    .saturating_sub(run_header.scan_trailer_addr)
389                    .saturating_sub(4);
390                let remainder = stream_bytes % n_events as u64;
391                if remainder == 0 {
392                    // Uniform event size (Q Exactive, Exploris, etc.)
393                    let body = (stream_bytes / n_events as u64) as usize;
394                    let body = body.saturating_sub(preamble_size);
395                    (body, body)
396                } else {
397                    // Variable-length events: tribrid Orbitrap instruments.
398                    // Known sizes: primary=232, dependent=344 (body 96 and 208).
399                    const PRIMARY_EVENT: u64 = 232;
400                    const DEPENDENT_EVENT: u64 = 344;
401                    let gap = DEPENDENT_EVENT - PRIMARY_EVENT;
402                    let n = n_events as u64;
403                    // n_primary * PRIMARY_EVENT + n_dependent * DEPENDENT_EVENT = stream_bytes
404                    // n_primary + n_dependent = n
405                    // => n_primary = (n * DEPENDENT_EVENT - stream_bytes) / gap
406                    let n_primary_numerator = n
407                        .saturating_mul(DEPENDENT_EVENT)
408                        .saturating_sub(stream_bytes);
409                    if n_primary_numerator % gap == 0 {
410                        let n_primary = n_primary_numerator / gap;
411                        let n_dependent = n.saturating_sub(n_primary);
412                        let total_check = n_primary * PRIMARY_EVENT + n_dependent * DEPENDENT_EVENT;
413                        if total_check == stream_bytes {
414                            // Verified: use the tribrid sizes.
415                            (
416                                (PRIMARY_EVENT as usize).saturating_sub(preamble_size),
417                                (DEPENDENT_EVENT as usize).saturating_sub(preamble_size),
418                            )
419                        } else {
420                            // Fallback: use floor-average uniform body
421                            let body = ((stream_bytes / n) as usize).saturating_sub(preamble_size);
422                            (body, body)
423                        }
424                    } else {
425                        // Fallback: use floor-average uniform body
426                        let body = ((stream_bytes / n) as usize).saturating_sub(preamble_size);
427                        (body, body)
428                    }
429                }
430            } else {
431                (0, 0)
432            };
433        let mut scan_events = Vec::with_capacity(n_events as usize);
434        for _ in 0..n_events {
435            scan_events.push(ScanEvent::read(
436                &mut r,
437                version,
438                v66_body_primary,
439                v66_body_dependent,
440            )?);
441        }
442
443        // 9. Error log
444        let n_errors = run_header.sample_info.error_log_length;
445        let error_log = if n_errors > 0 {
446            r.seek_to(run_header.error_log_addr)?;
447            if version >= 64 {
448                let _preamble = r.read_u32()?;
449            }
450            let mut log = Vec::with_capacity(n_errors as usize);
451            for _ in 0..n_errors {
452                log.push(ErrorEntry::read(&mut r)?);
453            }
454            log
455        } else {
456            // Ensure reader is positioned at error_log_addr even when empty.
457            r.seek_to(run_header.error_log_addr)?;
458            Vec::new()
459        };
460        // The GDH for scan parameters immediately follows the error-log entries.
461        // Do NOT seek back to error_log_addr - doing so would cause find_forward
462        // to scan over the scan_index (which may sit between error_log and
463        // scan_trailer in some file layouts), creating a CPU-spinning O(n) search
464        // through megabytes of binary scan data.
465        let after_error_log = r.position();
466
467        // 10. Scan parameters (trailer extra) - GenericData format in v64+.
468        //     The schema (GDH) is written just after the error-log entries;
469        //     the records are written at `scan_params_addr` (tail of file)
470        //     with NO stream preamble - records begin directly at
471        //     scan_params_addr. Any bytes after the last record are trailing
472        //     padding and can be ignored.
473        let (scan_parameters_header, scan_parameters) = if version >= 64 {
474            // Search from after the error log entries up to scan_trailer.
475            // This skips any scan_index data that may sit in between.
476            let scan_distance = run_header.scan_trailer_addr.saturating_sub(after_error_log);
477            // Estimate per-record size from the tail of the file using integer
478            // division. Any remainder bytes are trailing data, not a preamble.
479            let file_size = r.length()?;
480            let tail = file_size.saturating_sub(run_header.scan_params_addr);
481            let expected_record_size = if num_scans > 0 && tail > 0 {
482                let per_scan = tail / num_scans as u64;
483                if per_scan >= 4 {
484                    Some(per_scan as usize)
485                } else {
486                    None
487                }
488            } else {
489                None
490            };
491            match GenericDataHeader::find_forward(&mut r, scan_distance, expected_record_size)? {
492                Some(hdr) => {
493                    // Records start directly at scan_params_addr - no stream preamble.
494                    r.seek_to(run_header.scan_params_addr)?;
495                    let mut params = Vec::with_capacity(num_scans as usize);
496                    for _ in 0..num_scans {
497                        params.push(GenericRecord::read(&mut r, &hdr)?);
498                    }
499                    (hdr, params)
500                }
501                None => (GenericDataHeader { fields: Vec::new() }, Vec::new()),
502            }
503        } else {
504            (GenericDataHeader { fields: Vec::new() }, Vec::new())
505        };
506
507        // 11. Instrument log - GenericData format in v64+
508        let (inst_log_header, inst_log) = if version >= 64 {
509            r.seek_to(run_header.inst_log_addr)?;
510            match GenericDataHeader::try_read(&mut r)? {
511                Some(hdr) => {
512                    let n_inst = run_header.sample_info.inst_log_length;
513                    let mut log = Vec::with_capacity(n_inst as usize);
514                    for _ in 0..n_inst {
515                        log.push(GenericRecord::read(&mut r, &hdr)?);
516                    }
517                    (hdr, log)
518                }
519                None => (GenericDataHeader { fields: Vec::new() }, Vec::new()),
520            }
521        } else {
522            (GenericDataHeader { fields: Vec::new() }, Vec::new())
523        };
524
525        // Detect flat-peak (TSQ/SRM) format.
526        // Reliable indicator: ntrailer == 0 means no scan event trailer was written, which
527        // is the case for all TSQ/triple-quad SRM instruments.
528        // Fallback: first scan data_size < 100 (catches edge cases with tiny SRM windows).
529        // In the flat format, data_size is the number of MRM peaks, not bytes.
530        let flat_peaks = run_header.ntrailer == 0
531            || scan_index
532                .first()
533                .map(|e| e.data_size < 100)
534                .unwrap_or(false);
535
536        // Classify scan format and device family.
537        let scan_format = crate::scan_format::ScanDataFormat::detect(version, flat_peaks);
538        let first_analyzer = scan_events.first().and_then(|e| e.preamble.analyzer());
539
540        // For SRM (flat-peak) files, read the entire pre-scan-data region so that
541        // we can extract Q1 values from the method/transition table stored there.
542        // For other instruments, read only 64 KB for instrument model detection.
543        let scan_window_cap = if flat_peaks { data_addr } else { 64 * 1024u64 };
544        let window_len = scan_window_cap.min(data_addr);
545        let metadata_window = if window_len > 0 {
546            r.seek_to(0)?;
547            r.read_bytes(window_len as usize).unwrap_or_default()
548        } else {
549            Vec::new()
550        };
551        // All BinaryReader operations are complete; reclaim the underlying source so
552        // it can be used for on-demand reads (e.g. Q3 window table from scan records).
553        let mut source = r.into_inner();
554        let detected = crate::device::DeviceFamily::detect_instrument(
555            &metadata_window,
556            &header.audit_start.tag2,
557            &seq_row.inst_method,
558            first_analyzer,
559        );
560        let device_family = detected.family;
561        let instrument_model = detected.model;
562
563        // For SRM files: extract Q1 masses, Q3 window pairs, and (for v63) collision energies
564        // from the pre-scan-data header region and/or the scan data records.
565        //
566        // v66 (TSQ Quantiva / TSQ Altis, FlatV66):
567        //   Transition table layout: [Q1: f64][Q3_lo: f64][Q3_hi: f64] per channel.
568        //   Anchor: scan_index.high_mz equals the Q3_hi of the highest-Q3 channel for each
569        //   event class.  Q3 window pairs come from the per-scan record header.
570        //
571        // v63 (TSQ Quantum / TSQ Vantage, FlatV63):
572        //   Transition table layout: 72-byte records; Q1 at [+16], Q3_center at [+24],
573        //   Q3_width at [+32], CE at [+48].  scan_index.low_mz/high_mz hold the instrument
574        //   scan range (not per-transition values), so the high_mz anchor does not apply.
575        //   Q3 centers come from the first scan's peak list; Q3 windows are computed as
576        //   Q3_center ± Q3_width/2.
577        let (srm_q1_by_event, srm_q3_windows, srm_ce_by_event) = {
578            use crate::scan_format::ScanDataFormat;
579            match (flat_peaks, scan_format) {
580                (true, ScanDataFormat::FlatV66) if metadata_window.len() >= 24 => {
581                    // --- v66 Q1 extraction: anchor on scan_index.high_mz ---
582                    let mut event_q3_hi: HashMap<u16, f64> = HashMap::new();
583                    for entry in &scan_index {
584                        if entry.high_mz > 50.0 && entry.high_mz < 2000.0 {
585                            event_q3_hi.entry(entry.scan_event).or_insert(entry.high_mz);
586                        }
587                    }
588                    let data = &metadata_window;
589                    let mut q1_map: HashMap<u16, f64> = HashMap::new();
590                    'outer_v66: for (&event, &q3_hi_target) in &event_q3_hi {
591                        let end = data.len().saturating_sub(8);
592                        for i in 16..end {
593                            let hi = f64::from_le_bytes(data[i..i + 8].try_into().unwrap());
594                            if (hi - q3_hi_target).abs() < 0.002 {
595                                let lo = f64::from_le_bytes(data[i - 8..i].try_into().unwrap());
596                                if hi > lo && (hi - lo) < 0.1 {
597                                    let q1 =
598                                        f64::from_le_bytes(data[i - 16..i - 8].try_into().unwrap());
599                                    if q1 > 50.0 && q1 < 3000.0 {
600                                        q1_map.insert(event, q1);
601                                        continue 'outer_v66;
602                                    }
603                                }
604                            }
605                        }
606                    }
607                    // --- v66 Q3 window extraction: read per-scan record header ---
608                    let mut seen: HashMap<u16, bool> = HashMap::new();
609                    let mut q3_map: HashMap<u16, Vec<(f32, f32)>> = HashMap::new();
610                    for entry in &scan_index {
611                        if seen.contains_key(&entry.scan_event) {
612                            continue;
613                        }
614                        seen.insert(entry.scan_event, true);
615                        if let Ok(windows) = crate::scan_data::read_scan_srm_v66_windows(
616                            &mut source,
617                            data_addr,
618                            entry.offset,
619                        ) {
620                            if !windows.is_empty() {
621                                q3_map.insert(entry.scan_event, windows);
622                            }
623                        }
624                    }
625                    (q1_map, q3_map, HashMap::new())
626                }
627                (true, ScanDataFormat::FlatV63) => {
628                    // --- v63 Q1 + Q3 window + CE extraction ---
629                    // Read peaks from the first scan of each event class; each peak's mz
630                    // is the Q3 center for that channel.  Search the pre-data region for
631                    // the Q3_center value to find Q1, Q3_width, and CE from the transition
632                    // table.  Q3 windows are computed as (Q3_center - width/2, Q3_center + width/2).
633                    let mut seen: HashMap<u16, bool> = HashMap::new();
634                    let mut q1_map: HashMap<u16, f64> = HashMap::new();
635                    let mut q3_map: HashMap<u16, Vec<(f32, f32)>> = HashMap::new();
636                    let mut ce_map: HashMap<u16, f64> = HashMap::new();
637                    let data = &metadata_window;
638                    for entry in &scan_index {
639                        let ev = entry.scan_event;
640                        if seen.contains_key(&ev) {
641                            continue;
642                        }
643                        seen.insert(ev, true);
644                        let peaks = match read_flat_peaks(
645                            &mut source,
646                            data_addr,
647                            entry.offset,
648                            entry.data_size,
649                        ) {
650                            Ok(p) if !p.is_empty() => p,
651                            _ => continue,
652                        };
653                        // Use the first peak's mz as Q3_center anchor.
654                        if let Some((q1, q3w, ce)) = search_v63_transition(data, peaks[0].mz) {
655                            q1_map.insert(ev, q1);
656                            ce_map.insert(ev, ce);
657                            let half = (q3w / 2.0) as f32;
658                            let windows: Vec<(f32, f32)> = peaks
659                                .iter()
660                                .map(|p| (p.mz as f32 - half, p.mz as f32 + half))
661                                .collect();
662                            q3_map.insert(ev, windows);
663                        }
664                    }
665                    (q1_map, q3_map, ce_map)
666                }
667                _ => (HashMap::new(), HashMap::new(), HashMap::new()),
668            }
669        };
670
671        Ok(Self {
672            header,
673            seq_row,
674            raw_file_info,
675            run_header,
676            scan_index,
677            scan_events,
678            scan_parameters_header,
679            scan_parameters,
680            error_log,
681            inst_log_header,
682            inst_log,
683            version,
684            num_scans,
685            data_addr,
686            flat_peaks,
687            scan_format,
688            device_family,
689            instrument_model,
690            srm_q1_by_event,
691            srm_q3_windows,
692            srm_ce_by_event,
693        })
694    }
695
696    /// Open a RAW file from a path.
697    pub fn open_path(path: impl AsRef<std::path::Path>) -> Result<Self> {
698        let file = std::fs::File::open(path)?;
699        let reader = std::io::BufReader::new(file);
700        Self::open(reader)
701    }
702
703    /// Enumerate all controllers in this RAW file.
704    ///
705    /// Multi-detector acquisition systems write one [`RunHeader`] per
706    /// controller (MS, UV, PDA, Analog). This method parses all controller
707    /// headers and returns a `Vec<ControllerInfo>` with basic metadata for
708    /// each. The primary MS controller can be identified via
709    /// [`ControllerInfo::is_ms_controller`].
710    ///
711    /// For single-controller files (the common case), this returns a
712    /// one-element vec with the MS controller.
713    pub fn controllers<R: Read + Seek>(&self, source: &mut R) -> Result<Vec<ControllerInfo>> {
714        let mut r = BinaryReader::new(source);
715        let addrs = &self.raw_file_info.preamble.run_header_addrs;
716        let mut infos = Vec::with_capacity(addrs.len());
717        for (i, &addr) in addrs.iter().enumerate() {
718            if addr == 0 {
719                continue;
720            }
721            r.seek_to(addr)?;
722            let rh = RunHeader::read(&mut r, self.version)?;
723            let is_ms = if self.version >= 64 {
724                rh.ntrailer > 0 || rh.data_addr == self.data_addr
725            } else {
726                rh.nsegs > 0
727            };
728            let ct = if is_ms {
729                ControllerType::Ms
730            } else {
731                ControllerType::from_nsegs_ntrailer(rh.ntrailer, rh.nsegs)
732            };
733            infos.push(ControllerInfo {
734                index: i,
735                run_header_addr: addr,
736                is_ms_controller: is_ms,
737                controller_type: ct,
738                first_scan: rh.sample_info.first_scan_number,
739                last_scan: rh.sample_info.last_scan_number,
740                start_time: rh.sample_info.start_time,
741                end_time: rh.sample_info.end_time,
742            });
743        }
744        Ok(infos)
745    }
746
747    /// Read a single scan data packet (PacketHeader format).
748    pub fn read_scan<R: Read + Seek>(
749        &self,
750        source: &mut R,
751        scan_number: u32,
752    ) -> Result<ScanDataPacket> {
753        let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
754        if idx >= self.scan_index.len() {
755            return Err(Error::AddressOutOfRange(scan_number as u64));
756        }
757        let entry = &self.scan_index[idx];
758        let abs_offset = self.data_addr + entry.offset;
759        source.seek(SeekFrom::Start(abs_offset))?;
760        let mut r = BinaryReader::new(source);
761        ScanDataPacket::read(&mut r)
762    }
763
764    /// Read a single scan as flat peaks (TSQ/SRM format).
765    ///
766    /// In this format, `entry.offset` is the cumulative end byte offset within
767    /// the data stream. Peaks are (f32, f32) pairs at the end of each record.
768    pub fn read_scan_flat<R: Read + Seek>(
769        &self,
770        source: &mut R,
771        scan_number: u32,
772    ) -> Result<Vec<Peak>> {
773        let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
774        if idx >= self.scan_index.len() {
775            return Err(Error::AddressOutOfRange(scan_number as u64));
776        }
777        let entry = &self.scan_index[idx];
778        read_flat_peaks(source, self.data_addr, entry.offset, entry.data_size)
779    }
780
781    /// Read a single scan in v66 SRM format (TSQ Quantiva / TSQ Altis).
782    ///
783    /// `entry.offset` is the START byte offset within the data stream.
784    /// The record is fixed-size (`entry.data_size` bytes) and contains:
785    ///   n_peaks (u32), header, m/z window table, then peak triplets.
786    pub fn read_scan_srm_v66<R: Read + Seek>(
787        &self,
788        source: &mut R,
789        scan_number: u32,
790    ) -> Result<Vec<Peak>> {
791        let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
792        if idx >= self.scan_index.len() {
793            return Err(Error::AddressOutOfRange(scan_number as u64));
794        }
795        let entry = &self.scan_index[idx];
796        read_scan_srm_v66(source, self.data_addr, entry.offset, entry.data_size)
797    }
798
799    /// Read a single scan's peaks using whichever decoder matches this file's
800    /// scan-data format.
801    ///
802    /// This is the recommended high-level entry point. It dispatches on
803    /// [`Self::scan_format`] so callers do not have to know whether a file is
804    /// a TSQ SRM run (flat peaks) or an Orbitrap/ion-trap acquisition
805    /// (PacketHeader records).
806    ///
807    /// The returned `Vec<Peak>` contains centroided peaks regardless of the
808    /// underlying format. For PacketHeader files that also contain a profile
809    /// signal, use [`Self::read_scan`] to access both.
810    pub fn read_scan_peaks<R: Read + Seek>(
811        &self,
812        source: &mut R,
813        scan_number: u32,
814    ) -> Result<Vec<Peak>> {
815        use crate::scan_format::ScanDataFormat;
816        match self.scan_format {
817            ScanDataFormat::PacketHeader => {
818                let pkt = self.read_scan(source, scan_number)?;
819                Ok(pkt.peaks)
820            }
821            ScanDataFormat::FlatV63 => self.read_scan_flat(source, scan_number),
822            ScanDataFormat::FlatV66 => self.read_scan_srm_v66(source, scan_number),
823        }
824    }
825
826    /// Read centroided peaks only, skipping profile data.
827    ///
828    /// For PacketHeader files (Orbitrap / ion-trap), this skips the large
829    /// profile-data section, making it 2-10× faster than
830    /// [`Self::read_scan_peaks`] when only centroided m/z and intensity values
831    /// are needed (e.g. mzML export, peak area queries).
832    ///
833    /// For TSQ/SRM files this is identical to [`Self::read_scan_peaks`].
834    pub fn read_peaks_only<R: Read + Seek>(
835        &self,
836        source: &mut R,
837        scan_number: u32,
838    ) -> Result<Vec<Peak>> {
839        use crate::scan_format::ScanDataFormat;
840        match self.scan_format {
841            ScanDataFormat::PacketHeader => {
842                let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
843                if idx >= self.scan_index.len() {
844                    return Err(Error::AddressOutOfRange(scan_number as u64));
845                }
846                let entry = &self.scan_index[idx];
847                let abs_offset = self.data_addr + entry.offset;
848                source.seek(SeekFrom::Start(abs_offset))?;
849                let mut r = BinaryReader::new(source);
850                ScanDataPacket::read_peaks_only(&mut r)
851            }
852            ScanDataFormat::FlatV63 => self.read_scan_flat(source, scan_number),
853            ScanDataFormat::FlatV66 => self.read_scan_srm_v66(source, scan_number),
854        }
855    }
856
857    /// Return the scan-parameter record for a given 1-based scan number.
858    ///
859    /// Returns `None` if the file has no scan-parameter stream or if
860    /// `scan_number` is outside the valid scan range.
861    pub fn scan_parameters(&self, scan_number: u32) -> Option<&GenericRecord> {
862        let first = self.run_header.sample_info.first_scan_number;
863        let idx = scan_number.checked_sub(first)? as usize;
864        self.scan_parameters.get(idx)
865    }
866
867    /// Return a typed view of the scan-parameter record for a given scan.
868    ///
869    /// This wraps [`Self::scan_parameters`] in a [`ScanParams`] accessor that
870    /// provides named, type-safe fields and handles label-name variations
871    /// across instrument families.
872    pub fn scan_params(&self, scan_number: u32) -> Option<ScanParams<'_>> {
873        self.scan_parameters(scan_number).map(ScanParams)
874    }
875
876    /// Return the raw instrument-log record for a given scan number, or
877    /// `None` if the scan is out of range or no instrument log was found.
878    ///
879    /// The instrument log contains per-scan instrument-state values:
880    /// temperatures, voltages, pressures, ion counts, etc.
881    pub fn inst_log_record(&self, scan_number: u32) -> Option<&GenericRecord> {
882        let first = self.run_header.sample_info.first_scan_number;
883        let idx = scan_number.checked_sub(first)? as usize;
884        self.inst_log.get(idx)
885    }
886
887    /// Return a typed [`StatusLogEntry`] view for the given scan number.
888    ///
889    /// This wraps [`Self::inst_log_record`] and provides named, type-safe
890    /// accessors for common instrument-status fields.
891    pub fn status_log_entry(&self, scan_number: u32) -> Option<StatusLogEntry<'_>> {
892        self.inst_log_record(scan_number).map(StatusLogEntry)
893    }
894
895    /// Return the canonical Thermo scan filter string for a given scan
896    /// (1-based scan number), or `None` if the scan is out of range.
897    ///
898    /// Example output: `"FTMS + p NSI Full ms [350.0000-1500.0000]"`.
899    ///
900    /// See [`crate::scan_filter`] for grammar details.
901    pub fn scan_filter(&self, scan_number: u32) -> Option<String> {
902        let first = self.run_header.sample_info.first_scan_number;
903        let idx = scan_number.checked_sub(first)? as usize;
904        let entry = self.scan_index.get(idx)?;
905
906        // SRM files have no scan events; build the filter string from
907        // the pre-loaded Q1 and Q3 window maps.
908        if self.flat_peaks {
909            let q1 = self.srm_q1_by_event.get(&entry.scan_event).copied()?;
910            let windows = self.srm_q3_windows.get(&entry.scan_event)?;
911            // v63 (TSQ Quantum/Vantage): NSI ionization, @cid{CE:.2} after Q1.
912            // v66 (TSQ Quantiva/Altis): ESI ionization, no CE in filter.
913            use crate::scan_format::ScanDataFormat;
914            let ionization = match self.scan_format {
915                ScanDataFormat::FlatV63 => "NSI",
916                _ => "ESI",
917            };
918            let ce_part = if self.scan_format == ScanDataFormat::FlatV63 {
919                self.srm_ce_by_event
920                    .get(&entry.scan_event)
921                    .map(|&ce| format!("@cid{:.2}", ce))
922                    .unwrap_or_default()
923            } else {
924                String::new()
925            };
926            // Format: "+ c {ION} SRM ms2 {Q1:.3}{@cidCE} [{lo1:.3}-{hi1:.3}, ...]"
927            let mut s = format!("+ c {} SRM ms2 {:.3}{}", ionization, q1, ce_part);
928            if !windows.is_empty() {
929                s.push(' ');
930                s.push('[');
931                for (i, (lo, hi)) in windows.iter().enumerate() {
932                    if i > 0 {
933                        s.push_str(", ");
934                    }
935                    s.push_str(&format!("{:.3}-{:.3}", lo, hi));
936                }
937                s.push(']');
938            }
939            return Some(s);
940        }
941
942        let event = self.scan_events.get(idx)?;
943        // Precursor m/z and activation energy come from the per-scan params
944        // table (not the event body) for v66+. Fall back silently if missing.
945        let params = self.scan_params(scan_number);
946        let precursor = params.as_ref().and_then(|p| p.monoisotopic_mz());
947        let energy = params.as_ref().and_then(|p| p.activation_energy());
948        let supplemental = params
949            .as_ref()
950            .and_then(|p| p.supplemental_activation_energy());
951        Some(crate::scan_filter::build_filter(
952            event,
953            entry,
954            precursor,
955            energy,
956            supplemental,
957        ))
958    }
959
960    /// Return all scan retention times (minutes) in scan order (1-based scan numbers).
961    ///
962    /// This is equivalent to collecting `scan_index[i].start_time` for every scan.
963    /// The returned `Vec` is indexed by `scan_number - first_scan_number`.
964    pub fn retention_times(&self) -> Vec<f64> {
965        self.scan_index.iter().map(|e| e.start_time).collect()
966    }
967
968    /// Return a per-scan chromatogram as `(retention_time_min, tic)` pairs.
969    pub fn tic_chromatogram(&self) -> Vec<(f64, f64)> {
970        self.scan_index
971            .iter()
972            .map(|e| (e.start_time, e.total_current))
973            .collect()
974    }
975
976    /// Return a per-scan base-peak chromatogram as `(retention_time_min, bpi, base_mz)` triples.
977    pub fn bpc_chromatogram(&self) -> Vec<(f64, f64, f64)> {
978        self.scan_index
979            .iter()
980            .map(|e| (e.start_time, e.base_intensity, e.base_mz))
981            .collect()
982    }
983
984    /// Return the instrument method file path or name as stored in the
985    /// sequence row. This is the name of the method used during acquisition
986    /// (e.g. `"Standard_HCD.meth"`), not the embedded method text.
987    ///
988    /// See also [`Self::instrument_method_text`] for extracting the embedded
989    /// XML/text method body from the file.
990    pub fn instrument_method_name(&self) -> &str {
991        &self.seq_row.inst_method
992    }
993
994    /// Attempt to extract the embedded instrument method text from the RAW file.
995    ///
996    /// Thermo RAW files embed the acquisition method as a UTF-16LE text or
997    /// XML blob in the metadata region. This method scans the bytes between
998    /// the start of the file and the scan data for the longest contiguous
999    /// block of valid UTF-16LE text (at least 256 characters long) and returns
1000    /// it as a `String`.
1001    ///
1002    /// Returns `None` if no suitable text block is found or if the method was
1003    /// not embedded (`method_file_present == false`).
1004    ///
1005    /// Note: This is a best-effort extraction. The result is the raw text
1006    /// content; callers may wish to trim or parse it further.
1007    pub fn instrument_method_text<R: Read + Seek>(&self, source: &mut R) -> Option<String> {
1008        if !self.raw_file_info.preamble.method_file_present {
1009            return None;
1010        }
1011        // Read metadata region: from byte 0 up to (but not including) scan data.
1012        // Cap at 512 KB to avoid reading very large files entirely.
1013        const MAX_WINDOW: u64 = 512 * 1024;
1014        let window_len = MAX_WINDOW.min(self.data_addr) as usize;
1015        if window_len < 4 {
1016            return None;
1017        }
1018        source.seek(std::io::SeekFrom::Start(0)).ok()?;
1019        let mut buf = vec![0u8; window_len];
1020        source.read_exact(&mut buf).ok()?;
1021
1022        // Scan for the longest valid UTF-16LE text block (min 256 chars = 512 bytes).
1023        // Strategy: find aligned 2-byte sequences where every pair decodes to a
1024        // printable/whitespace Unicode scalar (U+0020..U+FFFD).
1025        extract_utf16le_text(&buf, 256)
1026    }
1027}
1028
/// Scan `buf` for the longest contiguous UTF-16LE text block of at least
/// `min_chars` UTF-16 code units and return it as a `String`. Returns `None`
/// if no such block exists.
///
/// Both byte alignments (offset 0 and offset 1) are tried. A run consists of
/// code units that decode to tab, line feed, carriage return, or any scalar
/// value from U+0020 upwards except U+FFFE/U+FFFF; well-formed surrogate
/// pairs are kept and count as two units. Anything else - including an
/// unpaired surrogate - ends the current run, so a stray surrogate next to
/// real text no longer causes the whole run to be thrown away.
///
/// On equal length the earliest run wins, with alignment 0 taking precedence
/// over alignment 1. A trailing odd byte is ignored.
fn extract_utf16le_text(buf: &[u8], min_chars: usize) -> Option<String> {
    // Characters allowed inside a text run.
    fn is_text_char(c: char) -> bool {
        matches!(c, '\t' | '\n' | '\r') || (c >= ' ' && !matches!(c, '\u{FFFE}' | '\u{FFFF}'))
    }

    if buf.len() < 2 {
        return None;
    }

    let mut best: Option<String> = None;
    let mut best_units = 0usize;

    for alignment in 0..2usize {
        let tail = &buf[alignment..];
        if tail.len() / 2 < min_chars {
            continue;
        }

        // The trailing 0x0000 sentinel is never text, so it forces the final
        // run through the same flush path as every other run.
        let units = tail
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .chain(std::iter::once(0u16));

        let mut run = String::new();
        let mut run_units = 0usize;

        for decoded in std::char::decode_utf16(units) {
            match decoded {
                Ok(c) if is_text_char(c) => {
                    run.push(c);
                    run_units += c.len_utf16();
                }
                // Non-text character or unpaired surrogate: close the run.
                _ => {
                    if run_units >= min_chars && run_units > best_units {
                        best_units = run_units;
                        best = Some(std::mem::take(&mut run));
                    } else {
                        run.clear();
                    }
                    run_units = 0;
                }
            }
        }
    }
    best
}
1082
1083// ─── High-level typed accessor for scan parameters ──────────────────────────
1084
1085/// Typed accessor for a scan's extra parameters (`ScanParams` stream).
1086///
1087/// The underlying [`GenericRecord`] stores named fields whose labels vary
1088/// slightly across Thermo instrument families. This wrapper normalises the
1089/// most common labels so callers do not need to hard-code instrument-specific
1090/// strings.
1091///
1092/// # Example
1093/// ```no_run
1094/// use opentfraw::RawFileReader;
1095/// let raw = RawFileReader::open_path("experiment.raw").unwrap();
1096/// if let Some(p) = raw.scan_params(1) {
1097///     println!("Injection time: {:?} ms", p.ion_injection_time_ms());
1098///     println!("Charge state:   {:?}", p.charge_state());
1099/// }
1100/// ```
1101pub struct ScanParams<'a>(pub &'a GenericRecord);
1102
1103impl<'a> ScanParams<'a> {
1104    /// Return the raw `GenericRecord` for direct field access.
1105    #[inline]
1106    pub fn record(&self) -> &GenericRecord {
1107        self.0
1108    }
1109
1110    /// Ion injection / fill time in milliseconds.
1111    ///
1112    /// Label varies: `"Ion Injection Time (ms):"` (Orbitrap family) vs
1113    /// `"Ion Inject Time (ms):"` (older LTQ variants).
1114    pub fn ion_injection_time_ms(&self) -> Option<f64> {
1115        // Try canonical label first; fall back to legacy label.
1116        self.0
1117            .get_f64("Ion Injection Time (ms):")
1118            .or_else(|| self.0.get_f64("Ion Inject Time (ms):"))
1119    }
1120
1121    /// Precursor charge state (0 = unknown / MS1 scan).
1122    pub fn charge_state(&self) -> Option<i32> {
1123        self.0
1124            .get_i32("Charge State:")
1125            // Some LCQ files use UInt8 for charge state.
1126            .or_else(|| {
1127                self.0.get("Charge State:").and_then(|v| match v {
1128                    GenericValue::UInt8(n) => Some(*n as i32),
1129                    _ => None,
1130                })
1131            })
1132    }
1133
1134    /// Monoisotopic precursor m/z (0 = not determined).
1135    ///
1136    /// Tries multiple label variants for compatibility across instrument families:
1137    ///
1138    /// - `"Monoisotopic M/Z:"` - most common (Q Exactive, Orbitrap Fusion)
1139    /// - `"MS2 Isolation M/Z:"` - some older LTQ firmware
1140    ///
1141    /// Returns `None` when the value is absent or zero (not determined).
1142    pub fn monoisotopic_mz(&self) -> Option<f64> {
1143        let v = self
1144            .0
1145            .get_f64("Monoisotopic M/Z:")
1146            .or_else(|| self.0.get_f64("MS2 Isolation M/Z:"))
1147            .or_else(|| self.0.get_f64("Isolation Center M/Z:"))
1148            .or_else(|| self.0.get_f64("Precursor M/Z:"))?;
1149        if v > 0.0 {
1150            Some(v)
1151        } else {
1152            None
1153        }
1154    }
1155
1156    /// Number of micro-scans averaged into this scan.
1157    pub fn micro_scan_count(&self) -> Option<i32> {
1158        self.0.get_i32("Micro Scan Count:")
1159    }
1160
1161    /// Scan number of the master (MS1) scan that triggered this dependent scan.
1162    /// Returns `None` if this is not a dependent scan.
1163    pub fn master_scan_number(&self) -> Option<i32> {
1164        self.0
1165            .get_i32("Master Scan Number:")
1166            .or_else(|| self.0.get_i32("Master Index:"))
1167    }
1168
1169    /// Orbitrap / FT resolving power (e.g. 60000, 120000).
1170    pub fn ft_resolution(&self) -> Option<i32> {
1171        self.orbitrap_resolution()
1172    }
1173
1174    /// Number of lock masses found / matched.
1175    pub fn number_of_lm_found(&self) -> Option<i32> {
1176        self.number_of_lock_masses()
1177    }
1178
1179    /// Lock-mass m/z correction applied (ppm).
1180    pub fn lm_correction_ppm(&self) -> Option<f64> {
1181        self.lock_mass_correction_ppm()
1182    }
1183
1184    /// AGC target fill value (ion count).
1185    pub fn agc_target(&self) -> Option<i32> {
1186        self.0.get_i32("AGC Target:")
1187    }
1188
1189    /// Whether automated gain control (AGC) was active.
1190    pub fn agc_enabled(&self) -> Option<bool> {
1191        match self.0.get("AGC:")? {
1192            GenericValue::Bool(b) => Some(*b),
1193            GenericValue::String(s) => Some(s.to_ascii_lowercase().contains("on")),
1194            _ => None,
1195        }
1196    }
1197
1198    /// Elapsed scan time in seconds (Orbitrap instruments only).
1199    pub fn elapsed_scan_time_s(&self) -> Option<f64> {
1200        self.0.get_f64("Elapsed Scan Time (sec):")
1201    }
1202
1203    /// Maximum allowed ion injection time in milliseconds.
1204    pub fn max_ion_time_ms(&self) -> Option<f64> {
1205        self.0.get_f64("Max. Ion Time (ms):")
1206    }
1207
1208    /// MSn isolation window width in m/z.
1209    ///
1210    /// Label varies: `"MS2 Isolation Width:"` (most common), `"MSn Isolation Width:"`,
1211    /// or `"Isolation Width (M/Z):"` on some firmware.
1212    pub fn isolation_width_mz(&self) -> Option<f64> {
1213        self.0
1214            .get_f64("MS2 Isolation Width:")
1215            .or_else(|| self.0.get_f64("MSn Isolation Width:"))
1216            .or_else(|| self.0.get_f64("Isolation Width (M/Z):"))
1217            .or_else(|| self.0.get_f64("MS2 Isolation Width (M/Z):"))
1218    }
1219
1220    /// MSn isolation window target m/z (the center of the isolation window).
1221    ///
1222    /// Some instruments write this separately from the precursor m/z; when
1223    /// absent, callers should fall back to [`Self::monoisotopic_mz`] or to
1224    /// the event's first reaction `precursor_mz`.
1225    pub fn isolation_target_mz(&self) -> Option<f64> {
1226        self.0
1227            .get_f64("MS2 Isolation Offset:")
1228            .or_else(|| self.0.get_f64("Target M/Z:"))
1229    }
1230
1231    /// Activation energy (eV or %) for the primary activation step.
1232    ///
1233    /// Tries several label variants present across instrument families.
1234    /// NCE (normalized collision energy) labels are checked first because
1235    /// they reflect the user-set method value and are what reference tools
1236    /// (ThermoRawFileParser, Proteome Discoverer) report.  eV labels are
1237    /// used as a fallback when no NCE label is present.
1238    ///
1239    /// Label priority:
1240    /// 1. `"HCD Energy:"` / `"HCD Energy V:"` / `"CE:"` - NCE string form
1241    /// 2. `"Normalized Collision Energy:"` - ion-trap CID NCE
1242    /// 3. `"HCD Energy (eV):"` - explicit eV label (Q Exactive HF-X, Exploris)
1243    /// 4. `"HCD Energy eV:"` - eV variant
1244    /// 5. `"Collision Energy (eV):"` - ITMS CID eV
1245    pub fn activation_energy(&self) -> Option<f64> {
1246        // NCE labels: preferred because they match the user-set method value.
1247        // Skip 0.0 (sentinel for "not set").
1248        for label in &["HCD Energy:", "HCD Energy V:", "CE:"] {
1249            if let Some(s) = self.0.get_string(label) {
1250                if let Ok(v) = s.trim().trim_end_matches('%').parse::<f64>() {
1251                    if v > 0.0 {
1252                        return Some(v);
1253                    }
1254                }
1255            }
1256        }
1257        if let Some(v) = self
1258            .0
1259            .get_f64("Normalized Collision Energy:")
1260            .filter(|&v| v > 0.0)
1261        {
1262            return Some(v);
1263        }
1264        // eV labels: used when no NCE label is available.
1265        if let Some(v) = self.0.get_f64("HCD Energy (eV):").filter(|&v| v > 0.0) {
1266            return Some(v);
1267        }
1268        if let Some(v) = self.0.get_f64("HCD Energy eV:").filter(|&v| v > 0.0) {
1269            return Some(v);
1270        }
1271        self.0
1272            .get_f64("Collision Energy (eV):")
1273            .filter(|&v| v > 0.0)
1274    }
1275
1276    /// Whether the value returned by [`activation_energy`] is a normalized
1277    /// collision energy (NCE, dimensionless %) rather than an absolute eV value.
1278    ///
1279    /// Returns `true` when `activation_energy` found a value from an NCE label
1280    /// (`HCD Energy:`, `HCD Energy V:`, `CE:`, or `Normalized Collision Energy:`).
1281    /// Returns `false` when only eV labels were present or no energy was found.
1282    pub fn activation_energy_is_nce(&self) -> bool {
1283        // Returns true if activation_energy() took the NCE path.
1284        for label in &["HCD Energy:", "HCD Energy V:", "CE:"] {
1285            if let Some(s) = self.0.get_string(label) {
1286                if let Ok(v) = s.trim().trim_end_matches('%').parse::<f64>() {
1287                    if v > 0.0 {
1288                        return true;
1289                    }
1290                }
1291            }
1292        }
1293        self.0
1294            .get_f64("Normalized Collision Energy:")
1295            .filter(|&v| v > 0.0)
1296            .is_some()
1297    }
1298
1299    /// Supplemental activation energy for EThcD scans (the HCD component).
1300    ///
1301    /// Returns `None` for non-EThcD scans.
1302    pub fn supplemental_activation_energy(&self) -> Option<f64> {
1303        if let Some(v) = self.0.get_f64("Supplemental Activation CE:") {
1304            return Some(v);
1305        }
1306        if let Some(s) = self.0.get_string("Supplemental Activation:") {
1307            return s.trim().trim_end_matches('%').parse::<f64>().ok();
1308        }
1309        None
1310    }
1311
1312    /// All possible charge states reported by the precursor selection algorithm.
1313    ///
1314    /// Returns `None` when the instrument did not report possible charges.
1315    /// Some firmware stores them as a space-delimited string (e.g. `"2 3"`);
1316    /// others use a typed integer for the single selected charge.
1317    pub fn possible_charge_states(&self) -> Option<Vec<u32>> {
1318        // String variant: "2 3 4"
1319        if let Some(s) = self.0.get_string("Possible Charge States:") {
1320            let v: Vec<u32> = s
1321                .split_whitespace()
1322                .filter_map(|t| t.parse::<u32>().ok())
1323                .collect();
1324            if !v.is_empty() {
1325                return Some(v);
1326            }
1327        }
1328        // Integer variant (single charge)
1329        if let Some(c) = self.charge_state() {
1330            if c > 0 {
1331                return Some(vec![c as u32]);
1332            }
1333        }
1334        None
1335    }
1336
1337    /// FAIMS compensation voltage in V (Orbitrap Fusion/Lumos with FAIMS Pro).
1338    pub fn faims_cv(&self) -> Option<f64> {
1339        self.0
1340            .get_f64("FAIMS CV:")
1341            .or_else(|| self.0.get_f32("FAIMS CV:").map(f64::from))
1342    }
1343
1344    /// Whether FAIMS voltage was active for this scan.
1345    pub fn faims_voltage_on(&self) -> Option<bool> {
1346        match self.0.get("FAIMS Voltage On:")? {
1347            GenericValue::Bool(b) => Some(*b),
1348            GenericValue::String(s) => Some(s.to_ascii_lowercase().contains("on")),
1349            _ => None,
1350        }
1351    }
1352
1353    /// S-Lens RF level (V), typically reported on Q Exactive family.
1354    pub fn s_lens_rf_level(&self) -> Option<f64> {
1355        self.0.get_f64("S-Lens RF Level:")
1356    }
1357
1358    /// AGC fill percentage (0.0-1.0), reported on Q Exactive HF family.
1359    pub fn agc_fill(&self) -> Option<f64> {
1360        self.0.get_f64("AGC Fill:")
1361    }
1362
1363    /// Orbitrap analyzer temperature (°C), where available.
1364    pub fn analyzer_temperature(&self) -> Option<f64> {
1365        self.0.get_f64("Analyzer Temperature:")
1366    }
1367
1368    /// PS injection time in milliseconds (pre-scan injection for Q Exactive).
1369    pub fn ps_injection_time_ms(&self) -> Option<f64> {
1370        self.0.get_f64("PS Inj. Time (ms):")
1371    }
1372
1373    /// Reagent ion injection time in milliseconds (ETD reagent).
1374    pub fn reagent_ion_injection_time_ms(&self) -> Option<f64> {
1375        self.0
1376            .get_f32("Reagent Ion Injection Time (ms):")
1377            .map(f64::from)
1378    }
1379
1380    /// Whether the reagent AGC was active.
1381    pub fn reagent_ion_agc(&self) -> Option<bool> {
1382        match self.0.get("Reagent Ion AGC:")? {
1383            GenericValue::Bool(b) => Some(*b),
1384            _ => None,
1385        }
1386    }
1387
1388    /// Source CID energy applied in the ion source (eV).
1389    pub fn source_cid_energy_ev(&self) -> Option<f64> {
1390        self.0
1391            .get_f64("Source CID eV:")
1392            .or_else(|| self.0.get_f32("API Source CID Energy:").map(f64::from))
1393    }
1394
1395    /// Dynamic retention time shift in minutes (Q Exactive HF-X AutoQC).
1396    pub fn dynamic_rt_shift_min(&self) -> Option<f64> {
1397        self.0.get_f64("Dynamic RT Shift (min):")
1398    }
1399
1400    /// Lock mass correction applied (ppm) - tries several label variants.
1401    pub fn lock_mass_correction_ppm(&self) -> Option<f64> {
1402        self.0
1403            .get_f64("LM Correction (ppm):")
1404            .or_else(|| self.0.get_f64("LM m/z-Correction (ppm):"))
1405    }
1406
1407    /// Number of lock masses found.
1408    pub fn number_of_lock_masses(&self) -> Option<i32> {
1409        self.0
1410            .get_i32("Number of LM Found:")
1411            .or_else(|| self.0.get_i32("Number of Lock Masses:"))
1412    }
1413
1414    /// Orbitrap resolution setting (not measured, but requested).
1415    pub fn orbitrap_resolution(&self) -> Option<i32> {
1416        self.0
1417            .get_i32("Orbitrap Resolution:")
1418            .or_else(|| self.0.get_i32("FT Resolution:"))
1419    }
1420
1421    /// SPS (Synchronous Precursor Selection) mass for MS3 channel N (0-based index).
1422    ///
1423    /// SPS masses are stored as `"SPS Mass 1:"`, `"SPS Mass 2:"`, ... (1-based).
1424    pub fn sps_mass(&self, channel: usize) -> Option<f32> {
1425        let label = format!("SPS Mass {}:", channel + 1);
1426        self.0.get_f32(&label)
1427    }
1428
1429    /// Conversion parameter A (Orbitrap m/z conversion polynomial).
1430    pub fn conversion_parameter_a(&self) -> Option<f64> {
1431        self.0.get_f64("Conversion Parameter A:")
1432    }
1433
1434    /// Conversion parameter B.
1435    pub fn conversion_parameter_b(&self) -> Option<f64> {
1436        self.0.get_f64("Conversion Parameter B:")
1437    }
1438
1439    /// Conversion parameter C.
1440    pub fn conversion_parameter_c(&self) -> Option<f64> {
1441        self.0.get_f64("Conversion Parameter C:")
1442    }
1443
1444    /// Raw over-fill time T (used for AGC computation).
1445    pub fn raw_ovft(&self) -> Option<f64> {
1446        self.0.get_f64("RawOvFtT:")
1447    }
1448
1449    /// Error in the isotopic envelope fit (used for charge-state scoring).
1450    pub fn isotopic_fit_error(&self) -> Option<f64> {
1451        self.0.get_f64("Error in isotopic envelope fit:")
1452    }
1453
1454    /// Scan description string (arbitrary text, set by method or real-time software).
1455    pub fn scan_description(&self) -> Option<&str> {
1456        self.0.get_string("Scan Description:")
1457    }
1458
1459    /// Multi-inject info string (e.g. `"IT=45 "` for ion-trap fill time).
1460    pub fn multi_inject_info(&self) -> Option<&str> {
1461        self.0.get_string("Multi Inject Info:")
1462    }
1463
1464    /// HCD energy string - raw value as stored (may be `"28.00"`, `"28%"`, or `"N/A"`).
1465    pub fn hcd_energy(&self) -> Option<&str> {
1466        self.0
1467            .get_string("HCD Energy:")
1468            .or_else(|| self.0.get_string("HCD Energy V:"))
1469    }
1470}
1471
1472// ─── Status log (instrument log) typed accessor ─────────────────────────────
1473
1474/// Typed accessor for a per-scan instrument-status log entry.
1475///
1476/// The instrument log records instrument-state values (temperatures, voltages,
1477/// pressures, etc.) at the time each scan was acquired. The schema varies
1478/// across instrument models.
1479pub struct StatusLogEntry<'a>(pub &'a GenericRecord);
1480
1481impl<'a> StatusLogEntry<'a> {
1482    /// Return the raw record for direct field access.
1483    #[inline]
1484    pub fn record(&self) -> &GenericRecord {
1485        self.0
1486    }
1487
1488    /// Ion injection time in milliseconds (present on Orbitrap family).
1489    pub fn ion_injection_time_ms(&self) -> Option<f64> {
1490        self.0
1491            .get_f64("Ion Injection Time (ms):")
1492            .or_else(|| self.0.get_f64("Ion Inject Time (ms):"))
1493    }
1494
1495    /// Orbitrap / FT resolving power setting.
1496    pub fn ft_resolution(&self) -> Option<i32> {
1497        self.0
1498            .get_i32("Orbitrap Resolution:")
1499            .or_else(|| self.0.get_i32("FT Resolution:"))
1500    }
1501
1502    /// FAIMS compensation voltage (V).
1503    pub fn faims_cv(&self) -> Option<f64> {
1504        self.0
1505            .get_f64("FAIMS CV:")
1506            .or_else(|| self.0.get_f32("FAIMS CV:").map(f64::from))
1507    }
1508
1509    /// S-Lens RF level (V).
1510    pub fn s_lens_rf_level(&self) -> Option<f64> {
1511        self.0.get_f64("S-Lens RF Level:")
1512    }
1513
1514    /// Orbitrap / analyzer temperature (°C).
1515    pub fn analyzer_temperature(&self) -> Option<f64> {
1516        self.0
1517            .get_f64("Analyzer Temperature:")
1518            .or_else(|| self.0.get_f32("Analyzer Temperature:").map(f64::from))
1519    }
1520
1521    /// API (spray) source voltage (V).
1522    pub fn spray_voltage(&self) -> Option<f64> {
1523        self.0
1524            .get_f64("Spray Voltage (V):")
1525            .or_else(|| self.0.get_f64("Spray Voltage:"))
1526            .or_else(|| self.0.get_f32("Spray Voltage:").map(f64::from))
1527    }
1528
1529    /// Lock mass reference correction (ppm).
1530    pub fn lock_mass_correction_ppm(&self) -> Option<f64> {
1531        self.0
1532            .get_f64("LM Correction (ppm):")
1533            .or_else(|| self.0.get_f64("LM m/z-Correction (ppm):"))
1534    }
1535
1536    /// Capillary temperature (°C).
1537    pub fn capillary_temperature(&self) -> Option<f64> {
1538        self.0
1539            .get_f64("Capillary Temp (°C):")
1540            .or_else(|| self.0.get_f64("Capillary Temp:"))
1541            .or_else(|| self.0.get_f32("Capillary Temp:").map(f64::from))
1542    }
1543
1544    /// Number of lock masses found.
1545    pub fn number_of_lock_masses(&self) -> Option<i32> {
1546        self.0
1547            .get_i32("Number of LM Found:")
1548            .or_else(|| self.0.get_i32("Number of Lock Masses:"))
1549    }
1550
1551    /// Get any field by name (pass-through to the underlying record).
1552    pub fn get(&self, label: &str) -> Option<&GenericValue> {
1553        self.0.get(label)
1554    }
1555
1556    /// Get a float64 field by name.
1557    pub fn get_f64(&self, label: &str) -> Option<f64> {
1558        self.0.get_f64(label)
1559    }
1560
1561    /// Get an int32 field by name.
1562    pub fn get_i32(&self, label: &str) -> Option<i32> {
1563        self.0.get_i32(label)
1564    }
1565
1566    /// Get a string field by name.
1567    pub fn get_string(&self, label: &str) -> Option<&str> {
1568        self.0.get_string(label)
1569    }
1570}