opentfraw/reader.rs
1use std::collections::HashMap;
2use std::io::{Read, Seek, SeekFrom};
3
4use crate::error::{Error, Result};
5use crate::error_log::ErrorEntry;
6use crate::generic_data::{GenericDataHeader, GenericRecord, GenericValue};
7use crate::header::FileHeader;
8use crate::raw_file_info::RawFileInfo;
9use crate::run_header::RunHeader;
10use crate::scan_data::{
11 read_flat_peaks, read_scan_srm_v66, search_v63_transition, Peak, ScanDataPacket,
12};
13use crate::scan_event::{ScanEvent, ScanEventPreamble};
14use crate::scan_index::ScanIndexEntry;
15use crate::seq_row::SeqRow;
16
17/// Low-level binary reading helpers.
18pub(crate) struct BinaryReader<R> {
19 inner: R,
20 pos: u64,
21}
22
23impl<R: Read + Seek> BinaryReader<R> {
24 pub fn new(inner: R) -> Self {
25 Self { inner, pos: 0 }
26 }
27
28 pub fn into_inner(self) -> R {
29 self.inner
30 }
31
32 #[allow(dead_code)]
33 pub(crate) fn position(&self) -> u64 {
34 self.pos
35 }
36
37 pub fn seek_to(&mut self, offset: u64) -> Result<()> {
38 self.inner.seek(SeekFrom::Start(offset))?;
39 self.pos = offset;
40 Ok(())
41 }
42
43 pub fn read_bytes(&mut self, n: usize) -> Result<Vec<u8>> {
44 let mut buf = vec![0u8; n];
45 self.inner.read_exact(&mut buf).map_err(|e| {
46 if e.kind() == std::io::ErrorKind::UnexpectedEof {
47 Error::UnexpectedEof {
48 offset: self.pos,
49 needed: n,
50 }
51 } else {
52 Error::Io(e)
53 }
54 })?;
55 self.pos += n as u64;
56 Ok(buf)
57 }
58
59 pub fn read_bytes_into(&mut self, buf: &mut [u8]) -> Result<()> {
60 let n = buf.len();
61 self.inner.read_exact(buf).map_err(|e| {
62 if e.kind() == std::io::ErrorKind::UnexpectedEof {
63 Error::UnexpectedEof {
64 offset: self.pos,
65 needed: n,
66 }
67 } else {
68 Error::Io(e)
69 }
70 })?;
71 self.pos += n as u64;
72 Ok(())
73 }
74
75 pub fn skip(&mut self, n: usize) -> Result<()> {
76 self.inner.seek(SeekFrom::Current(n as i64))?;
77 self.pos += n as u64;
78 Ok(())
79 }
80
81 pub fn length(&mut self) -> Result<u64> {
82 let cur = self.pos;
83 let end = self.inner.seek(SeekFrom::End(0))?;
84 self.inner.seek(SeekFrom::Start(cur))?;
85 self.pos = cur;
86 Ok(end)
87 }
88
89 pub fn read_u8(&mut self) -> Result<u8> {
90 let mut buf = [0u8; 1];
91 self.read_bytes_into(&mut buf)?;
92 Ok(buf[0])
93 }
94
95 pub fn read_u16(&mut self) -> Result<u16> {
96 let mut buf = [0u8; 2];
97 self.read_bytes_into(&mut buf)?;
98 Ok(u16::from_le_bytes(buf))
99 }
100
101 pub fn read_i16(&mut self) -> Result<i16> {
102 let mut buf = [0u8; 2];
103 self.read_bytes_into(&mut buf)?;
104 Ok(i16::from_le_bytes(buf))
105 }
106
107 pub fn read_u32(&mut self) -> Result<u32> {
108 let mut buf = [0u8; 4];
109 self.read_bytes_into(&mut buf)?;
110 Ok(u32::from_le_bytes(buf))
111 }
112
113 pub fn read_i32(&mut self) -> Result<i32> {
114 let mut buf = [0u8; 4];
115 self.read_bytes_into(&mut buf)?;
116 Ok(i32::from_le_bytes(buf))
117 }
118
119 pub fn read_u64(&mut self) -> Result<u64> {
120 let mut buf = [0u8; 8];
121 self.read_bytes_into(&mut buf)?;
122 Ok(u64::from_le_bytes(buf))
123 }
124
125 pub fn read_f32(&mut self) -> Result<f32> {
126 let mut buf = [0u8; 4];
127 self.read_bytes_into(&mut buf)?;
128 Ok(f32::from_le_bytes(buf))
129 }
130
131 pub fn read_f64(&mut self) -> Result<f64> {
132 let mut buf = [0u8; 8];
133 self.read_bytes_into(&mut buf)?;
134 Ok(f64::from_le_bytes(buf))
135 }
136
137 pub fn read_i8(&mut self) -> Result<i8> {
138 let mut buf = [0u8; 1];
139 self.read_bytes_into(&mut buf)?;
140 Ok(buf[0] as i8)
141 }
142
143 /// Read a fixed-width UTF-16-LE string of `byte_len` bytes, stripping null padding.
144 pub fn read_utf16_fixed(&mut self, byte_len: usize) -> Result<String> {
145 let pos = self.pos;
146 let raw = self.read_bytes(byte_len)?;
147 if byte_len % 2 != 0 {
148 return Err(Error::InvalidUtf16(pos));
149 }
150 let units: Vec<u16> = raw
151 .chunks_exact(2)
152 .map(|c| u16::from_le_bytes([c[0], c[1]]))
153 .collect();
154 // Find null terminator
155 let end = units.iter().position(|&u| u == 0).unwrap_or(units.len());
156 String::from_utf16(&units[..end]).map_err(|_| Error::InvalidUtf16(pos))
157 }
158
159 /// Read a PascalStringWin32: UInt32 char count, then that many UTF-16-LE code units.
160 pub fn read_pascal_string(&mut self) -> Result<String> {
161 let pos = self.pos;
162 let char_count = self.read_u32()? as usize;
163 if char_count == 0 {
164 return Ok(String::new());
165 }
166 let byte_len = char_count.checked_mul(2).ok_or(Error::InvalidUtf16(pos))?;
167 let raw = self.read_bytes(byte_len)?;
168 let units: Vec<u16> = raw
169 .chunks_exact(2)
170 .map(|c| u16::from_le_bytes([c[0], c[1]]))
171 .collect();
172 // Strip trailing nulls
173 let end = units.iter().position(|&u| u == 0).unwrap_or(units.len());
174 String::from_utf16(&units[..end]).map_err(|_| Error::InvalidUtf16(pos))
175 }
176
177 /// Read a Windows FILETIME and return Unix timestamp as f64 seconds.
178 pub fn read_windows_filetime(&mut self) -> Result<f64> {
179 let ft = self.read_u64()?;
180 if ft == 0 {
181 return Ok(0.0);
182 }
183 Ok((ft as f64 / 10_000_000.0) - 11_644_473_600.0)
184 }
185}
186
/// A parsed Thermo Fisher RAW file.
///
/// Every field is filled in by [`RawFileReader::open`]. The reader does not
/// keep the byte source: methods that fetch scan data take the source again
/// as an argument.
pub struct RawFileReader {
    /// File header; the first structure read and the origin of `version`.
    pub header: FileHeader,
    /// Sequence row, read immediately after the file header.
    pub seq_row: SeqRow,
    /// RawFileInfo block; its preamble supplies `data_addr` and the list of
    /// per-controller RunHeader addresses.
    pub raw_file_info: RawFileInfo,
    /// RunHeader of the controller selected as the MS controller at open time.
    pub run_header: RunHeader,
    /// One index entry per scan, in scan-number order starting at the run
    /// header's first scan number.
    pub scan_index: Vec<ScanIndexEntry>,
    /// Scan events read from the scan-event trailer stream.
    pub scan_events: Vec<ScanEvent>,
    /// Schema of the per-scan parameter records (empty for versions below 64
    /// or when no schema was located).
    pub scan_parameters_header: GenericDataHeader,
    /// Per-scan parameter records, decoded with `scan_parameters_header`.
    pub scan_parameters: Vec<GenericRecord>,
    /// Error-log entries.
    pub error_log: Vec<ErrorEntry>,
    /// Schema of the instrument-log records; the instrument log uses the same
    /// generic-data structure as the scan parameters.
    pub inst_log_header: GenericDataHeader,
    /// Instrument-log records, decoded with `inst_log_header`.
    pub inst_log: Vec<GenericRecord>,
    /// Raw file version from the header.
    pub version: u32,
    /// Number of scans.
    pub num_scans: u32,
    /// Data stream base address (for computing absolute scan offsets).
    pub data_addr: u64,
    /// True if scan data uses flat-peak format (TSQ/SRM) instead of PacketHeader.
    pub flat_peaks: bool,
    /// Detected scan-data encoding (the format used by [`Self::read_scan_peaks`]).
    pub scan_format: crate::scan_format::ScanDataFormat,
    /// Detected device family (informational).
    pub device_family: crate::device::DeviceFamily,
    /// Canonical instrument model name if one was detected in the file's
    /// metadata region (e.g. `"Orbitrap Fusion Lumos"`). `None` means only
    /// the coarse family could be inferred.
    pub instrument_model: Option<&'static str>,
    /// For SRM (flat-peak) files: maps scan_event index → Q1 precursor mass (m/z).
    ///
    /// Populated at open time by scanning the method/transition table stored
    /// in the pre-scan-data header region. Empty for non-SRM instruments.
    pub srm_q1_by_event: HashMap<u16, f64>,
    /// For SRM (flat-peak) files: maps scan_event index → Q3 isolation window pairs (lo, hi) in m/z.
    ///
    /// Populated at open time by reading the Q3 window table from the first scan record
    /// of each unique scan event class. Empty for non-SRM instruments.
    pub srm_q3_windows: HashMap<u16, Vec<(f32, f32)>>,
    /// For SRM (flat-peak) files: maps scan_event index → collision energy (eV).
    ///
    /// Populated from the v63 transition table at open time. For v66 files the
    /// collision energy is read from per-scan parameters instead, so this map is
    /// empty for v66/TSQ Altis files.
    pub srm_ce_by_event: HashMap<u16, f64>,
}
234
235// ─── Multi-controller metadata ───────────────────────────────────────────────
236
/// Controller type codes as used in Thermo RAW files.
///
/// Only `Ms` and `Other` are produced by the heuristic in this file; the
/// remaining variants exist for callers that can classify a controller more
/// precisely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControllerType {
    Ms,
    Analog,
    Adc,
    Pda,
    Uv,
    Other,
}

impl ControllerType {
    /// Guess a controller type from two RunHeader counters.
    ///
    /// Note the argument order: `ntrailer` comes first, then `nsegs`.
    ///
    /// A controller is classified as `Ms` when it has at least one trailer
    /// entry (`ntrailer > 0`) or more than one segment (`nsegs > 1`).
    /// Everything else (no trailer and at most one segment) is reported as
    /// `Other`: the non-MS kinds (UV, analog, PDA) cannot be told apart
    /// without parsing the InstID/method block.
    fn from_nsegs_ntrailer(ntrailer: u32, nsegs: u32) -> Self {
        match (ntrailer, nsegs) {
            (0, 0) | (0, 1) => Self::Other,
            _ => Self::Ms,
        }
    }
}
261
/// Minimal metadata about one controller in a multi-controller RAW file.
///
/// Produced by [`RawFileReader::controllers`], one value per non-zero
/// RunHeader address.
#[derive(Debug, Clone)]
pub struct ControllerInfo {
    /// Zero-based controller index (position in `run_header_addrs`).
    ///
    /// Slots whose address is zero are skipped, so indices of the returned
    /// entries are not necessarily contiguous.
    pub index: usize,
    /// File offset to this controller's RunHeader.
    pub run_header_addr: u64,
    /// Whether this controller matched the MS-controller heuristic.
    pub is_ms_controller: bool,
    /// Inferred controller type.
    pub controller_type: ControllerType,
    /// First scan number.
    pub first_scan: u32,
    /// Last scan number.
    pub last_scan: u32,
    /// Acquisition start time (minutes).
    pub start_time: f64,
    /// Acquisition end time (minutes).
    pub end_time: f64,
}
282
283impl RawFileReader {
284 /// Open and parse a RAW file from a reader.
285 pub fn open<R: Read + Seek>(source: R) -> Result<Self> {
286 let mut r = BinaryReader::new(source);
287
288 // 1. FileHeader
289 let header = FileHeader::read(&mut r)?;
290 let version = header.version;
291
292 // 2. SeqRow
293 let seq_row = SeqRow::read(&mut r, version)?;
294
295 // 3. ASInfo (read and discard preamble + string)
296 let _as_preamble = r.read_bytes(24)?; // ASInfoPreamble: 24 bytes
297 let _as_text = r.read_pascal_string()?;
298
299 // 4. RawFileInfo
300 let raw_file_info = RawFileInfo::read(&mut r, version)?;
301
302 // 5. Extract addresses
303 let data_addr = raw_file_info.preamble.data_addr;
304
305 // 6. Select the MS controller RunHeader.
306 // Multi-controller files (e.g. UV + MS) have one RunHeader per controller.
307 // The MS controller has ntrailer > 0 (v64+) or first_scan <= last_scan with
308 // nsegs > 0 (v63 and earlier). We iterate all addresses and pick the best.
309 let run_header = {
310 let addrs = &raw_file_info.preamble.run_header_addrs;
311 let mut chosen = None;
312 for &addr in addrs {
313 if addr == 0 {
314 continue;
315 }
316 r.seek_to(addr)?;
317 let rh = RunHeader::read(&mut r, version)?;
318 // Heuristic for identifying the MS controller:
319 // 1. For v64+: ntrailer > 0 (scan events present) - catches most instruments.
320 // 2. For all versions: RunHeader.data_addr == preamble.data_addr - the MS
321 // controller's scan data begins at the same address the preamble declares.
322 // This catches TSQ/triple-quad instruments where ntrailer=0 (no scan events).
323 // 3. Pre-v64 fallback: valid scan range with nsegs > 0.
324 let is_ms = if version >= 64 {
325 rh.ntrailer > 0 || rh.data_addr == data_addr
326 } else {
327 rh.sample_info.last_scan_number >= rh.sample_info.first_scan_number
328 && rh.nsegs > 0
329 };
330 if is_ms {
331 chosen = Some(rh);
332 break;
333 }
334 }
335 // Fall back to first address if no MS controller found
336 match chosen {
337 Some(rh) => rh,
338 None => {
339 r.seek_to(addrs[0])?;
340 RunHeader::read(&mut r, version)?
341 }
342 }
343 };
344
345 let first_scan = run_header.sample_info.first_scan_number;
346 let last_scan = run_header.sample_info.last_scan_number;
347
348 let num_scans = if last_scan >= first_scan {
349 last_scan - first_scan + 1
350 } else {
351 0
352 };
353
354 // 7. Scan index
355 r.seek_to(run_header.scan_index_addr)?;
356 let mut scan_index = Vec::with_capacity(num_scans as usize);
357 for _ in 0..num_scans {
358 scan_index.push(ScanIndexEntry::read(&mut r, version)?);
359 }
360
361 // 8. Scan event trailer
362 r.seek_to(run_header.scan_trailer_addr)?;
363 let n_events = if version >= 64 {
364 // v64+: first u32 is a preamble (not count); use ntrailer from RunHeader
365 let _preamble = r.read_u32()?;
366 run_header.ntrailer
367 } else {
368 r.read_u32()?
369 };
370 // For v66, compute per-event body sizes from the stream's address range.
371 // The scan event stream spans [scan_trailer_addr+4 .. scan_params_addr).
372 // Each event = preamble (136 bytes) + body.
373 //
374 // Simple instruments (Q Exactive, Exploris): all events are identical in
375 // size so stream_bytes divides evenly by n_events.
376 //
377 // Tribrid instruments (Eclipse, Fusion Lumos): primary (MS1) scans and
378 // dependent (MS2+) scans have different body layouts:
379 // Primary event: 232 bytes total (preamble 136 + body 96)
380 // Dependent event: 344 bytes total (preamble 136 + body 208)
381 // Confirmed empirically across Orbitrap Eclipse (EThcD) and Fusion Lumos
382 // (DIA, MS3) files.
383 let preamble_size = ScanEventPreamble::size_for_version(version);
384 let (v66_body_primary, v66_body_dependent): (usize, usize) =
385 if version >= 66 && n_events > 0 {
386 let stream_bytes = run_header
387 .scan_params_addr
388 .saturating_sub(run_header.scan_trailer_addr)
389 .saturating_sub(4);
390 let remainder = stream_bytes % n_events as u64;
391 if remainder == 0 {
392 // Uniform event size (Q Exactive, Exploris, etc.)
393 let body = (stream_bytes / n_events as u64) as usize;
394 let body = body.saturating_sub(preamble_size);
395 (body, body)
396 } else {
397 // Variable-length events: tribrid Orbitrap instruments.
398 // Known sizes: primary=232, dependent=344 (body 96 and 208).
399 const PRIMARY_EVENT: u64 = 232;
400 const DEPENDENT_EVENT: u64 = 344;
401 let gap = DEPENDENT_EVENT - PRIMARY_EVENT;
402 let n = n_events as u64;
403 // n_primary * PRIMARY_EVENT + n_dependent * DEPENDENT_EVENT = stream_bytes
404 // n_primary + n_dependent = n
405 // => n_primary = (n * DEPENDENT_EVENT - stream_bytes) / gap
406 let n_primary_numerator = n
407 .saturating_mul(DEPENDENT_EVENT)
408 .saturating_sub(stream_bytes);
409 if n_primary_numerator % gap == 0 {
410 let n_primary = n_primary_numerator / gap;
411 let n_dependent = n.saturating_sub(n_primary);
412 let total_check = n_primary * PRIMARY_EVENT + n_dependent * DEPENDENT_EVENT;
413 if total_check == stream_bytes {
414 // Verified: use the tribrid sizes.
415 (
416 (PRIMARY_EVENT as usize).saturating_sub(preamble_size),
417 (DEPENDENT_EVENT as usize).saturating_sub(preamble_size),
418 )
419 } else {
420 // Fallback: use floor-average uniform body
421 let body = ((stream_bytes / n) as usize).saturating_sub(preamble_size);
422 (body, body)
423 }
424 } else {
425 // Fallback: use floor-average uniform body
426 let body = ((stream_bytes / n) as usize).saturating_sub(preamble_size);
427 (body, body)
428 }
429 }
430 } else {
431 (0, 0)
432 };
433 let mut scan_events = Vec::with_capacity(n_events as usize);
434 for _ in 0..n_events {
435 scan_events.push(ScanEvent::read(
436 &mut r,
437 version,
438 v66_body_primary,
439 v66_body_dependent,
440 )?);
441 }
442
443 // 9. Error log
444 let n_errors = run_header.sample_info.error_log_length;
445 let error_log = if n_errors > 0 {
446 r.seek_to(run_header.error_log_addr)?;
447 if version >= 64 {
448 let _preamble = r.read_u32()?;
449 }
450 let mut log = Vec::with_capacity(n_errors as usize);
451 for _ in 0..n_errors {
452 log.push(ErrorEntry::read(&mut r)?);
453 }
454 log
455 } else {
456 // Ensure reader is positioned at error_log_addr even when empty.
457 r.seek_to(run_header.error_log_addr)?;
458 Vec::new()
459 };
460 // The GDH for scan parameters immediately follows the error-log entries.
461 // Do NOT seek back to error_log_addr - doing so would cause find_forward
462 // to scan over the scan_index (which may sit between error_log and
463 // scan_trailer in some file layouts), creating a CPU-spinning O(n) search
464 // through megabytes of binary scan data.
465 let after_error_log = r.position();
466
467 // 10. Scan parameters (trailer extra) - GenericData format in v64+.
468 // The schema (GDH) is written just after the error-log entries;
469 // the records are written at `scan_params_addr` (tail of file)
470 // with NO stream preamble - records begin directly at
471 // scan_params_addr. Any bytes after the last record are trailing
472 // padding and can be ignored.
473 let (scan_parameters_header, scan_parameters) = if version >= 64 {
474 // Search from after the error log entries up to scan_trailer.
475 // This skips any scan_index data that may sit in between.
476 let scan_distance = run_header.scan_trailer_addr.saturating_sub(after_error_log);
477 // Estimate per-record size from the tail of the file using integer
478 // division. Any remainder bytes are trailing data, not a preamble.
479 let file_size = r.length()?;
480 let tail = file_size.saturating_sub(run_header.scan_params_addr);
481 let expected_record_size = if num_scans > 0 && tail > 0 {
482 let per_scan = tail / num_scans as u64;
483 if per_scan >= 4 {
484 Some(per_scan as usize)
485 } else {
486 None
487 }
488 } else {
489 None
490 };
491 match GenericDataHeader::find_forward(&mut r, scan_distance, expected_record_size)? {
492 Some(hdr) => {
493 // Records start directly at scan_params_addr - no stream preamble.
494 r.seek_to(run_header.scan_params_addr)?;
495 let mut params = Vec::with_capacity(num_scans as usize);
496 for _ in 0..num_scans {
497 params.push(GenericRecord::read(&mut r, &hdr)?);
498 }
499 (hdr, params)
500 }
501 None => (GenericDataHeader { fields: Vec::new() }, Vec::new()),
502 }
503 } else {
504 (GenericDataHeader { fields: Vec::new() }, Vec::new())
505 };
506
507 // 11. Instrument log - GenericData format in v64+
508 let (inst_log_header, inst_log) = if version >= 64 {
509 r.seek_to(run_header.inst_log_addr)?;
510 match GenericDataHeader::try_read(&mut r)? {
511 Some(hdr) => {
512 let n_inst = run_header.sample_info.inst_log_length;
513 let mut log = Vec::with_capacity(n_inst as usize);
514 for _ in 0..n_inst {
515 log.push(GenericRecord::read(&mut r, &hdr)?);
516 }
517 (hdr, log)
518 }
519 None => (GenericDataHeader { fields: Vec::new() }, Vec::new()),
520 }
521 } else {
522 (GenericDataHeader { fields: Vec::new() }, Vec::new())
523 };
524
525 // Detect flat-peak (TSQ/SRM) format.
526 // Reliable indicator: ntrailer == 0 means no scan event trailer was written, which
527 // is the case for all TSQ/triple-quad SRM instruments.
528 // Fallback: first scan data_size < 100 (catches edge cases with tiny SRM windows).
529 // In the flat format, data_size is the number of MRM peaks, not bytes.
530 let flat_peaks = run_header.ntrailer == 0
531 || scan_index
532 .first()
533 .map(|e| e.data_size < 100)
534 .unwrap_or(false);
535
536 // Classify scan format and device family.
537 let scan_format = crate::scan_format::ScanDataFormat::detect(version, flat_peaks);
538 let first_analyzer = scan_events.first().and_then(|e| e.preamble.analyzer());
539
540 // For SRM (flat-peak) files, read the entire pre-scan-data region so that
541 // we can extract Q1 values from the method/transition table stored there.
542 // For other instruments, read only 64 KB for instrument model detection.
543 let scan_window_cap = if flat_peaks { data_addr } else { 64 * 1024u64 };
544 let window_len = scan_window_cap.min(data_addr);
545 let metadata_window = if window_len > 0 {
546 r.seek_to(0)?;
547 r.read_bytes(window_len as usize).unwrap_or_default()
548 } else {
549 Vec::new()
550 };
551 // All BinaryReader operations are complete; reclaim the underlying source so
552 // it can be used for on-demand reads (e.g. Q3 window table from scan records).
553 let mut source = r.into_inner();
554 let detected = crate::device::DeviceFamily::detect_instrument(
555 &metadata_window,
556 &header.audit_start.tag2,
557 &seq_row.inst_method,
558 first_analyzer,
559 );
560 let device_family = detected.family;
561 let instrument_model = detected.model;
562
563 // For SRM files: extract Q1 masses, Q3 window pairs, and (for v63) collision energies
564 // from the pre-scan-data header region and/or the scan data records.
565 //
566 // v66 (TSQ Quantiva / TSQ Altis, FlatV66):
567 // Transition table layout: [Q1: f64][Q3_lo: f64][Q3_hi: f64] per channel.
568 // Anchor: scan_index.high_mz equals the Q3_hi of the highest-Q3 channel for each
569 // event class. Q3 window pairs come from the per-scan record header.
570 //
571 // v63 (TSQ Quantum / TSQ Vantage, FlatV63):
572 // Transition table layout: 72-byte records; Q1 at [+16], Q3_center at [+24],
573 // Q3_width at [+32], CE at [+48]. scan_index.low_mz/high_mz hold the instrument
574 // scan range (not per-transition values), so the high_mz anchor does not apply.
575 // Q3 centers come from the first scan's peak list; Q3 windows are computed as
576 // Q3_center ± Q3_width/2.
577 let (srm_q1_by_event, srm_q3_windows, srm_ce_by_event) = {
578 use crate::scan_format::ScanDataFormat;
579 match (flat_peaks, scan_format) {
580 (true, ScanDataFormat::FlatV66) if metadata_window.len() >= 24 => {
581 // --- v66 Q1 extraction: anchor on scan_index.high_mz ---
582 let mut event_q3_hi: HashMap<u16, f64> = HashMap::new();
583 for entry in &scan_index {
584 if entry.high_mz > 50.0 && entry.high_mz < 2000.0 {
585 event_q3_hi.entry(entry.scan_event).or_insert(entry.high_mz);
586 }
587 }
588 let data = &metadata_window;
589 let mut q1_map: HashMap<u16, f64> = HashMap::new();
590 'outer_v66: for (&event, &q3_hi_target) in &event_q3_hi {
591 let end = data.len().saturating_sub(8);
592 for i in 16..end {
593 let hi = f64::from_le_bytes(data[i..i + 8].try_into().unwrap());
594 if (hi - q3_hi_target).abs() < 0.002 {
595 let lo = f64::from_le_bytes(data[i - 8..i].try_into().unwrap());
596 if hi > lo && (hi - lo) < 0.1 {
597 let q1 =
598 f64::from_le_bytes(data[i - 16..i - 8].try_into().unwrap());
599 if q1 > 50.0 && q1 < 3000.0 {
600 q1_map.insert(event, q1);
601 continue 'outer_v66;
602 }
603 }
604 }
605 }
606 }
607 // --- v66 Q3 window extraction: read per-scan record header ---
608 let mut seen: HashMap<u16, bool> = HashMap::new();
609 let mut q3_map: HashMap<u16, Vec<(f32, f32)>> = HashMap::new();
610 for entry in &scan_index {
611 if seen.contains_key(&entry.scan_event) {
612 continue;
613 }
614 seen.insert(entry.scan_event, true);
615 if let Ok(windows) = crate::scan_data::read_scan_srm_v66_windows(
616 &mut source,
617 data_addr,
618 entry.offset,
619 ) {
620 if !windows.is_empty() {
621 q3_map.insert(entry.scan_event, windows);
622 }
623 }
624 }
625 (q1_map, q3_map, HashMap::new())
626 }
627 (true, ScanDataFormat::FlatV63) => {
628 // --- v63 Q1 + Q3 window + CE extraction ---
629 // Read peaks from the first scan of each event class; each peak's mz
630 // is the Q3 center for that channel. Search the pre-data region for
631 // the Q3_center value to find Q1, Q3_width, and CE from the transition
632 // table. Q3 windows are computed as (Q3_center - width/2, Q3_center + width/2).
633 let mut seen: HashMap<u16, bool> = HashMap::new();
634 let mut q1_map: HashMap<u16, f64> = HashMap::new();
635 let mut q3_map: HashMap<u16, Vec<(f32, f32)>> = HashMap::new();
636 let mut ce_map: HashMap<u16, f64> = HashMap::new();
637 let data = &metadata_window;
638 for entry in &scan_index {
639 let ev = entry.scan_event;
640 if seen.contains_key(&ev) {
641 continue;
642 }
643 seen.insert(ev, true);
644 let peaks = match read_flat_peaks(
645 &mut source,
646 data_addr,
647 entry.offset,
648 entry.data_size,
649 ) {
650 Ok(p) if !p.is_empty() => p,
651 _ => continue,
652 };
653 // Use the first peak's mz as Q3_center anchor.
654 if let Some((q1, q3w, ce)) = search_v63_transition(data, peaks[0].mz) {
655 q1_map.insert(ev, q1);
656 ce_map.insert(ev, ce);
657 let half = (q3w / 2.0) as f32;
658 let windows: Vec<(f32, f32)> = peaks
659 .iter()
660 .map(|p| (p.mz as f32 - half, p.mz as f32 + half))
661 .collect();
662 q3_map.insert(ev, windows);
663 }
664 }
665 (q1_map, q3_map, ce_map)
666 }
667 _ => (HashMap::new(), HashMap::new(), HashMap::new()),
668 }
669 };
670
671 Ok(Self {
672 header,
673 seq_row,
674 raw_file_info,
675 run_header,
676 scan_index,
677 scan_events,
678 scan_parameters_header,
679 scan_parameters,
680 error_log,
681 inst_log_header,
682 inst_log,
683 version,
684 num_scans,
685 data_addr,
686 flat_peaks,
687 scan_format,
688 device_family,
689 instrument_model,
690 srm_q1_by_event,
691 srm_q3_windows,
692 srm_ce_by_event,
693 })
694 }
695
696 /// Open a RAW file from a path.
697 pub fn open_path(path: impl AsRef<std::path::Path>) -> Result<Self> {
698 let file = std::fs::File::open(path)?;
699 let reader = std::io::BufReader::new(file);
700 Self::open(reader)
701 }
702
703 /// Enumerate all controllers in this RAW file.
704 ///
705 /// Multi-detector acquisition systems write one [`RunHeader`] per
706 /// controller (MS, UV, PDA, Analog). This method parses all controller
707 /// headers and returns a `Vec<ControllerInfo>` with basic metadata for
708 /// each. The primary MS controller can be identified via
709 /// [`ControllerInfo::is_ms_controller`].
710 ///
711 /// For single-controller files (the common case), this returns a
712 /// one-element vec with the MS controller.
713 pub fn controllers<R: Read + Seek>(&self, source: &mut R) -> Result<Vec<ControllerInfo>> {
714 let mut r = BinaryReader::new(source);
715 let addrs = &self.raw_file_info.preamble.run_header_addrs;
716 let mut infos = Vec::with_capacity(addrs.len());
717 for (i, &addr) in addrs.iter().enumerate() {
718 if addr == 0 {
719 continue;
720 }
721 r.seek_to(addr)?;
722 let rh = RunHeader::read(&mut r, self.version)?;
723 let is_ms = if self.version >= 64 {
724 rh.ntrailer > 0 || rh.data_addr == self.data_addr
725 } else {
726 rh.nsegs > 0
727 };
728 let ct = if is_ms {
729 ControllerType::Ms
730 } else {
731 ControllerType::from_nsegs_ntrailer(rh.ntrailer, rh.nsegs)
732 };
733 infos.push(ControllerInfo {
734 index: i,
735 run_header_addr: addr,
736 is_ms_controller: is_ms,
737 controller_type: ct,
738 first_scan: rh.sample_info.first_scan_number,
739 last_scan: rh.sample_info.last_scan_number,
740 start_time: rh.sample_info.start_time,
741 end_time: rh.sample_info.end_time,
742 });
743 }
744 Ok(infos)
745 }
746
747 /// Read a single scan data packet (PacketHeader format).
748 pub fn read_scan<R: Read + Seek>(
749 &self,
750 source: &mut R,
751 scan_number: u32,
752 ) -> Result<ScanDataPacket> {
753 let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
754 if idx >= self.scan_index.len() {
755 return Err(Error::AddressOutOfRange(scan_number as u64));
756 }
757 let entry = &self.scan_index[idx];
758 let abs_offset = self.data_addr + entry.offset;
759 source.seek(SeekFrom::Start(abs_offset))?;
760 let mut r = BinaryReader::new(source);
761 ScanDataPacket::read(&mut r)
762 }
763
764 /// Read a single scan as flat peaks (TSQ/SRM format).
765 ///
766 /// In this format, `entry.offset` is the cumulative end byte offset within
767 /// the data stream. Peaks are (f32, f32) pairs at the end of each record.
768 pub fn read_scan_flat<R: Read + Seek>(
769 &self,
770 source: &mut R,
771 scan_number: u32,
772 ) -> Result<Vec<Peak>> {
773 let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
774 if idx >= self.scan_index.len() {
775 return Err(Error::AddressOutOfRange(scan_number as u64));
776 }
777 let entry = &self.scan_index[idx];
778 read_flat_peaks(source, self.data_addr, entry.offset, entry.data_size)
779 }
780
781 /// Read a single scan in v66 SRM format (TSQ Quantiva / TSQ Altis).
782 ///
783 /// `entry.offset` is the START byte offset within the data stream.
784 /// The record is fixed-size (`entry.data_size` bytes) and contains:
785 /// n_peaks (u32), header, m/z window table, then peak triplets.
786 pub fn read_scan_srm_v66<R: Read + Seek>(
787 &self,
788 source: &mut R,
789 scan_number: u32,
790 ) -> Result<Vec<Peak>> {
791 let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
792 if idx >= self.scan_index.len() {
793 return Err(Error::AddressOutOfRange(scan_number as u64));
794 }
795 let entry = &self.scan_index[idx];
796 read_scan_srm_v66(source, self.data_addr, entry.offset, entry.data_size)
797 }
798
799 /// Read a single scan's peaks using whichever decoder matches this file's
800 /// scan-data format.
801 ///
802 /// This is the recommended high-level entry point. It dispatches on
803 /// [`Self::scan_format`] so callers do not have to know whether a file is
804 /// a TSQ SRM run (flat peaks) or an Orbitrap/ion-trap acquisition
805 /// (PacketHeader records).
806 ///
807 /// The returned `Vec<Peak>` contains centroided peaks regardless of the
808 /// underlying format. For PacketHeader files that also contain a profile
809 /// signal, use [`Self::read_scan`] to access both.
810 pub fn read_scan_peaks<R: Read + Seek>(
811 &self,
812 source: &mut R,
813 scan_number: u32,
814 ) -> Result<Vec<Peak>> {
815 use crate::scan_format::ScanDataFormat;
816 match self.scan_format {
817 ScanDataFormat::PacketHeader => {
818 let pkt = self.read_scan(source, scan_number)?;
819 Ok(pkt.peaks)
820 }
821 ScanDataFormat::FlatV63 => self.read_scan_flat(source, scan_number),
822 ScanDataFormat::FlatV66 => self.read_scan_srm_v66(source, scan_number),
823 }
824 }
825
826 /// Read centroided peaks only, skipping profile data.
827 ///
828 /// For PacketHeader files (Orbitrap / ion-trap), this skips the large
829 /// profile-data section, making it 2-10× faster than
830 /// [`Self::read_scan_peaks`] when only centroided m/z and intensity values
831 /// are needed (e.g. mzML export, peak area queries).
832 ///
833 /// For TSQ/SRM files this is identical to [`Self::read_scan_peaks`].
834 pub fn read_peaks_only<R: Read + Seek>(
835 &self,
836 source: &mut R,
837 scan_number: u32,
838 ) -> Result<Vec<Peak>> {
839 use crate::scan_format::ScanDataFormat;
840 match self.scan_format {
841 ScanDataFormat::PacketHeader => {
842 let idx = (scan_number - self.run_header.sample_info.first_scan_number) as usize;
843 if idx >= self.scan_index.len() {
844 return Err(Error::AddressOutOfRange(scan_number as u64));
845 }
846 let entry = &self.scan_index[idx];
847 let abs_offset = self.data_addr + entry.offset;
848 source.seek(SeekFrom::Start(abs_offset))?;
849 let mut r = BinaryReader::new(source);
850 ScanDataPacket::read_peaks_only(&mut r)
851 }
852 ScanDataFormat::FlatV63 => self.read_scan_flat(source, scan_number),
853 ScanDataFormat::FlatV66 => self.read_scan_srm_v66(source, scan_number),
854 }
855 }
856
857 /// Return the scan-parameter record for a given 1-based scan number.
858 ///
859 /// Returns `None` if the file has no scan-parameter stream or if
860 /// `scan_number` is outside the valid scan range.
861 pub fn scan_parameters(&self, scan_number: u32) -> Option<&GenericRecord> {
862 let first = self.run_header.sample_info.first_scan_number;
863 let idx = scan_number.checked_sub(first)? as usize;
864 self.scan_parameters.get(idx)
865 }
866
867 /// Return a typed view of the scan-parameter record for a given scan.
868 ///
869 /// This wraps [`Self::scan_parameters`] in a [`ScanParams`] accessor that
870 /// provides named, type-safe fields and handles label-name variations
871 /// across instrument families.
872 pub fn scan_params(&self, scan_number: u32) -> Option<ScanParams<'_>> {
873 self.scan_parameters(scan_number).map(ScanParams)
874 }
875
876 /// Return the raw instrument-log record for a given scan number, or
877 /// `None` if the scan is out of range or no instrument log was found.
878 ///
879 /// The instrument log contains per-scan instrument-state values:
880 /// temperatures, voltages, pressures, ion counts, etc.
881 pub fn inst_log_record(&self, scan_number: u32) -> Option<&GenericRecord> {
882 let first = self.run_header.sample_info.first_scan_number;
883 let idx = scan_number.checked_sub(first)? as usize;
884 self.inst_log.get(idx)
885 }
886
887 /// Return a typed [`StatusLogEntry`] view for the given scan number.
888 ///
889 /// This wraps [`Self::inst_log_record`] and provides named, type-safe
890 /// accessors for common instrument-status fields.
891 pub fn status_log_entry(&self, scan_number: u32) -> Option<StatusLogEntry<'_>> {
892 self.inst_log_record(scan_number).map(StatusLogEntry)
893 }
894
895 /// Return the canonical Thermo scan filter string for a given scan
896 /// (1-based scan number), or `None` if the scan is out of range.
897 ///
898 /// Example output: `"FTMS + p NSI Full ms [350.0000-1500.0000]"`.
899 ///
900 /// See [`crate::scan_filter`] for grammar details.
901 pub fn scan_filter(&self, scan_number: u32) -> Option<String> {
902 let first = self.run_header.sample_info.first_scan_number;
903 let idx = scan_number.checked_sub(first)? as usize;
904 let entry = self.scan_index.get(idx)?;
905
906 // SRM files have no scan events; build the filter string from
907 // the pre-loaded Q1 and Q3 window maps.
908 if self.flat_peaks {
909 let q1 = self.srm_q1_by_event.get(&entry.scan_event).copied()?;
910 let windows = self.srm_q3_windows.get(&entry.scan_event)?;
911 // v63 (TSQ Quantum/Vantage): NSI ionization, @cid{CE:.2} after Q1.
912 // v66 (TSQ Quantiva/Altis): ESI ionization, no CE in filter.
913 use crate::scan_format::ScanDataFormat;
914 let ionization = match self.scan_format {
915 ScanDataFormat::FlatV63 => "NSI",
916 _ => "ESI",
917 };
918 let ce_part = if self.scan_format == ScanDataFormat::FlatV63 {
919 self.srm_ce_by_event
920 .get(&entry.scan_event)
921 .map(|&ce| format!("@cid{:.2}", ce))
922 .unwrap_or_default()
923 } else {
924 String::new()
925 };
926 // Format: "+ c {ION} SRM ms2 {Q1:.3}{@cidCE} [{lo1:.3}-{hi1:.3}, ...]"
927 let mut s = format!("+ c {} SRM ms2 {:.3}{}", ionization, q1, ce_part);
928 if !windows.is_empty() {
929 s.push(' ');
930 s.push('[');
931 for (i, (lo, hi)) in windows.iter().enumerate() {
932 if i > 0 {
933 s.push_str(", ");
934 }
935 s.push_str(&format!("{:.3}-{:.3}", lo, hi));
936 }
937 s.push(']');
938 }
939 return Some(s);
940 }
941
942 let event = self.scan_events.get(idx)?;
943 // Precursor m/z and activation energy come from the per-scan params
944 // table (not the event body) for v66+. Fall back silently if missing.
945 let params = self.scan_params(scan_number);
946 let precursor = params.as_ref().and_then(|p| p.monoisotopic_mz());
947 let energy = params.as_ref().and_then(|p| p.activation_energy());
948 let supplemental = params
949 .as_ref()
950 .and_then(|p| p.supplemental_activation_energy());
951 Some(crate::scan_filter::build_filter(
952 event,
953 entry,
954 precursor,
955 energy,
956 supplemental,
957 ))
958 }
959
960 /// Return all scan retention times (minutes) in scan order (1-based scan numbers).
961 ///
962 /// This is equivalent to collecting `scan_index[i].start_time` for every scan.
963 /// The returned `Vec` is indexed by `scan_number - first_scan_number`.
964 pub fn retention_times(&self) -> Vec<f64> {
965 self.scan_index.iter().map(|e| e.start_time).collect()
966 }
967
968 /// Return a per-scan chromatogram as `(retention_time_min, tic)` pairs.
969 pub fn tic_chromatogram(&self) -> Vec<(f64, f64)> {
970 self.scan_index
971 .iter()
972 .map(|e| (e.start_time, e.total_current))
973 .collect()
974 }
975
976 /// Return a per-scan base-peak chromatogram as `(retention_time_min, bpi, base_mz)` triples.
977 pub fn bpc_chromatogram(&self) -> Vec<(f64, f64, f64)> {
978 self.scan_index
979 .iter()
980 .map(|e| (e.start_time, e.base_intensity, e.base_mz))
981 .collect()
982 }
983
984 /// Return the instrument method file path or name as stored in the
985 /// sequence row. This is the name of the method used during acquisition
986 /// (e.g. `"Standard_HCD.meth"`), not the embedded method text.
987 ///
988 /// See also [`Self::instrument_method_text`] for extracting the embedded
989 /// XML/text method body from the file.
990 pub fn instrument_method_name(&self) -> &str {
991 &self.seq_row.inst_method
992 }
993
994 /// Attempt to extract the embedded instrument method text from the RAW file.
995 ///
996 /// Thermo RAW files embed the acquisition method as a UTF-16LE text or
997 /// XML blob in the metadata region. This method scans the bytes between
998 /// the start of the file and the scan data for the longest contiguous
999 /// block of valid UTF-16LE text (at least 256 characters long) and returns
1000 /// it as a `String`.
1001 ///
1002 /// Returns `None` if no suitable text block is found or if the method was
1003 /// not embedded (`method_file_present == false`).
1004 ///
1005 /// Note: This is a best-effort extraction. The result is the raw text
1006 /// content; callers may wish to trim or parse it further.
1007 pub fn instrument_method_text<R: Read + Seek>(&self, source: &mut R) -> Option<String> {
1008 if !self.raw_file_info.preamble.method_file_present {
1009 return None;
1010 }
1011 // Read metadata region: from byte 0 up to (but not including) scan data.
1012 // Cap at 512 KB to avoid reading very large files entirely.
1013 const MAX_WINDOW: u64 = 512 * 1024;
1014 let window_len = MAX_WINDOW.min(self.data_addr) as usize;
1015 if window_len < 4 {
1016 return None;
1017 }
1018 source.seek(std::io::SeekFrom::Start(0)).ok()?;
1019 let mut buf = vec![0u8; window_len];
1020 source.read_exact(&mut buf).ok()?;
1021
1022 // Scan for the longest valid UTF-16LE text block (min 256 chars = 512 bytes).
1023 // Strategy: find aligned 2-byte sequences where every pair decodes to a
1024 // printable/whitespace Unicode scalar (U+0020..U+FFFD).
1025 extract_utf16le_text(&buf, 256)
1026 }
1027}
1028
/// Scan `buf` for the longest contiguous run of UTF-16LE text and return it.
///
/// Both byte alignments (offset 0 and offset 1) are tried, because the text
/// may start on either an even or an odd byte. A run consists of consecutive
/// 16-bit little-endian code units that are each one of:
///
/// - tab (`U+0009`), line feed (`U+000A`) or carriage return (`U+000D`),
/// - a non-surrogate code unit in `U+0020..=U+FFFD`, or
/// - a well-formed surrogate pair (high surrogate immediately followed by a
///   low surrogate).
///
/// Any other code unit — including an *unpaired* surrogate — ends the current
/// run. Splitting on unpaired surrogates matters: otherwise a single stray
/// surrogate would make the whole surrounding run undecodable and cause
/// otherwise valid text to be dropped.
///
/// `min_chars` is the minimum run length, counted in UTF-16 code units (a
/// surrogate pair counts as two). Runs shorter than that are ignored. When
/// several runs share the maximum length, the first one found wins
/// (alignment 0 before alignment 1, earlier offsets before later ones).
///
/// Returns `None` if `buf` holds fewer than two bytes or no run qualifies.
fn extract_utf16le_text(buf: &[u8], min_chars: usize) -> Option<String> {
    /// Code units that are acceptable on their own (no surrogates).
    fn is_text_unit(unit: u16) -> bool {
        matches!(
            unit,
            0x0009 | 0x000A | 0x000D | 0x0020..=0xD7FF | 0xE000..=0xFFFD
        )
    }

    /// Record `run` as the best candidate when it is long enough and strictly
    /// longer than the current best. The length check comes first so that
    /// runs which cannot win are never converted to a `String`.
    fn keep_if_longer(
        run: &[u16],
        min_chars: usize,
        best: &mut Option<String>,
        best_len: &mut usize,
    ) {
        if run.len() < min_chars || run.len() <= *best_len {
            return;
        }
        if let Ok(text) = String::from_utf16(run) {
            *best_len = run.len();
            *best = Some(text);
        }
    }

    if buf.len() < 2 {
        return None;
    }

    let mut best: Option<String> = None;
    let mut best_len = 0usize;

    for alignment in 0..2usize {
        // `buf.len() >= 2`, so slicing from 0 or 1 is always in bounds.
        let aligned = &buf[alignment..];
        if aligned.len() / 2 < min_chars {
            // Not even a run spanning the whole buffer could qualify.
            continue;
        }

        let mut units = aligned
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .peekable();
        let mut run: Vec<u16> = Vec::with_capacity(min_chars);

        while let Some(unit) = units.next() {
            if is_text_unit(unit) {
                run.push(unit);
                continue;
            }

            // A high surrogate is only text when a low surrogate follows it.
            if (0xD800..=0xDBFF).contains(&unit) {
                let low = units
                    .peek()
                    .copied()
                    .filter(|next| (0xDC00..=0xDFFF).contains(next));
                if let Some(low) = low {
                    units.next();
                    run.push(unit);
                    run.push(low);
                    continue;
                }
            }

            // Control character, non-character or unpaired surrogate: the
            // current run ends here.
            keep_if_longer(&run, min_chars, &mut best, &mut best_len);
            run.clear();
        }
        keep_if_longer(&run, min_chars, &mut best, &mut best_len);
    }

    best
}
1082
1083// ─── High-level typed accessor for scan parameters ──────────────────────────
1084
1085/// Typed accessor for a scan's extra parameters (`ScanParams` stream).
1086///
1087/// The underlying [`GenericRecord`] stores named fields whose labels vary
1088/// slightly across Thermo instrument families. This wrapper normalises the
1089/// most common labels so callers do not need to hard-code instrument-specific
1090/// strings.
1091///
1092/// # Example
1093/// ```no_run
1094/// use opentfraw::RawFileReader;
1095/// let raw = RawFileReader::open_path("experiment.raw").unwrap();
1096/// if let Some(p) = raw.scan_params(1) {
1097/// println!("Injection time: {:?} ms", p.ion_injection_time_ms());
1098/// println!("Charge state: {:?}", p.charge_state());
1099/// }
1100/// ```
1101pub struct ScanParams<'a>(pub &'a GenericRecord);
1102
1103impl<'a> ScanParams<'a> {
1104 /// Return the raw `GenericRecord` for direct field access.
1105 #[inline]
1106 pub fn record(&self) -> &GenericRecord {
1107 self.0
1108 }
1109
1110 /// Ion injection / fill time in milliseconds.
1111 ///
1112 /// Label varies: `"Ion Injection Time (ms):"` (Orbitrap family) vs
1113 /// `"Ion Inject Time (ms):"` (older LTQ variants).
1114 pub fn ion_injection_time_ms(&self) -> Option<f64> {
1115 // Try canonical label first; fall back to legacy label.
1116 self.0
1117 .get_f64("Ion Injection Time (ms):")
1118 .or_else(|| self.0.get_f64("Ion Inject Time (ms):"))
1119 }
1120
1121 /// Precursor charge state (0 = unknown / MS1 scan).
1122 pub fn charge_state(&self) -> Option<i32> {
1123 self.0
1124 .get_i32("Charge State:")
1125 // Some LCQ files use UInt8 for charge state.
1126 .or_else(|| {
1127 self.0.get("Charge State:").and_then(|v| match v {
1128 GenericValue::UInt8(n) => Some(*n as i32),
1129 _ => None,
1130 })
1131 })
1132 }
1133
1134 /// Monoisotopic precursor m/z (0 = not determined).
1135 ///
1136 /// Tries multiple label variants for compatibility across instrument families:
1137 ///
1138 /// - `"Monoisotopic M/Z:"` - most common (Q Exactive, Orbitrap Fusion)
1139 /// - `"MS2 Isolation M/Z:"` - some older LTQ firmware
1140 ///
1141 /// Returns `None` when the value is absent or zero (not determined).
1142 pub fn monoisotopic_mz(&self) -> Option<f64> {
1143 let v = self
1144 .0
1145 .get_f64("Monoisotopic M/Z:")
1146 .or_else(|| self.0.get_f64("MS2 Isolation M/Z:"))
1147 .or_else(|| self.0.get_f64("Isolation Center M/Z:"))
1148 .or_else(|| self.0.get_f64("Precursor M/Z:"))?;
1149 if v > 0.0 {
1150 Some(v)
1151 } else {
1152 None
1153 }
1154 }
1155
1156 /// Number of micro-scans averaged into this scan.
1157 pub fn micro_scan_count(&self) -> Option<i32> {
1158 self.0.get_i32("Micro Scan Count:")
1159 }
1160
1161 /// Scan number of the master (MS1) scan that triggered this dependent scan.
1162 /// Returns `None` if this is not a dependent scan.
1163 pub fn master_scan_number(&self) -> Option<i32> {
1164 self.0
1165 .get_i32("Master Scan Number:")
1166 .or_else(|| self.0.get_i32("Master Index:"))
1167 }
1168
1169 /// Orbitrap / FT resolving power (e.g. 60000, 120000).
1170 pub fn ft_resolution(&self) -> Option<i32> {
1171 self.orbitrap_resolution()
1172 }
1173
1174 /// Number of lock masses found / matched.
1175 pub fn number_of_lm_found(&self) -> Option<i32> {
1176 self.number_of_lock_masses()
1177 }
1178
1179 /// Lock-mass m/z correction applied (ppm).
1180 pub fn lm_correction_ppm(&self) -> Option<f64> {
1181 self.lock_mass_correction_ppm()
1182 }
1183
1184 /// AGC target fill value (ion count).
1185 pub fn agc_target(&self) -> Option<i32> {
1186 self.0.get_i32("AGC Target:")
1187 }
1188
1189 /// Whether automated gain control (AGC) was active.
1190 pub fn agc_enabled(&self) -> Option<bool> {
1191 match self.0.get("AGC:")? {
1192 GenericValue::Bool(b) => Some(*b),
1193 GenericValue::String(s) => Some(s.to_ascii_lowercase().contains("on")),
1194 _ => None,
1195 }
1196 }
1197
1198 /// Elapsed scan time in seconds (Orbitrap instruments only).
1199 pub fn elapsed_scan_time_s(&self) -> Option<f64> {
1200 self.0.get_f64("Elapsed Scan Time (sec):")
1201 }
1202
1203 /// Maximum allowed ion injection time in milliseconds.
1204 pub fn max_ion_time_ms(&self) -> Option<f64> {
1205 self.0.get_f64("Max. Ion Time (ms):")
1206 }
1207
1208 /// MSn isolation window width in m/z.
1209 ///
1210 /// Label varies: `"MS2 Isolation Width:"` (most common), `"MSn Isolation Width:"`,
1211 /// or `"Isolation Width (M/Z):"` on some firmware.
1212 pub fn isolation_width_mz(&self) -> Option<f64> {
1213 self.0
1214 .get_f64("MS2 Isolation Width:")
1215 .or_else(|| self.0.get_f64("MSn Isolation Width:"))
1216 .or_else(|| self.0.get_f64("Isolation Width (M/Z):"))
1217 .or_else(|| self.0.get_f64("MS2 Isolation Width (M/Z):"))
1218 }
1219
1220 /// MSn isolation window target m/z (the center of the isolation window).
1221 ///
1222 /// Some instruments write this separately from the precursor m/z; when
1223 /// absent, callers should fall back to [`Self::monoisotopic_mz`] or to
1224 /// the event's first reaction `precursor_mz`.
1225 pub fn isolation_target_mz(&self) -> Option<f64> {
1226 self.0
1227 .get_f64("MS2 Isolation Offset:")
1228 .or_else(|| self.0.get_f64("Target M/Z:"))
1229 }
1230
1231 /// Activation energy (eV or %) for the primary activation step.
1232 ///
1233 /// Tries several label variants present across instrument families.
1234 /// NCE (normalized collision energy) labels are checked first because
1235 /// they reflect the user-set method value and are what reference tools
1236 /// (ThermoRawFileParser, Proteome Discoverer) report. eV labels are
1237 /// used as a fallback when no NCE label is present.
1238 ///
1239 /// Label priority:
1240 /// 1. `"HCD Energy:"` / `"HCD Energy V:"` / `"CE:"` - NCE string form
1241 /// 2. `"Normalized Collision Energy:"` - ion-trap CID NCE
1242 /// 3. `"HCD Energy (eV):"` - explicit eV label (Q Exactive HF-X, Exploris)
1243 /// 4. `"HCD Energy eV:"` - eV variant
1244 /// 5. `"Collision Energy (eV):"` - ITMS CID eV
1245 pub fn activation_energy(&self) -> Option<f64> {
1246 // NCE labels: preferred because they match the user-set method value.
1247 // Skip 0.0 (sentinel for "not set").
1248 for label in &["HCD Energy:", "HCD Energy V:", "CE:"] {
1249 if let Some(s) = self.0.get_string(label) {
1250 if let Ok(v) = s.trim().trim_end_matches('%').parse::<f64>() {
1251 if v > 0.0 {
1252 return Some(v);
1253 }
1254 }
1255 }
1256 }
1257 if let Some(v) = self
1258 .0
1259 .get_f64("Normalized Collision Energy:")
1260 .filter(|&v| v > 0.0)
1261 {
1262 return Some(v);
1263 }
1264 // eV labels: used when no NCE label is available.
1265 if let Some(v) = self.0.get_f64("HCD Energy (eV):").filter(|&v| v > 0.0) {
1266 return Some(v);
1267 }
1268 if let Some(v) = self.0.get_f64("HCD Energy eV:").filter(|&v| v > 0.0) {
1269 return Some(v);
1270 }
1271 self.0
1272 .get_f64("Collision Energy (eV):")
1273 .filter(|&v| v > 0.0)
1274 }
1275
1276 /// Whether the value returned by [`activation_energy`] is a normalized
1277 /// collision energy (NCE, dimensionless %) rather than an absolute eV value.
1278 ///
1279 /// Returns `true` when `activation_energy` found a value from an NCE label
1280 /// (`HCD Energy:`, `HCD Energy V:`, `CE:`, or `Normalized Collision Energy:`).
1281 /// Returns `false` when only eV labels were present or no energy was found.
1282 pub fn activation_energy_is_nce(&self) -> bool {
1283 // Returns true if activation_energy() took the NCE path.
1284 for label in &["HCD Energy:", "HCD Energy V:", "CE:"] {
1285 if let Some(s) = self.0.get_string(label) {
1286 if let Ok(v) = s.trim().trim_end_matches('%').parse::<f64>() {
1287 if v > 0.0 {
1288 return true;
1289 }
1290 }
1291 }
1292 }
1293 self.0
1294 .get_f64("Normalized Collision Energy:")
1295 .filter(|&v| v > 0.0)
1296 .is_some()
1297 }
1298
1299 /// Supplemental activation energy for EThcD scans (the HCD component).
1300 ///
1301 /// Returns `None` for non-EThcD scans.
1302 pub fn supplemental_activation_energy(&self) -> Option<f64> {
1303 if let Some(v) = self.0.get_f64("Supplemental Activation CE:") {
1304 return Some(v);
1305 }
1306 if let Some(s) = self.0.get_string("Supplemental Activation:") {
1307 return s.trim().trim_end_matches('%').parse::<f64>().ok();
1308 }
1309 None
1310 }
1311
1312 /// All possible charge states reported by the precursor selection algorithm.
1313 ///
1314 /// Returns `None` when the instrument did not report possible charges.
1315 /// Some firmware stores them as a space-delimited string (e.g. `"2 3"`);
1316 /// others use a typed integer for the single selected charge.
1317 pub fn possible_charge_states(&self) -> Option<Vec<u32>> {
1318 // String variant: "2 3 4"
1319 if let Some(s) = self.0.get_string("Possible Charge States:") {
1320 let v: Vec<u32> = s
1321 .split_whitespace()
1322 .filter_map(|t| t.parse::<u32>().ok())
1323 .collect();
1324 if !v.is_empty() {
1325 return Some(v);
1326 }
1327 }
1328 // Integer variant (single charge)
1329 if let Some(c) = self.charge_state() {
1330 if c > 0 {
1331 return Some(vec![c as u32]);
1332 }
1333 }
1334 None
1335 }
1336
1337 /// FAIMS compensation voltage in V (Orbitrap Fusion/Lumos with FAIMS Pro).
1338 pub fn faims_cv(&self) -> Option<f64> {
1339 self.0
1340 .get_f64("FAIMS CV:")
1341 .or_else(|| self.0.get_f32("FAIMS CV:").map(f64::from))
1342 }
1343
1344 /// Whether FAIMS voltage was active for this scan.
1345 pub fn faims_voltage_on(&self) -> Option<bool> {
1346 match self.0.get("FAIMS Voltage On:")? {
1347 GenericValue::Bool(b) => Some(*b),
1348 GenericValue::String(s) => Some(s.to_ascii_lowercase().contains("on")),
1349 _ => None,
1350 }
1351 }
1352
1353 /// S-Lens RF level (V), typically reported on Q Exactive family.
1354 pub fn s_lens_rf_level(&self) -> Option<f64> {
1355 self.0.get_f64("S-Lens RF Level:")
1356 }
1357
1358 /// AGC fill percentage (0.0-1.0), reported on Q Exactive HF family.
1359 pub fn agc_fill(&self) -> Option<f64> {
1360 self.0.get_f64("AGC Fill:")
1361 }
1362
1363 /// Orbitrap analyzer temperature (°C), where available.
1364 pub fn analyzer_temperature(&self) -> Option<f64> {
1365 self.0.get_f64("Analyzer Temperature:")
1366 }
1367
1368 /// PS injection time in milliseconds (pre-scan injection for Q Exactive).
1369 pub fn ps_injection_time_ms(&self) -> Option<f64> {
1370 self.0.get_f64("PS Inj. Time (ms):")
1371 }
1372
1373 /// Reagent ion injection time in milliseconds (ETD reagent).
1374 pub fn reagent_ion_injection_time_ms(&self) -> Option<f64> {
1375 self.0
1376 .get_f32("Reagent Ion Injection Time (ms):")
1377 .map(f64::from)
1378 }
1379
1380 /// Whether the reagent AGC was active.
1381 pub fn reagent_ion_agc(&self) -> Option<bool> {
1382 match self.0.get("Reagent Ion AGC:")? {
1383 GenericValue::Bool(b) => Some(*b),
1384 _ => None,
1385 }
1386 }
1387
1388 /// Source CID energy applied in the ion source (eV).
1389 pub fn source_cid_energy_ev(&self) -> Option<f64> {
1390 self.0
1391 .get_f64("Source CID eV:")
1392 .or_else(|| self.0.get_f32("API Source CID Energy:").map(f64::from))
1393 }
1394
1395 /// Dynamic retention time shift in minutes (Q Exactive HF-X AutoQC).
1396 pub fn dynamic_rt_shift_min(&self) -> Option<f64> {
1397 self.0.get_f64("Dynamic RT Shift (min):")
1398 }
1399
1400 /// Lock mass correction applied (ppm) - tries several label variants.
1401 pub fn lock_mass_correction_ppm(&self) -> Option<f64> {
1402 self.0
1403 .get_f64("LM Correction (ppm):")
1404 .or_else(|| self.0.get_f64("LM m/z-Correction (ppm):"))
1405 }
1406
1407 /// Number of lock masses found.
1408 pub fn number_of_lock_masses(&self) -> Option<i32> {
1409 self.0
1410 .get_i32("Number of LM Found:")
1411 .or_else(|| self.0.get_i32("Number of Lock Masses:"))
1412 }
1413
1414 /// Orbitrap resolution setting (not measured, but requested).
1415 pub fn orbitrap_resolution(&self) -> Option<i32> {
1416 self.0
1417 .get_i32("Orbitrap Resolution:")
1418 .or_else(|| self.0.get_i32("FT Resolution:"))
1419 }
1420
1421 /// SPS (Synchronous Precursor Selection) mass for MS3 channel N (0-based index).
1422 ///
1423 /// SPS masses are stored as `"SPS Mass 1:"`, `"SPS Mass 2:"`, ... (1-based).
1424 pub fn sps_mass(&self, channel: usize) -> Option<f32> {
1425 let label = format!("SPS Mass {}:", channel + 1);
1426 self.0.get_f32(&label)
1427 }
1428
1429 /// Conversion parameter A (Orbitrap m/z conversion polynomial).
1430 pub fn conversion_parameter_a(&self) -> Option<f64> {
1431 self.0.get_f64("Conversion Parameter A:")
1432 }
1433
1434 /// Conversion parameter B.
1435 pub fn conversion_parameter_b(&self) -> Option<f64> {
1436 self.0.get_f64("Conversion Parameter B:")
1437 }
1438
1439 /// Conversion parameter C.
1440 pub fn conversion_parameter_c(&self) -> Option<f64> {
1441 self.0.get_f64("Conversion Parameter C:")
1442 }
1443
1444 /// Raw over-fill time T (used for AGC computation).
1445 pub fn raw_ovft(&self) -> Option<f64> {
1446 self.0.get_f64("RawOvFtT:")
1447 }
1448
1449 /// Error in the isotopic envelope fit (used for charge-state scoring).
1450 pub fn isotopic_fit_error(&self) -> Option<f64> {
1451 self.0.get_f64("Error in isotopic envelope fit:")
1452 }
1453
1454 /// Scan description string (arbitrary text, set by method or real-time software).
1455 pub fn scan_description(&self) -> Option<&str> {
1456 self.0.get_string("Scan Description:")
1457 }
1458
1459 /// Multi-inject info string (e.g. `"IT=45 "` for ion-trap fill time).
1460 pub fn multi_inject_info(&self) -> Option<&str> {
1461 self.0.get_string("Multi Inject Info:")
1462 }
1463
1464 /// HCD energy string - raw value as stored (may be `"28.00"`, `"28%"`, or `"N/A"`).
1465 pub fn hcd_energy(&self) -> Option<&str> {
1466 self.0
1467 .get_string("HCD Energy:")
1468 .or_else(|| self.0.get_string("HCD Energy V:"))
1469 }
1470}
1471
1472// ─── Status log (instrument log) typed accessor ─────────────────────────────
1473
1474/// Typed accessor for a per-scan instrument-status log entry.
1475///
1476/// The instrument log records instrument-state values (temperatures, voltages,
1477/// pressures, etc.) at the time each scan was acquired. The schema varies
1478/// across instrument models.
1479pub struct StatusLogEntry<'a>(pub &'a GenericRecord);
1480
1481impl<'a> StatusLogEntry<'a> {
1482 /// Return the raw record for direct field access.
1483 #[inline]
1484 pub fn record(&self) -> &GenericRecord {
1485 self.0
1486 }
1487
1488 /// Ion injection time in milliseconds (present on Orbitrap family).
1489 pub fn ion_injection_time_ms(&self) -> Option<f64> {
1490 self.0
1491 .get_f64("Ion Injection Time (ms):")
1492 .or_else(|| self.0.get_f64("Ion Inject Time (ms):"))
1493 }
1494
1495 /// Orbitrap / FT resolving power setting.
1496 pub fn ft_resolution(&self) -> Option<i32> {
1497 self.0
1498 .get_i32("Orbitrap Resolution:")
1499 .or_else(|| self.0.get_i32("FT Resolution:"))
1500 }
1501
1502 /// FAIMS compensation voltage (V).
1503 pub fn faims_cv(&self) -> Option<f64> {
1504 self.0
1505 .get_f64("FAIMS CV:")
1506 .or_else(|| self.0.get_f32("FAIMS CV:").map(f64::from))
1507 }
1508
1509 /// S-Lens RF level (V).
1510 pub fn s_lens_rf_level(&self) -> Option<f64> {
1511 self.0.get_f64("S-Lens RF Level:")
1512 }
1513
1514 /// Orbitrap / analyzer temperature (°C).
1515 pub fn analyzer_temperature(&self) -> Option<f64> {
1516 self.0
1517 .get_f64("Analyzer Temperature:")
1518 .or_else(|| self.0.get_f32("Analyzer Temperature:").map(f64::from))
1519 }
1520
1521 /// API (spray) source voltage (V).
1522 pub fn spray_voltage(&self) -> Option<f64> {
1523 self.0
1524 .get_f64("Spray Voltage (V):")
1525 .or_else(|| self.0.get_f64("Spray Voltage:"))
1526 .or_else(|| self.0.get_f32("Spray Voltage:").map(f64::from))
1527 }
1528
1529 /// Lock mass reference correction (ppm).
1530 pub fn lock_mass_correction_ppm(&self) -> Option<f64> {
1531 self.0
1532 .get_f64("LM Correction (ppm):")
1533 .or_else(|| self.0.get_f64("LM m/z-Correction (ppm):"))
1534 }
1535
1536 /// Capillary temperature (°C).
1537 pub fn capillary_temperature(&self) -> Option<f64> {
1538 self.0
1539 .get_f64("Capillary Temp (°C):")
1540 .or_else(|| self.0.get_f64("Capillary Temp:"))
1541 .or_else(|| self.0.get_f32("Capillary Temp:").map(f64::from))
1542 }
1543
1544 /// Number of lock masses found.
1545 pub fn number_of_lock_masses(&self) -> Option<i32> {
1546 self.0
1547 .get_i32("Number of LM Found:")
1548 .or_else(|| self.0.get_i32("Number of Lock Masses:"))
1549 }
1550
1551 /// Get any field by name (pass-through to the underlying record).
1552 pub fn get(&self, label: &str) -> Option<&GenericValue> {
1553 self.0.get(label)
1554 }
1555
1556 /// Get a float64 field by name.
1557 pub fn get_f64(&self, label: &str) -> Option<f64> {
1558 self.0.get_f64(label)
1559 }
1560
1561 /// Get an int32 field by name.
1562 pub fn get_i32(&self, label: &str) -> Option<i32> {
1563 self.0.get_i32(label)
1564 }
1565
1566 /// Get a string field by name.
1567 pub fn get_string(&self, label: &str) -> Option<&str> {
1568 self.0.get_string(label)
1569 }
1570}