1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
use crate::error::Result;
use crate::reader::BinaryReader;
use std::io::{Read, Seek};
/// Precursor reaction info for MS2+ scans.
///
/// One serialized record occupies 32 bytes: three `f64` values followed by
/// two `u32` values, in the field order declared below.
#[derive(Debug, Clone, PartialEq)]
pub struct Reaction {
    /// Precursor m/z (first `f64` of the record).
    pub precursor_mz: f64,
    /// Second `f64` of the record; its meaning is not known.
    pub unknown_double: f64,
    /// Third `f64` of the record, kept as the reaction energy.
    pub energy: f64,
    /// First trailing `u32`; its meaning is not known.
    pub unknown_long1: u32,
    /// Second trailing `u32`; its meaning is not known.
    pub unknown_long2: u32,
}
/// M/z acquisition range (the scan window) of a scan event.
///
/// Serialized as two consecutive `f64` values: the lower bound, then the
/// upper bound.
#[derive(Debug, Clone, PartialEq)]
pub struct FractionCollector {
    /// Lower m/z bound of the window.
    pub low_mz: f64,
    /// Upper m/z bound of the window.
    pub high_mz: f64,
}
/// Scan event preamble — byte array encoding scan parameters.
///
/// The bytes are stored exactly as read from the file; individual
/// parameters are decoded on demand by looking at fixed byte positions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanEventPreamble {
    /// Raw preamble bytes. The length depends on the file-format version.
    pub bytes: Vec<u8>,
}
/// Complete scan event: the raw preamble together with the values parsed
/// from the event body that follows it.
#[derive(Debug)]
pub struct ScanEvent {
/// Raw preamble bytes read ahead of the event body.
pub preamble: ScanEventPreamble,
/// Precursor reactions parsed from the body; empty when none were found
/// or when the v66 parser skipped them (e.g. MS1 events).
pub reactions: Vec<Reaction>,
/// Scan window(s). The pre-v66 reader always stores exactly one entry;
/// the v66 reader stores one entry when a plausible window was located
/// and none otherwise.
pub fraction_collectors: Vec<FractionCollector>,
/// `f64` coefficient list whose length is given by the `nparam` field of
/// the event body.
pub coefficients: Vec<f64>,
}
impl ScanEventPreamble {
    /// Byte length of the preamble for the given file-format `version`.
    ///
    /// Versions 0 through 8 use 41 bytes, 57 and 60 use 80, 62 uses 120,
    /// 63 and 64 use 128. Every other version (v66+ in particular) uses 136.
    pub(crate) fn size_for_version(version: u32) -> usize {
        match version {
            57 | 60 => 80,
            62 => 120,
            63 | 64 => 128,
            early if early <= 8 => 41,
            _ => 136, // v66+
        }
    }

    /// Raw preamble byte at `index`, or `None` when the preamble is too
    /// short to contain it.
    fn byte_at(&self, index: usize) -> Option<u8> {
        self.bytes.get(index).copied()
    }

    /// Polarity, decoded from byte 4. `None` if the byte is missing or its
    /// code is not recognised.
    pub fn polarity(&self) -> Option<crate::Polarity> {
        crate::Polarity::from_byte(self.byte_at(4)?)
    }

    /// Scan mode (centroid/profile), decoded from byte 5.
    pub fn scan_mode(&self) -> Option<crate::ScanMode> {
        crate::ScanMode::from_byte(self.byte_at(5)?)
    }

    /// MS power, decoded from byte 6.
    pub fn ms_power(&self) -> Option<crate::MsPower> {
        crate::MsPower::from_byte(self.byte_at(6)?)
    }

    /// Scan type, decoded from byte 7.
    pub fn scan_type(&self) -> Option<crate::ScanType> {
        crate::ScanType::from_byte(self.byte_at(7)?)
    }

    /// Dependent scan flag: true only when byte 10 exists and equals 1.
    pub fn is_dependent(&self) -> bool {
        self.byte_at(10) == Some(1)
    }

    /// True if this scan is a Data-Independent Acquisition (DIA) MS2+ scan,
    /// i.e. the raw MS power (byte 6, treated as 0 when absent) is at least 2
    /// and the dependent flag is NOT set. In DIA mode the instrument selects
    /// a wide isolation window and fragments all ions in that window
    /// together, without targeting a specific precursor.
    pub fn is_dia(&self) -> bool {
        let raw_power = self.byte_at(6).unwrap_or(0);
        !self.is_dependent() && raw_power >= 2
    }

    /// Ionization mode, decoded from byte 11.
    pub fn ionization(&self) -> Option<crate::Ionization> {
        crate::Ionization::from_byte(self.byte_at(11)?)
    }

    /// Activation method, decoded from byte 24.
    pub fn activation(&self) -> Option<crate::Activation> {
        crate::Activation::from_byte(self.byte_at(24)?)
    }

    /// Wideband (broadband isolation) flag: true only when byte 32 exists
    /// and equals 1.
    pub fn is_wideband(&self) -> bool {
        self.byte_at(32) == Some(1)
    }

    /// Analyzer type, decoded from byte 40.
    pub fn analyzer(&self) -> Option<crate::Analyzer> {
        crate::Analyzer::from_byte(self.byte_at(40)?)
    }

    /// Undecoded activation code (byte 24), or 0 when the preamble is too
    /// short. Handy for diagnostics when `activation()` yields `None`
    /// because the code is not recognised.
    pub fn activation_byte(&self) -> u8 {
        self.byte_at(24).unwrap_or(0)
    }
}
impl ScanEvent {
/// Read one scan event.
///
/// For v66 files, `body_primary` is the body size for primary (MS1) scans
/// and `body_dependent` is the body size for dependent (MS2+) scans.
/// For uniform-event files these two values are identical.
/// Pass `(0, 0)` for pre-v66 files (body size is self-describing).
pub(crate) fn read<R: Read + Seek>(
r: &mut BinaryReader<R>,
version: u32,
body_primary: usize,
body_dependent: usize,
) -> Result<Self> {
let preamble_size = ScanEventPreamble::size_for_version(version);
let preamble_bytes = r.read_bytes(preamble_size)?;
let preamble = ScanEventPreamble {
bytes: preamble_bytes,
};
if version >= 66 {
// Select body size: primary (MS1) vs dependent (MS2+).
// Primary = ms_power <= Ms1 AND not dependent.
let is_primary = preamble.bytes.get(6).copied().unwrap_or(0) <= 1
&& preamble.bytes.get(10).copied() != Some(1);
let body_size = if is_primary {
body_primary
} else {
body_dependent
};
// Tribrid instruments (Eclipse, Fusion Lumos) use variable-length events
// detected when body_primary != body_dependent. Dependent events on
// these instruments use a different body layout than QExactive/Exploris.
let is_tribrid_dep = body_primary != body_dependent && !is_primary;
Self::read_v66(r, preamble, body_size, is_tribrid_dep)
} else {
Self::read_pre_v66(r, preamble)
}
}
/// V66 scan events have a fixed-size body (size determined by the caller
/// from the stream's address-space: body_size = event_size - preamble_size).
///
/// Two body layouts are in use across instrument families:
///
/// **QExactive / Exploris / uniform-event files** (body_primary == body_dependent):
/// body[0..4]: u32 unknown_long[0] (always 1)
/// body[4..8]: u32 flags (0 for MS1, 0xA0000000 for MS2)
/// body[8..64]: opaque fields (precursor-related for MS2, range aux for MS1)
/// body[fc_off..fc_off+16]: FractionCollector (scan window) at body_size-64
///
/// **Tribrid dependent events** (Eclipse, Fusion Lumos; is_tribrid_dep=true):
/// body[0..4]: u32 n_reactions (0 for MS1, 1 for HCD, 2 for EThcD)
/// body[4..]: n_reactions * 32-byte Reaction records
/// body[body_size-88..body_size-72]: FractionCollector (scan window)
///
/// In both cases nparam + coefficients live at body[body_size-64..].
fn read_v66<R: Read + Seek>(
r: &mut BinaryReader<R>,
preamble: ScanEventPreamble,
body_size: usize,
is_tribrid_dep: bool,
) -> Result<Self> {
let body = r.read_bytes(body_size)?;
// FractionCollector (scan window) location varies by instrument family:
// - Q Exactive / Exploris / Astral (body_size ≥ 136): offset 64
// - Orbitrap Elite / Fusion / Fusion Lumos / Velos Pro (body_size=96):
// MS1 → offset 8, MS2 → offset 64
// - Orbitrap Ascend (body_size=152) MS2 → offset 128
// - LTQ ion-trap only files (body_size < 96): offset 8
// Empirically verified across a 24-file multi-instrument corpus.
//
// Strategy: try a small list of candidate offsets in priority order
// and accept the first that yields a plausible m/z window. This is
// robust across the observed zoo of body layouts.
// Tribrid dependent events (Eclipse, Fusion Lumos) store the FractionCollector
// at body[body_size-88] = body[120] for a 208-byte body. All other v66
// instruments use one of the legacy candidate offsets.
let tribrid_fc = body_size.saturating_sub(88);
let tribrid_candidates;
let legacy_candidates;
let candidates: &[usize] = if is_tribrid_dep {
tribrid_candidates = [tribrid_fc, 8usize, 64, 128];
&tribrid_candidates
} else {
legacy_candidates = [64usize, 8, 128, body_size.saturating_sub(80)];
&legacy_candidates
};
let fraction_collectors = candidates
.iter()
.copied()
.find_map(|off| {
if off + 16 > body_size {
return None;
}
let low_mz = f64::from_le_bytes(body[off..off + 8].try_into().unwrap());
let high_mz = f64::from_le_bytes(body[off + 8..off + 16].try_into().unwrap());
// A valid scan window must be finite, monotonic, and within
// physically realistic m/z bounds (instruments top out well
// below 1e5 m/z). Accept lo == hi as well because some
// SIM / tSIM scans use a single-point window.
if low_mz.is_finite()
&& high_mz.is_finite()
&& low_mz >= 0.1
&& low_mz <= high_mz
&& high_mz <= 50_000.0
{
Some(vec![FractionCollector { low_mz, high_mz }])
} else {
None
}
})
.unwrap_or_default();
// nparam + coefficients live at a fixed offset from the end of the body
// (body_size - 64). This is independent of the FC location and is
// consistent across all v66 instruments in the corpus.
let np_off = body_size.saturating_sub(64);
let mut coefficients = Vec::new();
if np_off + 4 <= body_size {
let nparam_raw =
u32::from_le_bytes(body[np_off..np_off + 4].try_into().unwrap()) as usize;
// Cap nparam at the number of f64s that actually fit in the remaining body.
// Without this cap, a garbage nparam (e.g. 0xFFFFFFFF from uninitialised
// bytes) causes billions of loop iterations just to evaluate the guard.
let max_nparam = (body_size.saturating_sub(np_off + 4)) / 8;
let nparam = nparam_raw.min(max_nparam);
for i in 0..nparam {
let off = np_off + 4 + i * 8;
coefficients.push(f64::from_le_bytes(body[off..off + 8].try_into().unwrap()));
}
}
// Parse precursor reactions from the v66 body for dependent scans and for
// non-dependent MS2+ scans (DIA mode). In DIA, MS2 scans are not flagged as
// dependent but still carry one or more isolation window reactions in the body.
//
// Condition: parse reactions when ms_power >= 2 OR the scan is flagged dependent.
// (MS1 primary scans with ms_power <= 1 and dependent=false are skipped.)
let is_ms2_plus = preamble.bytes.get(6).copied().unwrap_or(0) >= 2;
let reactions = if (!is_ms2_plus && !preamble.is_dependent()) || body_size < 8 {
Vec::new()
} else if is_tribrid_dep {
// Tribrid dependent events (Eclipse, Fusion Lumos): n_reactions is stored at
// body[0..4] and each 32-byte Reaction record begins at body[4].
// The second reaction (if any) is typically a zero-mz supplemental step.
let np = if body_size >= 4 {
u32::from_le_bytes(body[0..4].try_into().unwrap()) as usize
} else {
0
};
let rxn_start = 4usize;
let max_np = body_size.saturating_sub(rxn_start + 32) / 32;
if np == 0 || np > max_np.max(1) {
Vec::new()
} else {
let mut rxs = Vec::with_capacity(np);
for i in 0..np {
let off = rxn_start + i * 32;
if off + 32 > body_size {
break;
}
let mz = f64::from_le_bytes(body[off..off + 8].try_into().unwrap());
let unk = f64::from_le_bytes(body[off + 8..off + 16].try_into().unwrap());
let energy = f64::from_le_bytes(body[off + 16..off + 24].try_into().unwrap());
let ul1 = u32::from_le_bytes(body[off + 24..off + 28].try_into().unwrap());
let ul2 = u32::from_le_bytes(body[off + 28..off + 32].try_into().unwrap());
if mz.is_finite() && mz >= 0.0 {
rxs.push(Reaction {
precursor_mz: mz,
unknown_double: unk,
energy,
unknown_long1: ul1,
unknown_long2: ul2,
});
}
}
rxs
}
} else {
let np = u32::from_le_bytes(body[4..8].try_into().unwrap()) as usize;
// Sanity check: np must fit within the body minus minimum fixed overhead.
// Each reaction is 32 bytes; require at least 32 bytes of post-reaction
// data (FC=16, nparam=4, minimum tail) for the body to be plausible.
let max_np = body_size.saturating_sub(8 + 32) / 32;
if np == 0 || np > max_np.max(1) {
Vec::new()
} else {
let mut rxs = Vec::with_capacity(np);
for i in 0..np {
let off = 8 + i * 32;
if off + 32 > body_size {
break;
}
let mz = f64::from_le_bytes(body[off..off + 8].try_into().unwrap());
let unk = f64::from_le_bytes(body[off + 8..off + 16].try_into().unwrap());
let energy = f64::from_le_bytes(body[off + 16..off + 24].try_into().unwrap());
let ul1 = u32::from_le_bytes(body[off + 24..off + 28].try_into().unwrap());
let ul2 = u32::from_le_bytes(body[off + 28..off + 32].try_into().unwrap());
// Accept only reactions with plausible m/z values (0 is valid for
// MS1 triggers; accept non-negative finite values).
if mz.is_finite() && mz >= 0.0 {
rxs.push(Reaction {
precursor_mz: mz,
unknown_double: unk,
energy,
unknown_long1: ul1,
unknown_long2: ul2,
});
}
}
rxs
}
};
Ok(Self {
preamble,
reactions,
fraction_collectors,
coefficients,
})
}
fn read_pre_v66<R: Read + Seek>(
r: &mut BinaryReader<R>,
preamble: ScanEventPreamble,
) -> Result<Self> {
let np = r.read_u32()?;
let mut reactions = Vec::new();
for _ in 0..np {
reactions.push(Reaction::read(r)?);
}
let _unk1 = r.read_u32()?;
let fc = FractionCollector::read(r)?;
let nparam = r.read_u32()?;
let mut coefficients = Vec::with_capacity(nparam as usize);
for _ in 0..nparam {
coefficients.push(r.read_f64()?);
}
let _unk2 = r.read_u32()?;
let _unk3 = r.read_u32()?;
Ok(Self {
preamble,
reactions,
fraction_collectors: vec![fc],
coefficients,
})
}
}
impl Reaction {
    /// Decode one reaction record from `r`.
    ///
    /// The reader is consumed in on-disk order: three `f64` values
    /// (precursor m/z, an unknown double, energy) followed by two `u32`
    /// values of unknown meaning.
    ///
    /// # Errors
    /// Returns the first error reported by the reader.
    fn read<R: Read + Seek>(r: &mut BinaryReader<R>) -> Result<Self> {
        // Field initialisers are evaluated top to bottom, which matches the
        // order in which the values appear in the stream.
        Ok(Self {
            precursor_mz: r.read_f64()?,
            unknown_double: r.read_f64()?,
            energy: r.read_f64()?,
            unknown_long1: r.read_u32()?,
            unknown_long2: r.read_u32()?,
        })
    }
}
impl FractionCollector {
    /// Decode one m/z range from `r`: the lower bound first, then the upper
    /// bound, each stored as an `f64`.
    ///
    /// # Errors
    /// Returns the first error reported by the reader.
    fn read<R: Read + Seek>(r: &mut BinaryReader<R>) -> Result<Self> {
        // Initialisers run in written order, so the low bound is read first.
        Ok(Self {
            low_mz: r.read_f64()?,
            high_mz: r.read_f64()?,
        })
    }
}