Skip to main content

oxiphysics_io/spectroscopy_io/
types.rs

1//! Auto-generated module
2//!
3//! ๐Ÿค– Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4
5#![allow(clippy::needless_range_loop)]
6use std::collections::HashMap;
7use std::io::{BufRead, BufReader, Read, Write};
8
9use super::functions::ANIF_MAGIC;
10#[allow(unused_imports)]
11use super::functions::*;
12
13/// Result of a peak-finding operation.
14#[derive(Debug, Clone)]
15pub struct PeakResult {
16    /// Index in the spectrum array.
17    pub index: usize,
18    /// X position of the peak.
19    pub x: f64,
20    /// Y (intensity) of the peak.
21    pub y: f64,
22    /// Estimated full-width at half-maximum (in X units).
23    pub fwhm: f64,
24}
25/// Endianness of the ANIF file.
26#[derive(Debug, Clone, Copy, PartialEq)]
27pub enum AnifEndian {
28    /// Little-endian byte order.
29    Little,
30    /// Big-endian byte order.
31    Big,
32}
33/// In-memory spectral database with similarity search.
34///
35/// Spectra are resampled to a common X grid before comparison.
36#[derive(Debug, Default)]
37pub struct SpectralDatabase {
38    pub(super) entries: Vec<DatabaseEntry>,
39    pub(super) next_id: usize,
40}
41impl SpectralDatabase {
42    /// Create an empty database.
43    pub fn new() -> Self {
44        Self::default()
45    }
46    /// Insert a spectrum and return its assigned ID.
47    pub fn insert(&mut self, record: SpectrumRecord) -> usize {
48        let id = self.next_id;
49        self.entries.push(DatabaseEntry { id, record });
50        self.next_id += 1;
51        id
52    }
53    /// Retrieve a spectrum by ID.
54    pub fn get(&self, id: usize) -> Option<&SpectrumRecord> {
55        self.entries.iter().find(|e| e.id == id).map(|e| &e.record)
56    }
57    /// Remove a spectrum by ID. Returns `true` if it was present.
58    pub fn remove(&mut self, id: usize) -> bool {
59        let before = self.entries.len();
60        self.entries.retain(|e| e.id != id);
61        self.entries.len() < before
62    }
63    /// Total number of stored spectra.
64    pub fn len(&self) -> usize {
65        self.entries.len()
66    }
67    /// Return `true` if the database is empty.
68    pub fn is_empty(&self) -> bool {
69        self.entries.is_empty()
70    }
71    /// Search for the `top_k` most similar spectra to `query`.
72    ///
73    /// Spectra are compared over their raw Y arrays (resampled if needed via
74    /// `resample_to`). If the arrays differ in length, the shorter is zero-padded.
75    pub fn search(
76        &self,
77        query: &SpectrumRecord,
78        metric: SimilarityMetric,
79        top_k: usize,
80    ) -> Vec<SearchResult> {
81        let mut scores: Vec<(usize, &str, f64)> = self
82            .entries
83            .iter()
84            .map(|e| {
85                let score = Self::compute_similarity(query, &e.record, metric);
86                (e.id, e.record.metadata.title.as_str(), score)
87            })
88            .collect();
89        match metric {
90            SimilarityMetric::Euclidean => {
91                scores.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
92            }
93            _ => {
94                scores.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
95            }
96        }
97        scores
98            .into_iter()
99            .take(top_k)
100            .map(|(id, title, score)| SearchResult {
101                id,
102                title: title.to_string(),
103                score,
104            })
105            .collect()
106    }
107    /// Compute similarity between two spectra.
108    fn compute_similarity(a: &SpectrumRecord, b: &SpectrumRecord, metric: SimilarityMetric) -> f64 {
109        let len = a.y.len().min(b.y.len());
110        if len == 0 {
111            return 0.0;
112        }
113        let ya: &[f64] = &a.y[..len];
114        let yb: &[f64] = &b.y[..len];
115        match metric {
116            SimilarityMetric::DotProduct => {
117                let dot: f64 = ya.iter().zip(yb).map(|(x, y)| x * y).sum();
118                let na: f64 = ya.iter().map(|v| v * v).sum::<f64>().sqrt();
119                let nb: f64 = yb.iter().map(|v| v * v).sum::<f64>().sqrt();
120                if na > 0.0 && nb > 0.0 {
121                    dot / (na * nb)
122                } else {
123                    0.0
124                }
125            }
126            SimilarityMetric::SpectralAngleMapper => {
127                let dot: f64 = ya.iter().zip(yb).map(|(x, y)| x * y).sum();
128                let na: f64 = ya.iter().map(|v| v * v).sum::<f64>().sqrt();
129                let nb: f64 = yb.iter().map(|v| v * v).sum::<f64>().sqrt();
130                if na > 0.0 && nb > 0.0 {
131                    let cos = (dot / (na * nb)).clamp(-1.0, 1.0);
132                    std::f64::consts::PI / 2.0 - cos.acos()
133                } else {
134                    0.0
135                }
136            }
137            SimilarityMetric::Euclidean => ya
138                .iter()
139                .zip(yb)
140                .map(|(x, y)| (x - y).powi(2))
141                .sum::<f64>()
142                .sqrt(),
143            SimilarityMetric::Pearson => {
144                let mean_a = ya.iter().sum::<f64>() / len as f64;
145                let mean_b = yb.iter().sum::<f64>() / len as f64;
146                let num: f64 = ya
147                    .iter()
148                    .zip(yb)
149                    .map(|(x, y)| (x - mean_a) * (y - mean_b))
150                    .sum();
151                let sa: f64 = ya.iter().map(|x| (x - mean_a).powi(2)).sum::<f64>().sqrt();
152                let sb: f64 = yb.iter().map(|y| (y - mean_b).powi(2)).sum::<f64>().sqrt();
153                if sa > 0.0 && sb > 0.0 {
154                    num / (sa * sb)
155                } else {
156                    0.0
157                }
158            }
159        }
160    }
161    /// Iterate over all entries.
162    pub fn iter(&self) -> impl Iterator<Item = &DatabaseEntry> {
163        self.entries.iter()
164    }
165}
166/// Metadata associated with a [`SpectrumRecord`].
167#[derive(Debug, Clone, Default)]
168pub struct SpectrumMetadata {
169    /// Title or name of the compound / measurement.
170    pub title: String,
171    /// Source technique that produced the spectrum.
172    pub source: SpectrumSource,
173    /// Date string of the measurement (ISO-8601 recommended).
174    pub date: String,
175    /// Instrument identifier / model.
176    pub instrument: String,
177    /// X-axis label (e.g. "Wavenumber (cm-1)").
178    pub x_label: String,
179    /// Y-axis label (e.g. "Absorbance").
180    pub y_label: String,
181    /// CAS Registry Number of the compound, if known.
182    pub cas_number: String,
183    /// Molecular formula string.
184    pub molecular_formula: String,
185    /// Molecular weight in g/mol, or NaN if unknown.
186    pub molecular_weight: f64,
187    /// Arbitrary additional key/value pairs from the file header.
188    pub extra: HashMap<String, String>,
189}
190/// Spectral processing and analysis utilities.
191pub struct SpectralAnalysis;
192impl SpectralAnalysis {
193    /// Find peaks in the Y array using a simple local-maximum criterion.
194    ///
195    /// A point is a peak if it is strictly greater than its `window` neighbours
196    /// on each side and above the given `threshold`.
197    pub fn find_peaks(record: &SpectrumRecord, window: usize, threshold: f64) -> Vec<PeakResult> {
198        let n = record.y.len();
199        let half = window.max(1);
200        let mut peaks = Vec::new();
201        for i in half..n.saturating_sub(half) {
202            let y_i = record.y[i];
203            if y_i < threshold {
204                continue;
205            }
206            let is_max = (1..=half).all(|k| y_i > record.y[i - k] && y_i > record.y[i + k]);
207            if is_max {
208                let fwhm = Self::estimate_fwhm(record, i);
209                peaks.push(PeakResult {
210                    index: i,
211                    x: record.x[i],
212                    y: y_i,
213                    fwhm,
214                });
215            }
216        }
217        peaks
218    }
219    /// Estimate FWHM for a peak at index `peak_idx` by walking outward until
220    /// the intensity drops below half-maximum.
221    fn estimate_fwhm(record: &SpectrumRecord, peak_idx: usize) -> f64 {
222        let half_max = record.y[peak_idx] * 0.5;
223        let n = record.y.len();
224        let mut left_x = record.x[peak_idx];
225        for i in (0..peak_idx).rev() {
226            if record.y[i] <= half_max {
227                if i + 1 < n {
228                    let y0 = record.y[i];
229                    let y1 = record.y[i + 1];
230                    let x0 = record.x[i];
231                    let x1 = record.x[i + 1];
232                    if (y1 - y0).abs() > 1e-15 {
233                        left_x = x0 + (half_max - y0) * (x1 - x0) / (y1 - y0);
234                    }
235                }
236                break;
237            }
238        }
239        let mut right_x = record.x[peak_idx];
240        for i in peak_idx + 1..n {
241            if record.y[i] <= half_max {
242                if i > 0 {
243                    let y0 = record.y[i - 1];
244                    let y1 = record.y[i];
245                    let x0 = record.x[i - 1];
246                    let x1 = record.x[i];
247                    if (y1 - y0).abs() > 1e-15 {
248                        right_x = x0 + (half_max - y0) * (x1 - x0) / (y1 - y0);
249                    }
250                }
251                break;
252            }
253        }
254        (right_x - left_x).abs()
255    }
256    /// Asymmetric Least Squares (ALS) baseline correction.
257    ///
258    /// Returns the corrected Y array (input Y minus estimated baseline).
259    ///
260    /// # Parameters
261    /// - `y` โ€” input intensity array.
262    /// - `lam` โ€” smoothing parameter (typically 1e4 โ€“ 1e7).
263    /// - `p` โ€” asymmetry parameter (typically 0.001 โ€“ 0.1).
264    /// - `iterations` โ€” number of ALS iterations (typically 10โ€“30).
265    pub fn als_baseline(y: &[f64], lam: f64, p: f64, iterations: usize) -> Vec<f64> {
266        let n = y.len();
267        if n < 3 {
268            return y.to_vec();
269        }
270        let mut weights = vec![1.0_f64; n];
271        let mut baseline = y.to_vec();
272        for _iter in 0..iterations {
273            let mut diag = vec![0.0_f64; n];
274            let mut sub = vec![0.0_f64; n - 1];
275            let mut sup = vec![0.0_f64; n - 1];
276            diag[..n].copy_from_slice(&weights[..n]);
277            for i in 0..n - 1 {
278                diag[i] += lam;
279                diag[i + 1] += lam;
280                sub[i] -= lam;
281                sup[i] -= lam;
282            }
283            let rhs: Vec<f64> = (0..n).map(|i| weights[i] * y[i]).collect();
284            baseline = Self::tridiagonal_solve(&diag, &sub, &sup, &rhs);
285            for i in 0..n {
286                if y[i] > baseline[i] {
287                    weights[i] = p;
288                } else {
289                    weights[i] = 1.0 - p;
290                }
291            }
292        }
293        y.iter()
294            .zip(baseline.iter())
295            .map(|(&yi, &bi)| yi - bi)
296            .collect()
297    }
298    /// Solve a tridiagonal system Ax = b using the Thomas algorithm.
299    /// `sub` is the sub-diagonal (length n-1), `sup` is the super-diagonal.
300    fn tridiagonal_solve(diag: &[f64], sub: &[f64], sup: &[f64], rhs: &[f64]) -> Vec<f64> {
301        let n = diag.len();
302        if n == 0 {
303            return Vec::new();
304        }
305        let mut c_prime = vec![0.0; n - 1];
306        let mut d_prime = vec![0.0; n];
307        let mut x = vec![0.0; n];
308        c_prime[0] = if diag[0].abs() > 1e-15 {
309            sup[0] / diag[0]
310        } else {
311            0.0
312        };
313        d_prime[0] = if diag[0].abs() > 1e-15 {
314            rhs[0] / diag[0]
315        } else {
316            0.0
317        };
318        for i in 1..n {
319            let denom = diag[i]
320                - if i > 0 {
321                    sub[i - 1] * c_prime[i - 1]
322                } else {
323                    0.0
324                };
325            if denom.abs() < 1e-15 {
326                d_prime[i] = 0.0;
327                if i < n - 1 {
328                    c_prime[i] = 0.0;
329                }
330            } else {
331                d_prime[i] = (rhs[i] - sub[i - 1] * d_prime[i - 1]) / denom;
332                if i < n - 1 {
333                    c_prime[i] = sup[i] / denom;
334                }
335            }
336        }
337        x[n - 1] = d_prime[n - 1];
338        for i in (0..n - 1).rev() {
339            x[i] = d_prime[i] - c_prime[i] * x[i + 1];
340        }
341        x
342    }
343    /// Savitzky-Golay smoothing filter applied to Y values.
344    ///
345    /// Uses a 2nd-order polynomial over the given window.
346    /// Returns the smoothed Y array (same length as input).
347    pub fn savitzky_golay(y: &[f64], window: SgWindow) -> Vec<f64> {
348        let n = y.len();
349        let half = window.half();
350        if n <= 2 * half {
351            return y.to_vec();
352        }
353        let coeffs = Self::sg_coefficients(half);
354        let mut out = y.to_vec();
355        for i in half..n - half {
356            let mut val = 0.0;
357            for (k, &c) in coeffs.iter().enumerate() {
358                val += c * y[i + k - half];
359            }
360            out[i] = val;
361        }
362        out
363    }
364    /// Compute Savitzky-Golay convolution coefficients for a 2nd-order polynomial
365    /// and given half-window size.
366    fn sg_coefficients(half: usize) -> Vec<f64> {
367        let m = 2 * half + 1;
368        let h = half as f64;
369        let norm = (2.0 * h + 1.0) * (2.0 * h + 3.0) * (2.0 * h - 1.0) / 3.0;
370        let mut coeffs = Vec::with_capacity(m);
371        for j in -(half as i64)..=(half as i64) {
372            let c = (3.0 * h * (h + 1.0) - 1.0 - 5.0 * j as f64 * j as f64) / norm;
373            coeffs.push(c);
374        }
375        coeffs
376    }
377    /// Simple moving-average smoothing.
378    ///
379    /// The output at index `i` is the average of `y[i-half..=i+half]`.
380    /// Boundary points are left unchanged.
381    pub fn moving_average(y: &[f64], half_window: usize) -> Vec<f64> {
382        let n = y.len();
383        let mut out = y.to_vec();
384        for i in half_window..n - half_window {
385            let sum: f64 = y[i - half_window..=i + half_window].iter().sum();
386            out[i] = sum / (2 * half_window + 1) as f64;
387        }
388        out
389    }
390    /// Compute the first derivative of Y with respect to X using central differences.
391    ///
392    /// Boundary points use one-sided differences.
393    pub fn derivative(record: &SpectrumRecord) -> Vec<f64> {
394        let n = record.y.len();
395        if n < 2 {
396            return vec![0.0; n];
397        }
398        let mut deriv = vec![0.0; n];
399        for i in 1..n - 1 {
400            let dx = record.x[i + 1] - record.x[i - 1];
401            if dx.abs() > 1e-15 {
402                deriv[i] = (record.y[i + 1] - record.y[i - 1]) / dx;
403            }
404        }
405        if n >= 2 {
406            let dx = record.x[1] - record.x[0];
407            if dx.abs() > 1e-15 {
408                deriv[0] = (record.y[1] - record.y[0]) / dx;
409            }
410            let dx_end = record.x[n - 1] - record.x[n - 2];
411            if dx_end.abs() > 1e-15 {
412                deriv[n - 1] = (record.y[n - 1] - record.y[n - 2]) / dx_end;
413            }
414        }
415        deriv
416    }
417    /// Integrate the spectrum using the trapezoidal rule between `x_start` and `x_end`.
418    pub fn integrate(record: &SpectrumRecord, x_start: f64, x_end: f64) -> f64 {
419        let mut integral = 0.0;
420        let n = record.x.len();
421        for i in 1..n {
422            let x0 = record.x[i - 1];
423            let x1 = record.x[i];
424            if x0 >= x_end || x1 <= x_start {
425                continue;
426            }
427            let xa = x0.max(x_start);
428            let xb = x1.min(x_end);
429            let t0 = (xa - x0) / (x1 - x0);
430            let t1 = (xb - x0) / (x1 - x0);
431            let ya = record.y[i - 1] + t0 * (record.y[i] - record.y[i - 1]);
432            let yb = record.y[i - 1] + t1 * (record.y[i] - record.y[i - 1]);
433            integral += 0.5 * (ya + yb) * (xb - xa);
434        }
435        integral
436    }
437}
438/// A spectral record holding X (wavenumber / frequency / m/z) and Y (intensity)
439/// arrays together with rich metadata.
440#[derive(Debug, Clone, Default)]
441pub struct SpectrumRecord {
442    /// X-axis values (wavenumber in cmโปยน, frequency in Hz, m/z, โ€ฆ).
443    pub x: Vec<f64>,
444    /// Intensity / absorbance / transmittance values.
445    pub y: Vec<f64>,
446    /// Associated metadata.
447    pub metadata: SpectrumMetadata,
448}
449impl SpectrumRecord {
450    /// Create an empty `SpectrumRecord`.
451    pub fn new() -> Self {
452        Self::default()
453    }
454    /// Construct from raw arrays and a metadata object.
455    pub fn from_arrays(x: Vec<f64>, y: Vec<f64>, metadata: SpectrumMetadata) -> Self {
456        Self { x, y, metadata }
457    }
458    /// Return the number of data points.
459    pub fn len(&self) -> usize {
460        self.x.len()
461    }
462    /// Return `true` if the record contains no data points.
463    pub fn is_empty(&self) -> bool {
464        self.x.is_empty()
465    }
466    /// Return the minimum X value, or `f64::NAN` if empty.
467    pub fn x_min(&self) -> f64 {
468        self.x.iter().cloned().fold(f64::NAN, f64::min)
469    }
470    /// Return the maximum X value, or `f64::NAN` if empty.
471    pub fn x_max(&self) -> f64 {
472        self.x.iter().cloned().fold(f64::NAN, f64::max)
473    }
474    /// Return the maximum intensity value, or `f64::NAN` if empty.
475    pub fn y_max(&self) -> f64 {
476        self.y.iter().cloned().fold(f64::NAN, f64::max)
477    }
478    /// Return the minimum intensity value, or `f64::NAN` if empty.
479    pub fn y_min(&self) -> f64 {
480        self.y.iter().cloned().fold(f64::NAN, f64::min)
481    }
482    /// Normalize intensity so the maximum equals 1.0.
483    /// No-op if the record is empty or has zero maximum.
484    pub fn normalize(&mut self) {
485        let max_y = self.y_max();
486        if max_y > 0.0 && max_y.is_finite() {
487            for v in &mut self.y {
488                *v /= max_y;
489            }
490        }
491    }
492    /// Interpolate the intensity at an arbitrary X value using linear interpolation.
493    /// Returns `None` if the spectrum is empty or `x_val` is out of range.
494    pub fn interpolate_at(&self, x_val: f64) -> Option<f64> {
495        if self.x.is_empty() {
496            return None;
497        }
498        let idx = self.x.partition_point(|&v| v < x_val);
499        if idx == 0 {
500            if (self.x[0] - x_val).abs() < f64::EPSILON {
501                return Some(self.y[0]);
502            }
503            return None;
504        }
505        if idx >= self.x.len() {
506            let last = self.x.len() - 1;
507            if (self.x[last] - x_val).abs() < f64::EPSILON {
508                return Some(self.y[last]);
509            }
510            return None;
511        }
512        let x0 = self.x[idx - 1];
513        let x1 = self.x[idx];
514        let y0 = self.y[idx - 1];
515        let y1 = self.y[idx];
516        let t = (x_val - x0) / (x1 - x0);
517        Some(y0 + t * (y1 - y0))
518    }
519}
520/// Reader for the ANIF binary spectral format.
521///
522/// The format layout (little-endian or big-endian based on header flag):
523/// ```text
524/// [4] Magic "ANIF"
525/// [1] Major version
526/// [1] Minor version
527/// [1] Endian flag  (0 = LE, 1 = BE)
528/// [1] Reserved
529/// [4] u32  num_points
530/// [8] f64  x_start
531/// [8] f64  x_delta
532/// [64] title (UTF-8, null-padded)
533/// [32] instrument (UTF-8, null-padded)
534/// [16] date (UTF-8, null-padded)
535/// [num_points * 8] f64 Y values
536/// ```
537#[derive(Debug, Default)]
538pub struct AnifReader {
539    /// Parsed header.
540    pub header: AnifHeader,
541    /// Decoded spectrum record.
542    pub record: SpectrumRecord,
543}
544impl AnifReader {
545    /// Create a new, empty reader.
546    pub fn new() -> Self {
547        Self::default()
548    }
549    /// Parse an ANIF byte stream.
550    pub fn parse<R: Read>(&mut self, mut reader: R) -> crate::Result<()> {
551        let mut buf = Vec::new();
552        reader.read_to_end(&mut buf).map_err(crate::Error::Io)?;
553        self.parse_bytes(&buf)
554    }
555    /// Parse from a byte slice.
556    pub fn parse_bytes(&mut self, buf: &[u8]) -> crate::Result<()> {
557        if buf.len() < 4 || &buf[0..4] != ANIF_MAGIC {
558            return Err(crate::Error::Parse("Not an ANIF file (bad magic)".into()));
559        }
560        if buf.len() < 132 {
561            return Err(crate::Error::Parse("ANIF header too short".into()));
562        }
563        let major = buf[4];
564        let minor = buf[5];
565        let endian_flag = buf[6];
566        let endian = if endian_flag == 0 {
567            AnifEndian::Little
568        } else {
569            AnifEndian::Big
570        };
571        let read_u32 = |b: &[u8], off: usize| -> u32 {
572            let arr: [u8; 4] = b[off..off + 4].try_into().unwrap_or([0u8; 4]);
573            if endian == AnifEndian::Little {
574                u32::from_le_bytes(arr)
575            } else {
576                u32::from_be_bytes(arr)
577            }
578        };
579        let read_f64 = |b: &[u8], off: usize| -> f64 {
580            let arr: [u8; 8] = b[off..off + 8].try_into().unwrap_or([0u8; 8]);
581            if endian == AnifEndian::Little {
582                f64::from_le_bytes(arr)
583            } else {
584                f64::from_be_bytes(arr)
585            }
586        };
587        let read_str = |b: &[u8], off: usize, len: usize| -> String {
588            let slice = &b[off..off + len];
589            let end = slice.iter().position(|&c| c == 0).unwrap_or(len);
590            String::from_utf8_lossy(&slice[..end]).into_owned()
591        };
592        let num_points = read_u32(buf, 8);
593        let x_start = read_f64(buf, 12);
594        let x_delta = read_f64(buf, 20);
595        let title = read_str(buf, 28, 64);
596        let instrument = read_str(buf, 92, 32);
597        let date = read_str(buf, 124, 16);
598        self.header = AnifHeader {
599            version: (major, minor),
600            num_points,
601            x_start,
602            x_delta,
603            endian,
604            title: title.clone(),
605            instrument: instrument.clone(),
606            date: date.clone(),
607        };
608        let data_offset = 140usize;
609        let expected_len = data_offset + num_points as usize * 8;
610        if buf.len() < expected_len {
611            return Err(crate::Error::Parse(format!(
612                "ANIF data block too short: expected {} bytes, got {}",
613                expected_len,
614                buf.len()
615            )));
616        }
617        let mut x_vals = Vec::with_capacity(num_points as usize);
618        let mut y_vals = Vec::with_capacity(num_points as usize);
619        for i in 0..num_points as usize {
620            let off = data_offset + i * 8;
621            x_vals.push(x_start + x_delta * i as f64);
622            y_vals.push(read_f64(buf, off));
623        }
624        let meta = SpectrumMetadata {
625            title,
626            instrument,
627            date,
628            ..Default::default()
629        };
630        self.record = SpectrumRecord::from_arrays(x_vals, y_vals, meta);
631        Ok(())
632    }
633    /// Encode a `SpectrumRecord` to ANIF bytes (little-endian).
634    pub fn encode(record: &SpectrumRecord) -> Vec<u8> {
635        let n = record.len();
636        let mut buf = Vec::with_capacity(140 + n * 8);
637        buf.extend_from_slice(ANIF_MAGIC);
638        buf.push(1);
639        buf.push(0);
640        buf.push(0);
641        buf.push(0);
642        buf.extend_from_slice(&(n as u32).to_le_bytes());
643        let x_start = if record.x.is_empty() {
644            0.0
645        } else {
646            record.x[0]
647        };
648        let x_delta = if record.x.len() >= 2 {
649            record.x[1] - record.x[0]
650        } else {
651            1.0
652        };
653        buf.extend_from_slice(&x_start.to_le_bytes());
654        buf.extend_from_slice(&x_delta.to_le_bytes());
655        let title_bytes = record.metadata.title.as_bytes();
656        let mut tmp = [0u8; 64];
657        let copy_len = title_bytes.len().min(63);
658        tmp[..copy_len].copy_from_slice(&title_bytes[..copy_len]);
659        buf.extend_from_slice(&tmp);
660        let instr_bytes = record.metadata.instrument.as_bytes();
661        let mut tmp2 = [0u8; 32];
662        let copy_len2 = instr_bytes.len().min(31);
663        tmp2[..copy_len2].copy_from_slice(&instr_bytes[..copy_len2]);
664        buf.extend_from_slice(&tmp2);
665        let date_bytes = record.metadata.date.as_bytes();
666        let mut tmp3 = [0u8; 16];
667        let copy_len3 = date_bytes.len().min(15);
668        tmp3[..copy_len3].copy_from_slice(&date_bytes[..copy_len3]);
669        buf.extend_from_slice(&tmp3);
670        for &yv in &record.y {
671            buf.extend_from_slice(&yv.to_le_bytes());
672        }
673        buf
674    }
675}
676/// Similarity metric for spectral search.
677#[derive(Debug, Clone, Copy, PartialEq)]
678pub enum SimilarityMetric {
679    /// Normalized dot product between intensity vectors.
680    DotProduct,
681    /// Spectral Angle Mapper (SAM) โ€” angle in radians between vectors.
682    SpectralAngleMapper,
683    /// Euclidean distance (smaller = more similar).
684    Euclidean,
685    /// Pearson correlation coefficient.
686    Pearson,
687}
688/// Result of a similarity search.
689#[derive(Debug, Clone)]
690pub struct SearchResult {
691    /// Database entry ID.
692    pub id: usize,
693    /// Title of the matching spectrum.
694    pub title: String,
695    /// Similarity score (interpretation depends on metric).
696    pub score: f64,
697}
698/// Writer for various spectral file formats.
699pub struct SpectralExport;
700impl SpectralExport {
701    /// Write a spectrum to a CSV file (`x,y` per line).
702    pub fn write_csv<W: Write>(record: &SpectrumRecord, writer: &mut W) -> crate::Result<()> {
703        writeln!(writer, "# {}", record.metadata.title).map_err(crate::Error::Io)?;
704        writeln!(writer, "x,y").map_err(crate::Error::Io)?;
705        for (&x, &y) in record.x.iter().zip(record.y.iter()) {
706            writeln!(writer, "{},{}", x, y).map_err(crate::Error::Io)?;
707        }
708        Ok(())
709    }
710    /// Write a spectrum in JCAMP-DX format.
711    pub fn write_jcamp_dx<W: Write>(record: &SpectrumRecord, writer: &mut W) -> crate::Result<()> {
712        let title = &record.metadata.title;
713        let n = record.len();
714        let x_start = record.x_min();
715        let x_end = record.x_max();
716        let y_min = record.y_min();
717        let y_max = record.y_max();
718        writeln!(writer, "##TITLE={}", title).map_err(crate::Error::Io)?;
719        writeln!(writer, "##JCAMP-DX=4.24").map_err(crate::Error::Io)?;
720        writeln!(writer, "##DATA TYPE={}", record.metadata.source).map_err(crate::Error::Io)?;
721        writeln!(writer, "##DATE={}", record.metadata.date).map_err(crate::Error::Io)?;
722        writeln!(writer, "##INSTRUMENT={}", record.metadata.instrument)
723            .map_err(crate::Error::Io)?;
724        writeln!(writer, "##XUNITS={}", record.metadata.x_label).map_err(crate::Error::Io)?;
725        writeln!(writer, "##YUNITS={}", record.metadata.y_label).map_err(crate::Error::Io)?;
726        writeln!(writer, "##NPOINTS={}", n).map_err(crate::Error::Io)?;
727        writeln!(writer, "##FIRSTX={}", x_start).map_err(crate::Error::Io)?;
728        writeln!(writer, "##LASTX={}", x_end).map_err(crate::Error::Io)?;
729        writeln!(writer, "##MINY={}", y_min).map_err(crate::Error::Io)?;
730        writeln!(writer, "##MAXY={}", y_max).map_err(crate::Error::Io)?;
731        writeln!(writer, "##XFACTOR=1.0").map_err(crate::Error::Io)?;
732        writeln!(writer, "##YFACTOR=1.0").map_err(crate::Error::Io)?;
733        writeln!(writer, "##XYDATA=(X++(Y..Y))").map_err(crate::Error::Io)?;
734        let chunk_size = 8usize;
735        let mut i = 0;
736        while i < n {
737            let x_prefix = record.x[i];
738            write!(writer, "{:.6}", x_prefix).map_err(crate::Error::Io)?;
739            let end = (i + chunk_size).min(n);
740            for j in i..end {
741                write!(writer, " {:.6}", record.y[j]).map_err(crate::Error::Io)?;
742            }
743            writeln!(writer).map_err(crate::Error::Io)?;
744            i = end;
745        }
746        writeln!(writer, "##END=").map_err(crate::Error::Io)?;
747        Ok(())
748    }
749    /// Write a spectrum as plain-text (space-separated x y columns).
750    pub fn write_plain_text<W: Write>(
751        record: &SpectrumRecord,
752        writer: &mut W,
753    ) -> crate::Result<()> {
754        writeln!(writer, "# Spectrum: {}", record.metadata.title).map_err(crate::Error::Io)?;
755        writeln!(writer, "# Instrument: {}", record.metadata.instrument)
756            .map_err(crate::Error::Io)?;
757        writeln!(writer, "# Date: {}", record.metadata.date).map_err(crate::Error::Io)?;
758        writeln!(writer, "# X_label: {}", record.metadata.x_label).map_err(crate::Error::Io)?;
759        writeln!(writer, "# Y_label: {}", record.metadata.y_label).map_err(crate::Error::Io)?;
760        for (&x, &y) in record.x.iter().zip(record.y.iter()) {
761            writeln!(writer, "{:.8e}  {:.8e}", x, y).map_err(crate::Error::Io)?;
762        }
763        Ok(())
764    }
765    /// Write multiple spectra to a single CSV with columns `x,y1,y2,...`.
766    ///
767    /// All records must share the same X grid. If they differ, the X from the
768    /// first record is used.
769    pub fn write_multi_csv<W: Write>(
770        records: &[SpectrumRecord],
771        writer: &mut W,
772    ) -> crate::Result<()> {
773        if records.is_empty() {
774            return Ok(());
775        }
776        write!(writer, "x").map_err(crate::Error::Io)?;
777        for r in records {
778            write!(writer, ",{}", r.metadata.title).map_err(crate::Error::Io)?;
779        }
780        writeln!(writer).map_err(crate::Error::Io)?;
781        let n = records[0].x.len();
782        for i in 0..n {
783            write!(writer, "{}", records[0].x[i]).map_err(crate::Error::Io)?;
784            for r in records {
785                if i < r.y.len() {
786                    write!(writer, ",{}", r.y[i]).map_err(crate::Error::Io)?;
787                } else {
788                    write!(writer, ",").map_err(crate::Error::Io)?;
789                }
790            }
791            writeln!(writer).map_err(crate::Error::Io)?;
792        }
793        Ok(())
794    }
795}
796/// Entry stored in the spectral database.
797#[derive(Debug, Clone)]
798pub struct DatabaseEntry {
799    /// Unique numeric key.
800    pub id: usize,
801    /// The stored spectrum.
802    pub record: SpectrumRecord,
803}
804/// Parser for JCAMP-DX spectral files (`.jdx` / `.dx`).
805///
806/// Supports the following JCAMP-DX features:
807/// - Single-block files with `##XYDATA=(X++(Y..Y))` data records.
808/// - Multi-block compound files (multiple `##TITLE` blocks).
809/// - `##NTUPLES` sections for multi-channel data.
810/// - `##DELTAX` and `##FIRSTX` / `##LASTX` for compressed X grids.
811/// - `##XFACTOR` / `##YFACTOR` scaling factors.
812#[derive(Debug, Default)]
813pub struct JcampDxReader {
814    /// All spectrum blocks parsed from the file.
815    pub records: Vec<SpectrumRecord>,
816}
817impl JcampDxReader {
818    /// Create a new, empty reader.
819    pub fn new() -> Self {
820        Self::default()
821    }
822    /// Parse JCAMP-DX content from a `Read` source.
823    pub fn parse<R: Read>(&mut self, reader: R) -> crate::Result<()> {
824        let buf = BufReader::new(reader);
825        let mut current = SpectrumRecord::new();
826        let mut section = JdxSection::Header;
827        let mut x_factor = 1.0_f64;
828        let mut y_factor = 1.0_f64;
829        let mut firstx = f64::NAN;
830        let mut deltax = f64::NAN;
831        let mut npoints: usize = 0;
832        let mut raw_y: Vec<f64> = Vec::new();
833        let mut in_block = false;
834        let mut ntuple_x: Vec<f64> = Vec::new();
835        let mut ntuple_y: Vec<f64> = Vec::new();
836        let mut ntuple_var_names: Vec<String> = Vec::new();
837        let mut ntuple_collecting = false;
838        for line_res in buf.lines() {
839            let line = line_res.map_err(crate::Error::Io)?;
840            let trimmed = line.trim();
841            if trimmed.is_empty() || trimmed.starts_with("$$") {
842                continue;
843            }
844            if let Some(content) = trimmed.strip_prefix("##") {
845                let eq_pos = content.find('=');
846                let (label, value) = if let Some(pos) = eq_pos {
847                    (
848                        content[..pos].trim().to_uppercase(),
849                        content[pos + 1..].trim().to_string(),
850                    )
851                } else {
852                    (content.trim().to_uppercase(), String::new())
853                };
854                match label.as_str() {
855                    "TITLE" => {
856                        if in_block && !current.is_empty() {
857                            self.records.push(current.clone());
858                            current = SpectrumRecord::new();
859                            x_factor = 1.0;
860                            y_factor = 1.0;
861                            firstx = f64::NAN;
862                            deltax = f64::NAN;
863                            npoints = 0;
864                            raw_y.clear();
865                        }
866                        in_block = true;
867                        current.metadata.title = value;
868                        section = JdxSection::Header;
869                    }
870                    "END" => {
871                        if !raw_y.is_empty() {
872                            Self::build_xy_from_raw(
873                                &mut current,
874                                &raw_y,
875                                firstx,
876                                deltax,
877                                x_factor,
878                                y_factor,
879                            );
880                            raw_y.clear();
881                        }
882                        if !ntuple_x.is_empty() {
883                            current.x = ntuple_x.clone();
884                            current.y = ntuple_y.clone();
885                            ntuple_x.clear();
886                            ntuple_y.clear();
887                        }
888                        if !current.is_empty() {
889                            self.records.push(current.clone());
890                        }
891                        current = SpectrumRecord::new();
892                        in_block = false;
893                        x_factor = 1.0;
894                        y_factor = 1.0;
895                        firstx = f64::NAN;
896                        deltax = f64::NAN;
897                        npoints = 0;
898                        section = JdxSection::Header;
899                    }
900                    "DATA TYPE" | "DATATYPE" => {
901                        let v = value.to_uppercase();
902                        if v.contains("INFRARED") || v.contains("IR") {
903                            current.metadata.source = SpectrumSource::Ftir;
904                        } else if v.contains("RAMAN") {
905                            current.metadata.source = SpectrumSource::Raman;
906                        } else if v.contains("UV") || v.contains("VIS") {
907                            current.metadata.source = SpectrumSource::UvVis;
908                        } else if v.contains("NMR") {
909                            current.metadata.source = SpectrumSource::Nmr;
910                        } else if v.contains("MASS") {
911                            current.metadata.source = SpectrumSource::MassSpec;
912                        }
913                    }
914                    "DATE" => current.metadata.date = value,
915                    "INSTRUMENT" | "SPECTROMETER/DATA SYSTEM" => {
916                        current.metadata.instrument = value;
917                    }
918                    "XUNITS" => current.metadata.x_label = value,
919                    "YUNITS" => current.metadata.y_label = value,
920                    "CAS REGISTRY NO" | "CASREGNO" => current.metadata.cas_number = value,
921                    "MOLFORM" | "MOLECULAR FORMULA" => {
922                        current.metadata.molecular_formula = value;
923                    }
924                    "MW" | "MOLECULAR WEIGHT" => {
925                        current.metadata.molecular_weight =
926                            value.parse::<f64>().unwrap_or(f64::NAN);
927                    }
928                    "XFACTOR" => x_factor = value.parse::<f64>().unwrap_or(1.0),
929                    "YFACTOR" => y_factor = value.parse::<f64>().unwrap_or(1.0),
930                    "FIRSTX" => firstx = value.parse::<f64>().unwrap_or(f64::NAN),
931                    "DELTAX" => deltax = value.parse::<f64>().unwrap_or(f64::NAN),
932                    "NPOINTS" => npoints = value.parse::<usize>().unwrap_or(0),
933                    "XYDATA" => {
934                        section = JdxSection::XyData;
935                        raw_y.clear();
936                    }
937                    "XYPOINTS" => {
938                        section = JdxSection::XyData;
939                        raw_y.clear();
940                    }
941                    "NTUPLES" => {
942                        section = JdxSection::NtupleDecl;
943                        ntuple_var_names.clear();
944                        ntuple_x.clear();
945                        ntuple_y.clear();
946                        ntuple_collecting = false;
947                    }
948                    "VAR_NAME" => {
949                        ntuple_var_names = value.split(',').map(|s| s.trim().to_string()).collect();
950                    }
951                    "PAGE" => {
952                        ntuple_collecting = true;
953                        section = JdxSection::NtupleData;
954                    }
955                    "END NTUPLES" => {
956                        ntuple_collecting = false;
957                        section = JdxSection::Header;
958                    }
959                    _ => {
960                        current.metadata.extra.insert(label.to_string(), value);
961                    }
962                }
963                continue;
964            }
965            match section {
966                JdxSection::XyData => {
967                    Self::parse_xydata_line(trimmed, &mut current, &mut raw_y, x_factor, y_factor);
968                }
969                JdxSection::NtupleData if ntuple_collecting => {
970                    Self::parse_ntuple_line(trimmed, &mut ntuple_x, &mut ntuple_y);
971                }
972                _ => {}
973            }
974            let _ = npoints;
975        }
976        if in_block && !current.is_empty() {
977            if !raw_y.is_empty() {
978                Self::build_xy_from_raw(&mut current, &raw_y, firstx, deltax, x_factor, y_factor);
979            }
980            if !ntuple_x.is_empty() {
981                current.x = ntuple_x;
982                current.y = ntuple_y;
983            }
984            if !current.is_empty() {
985                self.records.push(current);
986            }
987        }
988        Ok(())
989    }
990    /// Parse a line in `XYDATA=(X++(Y..Y))` mode.
991    fn parse_xydata_line(
992        line: &str,
993        record: &mut SpectrumRecord,
994        raw_y: &mut Vec<f64>,
995        x_factor: f64,
996        y_factor: f64,
997    ) {
998        let tokens: Vec<&str> = line.split_whitespace().collect();
999        if tokens.is_empty() {
1000            return;
1001        }
1002        if let Ok(x_val) = tokens[0].parse::<f64>() {
1003            record.x.push(x_val * x_factor);
1004            for tok in &tokens[1..] {
1005                if let Ok(y_val) = tok.parse::<f64>() {
1006                    raw_y.push(y_val * y_factor);
1007                }
1008            }
1009        }
1010    }
1011    /// Build the full X/Y arrays from compressed X++(Y..Y) raw data.
1012    fn build_xy_from_raw(
1013        record: &mut SpectrumRecord,
1014        raw_y: &[f64],
1015        firstx: f64,
1016        deltax: f64,
1017        x_factor: f64,
1018        y_factor: f64,
1019    ) {
1020        if !firstx.is_nan() && !deltax.is_nan() {
1021            record.x.clear();
1022            record.y.clear();
1023            for (i, &yv) in raw_y.iter().enumerate() {
1024                record.x.push((firstx + deltax * i as f64) * x_factor);
1025                record.y.push(yv * y_factor);
1026            }
1027        } else {
1028            record.y.clear();
1029            record.y.extend_from_slice(raw_y);
1030            let _ = (x_factor, y_factor);
1031        }
1032    }
1033    /// Parse NTUPLE data line (tab or space separated x,y pairs).
1034    fn parse_ntuple_line(line: &str, xs: &mut Vec<f64>, ys: &mut Vec<f64>) {
1035        let tokens: Vec<&str> = line.split_whitespace().collect();
1036        if tokens.len() >= 2
1037            && let (Ok(x), Ok(y)) = (tokens[0].parse::<f64>(), tokens[1].parse::<f64>())
1038        {
1039            xs.push(x);
1040            ys.push(y);
1041        }
1042    }
1043    /// Return the number of parsed blocks.
1044    pub fn count(&self) -> usize {
1045        self.records.len()
1046    }
1047}
1048/// Savitzky-Golay filter window size options.
1049#[derive(Debug, Clone, Copy, PartialEq)]
1050pub enum SgWindow {
1051    /// 5-point window.
1052    W5,
1053    /// 7-point window.
1054    W7,
1055    /// 11-point window.
1056    W11,
1057    /// 15-point window.
1058    W15,
1059    /// 25-point window.
1060    W25,
1061}
1062impl SgWindow {
1063    /// Half-window size (number of points on each side).
1064    pub fn half(self) -> usize {
1065        match self {
1066            SgWindow::W5 => 2,
1067            SgWindow::W7 => 3,
1068            SgWindow::W11 => 5,
1069            SgWindow::W15 => 7,
1070            SgWindow::W25 => 12,
1071        }
1072    }
1073}
1074/// Header parsed from an ANIF file.
1075#[derive(Debug, Clone)]
1076pub struct AnifHeader {
1077    /// Format version (major.minor).
1078    pub version: (u8, u8),
1079    /// Number of data points.
1080    pub num_points: u32,
1081    /// Starting X value.
1082    pub x_start: f64,
1083    /// X increment per point.
1084    pub x_delta: f64,
1085    /// Byte endianness.
1086    pub endian: AnifEndian,
1087    /// User-defined title string (up to 64 bytes).
1088    pub title: String,
1089    /// Instrument identifier (up to 32 bytes).
1090    pub instrument: String,
1091    /// Date string (up to 16 bytes).
1092    pub date: String,
1093}
1094/// Source type that generated the spectrum.
1095#[derive(Debug, Clone, PartialEq)]
1096pub enum SpectrumSource {
1097    /// Fourier-Transform Infrared spectroscopy.
1098    Ftir,
1099    /// Near-Infrared spectroscopy.
1100    Nir,
1101    /// Raman spectroscopy.
1102    Raman,
1103    /// UV-Visible spectroscopy.
1104    UvVis,
1105    /// Mass spectrometry.
1106    MassSpec,
1107    /// Nuclear Magnetic Resonance spectroscopy.
1108    Nmr,
1109    /// Unknown / unspecified source.
1110    Unknown(String),
1111}
1112/// State machine for JCAMP-DX section parsing.
1113#[derive(Debug, PartialEq)]
1114pub(super) enum JdxSection {
1115    Header,
1116    XyData,
1117    NtupleDecl,
1118    NtupleData,
1119}