1#![allow(clippy::needless_range_loop)]
6use std::collections::HashMap;
7use std::io::{BufRead, BufReader, Read, Write};
8
9use super::functions::ANIF_MAGIC;
10#[allow(unused_imports)]
11use super::functions::*;
12
13#[derive(Debug, Clone)]
15pub struct PeakResult {
16 pub index: usize,
18 pub x: f64,
20 pub y: f64,
22 pub fwhm: f64,
24}
25#[derive(Debug, Clone, Copy, PartialEq)]
27pub enum AnifEndian {
28 Little,
30 Big,
32}
33#[derive(Debug, Default)]
37pub struct SpectralDatabase {
38 pub(super) entries: Vec<DatabaseEntry>,
39 pub(super) next_id: usize,
40}
41impl SpectralDatabase {
42 pub fn new() -> Self {
44 Self::default()
45 }
46 pub fn insert(&mut self, record: SpectrumRecord) -> usize {
48 let id = self.next_id;
49 self.entries.push(DatabaseEntry { id, record });
50 self.next_id += 1;
51 id
52 }
53 pub fn get(&self, id: usize) -> Option<&SpectrumRecord> {
55 self.entries.iter().find(|e| e.id == id).map(|e| &e.record)
56 }
57 pub fn remove(&mut self, id: usize) -> bool {
59 let before = self.entries.len();
60 self.entries.retain(|e| e.id != id);
61 self.entries.len() < before
62 }
63 pub fn len(&self) -> usize {
65 self.entries.len()
66 }
67 pub fn is_empty(&self) -> bool {
69 self.entries.is_empty()
70 }
71 pub fn search(
76 &self,
77 query: &SpectrumRecord,
78 metric: SimilarityMetric,
79 top_k: usize,
80 ) -> Vec<SearchResult> {
81 let mut scores: Vec<(usize, &str, f64)> = self
82 .entries
83 .iter()
84 .map(|e| {
85 let score = Self::compute_similarity(query, &e.record, metric);
86 (e.id, e.record.metadata.title.as_str(), score)
87 })
88 .collect();
89 match metric {
90 SimilarityMetric::Euclidean => {
91 scores.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
92 }
93 _ => {
94 scores.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
95 }
96 }
97 scores
98 .into_iter()
99 .take(top_k)
100 .map(|(id, title, score)| SearchResult {
101 id,
102 title: title.to_string(),
103 score,
104 })
105 .collect()
106 }
107 fn compute_similarity(a: &SpectrumRecord, b: &SpectrumRecord, metric: SimilarityMetric) -> f64 {
109 let len = a.y.len().min(b.y.len());
110 if len == 0 {
111 return 0.0;
112 }
113 let ya: &[f64] = &a.y[..len];
114 let yb: &[f64] = &b.y[..len];
115 match metric {
116 SimilarityMetric::DotProduct => {
117 let dot: f64 = ya.iter().zip(yb).map(|(x, y)| x * y).sum();
118 let na: f64 = ya.iter().map(|v| v * v).sum::<f64>().sqrt();
119 let nb: f64 = yb.iter().map(|v| v * v).sum::<f64>().sqrt();
120 if na > 0.0 && nb > 0.0 {
121 dot / (na * nb)
122 } else {
123 0.0
124 }
125 }
126 SimilarityMetric::SpectralAngleMapper => {
127 let dot: f64 = ya.iter().zip(yb).map(|(x, y)| x * y).sum();
128 let na: f64 = ya.iter().map(|v| v * v).sum::<f64>().sqrt();
129 let nb: f64 = yb.iter().map(|v| v * v).sum::<f64>().sqrt();
130 if na > 0.0 && nb > 0.0 {
131 let cos = (dot / (na * nb)).clamp(-1.0, 1.0);
132 std::f64::consts::PI / 2.0 - cos.acos()
133 } else {
134 0.0
135 }
136 }
137 SimilarityMetric::Euclidean => ya
138 .iter()
139 .zip(yb)
140 .map(|(x, y)| (x - y).powi(2))
141 .sum::<f64>()
142 .sqrt(),
143 SimilarityMetric::Pearson => {
144 let mean_a = ya.iter().sum::<f64>() / len as f64;
145 let mean_b = yb.iter().sum::<f64>() / len as f64;
146 let num: f64 = ya
147 .iter()
148 .zip(yb)
149 .map(|(x, y)| (x - mean_a) * (y - mean_b))
150 .sum();
151 let sa: f64 = ya.iter().map(|x| (x - mean_a).powi(2)).sum::<f64>().sqrt();
152 let sb: f64 = yb.iter().map(|y| (y - mean_b).powi(2)).sum::<f64>().sqrt();
153 if sa > 0.0 && sb > 0.0 {
154 num / (sa * sb)
155 } else {
156 0.0
157 }
158 }
159 }
160 }
161 pub fn iter(&self) -> impl Iterator<Item = &DatabaseEntry> {
163 self.entries.iter()
164 }
165}
166#[derive(Debug, Clone, Default)]
168pub struct SpectrumMetadata {
169 pub title: String,
171 pub source: SpectrumSource,
173 pub date: String,
175 pub instrument: String,
177 pub x_label: String,
179 pub y_label: String,
181 pub cas_number: String,
183 pub molecular_formula: String,
185 pub molecular_weight: f64,
187 pub extra: HashMap<String, String>,
189}
190pub struct SpectralAnalysis;
192impl SpectralAnalysis {
193 pub fn find_peaks(record: &SpectrumRecord, window: usize, threshold: f64) -> Vec<PeakResult> {
198 let n = record.y.len();
199 let half = window.max(1);
200 let mut peaks = Vec::new();
201 for i in half..n.saturating_sub(half) {
202 let y_i = record.y[i];
203 if y_i < threshold {
204 continue;
205 }
206 let is_max = (1..=half).all(|k| y_i > record.y[i - k] && y_i > record.y[i + k]);
207 if is_max {
208 let fwhm = Self::estimate_fwhm(record, i);
209 peaks.push(PeakResult {
210 index: i,
211 x: record.x[i],
212 y: y_i,
213 fwhm,
214 });
215 }
216 }
217 peaks
218 }
219 fn estimate_fwhm(record: &SpectrumRecord, peak_idx: usize) -> f64 {
222 let half_max = record.y[peak_idx] * 0.5;
223 let n = record.y.len();
224 let mut left_x = record.x[peak_idx];
225 for i in (0..peak_idx).rev() {
226 if record.y[i] <= half_max {
227 if i + 1 < n {
228 let y0 = record.y[i];
229 let y1 = record.y[i + 1];
230 let x0 = record.x[i];
231 let x1 = record.x[i + 1];
232 if (y1 - y0).abs() > 1e-15 {
233 left_x = x0 + (half_max - y0) * (x1 - x0) / (y1 - y0);
234 }
235 }
236 break;
237 }
238 }
239 let mut right_x = record.x[peak_idx];
240 for i in peak_idx + 1..n {
241 if record.y[i] <= half_max {
242 if i > 0 {
243 let y0 = record.y[i - 1];
244 let y1 = record.y[i];
245 let x0 = record.x[i - 1];
246 let x1 = record.x[i];
247 if (y1 - y0).abs() > 1e-15 {
248 right_x = x0 + (half_max - y0) * (x1 - x0) / (y1 - y0);
249 }
250 }
251 break;
252 }
253 }
254 (right_x - left_x).abs()
255 }
256 pub fn als_baseline(y: &[f64], lam: f64, p: f64, iterations: usize) -> Vec<f64> {
266 let n = y.len();
267 if n < 3 {
268 return y.to_vec();
269 }
270 let mut weights = vec![1.0_f64; n];
271 let mut baseline = y.to_vec();
272 for _iter in 0..iterations {
273 let mut diag = vec![0.0_f64; n];
274 let mut sub = vec![0.0_f64; n - 1];
275 let mut sup = vec![0.0_f64; n - 1];
276 diag[..n].copy_from_slice(&weights[..n]);
277 for i in 0..n - 1 {
278 diag[i] += lam;
279 diag[i + 1] += lam;
280 sub[i] -= lam;
281 sup[i] -= lam;
282 }
283 let rhs: Vec<f64> = (0..n).map(|i| weights[i] * y[i]).collect();
284 baseline = Self::tridiagonal_solve(&diag, &sub, &sup, &rhs);
285 for i in 0..n {
286 if y[i] > baseline[i] {
287 weights[i] = p;
288 } else {
289 weights[i] = 1.0 - p;
290 }
291 }
292 }
293 y.iter()
294 .zip(baseline.iter())
295 .map(|(&yi, &bi)| yi - bi)
296 .collect()
297 }
298 fn tridiagonal_solve(diag: &[f64], sub: &[f64], sup: &[f64], rhs: &[f64]) -> Vec<f64> {
301 let n = diag.len();
302 if n == 0 {
303 return Vec::new();
304 }
305 let mut c_prime = vec![0.0; n - 1];
306 let mut d_prime = vec![0.0; n];
307 let mut x = vec![0.0; n];
308 c_prime[0] = if diag[0].abs() > 1e-15 {
309 sup[0] / diag[0]
310 } else {
311 0.0
312 };
313 d_prime[0] = if diag[0].abs() > 1e-15 {
314 rhs[0] / diag[0]
315 } else {
316 0.0
317 };
318 for i in 1..n {
319 let denom = diag[i]
320 - if i > 0 {
321 sub[i - 1] * c_prime[i - 1]
322 } else {
323 0.0
324 };
325 if denom.abs() < 1e-15 {
326 d_prime[i] = 0.0;
327 if i < n - 1 {
328 c_prime[i] = 0.0;
329 }
330 } else {
331 d_prime[i] = (rhs[i] - sub[i - 1] * d_prime[i - 1]) / denom;
332 if i < n - 1 {
333 c_prime[i] = sup[i] / denom;
334 }
335 }
336 }
337 x[n - 1] = d_prime[n - 1];
338 for i in (0..n - 1).rev() {
339 x[i] = d_prime[i] - c_prime[i] * x[i + 1];
340 }
341 x
342 }
343 pub fn savitzky_golay(y: &[f64], window: SgWindow) -> Vec<f64> {
348 let n = y.len();
349 let half = window.half();
350 if n <= 2 * half {
351 return y.to_vec();
352 }
353 let coeffs = Self::sg_coefficients(half);
354 let mut out = y.to_vec();
355 for i in half..n - half {
356 let mut val = 0.0;
357 for (k, &c) in coeffs.iter().enumerate() {
358 val += c * y[i + k - half];
359 }
360 out[i] = val;
361 }
362 out
363 }
364 fn sg_coefficients(half: usize) -> Vec<f64> {
367 let m = 2 * half + 1;
368 let h = half as f64;
369 let norm = (2.0 * h + 1.0) * (2.0 * h + 3.0) * (2.0 * h - 1.0) / 3.0;
370 let mut coeffs = Vec::with_capacity(m);
371 for j in -(half as i64)..=(half as i64) {
372 let c = (3.0 * h * (h + 1.0) - 1.0 - 5.0 * j as f64 * j as f64) / norm;
373 coeffs.push(c);
374 }
375 coeffs
376 }
377 pub fn moving_average(y: &[f64], half_window: usize) -> Vec<f64> {
382 let n = y.len();
383 let mut out = y.to_vec();
384 for i in half_window..n - half_window {
385 let sum: f64 = y[i - half_window..=i + half_window].iter().sum();
386 out[i] = sum / (2 * half_window + 1) as f64;
387 }
388 out
389 }
390 pub fn derivative(record: &SpectrumRecord) -> Vec<f64> {
394 let n = record.y.len();
395 if n < 2 {
396 return vec![0.0; n];
397 }
398 let mut deriv = vec![0.0; n];
399 for i in 1..n - 1 {
400 let dx = record.x[i + 1] - record.x[i - 1];
401 if dx.abs() > 1e-15 {
402 deriv[i] = (record.y[i + 1] - record.y[i - 1]) / dx;
403 }
404 }
405 if n >= 2 {
406 let dx = record.x[1] - record.x[0];
407 if dx.abs() > 1e-15 {
408 deriv[0] = (record.y[1] - record.y[0]) / dx;
409 }
410 let dx_end = record.x[n - 1] - record.x[n - 2];
411 if dx_end.abs() > 1e-15 {
412 deriv[n - 1] = (record.y[n - 1] - record.y[n - 2]) / dx_end;
413 }
414 }
415 deriv
416 }
417 pub fn integrate(record: &SpectrumRecord, x_start: f64, x_end: f64) -> f64 {
419 let mut integral = 0.0;
420 let n = record.x.len();
421 for i in 1..n {
422 let x0 = record.x[i - 1];
423 let x1 = record.x[i];
424 if x0 >= x_end || x1 <= x_start {
425 continue;
426 }
427 let xa = x0.max(x_start);
428 let xb = x1.min(x_end);
429 let t0 = (xa - x0) / (x1 - x0);
430 let t1 = (xb - x0) / (x1 - x0);
431 let ya = record.y[i - 1] + t0 * (record.y[i] - record.y[i - 1]);
432 let yb = record.y[i - 1] + t1 * (record.y[i] - record.y[i - 1]);
433 integral += 0.5 * (ya + yb) * (xb - xa);
434 }
435 integral
436 }
437}
438#[derive(Debug, Clone, Default)]
441pub struct SpectrumRecord {
442 pub x: Vec<f64>,
444 pub y: Vec<f64>,
446 pub metadata: SpectrumMetadata,
448}
449impl SpectrumRecord {
450 pub fn new() -> Self {
452 Self::default()
453 }
454 pub fn from_arrays(x: Vec<f64>, y: Vec<f64>, metadata: SpectrumMetadata) -> Self {
456 Self { x, y, metadata }
457 }
458 pub fn len(&self) -> usize {
460 self.x.len()
461 }
462 pub fn is_empty(&self) -> bool {
464 self.x.is_empty()
465 }
466 pub fn x_min(&self) -> f64 {
468 self.x.iter().cloned().fold(f64::NAN, f64::min)
469 }
470 pub fn x_max(&self) -> f64 {
472 self.x.iter().cloned().fold(f64::NAN, f64::max)
473 }
474 pub fn y_max(&self) -> f64 {
476 self.y.iter().cloned().fold(f64::NAN, f64::max)
477 }
478 pub fn y_min(&self) -> f64 {
480 self.y.iter().cloned().fold(f64::NAN, f64::min)
481 }
482 pub fn normalize(&mut self) {
485 let max_y = self.y_max();
486 if max_y > 0.0 && max_y.is_finite() {
487 for v in &mut self.y {
488 *v /= max_y;
489 }
490 }
491 }
492 pub fn interpolate_at(&self, x_val: f64) -> Option<f64> {
495 if self.x.is_empty() {
496 return None;
497 }
498 let idx = self.x.partition_point(|&v| v < x_val);
499 if idx == 0 {
500 if (self.x[0] - x_val).abs() < f64::EPSILON {
501 return Some(self.y[0]);
502 }
503 return None;
504 }
505 if idx >= self.x.len() {
506 let last = self.x.len() - 1;
507 if (self.x[last] - x_val).abs() < f64::EPSILON {
508 return Some(self.y[last]);
509 }
510 return None;
511 }
512 let x0 = self.x[idx - 1];
513 let x1 = self.x[idx];
514 let y0 = self.y[idx - 1];
515 let y1 = self.y[idx];
516 let t = (x_val - x0) / (x1 - x0);
517 Some(y0 + t * (y1 - y0))
518 }
519}
520#[derive(Debug, Default)]
538pub struct AnifReader {
539 pub header: AnifHeader,
541 pub record: SpectrumRecord,
543}
544impl AnifReader {
545 pub fn new() -> Self {
547 Self::default()
548 }
549 pub fn parse<R: Read>(&mut self, mut reader: R) -> crate::Result<()> {
551 let mut buf = Vec::new();
552 reader.read_to_end(&mut buf).map_err(crate::Error::Io)?;
553 self.parse_bytes(&buf)
554 }
555 pub fn parse_bytes(&mut self, buf: &[u8]) -> crate::Result<()> {
557 if buf.len() < 4 || &buf[0..4] != ANIF_MAGIC {
558 return Err(crate::Error::Parse("Not an ANIF file (bad magic)".into()));
559 }
560 if buf.len() < 132 {
561 return Err(crate::Error::Parse("ANIF header too short".into()));
562 }
563 let major = buf[4];
564 let minor = buf[5];
565 let endian_flag = buf[6];
566 let endian = if endian_flag == 0 {
567 AnifEndian::Little
568 } else {
569 AnifEndian::Big
570 };
571 let read_u32 = |b: &[u8], off: usize| -> u32 {
572 let arr: [u8; 4] = b[off..off + 4].try_into().unwrap_or([0u8; 4]);
573 if endian == AnifEndian::Little {
574 u32::from_le_bytes(arr)
575 } else {
576 u32::from_be_bytes(arr)
577 }
578 };
579 let read_f64 = |b: &[u8], off: usize| -> f64 {
580 let arr: [u8; 8] = b[off..off + 8].try_into().unwrap_or([0u8; 8]);
581 if endian == AnifEndian::Little {
582 f64::from_le_bytes(arr)
583 } else {
584 f64::from_be_bytes(arr)
585 }
586 };
587 let read_str = |b: &[u8], off: usize, len: usize| -> String {
588 let slice = &b[off..off + len];
589 let end = slice.iter().position(|&c| c == 0).unwrap_or(len);
590 String::from_utf8_lossy(&slice[..end]).into_owned()
591 };
592 let num_points = read_u32(buf, 8);
593 let x_start = read_f64(buf, 12);
594 let x_delta = read_f64(buf, 20);
595 let title = read_str(buf, 28, 64);
596 let instrument = read_str(buf, 92, 32);
597 let date = read_str(buf, 124, 16);
598 self.header = AnifHeader {
599 version: (major, minor),
600 num_points,
601 x_start,
602 x_delta,
603 endian,
604 title: title.clone(),
605 instrument: instrument.clone(),
606 date: date.clone(),
607 };
608 let data_offset = 140usize;
609 let expected_len = data_offset + num_points as usize * 8;
610 if buf.len() < expected_len {
611 return Err(crate::Error::Parse(format!(
612 "ANIF data block too short: expected {} bytes, got {}",
613 expected_len,
614 buf.len()
615 )));
616 }
617 let mut x_vals = Vec::with_capacity(num_points as usize);
618 let mut y_vals = Vec::with_capacity(num_points as usize);
619 for i in 0..num_points as usize {
620 let off = data_offset + i * 8;
621 x_vals.push(x_start + x_delta * i as f64);
622 y_vals.push(read_f64(buf, off));
623 }
624 let meta = SpectrumMetadata {
625 title,
626 instrument,
627 date,
628 ..Default::default()
629 };
630 self.record = SpectrumRecord::from_arrays(x_vals, y_vals, meta);
631 Ok(())
632 }
633 pub fn encode(record: &SpectrumRecord) -> Vec<u8> {
635 let n = record.len();
636 let mut buf = Vec::with_capacity(140 + n * 8);
637 buf.extend_from_slice(ANIF_MAGIC);
638 buf.push(1);
639 buf.push(0);
640 buf.push(0);
641 buf.push(0);
642 buf.extend_from_slice(&(n as u32).to_le_bytes());
643 let x_start = if record.x.is_empty() {
644 0.0
645 } else {
646 record.x[0]
647 };
648 let x_delta = if record.x.len() >= 2 {
649 record.x[1] - record.x[0]
650 } else {
651 1.0
652 };
653 buf.extend_from_slice(&x_start.to_le_bytes());
654 buf.extend_from_slice(&x_delta.to_le_bytes());
655 let title_bytes = record.metadata.title.as_bytes();
656 let mut tmp = [0u8; 64];
657 let copy_len = title_bytes.len().min(63);
658 tmp[..copy_len].copy_from_slice(&title_bytes[..copy_len]);
659 buf.extend_from_slice(&tmp);
660 let instr_bytes = record.metadata.instrument.as_bytes();
661 let mut tmp2 = [0u8; 32];
662 let copy_len2 = instr_bytes.len().min(31);
663 tmp2[..copy_len2].copy_from_slice(&instr_bytes[..copy_len2]);
664 buf.extend_from_slice(&tmp2);
665 let date_bytes = record.metadata.date.as_bytes();
666 let mut tmp3 = [0u8; 16];
667 let copy_len3 = date_bytes.len().min(15);
668 tmp3[..copy_len3].copy_from_slice(&date_bytes[..copy_len3]);
669 buf.extend_from_slice(&tmp3);
670 for &yv in &record.y {
671 buf.extend_from_slice(&yv.to_le_bytes());
672 }
673 buf
674 }
675}
676#[derive(Debug, Clone, Copy, PartialEq)]
678pub enum SimilarityMetric {
679 DotProduct,
681 SpectralAngleMapper,
683 Euclidean,
685 Pearson,
687}
688#[derive(Debug, Clone)]
690pub struct SearchResult {
691 pub id: usize,
693 pub title: String,
695 pub score: f64,
697}
698pub struct SpectralExport;
700impl SpectralExport {
701 pub fn write_csv<W: Write>(record: &SpectrumRecord, writer: &mut W) -> crate::Result<()> {
703 writeln!(writer, "# {}", record.metadata.title).map_err(crate::Error::Io)?;
704 writeln!(writer, "x,y").map_err(crate::Error::Io)?;
705 for (&x, &y) in record.x.iter().zip(record.y.iter()) {
706 writeln!(writer, "{},{}", x, y).map_err(crate::Error::Io)?;
707 }
708 Ok(())
709 }
710 pub fn write_jcamp_dx<W: Write>(record: &SpectrumRecord, writer: &mut W) -> crate::Result<()> {
712 let title = &record.metadata.title;
713 let n = record.len();
714 let x_start = record.x_min();
715 let x_end = record.x_max();
716 let y_min = record.y_min();
717 let y_max = record.y_max();
718 writeln!(writer, "##TITLE={}", title).map_err(crate::Error::Io)?;
719 writeln!(writer, "##JCAMP-DX=4.24").map_err(crate::Error::Io)?;
720 writeln!(writer, "##DATA TYPE={}", record.metadata.source).map_err(crate::Error::Io)?;
721 writeln!(writer, "##DATE={}", record.metadata.date).map_err(crate::Error::Io)?;
722 writeln!(writer, "##INSTRUMENT={}", record.metadata.instrument)
723 .map_err(crate::Error::Io)?;
724 writeln!(writer, "##XUNITS={}", record.metadata.x_label).map_err(crate::Error::Io)?;
725 writeln!(writer, "##YUNITS={}", record.metadata.y_label).map_err(crate::Error::Io)?;
726 writeln!(writer, "##NPOINTS={}", n).map_err(crate::Error::Io)?;
727 writeln!(writer, "##FIRSTX={}", x_start).map_err(crate::Error::Io)?;
728 writeln!(writer, "##LASTX={}", x_end).map_err(crate::Error::Io)?;
729 writeln!(writer, "##MINY={}", y_min).map_err(crate::Error::Io)?;
730 writeln!(writer, "##MAXY={}", y_max).map_err(crate::Error::Io)?;
731 writeln!(writer, "##XFACTOR=1.0").map_err(crate::Error::Io)?;
732 writeln!(writer, "##YFACTOR=1.0").map_err(crate::Error::Io)?;
733 writeln!(writer, "##XYDATA=(X++(Y..Y))").map_err(crate::Error::Io)?;
734 let chunk_size = 8usize;
735 let mut i = 0;
736 while i < n {
737 let x_prefix = record.x[i];
738 write!(writer, "{:.6}", x_prefix).map_err(crate::Error::Io)?;
739 let end = (i + chunk_size).min(n);
740 for j in i..end {
741 write!(writer, " {:.6}", record.y[j]).map_err(crate::Error::Io)?;
742 }
743 writeln!(writer).map_err(crate::Error::Io)?;
744 i = end;
745 }
746 writeln!(writer, "##END=").map_err(crate::Error::Io)?;
747 Ok(())
748 }
749 pub fn write_plain_text<W: Write>(
751 record: &SpectrumRecord,
752 writer: &mut W,
753 ) -> crate::Result<()> {
754 writeln!(writer, "# Spectrum: {}", record.metadata.title).map_err(crate::Error::Io)?;
755 writeln!(writer, "# Instrument: {}", record.metadata.instrument)
756 .map_err(crate::Error::Io)?;
757 writeln!(writer, "# Date: {}", record.metadata.date).map_err(crate::Error::Io)?;
758 writeln!(writer, "# X_label: {}", record.metadata.x_label).map_err(crate::Error::Io)?;
759 writeln!(writer, "# Y_label: {}", record.metadata.y_label).map_err(crate::Error::Io)?;
760 for (&x, &y) in record.x.iter().zip(record.y.iter()) {
761 writeln!(writer, "{:.8e} {:.8e}", x, y).map_err(crate::Error::Io)?;
762 }
763 Ok(())
764 }
765 pub fn write_multi_csv<W: Write>(
770 records: &[SpectrumRecord],
771 writer: &mut W,
772 ) -> crate::Result<()> {
773 if records.is_empty() {
774 return Ok(());
775 }
776 write!(writer, "x").map_err(crate::Error::Io)?;
777 for r in records {
778 write!(writer, ",{}", r.metadata.title).map_err(crate::Error::Io)?;
779 }
780 writeln!(writer).map_err(crate::Error::Io)?;
781 let n = records[0].x.len();
782 for i in 0..n {
783 write!(writer, "{}", records[0].x[i]).map_err(crate::Error::Io)?;
784 for r in records {
785 if i < r.y.len() {
786 write!(writer, ",{}", r.y[i]).map_err(crate::Error::Io)?;
787 } else {
788 write!(writer, ",").map_err(crate::Error::Io)?;
789 }
790 }
791 writeln!(writer).map_err(crate::Error::Io)?;
792 }
793 Ok(())
794 }
795}
796#[derive(Debug, Clone)]
798pub struct DatabaseEntry {
799 pub id: usize,
801 pub record: SpectrumRecord,
803}
804#[derive(Debug, Default)]
813pub struct JcampDxReader {
814 pub records: Vec<SpectrumRecord>,
816}
817impl JcampDxReader {
818 pub fn new() -> Self {
820 Self::default()
821 }
822 pub fn parse<R: Read>(&mut self, reader: R) -> crate::Result<()> {
824 let buf = BufReader::new(reader);
825 let mut current = SpectrumRecord::new();
826 let mut section = JdxSection::Header;
827 let mut x_factor = 1.0_f64;
828 let mut y_factor = 1.0_f64;
829 let mut firstx = f64::NAN;
830 let mut deltax = f64::NAN;
831 let mut npoints: usize = 0;
832 let mut raw_y: Vec<f64> = Vec::new();
833 let mut in_block = false;
834 let mut ntuple_x: Vec<f64> = Vec::new();
835 let mut ntuple_y: Vec<f64> = Vec::new();
836 let mut ntuple_var_names: Vec<String> = Vec::new();
837 let mut ntuple_collecting = false;
838 for line_res in buf.lines() {
839 let line = line_res.map_err(crate::Error::Io)?;
840 let trimmed = line.trim();
841 if trimmed.is_empty() || trimmed.starts_with("$$") {
842 continue;
843 }
844 if let Some(content) = trimmed.strip_prefix("##") {
845 let eq_pos = content.find('=');
846 let (label, value) = if let Some(pos) = eq_pos {
847 (
848 content[..pos].trim().to_uppercase(),
849 content[pos + 1..].trim().to_string(),
850 )
851 } else {
852 (content.trim().to_uppercase(), String::new())
853 };
854 match label.as_str() {
855 "TITLE" => {
856 if in_block && !current.is_empty() {
857 self.records.push(current.clone());
858 current = SpectrumRecord::new();
859 x_factor = 1.0;
860 y_factor = 1.0;
861 firstx = f64::NAN;
862 deltax = f64::NAN;
863 npoints = 0;
864 raw_y.clear();
865 }
866 in_block = true;
867 current.metadata.title = value;
868 section = JdxSection::Header;
869 }
870 "END" => {
871 if !raw_y.is_empty() {
872 Self::build_xy_from_raw(
873 &mut current,
874 &raw_y,
875 firstx,
876 deltax,
877 x_factor,
878 y_factor,
879 );
880 raw_y.clear();
881 }
882 if !ntuple_x.is_empty() {
883 current.x = ntuple_x.clone();
884 current.y = ntuple_y.clone();
885 ntuple_x.clear();
886 ntuple_y.clear();
887 }
888 if !current.is_empty() {
889 self.records.push(current.clone());
890 }
891 current = SpectrumRecord::new();
892 in_block = false;
893 x_factor = 1.0;
894 y_factor = 1.0;
895 firstx = f64::NAN;
896 deltax = f64::NAN;
897 npoints = 0;
898 section = JdxSection::Header;
899 }
900 "DATA TYPE" | "DATATYPE" => {
901 let v = value.to_uppercase();
902 if v.contains("INFRARED") || v.contains("IR") {
903 current.metadata.source = SpectrumSource::Ftir;
904 } else if v.contains("RAMAN") {
905 current.metadata.source = SpectrumSource::Raman;
906 } else if v.contains("UV") || v.contains("VIS") {
907 current.metadata.source = SpectrumSource::UvVis;
908 } else if v.contains("NMR") {
909 current.metadata.source = SpectrumSource::Nmr;
910 } else if v.contains("MASS") {
911 current.metadata.source = SpectrumSource::MassSpec;
912 }
913 }
914 "DATE" => current.metadata.date = value,
915 "INSTRUMENT" | "SPECTROMETER/DATA SYSTEM" => {
916 current.metadata.instrument = value;
917 }
918 "XUNITS" => current.metadata.x_label = value,
919 "YUNITS" => current.metadata.y_label = value,
920 "CAS REGISTRY NO" | "CASREGNO" => current.metadata.cas_number = value,
921 "MOLFORM" | "MOLECULAR FORMULA" => {
922 current.metadata.molecular_formula = value;
923 }
924 "MW" | "MOLECULAR WEIGHT" => {
925 current.metadata.molecular_weight =
926 value.parse::<f64>().unwrap_or(f64::NAN);
927 }
928 "XFACTOR" => x_factor = value.parse::<f64>().unwrap_or(1.0),
929 "YFACTOR" => y_factor = value.parse::<f64>().unwrap_or(1.0),
930 "FIRSTX" => firstx = value.parse::<f64>().unwrap_or(f64::NAN),
931 "DELTAX" => deltax = value.parse::<f64>().unwrap_or(f64::NAN),
932 "NPOINTS" => npoints = value.parse::<usize>().unwrap_or(0),
933 "XYDATA" => {
934 section = JdxSection::XyData;
935 raw_y.clear();
936 }
937 "XYPOINTS" => {
938 section = JdxSection::XyData;
939 raw_y.clear();
940 }
941 "NTUPLES" => {
942 section = JdxSection::NtupleDecl;
943 ntuple_var_names.clear();
944 ntuple_x.clear();
945 ntuple_y.clear();
946 ntuple_collecting = false;
947 }
948 "VAR_NAME" => {
949 ntuple_var_names = value.split(',').map(|s| s.trim().to_string()).collect();
950 }
951 "PAGE" => {
952 ntuple_collecting = true;
953 section = JdxSection::NtupleData;
954 }
955 "END NTUPLES" => {
956 ntuple_collecting = false;
957 section = JdxSection::Header;
958 }
959 _ => {
960 current.metadata.extra.insert(label.to_string(), value);
961 }
962 }
963 continue;
964 }
965 match section {
966 JdxSection::XyData => {
967 Self::parse_xydata_line(trimmed, &mut current, &mut raw_y, x_factor, y_factor);
968 }
969 JdxSection::NtupleData if ntuple_collecting => {
970 Self::parse_ntuple_line(trimmed, &mut ntuple_x, &mut ntuple_y);
971 }
972 _ => {}
973 }
974 let _ = npoints;
975 }
976 if in_block && !current.is_empty() {
977 if !raw_y.is_empty() {
978 Self::build_xy_from_raw(&mut current, &raw_y, firstx, deltax, x_factor, y_factor);
979 }
980 if !ntuple_x.is_empty() {
981 current.x = ntuple_x;
982 current.y = ntuple_y;
983 }
984 if !current.is_empty() {
985 self.records.push(current);
986 }
987 }
988 Ok(())
989 }
990 fn parse_xydata_line(
992 line: &str,
993 record: &mut SpectrumRecord,
994 raw_y: &mut Vec<f64>,
995 x_factor: f64,
996 y_factor: f64,
997 ) {
998 let tokens: Vec<&str> = line.split_whitespace().collect();
999 if tokens.is_empty() {
1000 return;
1001 }
1002 if let Ok(x_val) = tokens[0].parse::<f64>() {
1003 record.x.push(x_val * x_factor);
1004 for tok in &tokens[1..] {
1005 if let Ok(y_val) = tok.parse::<f64>() {
1006 raw_y.push(y_val * y_factor);
1007 }
1008 }
1009 }
1010 }
1011 fn build_xy_from_raw(
1013 record: &mut SpectrumRecord,
1014 raw_y: &[f64],
1015 firstx: f64,
1016 deltax: f64,
1017 x_factor: f64,
1018 y_factor: f64,
1019 ) {
1020 if !firstx.is_nan() && !deltax.is_nan() {
1021 record.x.clear();
1022 record.y.clear();
1023 for (i, &yv) in raw_y.iter().enumerate() {
1024 record.x.push((firstx + deltax * i as f64) * x_factor);
1025 record.y.push(yv * y_factor);
1026 }
1027 } else {
1028 record.y.clear();
1029 record.y.extend_from_slice(raw_y);
1030 let _ = (x_factor, y_factor);
1031 }
1032 }
1033 fn parse_ntuple_line(line: &str, xs: &mut Vec<f64>, ys: &mut Vec<f64>) {
1035 let tokens: Vec<&str> = line.split_whitespace().collect();
1036 if tokens.len() >= 2
1037 && let (Ok(x), Ok(y)) = (tokens[0].parse::<f64>(), tokens[1].parse::<f64>())
1038 {
1039 xs.push(x);
1040 ys.push(y);
1041 }
1042 }
1043 pub fn count(&self) -> usize {
1045 self.records.len()
1046 }
1047}
1048#[derive(Debug, Clone, Copy, PartialEq)]
1050pub enum SgWindow {
1051 W5,
1053 W7,
1055 W11,
1057 W15,
1059 W25,
1061}
1062impl SgWindow {
1063 pub fn half(self) -> usize {
1065 match self {
1066 SgWindow::W5 => 2,
1067 SgWindow::W7 => 3,
1068 SgWindow::W11 => 5,
1069 SgWindow::W15 => 7,
1070 SgWindow::W25 => 12,
1071 }
1072 }
1073}
1074#[derive(Debug, Clone)]
1076pub struct AnifHeader {
1077 pub version: (u8, u8),
1079 pub num_points: u32,
1081 pub x_start: f64,
1083 pub x_delta: f64,
1085 pub endian: AnifEndian,
1087 pub title: String,
1089 pub instrument: String,
1091 pub date: String,
1093}
1094#[derive(Debug, Clone, PartialEq)]
1096pub enum SpectrumSource {
1097 Ftir,
1099 Nir,
1101 Raman,
1103 UvVis,
1105 MassSpec,
1107 Nmr,
1109 Unknown(String),
1111}
1112#[derive(Debug, PartialEq)]
1114pub(super) enum JdxSection {
1115 Header,
1116 XyData,
1117 NtupleDecl,
1118 NtupleData,
1119}