use std::collections::HashMap;
use std::fs;
use std::path::Path;
use arrow::array::{Float64Array, Int32Array};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use crate::error::Error;
use crate::interp::{log_log_interp, sort_paired_vecs, XYTable};
#[derive(Clone)]
pub struct StoppingDb {
nist: HashMap<(String, u32), XYTable>,
compounds: HashMap<(String, String), XYTable>,
catima: HashMap<(u32, u32), XYTable>,
catima_strag: HashMap<(u32, u32), f64>,
}
unsafe impl Send for StoppingDb {}
unsafe impl Sync for StoppingDb {}
impl StoppingDb {
pub fn open(data_dir: impl AsRef<Path>) -> crate::Result<Self> {
let dir = data_dir.as_ref();
if !dir.exists() {
return Err(Error::DataDirNotFound(dir.to_path_buf()));
}
let nist = Self::load_nist(dir)?;
let compounds = Self::load_compounds(dir)?;
let (catima, catima_strag) = Self::load_catima(dir)?;
Ok(Self {
nist,
compounds,
catima,
catima_strag,
})
}
pub fn from_bytes(data: &[u8]) -> crate::Result<Self> {
let bytes = bytes::Bytes::from(data.to_vec());
let mut nist: HashMap<(String, u32), XYTable> = HashMap::new();
Self::parse_nist_into(bytes, &mut nist)?;
for (e_vec, s_vec) in nist.values_mut() {
sort_paired_vecs(e_vec, s_vec);
}
Ok(Self {
nist,
compounds: HashMap::new(),
catima: HashMap::new(),
catima_strag: HashMap::new(),
})
}
#[inline]
pub fn dedx(&self, source: &str, target_z: u32, energy_mev: f64) -> f64 {
match self.nist.get(&(source.to_string(), target_z)) {
Some((e, s)) => log_log_interp(e, s, energy_mev),
None => f64::NAN,
}
}
#[inline]
pub fn compound_dedx(&self, source: &str, compound: &str, energy_mev: f64) -> f64 {
match self
.compounds
.get(&(source.to_string(), compound.to_string()))
{
Some((e, s)) => log_log_interp(e, s, energy_mev),
None => f64::NAN,
}
}
#[inline]
pub fn catima_dedx(&self, proj_z: u32, target_z: u32, energy_mev_u: f64) -> f64 {
match self.catima.get(&(proj_z, target_z)) {
Some((e, s)) => log_log_interp(e, s, energy_mev_u),
None => f64::NAN,
}
}
#[inline]
pub fn catima_straggling(&self, proj_z: u32, target_z: u32) -> f64 {
self.catima_strag
.get(&(proj_z, target_z))
.copied()
.unwrap_or(f64::NAN)
}
#[inline]
pub fn nist_table(&self, source: &str, target_z: u32) -> Option<&XYTable> {
self.nist.get(&(source.to_string(), target_z))
}
pub fn nist_keys(&self) -> impl Iterator<Item = (&str, u32)> + '_ {
self.nist.keys().map(|(s, z)| (s.as_str(), *z))
}
#[inline]
pub fn compound_table(&self, source: &str, compound: &str) -> Option<&XYTable> {
self.compounds
.get(&(source.to_string(), compound.to_string()))
}
pub fn compound_keys(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
self.compounds.keys().map(|(s, c)| (s.as_str(), c.as_str()))
}
#[inline]
pub fn catima_table(&self, proj_z: u32, target_z: u32) -> Option<&XYTable> {
self.catima.get(&(proj_z, target_z))
}
fn load_nist(dir: &Path) -> crate::Result<HashMap<(String, u32), XYTable>> {
let mut map: HashMap<(String, u32), XYTable> = HashMap::new();
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.is_dir() {
continue;
}
if path.extension().and_then(|e| e.to_str()) != Some("parquet") {
continue;
}
let file = fs::File::open(&path)?;
Self::parse_nist_into(file, &mut map)?;
}
for (e_vec, s_vec) in map.values_mut() {
sort_paired_vecs(e_vec, s_vec);
}
Ok(map)
}
fn parse_nist_into(
reader_source: impl parquet::file::reader::ChunkReader + 'static,
map: &mut HashMap<(String, u32), XYTable>,
) -> crate::Result<()> {
let reader = ParquetRecordBatchReaderBuilder::try_new(reader_source)?.build()?;
for batch in reader {
let batch = batch?;
let src_col_ref = batch.column_by_name("source");
let src_values = src_col_ref.and_then(|c| crate::interp::as_string_array(c));
let z_col = batch
.column_by_name("target_Z")
.and_then(|c| c.as_any().downcast_ref::<Int32Array>());
let e_col = batch
.column_by_name("energy_MeV")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let s_col = batch
.column_by_name("dedx")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(src), Some(z), Some(e), Some(s)) = (src_values, z_col, e_col, s_col) {
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
let key = (src[i].unwrap_or("").to_string(), z.value(i) as u32);
let entry = map.entry(key).or_default();
entry.0.push(e.value(i));
entry.1.push(s.value(i));
}
}
}
Ok(())
}
fn load_compounds(dir: &Path) -> crate::Result<HashMap<(String, String), XYTable>> {
let mut map: HashMap<(String, String), XYTable> = HashMap::new();
let compounds_dir = dir.join("compounds");
if !compounds_dir.exists() {
return Ok(map);
}
for entry in fs::read_dir(&compounds_dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("parquet") {
continue;
}
let file = fs::File::open(&path)?;
let reader = ParquetRecordBatchReaderBuilder::try_new(file)?.build()?;
for batch in reader {
let batch = batch?;
let src_col_ref = batch.column_by_name("source");
let src_values = src_col_ref.and_then(|c| crate::interp::as_string_array(c));
let cmp_col_ref = batch.column_by_name("compound");
let cmp_values = cmp_col_ref.and_then(|c| crate::interp::as_string_array(c));
let e_col = batch
.column_by_name("energy_MeV")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let s_col = batch
.column_by_name("dedx")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(src), Some(cmp), Some(e), Some(s)) =
(src_values, cmp_values, e_col, s_col)
{
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
let key = (
src[i].unwrap_or("").to_string(),
cmp[i].unwrap_or("").to_string(),
);
let entry = map.entry(key).or_default();
entry.0.push(e.value(i));
entry.1.push(s.value(i));
}
}
}
}
for (e_vec, s_vec) in map.values_mut() {
sort_paired_vecs(e_vec, s_vec);
}
Ok(map)
}
#[allow(clippy::type_complexity)] fn load_catima(
dir: &Path,
) -> crate::Result<(HashMap<(u32, u32), XYTable>, HashMap<(u32, u32), f64>)> {
let catima_path = dir.join("catima").join("catima.parquet");
let mut map: HashMap<(u32, u32), XYTable> = HashMap::new();
let mut strag_map: HashMap<(u32, u32), f64> = HashMap::new();
if !catima_path.exists() {
return Ok((map, strag_map));
}
let file = fs::File::open(&catima_path)?;
let reader = ParquetRecordBatchReaderBuilder::try_new(file)?.build()?;
for batch in reader {
let batch = batch?;
let pz_col = batch
.column_by_name("proj_Z")
.and_then(|c| c.as_any().downcast_ref::<Int32Array>());
let tz_col = batch
.column_by_name("target_Z")
.and_then(|c| c.as_any().downcast_ref::<Int32Array>());
let e_col = batch
.column_by_name("energy_MeV_u")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let s_col = batch
.column_by_name("dedx")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let strag_col = batch
.column_by_name("straggling")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(pz), Some(tz), Some(e), Some(s)) = (pz_col, tz_col, e_col, s_col) {
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
let key = (pz.value(i) as u32, tz.value(i) as u32);
let entry = map.entry(key).or_default();
entry.0.push(e.value(i));
entry.1.push(s.value(i));
if let Some(strag) = strag_col {
strag_map.entry(key).or_insert(strag.value(i));
}
}
}
}
for (e_vec, s_vec) in map.values_mut() {
sort_paired_vecs(e_vec, s_vec);
}
Ok((map, strag_map))
}
}
#[cfg(test)]
mod tests {
use super::*;
fn data_dir() -> std::path::PathBuf {
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..")
.join("..")
.join("data")
.join("stopping")
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn open_succeeds() {
let db = StoppingDb::open(data_dir()).unwrap();
let s = db.dedx("PSTAR", 29, 10.0);
assert!(s.is_finite() && s > 0.0, "PSTAR Cu 10 MeV: {s}");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn catima_open_succeeds() {
let db = StoppingDb::open(data_dir()).unwrap();
let s = db.catima_dedx(1, 29, 100.0);
assert!(s.is_finite() && s > 0.0, "CatIMA p in Cu 100 MeV/u: {s}");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn miss_returns_nan() {
let db = StoppingDb::open(data_dir()).unwrap();
let s = db.dedx("NONEXISTENT", 999, 10.0);
assert!(s.is_nan());
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn alpha_on_cu_matches_nist_icru49_anchors() {
let db = StoppingDb::open(data_dir()).unwrap();
let anchors: &[(f64, f64)] = &[
(4.0, 483.8), (20.0, 177.4), (80.0, 64.54), (400.0, 19.32), ];
for &(e, expected) in anchors {
let got = db.dedx("ASTAR", 29, e);
let rel = (got - expected).abs() / expected;
assert!(
rel < 0.01,
"α Cu at {e} MeV: got {got}, NIST {expected}, rel err {rel}",
);
}
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn compound_water_positive() {
let db = StoppingDb::open(data_dir()).unwrap();
let s = db.compound_dedx("PSTAR_compound", "WATER_LIQUID", 10.0);
assert!(s.is_finite() && s > 0.0, "PSTAR water 10 MeV: {s}");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn compound_miss_returns_nan() {
let db = StoppingDb::open(data_dir()).unwrap();
assert!(db
.compound_dedx("PSTAR_compound", "NONEXISTENT", 10.0)
.is_nan());
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn from_bytes_matches_open() {
let db_file = StoppingDb::open(data_dir()).unwrap();
let first_file = std::fs::read_dir(data_dir())
.unwrap()
.filter_map(|e| e.ok())
.find(|e| {
let p = e.path();
!p.is_dir() && p.extension().and_then(|x| x.to_str()) == Some("parquet")
})
.expect("at least one stopping file");
let data = std::fs::read(first_file.path()).unwrap();
let db_bytes = StoppingDb::from_bytes(&data).unwrap();
for (source, z) in db_bytes.nist_keys().take(3) {
let val_file = db_file.dedx(source, z, 10.0);
let val_bytes = db_bytes.dedx(source, z, 10.0);
if val_file.is_finite() && val_bytes.is_finite() {
assert!(
(val_file - val_bytes).abs() / val_file < 1e-10,
"{source} Z={z} dedx mismatch: {val_file} vs {val_bytes}"
);
return;
}
}
panic!("no matching NIST key found");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn legacy_he3star_source_missing() {
let db = StoppingDb::open(data_dir()).unwrap();
assert!(db.dedx("He3STAR", 29, 10.0).is_nan());
}
}