use std::collections::HashMap;
use std::fs;
use std::path::Path;
use arrow::array::{Float64Array, Int32Array};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use crate::interp::{log_log_interp, sort_paired_vecs, XYTable};
#[derive(Clone)]
pub struct XcomDb {
elem_mu_rho: HashMap<u8, XYTable>,
elem_mu_en_rho: HashMap<u8, XYTable>,
comp_mu_rho: HashMap<String, XYTable>,
comp_mu_en_rho: HashMap<String, XYTable>,
}
unsafe impl Send for XcomDb {}
unsafe impl Sync for XcomDb {}
impl XcomDb {
pub fn open(meta_dir: impl AsRef<Path>) -> crate::Result<Self> {
let meta = meta_dir.as_ref();
let (elem_mu_rho, elem_mu_en_rho) =
Self::load_elements(&meta.join("xcom_elements.parquet"))?;
let (comp_mu_rho, comp_mu_en_rho) =
Self::load_compounds(&meta.join("xcom_compounds.parquet"))?;
Ok(Self {
elem_mu_rho,
elem_mu_en_rho,
comp_mu_rho,
comp_mu_en_rho,
})
}
pub fn from_bytes(elements_data: &[u8], compounds_data: &[u8]) -> crate::Result<Self> {
let (elem_mu_rho, elem_mu_en_rho) =
Self::parse_elements(bytes::Bytes::from(elements_data.to_vec()))?;
let (comp_mu_rho, comp_mu_en_rho) = if compounds_data.is_empty() {
(HashMap::new(), HashMap::new())
} else {
Self::parse_compounds(bytes::Bytes::from(compounds_data.to_vec()))?
};
Ok(Self {
elem_mu_rho,
elem_mu_en_rho,
comp_mu_rho,
comp_mu_en_rho,
})
}
#[inline]
pub fn mu_rho(&self, z: u8, energy_mev: f64) -> f64 {
match self.elem_mu_rho.get(&z) {
Some((e, v)) => log_log_interp(e, v, energy_mev),
None => f64::NAN,
}
}
#[inline]
pub fn mu_en_rho(&self, z: u8, energy_mev: f64) -> f64 {
match self.elem_mu_en_rho.get(&z) {
Some((e, v)) => log_log_interp(e, v, energy_mev),
None => f64::NAN,
}
}
#[inline]
pub fn compound_mu_rho(&self, compound: &str, energy_mev: f64) -> f64 {
match self.comp_mu_rho.get(compound) {
Some((e, v)) => log_log_interp(e, v, energy_mev),
None => f64::NAN,
}
}
#[inline]
pub fn compound_mu_en_rho(&self, compound: &str, energy_mev: f64) -> f64 {
match self.comp_mu_en_rho.get(compound) {
Some((e, v)) => log_log_interp(e, v, energy_mev),
None => f64::NAN,
}
}
pub fn has_element(&self, z: u8) -> bool {
self.elem_mu_rho.contains_key(&z)
}
pub fn compound_names(&self) -> Vec<&str> {
self.comp_mu_rho.keys().map(|s| s.as_str()).collect()
}
fn load_elements(path: &Path) -> crate::Result<(HashMap<u8, XYTable>, HashMap<u8, XYTable>)> {
let file = fs::File::open(path)?;
Self::parse_elements(file)
}
fn parse_elements(
reader_source: impl parquet::file::reader::ChunkReader + 'static,
) -> crate::Result<(HashMap<u8, XYTable>, HashMap<u8, XYTable>)> {
let mut mu_rho: HashMap<u8, XYTable> = HashMap::new();
let mut mu_en: HashMap<u8, XYTable> = HashMap::new();
let reader = ParquetRecordBatchReaderBuilder::try_new(reader_source)?.build()?;
for batch in reader {
let batch = batch?;
let z_col = batch
.column_by_name("Z")
.and_then(|c| c.as_any().downcast_ref::<Int32Array>());
let e_col = batch
.column_by_name("energy_MeV")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let mr_col = batch
.column_by_name("mu_rho_cm2_g")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let men_col = batch
.column_by_name("mu_en_rho_cm2_g")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(z), Some(e), Some(mr), Some(men)) = (z_col, e_col, mr_col, men_col) {
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
let zv = z.value(i) as u8;
let ev = e.value(i);
let entry_mr = mu_rho.entry(zv).or_default();
entry_mr.0.push(ev);
entry_mr.1.push(mr.value(i));
let entry_men = mu_en.entry(zv).or_default();
entry_men.0.push(ev);
entry_men.1.push(men.value(i));
}
}
}
for (e_vec, v_vec) in mu_rho.values_mut() {
sort_paired_vecs(e_vec, v_vec);
}
for (e_vec, v_vec) in mu_en.values_mut() {
sort_paired_vecs(e_vec, v_vec);
}
Ok((mu_rho, mu_en))
}
fn load_compounds(
path: &Path,
) -> crate::Result<(HashMap<String, XYTable>, HashMap<String, XYTable>)> {
let file = fs::File::open(path)?;
Self::parse_compounds(file)
}
fn parse_compounds(
reader_source: impl parquet::file::reader::ChunkReader + 'static,
) -> crate::Result<(HashMap<String, XYTable>, HashMap<String, XYTable>)> {
let mut mu_rho: HashMap<String, XYTable> = HashMap::new();
let mut mu_en: HashMap<String, XYTable> = HashMap::new();
let reader = ParquetRecordBatchReaderBuilder::try_new(reader_source)?.build()?;
for batch in reader {
let batch = batch?;
let mat_col_ref = batch.column_by_name("material");
let mat_values = mat_col_ref.and_then(|c| crate::interp::as_string_array(c));
let e_col = batch
.column_by_name("energy_MeV")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let mr_col = batch
.column_by_name("mu_rho_cm2_g")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let men_col = batch
.column_by_name("mu_en_rho_cm2_g")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(mat), Some(e), Some(mr), Some(men)) = (mat_values, e_col, mr_col, men_col)
{
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
let name = mat[i].unwrap_or("").to_string();
let ev = e.value(i);
let entry_mr = mu_rho.entry(name.clone()).or_default();
entry_mr.0.push(ev);
entry_mr.1.push(mr.value(i));
let entry_men = mu_en.entry(name).or_default();
entry_men.0.push(ev);
entry_men.1.push(men.value(i));
}
}
}
for (e_vec, v_vec) in mu_rho.values_mut() {
sort_paired_vecs(e_vec, v_vec);
}
for (e_vec, v_vec) in mu_en.values_mut() {
sort_paired_vecs(e_vec, v_vec);
}
Ok((mu_rho, mu_en))
}
}
#[cfg(test)]
mod tests {
use super::*;
fn meta_dir() -> std::path::PathBuf {
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..")
.join("..")
.join("meta")
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn open_succeeds() {
let db = XcomDb::open(meta_dir()).unwrap();
assert!(db.has_element(29)); }
#[test]
#[ignore = "requires nucl-parquet data files"]
fn cu_mu_rho_positive() {
let db = XcomDb::open(meta_dir()).unwrap();
let mu = db.mu_rho(29, 0.1);
assert!(mu.is_finite() && mu > 0.0, "Cu mu/rho at 0.1 MeV: {mu}");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn cu_mu_en_rho_positive() {
let db = XcomDb::open(meta_dir()).unwrap();
let mu = db.mu_en_rho(29, 0.1);
assert!(mu.is_finite() && mu > 0.0, "Cu mu_en/rho at 0.1 MeV: {mu}");
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn water_compound_exists() {
let db = XcomDb::open(meta_dir()).unwrap();
let names = db.compound_names();
assert!(
names.contains(&"water"),
"water should be in compound list: {names:?}"
);
let mu = db.compound_mu_rho("water", 0.1);
assert!(mu.is_finite() && mu > 0.0, "water mu/rho at 0.1 MeV: {mu}");
}
fn data_meta_dir() -> std::path::PathBuf {
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..")
.join("..")
.join("data")
.join("meta")
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn from_bytes_matches_open() {
let db_file = XcomDb::open(data_meta_dir()).unwrap();
let elem_data = std::fs::read(data_meta_dir().join("xcom_elements.parquet")).unwrap();
let comp_data = std::fs::read(data_meta_dir().join("xcom_compounds.parquet")).unwrap();
let db_bytes = XcomDb::from_bytes(&elem_data, &comp_data).unwrap();
let mu_file = db_file.mu_rho(29, 0.1);
let mu_bytes = db_bytes.mu_rho(29, 0.1);
assert!(
(mu_file - mu_bytes).abs() < 1e-12,
"Cu mu/rho mismatch: {mu_file} vs {mu_bytes}"
);
let mu_water_file = db_file.compound_mu_rho("water", 0.1);
let mu_water_bytes = db_bytes.compound_mu_rho("water", 0.1);
assert!(
(mu_water_file - mu_water_bytes).abs() < 1e-12,
"water mu/rho mismatch: {mu_water_file} vs {mu_water_bytes}"
);
}
}