use std::collections::HashMap;
use std::fs;
use std::path::Path;
use arrow::array::{Float64Array, Int32Array};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use crate::error::Error;
use crate::interp::log_log_interp;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Process {
Total,
Coherent,
Incoherent,
Photoelectric,
PairNuclear,
PairElectron,
PairTotal,
}
impl Process {
fn from_str(s: &str) -> Option<Self> {
match s {
"total" => Some(Self::Total),
"coherent" => Some(Self::Coherent),
"incoherent" => Some(Self::Incoherent),
"photoelectric" => Some(Self::Photoelectric),
"pair_nuclear" => Some(Self::PairNuclear),
"pair_electron" => Some(Self::PairElectron),
"pair_total" => Some(Self::PairTotal),
_ => None,
}
}
}
#[derive(Debug, Clone)]
struct XsTable {
energy: Vec<f64>,
xs: Vec<f64>,
}
#[derive(Debug, Clone)]
struct TabTable {
x: Vec<f64>,
y: Vec<f64>,
}
pub struct PhotonDb {
xs_tables: HashMap<(u8, Process), XsTable>,
form_factors: HashMap<u8, TabTable>,
scattering_fns: HashMap<u8, TabTable>,
}
unsafe impl Send for PhotonDb {}
unsafe impl Sync for PhotonDb {}
impl PhotonDb {
pub fn open(meta_dir: impl AsRef<Path>) -> crate::Result<Self> {
let meta = meta_dir.as_ref();
let xs_dir = meta.join("epdl97").join("photon_xs");
let ff_dir = meta.join("epdl97").join("form_factors");
let sf_dir = meta.join("epdl97").join("scattering_fn");
let xs_tables = Self::load_xs_tables(&xs_dir)?;
let form_factors = Self::load_tab_tables(&ff_dir, "form_factor")?;
let scattering_fns = Self::load_tab_tables(&sf_dir, "scattering_fn")?;
Ok(Self {
xs_tables,
form_factors,
scattering_fns,
})
}
pub fn from_bytes(data: &[u8]) -> crate::Result<Self> {
let bytes = bytes::Bytes::from(data.to_vec());
let mut xs_tables = HashMap::new();
Self::parse_xs_into(bytes, &mut xs_tables)?;
Ok(Self {
xs_tables,
form_factors: HashMap::new(),
scattering_fns: HashMap::new(),
})
}
#[inline]
pub fn cross_section(&self, z: u8, energy_mev: f64, process: Process) -> f64 {
match self.xs_tables.get(&(z, process)) {
Some(table) => log_log_interp(&table.energy, &table.xs, energy_mev),
None => 0.0,
}
}
#[inline]
pub fn all_cross_sections(&self, z: u8, energy_mev: f64) -> [f64; 4] {
[
self.cross_section(z, energy_mev, Process::Photoelectric),
self.cross_section(z, energy_mev, Process::Incoherent),
self.cross_section(z, energy_mev, Process::Coherent),
self.cross_section(z, energy_mev, Process::PairTotal),
]
}
#[inline]
pub fn total_cross_section(&self, z: u8, energy_mev: f64) -> f64 {
self.cross_section(z, energy_mev, Process::Total)
}
#[inline]
pub fn form_factor(&self, z: u8, q: f64) -> f64 {
match self.form_factors.get(&z) {
Some(table) => log_log_interp(&table.x, &table.y, q),
None => 0.0,
}
}
#[inline]
pub fn scattering_function(&self, z: u8, q: f64) -> f64 {
match self.scattering_fns.get(&z) {
Some(table) => log_log_interp(&table.x, &table.y, q),
None => 0.0,
}
}
pub fn has_element(&self, z: u8) -> bool {
self.xs_tables.contains_key(&(z, Process::Total))
}
pub fn num_elements(&self) -> usize {
self.xs_tables
.keys()
.filter(|(_, p)| *p == Process::Total)
.count()
}
fn load_xs_tables(dir: &Path) -> crate::Result<HashMap<(u8, Process), XsTable>> {
let mut tables = HashMap::new();
if !dir.exists() {
return Err(Error::DataDirNotFound(dir.to_path_buf()));
}
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("parquet") {
continue;
}
if path
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n.starts_with("._"))
{
continue;
}
let file = fs::File::open(&path)?;
Self::parse_xs_into(file, &mut tables)?;
}
Ok(tables)
}
fn parse_xs_into(
reader_source: impl parquet::file::reader::ChunkReader + 'static,
tables: &mut HashMap<(u8, Process), XsTable>,
) -> crate::Result<()> {
let reader = ParquetRecordBatchReaderBuilder::try_new(reader_source)?.build()?;
let mut process_data: HashMap<Process, (Vec<f64>, Vec<f64>)> = HashMap::new();
let mut z_val: Option<u8> = None;
let sentinel = std::path::PathBuf::from("<bytes>");
for batch in reader {
let batch = batch?;
let z_col = batch
.column_by_name("Z")
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "Z".into(),
})?
.as_any()
.downcast_ref::<Int32Array>()
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "Z (wrong type)".into(),
})?;
let e_col = batch
.column_by_name("energy_MeV")
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "energy_MeV".into(),
})?
.as_any()
.downcast_ref::<Float64Array>()
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "energy_MeV (wrong type)".into(),
})?;
let proc_col = batch
.column_by_name("process")
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "process".into(),
})?;
let proc_values =
crate::interp::as_string_array(proc_col).ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "process (wrong type)".into(),
})?;
let xs_col = batch
.column_by_name("xs_barns")
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "xs_barns".into(),
})?
.as_any()
.downcast_ref::<Float64Array>()
.ok_or_else(|| Error::MissingColumn {
file: sentinel.clone(),
column: "xs_barns (wrong type)".into(),
})?;
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
if z_val.is_none() {
z_val = Some(z_col.value(i) as u8);
}
let proc_str = match proc_values[i] {
Some(s) => s,
None => continue,
};
if let Some(process) = Process::from_str(proc_str) {
let entry = process_data.entry(process).or_default();
entry.0.push(e_col.value(i));
entry.1.push(xs_col.value(i));
}
}
}
if let Some(z) = z_val {
for (process, (mut energy, mut xs)) in process_data {
let mut indices: Vec<usize> = (0..energy.len()).collect();
indices.sort_by(|&a, &b| energy[a].partial_cmp(&energy[b]).unwrap());
let sorted_e: Vec<f64> = indices.iter().map(|&i| energy[i]).collect();
let sorted_xs: Vec<f64> = indices.iter().map(|&i| xs[i]).collect();
energy = sorted_e;
xs = sorted_xs;
tables.insert((z, process), XsTable { energy, xs });
}
}
Ok(())
}
fn load_tab_tables(dir: &Path, value_col_name: &str) -> crate::Result<HashMap<u8, TabTable>> {
let mut tables = HashMap::new();
if !dir.exists() {
return Ok(tables); }
for entry in fs::read_dir(dir)? {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|e| e.to_str()) != Some("parquet") {
continue;
}
if path
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n.starts_with("._"))
{
continue;
}
let file = fs::File::open(&path)?;
let reader = ParquetRecordBatchReaderBuilder::try_new(file)?.build()?;
let mut x_vals = Vec::new();
let mut y_vals = Vec::new();
let mut z_val: Option<u8> = None;
for batch in reader {
let batch = batch?;
let z_col = batch
.column_by_name("Z")
.and_then(|c| c.as_any().downcast_ref::<Int32Array>());
let x_col = batch
.column_by_name("momentum_transfer")
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
let y_col = batch
.column_by_name(value_col_name)
.and_then(|c| c.as_any().downcast_ref::<Float64Array>());
if let (Some(z), Some(x), Some(y)) = (z_col, x_col, y_col) {
#[allow(clippy::needless_range_loop)]
for i in 0..batch.num_rows() {
if z_val.is_none() {
z_val = Some(z.value(i) as u8);
}
x_vals.push(x.value(i));
y_vals.push(y.value(i));
}
}
}
if let Some(z) = z_val {
let mut indices: Vec<usize> = (0..x_vals.len()).collect();
indices.sort_by(|&a, &b| x_vals[a].partial_cmp(&x_vals[b]).unwrap());
let sorted_x: Vec<f64> = indices.iter().map(|&i| x_vals[i]).collect();
let sorted_y: Vec<f64> = indices.iter().map(|&i| y_vals[i]).collect();
tables.insert(
z,
TabTable {
x: sorted_x,
y: sorted_y,
},
);
}
}
Ok(tables)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn data_meta_dir() -> std::path::PathBuf {
std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("..")
.join("..")
.join("..")
.join("data")
.join("meta")
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn from_bytes_matches_open() {
let db_file = PhotonDb::open(data_meta_dir()).unwrap();
let xs_dir = data_meta_dir().join("epdl97").join("photon_xs");
let first_file = std::fs::read_dir(&xs_dir)
.unwrap()
.filter_map(|e| e.ok())
.find(|e| {
let p = e.path();
p.extension().and_then(|x| x.to_str()) == Some("parquet")
&& !p
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n.starts_with("._"))
})
.expect("at least one photon_xs file");
let data = std::fs::read(first_file.path()).unwrap();
let db_bytes = PhotonDb::from_bytes(&data).unwrap();
for z in 1..=100u8 {
if db_bytes.has_element(z) {
let xs_file = db_file.cross_section(z, 0.511, Process::Total);
let xs_bytes = db_bytes.cross_section(z, 0.511, Process::Total);
assert!(
(xs_file - xs_bytes).abs() < 1e-12,
"Z={z} total XS mismatch: {xs_file} vs {xs_bytes}"
);
break;
}
}
}
#[test]
#[ignore = "requires nucl-parquet data files"]
fn load_and_query() {
let db = PhotonDb::open("../../meta").unwrap();
assert!(db.num_elements() >= 90);
assert!(db.has_element(29));
let pe = db.cross_section(29, 0.511, Process::Photoelectric);
assert!(pe > 0.1 && pe < 1.0, "Cu PE at 511 keV: {pe}");
let compton = db.cross_section(29, 0.511, Process::Incoherent);
assert!(
compton > 5.0 && compton < 15.0,
"Cu Compton at 511 keV: {compton}"
);
let ff0 = db.form_factor(29, 0.0001); assert!(ff0 > 28.0 && ff0 < 30.0, "Cu FF(0): {ff0}");
let [pe, comp, coh, pair] = db.all_cross_sections(29, 0.511);
assert!(pe > 0.0);
assert!(comp > 0.0);
assert!(coh > 0.0);
assert!(pair == 0.0, "no pair production at 511 keV");
}
}