use std::path::{Path, PathBuf};
/// Characters that mark a line as a comment when one of them is the line's
/// first non-blank character.
pub const COMMENT_CHARS: &str = "#;%*!$";
/// Parsed contents of a delimited numeric column file: header lines, guessed
/// column labels and the numeric data stored column by column.
#[derive(Clone, Debug, Default)]
pub struct ColumnFile {
/// Path the data was loaded from; `None` when parsed directly from text.
pub path: Option<PathBuf>,
/// Raw lines that precede the first numeric data row.
pub header: Vec<String>,
/// One label per column, guessed from the header or generated as `colN`.
pub labels: Vec<String>,
/// Column-major data: `columns[c][r]` is the value of column `c` in row `r`.
pub columns: Vec<Vec<f64>>,
}
/// Errors produced while reading or parsing a column file.
#[derive(Debug)]
pub enum ReadError {
/// The underlying file could not be read.
Io(std::io::Error),
/// The input contained no line that parses as a row of numbers.
NoData,
}
impl std::fmt::Display for ReadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ReadError::Io(e) => write!(f, "I/O error: {e}"),
ReadError::NoData => write!(f, "no numeric data rows found"),
}
}
}
// Uses the default `Error` methods; the wrapped I/O error is reported through
// the `Display` message rather than through `source()`.
impl std::error::Error for ReadError {}
impl From<std::io::Error> for ReadError {
fn from(e: std::io::Error) -> Self {
ReadError::Io(e)
}
}
impl ColumnFile {
    /// Reads the file at `path` and parses it with [`ColumnFile::from_text`].
    ///
    /// The bytes are decoded as UTF-8 with invalid sequences replaced by
    /// U+FFFD, so a stray non-UTF-8 byte (for example a Latin-1 symbol in a
    /// header comment) does not make the whole file unreadable. Numeric rows
    /// consist of ASCII characters and are not altered by this replacement.
    ///
    /// # Errors
    ///
    /// Returns [`ReadError::Io`] when the file cannot be read and
    /// [`ReadError::NoData`] when it contains no numeric rows.
    pub fn from_path(path: impl AsRef<Path>) -> Result<Self, ReadError> {
        let path = path.as_ref();
        let bytes = std::fs::read(path)?;
        let text = String::from_utf8_lossy(&bytes);
        let mut cf = Self::from_text(&text)?;
        cf.path = Some(path.to_path_buf());
        Ok(cf)
    }

    /// Parses column data from `text`.
    ///
    /// Every line before the first numeric row is kept as header. The data is
    /// the contiguous run of numeric rows that starts there; the first line
    /// that is not a numeric row ends the data and everything after it is
    /// ignored. The first row fixes the column count: shorter rows are
    /// dropped and longer rows are truncated to that count.
    ///
    /// # Errors
    ///
    /// Returns [`ReadError::NoData`] when no line parses as a numeric row.
    pub fn from_text(text: &str) -> Result<Self, ReadError> {
        // A UTF-8 byte-order mark is not whitespace: left in place it would
        // keep the first line from being recognised as data or as a comment.
        let text = text.strip_prefix('\u{feff}').unwrap_or(text);
        let lines: Vec<&str> = text.lines().collect();

        let numeric_row = |line: &str| parse_floats(line).filter(|vals| !vals.is_empty());

        // Single pass: skip header lines up to the first numeric row, then
        // keep consuming from the same iterator while rows stay numeric.
        let mut numbered = lines.iter().enumerate();
        let (first_data, first_row) = numbered
            .by_ref()
            .find_map(|(i, line)| numeric_row(*line).map(|row| (i, row)))
            .ok_or(ReadError::NoData)?;
        let ncols = first_row.len();

        let mut rows = vec![first_row];
        rows.extend(numbered.map_while(|(_, line)| numeric_row(*line)));

        // Build each column separately: cloning an empty `Vec` (as `vec![v; n]`
        // does) would not carry the reserved capacity over to the copies.
        let mut columns: Vec<Vec<f64>> = (0..ncols)
            .map(|_| Vec::with_capacity(rows.len()))
            .collect();
        for row in rows.iter().filter(|row| row.len() >= ncols) {
            // `zip` stops after `ncols` values, truncating over-long rows.
            for (col, &value) in columns.iter_mut().zip(row) {
                col.push(value);
            }
        }

        let header: Vec<String> = lines[..first_data].iter().map(|s| s.to_string()).collect();
        let labels = guess_labels(&header, ncols);
        Ok(Self {
            path: None,
            header,
            labels,
            columns,
        })
    }

    /// Number of data columns.
    pub fn ncols(&self) -> usize {
        self.columns.len()
    }

    /// Number of data rows, taken from the first column (0 if there is none).
    pub fn nrows(&self) -> usize {
        self.columns.first().map_or(0, Vec::len)
    }

    /// Values of the column at `index`, or `None` when out of range.
    pub fn column(&self, index: usize) -> Option<&[f64]> {
        self.columns.get(index).map(Vec::as_slice)
    }

    /// Index of the first label equal to `name`, ignoring ASCII case.
    pub fn label_index(&self, name: &str) -> Option<usize> {
        self.labels
            .iter()
            .position(|label| label.eq_ignore_ascii_case(name))
    }

    /// Guesses which column plays which role from the column labels.
    ///
    /// For each role the first label that equals one of the known names
    /// (ASCII case-insensitively) wins; failing that, the first label that
    /// contains one of the role's fragments. Roles are resolved independently,
    /// so one column may be reported for more than one role.
    pub fn guess_roles(&self) -> RoleGuess {
        // Lowercase every label once instead of once per lookup.
        let lowered: Vec<String> = self
            .labels
            .iter()
            .map(|label| label.to_ascii_lowercase())
            .collect();
        let exact = |keys: &[&str]| -> Option<usize> {
            lowered
                .iter()
                .position(|label| keys.iter().any(|key| *key == label.as_str()))
        };
        let partial = |keys: &[&str]| -> Option<usize> {
            lowered
                .iter()
                .position(|label| keys.iter().any(|key| label.contains(*key)))
        };
        let pick = |names: &[&str], fragments: &[&str]| -> Option<usize> {
            exact(names).or_else(|| partial(fragments))
        };

        RoleGuess {
            energy: pick(&["energy", "e", "col1"], &["energy"]),
            i0: pick(&["i0", "io"], &["i0"]),
            it: pick(&["it", "itrans", "i1", "trans"], &["itrans", "trans"]),
            iflu: pick(&["if", "iflu", "ifluor"], &["fluor", "fluo", "iff"]),
            iref: pick(&["iref", "i2", "iref2"], &["iref"]),
            mu: pick(&["mu", "xmu", "mutrans", "norm"], &["mutrans", "xmu"]),
        }
    }
}
/// Loads the column file at `path` and returns copies of its first two
/// columns as `(k, chi)`.
///
/// # Errors
///
/// Propagates any error from [`ColumnFile::from_path`] and returns
/// [`ReadError::NoData`] when the file has fewer than two columns.
pub fn read_chi_dat(path: impl AsRef<Path>) -> Result<(Vec<f64>, Vec<f64>), ReadError> {
    let file = ColumnFile::from_path(path)?;
    match (file.column(0), file.column(1)) {
        (Some(k), Some(chi)) => Ok((k.to_vec(), chi.to_vec())),
        _ => Err(ReadError::NoData),
    }
}
/// Column indices guessed for each role by `ColumnFile::guess_roles`;
/// a field is `None` when no label matched that role.
#[derive(Clone, Copy, Debug, Default)]
pub struct RoleGuess {
/// Column whose label matched the energy names.
pub energy: Option<usize>,
/// Column whose label matched the `i0` names.
pub i0: Option<usize>,
/// Column whose label matched the `it` / `trans` names.
pub it: Option<usize>,
/// Column whose label matched the `if` / `fluor` names.
pub iflu: Option<usize>,
/// Column whose label matched the `iref` names.
pub iref: Option<usize>,
/// Column whose label matched the `mu` / `xmu` names.
pub mu: Option<usize>,
}
fn parse_floats(line: &str) -> Option<Vec<f64>> {
let trimmed = line.trim();
if trimmed.is_empty() {
return None;
}
if let Some(c) = trimmed.chars().next()
&& COMMENT_CHARS.contains(c)
{
return None;
}
let mut out = Vec::new();
for tok in trimmed
.split([' ', '\t', ',', '\r'])
.filter(|t| !t.is_empty())
{
out.push(tok.parse::<f64>().ok()?);
}
if out.is_empty() { None } else { Some(out) }
}
/// Returns `line` without its leading comment markers and without
/// surrounding whitespace.
///
/// The whole leading run of [`COMMENT_CHARS`] is removed, not just one
/// character, so `## energy mu` yields `energy mu` and a separator line such
/// as `#####` yields an empty string. Lines that do not start with a comment
/// marker are only trimmed.
fn strip_comment(line: &str) -> &str {
    line.trim_start()
        .trim_start_matches(|c: char| COMMENT_CHARS.contains(c))
        .trim()
}
fn guess_labels(header: &[String], ncols: usize) -> Vec<String> {
let mut tagged: Vec<Option<String>> = vec![None; ncols];
let mut any_tag = false;
for line in header {
let body = strip_comment(line);
if let Some(rest) = body.strip_prefix("Column.")
&& let Some((num, after)) = rest.split_once(':')
&& let Ok(n) = num.trim().parse::<usize>()
&& (1..=ncols).contains(&n)
{
let name = after.split("||").next().unwrap_or("").trim();
if let Some(first) = name.split_whitespace().next() {
tagged[n - 1] = Some(first.to_string());
any_tag = true;
}
}
}
if any_tag && tagged.iter().all(|t| t.is_some()) {
return tagged.into_iter().map(|t| t.unwrap()).collect();
}
if let Some(last) = header.iter().rev().find(|l| !strip_comment(l).is_empty()) {
let toks: Vec<&str> = strip_comment(last).split_whitespace().collect();
if toks.len() == ncols {
return toks.into_iter().map(|s| s.to_string()).collect();
}
}
(1..=ncols).map(|i| format!("col{i}")).collect()
}
// Unit tests for line classification, header/label guessing and file loading.
#[cfg(test)]
mod tests {
use super::*;
// Numeric rows parse; comment, blank and non-numeric lines yield `None`.
#[test]
fn parse_floats_classifies_lines() {
assert_eq!(parse_floats("1.0 2.0 3.0"), Some(vec![1.0, 2.0, 3.0]));
assert_eq!(parse_floats(" .5e1,\t-2 "), Some(vec![5.0, -2.0]));
assert_eq!(parse_floats(".8786204E+04"), Some(vec![8786.204]));
assert!(parse_floats("# energy mu").is_none());
assert!(parse_floats("; a comment").is_none());
assert!(parse_floats("").is_none());
assert!(parse_floats("energy mu").is_none());
}
// Without `Column.N` tags, the last header line supplies the labels when its
// word count matches the column count.
#[test]
fn parses_semicolon_commented_file_with_last_line_labels() {
let text = "; scan 1\n; I0 sensitivity = 5\n; energy i0 it\n\
100.0 10.0 5.0\n200.0 20.0 8.0\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.ncols(), 3);
assert_eq!(cf.nrows(), 2);
assert_eq!(cf.labels, vec!["energy", "i0", "it"]);
assert_eq!(cf.column(0), Some([100.0, 200.0].as_slice()));
assert_eq!(cf.label_index("I0"), Some(1));
}
// A complete set of `Column.N` tags takes precedence over the last header line.
#[test]
fn xdi_column_tags_win_over_last_header_line() {
let text = "# XDI/1.0\n# Column.1: energy eV\n# Column.2: i0\n# Column.3: itrans\n\
# something else entirely\n10.0 1.0 0.5\n20.0 2.0 0.9\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.labels, vec!["energy", "i0", "itrans"]);
}
// Only the first word of a tag's name is kept; units and the `||` suffix are dropped.
#[test]
fn multichannel_xdi_drops_pv_suffix() {
let text = "# Column.1: Energy eV || BL:En.VAL\n# Column.2: I0 counts || BL:S2\n\
# Column.3: mca1 counts || BL:M1\n# Column.4: mca2 counts || BL:M2\n\
1.0 100.0 3.0 4.0\n2.0 200.0 6.0 8.0\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.labels, vec!["Energy", "I0", "mca1", "mca2"]);
let roles = cf.guess_roles();
assert_eq!(roles.energy, Some(0));
assert_eq!(roles.i0, Some(1));
}
// With no header at all, labels are generated as `colN`.
#[test]
fn fallback_labels_when_unguessable() {
let text = "1 2 3\n4 5 6\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.labels, vec!["col1", "col2", "col3"]);
}
// Parsing stops at the first non-numeric line after the data has started.
#[test]
fn footer_after_data_is_ignored() {
let text = "# e mu\n1.0 0.1\n2.0 0.2\n# end of scan\nnot data\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.nrows(), 2);
assert_eq!(cf.ncols(), 2);
}
// Checks both the in-memory parse and the `read_chi_dat` round trip through a
// temporary file; only the first two of the four columns are returned.
#[test]
fn read_chi_dat_skips_feff_header_and_takes_k_chi() {
let text = "# Some FEFF header\n# Mu=-0.6 kf=2.1\n\
# -----------\n# k chi mag phase\n\
0.0500 2.705808E-01 2.719035E-01 1.472117E+00\n\
0.1000 -2.710386E-01 2.721822E-01 1.479092E+00\n";
let cf = ColumnFile::from_text(text).unwrap();
assert_eq!(cf.ncols(), 4);
assert_eq!(cf.column(0), Some([0.05, 0.10].as_slice()));
assert_eq!(cf.column(1), Some([0.2705808, -0.2710386].as_slice()));
// NOTE(review): the temp file name is fixed, so concurrent runs of this test
// in separate processes would share the same path.
let path = std::env::temp_dir().join("xasdata_read_chi_dat_test.dat");
std::fs::write(&path, text).unwrap();
let (k, chi) = read_chi_dat(&path).unwrap();
// Best-effort cleanup; a failed removal is ignored.
std::fs::remove_file(&path).ok();
assert_eq!(k, vec![0.05, 0.10]);
assert_eq!(chi, vec![0.2705808, -0.2710386]);
}
}