use crate::error::GeddesError;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::io::{BufRead, BufReader, Read, Seek};
use zip::ZipArchive;
/// Numeric columns produced by the parsers in this module.
///
/// The parsers here always fill `x` and `y` pairwise, so the two vectors have
/// the same length when the value comes from one of them. The fields are
/// public, so hand-built values carry no such guarantee.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedData {
    /// Abscissa values (first column, or a generated evenly spaced axis).
    pub x: Vec<f64>,
    /// Ordinate values (second column, or the raw intensity list).
    pub y: Vec<f64>,
    /// Optional third column; `None` when the source carried no complete
    /// third column or the format has none.
    pub e: Option<Vec<f64>>,
}
/// Appends one row of already-split text fields to the column vectors.
///
/// * Rows with fewer than two fields, or whose first two fields are not both
///   valid `f64` values, are ignored entirely.
/// * Otherwise the first two fields go to `x` and `y`.
/// * A third field is appended to `e` only if it parses as `f64`; any further
///   fields are ignored. `e` may therefore end up shorter than `x`.
fn parse_columns(parts: &[&str], x: &mut Vec<f64>, y: &mut Vec<f64>, e: &mut Vec<f64>) {
    let (first, second, rest) = match parts {
        [first, second, rest @ ..] => (first, second, rest),
        _ => return,
    };

    match (first.parse::<f64>(), second.parse::<f64>()) {
        (Ok(val_x), Ok(val_y)) => {
            x.push(val_x);
            y.push(val_y);
        }
        _ => return,
    }

    // The optional third column is best-effort: a bad value is simply skipped.
    if let Some(val_e) = rest.first().and_then(|third| third.parse::<f64>().ok()) {
        e.push(val_e);
    }
}
/// Parses whitespace-separated two- or three-column text.
///
/// Each line is trimmed; blank lines and lines starting with `#` or `!` are
/// skipped. Remaining lines are split on whitespace and handed to
/// `parse_columns`, so rows whose first two fields are not numeric (for
/// example textual headers) are ignored rather than reported.
///
/// The returned `e` is `Some` only when at least one row was accepted and
/// every accepted row supplied a numeric third column; otherwise it is `None`.
///
/// # Errors
///
/// Returns an error if reading a line from `reader` fails.
pub fn parse_xy<R: Read>(reader: R) -> Result<ParsedData, GeddesError> {
    let reader = BufReader::new(reader);
    let mut x = Vec::new();
    let mut y = Vec::new();
    let mut e = Vec::new();
    for line in reader.lines() {
        let line = line?;
        // A UTF-8 byte-order mark (U+FEFF) is not whitespace, so `trim` alone
        // would leave it glued to the first token and the first data row
        // would silently fail to parse. Strip it explicitly.
        let line = line.trim_start_matches('\u{feff}').trim();
        if line.is_empty() || line.starts_with('#') || line.starts_with('!') {
            continue;
        }
        let parts: Vec<&str> = line.split_whitespace().collect();
        parse_columns(&parts, &mut x, &mut y, &mut e);
    }
    // Only keep the third column when it is complete; a partial column
    // cannot be aligned with `x`/`y`.
    let has_error = !e.is_empty() && e.len() == x.len();
    Ok(ParsedData {
        x,
        y,
        e: if has_error { Some(e) } else { None },
    })
}
/// Parses comma- and/or whitespace-separated two- or three-column text.
///
/// Each line is trimmed; blank lines and lines starting with `#` or `!` are
/// skipped. Remaining lines are split on commas and whitespace, empty fields
/// are discarded (so `1,,2` yields two fields), and the result is handed to
/// `parse_columns`. Rows whose first two fields are not numeric (for example
/// a textual header) are ignored rather than reported.
///
/// The returned `e` is `Some` only when at least one row was accepted and
/// every accepted row supplied a numeric third column; otherwise it is `None`.
///
/// # Errors
///
/// Returns an error if reading a line from `reader` fails.
pub fn parse_csv<R: Read>(reader: R) -> Result<ParsedData, GeddesError> {
    let reader = BufReader::new(reader);
    let mut x = Vec::new();
    let mut y = Vec::new();
    let mut e = Vec::new();
    for line in reader.lines() {
        let line = line?;
        // A UTF-8 byte-order mark (U+FEFF) is not whitespace, so `trim` alone
        // would leave it glued to the first token and the first data row
        // would silently fail to parse. Strip it explicitly.
        let line = line.trim_start_matches('\u{feff}').trim();
        if line.is_empty() || line.starts_with('#') || line.starts_with('!') {
            continue;
        }
        // Splitting on whitespace already leaves no surrounding whitespace on
        // the fields, so only empty fields need to be filtered out.
        let parts: Vec<&str> = line
            .split(|c: char| c == ',' || c.is_whitespace())
            .filter(|p| !p.is_empty())
            .collect();
        parse_columns(&parts, &mut x, &mut y, &mut e);
    }
    // Only keep the third column when it is complete; a partial column
    // cannot be aligned with `x`/`y`.
    let has_error = !e.is_empty() && e.len() == x.len();
    Ok(ParsedData {
        x,
        y,
        e: if has_error { Some(e) } else { None },
    })
}
/// Reads the profile table out of a zip-based RASX container.
///
/// The entry named exactly `Data0/Profile0.txt` is preferred. If it is absent,
/// the first entry (in archive order) whose name contains `Profile` and ends
/// with `.txt` is used instead. Entries that cannot be opened while listing
/// are left out of the search.
///
/// The chosen entry is read line by line; the first two whitespace-separated
/// fields of each line become `x` and `y` when both parse as `f64`. Other
/// lines and any further columns are ignored. `e` is always `None`.
///
/// # Errors
///
/// Returns an error if the archive cannot be opened, if no profile entry is
/// found (`GeddesError::FileNotFoundInArchive`), if the entry cannot be
/// opened, or if reading a line fails.
pub fn parse_rasx<R: Read + Seek>(reader: R) -> Result<ParsedData, GeddesError> {
    let mut archive = ZipArchive::new(reader)?;

    // Snapshot the entry names first: looking an entry up by name later needs
    // a mutable borrow of the archive.
    let mut names: Vec<String> = Vec::new();
    for index in 0..archive.len() {
        if let Ok(entry) = archive.by_index(index) {
            names.push(entry.name().to_string());
        }
    }

    let exact_match = names
        .iter()
        .find(|name| name.as_str() == "Data0/Profile0.txt");
    let profile_name = match exact_match {
        Some(name) => name,
        None => names
            .iter()
            .find(|name| name.contains("Profile") && name.ends_with(".txt"))
            .ok_or_else(|| GeddesError::FileNotFoundInArchive("Profile*.txt".to_string()))?,
    };

    let file = archive.by_name(profile_name)?;

    let mut x = Vec::new();
    let mut y = Vec::new();
    for line in BufReader::new(file).lines() {
        let line = line?;
        // Blank lines yield no fields and fall through untouched.
        let mut fields = line.split_whitespace();
        if let (Some(first), Some(second)) = (fields.next(), fields.next()) {
            if let (Ok(val_x), Ok(val_y)) = (first.parse::<f64>(), second.parse::<f64>()) {
                x.push(val_x);
                y.push(val_y);
            }
        }
    }

    Ok(ParsedData { x, y, e: None })
}
/// Parses an XRDML (XML) document into an evenly spaced x axis plus intensities.
///
/// The XML is streamed event by event. Elements are matched by local name:
///
/// * `positions` with attribute `axis="2Theta"` marks the region whose
///   `startPosition` / `endPosition` text is recorded as the axis limits.
/// * `intensities` text is split on whitespace and every token that parses as
///   `f64` is collected; tokens that do not parse are skipped silently.
///
/// Reading stops at the first closing `intensities` tag once intensities and
/// both limits are known, or at end of file. `x` is then generated by spacing
/// `intensities.len()` points evenly from start to end (inclusive); a single
/// intensity yields just the start position. `e` is always `None`.
///
/// # Errors
///
/// Returns `GeddesError::Parse` when the XML reader reports an error, when an
/// attribute or text node cannot be decoded, when a captured start/end
/// position is not a valid number, when either 2Theta limit is missing, or
/// when no intensities were found.
pub fn parse_xrdml<R: Read>(reader: R) -> Result<ParsedData, GeddesError> {
let reader = BufReader::new(reader);
let mut xml = Reader::from_reader(reader);
xml.config_mut().trim_text(true);
// Scratch buffer reused by the XML reader for every event.
let mut buf = Vec::new();
let mut intensities = Vec::new();
// State flags describing which element the reader is currently inside.
let mut in_intensities = false;
let mut in_positions_2theta = false;
let mut capture_start = false;
let mut capture_end = false;
let mut start_pos: Option<f64> = None;
let mut end_pos: Option<f64> = None;
loop {
match xml.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.local_name().as_ref() {
b"positions" => {
// Every new `positions` element starts out as "not 2Theta" until its
// `axis` attribute says otherwise.
in_positions_2theta = false;
for attr in e.attributes() {
let attr = attr.map_err(|err| {
GeddesError::Parse(format!("XRDML attribute error: {err}"))
})?;
if attr.key.as_ref() == b"axis" {
let axis = attr
.unescape_value()
.map_err(|err| {
GeddesError::Parse(format!(
"XRDML attribute decode error: {err}"
))
})?
.into_owned();
if axis == "2Theta" {
in_positions_2theta = true;
}
}
}
}
// Start/end positions are only of interest inside the 2Theta axis block.
b"startPosition" => {
if in_positions_2theta {
capture_start = true;
}
}
b"endPosition" => {
if in_positions_2theta {
capture_end = true;
}
}
b"intensities" => {
in_intensities = true;
}
_ => {}
},
Ok(Event::Text(e)) => {
let text = e
.decode()
.map_err(|err| GeddesError::Parse(format!("XRDML text decode error: {err}")))?;
let text = text.trim();
// The flags are checked in priority order: start, then end, then intensities.
if text.is_empty() {
// Nothing to record for whitespace-only text.
} else if capture_start {
start_pos = Some(text.parse::<f64>().map_err(|_| {
GeddesError::Parse("XRDML invalid 2Theta start position".into())
})?);
} else if capture_end {
end_pos = Some(text.parse::<f64>().map_err(|_| {
GeddesError::Parse("XRDML invalid 2Theta end position".into())
})?);
} else if in_intensities {
// Best-effort: tokens that are not valid numbers are dropped.
for part in text.split_whitespace() {
if let Ok(value) = part.parse::<f64>() {
intensities.push(value);
}
}
}
}
Ok(Event::End(e)) => match e.local_name().as_ref() {
b"positions" => {
in_positions_2theta = false;
}
b"startPosition" => {
capture_start = false;
}
b"endPosition" => {
capture_end = false;
}
b"intensities" => {
in_intensities = false;
// Everything needed has been collected: stop reading so that only the
// first complete intensity list is used.
if !intensities.is_empty() && start_pos.is_some() && end_pos.is_some() {
break;
}
}
_ => {}
},
Ok(Event::Eof) => break,
Err(err) => {
return Err(GeddesError::Parse(format!("XRDML parse error: {err}")));
}
// All other event kinds are ignored.
_ => {}
}
buf.clear();
}
let start = start_pos
.ok_or_else(|| GeddesError::Parse("XRDML missing 2Theta start position".into()))?;
let end =
end_pos.ok_or_else(|| GeddesError::Parse("XRDML missing 2Theta end position".into()))?;
if intensities.is_empty() {
return Err(GeddesError::Parse("XRDML intensities not found".into()));
}
// Rebuild the x axis: the file stores only the limits, not each position.
let mut x = Vec::with_capacity(intensities.len());
if intensities.len() == 1 {
// A single point has no step; avoid dividing by zero below.
x.push(start);
} else {
let step = (end - start) / (intensities.len() as f64 - 1.0);
for i in 0..intensities.len() {
x.push(start + (i as f64) * step);
}
}
Ok(ParsedData {
x,
y: intensities,
e: None,
})
}
/// Parses a text RAW file that carries a `BANK` header line.
///
/// The header is the first line that starts with `BANK` and has at least
/// seven whitespace-separated fields. Fields 5 and 6 (zero-based) are parsed
/// as `f64` and divided by 100 to give the axis start and step (presumably
/// the file stores them in hundredths of the axis unit; confirm against the
/// format specification).
///
/// Every following line, up to the next line starting with `BANK` or end of
/// input, is split on whitespace; each token that parses as `f64` becomes one
/// `y` value and other tokens are skipped. `x[i]` is `start + i * step`.
/// `e` is always `None`.
///
/// # Errors
///
/// Returns an error if reading a line fails, `GeddesError::Parse` if the
/// header's start or step field is not numeric, and `GeddesError::Parse` if
/// no usable `BANK` header is found.
pub fn parse_gsas_raw<R: Read>(reader: R) -> Result<ParsedData, GeddesError> {
    let mut lines = BufReader::new(reader).lines();

    // Phase 1: scan forward to the header and pull out (start, step).
    let mut header: Option<(f64, f64)> = None;
    for line_res in lines.by_ref() {
        let line = line_res?;
        if !line.starts_with("BANK") {
            continue;
        }
        let parts: Vec<&str> = line.split_whitespace().collect();
        if parts.len() < 7 {
            // Too short to be the header we need; keep looking.
            continue;
        }
        let start_raw = parts[5]
            .parse::<f64>()
            .map_err(|_| GeddesError::Parse("Invalid start".into()))?;
        let step_raw = parts[6]
            .parse::<f64>()
            .map_err(|_| GeddesError::Parse("Invalid step".into()))?;
        header = Some((start_raw / 100.0, step_raw / 100.0));
        break;
    }
    let (start, step) = header
        .ok_or_else(|| GeddesError::Parse("BANK header not found in RAW file".into()))?;

    // Phase 2: the same iterator continues right after the header line.
    let mut y: Vec<f64> = Vec::new();
    for line in lines {
        let line = line?;
        if line.starts_with("BANK") {
            break;
        }
        y.extend(
            line.split_whitespace()
                .filter_map(|token| token.parse::<f64>().ok()),
        );
    }

    let x: Vec<f64> = (0..y.len()).map(|i| start + (i as f64) * step).collect();
    Ok(ParsedData { x, y, e: None })
}
/// Placeholder for a Bruker binary RAW reader.
///
/// The reader argument is never touched.
///
/// # Errors
///
/// Always returns `GeddesError::Parse` stating that the format is not yet
/// supported.
pub fn parse_bruker_raw<R: Read>(_reader: R) -> Result<ParsedData, GeddesError> {
    let message = String::from("Bruker binary RAW format not yet supported");
    Err(GeddesError::Parse(message))
}