use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::number::complete::{be_f32, be_u16, be_u32, be_u8};
use nom::sequence::tuple;
use nom::IResult;
use std::convert::TryInto;
use thiserror::Error;
#[derive(Error, Debug, PartialEq)]
pub enum UcsfError {
#[error("Unsupported format version. Currently the parser only supports format version 2.")]
UnsupportedFormat,
#[error("Unsupported number of components. Currently the parser only supports files with a number of1 components per data point (= Real).")]
UnsupportedComponents,
#[error("Failed to parse")]
Parsing,
}
#[derive(Debug, Clone)]
pub struct UcsfFile {
pub header: Header,
pub axis_headers: Vec<AxisHeader>,
pub data: Vec<f32>,
}
impl UcsfFile {
fn calculate_data_size(axis_headers: &[AxisHeader]) -> usize {
axis_headers
.iter()
.map(|axis| axis.data_points as usize)
.product::<usize>()
* 4
}
fn parse_data_raw(input: &[u8], size: usize) -> IResult<&[u8], &[u8]> {
take(size)(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (mut rem, header) = Header::parse(&input)?;
let mut axis_headers = vec![];
for _ in 0..header.dimensions {
let (_rem, axis_header) = AxisHeader::parse(&rem)?;
rem = _rem;
axis_headers.push(axis_header);
}
let data_size = Self::calculate_data_size(&axis_headers);
let (rem, data) = Self::parse_data_raw(rem, data_size).map_err(|_| UcsfError::Parsing)?;
let float_data: Vec<f32> = data
.chunks(4)
.map(|chunk| f32::from_be_bytes(chunk.try_into().unwrap()))
.collect();
Ok((
rem,
Self {
header,
axis_headers,
data: float_data,
},
))
}
pub fn axis_data_points(&self, axis: usize) -> u32 {
self.axis_headers[axis].data_points
}
pub fn axis_tiles(&self, axis: usize) -> u32 {
self.axis_headers[axis].num_tiles()
}
pub fn axis_tile_size(&self, axis: usize) -> u32 {
self.axis_headers[axis].tile_size
}
pub fn tiles(&self) -> Tiles<'_> {
Tiles::for_file(&self)
}
pub fn axis_sizes(&self) -> Vec<usize> {
self.axis_headers
.iter()
.map(|axis| axis.data_points as usize)
.collect()
}
pub fn data_continous(&self) -> Vec<f32> {
let total_size = self.data.len();
let mut data = [0f32].repeat(total_size);
for tile in self.tiles() {
for ((i_axis_1, i_axis_2), value) in tile.iter_with_abolute_pos() {
let pos = i_axis_1 * (self.axis_data_points(1) as usize) + i_axis_2;
data[pos] = value;
}
}
data
}
pub fn bounds(&self) -> (f32, f32) {
let mut sorted_data = self.data.to_vec();
sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
let min_val: f32 = *sorted_data.first().unwrap();
let max_val: f32 = *sorted_data.last().unwrap();
(min_val, max_val)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
pub dimensions: u8,
pub components: u8,
pub format_version: u16,
pub remainder: Vec<u8>,
}
impl Header {
fn parse_raw(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], u8, u8, u16, &[u8])> {
tuple((
tag(b"UCSF NMR"),
take(2u8),
be_u8,
be_u8,
be_u16,
take(166u8),
))(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (rem, res) = Self::parse_raw(input).map_err(|_| UcsfError::Parsing)?;
let map = |(
_magic_string,
_magic_strimg_rem,
dimensions,
components,
format_version,
remainder,
): (_, _, _, _, _, &[u8])| {
if components != 1 {
return Err(UcsfError::UnsupportedComponents);
}
if format_version != 2 {
return Err(UcsfError::UnsupportedFormat);
}
Ok((
rem,
Self {
dimensions,
components,
format_version,
remainder: remainder.to_vec(),
},
))
};
map(res)
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct AxisHeader {
pub nucleus_name: String,
pub data_points: u32,
pub tile_size: u32,
pub frequency: f32,
pub spectral_width: f32,
pub center: f32,
pub remainder: Vec<u8>,
}
impl AxisHeader {
fn parse_raw(input: &[u8]) -> IResult<&[u8], (&[u8], u32, &[u8], u32, f32, f32, f32, &[u8])> {
tuple((
take(8u8),
be_u32,
take(4u8),
be_u32,
be_f32,
be_f32,
be_f32,
take(96u8),
))(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (rem, res) = Self::parse_raw(input).map_err(|_| UcsfError::Parsing)?;
let map = |(
nucleus_name,
data_points,
_unknown,
tile_size,
frequency,
spectral_width,
center,
remainder,
): (&[u8], _, _, _, _, _, _, &[u8])| {
let nucleus_name =
String::from_utf8_lossy(nucleus_name.split(|n| *n == 0u8).next().unwrap())
.trim_end()
.to_owned();
Ok((
rem,
Self {
nucleus_name,
data_points,
tile_size,
frequency,
spectral_width,
center,
remainder: remainder.to_vec(),
},
))
};
map(res)
}
pub fn num_tiles(&self) -> u32 {
self.data_points / self.tile_size
}
}
pub struct Tile<'a> {
pub axis_1_len: usize,
pub axis_2_len: usize,
pub axis_1_start: usize,
pub axis_2_start: usize,
pub data: &'a [f32],
}
impl<'a> Tile<'a> {
pub fn data(&self) -> &[f32] {
&self.data
}
pub fn iter_with_abolute_pos(&self) -> AbsolutePosValIter<'_> {
AbsolutePosValIter {
tile: self,
next_index: 0,
}
}
}
pub struct AbsolutePosValIter<'a> {
tile: &'a Tile<'a>,
next_index: usize,
}
impl<'a> Iterator for AbsolutePosValIter<'a> {
type Item = ((usize, usize), f32);
fn next(&mut self) -> Option<Self::Item> {
if self.next_index >= self.tile.data().len() {
return None;
}
let axis_1_rel = self.next_index / self.tile.axis_2_len;
let axis_2_rel = self.next_index % self.tile.axis_2_len;
let axis_1_abs = axis_1_rel + self.tile.axis_1_start;
let axis_2_abs = axis_2_rel + self.tile.axis_2_start;
let val = self.tile.data()[self.next_index];
self.next_index += 1;
Some(((axis_1_abs, axis_2_abs), val))
}
}
pub struct Tiles<'a> {
next_index: usize,
file: &'a UcsfFile,
}
impl<'a> Tiles<'a> {
pub fn for_file(file: &'a UcsfFile) -> Self {
Self {
next_index: 0,
file,
}
}
}
impl<'a> Iterator for Tiles<'a> {
type Item = Tile<'a>;
fn next(&mut self) -> Option<Self::Item> {
let tiles_axis_1 = self.file.axis_tiles(0) as usize;
let tiles_axis_2 = self.file.axis_tiles(1) as usize;
let tiles_total = tiles_axis_1 * tiles_axis_2;
if tiles_total <= self.next_index {
return None;
}
let tile_index_1 = self.next_index / tiles_axis_2;
let tile_index_2 = self.next_index % tiles_axis_2;
let axis_1_len = self.file.axis_tile_size(0) as usize;
let axis_2_len = self.file.axis_tile_size(1) as usize;
let axis_1_start = axis_1_len * tile_index_1;
let axis_2_start = axis_2_len * tile_index_2;
let tile_data_points = axis_1_len * axis_2_len;
let data_range_start = tile_data_points * self.next_index;
let data_range_end = data_range_start + tile_data_points;
self.next_index += 1;
Some(Tile {
axis_1_len,
axis_2_len,
axis_1_start,
axis_2_start,
data: &self.file.data[data_range_start..data_range_end],
})
}
}