use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::number::complete::{be_f32, be_u16, be_u32, be_u8};
use nom::sequence::tuple;
use nom::IResult;
use std::convert::TryInto;
use thiserror::Error;
#[derive(Error, Debug, PartialEq)]
pub enum UcsfError {
#[error("Unsupported format version. Currently the parser only supports format version 2.")]
UnsupportedFormat,
#[error("Unsupported number of components. Currently the parser only supports files with a number of1 components per data point (= Real).")]
UnsupportedComponents,
#[error("Failed to parse")]
Parsing,
}
#[derive(Debug, Clone)]
pub struct UcsfFile {
pub header: Header,
pub axis_headers: Vec<AxisHeader>,
pub data: Vec<f32>,
}
impl UcsfFile {
fn calculate_data_size(axis_headers: &[AxisHeader]) -> usize {
axis_headers
.iter()
.map(|axis| axis.padded_size() as usize)
.product::<usize>()
* 4
}
fn parse_data_raw(input: &[u8], size: usize) -> IResult<&[u8], &[u8]> {
take(size)(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (mut rem, header) = Header::parse(&input)?;
let mut axis_headers = vec![];
for _ in 0..header.dimensions {
let (_rem, axis_header) = AxisHeader::parse(&rem)?;
rem = _rem;
axis_headers.push(axis_header);
}
let data_size = Self::calculate_data_size(&axis_headers);
let (rem, data) = Self::parse_data_raw(rem, data_size).map_err(|_| UcsfError::Parsing)?;
let float_data: Vec<f32> = data
.chunks(4)
.map(|chunk| f32::from_be_bytes(chunk.try_into().unwrap()))
.collect();
Ok((
rem,
Self {
header,
axis_headers,
data: float_data,
},
))
}
pub fn axis_data_points(&self, axis: usize) -> u32 {
self.axis_headers[axis].data_points
}
pub fn tiles(&self) -> Tiles<'_> {
Tiles::for_file(&self)
}
pub fn axis_tiles(&self) -> Vec<usize> {
self.axis_headers
.iter()
.map(|axis| axis.num_tiles() as usize)
.collect()
}
pub fn axis_tile_sizes(&self) -> Vec<usize> {
self.axis_headers
.iter()
.map(|axis| axis.tile_size as usize)
.collect()
}
pub fn axis_sizes(&self) -> Vec<usize> {
self.axis_headers
.iter()
.map(|axis| axis.data_points as usize)
.collect()
}
pub fn data_continous(&self) -> Vec<f32> {
let total_size = Self::calculate_data_size(&self.axis_headers);
let mut data = [0f32].repeat(total_size);
for tile in self.tiles() {
for (axis_indices, value) in tile.iter_with_abolute_pos() {
let pos = multi_dim_position(&self.axis_sizes(), &axis_indices);
data[pos] = value;
}
}
data
}
pub fn bounds(&self) -> (f32, f32) {
let mut sorted_data = self.data.to_vec();
sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
let min_val: f32 = *sorted_data.first().unwrap();
let max_val: f32 = *sorted_data.last().unwrap();
(min_val, max_val)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
pub dimensions: u8,
pub components: u8,
pub format_version: u16,
pub remainder: Vec<u8>,
}
impl Header {
fn parse_raw(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], u8, u8, u16, &[u8])> {
tuple((
tag(b"UCSF NMR"),
take(2u8),
be_u8,
be_u8,
be_u16,
take(166u8),
))(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (rem, res) = Self::parse_raw(input).map_err(|_| UcsfError::Parsing)?;
let map = |(
_magic_string,
_magic_strimg_rem,
dimensions,
components,
format_version,
remainder,
): (_, _, _, _, _, &[u8])| {
if components != 1 {
return Err(UcsfError::UnsupportedComponents);
}
if format_version != 2 {
return Err(UcsfError::UnsupportedFormat);
}
Ok((
rem,
Self {
dimensions,
components,
format_version,
remainder: remainder.to_vec(),
},
))
};
map(res)
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct AxisHeader {
pub nucleus_name: String,
pub data_points: u32,
pub tile_size: u32,
pub frequency: f32,
pub spectral_width: f32,
pub center: f32,
pub remainder: Vec<u8>,
}
impl AxisHeader {
fn parse_raw(input: &[u8]) -> IResult<&[u8], (&[u8], u32, &[u8], u32, f32, f32, f32, &[u8])> {
tuple((
take(8u8),
be_u32,
take(4u8),
be_u32,
be_f32,
be_f32,
be_f32,
take(96u8),
))(input)
}
pub fn parse(input: &[u8]) -> Result<(&[u8], Self), UcsfError> {
let (rem, res) = Self::parse_raw(input).map_err(|_| UcsfError::Parsing)?;
let map = |(
nucleus_name,
data_points,
_unknown,
tile_size,
frequency,
spectral_width,
center,
remainder,
): (&[u8], _, _, _, _, _, _, &[u8])| {
let nucleus_name =
String::from_utf8_lossy(nucleus_name.split(|n| *n == 0u8).next().unwrap())
.trim_end()
.to_owned();
Ok((
rem,
Self {
nucleus_name,
data_points,
tile_size,
frequency,
spectral_width,
center,
remainder: remainder.to_vec(),
},
))
};
map(res)
}
pub fn num_tiles(&self) -> u32 {
(self.data_points + self.tile_size - 1) / self.tile_size
}
pub fn padded_size(&self) -> u32 {
self.num_tiles() * self.tile_size
}
pub fn tile_is_padded(&self, tile_n: usize) -> bool {
let num_full_tiles = self.data_points / self.tile_size;
tile_n >= num_full_tiles as usize
}
pub fn tile_padding(&self, tile_n: usize) -> u32 {
match self.tile_is_padded(tile_n) {
false => 0,
true => self.padded_size() - self.data_points,
}
}
}
pub struct Tile<'a> {
pub axis_lengths: Vec<usize>,
pub axis_starts: Vec<usize>,
pub data: &'a [f32],
}
impl<'a> Tile<'a> {
pub fn data(&self) -> &[f32] {
&self.data
}
pub fn iter_with_abolute_pos(&self) -> AbsolutePosValIter<'_> {
AbsolutePosValIter {
tile: self,
next_index: 0,
}
}
}
pub struct AbsolutePosValIter<'a> {
tile: &'a Tile<'a>,
next_index: usize,
}
impl<'a> AbsolutePosValIter<'a> {
pub fn as_2d(&'a mut self) -> AbsolutePosValIter2D<'a> {
AbsolutePosValIter2D { iter: self }
}
pub fn as_3d(&'a mut self) -> AbsolutePosValIter3D<'a> {
AbsolutePosValIter3D { iter: self }
}
pub fn as_4d(&'a mut self) -> AbsolutePosValIter4D<'a> {
AbsolutePosValIter4D { iter: self }
}
}
impl<'a> Iterator for AbsolutePosValIter<'a> {
type Item = (Vec<usize>, f32);
fn next(&mut self) -> Option<Self::Item> {
if self.next_index >= self.tile.data().len() {
return None;
}
let axis_rel = multi_dim_index(&self.tile.axis_lengths, self.next_index);
let axis_abs: Vec<_> = axis_rel
.iter()
.zip(&self.tile.axis_starts)
.map(|(axis_relative, axis_start)| axis_relative + axis_start)
.collect();
let val = self.tile.data()[self.next_index];
self.next_index += 1;
Some(((axis_abs), val))
}
}
pub struct AbsolutePosValIter2D<'a> {
iter: &'a mut AbsolutePosValIter<'a>,
}
impl<'a> Iterator for AbsolutePosValIter2D<'a> {
type Item = ((usize, usize), f32);
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|(axis_abs, val)| ((axis_abs[0], axis_abs[1]), val))
}
}
pub struct AbsolutePosValIter3D<'a> {
iter: &'a mut AbsolutePosValIter<'a>,
}
impl<'a> Iterator for AbsolutePosValIter3D<'a> {
type Item = ((usize, usize, usize), f32);
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|(axis_abs, val)| ((axis_abs[0], axis_abs[1], axis_abs[2]), val))
}
}
pub struct AbsolutePosValIter4D<'a> {
iter: &'a mut AbsolutePosValIter<'a>,
}
impl<'a> Iterator for AbsolutePosValIter4D<'a> {
type Item = ((usize, usize, usize, usize), f32);
fn next(&mut self) -> Option<Self::Item> {
self.iter
.next()
.map(|(axis_abs, val)| ((axis_abs[0], axis_abs[1], axis_abs[2], axis_abs[3]), val))
}
}
pub struct Tiles<'a> {
next_index: usize,
file: &'a UcsfFile,
}
impl<'a> Tiles<'a> {
pub fn for_file(file: &'a UcsfFile) -> Self {
Self {
next_index: 0,
file,
}
}
}
impl<'a> Iterator for Tiles<'a> {
type Item = Tile<'a>;
fn next(&mut self) -> Option<Self::Item> {
let tiles_per_axis = self.file.axis_tiles();
let tiles_total: usize = tiles_per_axis.iter().product();
if tiles_total <= self.next_index {
return None;
}
let tile_indices = multi_dim_index(&tiles_per_axis, self.next_index);
let axis_tile_sizes = self.file.axis_tile_sizes();
let this_tile_axis_lens: Vec<_> = axis_tile_sizes
.iter()
.zip(&tile_indices)
.zip(&self.file.axis_headers)
.map(|((tile_size, tile_index), axis_header)| {
(*tile_size as u32 - axis_header.tile_padding(*tile_index)) as usize
})
.collect();
let axis_starts: Vec<_> = axis_tile_sizes
.iter()
.zip(tile_indices)
.map(|(tile_size, tile_index)| tile_size * tile_index)
.collect();
let tile_data_points: usize = this_tile_axis_lens.iter().product();
let data_range_start = tile_data_points * self.next_index;
let data_range_end = data_range_start + tile_data_points;
self.next_index += 1;
Some(Tile {
axis_lengths: this_tile_axis_lens,
axis_starts,
data: &self.file.data[data_range_start..data_range_end],
})
}
}
fn multi_dim_position(sizes: &[usize], indices: &[usize]) -> usize {
assert!(sizes.len() == indices.len());
let mut pos = 0;
for dim in 0..sizes.len() {
let mut subdimensions_size = 0;
let subdimensions = sizes.len() - (dim + 1);
if subdimensions >= 1 {
subdimensions_size = sizes[(sizes.len() - subdimensions)..(sizes.len())]
.iter()
.product();
}
let dim_index = indices[dim];
match subdimensions_size {
0 => pos += dim_index,
subdimensions_size => pos += dim_index * subdimensions_size,
}
}
pos
}
fn multi_dim_index(sizes: &[usize], pos: usize) -> Vec<usize> {
let mut indices = [0usize].repeat(sizes.len());
match sizes.len() {
2 => {
indices[0] = pos / sizes[1];
indices[1] = pos % sizes[1];
}
3 => {
indices[0] = pos / (sizes[1] * sizes[2]);
indices[1] = (pos % (sizes[1] * sizes[2])) / sizes[2];
indices[2] = (pos % (sizes[1] * sizes[2])) % sizes[2];
}
_ => unimplemented!(),
}
indices
}
#[cfg(test)]
mod test {
#[test]
fn multi_dim_position() {
let f = super::multi_dim_position;
assert_eq!(f(&[3, 3], &[1, 0]), 3);
assert_eq!(f(&[3, 3], &[1, 1]), 4);
assert_eq!(f(&[3, 3], &[1, 2]), 5);
assert_eq!(f(&[3, 3], &[2, 0]), 6);
assert_eq!(f(&[4, 3, 3], &[2, 0, 0]), 18);
assert_eq!(f(&[4, 3, 3], &[3, 2, 1]), 34);
}
#[test]
fn multi_dim_index() {
let f = super::multi_dim_index;
assert_eq!(f(&[4, 3, 3], 18), vec![2, 0, 0]);
assert_eq!(f(&[4, 3, 3], 34), vec![3, 2, 1]);
assert_eq!(f(&[4, 3, 2], 18), vec![3, 0, 0]);
assert_eq!(f(&[4, 3, 2], 19), vec![3, 0, 1]);
assert_eq!(f(&[4, 3, 2], 20), vec![3, 1, 0]);
assert_eq!(f(&[4, 3, 2], 21), vec![3, 1, 1]);
}
}