ame2020/
lib.rs

1//! A parsing library for the [Atomic Mass Evaluation 2020] format
2//!
3//! The data is represented by [`Nuclide`], and the parsing is mostly done by [`Iter`].
4//! The data can be collected into a type that implements [`FromIterator`], such as [`Vec`].
5//!
6//! [Atomic Mass Evaluation 2020]: https://www-nds.iaea.org/amdc/
7//!
8//! # Format
9//!
//! The format is documented in the preamble of the AME data file itself. This library parses data
//! formatted like the `mass.mas20` file. The rounded version and previous versions, such as
//! AME2016, are incompatible.
13//!
14//! # Examples
15//!
16//! ```no_run
17//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
18//! use ame2020::{Iter, Nuclide};
19//! use std::{fs::File, io::BufReader};
20//!
21//! let file = File::open("mass.mas20")?;
22//! let file = BufReader::new(file);
23//! let iter = Iter::new(file);
24//! let data: Vec<Nuclide> = iter.collect::<Result<_, _>>()?;
25//! # Ok(())
26//! # }
27//! ```
28//!
29//! # Features
30//!
31//! * `serde`: Provide `Serialize` and `Deserialize` implementations for [serde](https://serde.rs).
32//! * `arbitrary`: Provide `Arbitrary` implementations for [arbitrary](https://crates.io/crates/arbitrary), useful for fuzzing.
33#[cfg(feature = "arbitrary")]
34use arbitrary::{Arbitrary, Unstructured};
35use arrayvec::ArrayString;
36#[cfg(feature = "serde")]
37use serde::{Deserialize, Serialize};
38#[cfg(feature = "serde")]
39use std::ops::Not;
40use std::{
41    cmp::Ordering,
42    io::{BufRead, Lines},
43    ops::{ControlFlow, Range},
44};
45
46pub use crate::error::AmeError;
47
48mod error;
49#[cfg(test)]
50mod tests;
51
/// A value that has a mean and uncertainty.
///
/// The data may be an estimate (indicated by `is_estimated`).
/// If not, they are based on experimental data.
#[derive(Clone, PartialEq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "arbitrary", derive(Arbitrary))]
pub struct Value {
    /// Central (mean) value of the quantity.
    pub mean: f64,
    /// Uncertainty associated with `mean`.
    pub uncertainty: f64,
    /// Whether the value is an estimate rather than based on experimental data.
    ///
    /// The parser derives this flag from the `#` marker used in the data file.
    // With the `serde` feature the field is omitted from the output when it is `false`
    // (`Not::not` returns `true` for `false`) and defaults to `false` when absent on input.
    #[cfg_attr(feature = "serde", serde(default, skip_serializing_if = "Not::not"))]
    pub is_estimated: bool,
}
65
66impl PartialOrd for Value {
67    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
68        self.mean.partial_cmp(&other.mean)
69    }
70}
71
/// A type holding the nuclide data.
///
/// One `Nuclide` is produced for every data line that [`Iter`] parses successfully.
///
/// # Examples
///
/// ```
/// use ame2020::{Iter, Nuclide};
/// use std::io::Cursor;
///
/// let reader = Cursor::new(r"1
/// 1
/// 0  1    1    0    1  n         8071.31806     0.00044       0.0        0.0     B-    782.3470     0.0004    1 008664.91590     0.00047");
///
/// let mut iter = Iter::new(reader);
/// let nuc = iter.next().unwrap().unwrap();
/// assert_eq!(nuc.n, 1);
/// assert_eq!(nuc.z, 0);
/// assert_eq!(&nuc.element, "n");
/// ```
#[derive(Clone, PartialEq, Debug)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct Nuclide {
    /// Neutron number
    pub n: u32,
    /// Proton number
    pub z: u32,
    /// Chemical symbol of the element
    ///
    /// At most three bytes long, with surrounding whitespace removed by the parser.
    pub element: ArrayString<3>,
    /// Mass excess
    ///
    /// The difference between the mass in atomic mass units and the atomic mass number (N+Z).
    // NOTE(review): the parser stores this number exactly as written in the data file, without
    // rescaling; the unit is therefore the file's unit (presumably keV, confirm against the
    // preamble of the data file).
    pub mass_excess: Value,
    /// Binding energy per nucleon
    pub binding_energy_per_a: Value,
    /// Beta decay energy, if any
    ///
    /// `None` when the data file marks the field with `*`.
    pub beta_decay_energy: Option<Value>,
    /// Atomic Mass in atomic mass units
    ///
    /// The data file splits this number into an integer part and a micro-u part; the parser
    /// recombines them into a single value.
    pub atomic_mass: Value,
}
110
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for Nuclide {
    /// Builds a `Nuclide` from unstructured fuzzer input.
    ///
    /// The fields are generated in declaration order. `element` needs a dedicated helper
    /// because it is an `ArrayString`, which is produced here from raw bytes.
    fn arbitrary(u: &mut Unstructured) -> arbitrary::Result<Self> {
        // Adapted from arbitrary's implementation of `Arbitrary` for `&str`: take up to `CAP`
        // bytes and keep the longest prefix that is valid UTF-8.
        fn array_string<const CAP: usize>(
            u: &mut Unstructured,
        ) -> arbitrary::Result<ArrayString<CAP>> {
            let wanted = u.arbitrary_len::<u8>()?.min(CAP);
            // Peek first so that only the valid prefix is consumed from the input.
            let usable = match std::str::from_utf8(u.peek_bytes(wanted).unwrap()) {
                Ok(_) => wanted,
                Err(e) => e.valid_up_to(),
            };
            let raw = u.bytes(usable).unwrap();
            let text = std::str::from_utf8(raw).expect("we already checked for validity");
            Ok(ArrayString::from(text).expect("size is limited to CAP"))
        }

        // Struct-literal fields are evaluated in the order written, so the input is consumed
        // in the same sequence as the field declarations.
        Ok(Self {
            n: u.arbitrary()?,
            z: u.arbitrary()?,
            element: array_string(u)?,
            mass_excess: u.arbitrary()?,
            binding_energy_per_a: u.arbitrary()?,
            beta_decay_energy: u.arbitrary()?,
            atomic_mass: u.arbitrary()?,
        })
    }
}
155
/// Position of the reader within the input, used by `Iter::parse_line` to decide whether a line
/// is skipped or parsed as data.
#[derive(Copy, Clone, Eq, PartialEq, Hash, Debug)]
enum ReadState {
    /// Initial state: lines are skipped until one starting with `1` is seen.
    Start,
    /// A first line starting with `1` has been seen; lines are skipped until a second one.
    Preamble,
    /// A second line starting with `1` has been seen; lines are skipped until one starting
    /// with `0`, which is parsed as the first data line.
    Headers,
    /// Every line is parsed as a data line.
    Body,
}
163
/// An iterator that reads AME2020 data.
///
/// Lines preceding the data are skipped; each data line is yielded as a
/// `Result<Nuclide, AmeError>`.
///
/// # Examples
///
/// ```
/// use ame2020::Iter;
/// use std::io::Cursor;
///
/// // `Cursor` is a type that implements `BufRead`.
/// // Consider using `BufReader` if you have a `File`.
/// let data = Cursor::new(r"1
/// 1
/// 0  1    1    0    1  n         8071.31806     0.00044       0.0        0.0     B-    782.3470     0.0004    1 008664.91590     0.00047");
/// let mut iter = Iter::new(data);
/// assert!(iter.next().is_some());
/// assert!(iter.next().is_none());
/// ```
///
/// # Errors
///
/// If a line fails to parse or there is a reading error, [`next`][Self::next] will return `Some(Err)`.
/// Calling `next` again may return `Some`, but the validity of the data is not guaranteed.
pub struct Iter<R: BufRead> {
    /// Line-by-line view of the underlying reader.
    lines: Lines<R>,
    /// Where in the input the reader currently is (see `ReadState`).
    state: ReadState,
}
191
192impl<R: BufRead> Iter<R> {
193    /// Creates a new `Iter` from `reader`.
194    pub fn new(reader: R) -> Self {
195        let lines = reader.lines();
196        Self {
197            lines,
198            state: ReadState::Start,
199        }
200    }
201
202    fn parse_line(&mut self, line: &str) -> ControlFlow<Result<Nuclide, AmeError>> {
203        fn range_err(line: &str, range: Range<usize>) -> Result<&str, AmeError> {
204            if line.len() < range.end {
205                Err(AmeError::TooShortLine)
206            } else {
207                Ok(line.get(range).ok_or(AmeError::StrIndex)?.trim())
208            }
209        }
210
211        fn parse_value(
212            (s_mean, r_mean): (&str, Range<usize>),
213            (s_unc, r_unc): (&str, Range<usize>),
214        ) -> Result<Value, AmeError> {
215            let mean = range_err(&s_mean.replace('#', "."), r_mean)?.parse()?;
216            let uncertainty = range_err(&s_unc.replace('#', "."), r_unc)?.parse()?;
217            let is_estimated = s_mean.contains('#');
218            Ok(Value {
219                mean,
220                uncertainty,
221                is_estimated,
222            })
223        }
224
225        fn inner(line: &str) -> Result<Nuclide, AmeError> {
226            let n = range_err(line, 4..9)?.parse()?;
227            let z = range_err(line, 9..14)?.parse()?;
228            let element = ArrayString::from(range_err(line, 20..23)?)
229                .expect("the range is 3 and the capacity is 3");
230            let mass_excess = parse_value((line, 28..42), (line, 42..54))?;
231            let binding_energy_per_a = parse_value((line, 54..67), (line, 68..78))?;
232            let beta_decay_energy = (range_err(line, 87..88)? != "*")
233                .then(|| parse_value((line, 81..94), (line, 94..105)))
234                .transpose()?;
235
236            // the value is given in micro-u, with a space before the 1e6 place.
237            // this makes it inconvenient to parse in u.
238            //
239            // lines don't have the same length, so use `line.len()`. you could use a RangeFrom,
240            // but that would require rewriting `parse_value` and `range_err` to be generic, and it
241            // would lead to more complicated bounds checks.
242            let mut atomic_mass = parse_value((line, 110..123), (line, 123..(line.len())))?;
243            atomic_mass.mean *= 1e-6;
244            atomic_mass.uncertainty *= 1e-6;
245            atomic_mass.mean += f64::from(range_err(line, 106..109)?.parse::<u16>()?);
246
247            Ok(Nuclide {
248                n,
249                z,
250                element,
251                mass_excess,
252                binding_energy_per_a,
253                beta_decay_energy,
254                atomic_mass,
255            })
256        }
257
258        match self.state {
259            ReadState::Start => {
260                if line.starts_with('1') {
261                    self.state = ReadState::Preamble;
262                }
263                ControlFlow::Continue(())
264            }
265            ReadState::Preamble => {
266                if line.starts_with('1') {
267                    self.state = ReadState::Headers;
268                }
269                ControlFlow::Continue(())
270            }
271            ReadState::Headers => {
272                if line.starts_with('0') {
273                    self.state = ReadState::Body;
274                    ControlFlow::Break(inner(line))
275                } else {
276                    ControlFlow::Continue(())
277                }
278            }
279            ReadState::Body => ControlFlow::Break(inner(line)),
280        }
281    }
282}
283
284impl<R: BufRead> Iterator for Iter<R> {
285    type Item = Result<Nuclide, AmeError>;
286
287    fn next(&mut self) -> Option<Self::Item> {
288        loop {
289            match self.lines.next()? {
290                Ok(line) => match self.parse_line(&line) {
291                    ControlFlow::Continue(()) => continue,
292                    ControlFlow::Break(res) => return Some(res),
293                },
294                Err(e) => return Some(Err(e.into())),
295            }
296        }
297    }
298}