1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
use std::{num::NonZeroU16, ops::RangeBounds};
use context_error::*;
use crate::{
chemistry::{ELEMENT_PARSE_LIST, MolecularFormula},
helper_functions::{RangeExtension, explain_number_error, str_starts_with},
};
impl MolecularFormula {
/// PSI-MOD: `(12)C -5 (13)C 5 H 1 N 3 O -1 S 9`
/// # Errors
/// If the formula is not valid according to the above specification, with some help on what is going wrong.
/// # Panics
/// It can panic if the string contains not UTF8 symbols.
pub fn from_psi_mod(
value: &str,
range: impl RangeBounds<usize>,
) -> Result<Self, BoxedError<'_, BasicKind>> {
let (mut index, end) = range.bounds(value.len());
let mut isotope = None;
let mut element = None;
let bytes = value.as_bytes();
let mut result = Self::default();
while index < end {
match bytes[index] {
b'(' if isotope.is_none() => {
let len = bytes
.iter()
.skip(index)
.position(|c| *c == b')')
.ok_or_else(|| {
BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
"No closing round bracket found",
Context::line(None, value, index, 1),
)
})?;
isotope = Some(
value[index + 1..index + len]
.parse::<NonZeroU16>()
.map_err(|err| {
BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
format!("The isotope number {}", explain_number_error(&err)),
Context::line(None, value, index + 1, len),
)
})?,
);
index += len + 1;
}
b'-' | b'0'..=b'9' if element.is_some() => {
let (num, len) = std::str::from_utf8(
&bytes
.iter()
.skip(index)
.take_while(|c| c.is_ascii_digit() || **c == b'-')
.copied()
.collect::<Vec<_>>(),
)
.map_or_else(
|e| panic!("Non UTF8 in PSI-MOD molecular formula, error: {e}"),
|v| {
(
v.parse::<i32>().map_err(|err| {
BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
format!(
"The isotope number {}",
explain_number_error(&err)
),
Context::line(None, value, index, v.len()),
)
}),
v.len(),
)
},
);
let num = num?;
if num != 0 && !Self::add(&mut result, (element.unwrap(), isotope, num)) {
return Err(BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
format!(
"An element without a defined mass ({}) was used",
element.unwrap()
),
Context::line(None, value, index - 1, 1),
));
}
element = None;
isotope = None;
index += len;
}
b' ' => index += 1,
_ => {
if let Some(element) = element
&& !Self::add(&mut result, (element, None, 1))
{
return Err(BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
format!("An element without a defined mass ({element}) was used"),
Context::line(None, value, index - 1, 1),
));
}
let mut found = false;
for possible in ELEMENT_PARSE_LIST {
if str_starts_with(&value[index..], possible.0, true) {
element = Some(possible.1);
index += possible.0.len();
found = true;
break;
}
}
if !found {
return Err(BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
"Not a valid character in formula",
Context::line(None, value, index, 1),
));
}
}
}
}
if isotope.is_some() || element.is_some() {
Err(BoxedError::new(
BasicKind::Error,
"Invalid PSI-MOD molecular formula",
"Last element missed a count",
Context::line(None, value, index, 1),
))
} else {
Ok(result)
}
}
}