1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::ops::RangeBounds;
use context_error::*;
use crate::{
chemistry::{ELEMENT_PARSE_LIST, Element, MolecularFormula},
helper_functions::{RangeExtension, next_num, str_starts_with},
quantities::Multi,
};
impl MolecularFormula {
/// Parse RESID formulas: `C 2 H 3 N 1 O 1 +` or `C 4 H 5 N 1 O 3, C 4 H 6 N 2 O 2`. If the `range` (byte range in the given line) is not specified it defaults to the full line.
/// # Errors
/// If the formula is not valid according to the above specification, with some help on what is going wrong.
/// # Panics
/// It can panic if the string contains non UTF8 symbols.
pub fn from_resid(
value: &str,
range: impl RangeBounds<usize>,
) -> Result<Multi<Self>, BoxedError<'_, BasicKind>> {
let mut multi = Vec::new();
let mut start = 0;
for part in value[range.start_index()..range.end_index(value.len())].split(',') {
multi.push(Self::from_resid_single(
value,
range.start_index() + start..range.start_index() + start + part.len(),
)?);
start += part.len() + 1;
}
Ok(multi.into())
}
/// Parse RESID formulas: `C 2 H 3 N 1 O 1 +` but does not allow multi formulas (split with commas). If the `range` (byte range in the given line) is not specified it defaults to the full line.
/// # Errors
/// If the formula is not valid according to the above specification, with some help on what is going wrong.
/// # Panics
/// It can panic if the string contains non UTF8 symbols.
pub fn from_resid_single(
value: &str,
range: impl RangeBounds<usize>,
) -> Result<Self, BoxedError<'_, BasicKind>> {
let (mut index, end) = range.bounds(value.len().saturating_sub(1));
let mut result = Self::default();
while index <= end {
trim(&mut index, value);
let mut element = None;
let mut amount: i32 = 1;
for possible in ELEMENT_PARSE_LIST {
if str_starts_with(&value[index..], possible.0, true) {
element = Some(possible.1);
index += possible.0.len();
break;
}
}
if element.is_none() {
if value[index..].starts_with('+') {
element = Some(Element::Electron);
index += 1;
amount = -1;
} else if value[index..].starts_with('-') {
element = Some(Element::Electron);
index += 1;
}
}
if let Some(element) = element.take() {
trim(&mut index, value);
if let Some(number) = next_num(value.as_bytes(), index, false) {
index += number.0;
amount *= number.1 as i32;
}
if !result.add((element, None, amount)) {
return Err(BoxedError::new(
BasicKind::Error,
"Invalid RESID molecular formula",
"An element with undefined mass was used",
Context::line(
None,
value,
index,
value[index..]
.chars()
.next()
.map(char::len_utf8)
.unwrap_or_default(),
),
));
}
trim(&mut index, value);
} else {
return Err(BoxedError::new(
BasicKind::Error,
"Invalid RESID molecular formula",
format!("Not a valid character in formula, now has: {result:?}"),
Context::line(
None,
value,
index,
value[index..]
.chars()
.next()
.map(char::len_utf8)
.unwrap_or_default(),
),
));
}
}
Ok(result)
}
}
fn trim(index: &mut usize, text: &str) {
*index = *index
+ text[*index..]
.chars()
.take_while(char::is_ascii_whitespace) // defined to be one byte each
.count();
}