1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
//! Properties that can be computed from trees of molecular nodes.
use crate::{errors::CountError, prelude::Element};
mod blankets;
mod chemical_tree;
mod inchi_tree;
pub(crate) use chemical_tree::ChemicalTree;
pub(crate) use inchi_tree::InChITree;
use num_traits::{CheckedAdd, CheckedMul, ConstOne, ConstZero};
/// Decides whether `next` may directly follow `prev` in a Hill-ordered
/// sequence of elements.
///
/// # Arguments
///
/// * `prev` - The element that comes first in the pair.
/// * `next` - The element that comes immediately after `prev`.
/// * `has_carbon` - Whether the carbon-first rule set applies (i.e. the
///   formula being checked contains carbon).
///
/// # Returns
///
/// * When `has_carbon` is `true`: carbon must precede every other element,
///   hydrogen must precede every element except carbon, and any two other
///   elements must be in strictly ascending order of their `AsRef<str>`
///   representation.
/// * When `has_carbon` is `false`: every pair is compared by its
///   `AsRef<str>` representation, strictly ascending.
///
/// The comparison is strict in both modes, so a repeated element
/// (e.g. `C` followed by `C`) is never considered sorted.
#[must_use]
pub fn is_hill_sorted_pair(prev: Element, next: Element, has_carbon: bool) -> bool {
    // Priority class of an element: carbon first, hydrogen second, and
    // everything else last. Without carbon no element is privileged, so
    // every element falls into the last class.
    let priority = |element: &Element| -> u8 {
        if !has_carbon {
            return 2;
        }
        match element {
            Element::C => 0,
            Element::H => 1,
            _ => 2,
        }
    };

    let prev_rank = priority(&prev);
    let next_rank = priority(&next);

    // Different classes: the lower class must come first.
    if prev_rank != next_rank {
        return prev_rank < next_rank;
    }

    // Same privileged class means carbon or hydrogen appeared twice in a row.
    if prev_rank < 2 {
        return false;
    }

    // Both elements are unprivileged: fall back to a strict string comparison.
    let prev_symbol: &str = prev.as_ref();
    let next_symbol: &str = next.as_ref();
    prev_symbol < next_symbol
}
/// Interface for querying molecular properties of a tree of molecular nodes.
///
/// `Count` is the source count type: the counting methods
/// ([`Self::count_of_element`], [`Self::count_of_isotope`]) produce any
/// result type `C` that satisfies `C: From<Count>`.
pub trait MolecularTree<Count>: Sized {
    /// Iterator type returned by [`Self::elements`].
    type ElementIter<'a>: Iterator<Item = Element>
    where
        Self: 'a;

    /// Iterator type returned by [`Self::non_hydrogens`].
    type NonHydrogenElementIter<'a>: Iterator<Item = Element>
    where
        Self: 'a;

    /// Yields the elements that appear in the molecular formula.
    ///
    /// # Implementation Notes
    ///
    /// * Elements are yielded without their counts: an element that appears
    ///   with a count is still yielded only once.
    /// * Isotopes, if present, are normalized to their base elements.
    /// * Residuals, if the formula contains any, are ignored.
    fn elements(&self) -> Self::ElementIter<'_>;

    /// Yields the elements of the molecular formula other than hydrogen.
    fn non_hydrogens(&self) -> Self::NonHydrogenElementIter<'_>;

    /// Reports whether the tree contains at least one element.
    fn contains_elements(&self) -> bool;

    /// Reports whether the tree contains at least one non-hydrogen element.
    fn contains_non_hydrogens(&self) -> bool;

    /// Reports whether `element` occurs in the tree.
    fn contains_element(&self, element: Element) -> bool;

    /// Reports whether the tree contains at least one isotope.
    fn contains_isotopes(&self) -> bool;

    /// Reports whether `isotope` occurs in the tree.
    fn contains_isotope(&self, isotope: elements_rs::Isotope) -> bool;

    /// Counts the occurrences of `element` in the tree, expressed in the
    /// caller-chosen type `C`.
    ///
    /// # Errors
    ///
    /// Returns [`CountError`] if the count cannot be computed or represented
    /// by the requested type.
    fn count_of_element<C>(&self, element: Element) -> Result<C, CountError>
    where
        C: From<Count> + CheckedAdd + CheckedMul + ConstZero + ConstOne;

    /// Counts the occurrences of `isotope` in the tree, expressed in the
    /// caller-chosen type `C`.
    ///
    /// # Errors
    ///
    /// Returns [`CountError`] if the count cannot be computed or represented
    /// by the requested type.
    fn count_of_isotope<C>(&self, isotope: elements_rs::Isotope) -> Result<C, CountError>
    where
        C: From<Count> + CheckedAdd + CheckedMul + ConstZero + ConstOne;

    /// Returns the total number of elements in the molecular tree.
    fn number_of_elements(&self) -> usize;

    /// Returns the isotopologue mass of the tree, ignoring any charge.
    fn isotopologue_mass(&self) -> f64;

    /// Reports whether the tree is a noble gas compound.
    fn is_noble_gas_compound(&self) -> bool;

    /// Builds a new tree with isotopic normalization applied; `self` is left
    /// untouched.
    #[must_use]
    fn isotopic_normalization(&self) -> Self;

    /// Verifies Hill ordering of this subtree, given whether carbon is
    /// present in the formula.
    ///
    /// `predecessor` is the element that appeared immediately before the
    /// traversal of the current subtree (or `None` if there was none).
    ///
    /// # Returns
    ///
    /// * `Ok(Some(last_element))` when the subtree is sorted and non-empty.
    /// * `Ok(predecessor)` when the subtree is empty, which is `Ok(None)` if
    ///   no predecessor was supplied.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` if the molecular tree is not Hill sorted.
    // The unit error carries no detail: the only failure is "not sorted".
    #[allow(clippy::result_unit_err)]
    fn check_hill_ordering(
        &self,
        predecessor: Option<Element>,
        has_carbon: bool,
    ) -> Result<Option<Element>, ()>;

    /// Reports whether the whole molecular tree is Hill sorted.
    ///
    /// Carbon presence is looked up once for the entire tree, then the
    /// ordering check starts with no predecessor.
    fn is_hill_sorted(&self) -> bool {
        self.check_hill_ordering(None, self.contains_element(Element::C)).is_ok()
    }
}
/// Trait for molecular trees which can hold a charge.
///
/// Extends [`MolecularTree`] with charge-aware queries; every method here
/// returns an `f64`.
// NOTE(review): the `Charge` type parameter is not used in any method
// signature of this trait; presumably implementors use it for their stored
// charge representation -- confirm against the implementations.
pub trait ChargedMolecularTree<Count, Charge>: MolecularTree<Count> {
/// Returns the charge of the molecular tree.
fn charge(&self) -> f64;
/// Returns the isotopologue mass with charge considered.
///
/// Counterpart of [`MolecularTree::isotopologue_mass`], which is documented
/// as not considering any charge.
// NOTE(review): how the charge adjusts the mass is not visible from this
// trait definition -- confirm against the implementations.
fn isotopologue_mass_with_charge(&self) -> f64;
/// Returns the molar mass.
// NOTE(review): the unit is not stated here; presumably g/mol -- confirm.
fn molar_mass(&self) -> f64;
}
#[cfg(test)]
mod tests {
    use core::str::FromStr;

    use crate::{MolecularFormula, prelude::ChemicalFormula};

    /// Table-driven check of `is_hill_sorted` on parsed formulas, covering
    /// both the carbon-containing and the carbon-free rule sets.
    #[test]
    fn test_is_hill_sorted_cases() {
        // Each entry pairs a formula string with the expected verdict.
        const CASES: &[(&str, bool)] = &[
            // ---- Formulas that start with carbon ----
            // Sorted: C first, then H, then the rest alphabetically.
            ("C2H5O", true),
            // Sorted: C then O, no hydrogen at all.
            ("CO2", true),
            // Sorted: C then H.
            ("CH4", true),
            // Unsorted: carbon repeated back to back.
            ("CC", false),
            // Unsorted: hydrogen repeated back to back (C, H, H).
            ("CHH", false),
            // Unsorted: carbon shows up again later.
            ("COC", false),
            // Unsorted: hydrogen shows up again later (C, H, Br, H).
            ("CHBrH", false),
            // Unsorted: I precedes Br, so the tail is not alphabetical.
            ("CHIBr", false),
            // ---- Formulas that do not start with carbon ----
            // Sorted: "H" < "O".
            ("H2O", true),
            // Sorted: "Cl" < "H" because 'C' < 'H'.
            ("ClH", true),
            // Sorted: a single element.
            ("O2", true),
            // Unsorted: carbon appears after hydrogen.
            ("HC", false),
            // Unsorted: "N" < "O", so O before N is out of order.
            ("ON", false),
            // Unsorted: "H" > "Cl".
            ("HCl", false),
        ];

        for &(formula_str, expected) in CASES {
            let formula = match ChemicalFormula::<u32, i32>::from_str(formula_str) {
                Ok(parsed) => parsed,
                Err(_) => panic!("Failed to parse {formula_str}"),
            };
            let actual = formula.is_hill_sorted();
            assert_eq!(actual, expected, "Mismatch for formula {formula_str}");
        }
    }
}