Skip to main content

vita_core/
element.rs

1use core::fmt;
2use core::num::NonZeroU8;
3
/// A chemical element, identified by its atomic number *Z* — the proton count.
///
/// The atomic number is stored as a [`NonZeroU8`], so every `Z` in `1..=255` is
/// representable and `Option<Element>` occupies a single byte. Equality, ordering
/// and hashing are derived from the wrapped value, so elements compare and sort by
/// atomic number.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct Element(NonZeroU8);
8
/// Highest atomic number with an IUPAC-approved name; above it names are systematic.
///
/// Elements up to and including this value take their symbol and name from the
/// static tables in `data`; higher atomic numbers use the generated systematic
/// symbol and name instead.
const LAST_NAMED: u8 = 118;
11
12impl Element {
13    /// Constructs an element from atomic number `z`, returning `None` if `z` is zero.
14    #[inline]
15    pub const fn new(z: u8) -> Option<Self> {
16        match NonZeroU8::new(z) {
17            Some(z) => Some(Self(z)),
18            None => None,
19        }
20    }
21
22    /// Constructs an element from a [`NonZeroU8`] atomic number directly.
23    #[inline]
24    pub const fn from_nonzero(z: NonZeroU8) -> Self {
25        Self(z)
26    }
27
28    /// Constructs an element from its symbol (e.g. `"C"`, `"Og"`, `"Uue"`),
29    /// returning `None` if no element has that symbol.
30    ///
31    /// The exact, case-sensitive inverse of [`symbol`](Self::symbol).
32    #[inline]
33    pub fn from_symbol(symbol: &str) -> Option<Self> {
34        match symbol.len() {
35            1 | 2 => data::named_from_symbol(symbol),
36            3 => data::systematic_from_symbol(symbol),
37            _ => None,
38        }
39        .and_then(Self::new)
40    }
41
42    /// Returns the atomic number *Z* (the proton count), always greater than zero.
43    #[inline]
44    pub const fn atomic_number(self) -> u8 {
45        self.0.get()
46    }
47
48    /// Returns the element symbol (e.g. `"C"`, `"Og"`).
49    ///
50    /// Elements with `Z > 118` return their IUPAC systematic symbol (e.g. `"Uue"`).
51    #[inline]
52    pub fn symbol(self) -> &'static str {
53        let z = self.0.get();
54        if z <= LAST_NAMED {
55            data::IUPAC_SYMBOLS[z as usize]
56        } else {
57            data::systematic_symbol(z)
58        }
59    }
60
61    /// Returns the element name (e.g. `"Carbon"`, `"Oganesson"`).
62    ///
63    /// Elements with `Z > 118` return their IUPAC systematic name (e.g. `"Ununennium"`).
64    #[inline]
65    pub fn name(self) -> &'static str {
66        let z = self.0.get();
67        if z <= LAST_NAMED {
68            data::IUPAC_NAMES[z as usize]
69        } else {
70            data::systematic_name(z)
71        }
72    }
73
74    /// Returns the period (periodic-table row, `1..=7`), or `None` for `Z > 118`.
75    #[inline]
76    pub const fn period(self) -> Option<u8> {
77        Some(match self.0.get() {
78            1..=2 => 1,
79            3..=10 => 2,
80            11..=18 => 3,
81            19..=36 => 4,
82            37..=54 => 5,
83            55..=86 => 6,
84            87..=118 => 7,
85            _ => return None,
86        })
87    }
88}
89
90impl fmt::Display for Element {
91    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
92        f.write_str(self.symbol())
93    }
94}
95
96/// IUPAC element names and symbols, and the systematic-name generator.
97mod data {
98    /// IUPAC-approved symbols, indexed by atomic number; index `0` is an unused placeholder.
99    #[rustfmt::skip]
100    pub(super) const IUPAC_SYMBOLS: [&str; 119] = [
101        "",
102        "H",                                                                                                                                                                                      "He",
103        "Li", "Be",                                                                                                                                                 "B",  "C",  "N",  "O",  "F",  "Ne",
104        "Na", "Mg",                                                                                                                                                 "Al", "Si", "P",  "S",  "Cl", "Ar",
105        "K",  "Ca",                                                                                     "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr",
106        "Rb", "Sr",                                                                                     "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe",
107        "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn",
108        "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og",
109    ];
110
111    /// IUPAC-approved names, indexed by atomic number; index `0` is an unused placeholder.
112    #[rustfmt::skip]
113    pub(super) const IUPAC_NAMES: [&str; 119] = [
114        "",
115        /*    1-10 */ "Hydrogen", "Helium", "Lithium", "Beryllium", "Boron", "Carbon", "Nitrogen", "Oxygen", "Fluorine", "Neon",
116        /*   11-20 */ "Sodium", "Magnesium", "Aluminium", "Silicon", "Phosphorus", "Sulfur", "Chlorine", "Argon", "Potassium", "Calcium",
117        /*   21-30 */ "Scandium", "Titanium", "Vanadium", "Chromium", "Manganese", "Iron", "Cobalt", "Nickel", "Copper", "Zinc",
118        /*   31-40 */ "Gallium", "Germanium", "Arsenic", "Selenium", "Bromine", "Krypton", "Rubidium", "Strontium", "Yttrium", "Zirconium",
119        /*   41-50 */ "Niobium", "Molybdenum", "Technetium", "Ruthenium", "Rhodium", "Palladium", "Silver", "Cadmium", "Indium", "Tin",
120        /*   51-60 */ "Antimony", "Tellurium", "Iodine", "Xenon", "Caesium", "Barium", "Lanthanum", "Cerium", "Praseodymium", "Neodymium",
121        /*   61-70 */ "Promethium", "Samarium", "Europium", "Gadolinium", "Terbium", "Dysprosium", "Holmium", "Erbium", "Thulium", "Ytterbium",
122        /*   71-80 */ "Lutetium", "Hafnium", "Tantalum", "Tungsten", "Rhenium", "Osmium", "Iridium", "Platinum", "Gold", "Mercury",
123        /*   81-90 */ "Thallium", "Lead", "Bismuth", "Polonium", "Astatine", "Radon", "Francium", "Radium", "Actinium", "Thorium",
124        /*  91-100 */ "Protactinium", "Uranium", "Neptunium", "Plutonium", "Americium", "Curium", "Berkelium", "Californium", "Einsteinium", "Fermium",
125        /* 101-110 */ "Mendelevium", "Nobelium", "Lawrencium", "Rutherfordium", "Dubnium", "Seaborgium", "Bohrium", "Hassium", "Meitnerium", "Darmstadtium",
126        /* 111-118 */ "Roentgenium", "Copernicium", "Nihonium", "Flerovium", "Moscovium", "Livermorium", "Tennessine", "Oganesson",
127    ];
128
129    /// Returns the atomic number of the IUPAC-named element with this `symbol`, if any.
130    pub(super) fn named_from_symbol(symbol: &str) -> Option<u8> {
131        IUPAC_SYMBOLS[1..]
132            .iter()
133            .position(|&s| s == symbol)
134            .map(|i| i as u8 + 1)
135    }
136
137    /// Initial letter of each digit root, for systematic symbols.
138    const ROOT_INITIAL: [u8; 10] = [b'n', b'u', b'b', b't', b'q', b'p', b'h', b's', b'o', b'e'];
139
140    /// Digit roots for systematic names: `0..=9`.
141    const ROOT: [&str; 10] = [
142        "nil", "un", "bi", "tri", "quad", "pent", "hex", "sept", "oct", "enn",
143    ];
144
145    /// Lowest atomic number named systematically (one above [`super::LAST_NAMED`]).
146    const SYSTEMATIC_LO: u16 = (super::LAST_NAMED as u16) + 1;
147    /// Highest representable atomic number ([`u8::MAX`]).
148    const SYSTEMATIC_HI: u16 = u8::MAX as u16;
149    const SYSTEMATIC_LEN: usize = (SYSTEMATIC_HI - SYSTEMATIC_LO + 1) as usize;
150    /// Upper bound on a systematic name length (three four-letter roots plus `"ium"`).
151    const NAME_CAP: usize = 16;
152
153    /// Compile-time table of IUPAC systematic names for `Z` in `SYSTEMATIC_LO..=SYSTEMATIC_HI`.
154    struct Systematic {
155        name: [[u8; NAME_CAP]; SYSTEMATIC_LEN],
156        name_len: [u8; SYSTEMATIC_LEN],
157        symbol: [[u8; 3]; SYSTEMATIC_LEN],
158    }
159
160    static SYSTEMATIC: Systematic = build();
161
162    /// Returns the systematic symbol for `z`, sliced from the static table.
163    pub(super) fn systematic_symbol(z: u8) -> &'static str {
164        let idx = (z as u16 - SYSTEMATIC_LO) as usize;
165        // SAFETY: `build` writes only ASCII digit-root initials.
166        unsafe { core::str::from_utf8_unchecked(&SYSTEMATIC.symbol[idx]) }
167    }
168
169    /// Returns the systematic name for `z`, sliced from the static table.
170    pub(super) fn systematic_name(z: u8) -> &'static str {
171        let idx = (z as u16 - SYSTEMATIC_LO) as usize;
172        let len = SYSTEMATIC.name_len[idx] as usize;
173        // SAFETY: `build` writes only ASCII digit-root letters within `len`.
174        unsafe { core::str::from_utf8_unchecked(&SYSTEMATIC.name[idx][..len]) }
175    }
176
177    /// Returns the atomic number for a three-letter systematic `symbol`, if it denotes
178    /// an element in `SYSTEMATIC_LO..=SYSTEMATIC_HI`.
179    pub(super) fn systematic_from_symbol(symbol: &str) -> Option<u8> {
180        let &[c0, c1, c2] = symbol.as_bytes() else {
181            return None;
182        };
183        if !c0.is_ascii_uppercase() {
184            return None;
185        }
186        let d0 = digit_of_initial(c0.to_ascii_lowercase())? as u16;
187        let d1 = digit_of_initial(c1)? as u16;
188        let d2 = digit_of_initial(c2)? as u16;
189        let z = 100 * d0 + 10 * d1 + d2;
190        if (SYSTEMATIC_LO..=SYSTEMATIC_HI).contains(&z) {
191            Some(z as u8)
192        } else {
193            None
194        }
195    }
196
197    /// Returns the digit whose systematic root has the lowercase initial `b`.
198    fn digit_of_initial(b: u8) -> Option<u8> {
199        ROOT_INITIAL.iter().position(|&r| r == b).map(|d| d as u8)
200    }
201
202    /// Materializes the systematic table at compile time (IUPAC 1979 rules).
203    const fn build() -> Systematic {
204        let mut t = Systematic {
205            name: [[0; NAME_CAP]; SYSTEMATIC_LEN],
206            name_len: [0; SYSTEMATIC_LEN],
207            symbol: [[0; 3]; SYSTEMATIC_LEN],
208        };
209        // Every `Z` in this range has exactly three digits.
210        let mut z = SYSTEMATIC_LO;
211        while z <= SYSTEMATIC_HI {
212            let idx = (z - SYSTEMATIC_LO) as usize;
213            let d0 = (z / 100) as usize;
214            let d1 = (z / 10 % 10) as usize;
215            let d2 = (z % 10) as usize;
216
217            // Symbol: three digit-root initials, the first capitalized.
218            t.symbol[idx][0] = ascii_upper(ROOT_INITIAL[d0]);
219            t.symbol[idx][1] = ROOT_INITIAL[d1];
220            t.symbol[idx][2] = ROOT_INITIAL[d2];
221
222            // Name: roots concatenated with elisions, then the `-ium` suffix.
223            let mut len = append(&mut t.name[idx], 0, ROOT[d0]);
224            // `enn` (9) before `nil` (0) drops one `n`: write `"il"` for the `nil`.
225            len = append(
226                &mut t.name[idx],
227                len,
228                if d1 == 0 && d0 == 9 { "il" } else { ROOT[d1] },
229            );
230            len = append(
231                &mut t.name[idx],
232                len,
233                if d2 == 0 && d1 == 9 { "il" } else { ROOT[d2] },
234            );
235            // A final `bi` (2) or `tri` (3) drops its trailing `i` before `-ium`.
236            len = append(
237                &mut t.name[idx],
238                len,
239                if d2 == 2 || d2 == 3 { "um" } else { "ium" },
240            );
241
242            t.name[idx][0] = ascii_upper(t.name[idx][0]);
243            t.name_len[idx] = len as u8;
244            z += 1;
245        }
246        t
247    }
248
249    /// Appends `s` to `buf` starting at `at`, returning the new length.
250    const fn append(buf: &mut [u8; NAME_CAP], mut at: usize, s: &str) -> usize {
251        let b = s.as_bytes();
252        let mut i = 0;
253        while i < b.len() {
254            buf[at] = b[i];
255            at += 1;
256            i += 1;
257        }
258        at
259    }
260
261    /// Uppercases an ASCII lowercase byte; leaves other bytes unchanged.
262    const fn ascii_upper(b: u8) -> u8 {
263        if b.is_ascii_lowercase() { b - 32 } else { b }
264    }
265}
266
/// Unit tests for [`Element`]: construction, symbol/name lookup in both the
/// IUPAC-named and the systematic range, periods, and the derived traits.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the element with atomic number `z`, which must be non-zero.
    fn el(z: u8) -> Element {
        Element::new(z).expect("atomic number must be non-zero")
    }

    #[test]
    fn new() {
        assert!(Element::new(0).is_none());
        assert!(Element::new(1).is_some());
        assert!(Element::new(255).is_some());
    }

    #[test]
    fn from_nonzero() {
        let z = NonZeroU8::new(6).unwrap();
        assert_eq!(Element::from_nonzero(z).atomic_number(), 6);
    }

    #[test]
    fn from_symbol_named() {
        assert_eq!(Element::from_symbol("H"), Element::new(1));
        assert_eq!(Element::from_symbol("C"), Element::new(6));
        assert_eq!(Element::from_symbol("Fe"), Element::new(26));
        assert_eq!(Element::from_symbol("Og"), Element::new(118));
    }

    #[test]
    fn from_symbol_systematic() {
        assert_eq!(Element::from_symbol("Uue"), Element::new(119));
        assert_eq!(Element::from_symbol("Ubn"), Element::new(120));
        assert_eq!(Element::from_symbol("Uen"), Element::new(190));
        assert_eq!(Element::from_symbol("Bpp"), Element::new(255));
    }

    #[test]
    fn from_symbol_is_case_sensitive() {
        assert!(Element::from_symbol("h").is_none());
        assert!(Element::from_symbol("fe").is_none());
        assert!(Element::from_symbol("FE").is_none());
        assert!(Element::from_symbol("uue").is_none());
        assert!(Element::from_symbol("UUE").is_none());
    }

    #[test]
    fn from_symbol_rejects_systematic_form_of_named_element() {
        assert!(Element::from_symbol("Unn").is_none());
        assert!(Element::from_symbol("Uuo").is_none());
    }

    #[test]
    fn from_symbol_rejects_systematic_form_out_of_range() {
        // Well-formed systematic symbols whose value does not fit the `u8` range.
        assert!(Element::from_symbol("Bph").is_none()); // 256
        assert!(Element::from_symbol("Eee").is_none()); // 999
        // A leading `nil` digit gives a value below the systematic range.
        assert!(Element::from_symbol("Nnn").is_none()); // 000
    }

    #[test]
    fn from_symbol_rejects_unknown() {
        assert!(Element::from_symbol("").is_none());
        assert!(Element::from_symbol("Xx").is_none());
        assert!(Element::from_symbol("Uux").is_none());
        assert!(Element::from_symbol("Carbon").is_none());
    }

    #[test]
    fn from_symbol_rejects_non_ascii() {
        // Dispatch is by byte length, so multi-byte characters land in the two- and
        // three-byte lookups and must be rejected there without panicking.
        assert!(Element::from_symbol("é").is_none()); // 2 bytes
        assert!(Element::from_symbol("€").is_none()); // 3 bytes
        assert!(Element::from_symbol("Uñ").is_none()); // 3 bytes
    }

    #[test]
    fn atomic_number_roundtrip() {
        assert_eq!(el(92).atomic_number(), 92);
    }

    #[test]
    fn symbol_named() {
        assert_eq!(el(1).symbol(), "H");
        assert_eq!(el(6).symbol(), "C");
        assert_eq!(el(118).symbol(), "Og");
    }

    #[test]
    fn name_named() {
        assert_eq!(el(1).name(), "Hydrogen");
        assert_eq!(el(6).name(), "Carbon");
        assert_eq!(el(118).name(), "Oganesson");
    }

    #[test]
    fn symbol_systematic() {
        assert_eq!(el(119).symbol(), "Uue");
        assert_eq!(el(120).symbol(), "Ubn");
        assert_eq!(el(190).symbol(), "Uen");
        assert_eq!(el(255).symbol(), "Bpp");
    }

    #[test]
    fn name_systematic() {
        assert_eq!(el(119).name(), "Ununennium");
        assert_eq!(el(120).name(), "Unbinilium");
        assert_eq!(el(122).name(), "Unbibium");
        assert_eq!(el(123).name(), "Unbitrium");
        assert_eq!(el(190).name(), "Unennilium");
        assert_eq!(el(255).name(), "Bipentpentium");
    }

    #[test]
    fn symbol_roundtrip() {
        for z in 1..=u8::MAX {
            let e = el(z);
            assert_eq!(Element::from_symbol(e.symbol()), Some(e));
        }
    }

    #[test]
    fn period() {
        assert_eq!(el(1).period(), Some(1));
        assert_eq!(el(2).period(), Some(1));
        assert_eq!(el(3).period(), Some(2));
        assert_eq!(el(18).period(), Some(3));
        assert_eq!(el(118).period(), Some(7));
    }

    #[test]
    fn period_covers_every_row() {
        // (first Z, last Z, period) for each row of the periodic table. Checking the
        // whole span catches an off-by-one at either edge of any row.
        const ROWS: [(u8, u8, u8); 7] = [
            (1, 2, 1),
            (3, 10, 2),
            (11, 18, 3),
            (19, 36, 4),
            (37, 54, 5),
            (55, 86, 6),
            (87, 118, 7),
        ];
        for &(first, last, row) in ROWS.iter() {
            for z in first..=last {
                assert_eq!(el(z).period(), Some(row), "Z = {}", z);
            }
        }
    }

    #[test]
    fn period_is_none_beyond_named() {
        for z in 119..=u8::MAX {
            assert_eq!(el(z).period(), None, "Z = {}", z);
        }
    }

    #[test]
    fn copy_and_clone() {
        let a = el(6);
        let b = a;
        let c = ::core::clone::Clone::clone(&a);
        assert_eq!(a, b);
        assert_eq!(a, c);
    }

    #[test]
    fn eq() {
        let a = el(6);
        assert_eq!(a, el(6));
        assert_ne!(a, el(7));
    }

    #[test]
    fn ord() {
        let h = el(1);
        let c = el(6);
        assert!(h < c);
        assert!(c > h);
        assert_eq!(c.cmp(&c), ::core::cmp::Ordering::Equal);
    }

    #[test]
    fn debug() {
        assert_eq!(format!("{:?}", el(6)), "Element(6)");
    }

    #[test]
    fn display() {
        assert_eq!(format!("{}", el(6)), "C");
        assert_eq!(format!("{}", el(119)), "Uue");
    }

    #[test]
    fn option_is_same_size_as_u8() {
        assert_eq!(
            ::core::mem::size_of::<Option<Element>>(),
            ::core::mem::size_of::<u8>()
        );
    }
}