1use anyhow::{anyhow, Result};
2use lazy_static::lazy_static;
3use std::collections::HashMap;
4
5use super::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
6use crate::data::datatable::DataValue;
7
/// One entry of the built-in compound table: a chemical formula together with
/// the names it can be looked up by.
#[derive(Debug, Clone)]
struct Molecule {
    /// Chemical formula, e.g. `"H2O"`.
    formula: &'static str,
    /// Names that resolve to `formula`. Lookups upper-case the query before
    /// matching, so entries are expected to be written in upper case.
    names: &'static [&'static str],
    /// Coarse grouping label such as `"Inorganic"` or `"Acid"`.
    /// NOTE(review): not read anywhere in the visible part of this file.
    category: &'static str,
}
15
lazy_static! {
    /// Built-in table of known compounds, grouped by kind.
    static ref MOLECULE_TABLE: Vec<Molecule> = vec![
        // Water and hydrogen peroxide
        Molecule {
            formula: "H2O",
            names: &["WATER"],
            category: "Inorganic"
        },
        Molecule {
            formula: "H2O2",
            names: &["HYDROGEN PEROXIDE"],
            category: "Inorganic"
        },

        // Other simple inorganic molecules.
        // "CO2" and "CO" are also listed as names so the formula itself can be
        // used as a lookup key.
        Molecule {
            formula: "NH3",
            names: &["AMMONIA"],
            category: "Inorganic"
        },
        Molecule {
            formula: "CO2",
            names: &["CARBON DIOXIDE", "CO2"],
            category: "Inorganic"
        },
        Molecule {
            formula: "CO",
            names: &["CARBON MONOXIDE", "CO"],
            category: "Inorganic"
        },
        Molecule {
            formula: "O2",
            names: &["OXYGEN", "DIOXYGEN"],
            category: "Inorganic"
        },
        Molecule {
            formula: "N2",
            names: &["NITROGEN", "DINITROGEN"],
            category: "Inorganic"
        },
        Molecule {
            formula: "O3",
            names: &["OZONE"],
            category: "Inorganic"
        },

        // Hydrocarbons
        Molecule {
            formula: "CH4",
            names: &["METHANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C2H6",
            names: &["ETHANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C3H8",
            names: &["PROPANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C4H10",
            names: &["BUTANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C5H12",
            names: &["PENTANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C6H14",
            names: &["HEXANE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C2H4",
            names: &["ETHENE", "ETHYLENE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C2H2",
            names: &["ETHYNE", "ACETYLENE"],
            category: "Hydrocarbon"
        },
        Molecule {
            formula: "C6H6",
            names: &["BENZENE"],
            category: "Hydrocarbon"
        },

        // Sugars. Several entries share a formula (glucose/fructose,
        // sucrose/lactose); only the names distinguish them.
        Molecule {
            formula: "C6H12O6",
            names: &["GLUCOSE", "DEXTROSE"],
            category: "Sugar"
        },
        Molecule {
            formula: "C6H12O6",
            names: &["FRUCTOSE"],
            category: "Sugar"
        },
        Molecule {
            formula: "C12H22O11",
            names: &["SUCROSE", "TABLE SUGAR"],
            category: "Sugar"
        },
        Molecule {
            formula: "C12H22O11",
            names: &["LACTOSE", "MILK SUGAR"],
            category: "Sugar"
        },

        // Salts and minerals
        Molecule {
            formula: "NaCl",
            names: &["SALT", "TABLE SALT", "SODIUM CHLORIDE"],
            category: "Salt"
        },
        Molecule {
            formula: "NaHCO3",
            names: &["BAKING SODA", "SODIUM BICARBONATE"],
            category: "Salt"
        },
        Molecule {
            formula: "CaCO3",
            names: &["CALCIUM CARBONATE", "LIMESTONE", "CHALK"],
            category: "Mineral"
        },
        Molecule {
            formula: "CaSO4",
            names: &["CALCIUM SULFATE", "GYPSUM"],
            category: "Mineral"
        },

        // Acids
        Molecule {
            formula: "HCl",
            names: &["HYDROCHLORIC ACID"],
            category: "Acid"
        },
        Molecule {
            formula: "H2SO4",
            names: &["SULFURIC ACID"],
            category: "Acid"
        },
        Molecule {
            formula: "HNO3",
            names: &["NITRIC ACID"],
            category: "Acid"
        },
        Molecule {
            formula: "H3PO4",
            names: &["PHOSPHORIC ACID"],
            category: "Acid"
        },
        Molecule {
            formula: "CH3COOH",
            names: &["ACETIC ACID", "VINEGAR"],
            category: "Acid"
        },

        // Alcohols and other organic compounds
        Molecule {
            formula: "C2H5OH",
            names: &["ETHANOL", "ALCOHOL", "ETHYL ALCOHOL"],
            category: "Alcohol"
        },
        Molecule {
            formula: "CH3OH",
            names: &["METHANOL", "METHYL ALCOHOL"],
            category: "Alcohol"
        },
        Molecule {
            formula: "C3H8O",
            names: &["ISOPROPANOL", "ISOPROPYL ALCOHOL", "RUBBING ALCOHOL"],
            category: "Alcohol"
        },
        Molecule {
            formula: "CH3COCH3",
            names: &["ACETONE"],
            category: "Organic"
        },
        Molecule {
            formula: "C8H10N4O2",
            names: &["CAFFEINE"],
            category: "Organic"
        },
        Molecule {
            formula: "C9H8O4",
            names: &["ASPIRIN", "ACETYLSALICYLIC ACID"],
            category: "Organic"
        },

        // Vitamins
        Molecule {
            formula: "C6H8O6",
            names: &["VITAMIN C", "ASCORBIC ACID"],
            category: "Vitamin"
        },
    ];

    /// Name -> formula index built from `MOLECULE_TABLE`.
    ///
    /// Keys are the table names exactly as written, so callers must upper-case
    /// their query before looking it up. If two table entries ever shared a
    /// name, the later entry would overwrite the earlier one.
    static ref MOLECULE_LOOKUP: HashMap<String, &'static str> = {
        let mut map = HashMap::new();
        for molecule in MOLECULE_TABLE.iter() {
            for name in molecule.names {
                map.insert((*name).to_string(), molecule.formula);
            }
        }
        map
    };
}
232
233#[derive(Debug, Clone)]
235struct MolecularFormula {
236 elements: Vec<(String, usize)>, }
238
239impl MolecularFormula {
240 fn parse(formula: &str) -> Result<Self> {
242 let formula = formula.trim();
243
244 if let Some(expanded) = Self::get_compound_alias(formula) {
246 return Self::parse_formula(expanded);
247 }
248
249 Self::parse_formula(formula)
250 }
251
252 fn get_compound_alias(name: &str) -> Option<&'static str> {
254 let name_upper = name.to_uppercase();
255 MOLECULE_LOOKUP.get(&name_upper).copied()
256 }
257
258 fn parse_formula(formula: &str) -> Result<Self> {
260 let mut elements = Vec::new();
261 let mut chars = formula.chars().peekable();
262
263 while chars.peek().is_some() {
264 if chars.peek() == Some(&'(') {
266 chars.next(); let mut group = String::new();
268 let mut depth = 1;
269
270 for ch in chars.by_ref() {
271 if ch == '(' {
272 depth += 1;
273 group.push(ch);
274 } else if ch == ')' {
275 depth -= 1;
276 if depth == 0 {
277 break;
278 }
279 group.push(ch);
280 } else {
281 group.push(ch);
282 }
283 }
284
285 let multiplier = Self::parse_number(&mut chars).unwrap_or(1);
287
288 let group_formula = Self::parse_formula(&group)?;
290 for (elem, count) in group_formula.elements {
291 elements.push((elem, count * multiplier));
292 }
293 } else {
294 let element = Self::parse_element(&mut chars)?;
296 let count = Self::parse_number(&mut chars).unwrap_or(1);
297
298 if let Some((_, existing_count)) = elements.iter_mut().find(|(e, _)| e == &element)
300 {
301 *existing_count += count;
302 } else {
303 elements.push((element, count));
304 }
305 }
306 }
307
308 Ok(MolecularFormula { elements })
309 }
310
311 fn parse_element(chars: &mut std::iter::Peekable<std::str::Chars>) -> Result<String> {
313 let mut element = String::new();
314
315 if let Some(ch) = chars.peek() {
317 if ch.is_uppercase() {
318 element.push(chars.next().unwrap());
319 } else {
320 return Err(anyhow!("Expected uppercase letter for element symbol"));
321 }
322 } else {
323 return Err(anyhow!("Unexpected end of formula"));
324 }
325
326 while let Some(&ch) = chars.peek() {
328 if ch.is_lowercase() {
329 element.push(chars.next().unwrap());
330 } else {
331 break;
332 }
333 }
334
335 Ok(element)
336 }
337
338 fn parse_number(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<usize> {
340 let mut num_str = String::new();
341
342 while let Some(&ch) = chars.peek() {
343 if ch.is_ascii_digit() {
344 num_str.push(chars.next().unwrap());
345 } else {
346 break;
347 }
348 }
349
350 if num_str.is_empty() {
351 None
352 } else {
353 num_str.parse().ok()
354 }
355 }
356
357 fn calculate_mass(&self) -> Result<f64> {
359 let mut total_mass = 0.0;
360
361 for (element, count) in &self.elements {
362 let atomic_mass = AtomicMassFunction::get_atomic_mass(element)
363 .ok_or_else(|| anyhow!("Unknown element: {}", element))?;
364 total_mass += atomic_mass * (*count as f64);
365 }
366
367 Ok(total_mass)
368 }
369}
370
371pub struct AvogadroFunction;
373
374impl SqlFunction for AvogadroFunction {
375 fn signature(&self) -> FunctionSignature {
376 FunctionSignature {
377 name: "AVOGADRO",
378 category: FunctionCategory::Chemical,
379 arg_count: ArgCount::Fixed(0),
380 description: "Returns Avogadro's number (6.022 × 10^23)",
381 returns: "FLOAT",
382 examples: vec![
383 "SELECT AVOGADRO()",
384 "SELECT molecules / AVOGADRO() AS moles",
385 ],
386 }
387 }
388
389 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
390 self.validate_args(args)?;
391 Ok(DataValue::Float(6.02214076e23))
392 }
393}
394
/// SQL function `ATOMIC_MASS`: atomic mass of an element, or molecular mass of
/// a formula or named compound.
pub struct AtomicMassFunction;

impl AtomicMassFunction {
    /// Returns the atomic mass (amu) of an element given its symbol or English
    /// name.
    ///
    /// Matching ignores case and surrounding whitespace, so `"fe"`, `"Fe"`,
    /// `" FE "` and `"Iron"` are all accepted. Returns `None` for anything that
    /// is not in the built-in table.
    ///
    /// The table is a `match` rather than a map so that no allocation-heavy
    /// lookup structure is rebuilt on every call.
    fn get_atomic_mass(element: &str) -> Option<f64> {
        let mass = match element.trim().to_uppercase().as_str() {
            "H" | "HYDROGEN" => 1.008,
            "HE" | "HELIUM" => 4.003,
            "LI" | "LITHIUM" => 6.941,
            "BE" | "BERYLLIUM" => 9.012,
            "B" | "BORON" => 10.81,
            "C" | "CARBON" => 12.01,
            "N" | "NITROGEN" => 14.01,
            "O" | "OXYGEN" => 16.00,
            "F" | "FLUORINE" => 19.00,
            "NE" | "NEON" => 20.18,
            "NA" | "SODIUM" => 22.99,
            "MG" | "MAGNESIUM" => 24.31,
            "AL" | "ALUMINUM" | "ALUMINIUM" => 26.98,
            "SI" | "SILICON" => 28.09,
            "P" | "PHOSPHORUS" => 30.97,
            "S" | "SULFUR" | "SULPHUR" => 32.07,
            "CL" | "CHLORINE" => 35.45,
            "AR" | "ARGON" => 39.95,
            "K" | "POTASSIUM" => 39.10,
            "CA" | "CALCIUM" => 40.08,
            "FE" | "IRON" => 55.85,
            "CU" | "COPPER" => 63.55,
            "ZN" | "ZINC" => 65.39,
            "AG" | "SILVER" => 107.87,
            "AU" | "GOLD" => 196.97,
            "HG" | "MERCURY" => 200.59,
            "PB" | "LEAD" => 207.2,
            "U" | "URANIUM" => 238.03,
            _ => return None,
        };

        Some(mass)
    }
}
487
488impl SqlFunction for AtomicMassFunction {
489 fn signature(&self) -> FunctionSignature {
490 FunctionSignature {
491 name: "ATOMIC_MASS",
492 category: FunctionCategory::Chemical,
493 arg_count: ArgCount::Fixed(1),
494 description: "Returns the atomic mass of an element or molecular formula in amu",
495 returns: "FLOAT",
496 examples: vec![
497 "SELECT ATOMIC_MASS('H')",
498 "SELECT ATOMIC_MASS('Carbon')",
499 "SELECT ATOMIC_MASS('H2O') AS water_mass",
500 "SELECT ATOMIC_MASS('Ca(OH)2') AS calcium_hydroxide",
501 "SELECT ATOMIC_MASS('water') AS water_mass",
502 ],
503 }
504 }
505
506 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
507 self.validate_args(args)?;
508
509 match &args[0] {
510 DataValue::String(input) => {
511 if let Some(mass) = Self::get_atomic_mass(input) {
513 return Ok(DataValue::Float(mass));
514 }
515
516 match MolecularFormula::parse(input) {
518 Ok(formula) => {
519 let mass = formula.calculate_mass()?;
520 Ok(DataValue::Float(mass))
521 }
522 Err(_) => Err(anyhow!(
523 "Unknown element or invalid molecular formula: {}",
524 input
525 )),
526 }
527 }
528 DataValue::InternedString(input) => {
529 if let Some(mass) = Self::get_atomic_mass(input) {
531 return Ok(DataValue::Float(mass));
532 }
533
534 match MolecularFormula::parse(input) {
536 Ok(formula) => {
537 let mass = formula.calculate_mass()?;
538 Ok(DataValue::Float(mass))
539 }
540 Err(_) => Err(anyhow!(
541 "Unknown element or invalid molecular formula: {}",
542 input
543 )),
544 }
545 }
546 _ => Err(anyhow!("ATOMIC_MASS() requires a string argument")),
547 }
548 }
549}
550
/// SQL function `ATOMIC_NUMBER`: atomic number of an element.
pub struct AtomicNumberFunction;

impl AtomicNumberFunction {
    /// Returns the atomic number (proton count) of an element given its symbol
    /// or English name.
    ///
    /// Matching ignores case and surrounding whitespace. Returns `None` for
    /// anything that is not in the built-in table.
    ///
    /// The table is a `match` rather than a map so that no lookup structure is
    /// rebuilt on every call.
    fn get_atomic_number(element: &str) -> Option<i64> {
        let number = match element.trim().to_uppercase().as_str() {
            "H" | "HYDROGEN" => 1,
            "HE" | "HELIUM" => 2,
            "LI" | "LITHIUM" => 3,
            "BE" | "BERYLLIUM" => 4,
            "B" | "BORON" => 5,
            "C" | "CARBON" => 6,
            "N" | "NITROGEN" => 7,
            "O" | "OXYGEN" => 8,
            "F" | "FLUORINE" => 9,
            "NE" | "NEON" => 10,
            "NA" | "SODIUM" => 11,
            "MG" | "MAGNESIUM" => 12,
            "AL" | "ALUMINUM" | "ALUMINIUM" => 13,
            "SI" | "SILICON" => 14,
            "P" | "PHOSPHORUS" => 15,
            "S" | "SULFUR" | "SULPHUR" => 16,
            "CL" | "CHLORINE" => 17,
            "AR" | "ARGON" => 18,
            "K" | "POTASSIUM" => 19,
            "CA" | "CALCIUM" => 20,
            "FE" | "IRON" => 26,
            "CU" | "COPPER" => 29,
            "ZN" | "ZINC" => 30,
            "AG" | "SILVER" => 47,
            "AU" | "GOLD" => 79,
            "HG" | "MERCURY" => 80,
            "PB" | "LEAD" => 82,
            "U" | "URANIUM" => 92,
            _ => return None,
        };

        Some(number)
    }
}
624
625impl SqlFunction for AtomicNumberFunction {
626 fn signature(&self) -> FunctionSignature {
627 FunctionSignature {
628 name: "ATOMIC_NUMBER",
629 category: FunctionCategory::Chemical,
630 arg_count: ArgCount::Fixed(1),
631 description: "Returns the atomic number of an element",
632 returns: "INTEGER",
633 examples: vec![
634 "SELECT ATOMIC_NUMBER('H')",
635 "SELECT ATOMIC_NUMBER('Carbon')",
636 "SELECT ATOMIC_NUMBER('Au') AS gold_number",
637 ],
638 }
639 }
640
641 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
642 self.validate_args(args)?;
643
644 match &args[0] {
645 DataValue::String(element) => match Self::get_atomic_number(element) {
646 Some(number) => Ok(DataValue::Integer(number)),
647 None => Err(anyhow!("Unknown element: {}", element)),
648 },
649 DataValue::InternedString(element) => match Self::get_atomic_number(element) {
650 Some(number) => Ok(DataValue::Integer(number)),
651 None => Err(anyhow!("Unknown element: {}", element)),
652 },
653 _ => Err(anyhow!("ATOMIC_NUMBER() requires a string argument")),
654 }
655 }
656}
657
658pub struct NeutronsFunction;
660
661impl SqlFunction for NeutronsFunction {
662 fn signature(&self) -> FunctionSignature {
663 FunctionSignature {
664 name: "NEUTRONS",
665 category: FunctionCategory::Chemical,
666 arg_count: ArgCount::Fixed(1),
667 description: "Returns the number of neutrons in the most common isotope",
668 returns: "INTEGER",
669 examples: vec![
670 "SELECT NEUTRONS('C')", "SELECT NEUTRONS('U')", "SELECT NEUTRONS('Gold')", ],
674 }
675 }
676
677 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
678 self.validate_args(args)?;
679
680 let element = match &args[0] {
681 DataValue::String(s) => s.as_str(),
682 DataValue::InternedString(s) => s.as_str(),
683 _ => return Err(anyhow!("NEUTRONS() requires a string argument")),
684 };
685
686 let protons = AtomicNumberFunction::get_atomic_number(element)
688 .ok_or_else(|| anyhow!("Unknown element: {}", element))?;
689
690 let atomic_mass = AtomicMassFunction::get_atomic_mass(element)
692 .ok_or_else(|| anyhow!("Unknown element: {}", element))?;
693
694 let mass_number = atomic_mass.round() as i64;
696
697 let neutrons = mass_number - protons;
699
700 Ok(DataValue::Integer(neutrons))
701 }
702}
703
704pub struct MoleculeFormulaFunction;
706
707impl SqlFunction for MoleculeFormulaFunction {
708 fn signature(&self) -> FunctionSignature {
709 FunctionSignature {
710 name: "MOLECULE_FORMULA",
711 category: FunctionCategory::Chemical,
712 arg_count: ArgCount::Fixed(1),
713 description: "Returns the molecular formula for a compound name",
714 returns: "STRING",
715 examples: vec![
716 "SELECT MOLECULE_FORMULA('water')",
717 "SELECT MOLECULE_FORMULA('glucose')",
718 "SELECT MOLECULE_FORMULA('caffeine')",
719 ],
720 }
721 }
722
723 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
724 self.validate_args(args)?;
725
726 let input = match &args[0] {
727 DataValue::String(s) => s.clone(),
728 DataValue::InternedString(s) => s.to_string(),
729 _ => return Err(anyhow!("MOLECULE_FORMULA expects a string")),
730 };
731
732 let upper_input = input.to_uppercase();
733
734 if let Some(formula) = MOLECULE_LOOKUP.get(&upper_input) {
736 return Ok(DataValue::String((*formula).to_string()));
737 }
738
739 Err(anyhow!("Unknown molecule: {}", input))
741 }
742}
743
#[cfg(test)]
mod tests {
    use super::*;

    /// Evaluates `func` with a single string argument.
    fn eval_str<F: SqlFunction>(func: &F, input: &str) -> Result<DataValue> {
        func.evaluate(&[DataValue::String(input.to_string())])
    }

    /// Unwraps a float result, failing the test for any other variant.
    fn expect_float(value: DataValue) -> f64 {
        match value {
            DataValue::Float(val) => val,
            _ => panic!("Expected Float"),
        }
    }

    /// Unwraps an integer result, failing the test for any other variant.
    fn expect_integer(value: DataValue) -> i64 {
        match value {
            DataValue::Integer(val) => val,
            _ => panic!("Expected Integer"),
        }
    }

    #[test]
    fn test_avogadro() {
        let val = expect_float(AvogadroFunction.evaluate(&[]).unwrap());
        // Compare against the value the function actually returns, with a
        // tolerance far below the last significant digit.
        assert!((val - 6.02214076e23).abs() < 1e15);
    }

    #[test]
    fn test_atomic_mass_hydrogen() {
        let val = expect_float(eval_str(&AtomicMassFunction, "H").unwrap());
        assert!((val - 1.008).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_carbon() {
        let val = expect_float(eval_str(&AtomicMassFunction, "Carbon").unwrap());
        assert!((val - 12.01).abs() < 0.01);
    }

    #[test]
    fn test_atomic_mass_gold() {
        let val = expect_float(eval_str(&AtomicMassFunction, "Au").unwrap());
        assert!((val - 196.97).abs() < 0.01);
    }

    #[test]
    fn test_atomic_mass_unknown_element() {
        assert!(eval_str(&AtomicMassFunction, "Xyz").is_err());
    }

    #[test]
    fn test_atomic_mass_simple_formula() {
        // 2 * 1.008 + 16.00
        let val = expect_float(eval_str(&AtomicMassFunction, "H2O").unwrap());
        assert!((val - 18.016).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_grouped_formula() {
        // 40.08 + 2 * (16.00 + 1.008)
        let val = expect_float(eval_str(&AtomicMassFunction, "Ca(OH)2").unwrap());
        assert!((val - 74.096).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_compound_name() {
        let val = expect_float(eval_str(&AtomicMassFunction, "water").unwrap());
        assert!((val - 18.016).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_rejects_non_string() {
        assert!(AtomicMassFunction
            .evaluate(&[DataValue::Integer(1)])
            .is_err());
    }

    #[test]
    fn test_atomic_number_carbon() {
        let val = expect_integer(eval_str(&AtomicNumberFunction, "C").unwrap());
        assert_eq!(val, 6);
    }

    #[test]
    fn test_atomic_number_by_name() {
        let val = expect_integer(eval_str(&AtomicNumberFunction, "gold").unwrap());
        assert_eq!(val, 79);
    }

    #[test]
    fn test_atomic_number_unknown_element() {
        assert!(eval_str(&AtomicNumberFunction, "Xyz").is_err());
    }

    #[test]
    fn test_neutrons_carbon() {
        let val = expect_integer(eval_str(&NeutronsFunction, "C").unwrap());
        assert_eq!(val, 6);
    }

    #[test]
    fn test_neutrons_uranium() {
        let val = expect_integer(eval_str(&NeutronsFunction, "U").unwrap());
        assert_eq!(val, 146);
    }

    #[test]
    fn test_molecule_formula_known_names() {
        match eval_str(&MoleculeFormulaFunction, "water").unwrap() {
            DataValue::String(formula) => assert_eq!(formula, "H2O"),
            _ => panic!("Expected String"),
        }
        match eval_str(&MoleculeFormulaFunction, "Glucose").unwrap() {
            DataValue::String(formula) => assert_eq!(formula, "C6H12O6"),
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_molecule_formula_unknown_name() {
        assert!(eval_str(&MoleculeFormulaFunction, "unobtainium").is_err());
    }
}