1use anyhow::{anyhow, Result};
2use lazy_static::lazy_static;
3use std::collections::HashMap;
4
5use super::{ArgCount, FunctionCategory, FunctionSignature, SqlFunction};
6use crate::data::datatable::DataValue;
7
/// One row of the built-in compound table.
#[derive(Debug, Clone)]
struct Molecule {
    /// Chemical formula as written in the table, e.g. `"H2O"`.
    formula: &'static str,
    /// Names the compound can be looked up by; the table spells all of them in upper case.
    names: &'static [&'static str],
    /// Grouping label for the row, such as `"Inorganic"` or `"Sugar"`.
    category: &'static str,
}
15
16lazy_static! {
17 static ref MOLECULE_TABLE: Vec<Molecule> = vec![
19 Molecule {
21 formula: "H2O",
22 names: &["WATER"],
23 category: "Inorganic"
24 },
25 Molecule {
26 formula: "H2O2",
27 names: &["HYDROGEN PEROXIDE"],
28 category: "Inorganic"
29 },
30
31 Molecule {
33 formula: "NH3",
34 names: &["AMMONIA"],
35 category: "Inorganic"
36 },
37 Molecule {
38 formula: "CO2",
39 names: &["CARBON DIOXIDE", "CO2"],
40 category: "Inorganic"
41 },
42 Molecule {
43 formula: "CO",
44 names: &["CARBON MONOXIDE", "CO"],
45 category: "Inorganic"
46 },
47 Molecule {
48 formula: "O2",
49 names: &["OXYGEN", "DIOXYGEN"],
50 category: "Inorganic"
51 },
52 Molecule {
53 formula: "N2",
54 names: &["NITROGEN", "DINITROGEN"],
55 category: "Inorganic"
56 },
57 Molecule {
58 formula: "O3",
59 names: &["OZONE"],
60 category: "Inorganic"
61 },
62
63 Molecule {
65 formula: "CH4",
66 names: &["METHANE"],
67 category: "Hydrocarbon"
68 },
69 Molecule {
70 formula: "C2H6",
71 names: &["ETHANE"],
72 category: "Hydrocarbon"
73 },
74 Molecule {
75 formula: "C3H8",
76 names: &["PROPANE"],
77 category: "Hydrocarbon"
78 },
79 Molecule {
80 formula: "C4H10",
81 names: &["BUTANE"],
82 category: "Hydrocarbon"
83 },
84 Molecule {
85 formula: "C5H12",
86 names: &["PENTANE"],
87 category: "Hydrocarbon"
88 },
89 Molecule {
90 formula: "C6H14",
91 names: &["HEXANE"],
92 category: "Hydrocarbon"
93 },
94 Molecule {
95 formula: "C2H4",
96 names: &["ETHENE", "ETHYLENE"],
97 category: "Hydrocarbon"
98 },
99 Molecule {
100 formula: "C2H2",
101 names: &["ETHYNE", "ACETYLENE"],
102 category: "Hydrocarbon"
103 },
104 Molecule {
105 formula: "C6H6",
106 names: &["BENZENE"],
107 category: "Hydrocarbon"
108 },
109
110 Molecule {
112 formula: "C6H12O6",
113 names: &["GLUCOSE", "DEXTROSE"],
114 category: "Sugar"
115 },
116 Molecule {
117 formula: "C6H12O6",
118 names: &["FRUCTOSE"],
119 category: "Sugar"
120 },
121 Molecule {
122 formula: "C12H22O11",
123 names: &["SUCROSE", "TABLE SUGAR"],
124 category: "Sugar"
125 },
126 Molecule {
127 formula: "C12H22O11",
128 names: &["LACTOSE", "MILK SUGAR"],
129 category: "Sugar"
130 },
131
132 Molecule {
134 formula: "NaCl",
135 names: &["SALT", "TABLE SALT", "SODIUM CHLORIDE"],
136 category: "Salt"
137 },
138 Molecule {
139 formula: "NaHCO3",
140 names: &["BAKING SODA", "SODIUM BICARBONATE"],
141 category: "Salt"
142 },
143 Molecule {
144 formula: "CaCO3",
145 names: &["CALCIUM CARBONATE", "LIMESTONE", "CHALK"],
146 category: "Mineral"
147 },
148 Molecule {
149 formula: "CaSO4",
150 names: &["CALCIUM SULFATE", "GYPSUM"],
151 category: "Mineral"
152 },
153
154 Molecule {
156 formula: "HCl",
157 names: &["HYDROCHLORIC ACID"],
158 category: "Acid"
159 },
160 Molecule {
161 formula: "H2SO4",
162 names: &["SULFURIC ACID"],
163 category: "Acid"
164 },
165 Molecule {
166 formula: "HNO3",
167 names: &["NITRIC ACID"],
168 category: "Acid"
169 },
170 Molecule {
171 formula: "H3PO4",
172 names: &["PHOSPHORIC ACID"],
173 category: "Acid"
174 },
175 Molecule {
176 formula: "CH3COOH",
177 names: &["ACETIC ACID", "VINEGAR"],
178 category: "Acid"
179 },
180
181 Molecule {
183 formula: "C2H5OH",
184 names: &["ETHANOL", "ALCOHOL", "ETHYL ALCOHOL"],
185 category: "Alcohol"
186 },
187 Molecule {
188 formula: "CH3OH",
189 names: &["METHANOL", "METHYL ALCOHOL"],
190 category: "Alcohol"
191 },
192 Molecule {
193 formula: "C3H8O",
194 names: &["ISOPROPANOL", "ISOPROPYL ALCOHOL", "RUBBING ALCOHOL"],
195 category: "Alcohol"
196 },
197 Molecule {
198 formula: "CH3COCH3",
199 names: &["ACETONE"],
200 category: "Organic"
201 },
202 Molecule {
203 formula: "C8H10N4O2",
204 names: &["CAFFEINE"],
205 category: "Organic"
206 },
207 Molecule {
208 formula: "C9H8O4",
209 names: &["ASPIRIN", "ACETYLSALICYLIC ACID"],
210 category: "Organic"
211 },
212
213 Molecule {
215 formula: "C6H8O6",
216 names: &["VITAMIN C", "ASCORBIC ACID"],
217 category: "Vitamin"
218 },
219 ];
220
221 static ref MOLECULE_LOOKUP: HashMap<String, &'static str> = {
223 let mut map = HashMap::new();
224 for molecule in MOLECULE_TABLE.iter() {
225 for name in molecule.names {
226 map.insert((*name).to_string(), molecule.formula);
227 }
228 }
229 map
230 };
231}
232
233#[derive(Debug, Clone)]
235struct MolecularFormula {
236 elements: Vec<(String, usize)>, }
238
239impl MolecularFormula {
240 fn parse(formula: &str) -> Result<Self> {
242 let formula = formula.trim();
243
244 if let Some(expanded) = Self::get_compound_alias(formula) {
246 return Self::parse_formula(expanded);
247 }
248
249 Self::parse_formula(formula)
250 }
251
252 fn get_compound_alias(name: &str) -> Option<&'static str> {
254 let name_upper = name.to_uppercase();
255 MOLECULE_LOOKUP.get(&name_upper).copied()
256 }
257
258 fn parse_formula(formula: &str) -> Result<Self> {
260 let mut elements = Vec::new();
261 let mut chars = formula.chars().peekable();
262
263 while chars.peek().is_some() {
264 if chars.peek() == Some(&'(') {
266 chars.next(); let mut group = String::new();
268 let mut depth = 1;
269
270 for ch in chars.by_ref() {
271 if ch == '(' {
272 depth += 1;
273 group.push(ch);
274 } else if ch == ')' {
275 depth -= 1;
276 if depth == 0 {
277 break;
278 }
279 group.push(ch);
280 } else {
281 group.push(ch);
282 }
283 }
284
285 let multiplier = Self::parse_number(&mut chars).unwrap_or(1);
287
288 let group_formula = Self::parse_formula(&group)?;
290 for (elem, count) in group_formula.elements {
291 elements.push((elem, count * multiplier));
292 }
293 } else {
294 let element = Self::parse_element(&mut chars)?;
296 let count = Self::parse_number(&mut chars).unwrap_or(1);
297
298 if let Some((_, existing_count)) = elements.iter_mut().find(|(e, _)| e == &element)
300 {
301 *existing_count += count;
302 } else {
303 elements.push((element, count));
304 }
305 }
306 }
307
308 Ok(MolecularFormula { elements })
309 }
310
311 fn parse_element(chars: &mut std::iter::Peekable<std::str::Chars>) -> Result<String> {
313 let mut element = String::new();
314
315 if let Some(ch) = chars.peek() {
317 if ch.is_uppercase() {
318 element.push(chars.next().unwrap());
319 } else {
320 return Err(anyhow!("Expected uppercase letter for element symbol"));
321 }
322 } else {
323 return Err(anyhow!("Unexpected end of formula"));
324 }
325
326 while let Some(&ch) = chars.peek() {
328 if ch.is_lowercase() {
329 element.push(chars.next().unwrap());
330 } else {
331 break;
332 }
333 }
334
335 Ok(element)
336 }
337
338 fn parse_number(chars: &mut std::iter::Peekable<std::str::Chars>) -> Option<usize> {
340 let mut num_str = String::new();
341
342 while let Some(&ch) = chars.peek() {
343 if ch.is_ascii_digit() {
344 num_str.push(chars.next().unwrap());
345 } else {
346 break;
347 }
348 }
349
350 if num_str.is_empty() {
351 None
352 } else {
353 num_str.parse().ok()
354 }
355 }
356
357 fn calculate_mass(&self) -> Result<f64> {
359 let mut total_mass = 0.0;
360
361 for (element, count) in &self.elements {
362 let atomic_mass = AtomicMassFunction::get_atomic_mass(element)
363 .ok_or_else(|| anyhow!("Unknown element: {}", element))?;
364 total_mass += atomic_mass * (*count as f64);
365 }
366
367 Ok(total_mass)
368 }
369}
370
371pub struct AvogadroFunction;
373
374impl SqlFunction for AvogadroFunction {
375 fn signature(&self) -> FunctionSignature {
376 FunctionSignature {
377 name: "AVOGADRO",
378 category: FunctionCategory::Chemical,
379 arg_count: ArgCount::Fixed(0),
380 description: "Returns Avogadro's number (6.022 × 10^23)",
381 returns: "FLOAT",
382 examples: vec![
383 "SELECT AVOGADRO()",
384 "SELECT molecules / AVOGADRO() AS moles",
385 ],
386 }
387 }
388
389 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
390 self.validate_args(args)?;
391 Ok(DataValue::Float(6.02214076e23))
392 }
393}
394
/// SQL function `ATOMIC_MASS(x)`; also owns the element-mass lookup used for formulas.
pub struct AtomicMassFunction;

impl AtomicMassFunction {
    /// Returns the atomic mass for an element symbol or English element name.
    ///
    /// Matching is case-insensitive: the input is upper-cased and compared against the
    /// upper-case symbols and names below. Returns `None` for anything not listed.
    ///
    /// Implemented as a `match` so no lookup table is built per call.
    fn get_atomic_mass(element: &str) -> Option<f64> {
        let mass = match element.to_uppercase().as_str() {
            "H" | "HYDROGEN" => 1.008,
            "HE" | "HELIUM" => 4.003,
            "LI" | "LITHIUM" => 6.941,
            "BE" | "BERYLLIUM" => 9.012,
            "B" | "BORON" => 10.81,
            "C" | "CARBON" => 12.01,
            "N" | "NITROGEN" => 14.01,
            "O" | "OXYGEN" => 16.00,
            "F" | "FLUORINE" => 19.00,
            "NE" | "NEON" => 20.18,
            "NA" | "SODIUM" => 22.99,
            "MG" | "MAGNESIUM" => 24.31,
            "AL" | "ALUMINUM" | "ALUMINIUM" => 26.98,
            "SI" | "SILICON" => 28.09,
            "P" | "PHOSPHORUS" => 30.97,
            "S" | "SULFUR" | "SULPHUR" => 32.07,
            "CL" | "CHLORINE" => 35.45,
            "AR" | "ARGON" => 39.95,
            "K" | "POTASSIUM" => 39.10,
            "CA" | "CALCIUM" => 40.08,
            "FE" | "IRON" => 55.85,
            "CU" | "COPPER" => 63.55,
            "ZN" | "ZINC" => 65.39,
            "AG" | "SILVER" => 107.87,
            "AU" | "GOLD" => 196.97,
            "HG" | "MERCURY" => 200.59,
            "PB" | "LEAD" => 207.2,
            "U" | "URANIUM" => 238.03,
            _ => return None,
        };

        Some(mass)
    }
}
487
488impl SqlFunction for AtomicMassFunction {
489 fn signature(&self) -> FunctionSignature {
490 FunctionSignature {
491 name: "ATOMIC_MASS",
492 category: FunctionCategory::Chemical,
493 arg_count: ArgCount::Fixed(1),
494 description: "Returns the atomic mass of an element or molecular formula in amu",
495 returns: "FLOAT",
496 examples: vec![
497 "SELECT ATOMIC_MASS('H')",
498 "SELECT ATOMIC_MASS('Carbon')",
499 "SELECT ATOMIC_MASS('H2O') AS water_mass",
500 "SELECT ATOMIC_MASS('Ca(OH)2') AS calcium_hydroxide",
501 "SELECT ATOMIC_MASS('water') AS water_mass",
502 ],
503 }
504 }
505
506 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
507 self.validate_args(args)?;
508
509 match &args[0] {
510 DataValue::String(input) => {
511 if let Some(mass) = Self::get_atomic_mass(input) {
513 return Ok(DataValue::Float(mass));
514 }
515
516 match MolecularFormula::parse(input) {
518 Ok(formula) => {
519 let mass = formula.calculate_mass()?;
520 Ok(DataValue::Float(mass))
521 }
522 Err(_) => Err(anyhow!(
523 "Unknown element or invalid molecular formula: {}",
524 input
525 )),
526 }
527 }
528 DataValue::InternedString(input) => {
529 if let Some(mass) = Self::get_atomic_mass(input) {
531 return Ok(DataValue::Float(mass));
532 }
533
534 match MolecularFormula::parse(input) {
536 Ok(formula) => {
537 let mass = formula.calculate_mass()?;
538 Ok(DataValue::Float(mass))
539 }
540 Err(_) => Err(anyhow!(
541 "Unknown element or invalid molecular formula: {}",
542 input
543 )),
544 }
545 }
546 _ => Err(anyhow!("ATOMIC_MASS() requires a string argument")),
547 }
548 }
549}
550
/// SQL function `ATOMIC_NUMBER(x)`.
pub struct AtomicNumberFunction;

impl AtomicNumberFunction {
    /// Returns the atomic number for an element symbol or English element name.
    ///
    /// Matching is case-insensitive: the input is upper-cased and compared against the
    /// upper-case symbols and names below. Returns `None` for anything not listed.
    ///
    /// Implemented as a `match` so no lookup table is built per call.
    fn get_atomic_number(element: &str) -> Option<i64> {
        let number = match element.to_uppercase().as_str() {
            "H" | "HYDROGEN" => 1,
            "HE" | "HELIUM" => 2,
            "LI" | "LITHIUM" => 3,
            "BE" | "BERYLLIUM" => 4,
            "B" | "BORON" => 5,
            "C" | "CARBON" => 6,
            "N" | "NITROGEN" => 7,
            "O" | "OXYGEN" => 8,
            "F" | "FLUORINE" => 9,
            "NE" | "NEON" => 10,
            "NA" | "SODIUM" => 11,
            "MG" | "MAGNESIUM" => 12,
            "AL" | "ALUMINUM" | "ALUMINIUM" => 13,
            "SI" | "SILICON" => 14,
            "P" | "PHOSPHORUS" => 15,
            "S" | "SULFUR" | "SULPHUR" => 16,
            "CL" | "CHLORINE" => 17,
            "AR" | "ARGON" => 18,
            "K" | "POTASSIUM" => 19,
            "CA" | "CALCIUM" => 20,
            "FE" | "IRON" => 26,
            "CU" | "COPPER" => 29,
            "ZN" | "ZINC" => 30,
            "AG" | "SILVER" => 47,
            "AU" | "GOLD" => 79,
            "HG" | "MERCURY" => 80,
            "PB" | "LEAD" => 82,
            "U" | "URANIUM" => 92,
            _ => return None,
        };

        Some(number)
    }
}
624
625impl SqlFunction for AtomicNumberFunction {
626 fn signature(&self) -> FunctionSignature {
627 FunctionSignature {
628 name: "ATOMIC_NUMBER",
629 category: FunctionCategory::Chemical,
630 arg_count: ArgCount::Fixed(1),
631 description: "Returns the atomic number of an element",
632 returns: "INTEGER",
633 examples: vec![
634 "SELECT ATOMIC_NUMBER('H')",
635 "SELECT ATOMIC_NUMBER('Carbon')",
636 "SELECT ATOMIC_NUMBER('Au') AS gold_number",
637 ],
638 }
639 }
640
641 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
642 self.validate_args(args)?;
643
644 match &args[0] {
645 DataValue::String(element) => match Self::get_atomic_number(element) {
646 Some(number) => Ok(DataValue::Integer(number)),
647 None => Err(anyhow!("Unknown element: {}", element)),
648 },
649 DataValue::InternedString(element) => match Self::get_atomic_number(element) {
650 Some(number) => Ok(DataValue::Integer(number)),
651 None => Err(anyhow!("Unknown element: {}", element)),
652 },
653 _ => Err(anyhow!("ATOMIC_NUMBER() requires a string argument")),
654 }
655 }
656}
657
658pub struct MoleculeFormulaFunction;
660
661impl SqlFunction for MoleculeFormulaFunction {
662 fn signature(&self) -> FunctionSignature {
663 FunctionSignature {
664 name: "MOLECULE_FORMULA",
665 category: FunctionCategory::Chemical,
666 arg_count: ArgCount::Fixed(1),
667 description: "Returns the molecular formula for a compound name",
668 returns: "STRING",
669 examples: vec![
670 "SELECT MOLECULE_FORMULA('water')",
671 "SELECT MOLECULE_FORMULA('glucose')",
672 "SELECT MOLECULE_FORMULA('caffeine')",
673 ],
674 }
675 }
676
677 fn evaluate(&self, args: &[DataValue]) -> Result<DataValue> {
678 self.validate_args(args)?;
679
680 let input = match &args[0] {
681 DataValue::String(s) => s.clone(),
682 DataValue::InternedString(s) => s.to_string(),
683 _ => return Err(anyhow!("MOLECULE_FORMULA expects a string")),
684 };
685
686 let upper_input = input.to_uppercase();
687
688 if let Some(formula) = MOLECULE_LOOKUP.get(&upper_input) {
690 return Ok(DataValue::String((*formula).to_string()));
691 }
692
693 Err(anyhow!("Unknown molecule: {}", input))
695 }
696}
697
#[cfg(test)]
mod tests {
    use super::*;

    /// Unwraps a `Float` result or fails the test.
    fn expect_float(value: DataValue) -> f64 {
        match value {
            DataValue::Float(val) => val,
            _ => panic!("Expected Float"),
        }
    }

    /// Evaluates `ATOMIC_MASS(input)` and returns the resulting mass.
    fn atomic_mass_of(input: &str) -> f64 {
        let result = AtomicMassFunction
            .evaluate(&[DataValue::String(input.to_string())])
            .unwrap();
        expect_float(result)
    }

    #[test]
    fn test_avogadro() {
        let val = expect_float(AvogadroFunction.evaluate(&[]).unwrap());
        // Compare against the value the function returns, with a tight relative tolerance.
        assert!((val / 6.02214076e23 - 1.0).abs() < 1e-12);
    }

    #[test]
    fn test_atomic_mass_hydrogen() {
        assert!((atomic_mass_of("H") - 1.008).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_carbon() {
        assert!((atomic_mass_of("Carbon") - 12.01).abs() < 0.01);
    }

    #[test]
    fn test_atomic_mass_gold() {
        assert!((atomic_mass_of("Au") - 196.97).abs() < 0.01);
    }

    #[test]
    fn test_atomic_mass_unknown_element() {
        let func = AtomicMassFunction;
        let result = func.evaluate(&[DataValue::String("Xyz".to_string())]);
        assert!(result.is_err());
    }

    #[test]
    fn test_atomic_mass_simple_formula() {
        // 2 * 1.008 + 16.00
        assert!((atomic_mass_of("H2O") - 18.016).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_parenthesized_group() {
        // 40.08 + 2 * (16.00 + 1.008)
        assert!((atomic_mass_of("Ca(OH)2") - 74.096).abs() < 0.001);
    }

    #[test]
    fn test_atomic_mass_compound_name() {
        // "water" resolves to H2O through the compound table.
        assert!((atomic_mass_of("water") - 18.016).abs() < 0.001);
    }

    #[test]
    fn test_atomic_number_carbon() {
        let func = AtomicNumberFunction;
        let result = func
            .evaluate(&[DataValue::String("C".to_string())])
            .unwrap();
        match result {
            DataValue::Integer(val) => assert_eq!(val, 6),
            _ => panic!("Expected Integer"),
        }
    }

    #[test]
    fn test_atomic_number_unknown_element() {
        let func = AtomicNumberFunction;
        let result = func.evaluate(&[DataValue::String("Xyz".to_string())]);
        assert!(result.is_err());
    }

    #[test]
    fn test_molecule_formula_water() {
        let func = MoleculeFormulaFunction;
        let result = func
            .evaluate(&[DataValue::String("water".to_string())])
            .unwrap();
        match result {
            DataValue::String(formula) => assert_eq!(formula, "H2O"),
            _ => panic!("Expected String"),
        }
    }

    #[test]
    fn test_molecule_formula_unknown() {
        let func = MoleculeFormulaFunction;
        let result = func.evaluate(&[DataValue::String("unobtainium".to_string())]);
        assert!(result.is_err());
    }
}