sql_cli/sql/functions/
mod.rs

1use anyhow::{anyhow, Result};
2use std::collections::HashMap;
3use std::fmt;
4use std::sync::Arc;
5
6use crate::data::datatable::DataValue;
7
8pub mod astronomy;
9pub mod chemistry;
10pub mod comparison;
11pub mod constants;
12pub mod convert;
13pub mod date_time;
14pub mod financial;
15pub mod format;
16pub mod format_number;
17pub mod geometry;
18pub mod group_num;
19pub mod hash;
20pub mod math;
21pub mod mathematics;
22pub mod number_words;
23pub mod particle_charges;
24pub mod physics;
25pub mod random;
26pub mod roman;
27pub mod solar_system;
28pub mod string_methods;
29pub mod string_utils;
30pub mod text_processing;
31pub mod type_checking;
32
33// Re-export MethodFunction trait
34pub use string_methods::MethodFunction;
35
/// Category of SQL functions for organization and discovery.
///
/// The registry groups registered function names under these categories, and
/// the `Display` text of a category is used as a heading in generated help.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FunctionCategory {
    /// Mathematical and physical constants
    Constant,
    /// Mathematical operations
    Mathematical,
    /// Astronomical constants and calculations
    Astronomical,
    /// Chemical elements and properties
    Chemical,
    /// Date/time operations
    Date,
    /// String manipulation
    String,
    /// Aggregation functions
    Aggregate,
    /// Unit conversion functions
    Conversion,
}

impl fmt::Display for FunctionCategory {
    /// Writes the human-readable category name.
    ///
    /// The name goes through [`fmt::Formatter::pad`], so width, fill and
    /// alignment flags (for example `{:>12}`) are honoured. With no flags the
    /// output is exactly the bare name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            FunctionCategory::Constant => "Constant",
            FunctionCategory::Mathematical => "Mathematical",
            FunctionCategory::Astronomical => "Astronomical",
            FunctionCategory::Chemical => "Chemical",
            FunctionCategory::Date => "Date",
            FunctionCategory::String => "String",
            FunctionCategory::Aggregate => "Aggregate",
            FunctionCategory::Conversion => "Conversion",
        };
        f.pad(label)
    }
}
63
/// How many arguments a SQL function accepts.
#[derive(Debug, Clone)]
pub enum ArgCount {
    /// Exactly this many arguments.
    Fixed(usize),
    /// At least the first and at most the second value (both inclusive).
    Range(usize, usize),
    /// No restriction on the number of arguments.
    Variadic,
}

impl ArgCount {
    /// Returns `true` when `count` arguments satisfy this specification.
    #[must_use]
    pub fn is_valid(&self, count: usize) -> bool {
        match *self {
            ArgCount::Variadic => true,
            ArgCount::Fixed(expected) => expected == count,
            ArgCount::Range(lowest, highest) => (lowest..=highest).contains(&count),
        }
    }

    /// Returns a short English phrase describing the accepted argument count,
    /// suitable for error messages and help text.
    #[must_use]
    pub fn description(&self) -> String {
        match *self {
            ArgCount::Variadic => String::from("any number of arguments"),
            ArgCount::Range(lowest, highest) => format!("{lowest} to {highest} arguments"),
            // Zero and one get their own wording; everything else is plural.
            ArgCount::Fixed(exact) => match exact {
                0 => String::from("no arguments"),
                1 => String::from("1 argument"),
                _ => format!("{exact} arguments"),
            },
        }
    }
}
96
97/// Signature of a SQL function including metadata
98#[derive(Debug, Clone)]
99pub struct FunctionSignature {
100    pub name: &'static str,
101    pub category: FunctionCategory,
102    pub arg_count: ArgCount,
103    pub description: &'static str,
104    pub returns: &'static str,
105    pub examples: Vec<&'static str>,
106}
107
108/// Trait that all SQL functions must implement
109pub trait SqlFunction: Send + Sync {
110    /// Get the function's signature and metadata
111    fn signature(&self) -> FunctionSignature;
112
113    /// Evaluate the function with the given arguments
114    fn evaluate(&self, args: &[DataValue]) -> Result<DataValue>;
115
116    /// Validate arguments before evaluation (default implementation checks count)
117    fn validate_args(&self, args: &[DataValue]) -> Result<()> {
118        let sig = self.signature();
119        if !sig.arg_count.is_valid(args.len()) {
120            return Err(anyhow!(
121                "{}() expects {}, got {}",
122                sig.name,
123                sig.arg_count.description(),
124                args.len()
125            ));
126        }
127        Ok(())
128    }
129}
130
131/// Registry for all SQL functions
132pub struct FunctionRegistry {
133    functions: HashMap<String, Box<dyn SqlFunction>>,
134    by_category: HashMap<FunctionCategory, Vec<String>>,
135    methods: HashMap<String, Arc<dyn MethodFunction>>,
136}
137
138impl FunctionRegistry {
139    /// Create a new registry with all built-in functions
140    #[must_use]
141    pub fn new() -> Self {
142        let mut registry = Self {
143            functions: HashMap::new(),
144            by_category: HashMap::new(),
145            methods: HashMap::new(),
146        };
147
148        // Register all built-in functions
149        registry.register_constants();
150        registry.register_astronomical_functions();
151        registry.register_chemical_functions();
152        registry.register_mathematical_functions();
153        registry.register_geometry_functions();
154        registry.register_physics_functions();
155        registry.register_date_time_functions();
156        registry.register_string_methods();
157        registry.register_financial_functions();
158        registry.register_conversion_functions();
159        registry.register_hash_functions();
160        registry.register_comparison_functions();
161        registry.register_aggregate_functions();
162        registry.register_random_functions();
163        registry.register_format_functions();
164        registry.register_type_checking_functions();
165
166        registry
167    }
168
169    /// Register a function in the registry
170    pub fn register(&mut self, func: Box<dyn SqlFunction>) {
171        let sig = func.signature();
172        let name = sig.name.to_uppercase();
173        let category = sig.category;
174
175        // Add to main registry
176        self.functions.insert(name.clone(), func);
177
178        // Add to category index
179        self.by_category.entry(category).or_default().push(name);
180    }
181
182    /// Get a function by name (case-insensitive)
183    #[must_use]
184    pub fn get(&self, name: &str) -> Option<&dyn SqlFunction> {
185        self.functions
186            .get(&name.to_uppercase())
187            .map(std::convert::AsRef::as_ref)
188    }
189
190    /// Check if a function exists
191    #[must_use]
192    pub fn contains(&self, name: &str) -> bool {
193        self.functions.contains_key(&name.to_uppercase())
194    }
195
196    /// Get all functions matching a prefix (for autocomplete)
197    #[must_use]
198    pub fn autocomplete(&self, prefix: &str) -> Vec<FunctionSignature> {
199        let prefix_upper = prefix.to_uppercase();
200        self.functions
201            .iter()
202            .filter(|(name, _)| name.starts_with(&prefix_upper))
203            .map(|(_, func)| func.signature())
204            .collect()
205    }
206
207    /// Get all functions in a category
208    #[must_use]
209    pub fn get_by_category(&self, category: FunctionCategory) -> Vec<FunctionSignature> {
210        self.by_category
211            .get(&category)
212            .map(|names| {
213                names
214                    .iter()
215                    .filter_map(|name| self.functions.get(name))
216                    .map(|func| func.signature())
217                    .collect()
218            })
219            .unwrap_or_default()
220    }
221
222    /// Get all available functions
223    #[must_use]
224    pub fn all_functions(&self) -> Vec<FunctionSignature> {
225        self.functions
226            .values()
227            .map(|func| func.signature())
228            .collect()
229    }
230
231    /// Register a method function
232    pub fn register_method(&mut self, method: Arc<dyn MethodFunction>) {
233        let method_name = method.method_name().to_uppercase();
234        self.methods.insert(method_name, method);
235    }
236
237    /// Get a method function by name
238    #[must_use]
239    pub fn get_method(&self, name: &str) -> Option<Arc<dyn MethodFunction>> {
240        // Try exact match first
241        if let Some(method) = self.methods.get(&name.to_uppercase()) {
242            return Some(Arc::clone(method));
243        }
244
245        // Try to find a method that handles this name
246        for method in self.methods.values() {
247            if method.handles_method(name) {
248                return Some(Arc::clone(method));
249            }
250        }
251
252        None
253    }
254
255    /// Check if a method exists
256    #[must_use]
257    pub fn has_method(&self, name: &str) -> bool {
258        self.get_method(name).is_some()
259    }
260
261    /// Generate markdown documentation for all functions
262    #[must_use]
263    pub fn generate_markdown_docs(&self) -> String {
264        use std::fmt::Write;
265        let mut doc = String::new();
266
267        writeln!(&mut doc, "# SQL CLI Function Reference\n").unwrap();
268        writeln!(
269            &mut doc,
270            "This document is auto-generated from the function registry.\n"
271        )
272        .unwrap();
273
274        // Get all categories in a deterministic order
275        let mut categories: Vec<FunctionCategory> = self.by_category.keys().copied().collect();
276        categories.sort_by_key(|c| format!("{c:?}"));
277
278        for category in categories {
279            let functions = self.get_by_category(category);
280            if functions.is_empty() {
281                continue;
282            }
283
284            writeln!(&mut doc, "## {category} Functions\n").unwrap();
285
286            // Sort functions by name for consistent output
287            let mut functions = functions;
288            functions.sort_by_key(|f| f.name);
289
290            for func in functions {
291                writeln!(&mut doc, "### {}()\n", func.name).unwrap();
292                writeln!(&mut doc, "**Description:** {}\n", func.description).unwrap();
293                writeln!(
294                    &mut doc,
295                    "**Arguments:** {}\n",
296                    func.arg_count.description()
297                )
298                .unwrap();
299                writeln!(&mut doc, "**Returns:** {}\n", func.returns).unwrap();
300
301                if !func.examples.is_empty() {
302                    writeln!(&mut doc, "**Examples:**").unwrap();
303                    writeln!(&mut doc, "```sql").unwrap();
304                    for example in &func.examples {
305                        writeln!(&mut doc, "{example}").unwrap();
306                    }
307                    writeln!(&mut doc, "```\n").unwrap();
308                }
309            }
310        }
311
312        doc
313    }
314
315    /// Generate help text for a specific function
316    #[must_use]
317    pub fn generate_function_help(&self, name: &str) -> Option<String> {
318        self.get(name).map(|func| {
319            let sig = func.signature();
320            let mut help = String::new();
321            use std::fmt::Write;
322
323            writeln!(&mut help, "Function: {}()", sig.name).unwrap();
324            writeln!(&mut help, "Category: {}", sig.category).unwrap();
325            writeln!(&mut help, "Description: {}", sig.description).unwrap();
326            writeln!(&mut help, "Arguments: {}", sig.arg_count.description()).unwrap();
327            writeln!(&mut help, "Returns: {}", sig.returns).unwrap();
328
329            if !sig.examples.is_empty() {
330                writeln!(&mut help, "\nExamples:").unwrap();
331                for example in &sig.examples {
332                    writeln!(&mut help, "  {example}").unwrap();
333                }
334            }
335
336            help
337        })
338    }
339
340    /// List all available functions with brief descriptions
341    #[must_use]
342    pub fn list_functions(&self) -> String {
343        use std::fmt::Write;
344        let mut list = String::new();
345
346        writeln!(&mut list, "Available SQL Functions:\n").unwrap();
347
348        let mut categories: Vec<FunctionCategory> = self.by_category.keys().copied().collect();
349        categories.sort_by_key(|c| format!("{c:?}"));
350
351        for category in categories {
352            let functions = self.get_by_category(category);
353            if functions.is_empty() {
354                continue;
355            }
356
357            writeln!(&mut list, "{category} Functions:").unwrap();
358
359            let mut functions = functions;
360            functions.sort_by_key(|f| f.name);
361
362            for func in functions {
363                writeln!(
364                    &mut list,
365                    "  {:20} - {}",
366                    format!("{}()", func.name),
367                    func.description
368                )
369                .unwrap();
370            }
371            writeln!(&mut list).unwrap();
372        }
373
374        list
375    }
376
377    /// Register constant functions
378    fn register_constants(&mut self) {
379        use constants::{
380            EFunction, HbarFunction, MassElectronFunction, MeFunction, PhiFunction, PiFunction,
381            TauFunction,
382        };
383
384        self.register(Box::new(PiFunction));
385        self.register(Box::new(EFunction));
386        self.register(Box::new(MeFunction)); // Mass of electron
387        self.register(Box::new(MassElectronFunction)); // Alias for ME
388        self.register(Box::new(TauFunction));
389        self.register(Box::new(PhiFunction));
390        self.register(Box::new(HbarFunction));
391    }
392
393    /// Register astronomical functions
394    fn register_astronomical_functions(&mut self) {
395        use astronomy::{
396            AuFunction, DistJupiterFunction, DistMarsFunction, DistMercuryFunction,
397            DistNeptuneFunction, DistSaturnFunction, DistUranusFunction, DistVenusFunction,
398            LightYearFunction, MassEarthFunction, MassJupiterFunction, MassMarsFunction,
399            MassMercuryFunction, MassMoonFunction, MassNeptuneFunction, MassSaturnFunction,
400            MassSunFunction, MassUranusFunction, MassVenusFunction, ParsecFunction,
401            RadiusEarthFunction, RadiusJupiterFunction, RadiusMarsFunction, RadiusMercuryFunction,
402            RadiusMoonFunction, RadiusNeptuneFunction, RadiusSaturnFunction, RadiusSunFunction,
403            RadiusUranusFunction, RadiusVenusFunction,
404        };
405
406        use solar_system::{
407            DensitySolarBodyFunction, DistanceSolarBodyFunction, EscapeVelocitySolarBodyFunction,
408            GravitySolarBodyFunction, MassSolarBodyFunction, MoonsSolarBodyFunction,
409            OrbitalPeriodSolarBodyFunction, RadiusSolarBodyFunction,
410            RotationPeriodSolarBodyFunction,
411        };
412
413        self.register(Box::new(MassEarthFunction));
414        self.register(Box::new(MassSunFunction));
415        self.register(Box::new(MassMoonFunction));
416        self.register(Box::new(AuFunction)); // Astronomical unit
417        self.register(Box::new(LightYearFunction));
418        self.register(Box::new(ParsecFunction));
419
420        // Planetary masses
421        self.register(Box::new(MassMercuryFunction));
422        self.register(Box::new(MassVenusFunction));
423        self.register(Box::new(MassMarsFunction));
424        self.register(Box::new(MassJupiterFunction));
425        self.register(Box::new(MassSaturnFunction));
426        self.register(Box::new(MassUranusFunction));
427        self.register(Box::new(MassNeptuneFunction));
428
429        // Solar body radius functions
430        self.register(Box::new(RadiusSunFunction));
431        self.register(Box::new(RadiusEarthFunction));
432        self.register(Box::new(RadiusMoonFunction));
433        self.register(Box::new(RadiusMercuryFunction));
434        self.register(Box::new(RadiusVenusFunction));
435        self.register(Box::new(RadiusMarsFunction));
436        self.register(Box::new(RadiusJupiterFunction));
437        self.register(Box::new(RadiusSaturnFunction));
438        self.register(Box::new(RadiusUranusFunction));
439        self.register(Box::new(RadiusNeptuneFunction));
440
441        // Planetary distances from the Sun
442        self.register(Box::new(DistMercuryFunction));
443        self.register(Box::new(DistVenusFunction));
444        self.register(Box::new(DistMarsFunction));
445        self.register(Box::new(DistJupiterFunction));
446        self.register(Box::new(DistSaturnFunction));
447        self.register(Box::new(DistUranusFunction));
448        self.register(Box::new(DistNeptuneFunction));
449
450        // Solar system lookup functions
451        self.register(Box::new(MassSolarBodyFunction));
452        self.register(Box::new(RadiusSolarBodyFunction));
453        self.register(Box::new(DistanceSolarBodyFunction));
454        self.register(Box::new(OrbitalPeriodSolarBodyFunction));
455        self.register(Box::new(GravitySolarBodyFunction));
456        self.register(Box::new(DensitySolarBodyFunction));
457        self.register(Box::new(EscapeVelocitySolarBodyFunction));
458        self.register(Box::new(RotationPeriodSolarBodyFunction));
459        self.register(Box::new(MoonsSolarBodyFunction));
460    }
461
462    /// Register chemical functions
463    fn register_chemical_functions(&mut self) {
464        use chemistry::{
465            AtomicMassFunction, AtomicNumberFunction, AvogadroFunction, MoleculeFormulaFunction,
466            NeutronsFunction,
467        };
468
469        self.register(Box::new(AvogadroFunction));
470        self.register(Box::new(AtomicMassFunction));
471        self.register(Box::new(AtomicNumberFunction));
472        self.register(Box::new(NeutronsFunction));
473        self.register(Box::new(MoleculeFormulaFunction));
474    }
475
476    /// Register string method functions
477    fn register_string_methods(&mut self) {
478        use number_words::{ToOrdinal, ToOrdinalWords, ToWords};
479        use string_utils::{LPadFunction, RPadFunction, RepeatFunction};
480        use text_processing::{CleanText, ExtractWords, StripPunctuation, Tokenize, WordCount};
481
482        string_methods::register_string_methods(self);
483
484        // String utility functions
485        self.register(Box::new(RepeatFunction));
486        self.register(Box::new(LPadFunction));
487        self.register(Box::new(RPadFunction));
488
489        // Number to words functions
490        self.register(Box::new(ToWords));
491        self.register(Box::new(ToOrdinal));
492        self.register(Box::new(ToOrdinalWords));
493
494        // Text processing functions
495        self.register(Box::new(StripPunctuation));
496        self.register(Box::new(Tokenize));
497        self.register(Box::new(CleanText));
498        self.register(Box::new(ExtractWords));
499        self.register(Box::new(WordCount));
500    }
501
502    /// Register geometry functions
503    fn register_geometry_functions(&mut self) {
504        use geometry::{
505            CircleAreaFunction, CircleCircumferenceFunction, Distance2DFunction,
506            PythagorasFunction, SphereSurfaceAreaFunction, SphereVolumeFunction,
507            TriangleAreaFunction,
508        };
509
510        self.register(Box::new(PythagorasFunction));
511        self.register(Box::new(CircleAreaFunction));
512        self.register(Box::new(CircleCircumferenceFunction));
513        self.register(Box::new(SphereVolumeFunction));
514        self.register(Box::new(SphereSurfaceAreaFunction));
515        self.register(Box::new(TriangleAreaFunction));
516        self.register(Box::new(Distance2DFunction));
517    }
518
519    /// Register hash functions
520    fn register_hash_functions(&mut self) {
521        use hash::{Md5Function, Sha1Function, Sha256Function, Sha512Function};
522
523        self.register(Box::new(Md5Function));
524        self.register(Box::new(Sha1Function));
525        self.register(Box::new(Sha256Function));
526        self.register(Box::new(Sha512Function));
527    }
528
529    /// Register comparison functions
530    fn register_comparison_functions(&mut self) {
531        comparison::register_comparison_functions(self);
532    }
533
534    /// Register mathematical functions
535    fn register_mathematical_functions(&mut self) {
536        use mathematics::{
537            IsPrimeFunction, NextPrimeFunction, NthPrimeFunction, PrevPrimeFunction,
538            PrimeCountFunction, PrimeFunction, PrimePiFunction,
539        };
540
541        // Prime number functions
542        self.register(Box::new(PrimeFunction));
543        self.register(Box::new(NthPrimeFunction)); // Alias for PRIME
544        self.register(Box::new(IsPrimeFunction));
545        self.register(Box::new(PrimeCountFunction));
546        self.register(Box::new(PrimePiFunction)); // Alias for PRIME_COUNT
547        self.register(Box::new(NextPrimeFunction));
548        self.register(Box::new(PrevPrimeFunction));
549
550        // General math functions
551        math::register_math_functions(self);
552    }
553
554    /// Register physics constants
555    fn register_physics_functions(&mut self) {
556        physics::register_physics_functions(self);
557
558        // Register particle charge functions
559        use particle_charges::{
560            ChargeDownQuarkFunction, ChargeElectronFunction, ChargeMuonFunction,
561            ChargeNeutronFunction, ChargePositronFunction, ChargeProtonFunction, ChargeTauFunction,
562            ChargeUpQuarkFunction,
563        };
564
565        self.register(Box::new(ChargeElectronFunction));
566        self.register(Box::new(ChargeProtonFunction));
567        self.register(Box::new(ChargeNeutronFunction));
568        self.register(Box::new(ChargeUpQuarkFunction));
569        self.register(Box::new(ChargeDownQuarkFunction));
570        self.register(Box::new(ChargePositronFunction));
571        self.register(Box::new(ChargeMuonFunction));
572        self.register(Box::new(ChargeTauFunction));
573    }
574
575    /// Register date/time functions
576    fn register_date_time_functions(&mut self) {
577        date_time::register_date_time_functions(self);
578    }
579
580    /// Register financial functions
581    fn register_financial_functions(&mut self) {
582        financial::register_financial_functions(self);
583    }
584
585    /// Register conversion functions
586    fn register_conversion_functions(&mut self) {
587        use convert::ConvertFunction;
588        use roman::{FromRoman, ToRoman};
589
590        self.register(Box::new(ConvertFunction));
591        self.register(Box::new(ToRoman));
592        self.register(Box::new(FromRoman));
593    }
594
595    /// Register aggregate and analytic functions
596    fn register_aggregate_functions(&mut self) {
597        use group_num::GroupNumFunction;
598
599        // Register GROUP_NUM function
600        // Note: We create a new instance per query to ensure clean memoization
601        self.register(Box::new(GroupNumFunction::new()));
602    }
603
604    /// Register random number generation functions
605    fn register_random_functions(&mut self) {
606        use random::{RandIntFunction, RandRangeFunction, RandomFunction};
607
608        self.register(Box::new(RandomFunction));
609        self.register(Box::new(RandIntFunction));
610        self.register(Box::new(RandRangeFunction));
611    }
612
613    /// Register formatting functions
614    fn register_format_functions(&mut self) {
615        use format::{
616            CenterFunction, FormatDateFunction, FormatNumberFunction, LPadFunction, RPadFunction,
617        };
618        use format_number::{FormatCurrencyFunction, RenderNumberFunction};
619
620        self.register(Box::new(FormatNumberFunction));
621        self.register(Box::new(FormatDateFunction));
622        self.register(Box::new(LPadFunction));
623        self.register(Box::new(RPadFunction));
624        self.register(Box::new(CenterFunction));
625        self.register(Box::new(RenderNumberFunction));
626        self.register(Box::new(FormatCurrencyFunction));
627    }
628
629    /// Register type checking functions
630    fn register_type_checking_functions(&mut self) {
631        use type_checking::{
632            IsBoolFunction, IsDateFunction, IsFloatFunction, IsIntegerFunction, IsNotNullFunction,
633            IsNullFunction, IsNumericFunction,
634        };
635
636        self.register(Box::new(IsDateFunction));
637        self.register(Box::new(IsBoolFunction));
638        self.register(Box::new(IsNumericFunction));
639        self.register(Box::new(IsIntegerFunction));
640        self.register(Box::new(IsFloatFunction));
641        self.register(Box::new(IsNullFunction));
642        self.register(Box::new(IsNotNullFunction));
643    }
644}
645
646impl Default for FunctionRegistry {
647    fn default() -> Self {
648        Self::new()
649    }
650}
651
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built registry exposes a sample of the built-in functions.
    #[test]
    fn test_registry_creation() {
        let registry = FunctionRegistry::new();

        for known in ["PI", "MASS_EARTH", "ME"] {
            assert!(registry.contains(known));
        }
    }

    /// Lookup succeeds regardless of the casing of the requested name.
    #[test]
    fn test_case_insensitive_lookup() {
        let registry = FunctionRegistry::new();

        for spelling in ["pi", "PI", "Pi"] {
            assert!(registry.get(spelling).is_some());
        }
    }

    /// Prefix completion returns the functions whose names start with the prefix.
    #[test]
    fn test_autocomplete() {
        let registry = FunctionRegistry::new();

        let mass_functions = registry.autocomplete("MASS");
        assert!(!mass_functions.is_empty());

        // Should include MASS_EARTH, MASS_SUN, etc.
        for expected in ["MASS_EARTH", "MASS_SUN"] {
            assert!(mass_functions.iter().any(|sig| sig.name == expected));
        }
    }
}
687}