Skip to main content

oxirs_core/query/functions/
registry.rs

1//! Function registry and core types for SPARQL functions
2
3use crate::model::Term;
4use crate::OxirsError;
5use scirs2_core::metrics::{Counter, Histogram, MetricsRegistry, Timer};
6use std::collections::HashMap;
7use std::sync::Arc;
8
9// Import all function implementations
10use super::aggregate::*;
11use super::bitwise::*;
12use super::datetime::*;
13use super::hash::*;
14use super::numeric::*;
15use super::string::*;
16use super::type_check::*;
17
/// SPARQL function registry with built-in performance monitoring.
///
/// Holds two name-keyed tables (built-in functions and user-registered
/// extensions) together with the metric handles that `execute` updates on
/// every call. `execute` consults `functions` before `extensions`, so a
/// built-in wins over an extension registered under the same name.
pub struct FunctionRegistry {
    /// Built-in functions, keyed by the name they were registered under
    functions: HashMap<String, FunctionImpl>,
    /// Custom extension functions, keyed by the name reported in their metadata
    extensions: HashMap<String, Arc<dyn CustomFunction>>,
    /// Function execution counter: a single counter incremented once per
    /// `execute` call, shared by all functions (it is not broken down by name)
    execution_counter: Arc<Counter>,
    /// Function execution timer (receives the wall-clock duration of each call)
    execution_timer: Arc<Timer>,
    /// Function error counter (incremented whenever `execute` returns `Err`)
    error_counter: Arc<Counter>,
    /// Function execution time histogram (samples are in microseconds)
    execution_histogram: Arc<Histogram>,
    /// Metrics registry handed out by `metrics_registry()`.
    // NOTE(review): nothing in this file registers the counters/timer/histogram
    // above with this registry; confirm whether that is intentional.
    metrics_registry: Arc<MetricsRegistry>,
}
35
/// Function implementation.
///
/// Describes how a built-in entry of the registry is backed. Only `Native`
/// is executable by `FunctionRegistry::execute`; the other two variants make
/// `execute` return a "not yet implemented" query error.
pub enum FunctionImpl {
    /// Native Rust implementation (a shared, thread-safe callable)
    Native(NativeFunction),
    /// JavaScript implementation (for extensibility); not executed yet
    // NOTE(review): the `String` payload is presumably the script source — confirm.
    JavaScript(String),
    /// WASM module; not executed yet
    // NOTE(review): the `Vec<u8>` payload is presumably the module bytes — confirm.
    Wasm(Vec<u8>),
}
45
/// Native function pointer.
///
/// A reference-counted, `Send + Sync` callable that receives the already
/// evaluated argument terms as a slice and returns either the resulting term
/// or an `OxirsError`.
pub type NativeFunction = Arc<dyn Fn(&[Term]) -> Result<Term, OxirsError> + Send + Sync>;
48
/// Custom function trait.
///
/// Implement this to plug an extension function into a `FunctionRegistry`
/// via `register_custom`. Implementors must be `Send + Sync` because they
/// are stored behind an `Arc`.
pub trait CustomFunction: Send + Sync {
    /// Execute the function on the given argument terms.
    ///
    /// Returns the resulting term, or an `OxirsError` describing the failure.
    fn execute(&self, args: &[Term]) -> Result<Term, OxirsError>;

    /// Get function metadata.
    ///
    /// The `name` field of the returned metadata is the key under which
    /// `register_custom` stores this function.
    fn metadata(&self) -> FunctionMetadata;
}
57
/// Function metadata.
///
/// Descriptive information a `CustomFunction` reports about itself.
// NOTE(review): within this file only `name` is read (as the registration
// key); the arity and type fields are not checked by `execute` here — confirm
// whether validation happens elsewhere.
#[derive(Debug, Clone)]
pub struct FunctionMetadata {
    /// Function name (used as the lookup key when registered)
    pub name: String,
    /// Human-readable description
    pub description: String,
    /// Minimum number of arguments
    pub min_args: usize,
    /// Maximum number of arguments (None = unlimited)
    pub max_args: Option<usize>,
    /// Declared argument types
    pub arg_types: Vec<ArgumentType>,
    /// Declared return type
    pub return_type: ReturnType,
}
74
/// Argument type specification.
///
/// Describes the kind of term a function parameter (or, via
/// `ReturnType::Fixed`, a return value) is declared to have. Values compare
/// and hash by variant and payload, so declared types can be checked with
/// `==` or stored in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ArgumentType {
    /// Any RDF term
    Any,
    /// String literal
    String,
    /// Numeric literal
    Numeric,
    /// Boolean literal
    Boolean,
    /// Date/time literal
    DateTime,
    /// IRI
    IRI,
    /// Specific datatype, identified by the contained string
    // NOTE(review): presumably the datatype IRI — confirm against callers.
    Datatype(String),
}
93
/// Return type specification.
///
/// Declares how a function's result type relates to its inputs; used as the
/// `return_type` field of `FunctionMetadata`.
#[derive(Debug, Clone)]
pub enum ReturnType {
    /// Same as input
    SameAsInput,
    /// Specific type, always the contained `ArgumentType`
    Fixed(ArgumentType),
    /// Dynamic based on input
    Dynamic,
}
104
105impl Default for FunctionRegistry {
106    fn default() -> Self {
107        Self::new()
108    }
109}
110
111impl FunctionRegistry {
112    /// Create new function registry with SPARQL 1.2 built-ins and performance monitoring
113    pub fn new() -> Self {
114        let metrics_registry = Arc::new(MetricsRegistry::new());
115
116        let execution_counter = Arc::new(Counter::new("function_executions".to_string()));
117        let execution_timer = Arc::new(Timer::new("function_duration".to_string()));
118        let error_counter = Arc::new(Counter::new("function_errors".to_string()));
119        let execution_histogram = Arc::new(Histogram::new("function_duration_dist".to_string()));
120
121        let mut registry = FunctionRegistry {
122            functions: HashMap::new(),
123            extensions: HashMap::new(),
124            execution_counter,
125            execution_timer,
126            error_counter,
127            execution_histogram,
128            metrics_registry,
129        };
130
131        registry.register_sparql_12_functions();
132        registry
133    }
134
135    /// Register all SPARQL 1.2 built-in functions
136    fn register_sparql_12_functions(&mut self) {
137        // String functions
138        self.register_native("CONCAT", Arc::new(fn_concat));
139        self.register_native("STRLEN", Arc::new(fn_strlen));
140        self.register_native("SUBSTR", Arc::new(fn_substr));
141        self.register_native("REPLACE", Arc::new(fn_replace));
142        self.register_native("REGEX", Arc::new(fn_regex));
143        self.register_native("STRAFTER", Arc::new(fn_strafter));
144        self.register_native("STRBEFORE", Arc::new(fn_strbefore));
145        self.register_native("STRSTARTS", Arc::new(fn_strstarts));
146        self.register_native("STRENDS", Arc::new(fn_strends));
147        self.register_native("CONTAINS", Arc::new(fn_contains));
148        self.register_native("ENCODE_FOR_URI", Arc::new(fn_encode_for_uri));
149
150        // Case functions
151        self.register_native("UCASE", Arc::new(fn_ucase));
152        self.register_native("LCASE", Arc::new(fn_lcase));
153
154        // SPARQL 1.2 additional string functions
155        self.register_native("CONCAT_WS", Arc::new(fn_concat_ws));
156        self.register_native("SPLIT", Arc::new(fn_split));
157        self.register_native("LPAD", Arc::new(fn_lpad));
158        self.register_native("RPAD", Arc::new(fn_rpad));
159
160        // Advanced string utility functions
161        self.register_native("TRIM", Arc::new(fn_trim));
162        self.register_native("LTRIM", Arc::new(fn_ltrim));
163        self.register_native("RTRIM", Arc::new(fn_rtrim));
164        self.register_native("REVERSE", Arc::new(fn_reverse));
165        self.register_native("REPEAT", Arc::new(fn_repeat));
166
167        // String inspection functions (SPARQL Extension)
168        self.register_native("CAPITALIZE", Arc::new(fn_capitalize));
169        self.register_native("ISALPHA", Arc::new(fn_isalpha));
170        self.register_native("ISDIGIT", Arc::new(fn_isdigit));
171        self.register_native("ISALNUM", Arc::new(fn_isalnum));
172        self.register_native("ISWHITESPACE", Arc::new(fn_iswhitespace));
173
174        // Numeric functions
175        self.register_native("ABS", Arc::new(fn_abs));
176        self.register_native("CEIL", Arc::new(fn_ceil));
177        self.register_native("FLOOR", Arc::new(fn_floor));
178        self.register_native("ROUND", Arc::new(fn_round));
179        self.register_native("RAND", Arc::new(fn_rand));
180
181        // Math functions (SPARQL 1.2 additions)
182        self.register_native("SQRT", Arc::new(fn_sqrt));
183        self.register_native("SIN", Arc::new(fn_sin));
184        self.register_native("COS", Arc::new(fn_cos));
185        self.register_native("TAN", Arc::new(fn_tan));
186        self.register_native("ASIN", Arc::new(fn_asin));
187        self.register_native("ACOS", Arc::new(fn_acos));
188        self.register_native("ATAN", Arc::new(fn_atan));
189        self.register_native("ATAN2", Arc::new(fn_atan2));
190        self.register_native("EXP", Arc::new(fn_exp));
191        self.register_native("LOG", Arc::new(fn_log));
192        self.register_native("LOG10", Arc::new(fn_log10));
193        self.register_native("POW", Arc::new(fn_pow));
194
195        // Hyperbolic functions (SPARQL Extension)
196        self.register_native("SINH", Arc::new(fn_sinh));
197        self.register_native("COSH", Arc::new(fn_cosh));
198        self.register_native("TANH", Arc::new(fn_tanh));
199        self.register_native("ASINH", Arc::new(fn_asinh));
200        self.register_native("ACOSH", Arc::new(fn_acosh));
201        self.register_native("ATANH", Arc::new(fn_atanh));
202
203        // Mathematical constants (SPARQL Extension)
204        self.register_native("PI", Arc::new(fn_pi));
205        self.register_native("E", Arc::new(fn_e));
206        self.register_native("TAU", Arc::new(fn_tau));
207
208        // Advanced numeric utility functions
209        self.register_native("SIGN", Arc::new(fn_sign));
210        self.register_native("MOD", Arc::new(fn_mod));
211        self.register_native("TRUNC", Arc::new(fn_trunc));
212        self.register_native("GCD", Arc::new(fn_gcd));
213        self.register_native("LCM", Arc::new(fn_lcm));
214
215        // Bitwise operations (SPARQL Extension)
216        self.register_native("BITAND", Arc::new(fn_bitand));
217        self.register_native("BITOR", Arc::new(fn_bitor));
218        self.register_native("BITXOR", Arc::new(fn_bitxor));
219        self.register_native("BITNOT", Arc::new(fn_bitnot));
220        self.register_native("LSHIFT", Arc::new(fn_lshift));
221        self.register_native("RSHIFT", Arc::new(fn_rshift));
222
223        // Date/time functions
224        self.register_native("NOW", Arc::new(fn_now));
225        self.register_native("YEAR", Arc::new(fn_year));
226        self.register_native("MONTH", Arc::new(fn_month));
227        self.register_native("DAY", Arc::new(fn_day));
228        self.register_native("HOURS", Arc::new(fn_hours));
229        self.register_native("MINUTES", Arc::new(fn_minutes));
230        self.register_native("SECONDS", Arc::new(fn_seconds));
231        self.register_native("TIMEZONE", Arc::new(fn_timezone));
232        self.register_native("TZ", Arc::new(fn_tz));
233        self.register_native("ADJUST", Arc::new(fn_adjust));
234
235        // Hash functions (SPARQL 1.2)
236        self.register_native("SHA1", Arc::new(fn_sha1));
237        self.register_native("SHA256", Arc::new(fn_sha256));
238        self.register_native("SHA384", Arc::new(fn_sha384));
239        self.register_native("SHA512", Arc::new(fn_sha512));
240        self.register_native("MD5", Arc::new(fn_md5));
241
242        // Type functions
243        self.register_native("STR", Arc::new(fn_str));
244        self.register_native("LANG", Arc::new(fn_lang));
245        self.register_native("DATATYPE", Arc::new(fn_datatype));
246        self.register_native("IRI", Arc::new(fn_iri));
247        self.register_native("URI", Arc::new(fn_iri)); // Alias
248        self.register_native("BNODE", Arc::new(fn_bnode));
249        self.register_native("STRDT", Arc::new(fn_strdt));
250        self.register_native("STRLANG", Arc::new(fn_strlang));
251        self.register_native("UUID", Arc::new(fn_uuid));
252        self.register_native("STRUUID", Arc::new(fn_struuid));
253
254        // Aggregate functions
255        self.register_native("COUNT", Arc::new(fn_count));
256        self.register_native("SUM", Arc::new(fn_sum));
257        self.register_native("AVG", Arc::new(fn_avg));
258        self.register_native("MIN", Arc::new(fn_min));
259        self.register_native("MAX", Arc::new(fn_max));
260        self.register_native("GROUP_CONCAT", Arc::new(fn_group_concat));
261        self.register_native("SAMPLE", Arc::new(fn_sample));
262
263        // Boolean functions
264        self.register_native("NOT", Arc::new(fn_not));
265        self.register_native("EXISTS", Arc::new(fn_exists));
266        self.register_native("NOT_EXISTS", Arc::new(fn_not_exists));
267        self.register_native("BOUND", Arc::new(fn_bound));
268        self.register_native("COALESCE", Arc::new(fn_coalesce));
269        self.register_native("IF", Arc::new(fn_if));
270
271        // Type checking functions (SPARQL 1.1)
272        self.register_native("isIRI", Arc::new(fn_is_iri));
273        self.register_native("isURI", Arc::new(fn_is_iri));
274        self.register_native("isBLANK", Arc::new(fn_is_blank));
275        self.register_native("isLITERAL", Arc::new(fn_is_literal));
276        self.register_native("isNUMERIC", Arc::new(fn_is_numeric));
277        self.register_native("sameTerm", Arc::new(fn_same_term));
278        self.register_native("LANGMATCHES", Arc::new(fn_langmatches));
279
280        // List functions (SPARQL 1.2)
281        self.register_native("IN", Arc::new(fn_in));
282        self.register_native("NOT_IN", Arc::new(fn_not_in));
283    }
284
285    /// Register a native function
286    fn register_native(&mut self, name: &str, func: NativeFunction) {
287        self.functions
288            .insert(name.to_string(), FunctionImpl::Native(func));
289    }
290
291    /// Register a custom function
292    pub fn register_custom(&mut self, func: Arc<dyn CustomFunction>) {
293        let metadata = func.metadata();
294        self.extensions.insert(metadata.name.clone(), func);
295    }
296
297    /// Execute a function with automatic performance monitoring
298    pub fn execute(&self, name: &str, args: &[Term]) -> Result<Term, OxirsError> {
299        // Start timing
300        let start = std::time::Instant::now();
301
302        // Increment execution counter
303        self.execution_counter.inc();
304
305        // Execute function
306        let result = if let Some(func) = self.functions.get(name) {
307            match func {
308                FunctionImpl::Native(f) => f(args),
309                FunctionImpl::JavaScript(_) => Err(OxirsError::Query(
310                    "JavaScript functions not yet implemented".to_string(),
311                )),
312                FunctionImpl::Wasm(_) => Err(OxirsError::Query(
313                    "WASM functions not yet implemented".to_string(),
314                )),
315            }
316        }
317        // Check custom functions
318        else if let Some(func) = self.extensions.get(name) {
319            func.execute(args)
320        } else {
321            Err(OxirsError::Query(format!("Unknown function: {name}")))
322        };
323
324        // Record execution time
325        let duration = start.elapsed();
326        self.execution_timer.observe(duration);
327        self.execution_histogram
328            .observe(duration.as_micros() as f64);
329
330        // Track errors
331        if result.is_err() {
332            self.error_counter.inc();
333        }
334
335        result
336    }
337
338    /// Get function execution statistics
339    pub fn get_statistics(&self) -> FunctionStatistics {
340        let timer_stats = self.execution_timer.get_stats();
341
342        FunctionStatistics {
343            total_executions: self.execution_counter.get(),
344            total_errors: self.error_counter.get(),
345            average_duration_micros: timer_stats.mean * 1_000_000.0, // Convert to microseconds
346            // Note: SCIRS2 Histogram doesn't currently expose percentiles
347            p95_duration_micros: timer_stats.mean * 1_000_000.0, // Use mean as approximation
348            p99_duration_micros: timer_stats.mean * 1_000_000.0, // Use mean as approximation
349        }
350    }
351
    /// Get metrics registry for external monitoring systems.
    ///
    /// Returns a borrowed handle to the registry created in `new`; callers
    /// that need to keep it can clone the `Arc`.
    pub fn metrics_registry(&self) -> &Arc<MetricsRegistry> {
        &self.metrics_registry
    }
356}
357
/// Function execution statistics.
///
/// A point-in-time snapshot produced by `FunctionRegistry::get_statistics`.
/// All figures are totals across every function in the registry, not
/// per-function values.
#[derive(Debug, Clone)]
pub struct FunctionStatistics {
    /// Total number of function executions
    pub total_executions: u64,
    /// Total number of function errors
    pub total_errors: u64,
    /// Average execution duration in microseconds
    pub average_duration_micros: f64,
    /// 95th percentile execution duration in microseconds
    /// (currently populated with the mean as an approximation)
    pub p95_duration_micros: f64,
    /// 99th percentile execution duration in microseconds
    /// (currently populated with the mean as an approximation)
    pub p99_duration_micros: f64,
}
372
373impl std::fmt::Display for FunctionStatistics {
374    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
375        write!(
376            f,
377            "FunctionStats {{ executions: {}, errors: {}, avg: {:.2}μs, p95: {:.2}μs, p99: {:.2}μs, error_rate: {:.2}% }}",
378            self.total_executions,
379            self.total_errors,
380            self.average_duration_micros,
381            self.p95_duration_micros,
382            self.p99_duration_micros,
383            if self.total_executions > 0 {
384                (self.total_errors as f64 / self.total_executions as f64) * 100.0
385            } else {
386                0.0
387            }
388        )
389    }
390}