rdf_fusion_functions/scalar/strings/
regex.rs

1use crate::scalar::dispatch::{
2    dispatch_binary_typed_value, dispatch_ternary_typed_value,
3};
4use crate::scalar::sparql_op_impl::{
5    ScalarSparqlOpImpl, create_typed_value_sparql_op_impl,
6};
7use crate::scalar::{ScalarSparqlOp, ScalarSparqlOpSignature, SparqlOpArity};
8use rdf_fusion_encoding::typed_value::TypedValueEncoding;
9use rdf_fusion_extensions::functions::BuiltinName;
10use rdf_fusion_extensions::functions::FunctionName;
11use rdf_fusion_model::{SimpleLiteralRef, ThinError, ThinResult, TypedValueRef};
12use regex::{Regex, RegexBuilder};
13use std::borrow::Cow;
14
/// Scalar operation implementing the SPARQL `regex` function.
///
/// The operation accepts either two arguments (text, pattern) or three
/// arguments (text, pattern, flags).
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct RegexSparqlOp;
18
19impl Default for RegexSparqlOp {
20    fn default() -> Self {
21        Self::new()
22    }
23}
24
25impl RegexSparqlOp {
26    const NAME: FunctionName = FunctionName::Builtin(BuiltinName::Regex);
27
28    /// Creates a new [RegexSparqlOp].
29    pub fn new() -> Self {
30        Self {}
31    }
32}
33
34impl ScalarSparqlOp for RegexSparqlOp {
35    fn name(&self) -> &FunctionName {
36        &Self::NAME
37    }
38
39    fn signature(&self) -> ScalarSparqlOpSignature {
40        ScalarSparqlOpSignature::default_with_arity(SparqlOpArity::OneOf(vec![
41            SparqlOpArity::Fixed(2),
42            SparqlOpArity::Fixed(3),
43        ]))
44    }
45
46    fn typed_value_encoding_op(
47        &self,
48    ) -> Option<Box<dyn ScalarSparqlOpImpl<TypedValueEncoding>>> {
49        Some(create_typed_value_sparql_op_impl(|args| {
50            match args.args.len() {
51                2 => dispatch_binary_typed_value(
52                    &args.args[0],
53                    &args.args[1],
54                    |lhs_value, rhs_value| {
55                        let TypedValueRef::SimpleLiteral(pattern) = rhs_value else {
56                            return ThinError::expected();
57                        };
58
59                        let regex = compile_pattern(pattern.value, None)?;
60                        match lhs_value {
61                            TypedValueRef::SimpleLiteral(value) => {
62                                Ok(TypedValueRef::BooleanLiteral(
63                                    regex.is_match(value.value).into(),
64                                ))
65                            }
66                            TypedValueRef::LanguageStringLiteral(value) => {
67                                Ok(TypedValueRef::BooleanLiteral(
68                                    regex.is_match(value.value).into(),
69                                ))
70                            }
71                            _ => ThinError::expected(),
72                        }
73                    },
74                    |_, _| ThinError::expected(),
75                ),
76                3 => dispatch_ternary_typed_value(
77                    &args.args[0],
78                    &args.args[1],
79                    &args.args[2],
80                    |arg0, arg1, arg2| {
81                        let arg1 = SimpleLiteralRef::try_from(arg1)?;
82                        let arg2 = SimpleLiteralRef::try_from(arg2)?;
83
84                        let regex = compile_pattern(arg1.value, Some(arg2.value))?;
85                        match arg0 {
86                            TypedValueRef::SimpleLiteral(value) => {
87                                Ok(TypedValueRef::BooleanLiteral(
88                                    regex.is_match(value.value).into(),
89                                ))
90                            }
91                            TypedValueRef::LanguageStringLiteral(value) => {
92                                Ok(TypedValueRef::BooleanLiteral(
93                                    regex.is_match(value.value).into(),
94                                ))
95                            }
96                            _ => ThinError::expected(),
97                        }
98                    },
99                    |_, _, _| ThinError::expected(),
100                ),
101                _ => unreachable!("Invalid number of arguments"),
102            }
103        }))
104    }
105}
106
107pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> ThinResult<Regex> {
108    const REGEX_SIZE_LIMIT: usize = 1_000_000;
109
110    let mut pattern = Cow::Borrowed(pattern);
111    let flags = flags.unwrap_or_default();
112    if flags.contains('q') {
113        pattern = regex::escape(&pattern).into();
114    }
115    let mut regex_builder = RegexBuilder::new(&pattern);
116    regex_builder.size_limit(REGEX_SIZE_LIMIT);
117    for flag in flags.chars() {
118        match flag {
119            's' => {
120                regex_builder.dot_matches_new_line(true);
121            }
122            'm' => {
123                regex_builder.multi_line(true);
124            }
125            'i' => {
126                regex_builder.case_insensitive(true);
127            }
128            'x' => {
129                regex_builder.ignore_whitespace(true);
130            }
131            'q' => (),                         // Already supported
132            _ => return ThinError::expected(), // invalid option
133        }
134    }
135    regex_builder.build().map_err(|_| ThinError::ExpectedError)
136}