rdf_fusion_functions/scalar/strings/
regex.rs1use crate::scalar::dispatch::{
2 dispatch_binary_typed_value, dispatch_ternary_typed_value,
3};
4use crate::scalar::sparql_op_impl::{
5 ScalarSparqlOpImpl, create_typed_value_sparql_op_impl,
6};
7use crate::scalar::{ScalarSparqlOp, ScalarSparqlOpSignature, SparqlOpArity};
8use rdf_fusion_encoding::typed_value::TypedValueEncoding;
9use rdf_fusion_extensions::functions::BuiltinName;
10use rdf_fusion_extensions::functions::FunctionName;
11use rdf_fusion_model::{SimpleLiteralRef, ThinError, ThinResult, TypedValueRef};
12use regex::{Regex, RegexBuilder};
13use std::borrow::Cow;
14
/// Scalar SPARQL operation backing the builtin `REGEX` function.
///
/// The type carries no state; all behavior lives in its trait implementations.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct RegexSparqlOp;
18
19impl Default for RegexSparqlOp {
20 fn default() -> Self {
21 Self::new()
22 }
23}
24
25impl RegexSparqlOp {
26 const NAME: FunctionName = FunctionName::Builtin(BuiltinName::Regex);
27
28 pub fn new() -> Self {
30 Self {}
31 }
32}
33
34impl ScalarSparqlOp for RegexSparqlOp {
35 fn name(&self) -> &FunctionName {
36 &Self::NAME
37 }
38
39 fn signature(&self) -> ScalarSparqlOpSignature {
40 ScalarSparqlOpSignature::default_with_arity(SparqlOpArity::OneOf(vec![
41 SparqlOpArity::Fixed(2),
42 SparqlOpArity::Fixed(3),
43 ]))
44 }
45
46 fn typed_value_encoding_op(
47 &self,
48 ) -> Option<Box<dyn ScalarSparqlOpImpl<TypedValueEncoding>>> {
49 Some(create_typed_value_sparql_op_impl(|args| {
50 match args.args.len() {
51 2 => dispatch_binary_typed_value(
52 &args.args[0],
53 &args.args[1],
54 |lhs_value, rhs_value| {
55 let TypedValueRef::SimpleLiteral(pattern) = rhs_value else {
56 return ThinError::expected();
57 };
58
59 let regex = compile_pattern(pattern.value, None)?;
60 match lhs_value {
61 TypedValueRef::SimpleLiteral(value) => {
62 Ok(TypedValueRef::BooleanLiteral(
63 regex.is_match(value.value).into(),
64 ))
65 }
66 TypedValueRef::LanguageStringLiteral(value) => {
67 Ok(TypedValueRef::BooleanLiteral(
68 regex.is_match(value.value).into(),
69 ))
70 }
71 _ => ThinError::expected(),
72 }
73 },
74 |_, _| ThinError::expected(),
75 ),
76 3 => dispatch_ternary_typed_value(
77 &args.args[0],
78 &args.args[1],
79 &args.args[2],
80 |arg0, arg1, arg2| {
81 let arg1 = SimpleLiteralRef::try_from(arg1)?;
82 let arg2 = SimpleLiteralRef::try_from(arg2)?;
83
84 let regex = compile_pattern(arg1.value, Some(arg2.value))?;
85 match arg0 {
86 TypedValueRef::SimpleLiteral(value) => {
87 Ok(TypedValueRef::BooleanLiteral(
88 regex.is_match(value.value).into(),
89 ))
90 }
91 TypedValueRef::LanguageStringLiteral(value) => {
92 Ok(TypedValueRef::BooleanLiteral(
93 regex.is_match(value.value).into(),
94 ))
95 }
96 _ => ThinError::expected(),
97 }
98 },
99 |_, _, _| ThinError::expected(),
100 ),
101 _ => unreachable!("Invalid number of arguments"),
102 }
103 }))
104 }
105}
106
107pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> ThinResult<Regex> {
108 const REGEX_SIZE_LIMIT: usize = 1_000_000;
109
110 let mut pattern = Cow::Borrowed(pattern);
111 let flags = flags.unwrap_or_default();
112 if flags.contains('q') {
113 pattern = regex::escape(&pattern).into();
114 }
115 let mut regex_builder = RegexBuilder::new(&pattern);
116 regex_builder.size_limit(REGEX_SIZE_LIMIT);
117 for flag in flags.chars() {
118 match flag {
119 's' => {
120 regex_builder.dot_matches_new_line(true);
121 }
122 'm' => {
123 regex_builder.multi_line(true);
124 }
125 'i' => {
126 regex_builder.case_insensitive(true);
127 }
128 'x' => {
129 regex_builder.ignore_whitespace(true);
130 }
131 'q' => (), _ => return ThinError::expected(), }
134 }
135 regex_builder.build().map_err(|_| ThinError::ExpectedError)
136}