rdf_fusion_functions/scalar/strings/
sub_str.rs

1use crate::scalar::dispatch::{
2    dispatch_binary_typed_value, dispatch_ternary_typed_value,
3};
4use crate::scalar::sparql_op_impl::{
5    ScalarSparqlOpImpl, create_typed_value_sparql_op_impl,
6};
7use crate::scalar::{ScalarSparqlOp, ScalarSparqlOpSignature, SparqlOpArity};
8use rdf_fusion_encoding::typed_value::TypedValueEncoding;
9use rdf_fusion_extensions::functions::BuiltinName;
10use rdf_fusion_extensions::functions::FunctionName;
11use rdf_fusion_model::{
12    Integer, LanguageStringRef, SimpleLiteralRef, StringLiteralRef, ThinError,
13    ThinResult, TypedValueRef,
14};
15
/// Implementation of the SPARQL `substr` function.
///
/// A single operator covers both call shapes: the two-argument form
/// (source string, start position) and the three-argument form that
/// additionally takes a length.
///
/// The type carries no state, so every instance compares equal and
/// [`Default`] is derived rather than written by hand.
#[derive(Debug, Default, Hash, PartialEq, Eq)]
pub struct SubStrSparqlOp;
25
26impl SubStrSparqlOp {
27    const NAME: FunctionName = FunctionName::Builtin(BuiltinName::SubStr);
28
29    /// Creates a new [SubStrSparqlOp].
30    pub fn new() -> Self {
31        Self {}
32    }
33}
34
35impl ScalarSparqlOp for SubStrSparqlOp {
36    fn name(&self) -> &FunctionName {
37        &Self::NAME
38    }
39
40    fn signature(&self) -> ScalarSparqlOpSignature {
41        ScalarSparqlOpSignature::default_with_arity(SparqlOpArity::OneOf(vec![
42            SparqlOpArity::Fixed(2),
43            SparqlOpArity::Fixed(3),
44        ]))
45    }
46
47    fn typed_value_encoding_op(
48        &self,
49    ) -> Option<Box<dyn ScalarSparqlOpImpl<TypedValueEncoding>>> {
50        Some(create_typed_value_sparql_op_impl(|args| {
51            match args.args.len() {
52                2 => dispatch_binary_typed_value(
53                    &args.args[0],
54                    &args.args[1],
55                    |lhs_value, rhs_value| {
56                        let lhs_value = StringLiteralRef::try_from(lhs_value)?;
57                        let rhs_value = Integer::try_from(rhs_value)?;
58                        evaluate_substr(lhs_value, rhs_value, None)
59                    },
60                    |_, _| ThinError::expected(),
61                ),
62                3 => dispatch_ternary_typed_value(
63                    &args.args[0],
64                    &args.args[1],
65                    &args.args[2],
66                    |arg0, arg1, arg2| {
67                        let arg0 = StringLiteralRef::try_from(arg0)?;
68                        let arg1 = Integer::try_from(arg1)?;
69                        let arg2 = Integer::try_from(arg2)?;
70                        evaluate_substr(arg0, arg1, Some(arg2))
71                    },
72                    |_, _, _| ThinError::expected(),
73                ),
74                _ => unreachable!("Invalid number of arguments"),
75            }
76        }))
77    }
78}
79
80fn evaluate_substr(
81    source: StringLiteralRef<'_>,
82    starting_loc: Integer,
83    length: Option<Integer>,
84) -> ThinResult<TypedValueRef<'_>> {
85    let index = usize::try_from(starting_loc.as_i64())?;
86    let length = length.map(|l| usize::try_from(l.as_i64())).transpose()?;
87
88    // We want to slice on char indices, not byte indices
89    let mut start_iter = source
90        .0
91        .char_indices()
92        .skip(index.checked_sub(1).ok_or(ThinError::ExpectedError)?)
93        .peekable();
94    let result = if let Some((start_position, _)) = start_iter.peek().copied() {
95        if let Some(length) = length {
96            let mut end_iter = start_iter.skip(length).peekable();
97            if let Some((end_position, _)) = end_iter.peek() {
98                &source.0[start_position..*end_position]
99            } else {
100                &source.0[start_position..]
101            }
102        } else {
103            &source.0[start_position..]
104        }
105    } else {
106        ""
107    };
108
109    Ok(match source.1 {
110        None => TypedValueRef::SimpleLiteral(SimpleLiteralRef { value: result }),
111        Some(language) => TypedValueRef::LanguageStringLiteral(LanguageStringRef {
112            value: result,
113            language,
114        }),
115    })
116}