Skip to main content

datafusion_functions/string/
octet_length.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow::compute::kernels::length::length;
use arrow::datatypes::DataType;

use crate::utils::utf8_to_int_type;
use datafusion_common::types::logical_string;
use datafusion_common::utils::take_function_args;
use datafusion_common::{Result, ScalarValue, exec_datafusion_err, exec_err};
use datafusion_expr::{
    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
    TypeSignatureClass, Volatility,
};
use datafusion_macros::user_doc;
30
31#[user_doc(
32    doc_section(label = "String Functions"),
33    description = "Returns the length of a string in bytes.",
34    syntax_example = "octet_length(str)",
35    sql_example = r#"```sql
36> select octet_length('Ångström');
37+--------------------------------+
38| octet_length(Utf8("Ångström")) |
39+--------------------------------+
40| 10                             |
41+--------------------------------+
42```"#,
43    standard_argument(name = "str", prefix = "String"),
44    related_udf(name = "bit_length"),
45    related_udf(name = "length")
46)]
47#[derive(Debug, PartialEq, Eq, Hash)]
48pub struct OctetLengthFunc {
49    signature: Signature,
50}
51
52impl Default for OctetLengthFunc {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl OctetLengthFunc {
59    pub fn new() -> Self {
60        Self {
61            signature: Signature::coercible(
62                vec![Coercion::new_exact(TypeSignatureClass::Native(
63                    logical_string(),
64                ))],
65                Volatility::Immutable,
66            ),
67        }
68    }
69}
70
71impl ScalarUDFImpl for OctetLengthFunc {
72    fn name(&self) -> &str {
73        "octet_length"
74    }
75
76    fn signature(&self) -> &Signature {
77        &self.signature
78    }
79
80    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
81        utf8_to_int_type(&arg_types[0], "octet_length")
82    }
83
84    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
85        let [array] = take_function_args(self.name(), &args.args)?;
86
87        match array {
88            ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
89            ColumnarValue::Scalar(v) => match v {
90                ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
91                    v.as_ref().map(|x| x.len() as i32),
92                ))),
93                ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
94                    ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
95                )),
96                ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
97                    ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
98                )),
99                _ => unreachable!("OctetLengthFunc"),
100            },
101        }
102    }
103
104    fn documentation(&self) -> Option<&Documentation> {
105        self.doc()
106    }
107}
108
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, StringArray};
    use arrow::datatypes::DataType::Int32;

    use datafusion_common::ScalarValue;
    use datafusion_common::{Result, exec_err};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    /// Builds a non-null `Utf8` scalar argument holding `text`.
    fn utf8(text: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8(Some(text.to_owned())))
    }

    /// Builds a non-null `Utf8View` scalar argument holding `text`.
    fn utf8_view(text: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8View(Some(text.to_owned())))
    }

    #[test]
    fn test_functions() -> Result<()> {
        // A non-string argument is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );

        // Array input.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                "chars".to_owned(),
                "chars2".to_owned(),
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );

        // Two arguments where one is required.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("chars"), utf8("chars")],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8` scalars: ASCII, multi-byte, empty and null.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("chars")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(None))],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8View` scalars: a longer string, a short one and an empty one.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("joséjoséjoséjosé")],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );

        Ok(())
    }
}