datafusion_functions/string/
octet_length.rs1use arrow::compute::kernels::length::length;
19use arrow::datatypes::DataType;
20
21use crate::utils::utf8_to_int_type;
22use datafusion_common::types::logical_string;
23use datafusion_common::utils::take_function_args;
24use datafusion_common::{Result, ScalarValue};
25use datafusion_expr::{
26 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
27 TypeSignatureClass, Volatility,
28};
29use datafusion_macros::user_doc;
30
31#[user_doc(
32 doc_section(label = "String Functions"),
33 description = "Returns the length of a string in bytes.",
34 syntax_example = "octet_length(str)",
35 sql_example = r#"```sql
36> select octet_length('Ångström');
37+--------------------------------+
38| octet_length(Utf8("Ångström")) |
39+--------------------------------+
40| 10 |
41+--------------------------------+
42```"#,
43 standard_argument(name = "str", prefix = "String"),
44 related_udf(name = "bit_length"),
45 related_udf(name = "length")
46)]
47#[derive(Debug, PartialEq, Eq, Hash)]
48pub struct OctetLengthFunc {
49 signature: Signature,
50}
51
52impl Default for OctetLengthFunc {
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl OctetLengthFunc {
59 pub fn new() -> Self {
60 Self {
61 signature: Signature::coercible(
62 vec![Coercion::new_exact(TypeSignatureClass::Native(
63 logical_string(),
64 ))],
65 Volatility::Immutable,
66 ),
67 }
68 }
69}
70
71impl ScalarUDFImpl for OctetLengthFunc {
72 fn name(&self) -> &str {
73 "octet_length"
74 }
75
76 fn signature(&self) -> &Signature {
77 &self.signature
78 }
79
80 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
81 utf8_to_int_type(&arg_types[0], "octet_length")
82 }
83
84 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
85 let [array] = take_function_args(self.name(), &args.args)?;
86
87 match array {
88 ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
89 ColumnarValue::Scalar(v) => match v {
90 ScalarValue::Utf8(v) => Ok(ColumnarValue::Scalar(ScalarValue::Int32(
91 v.as_ref().map(|x| x.len() as i32),
92 ))),
93 ScalarValue::LargeUtf8(v) => Ok(ColumnarValue::Scalar(
94 ScalarValue::Int64(v.as_ref().map(|x| x.len() as i64)),
95 )),
96 ScalarValue::Utf8View(v) => Ok(ColumnarValue::Scalar(
97 ScalarValue::Int32(v.as_ref().map(|x| x.len() as i32)),
98 )),
99 _ => unreachable!("OctetLengthFunc"),
100 },
101 }
102 }
103
104 fn documentation(&self) -> Option<&Documentation> {
105 self.doc()
106 }
107}
108
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use arrow::array::{Array, Int32Array, StringArray};
    use arrow::datatypes::DataType::Int32;

    use datafusion_common::ScalarValue;
    use datafusion_common::{Result, exec_err};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::octet_length::OctetLengthFunc;
    use crate::utils::test::test_function;

    // Wraps an optional string in a `Utf8` scalar argument.
    fn utf8_arg(value: Option<&str>) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8(value.map(String::from)))
    }

    // Wraps a string in a `Utf8View` scalar argument.
    fn utf8_view_arg(value: &str) -> ColumnarValue {
        ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(value))))
    }

    #[test]
    fn test_functions() -> Result<()> {
        // A non-string argument is rejected.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Int32(Some(12)))],
            exec_err!(
                "The OCTET_LENGTH function can only accept strings, but got Int32."
            ),
            i32,
            Int32,
            Int32Array
        );

        // Array input.
        test_function!(
            OctetLengthFunc::new(),
            vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![
                String::from("chars"),
                String::from("chars2"),
            ])))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );

        // Wrong number of arguments.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_arg(Some("chars")), utf8_arg(Some("chars"))],
            exec_err!("octet_length function requires 1 argument, got 2"),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8` scalars: ASCII, multi-byte, empty, and null.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_arg(Some("chars"))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_arg(Some("josé"))],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_arg(Some(""))],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_arg(None)],
            Ok(None),
            i32,
            Int32,
            Int32Array
        );

        // `Utf8View` scalars: long multi-byte, short multi-byte, and empty.
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view_arg("joséjoséjoséjosé")],
            Ok(Some(20)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view_arg("josé")],
            Ok(Some(5)),
            i32,
            Int32,
            Int32Array
        );
        test_function!(
            OctetLengthFunc::new(),
            vec![utf8_view_arg("")],
            Ok(Some(0)),
            i32,
            Int32,
            Int32Array
        );

        Ok(())
    }
}