Skip to main content

datafusion_functions/unicode/
left.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use std::any::Any;
19
20use crate::unicode::common::{LeftSlicer, general_left_right};
21use crate::utils::make_scalar_function;
22use arrow::datatypes::DataType;
23use datafusion_common::Result;
24use datafusion_common::exec_err;
25use datafusion_expr::TypeSignature::Exact;
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
28};
29use datafusion_macros::user_doc;
30
31#[user_doc(
32    doc_section(label = "String Functions"),
33    description = "Returns a specified number of characters from the left side of a string.",
34    syntax_example = "left(str, n)",
35    sql_example = r#"```sql
36> select left('datafusion', 4);
37+-----------------------------------+
38| left(Utf8("datafusion"),Int64(4)) |
39+-----------------------------------+
40| data                              |
41+-----------------------------------+
42```"#,
43    standard_argument(name = "str", prefix = "String"),
44    argument(name = "n", description = "Number of characters to return."),
45    related_udf(name = "right")
46)]
47#[derive(Debug, PartialEq, Eq, Hash)]
48pub struct LeftFunc {
49    signature: Signature,
50}
51
52impl Default for LeftFunc {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl LeftFunc {
59    pub fn new() -> Self {
60        use DataType::*;
61        Self {
62            signature: Signature::one_of(
63                vec![
64                    Exact(vec![Utf8View, Int64]),
65                    Exact(vec![Utf8, Int64]),
66                    Exact(vec![LargeUtf8, Int64]),
67                ],
68                Volatility::Immutable,
69            ),
70        }
71    }
72}
73
74impl ScalarUDFImpl for LeftFunc {
75    fn as_any(&self) -> &dyn Any {
76        self
77    }
78
79    fn name(&self) -> &str {
80        "left"
81    }
82
83    fn signature(&self) -> &Signature {
84        &self.signature
85    }
86
87    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
88        Ok(arg_types[0].clone())
89    }
90
91    /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
92    /// left('abcde', 2) = 'ab'
93    /// left('abcde', -2) = 'abc'
94    /// The implementation uses UTF-8 code points as characters
95    fn invoke_with_args(
96        &self,
97        args: datafusion_expr::ScalarFunctionArgs,
98    ) -> Result<ColumnarValue> {
99        let args = &args.args;
100        match args[0].data_type() {
101            DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
102                make_scalar_function(general_left_right::<LeftSlicer>, vec![])(args)
103            }
104            other => exec_err!(
105                "Unsupported data type {other:?} for function {},\
106                expected Utf8View, Utf8 or LargeUtf8.",
107                self.name()
108            ),
109        }
110    }
111
112    fn documentation(&self) -> Option<&Documentation> {
113        self.doc()
114    }
115}
116
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray, StringViewArray};
    use arrow::datatypes::DataType::{Utf8, Utf8View};

    // Only the feature-disabled case below uses this macro, so the import is
    // gated the same way to avoid an unused-import warning otherwise.
    #[cfg(not(feature = "unicode_expressions"))]
    use datafusion_common::internal_err;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::left::LeftFunc;
    use crate::utils::test::test_function;

    /// Exercises `left` on scalar inputs: positive, negative, zero and
    /// out-of-range counts, NULL arguments, multi-byte characters, and both the
    /// `Utf8` and `Utf8View` string representations.
    #[test]
    fn test_functions() -> Result<()> {
        // Positive count shorter than the string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("ab")),
            &str,
            Utf8,
            StringArray
        );
        // Positive count longer than the string returns the whole string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("abcde")),
            &str,
            Utf8,
            StringArray
        );
        // Negative count drops that many trailing characters.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-2i64)),
            ],
            Ok(Some("abc")),
            &str,
            Utf8,
            StringArray
        );
        // i64::MIN has no positive counterpart; the result must still be empty.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(i64::MIN)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Negative count larger than the string length yields an empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(-200i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // Zero count yields an empty string.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(0i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8,
            StringArray
        );
        // NULL string argument propagates NULL.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // NULL count argument propagates NULL.
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::Int64(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
        // Counts are in characters, not bytes ('é' is two bytes in UTF-8).
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(5i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("joséésoj")),
                ColumnarValue::Scalar(ScalarValue::from(-3i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8,
            StringArray
        );
        // Only compiled when the `unicode_expressions` feature is disabled.
        #[cfg(not(feature = "unicode_expressions"))]
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            internal_err!(
                "function left requires compilation with feature flag: unicode_expressions."
            ),
            &str,
            Utf8,
            StringArray
        );

        // StringView cases
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some("abcde".to_string()))),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            Ok(Some("ab")),
            &str,
            Utf8View,
            StringViewArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some("abcde".to_string()))),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("abcde")),
            &str,
            Utf8View,
            StringViewArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some("".to_string()))),
                ColumnarValue::Scalar(ScalarValue::from(200i64)),
            ],
            Ok(Some("")),
            &str,
            Utf8View,
            StringViewArray
        );
        test_function!(
            LeftFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                    "joséésoj".to_string()
                ))),
                ColumnarValue::Scalar(ScalarValue::from(-3i64)),
            ],
            Ok(Some("joséé")),
            &str,
            Utf8View,
            StringViewArray
        );

        // Unicode indexing case: sweep every cut point of a string that mixes
        // 1-, 2-, 3- and 4-byte UTF-8 characters, in both directions.
        let input = "joé楽s𐀀so↓j";
        let char_count = input.chars().count();
        for n in 1..=char_count {
            let count =
                i64::try_from(n).expect("test input is only a few characters long");

            // Negative count: everything except the last `n` characters.
            let all_but_last = input.chars().take(char_count - n).collect::<String>();
            test_function!(
                LeftFunc::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::from(input)),
                    ColumnarValue::Scalar(ScalarValue::from(-count)),
                ],
                Ok(Some(all_but_last.as_str())),
                &str,
                Utf8,
                StringArray
            );

            // Positive count: the first `n` characters.
            let first_n = input.chars().take(n).collect::<String>();
            test_function!(
                LeftFunc::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::from(input)),
                    ColumnarValue::Scalar(ScalarValue::from(count)),
                ],
                Ok(Some(first_n.as_str())),
                &str,
                Utf8,
                StringArray
            );
        }

        Ok(())
    }
}