Skip to main content

datafusion_functions/unicode/
left.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use crate::unicode::common::{LeftSlicer, general_left_right};
19use crate::utils::make_scalar_function;
20use arrow::datatypes::DataType;
21use datafusion_common::Result;
22use datafusion_common::exec_err;
23use datafusion_expr::TypeSignature::Exact;
24use datafusion_expr::{
25    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
26    Volatility,
27};
28use datafusion_macros::user_doc;
29
/// Scalar UDF implementing the SQL `left(str, n)` string function.
///
/// The user-facing documentation (description, syntax, SQL example and
/// argument descriptions) is declared in the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Returns a specified number of characters from the left side of a string.",
    syntax_example = "left(str, n)",
    sql_example = r#"```sql
> select left('datafusion', 4);
+-----------------------------------+
| left(Utf8("datafusion"),Int64(4)) |
+-----------------------------------+
| data                              |
+-----------------------------------+
```"#,
    standard_argument(name = "str", prefix = "String"),
    argument(name = "n", description = "Number of characters to return."),
    related_udf(name = "right")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct LeftFunc {
    // Argument type combinations accepted by `left`; populated in `LeftFunc::new`.
    signature: Signature,
}
50
51impl Default for LeftFunc {
52    fn default() -> Self {
53        Self::new()
54    }
55}
56
57impl LeftFunc {
58    pub fn new() -> Self {
59        use DataType::*;
60        Self {
61            signature: Signature::one_of(
62                vec![
63                    Exact(vec![Utf8View, Int64]),
64                    Exact(vec![Utf8, Int64]),
65                    Exact(vec![LargeUtf8, Int64]),
66                ],
67                Volatility::Immutable,
68            ),
69        }
70    }
71}
72
73impl ScalarUDFImpl for LeftFunc {
74    fn name(&self) -> &str {
75        "left"
76    }
77
78    fn signature(&self) -> &Signature {
79        &self.signature
80    }
81
82    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
83        Ok(DataType::Utf8View)
84    }
85
86    /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters.
87    /// left('abcde', 2) = 'ab'
88    /// left('abcde', -2) = 'abc'
89    /// The implementation uses UTF-8 code points as characters
90    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
91        let args = &args.args;
92        match args[0].data_type() {
93            DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8 => {
94                make_scalar_function(general_left_right::<LeftSlicer>, vec![])(args)
95            }
96            other => exec_err!(
97                "Unsupported data type {other:?} for function {},\
98                expected Utf8View, Utf8 or LargeUtf8.",
99                self.name()
100            ),
101        }
102    }
103
104    fn documentation(&self) -> Option<&Documentation> {
105        self.doc()
106    }
107}
108
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringViewArray};
    use arrow::datatypes::DataType::Utf8View;

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::unicode::left::LeftFunc;
    use crate::utils::test::test_function;

    // Invokes `left` on two scalar arguments (string, count) and checks the
    // outcome against `$expected`, which is always compared as a `Utf8View`
    // result held in a `StringViewArray`.
    macro_rules! assert_left {
        ($text:expr, $count:expr, $expected:expr) => {
            test_function!(
                LeftFunc::new(),
                vec![ColumnarValue::Scalar($text), ColumnarValue::Scalar($count)],
                $expected,
                &str,
                Utf8View,
                StringViewArray
            );
        };
    }

    #[test]
    fn test_functions() -> Result<()> {
        // Positive counts keep a prefix; oversized counts keep everything.
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(2i64),
            Ok(Some("ab"))
        );
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(200i64),
            Ok(Some("abcde"))
        );

        // Negative counts drop characters from the end.
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(-2i64),
            Ok(Some("abc"))
        );
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(i64::MIN),
            Ok(Some(""))
        );
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(-200i64),
            Ok(Some(""))
        );

        // A zero count yields the empty string.
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::from(0i64),
            Ok(Some(""))
        );

        // A null in either argument yields null.
        assert_left!(
            ScalarValue::Utf8(None),
            ScalarValue::from(2i64),
            Ok(None)
        );
        assert_left!(
            ScalarValue::from("abcde"),
            ScalarValue::Int64(None),
            Ok(None)
        );

        // Multi-byte characters count as single characters.
        assert_left!(
            ScalarValue::from("joséésoj"),
            ScalarValue::from(5i64),
            Ok(Some("joséé"))
        );
        assert_left!(
            ScalarValue::from("joséésoj"),
            ScalarValue::from(-3i64),
            Ok(Some("joséé"))
        );

        #[cfg(not(feature = "unicode_expressions"))]
        test_function!(
            LeftFunc::new(),
            &[
                ColumnarValue::Scalar(ScalarValue::from("abcde")),
                ColumnarValue::Scalar(ScalarValue::from(2i64)),
            ],
            internal_err!(
                "function left requires compilation with feature flag: unicode_expressions."
            ),
            &str,
            Utf8View,
            StringViewArray
        );

        // StringView cases
        assert_left!(
            ScalarValue::Utf8View(Some("abcde".to_string())),
            ScalarValue::from(2i64),
            Ok(Some("ab"))
        );
        assert_left!(
            ScalarValue::Utf8View(Some("abcde".to_string())),
            ScalarValue::from(200i64),
            Ok(Some("abcde"))
        );
        assert_left!(
            ScalarValue::Utf8View(Some("".to_string())),
            ScalarValue::from(200i64),
            Ok(Some(""))
        );
        assert_left!(
            ScalarValue::Utf8View(Some("joséésoj".to_string())),
            ScalarValue::from(-3i64),
            Ok(Some("joséé"))
        );

        // Unicode indexing case: dropping `dropped` trailing characters must
        // leave exactly the leading `total_chars - dropped` characters.
        let input = "joé楽s𐀀so↓j";
        let total_chars = input.chars().count();
        for dropped in 1..=total_chars {
            let prefix: String = input.chars().take(total_chars - dropped).collect();
            assert_left!(
                ScalarValue::from(input),
                ScalarValue::from(-(dropped as i64)),
                Ok(Some(prefix.as_str()))
            );
        }

        Ok(())
    }
}