Skip to main content

datafusion_functions/string/
btrim.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::string::common::*;
19use crate::utils::{make_scalar_function, utf8_to_str_type};
20use arrow::array::{ArrayRef, OffsetSizeTrait};
21use arrow::datatypes::DataType;
22use datafusion_common::types::logical_string;
23use datafusion_common::{Result, exec_err};
24use datafusion_expr::function::Hint;
25use datafusion_expr::{
26    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
27    TypeSignature, TypeSignatureClass, Volatility,
28};
29use datafusion_macros::user_doc;
30use std::sync::Arc;
31
32/// Returns the longest string with leading and trailing characters removed. If the characters are not specified, spaces are removed.
33/// btrim('xyxtrimyyx', 'xyz') = 'trim'
34fn btrim<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
35    let use_string_view = args[0].data_type() == &DataType::Utf8View;
36    let args = if args.len() > 1 {
37        let arg1 = arrow::compute::kernels::cast::cast(&args[1], args[0].data_type())?;
38        vec![Arc::clone(&args[0]), arg1]
39    } else {
40        args.to_owned()
41    };
42    general_trim::<T, TrimBoth>(&args, use_string_view)
43}
44
45#[user_doc(
46    doc_section(label = "String Functions"),
47    description = "Trims the specified trim string from the start and end of a string. If no trim string is provided, all spaces are removed from the start and end of the input string.",
48    syntax_example = "btrim(str[, trim_str])",
49    sql_example = r#"```sql
50> select btrim('__datafusion____', '_');
51+-------------------------------------------+
52| btrim(Utf8("__datafusion____"),Utf8("_")) |
53+-------------------------------------------+
54| datafusion                                |
55+-------------------------------------------+
56```"#,
57    standard_argument(name = "str", prefix = "String"),
58    argument(
59        name = "trim_str",
60        description = r"String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is a space._"
61    ),
62    alternative_syntax = "trim(BOTH trim_str FROM str)",
63    alternative_syntax = "trim(trim_str FROM str)",
64    related_udf(name = "ltrim"),
65    related_udf(name = "rtrim")
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct BTrimFunc {
69    signature: Signature,
70    aliases: Vec<String>,
71}
72
73impl Default for BTrimFunc {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl BTrimFunc {
80    pub fn new() -> Self {
81        Self {
82            signature: Signature::one_of(
83                vec![
84                    TypeSignature::Coercible(vec![
85                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
86                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
87                    ]),
88                    TypeSignature::Coercible(vec![Coercion::new_exact(
89                        TypeSignatureClass::Native(logical_string()),
90                    )]),
91                ],
92                Volatility::Immutable,
93            ),
94            aliases: vec![String::from("trim")],
95        }
96    }
97}
98
99impl ScalarUDFImpl for BTrimFunc {
100    fn name(&self) -> &str {
101        "btrim"
102    }
103
104    fn signature(&self) -> &Signature {
105        &self.signature
106    }
107
108    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
109        if arg_types[0] == DataType::Utf8View {
110            Ok(DataType::Utf8View)
111        } else {
112            utf8_to_str_type(&arg_types[0], "btrim")
113        }
114    }
115
116    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
117        match args.args[0].data_type() {
118            DataType::Utf8 | DataType::Utf8View => make_scalar_function(
119                btrim::<i32>,
120                vec![Hint::Pad, Hint::AcceptsSingular],
121            )(&args.args),
122            DataType::LargeUtf8 => make_scalar_function(
123                btrim::<i64>,
124                vec![Hint::Pad, Hint::AcceptsSingular],
125            )(&args.args),
126            other => exec_err!(
127                "Unsupported data type {other:?} for function btrim,\
128                expected Utf8, LargeUtf8 or Utf8View."
129            ),
130        }
131    }
132
133    fn aliases(&self) -> &[String] {
134        &self.aliases
135    }
136
137    fn documentation(&self) -> Option<&Documentation> {
138        self.doc()
139    }
140}
141
#[cfg(test)]
mod tests {
    use arrow::array::{Array, StringArray, StringViewArray};
    use arrow::datatypes::DataType::{Utf8, Utf8View};

    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    use crate::string::btrim::BTrimFunc;
    use crate::utils::test::test_function;

    /// Exercises `btrim` on scalar inputs for both `Utf8View` and `Utf8`:
    /// default (space) trimming, explicit trim characters, and a NULL trim argument.
    #[test]
    fn test_functions() {
        // String view cases for checking normal logic
        // Trailing spaces only.
        test_function!(
            BTrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("alphabet  ")
            )))],
            Ok(Some("alphabet")),
            &str,
            Utf8View,
            StringViewArray
        );
        // Leading and trailing spaces.
        test_function!(
            BTrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some(
                String::from("  alphabet  ")
            ))),],
            Ok(Some("alphabet")),
            &str,
            Utf8View,
            StringViewArray
        );
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("t")))),
            ],
            Ok(Some("alphabe")),
            &str,
            Utf8View,
            StringViewArray
        );
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabe"
                )))),
            ],
            Ok(Some("t")),
            &str,
            Utf8View,
            StringViewArray
        );
        // A NULL trim argument produces a NULL result.
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "alphabet"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(None)),
            ],
            Ok(None),
            &str,
            Utf8View,
            StringViewArray
        );
        // Special string view case for checking non-inlined output (len > 12)
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from(
                    "xxxalphabetalphabetxxx"
                )))),
                ColumnarValue::Scalar(ScalarValue::Utf8View(Some(String::from("x")))),
            ],
            Ok(Some("alphabetalphabet")),
            &str,
            Utf8View,
            StringViewArray
        );
        // String cases
        // Trailing spaces only.
        test_function!(
            BTrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("alphabet  ")
            ))),],
            Ok(Some("alphabet")),
            &str,
            Utf8,
            StringArray
        );
        // Leading and trailing spaces (mirrors the Utf8View case above).
        test_function!(
            BTrimFunc::new(),
            vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
                String::from("  alphabet  ")
            ))),],
            Ok(Some("alphabet")),
            &str,
            Utf8,
            StringArray
        );
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("t")))),
            ],
            Ok(Some("alphabe")),
            &str,
            Utf8,
            StringArray
        );
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabe")))),
            ],
            Ok(Some("t")),
            &str,
            Utf8,
            StringArray
        );
        // A NULL trim argument produces a NULL result.
        test_function!(
            BTrimFunc::new(),
            vec![
                ColumnarValue::Scalar(ScalarValue::Utf8(Some(String::from("alphabet")))),
                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
            ],
            Ok(None),
            &str,
            Utf8,
            StringArray
        );
    }
}