Skip to main content

datafusion_spark/function/string/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18pub mod ascii;
19pub mod base64;
20pub mod char;
21pub mod concat;
22pub mod elt;
23pub mod format_string;
24pub mod ilike;
25pub mod is_valid_utf8;
26pub mod length;
27pub mod like;
28pub mod luhn_check;
29pub mod make_valid_utf8;
30pub mod soundex;
31pub mod space;
32pub mod substring;
33
34use datafusion_expr::ScalarUDF;
35use datafusion_functions::make_udf_function;
36use std::sync::Arc;
37
38make_udf_function!(ascii::SparkAscii, ascii);
39make_udf_function!(base64::SparkBase64, base64);
40make_udf_function!(char::CharFunc, char);
41make_udf_function!(concat::SparkConcat, concat);
42make_udf_function!(ilike::SparkILike, ilike);
43make_udf_function!(length::SparkLengthFunc, length);
44make_udf_function!(elt::SparkElt, elt);
45make_udf_function!(like::SparkLike, like);
46make_udf_function!(luhn_check::SparkLuhnCheck, luhn_check);
47make_udf_function!(format_string::FormatStringFunc, format_string);
48make_udf_function!(space::SparkSpace, space);
49make_udf_function!(substring::SparkSubstring, substring);
50make_udf_function!(base64::SparkUnBase64, unbase64);
51make_udf_function!(soundex::SparkSoundex, soundex);
52make_udf_function!(make_valid_utf8::SparkMakeValidUtf8, make_valid_utf8);
53make_udf_function!(is_valid_utf8::SparkIsValidUtf8, is_valid_utf8);
54
55pub mod expr_fn {
56    use datafusion_functions::export_functions;
57
58    export_functions!((
59        ascii,
60        "Returns the ASCII code point of the first character of string.",
61        arg1
62    ));
63    export_functions!((
64        base64,
65        "Encodes the input binary `bin` into a base64 string.",
66        bin
67    ));
68    export_functions!((
69        char,
70        "Returns the ASCII character having the binary equivalent to col. If col is larger than 256 the result is equivalent to char(col % 256).",
71        arg1
72    ));
73    export_functions!((
74        concat,
75        "Concatenates multiple input strings into a single string. Returns NULL if any input is NULL.",
76        args
77    ));
78    export_functions!((
79        elt,
80        "Returns the n-th input (1-indexed), e.g. returns 2nd input when n is 2. The function returns NULL if the index is 0 or exceeds the length of the array.",
81        select_col arg1 arg2 argn
82    ));
83    export_functions!((
84        ilike,
85        "Returns true if str matches pattern (case insensitive).",
86        str pattern
87    ));
88    export_functions!((
89        length,
90        "Returns the character length of string data or number of bytes of binary data. The length of string data includes the trailing spaces. The length of binary data includes binary zeros.",
91        arg1
92    ));
93    export_functions!((
94        like,
95        "Returns true if str matches pattern (case sensitive).",
96        str pattern
97    ));
98    export_functions!((
99        luhn_check,
100        "Returns whether the input string of digits is valid according to the Luhn algorithm.",
101        arg1
102    ));
103    export_functions!((
104        format_string,
105        "Returns a formatted string from printf-style format strings.",
106        strfmt args
107    ));
108    export_functions!((space, "Returns a string consisting of n spaces.", arg1));
109    export_functions!((
110        substring,
111        "Returns the substring from string `str` starting at position `pos` with length `length.",
112        str pos length
113    ));
114    export_functions!((
115        unbase64,
116        "Decodes the input string `str` from a base64 string into binary data.",
117        str
118    ));
119    export_functions!((soundex, "Returns Soundex code of the string.", str));
120    export_functions!((
121        is_valid_utf8,
122        "Returns true if str is a valid UTF-8 string, otherwise returns false",
123        str
124    ));
125    export_functions!((
126        make_valid_utf8,
127        "Returns the original string if str is a valid UTF-8 string, otherwise returns a new string whose invalid UTF8 byte sequences are replaced using the UNICODE replacement character U+FFFD.",
128        str
129    ));
130}
131
132pub fn functions() -> Vec<Arc<ScalarUDF>> {
133    vec![
134        ascii(),
135        base64(),
136        char(),
137        concat(),
138        elt(),
139        ilike(),
140        length(),
141        like(),
142        luhn_check(),
143        format_string(),
144        space(),
145        substring(),
146        unbase64(),
147        soundex(),
148        make_valid_utf8(),
149        is_valid_utf8(),
150    ]
151}