Skip to main content

datafusion_spark/function/url/
url_encode.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{ArrayRef, LargeStringArray, StringArray, StringViewArray};
22use arrow::datatypes::DataType;
23use datafusion_common::cast::{
24    as_large_string_array, as_string_array, as_string_view_array,
25};
26use datafusion_common::{Result, exec_err, plan_err};
27use datafusion_expr::{
28    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
29};
30use datafusion_functions::utils::make_scalar_function;
31use url::form_urlencoded::byte_serialize;
32
33#[derive(Debug, PartialEq, Eq, Hash)]
34pub struct UrlEncode {
35    signature: Signature,
36}
37
38impl Default for UrlEncode {
39    fn default() -> Self {
40        Self::new()
41    }
42}
43
44impl UrlEncode {
45    pub fn new() -> Self {
46        Self {
47            signature: Signature::string(1, Volatility::Immutable),
48        }
49    }
50
51    /// Encode a string to application/x-www-form-urlencoded format.
52    ///
53    /// # Arguments
54    ///
55    /// * `value` - The string to encode
56    ///
57    /// # Returns
58    ///
59    /// * `Ok(String)` - The encoded string
60    ///
61    fn encode(value: &str) -> Result<String> {
62        Ok(byte_serialize(value.as_bytes()).collect::<String>())
63    }
64}
65
66impl ScalarUDFImpl for UrlEncode {
67    fn as_any(&self) -> &dyn Any {
68        self
69    }
70
71    fn name(&self) -> &str {
72        "url_encode"
73    }
74
75    fn signature(&self) -> &Signature {
76        &self.signature
77    }
78
79    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
80        if arg_types.len() != 1 {
81            return plan_err!(
82                "{} expects 1 argument, but got {}",
83                self.name(),
84                arg_types.len()
85            );
86        }
87        // As the type signature is already checked, we can safely return the type of the first argument
88        Ok(arg_types[0].clone())
89    }
90
91    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
92        let ScalarFunctionArgs { args, .. } = args;
93        make_scalar_function(spark_url_encode, vec![])(&args)
94    }
95}
96
97/// Core implementation of URL encoding function.
98///
99/// # Arguments
100///
101/// * `args` - A slice containing exactly one ArrayRef with the strings to encode
102///
103/// # Returns
104///
105/// * `Ok(ArrayRef)` - A new array of the same type containing encoded strings
106/// * `Err(DataFusionError)` - If invalid arguments are provided
107///
108fn spark_url_encode(args: &[ArrayRef]) -> Result<ArrayRef> {
109    if args.len() != 1 {
110        return exec_err!("`url_encode` expects 1 argument");
111    }
112
113    match &args[0].data_type() {
114        DataType::Utf8 => as_string_array(&args[0])?
115            .iter()
116            .map(|x| x.map(UrlEncode::encode).transpose())
117            .collect::<Result<StringArray>>()
118            .map(|array| Arc::new(array) as ArrayRef),
119        DataType::LargeUtf8 => as_large_string_array(&args[0])?
120            .iter()
121            .map(|x| x.map(UrlEncode::encode).transpose())
122            .collect::<Result<LargeStringArray>>()
123            .map(|array| Arc::new(array) as ArrayRef),
124        DataType::Utf8View => as_string_view_array(&args[0])?
125            .iter()
126            .map(|x| x.map(UrlEncode::encode).transpose())
127            .collect::<Result<StringViewArray>>()
128            .map(|array| Arc::new(array) as ArrayRef),
129        other => exec_err!("`url_encode`: Expr must be STRING, got {other:?}"),
130    }
131}