Skip to main content

datafusion_spark/function/url/
url_encode.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use std::sync::Arc;
19
20use arrow::array::{ArrayRef, LargeStringArray, StringArray, StringViewArray};
21use arrow::datatypes::DataType;
22use datafusion_common::cast::{
23    as_large_string_array, as_string_array, as_string_view_array,
24};
25use datafusion_common::{Result, exec_err, plan_err};
26use datafusion_expr::{
27    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
28};
29use datafusion_functions::utils::make_scalar_function;
30use url::form_urlencoded::byte_serialize;
31
32#[derive(Debug, PartialEq, Eq, Hash)]
33pub struct UrlEncode {
34    signature: Signature,
35}
36
37impl Default for UrlEncode {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl UrlEncode {
44    pub fn new() -> Self {
45        Self {
46            signature: Signature::string(1, Volatility::Immutable),
47        }
48    }
49
50    /// Encode a string to application/x-www-form-urlencoded format.
51    ///
52    /// # Arguments
53    ///
54    /// * `value` - The string to encode
55    ///
56    /// # Returns
57    ///
58    /// * `Ok(String)` - The encoded string
59    ///
60    fn encode(value: &str) -> Result<String> {
61        Ok(byte_serialize(value.as_bytes()).collect::<String>())
62    }
63}
64
65impl ScalarUDFImpl for UrlEncode {
66    fn name(&self) -> &str {
67        "url_encode"
68    }
69
70    fn signature(&self) -> &Signature {
71        &self.signature
72    }
73
74    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
75        if arg_types.len() != 1 {
76            return plan_err!(
77                "{} expects 1 argument, but got {}",
78                self.name(),
79                arg_types.len()
80            );
81        }
82        // As the type signature is already checked, we can safely return the type of the first argument
83        Ok(arg_types[0].clone())
84    }
85
86    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
87        let ScalarFunctionArgs { args, .. } = args;
88        make_scalar_function(spark_url_encode, vec![])(&args)
89    }
90}
91
92/// Core implementation of URL encoding function.
93///
94/// # Arguments
95///
96/// * `args` - A slice containing exactly one ArrayRef with the strings to encode
97///
98/// # Returns
99///
100/// * `Ok(ArrayRef)` - A new array of the same type containing encoded strings
101/// * `Err(DataFusionError)` - If invalid arguments are provided
102///
103fn spark_url_encode(args: &[ArrayRef]) -> Result<ArrayRef> {
104    if args.len() != 1 {
105        return exec_err!("`url_encode` expects 1 argument");
106    }
107
108    match &args[0].data_type() {
109        DataType::Utf8 => as_string_array(&args[0])?
110            .iter()
111            .map(|x| x.map(UrlEncode::encode).transpose())
112            .collect::<Result<StringArray>>()
113            .map(|array| Arc::new(array) as ArrayRef),
114        DataType::LargeUtf8 => as_large_string_array(&args[0])?
115            .iter()
116            .map(|x| x.map(UrlEncode::encode).transpose())
117            .collect::<Result<LargeStringArray>>()
118            .map(|array| Arc::new(array) as ArrayRef),
119        DataType::Utf8View => as_string_view_array(&args[0])?
120            .iter()
121            .map(|x| x.map(UrlEncode::encode).transpose())
122            .collect::<Result<StringViewArray>>()
123            .map(|array| Arc::new(array) as ArrayRef),
124        other => exec_err!("`url_encode`: Expr must be STRING, got {other:?}"),
125    }
126}