dsq_functions/builtin/
split.rs1use dsq_shared::value::Value;
2use dsq_shared::Result;
3use inventory;
4use polars::prelude::*;
5use serde_json;
6use std::borrow::Cow;
7
8pub fn builtin_split(args: &[Value]) -> Result<Value> {
9 if args.is_empty() || args.len() > 2 {
10 return Err(dsq_shared::error::operation_error(
11 "split() expects 1 or 2 arguments",
12 ));
13 }
14
15 let separator = if args.len() == 2 {
16 match &args[1] {
17 Value::String(s) => s.clone(),
18 _ => {
19 return Err(dsq_shared::error::operation_error(
20 "split() separator must be a string",
21 ));
22 }
23 }
24 } else {
25 " ".to_string() };
27
28 match &args[0] {
29 Value::String(s) => {
30 let parts: Vec<Value> = if separator.is_empty() {
31 s.chars().map(|c| Value::String(c.to_string())).collect()
32 } else {
33 s.split(&separator)
34 .map(|part| Value::String(part.to_string()))
35 .collect()
36 };
37 Ok(Value::Array(parts))
38 }
39 Value::Array(arr) => {
40 let split_arrays: Result<Vec<Value>> = arr
41 .iter()
42 .map(|v| match v {
43 Value::String(s) => {
44 let parts: Vec<Value> = if separator.is_empty() {
45 s.chars().map(|c| Value::String(c.to_string())).collect()
46 } else {
47 s.split(&separator)
48 .map(|part| Value::String(part.to_string()))
49 .collect()
50 };
51 Ok(Value::Array(parts))
52 }
53 _ => Err(dsq_shared::error::operation_error(
54 "split() requires string elements in array",
55 )),
56 })
57 .collect();
58 Ok(Value::Array(split_arrays?))
59 }
60 Value::DataFrame(df) => {
61 let mut new_series = Vec::new();
62 for col_name in df.get_column_names() {
63 if let Ok(series) = df.column(col_name) {
64 if series.dtype() == &DataType::String {
65 let split_series = series
66 .str()
67 .unwrap()
68 .apply(|s| {
69 s.map(|s| {
70 let parts: Vec<String> = if separator.is_empty() {
71 s.chars().map(|c| c.to_string()).collect()
72 } else {
73 s.split(&separator).map(|part| part.to_string()).collect()
74 };
75 Cow::Owned(
76 serde_json::to_string(&Value::Array(
77 parts.into_iter().map(Value::String).collect(),
78 ))
79 .unwrap_or("null".to_string()),
80 )
81 })
82 })
83 .into_series();
84 let mut s = split_series;
85 s.rename(col_name.clone());
86 new_series.push(s.into());
87 } else {
88 let mut s = series.clone();
89 s.rename(col_name.clone());
90 new_series.push(s);
91 }
92 }
93 }
94 match DataFrame::new(new_series) {
95 Ok(new_df) => Ok(Value::DataFrame(new_df)),
96 Err(e) => Err(dsq_shared::error::operation_error(format!(
97 "split() failed on DataFrame: {}",
98 e
99 ))),
100 }
101 }
102 Value::Series(series) => {
103 if series.dtype() == &DataType::String {
104 let split_series = series
105 .str()
106 .unwrap()
107 .apply(|s| {
108 s.map(|s| {
109 let parts: Vec<String> = if separator.is_empty() {
110 s.chars().map(|c| c.to_string()).collect()
111 } else {
112 s.split(&separator).map(|part| part.to_string()).collect()
113 };
114 Cow::Owned(
115 serde_json::to_string(&Value::Array(
116 parts.into_iter().map(Value::String).collect(),
117 ))
118 .unwrap_or("null".to_string()),
119 )
120 })
121 })
122 .into_series();
123 Ok(Value::Series(split_series))
124 } else {
125 Ok(Value::Series(series.clone()))
126 }
127 }
128 _ => Err(dsq_shared::error::operation_error(
129 "split() requires string, array, DataFrame, or Series",
130 )),
131 }
132}
133
// Register `builtin_split` under the name "split" so that it can be found by
// iterating `inventory::iter::<crate::FunctionRegistration>`.
inventory::submit! {
    crate::FunctionRegistration {
        name: "split",
        func: builtin_split,
    }
}
140
#[cfg(test)]
mod tests {
    use super::*;
    use dsq_shared::value::Value;

    /// Builds a `Value::String` from a string slice.
    fn text(s: &str) -> Value {
        Value::String(s.to_string())
    }

    /// Returns the elements of a `Value::Array`, panicking with `failure`
    /// for any other variant.
    fn as_array<'a>(value: &'a Value, failure: &str) -> &'a [Value] {
        match value {
            Value::Array(items) => items.as_slice(),
            _ => panic!("{}", failure),
        }
    }

    /// Asserts that `items` contains exactly the strings in `expected`, in order.
    fn assert_strings(items: &[Value], expected: &[&str]) {
        assert_eq!(items.len(), expected.len());
        for (item, want) in items.iter().zip(expected) {
            assert_eq!(*item, text(want));
        }
    }

    // With no separator argument the input is split on a single space.
    #[test]
    fn test_split_string_default_separator() {
        let output = builtin_split(&[text("hello world")]).unwrap();
        assert_strings(as_array(&output, "Expected Array"), &["hello", "world"]);
    }

    // An explicit separator is used as given.
    #[test]
    fn test_split_string_custom_separator() {
        let output = builtin_split(&[text("a,b,c"), text(",")]).unwrap();
        assert_strings(as_array(&output, "Expected Array"), &["a", "b", "c"]);
    }

    // An empty separator yields one element per character.
    #[test]
    fn test_split_string_empty_separator() {
        let output = builtin_split(&[text("abc"), text("")]).unwrap();
        assert_strings(as_array(&output, "Expected Array"), &["a", "b", "c"]);
    }

    // Array input produces one nested array per element.
    #[test]
    fn test_split_array() {
        let input = Value::Array(vec![text("a b"), text("c d")]);
        let output = builtin_split(&[input]).unwrap();
        let nested = as_array(&output, "Expected Array");
        assert_eq!(nested.len(), 2);
        assert_strings(as_array(&nested[0], "Expected nested Array"), &["a", "b"]);
        assert_strings(as_array(&nested[1], "Expected nested Array"), &["c", "d"]);
    }

    // Zero arguments and three arguments are both rejected.
    #[test]
    fn test_split_wrong_args() {
        let no_args = builtin_split(&[]);
        assert!(no_args.is_err());
        let too_many = builtin_split(&[Value::Int(1), text(","), Value::Int(2)]);
        assert!(too_many.is_err());
    }

    // A non-string element inside an array input is an error.
    #[test]
    fn test_split_non_string_in_array() {
        let input = Value::Array(vec![Value::Int(1)]);
        assert!(builtin_split(&[input]).is_err());
    }

    // The function is registered under "split" and is callable through the
    // registration entry.
    #[test]
    fn test_split_registered_via_inventory() {
        let mut registered = None;
        for entry in inventory::iter::<crate::FunctionRegistration> {
            if entry.name == "split" {
                registered = Some(entry);
                break;
            }
        }
        let entry = registered.expect("split function not found in inventory");
        let output = (entry.func)(&[text("test split")]).unwrap();
        assert_strings(as_array(&output, "Expected Array"), &["test", "split"]);
    }
}