1use crate::data::datatable::{DataColumn, DataRow, DataTable, DataType, DataValue};
2use crate::sql::generators::TableGenerator;
3use anyhow::{anyhow, Result};
4use std::collections::HashMap;
5use std::sync::Arc;
6
7pub struct Split;
9
10impl TableGenerator for Split {
11 fn name(&self) -> &str {
12 "SPLIT"
13 }
14
15 fn columns(&self) -> Vec<DataColumn> {
16 vec![
17 DataColumn {
18 name: "value".to_string(),
19 data_type: DataType::String,
20 nullable: false,
21 unique_values: Some(0),
22 null_count: 0,
23 metadata: HashMap::new(),
24 qualified_name: None,
25 source_table: None,
26 },
27 DataColumn {
28 name: "index".to_string(),
29 data_type: DataType::Integer,
30 nullable: false,
31 unique_values: Some(0),
32 null_count: 0,
33 metadata: HashMap::new(),
34 qualified_name: None,
35 source_table: None,
36 },
37 ]
38 }
39
40 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
41 if args.is_empty() {
42 return Err(anyhow!(
43 "SPLIT requires at least 1 argument (text to split)"
44 ));
45 }
46
47 let text = match &args[0] {
49 DataValue::String(s) => s.clone(),
50 DataValue::Null => return Err(anyhow!("SPLIT text cannot be NULL")),
51 other => other.to_string(),
52 };
53
54 let delimiter = if args.len() > 1 {
56 match &args[1] {
57 DataValue::String(s) => s.clone(),
58 DataValue::Null => " ".to_string(),
59 other => other.to_string(),
60 }
61 } else {
62 " ".to_string()
63 };
64
65 let mut table = DataTable::new("split");
66 table.add_column(DataColumn::new("value"));
67 table.add_column(DataColumn::new("index"));
68
69 if delimiter.is_empty() {
71 for (idx, ch) in text.chars().enumerate() {
73 table
74 .add_row(DataRow::new(vec![
75 DataValue::String(ch.to_string()),
76 DataValue::Integer((idx + 1) as i64),
77 ]))
78 .map_err(|e| anyhow!(e))?;
79 }
80 } else {
81 for (idx, part) in text.split(&delimiter).enumerate() {
83 if part.is_empty() {
85 continue;
86 }
87
88 table
89 .add_row(DataRow::new(vec![
90 DataValue::String(part.to_string()),
91 DataValue::Integer((idx + 1) as i64),
92 ]))
93 .map_err(|e| anyhow!(e))?;
94 }
95 }
96
97 Ok(Arc::new(table))
98 }
99
100 fn description(&self) -> &str {
101 "Split a string into rows based on delimiter"
102 }
103
104 fn arg_count(&self) -> usize {
105 2 }
107}
108
109pub struct Tokenize;
111
112impl TableGenerator for Tokenize {
113 fn name(&self) -> &str {
114 "TOKENIZE"
115 }
116
117 fn columns(&self) -> Vec<DataColumn> {
118 vec![
119 DataColumn {
120 name: "token".to_string(),
121 data_type: DataType::String,
122 nullable: false,
123 unique_values: Some(0),
124 null_count: 0,
125 metadata: HashMap::new(),
126 qualified_name: None,
127 source_table: None,
128 },
129 DataColumn {
130 name: "position".to_string(),
131 data_type: DataType::Integer,
132 nullable: false,
133 unique_values: Some(0),
134 null_count: 0,
135 metadata: HashMap::new(),
136 qualified_name: None,
137 source_table: None,
138 },
139 ]
140 }
141
142 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
143 if args.is_empty() {
144 return Err(anyhow!(
145 "TOKENIZE requires at least 1 argument (text to tokenize)"
146 ));
147 }
148
149 let text = match &args[0] {
151 DataValue::String(s) => s.clone(),
152 DataValue::Null => return Err(anyhow!("TOKENIZE text cannot be NULL")),
153 other => other.to_string(),
154 };
155
156 let case_option = if args.len() > 1 {
158 match &args[1] {
159 DataValue::String(s) => s.to_lowercase(),
160 _ => "preserve".to_string(),
161 }
162 } else {
163 "preserve".to_string()
164 };
165
166 let mut table = DataTable::new("tokenize");
167 table.add_column(DataColumn::new("token"));
168 table.add_column(DataColumn::new("position"));
169
170 let mut tokens = Vec::new();
172 let mut current_token = String::new();
173
174 for ch in text.chars() {
175 if ch.is_alphanumeric() {
176 current_token.push(ch);
177 } else if !current_token.is_empty() {
178 tokens.push(current_token.clone());
179 current_token.clear();
180 }
181 }
182
183 if !current_token.is_empty() {
185 tokens.push(current_token);
186 }
187
188 let tokens = match case_option.as_str() {
190 "lower" | "lowercase" => tokens.iter().map(|t| t.to_lowercase()).collect(),
191 "upper" | "uppercase" => tokens.iter().map(|t| t.to_uppercase()).collect(),
192 _ => tokens,
193 };
194
195 for (idx, token) in tokens.iter().enumerate() {
197 table
198 .add_row(DataRow::new(vec![
199 DataValue::String(token.clone()),
200 DataValue::Integer((idx + 1) as i64),
201 ]))
202 .map_err(|e| anyhow!(e))?;
203 }
204
205 Ok(Arc::new(table))
206 }
207
208 fn description(&self) -> &str {
209 "Extract alphanumeric tokens from text"
210 }
211
212 fn arg_count(&self) -> usize {
213 2 }
215}
216
217pub struct Chars;
219
220impl TableGenerator for Chars {
221 fn name(&self) -> &str {
222 "CHARS"
223 }
224
225 fn columns(&self) -> Vec<DataColumn> {
226 vec![
227 DataColumn {
228 name: "char".to_string(),
229 data_type: DataType::String,
230 nullable: false,
231 unique_values: Some(0),
232 null_count: 0,
233 metadata: HashMap::new(),
234 qualified_name: None,
235 source_table: None,
236 },
237 DataColumn {
238 name: "position".to_string(),
239 data_type: DataType::Integer,
240 nullable: false,
241 unique_values: Some(0),
242 null_count: 0,
243 metadata: HashMap::new(),
244 qualified_name: None,
245 source_table: None,
246 },
247 DataColumn {
248 name: "ascii".to_string(),
249 data_type: DataType::Integer,
250 nullable: false,
251 unique_values: Some(0),
252 null_count: 0,
253 metadata: HashMap::new(),
254 qualified_name: None,
255 source_table: None,
256 },
257 ]
258 }
259
260 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
261 if args.is_empty() {
262 return Err(anyhow!("CHARS requires 1 argument (text)"));
263 }
264
265 let text = match &args[0] {
267 DataValue::String(s) => s.clone(),
268 DataValue::Null => return Err(anyhow!("CHARS text cannot be NULL")),
269 other => other.to_string(),
270 };
271
272 let mut table = DataTable::new("chars");
273 table.add_column(DataColumn::new("char"));
274 table.add_column(DataColumn::new("position"));
275 table.add_column(DataColumn::new("ascii"));
276
277 for (idx, ch) in text.chars().enumerate() {
279 table
280 .add_row(DataRow::new(vec![
281 DataValue::String(ch.to_string()),
282 DataValue::Integer((idx + 1) as i64),
283 DataValue::Integer(ch as i64),
284 ]))
285 .map_err(|e| anyhow!(e))?;
286 }
287
288 Ok(Arc::new(table))
289 }
290
291 fn description(&self) -> &str {
292 "Split string into individual characters with ASCII codes"
293 }
294
295 fn arg_count(&self) -> usize {
296 1
297 }
298}
299
300pub struct Lines;
302
303impl TableGenerator for Lines {
304 fn name(&self) -> &str {
305 "LINES"
306 }
307
308 fn columns(&self) -> Vec<DataColumn> {
309 vec![
310 DataColumn {
311 name: "line".to_string(),
312 data_type: DataType::String,
313 nullable: false,
314 unique_values: Some(0),
315 null_count: 0,
316 metadata: HashMap::new(),
317 qualified_name: None,
318 source_table: None,
319 },
320 DataColumn {
321 name: "line_number".to_string(),
322 data_type: DataType::Integer,
323 nullable: false,
324 unique_values: Some(0),
325 null_count: 0,
326 metadata: HashMap::new(),
327 qualified_name: None,
328 source_table: None,
329 },
330 ]
331 }
332
333 fn generate(&self, args: Vec<DataValue>) -> Result<Arc<DataTable>> {
334 if args.is_empty() {
335 return Err(anyhow!("LINES requires 1 argument (text)"));
336 }
337
338 let text = match &args[0] {
340 DataValue::String(s) => s.clone(),
341 DataValue::Null => return Err(anyhow!("LINES text cannot be NULL")),
342 other => other.to_string(),
343 };
344
345 let mut table = DataTable::new("lines");
346 table.add_column(DataColumn::new("line"));
347 table.add_column(DataColumn::new("line_number"));
348
349 for (idx, line) in text.lines().enumerate() {
351 table
352 .add_row(DataRow::new(vec![
353 DataValue::String(line.to_string()),
354 DataValue::Integer((idx + 1) as i64),
355 ]))
356 .map_err(|e| anyhow!(e))?;
357 }
358
359 Ok(Arc::new(table))
360 }
361
362 fn description(&self) -> &str {
363 "Split text into lines"
364 }
365
366 fn arg_count(&self) -> usize {
367 1
368 }
369}