1use crate::DEFAULT_INDEX_COLUMN_NAME;
2use polars::prelude::{DataType, Schema};
3
4pub const DEFAULT_QUERY: &str = "\
8-- Select all columns and rows
9SELECT *
10FROM AllData;
11";
12
13const COLS_FILTER_OUT: [&str; 14] = [
16 DEFAULT_INDEX_COLUMN_NAME, "Average",
18 "Frequency",
19 "Total",
20 "As Float",
21 "Category",
22 "Calculation Result",
23 "Row Count",
24 "Unique Values",
25 "Minimum",
26 "Maximum",
27 "New Name for",
28 "+ 10%",
29 "DMY (day/month/year)", ];
32
33fn is_filtered_col(name: &str) -> bool {
38 let trimmed_name = name.trim();
39
40 if trimmed_name.is_empty() {
41 return true; }
43
44 COLS_FILTER_OUT
47 .iter()
48 .any(|substring| trimmed_name.contains(substring))
49}
50
51fn find_nth_col_name(
63 schema: &Schema,
64 n: usize,
65 target_dtype: impl Fn(&DataType) -> bool,
66) -> Option<&str> {
67 schema
68 .iter()
69 .filter(|(name, dtype)| !is_filtered_col(name.as_str()) && target_dtype(dtype))
70 .nth(n) .map(|(name, _dtype)| name.as_str())
72}
73
74fn generate_limit_example(commands: &mut Vec<String>) {
78 commands.push(
79 "\
80-- Limit the number of rows returned
81SELECT *
82FROM AllData
83LIMIT 50;
84"
85 .to_string(),
86 );
87}
88
89fn generate_select_specific_columns(
91 commands: &mut Vec<String>,
92 opt_str_col: Option<&str>,
93 opt_num_col: Option<&str>, ) {
95 if let (Some(col1), Some(col2)) = (opt_str_col, opt_num_col) {
96 commands.push(format!(
97 "\
98-- Select specific columns by name
99SELECT
100 `{col1}`,
101 `{col2}`
102FROM AllData;
103"
104 ));
105 }
106}
107
108fn generate_except_example(
110 commands: &mut Vec<String>,
111 opt_col1: Option<&str>, opt_col2: Option<&str>, ) {
114 if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
115 commands.push(format!(
116 "\
117-- Select all columns EXCEPT specific ones
118-- Useful for dropping temporary or unwanted columns
119SELECT *
120EXCEPT (
121 `{col1}`,
122 `{col2}`
123)
124FROM AllData;
125"
126 ));
127 }
128}
129
130fn generate_rename_example(
132 commands: &mut Vec<String>,
133 opt_col1: Option<&str>, opt_col2: Option<&str>, ) {
136 if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
137 commands.push(format!(
138 "\
139-- Rename columns while preserving order
140SELECT *
141RENAME (
142 `{col1}` AS `New Name for {col1}`,
143 `{col2}` AS `New Name for {col2}`
144)
145FROM AllData;
146"
147 ));
148 }
149}
150
151fn generate_replace_float_example(commands: &mut Vec<String>, opt_float_col: Option<&str>) {
153 if let Some(float_col) = opt_float_col {
154 commands.push(format!(
155 "\
156-- Replace values in a column with a calculation
157-- Example: Increase float column `{float_col}` by 10%
158SELECT *
159REPLACE (
160 `{float_col}` * 1.1
161 AS
162 `{float_col}`
163)
164FROM AllData;
165"
166 ));
167 }
168}
169
170fn generate_add_new_column_example(
172 commands: &mut Vec<String>,
173 opt_float_col: Option<&str>,
174 opt_int_col: Option<&str>,
175) {
176 if let (Some(col_f), Some(col_i)) = (opt_float_col, opt_int_col) {
177 commands.push(format!(
178 "\
179-- Add a NEW calculated column 'Calculation Result'
180-- Selects all original columns PLUS the new one
181SELECT *,
182 `{col_f}` * `{col_i}`
183 AS
184 `Calculation Result`
185FROM AllData;
186"
187 ));
188 }
189}
190
191fn generate_combined_transform_example(
193 commands: &mut Vec<String>,
194 opt_col1: Option<&str>, opt_col2: Option<&str>, opt_float_col: Option<&str>,
197) {
198 if let (Some(col1), Some(col2), Some(float_col)) = (opt_col1, opt_col2, opt_float_col) {
199 commands.push(format!(
200 "\
201-- Except, Replace and Rename
202SELECT *
203EXCEPT (`{col1}`)
204REPLACE (
205 `{float_col}` * 1.1
206 AS
207 `{float_col}`
208)
209RENAME (
210 `{float_col}` AS `{float_col} + 10%`,
211 `{col2}` AS `New Name for {col2}`
212)
213FROM AllData;
214"
215 ));
216 }
217}
218
219fn generate_cast_int_to_float_example(commands: &mut Vec<String>, opt_int_col: Option<&str>) {
221 if let Some(int_col) = opt_int_col {
222 commands.push(format!(
223 "\
224-- Explicitly CAST an integer column to FLOAT
225SELECT *,
226 CAST(`{int_col}` AS DOUBLE) AS `{int_col} As Float`
227FROM AllData;
228"
229 ));
230 }
231}
232
233fn generate_case_when_example(
235 commands: &mut Vec<String>,
236 opt_num_col: Option<&str>, ) {
238 if let Some(num_col) = opt_num_col {
239 commands.push(format!(
240 "\
241-- Create a new column based on conditions using CASE WHEN
242SELECT *,
243 CASE
244 WHEN `{num_col}` > 100 THEN 'High'
245 WHEN `{num_col}` > 10 THEN 'Medium'
246 ELSE 'Low'
247 END AS `{num_col} Category`
248FROM AllData;
249"
250 ));
251 }
252}
253
254fn generate_where_isnull_example(
256 commands: &mut Vec<String>,
257 opt_any_col: Option<&str>, ) {
259 if let Some(col) = opt_any_col {
260 commands.push(format!(
261 "\
262-- Filter rows where a column IS NULL or IS NOT NULL
263SELECT *
264FROM AllData
265WHERE
266 `{col}` IS NULL;
267-- Replace IS NULL with IS NOT NULL to get non-null rows
268"
269 ));
270 }
271}
272
273fn generate_where_int_equality_example(commands: &mut Vec<String>, opt_int_col: Option<&str>) {
275 if let Some(int_col) = opt_int_col {
276 commands.push(format!(
277 "\
278-- Filter rows where a numeric column equals a specific value
279-- number comparisons: =, >, <, >=, <=, !=
280SELECT *
281FROM AllData
282WHERE
283 `{int_col}` = 2024;
284-- Or filter by an (inclusive) range of values:
285-- WHERE `{int_col}` BETWEEN 2020 AND 2024
286-- Or filter based on a list of values using IN:
287-- WHERE `{int_col}` IN (2020, 2022, 2024)
288"
289 ));
290 }
291}
292
293fn generate_where_string_filter_example(commands: &mut Vec<String>, opt_str_col: Option<&str>) {
295 if let Some(str_col) = opt_str_col {
296 commands.push(format!(
297 "\
298-- Filter rows based on string content
299SELECT *
300FROM AllData
301WHERE
302 `{str_col}` = 'Specific Value';
303-- Or using LIKE for pattern matching (case-sensitive)
304-- WHERE `{str_col}` LIKE 'Prefix%';
305-- WHERE `{str_col}` LIKE '%Middle%';
306-- WHERE `{str_col}` LIKE '%Suffix';
307-- Use ILIKE for case-insensitive matching
308-- WHERE `{str_col}` ILIKE 'pattern%';
309-- Or filter based on a list of values using IN:
310-- WHERE `{str_col}` IN ('Value A', 'Value B')
311"
312 ));
313 }
314}
315
316fn generate_where_combined_example(
318 commands: &mut Vec<String>,
319 opt_int_col: Option<&str>,
320 opt_str_col: Option<&str>,
321) {
322 if let (Some(int_col), Some(str_col)) = (opt_int_col, opt_str_col) {
323 commands.push(format!(
324 "\
325-- Combine multiple filter conditions using AND / OR
326SELECT *
327FROM AllData
328WHERE
329 `{int_col}` >= 2024
330AND
331 `{str_col}` != 'Exclude This'
332-- Or filter based on a list of values using IN:
333-- WHERE `{int_col}` IN (2020, 2022, 2024)
334-- WHERE `{str_col}` IN ('Value A', 'Value B')
335"
336 ));
337 }
338}
339
340fn generate_orderby_example(
342 commands: &mut Vec<String>,
343 opt_str_col: Option<&str>,
344 opt_num_col: Option<&str>, ) {
346 if let (Some(col1), Some(col2)) = (opt_str_col, opt_num_col) {
347 commands.push(format!(
348 "\
349-- Sort results by a single column (ASC default, or DESC) or
350-- Sort results by multiple columns (precedence based on order)
351SELECT *
352FROM AllData
353ORDER BY
354 `{col1}` ASC, -- Primary sort key
355 `{col2}` DESC; -- Secondary sort key
356"
357 ));
358 }
359}
360
361fn generate_groupby_count_example(
363 commands: &mut Vec<String>,
364 opt_cat_col: Option<&str>, opt_col2: Option<&str>, ) {
367 if let (Some(col1), Some(col2)) = (opt_cat_col, opt_col2) {
369 commands.push(format!(
370 "\
371-- Count rows per category (Frequency)
372SELECT
373 `{col1}`,
374 `{col2}`,
375 COUNT(*) AS Frequency
376FROM AllData
377GROUP BY
378 `{col1}`,
379 `{col2}`
380ORDER BY
381 Frequency DESC;
382"
383 ));
384 }
385}
386
387fn generate_groupby_sum_example(
389 commands: &mut Vec<String>,
390 opt_str_col: Option<&str>, opt_num_col: Option<&str>, ) {
393 if let (Some(cat_col), Some(num_col)) = (opt_str_col, opt_num_col) {
394 commands.push(format!(
395 "\
396-- Calculate SUM of a numeric column per category
397SELECT
398 `{cat_col}`,
399 SUM(`{num_col}`) AS `{num_col} Total`
400FROM AllData
401GROUP BY
402 `{cat_col}`
403ORDER BY
404 `{num_col} Total` DESC;
405"
406 ));
407 }
408}
409
410fn generate_groupby_multiple_aggregates_example(
412 commands: &mut Vec<String>,
413 opt_str_col: Option<&str>, opt_val_col: Option<&str>, ) {
416 if let (Some(cat_col), Some(val_col)) = (opt_str_col, opt_val_col) {
417 commands.push(format!(
418 "\
419-- Calculate multiple aggregate functions per category
420SELECT
421 `{cat_col}`,
422 -- Total rows per category
423 COUNT(*) AS `Row Count`,
424 -- Count unique values in `{val_col}`:
425 COUNT(DISTINCT `{val_col}`) AS `Unique Values`,
426 SUM(`{val_col}`) AS Total, -- Sum of val_col
427 AVG(`{val_col}`) AS Average, -- Average of val_col
428 MIN(`{val_col}`) AS Minimum, -- Minimum value
429 MAX(`{val_col}`) AS Maximum -- Maximum value
430FROM AllData
431GROUP BY
432 `{cat_col}`
433ORDER BY
434 `Row Count` DESC;
435"
436 ));
437 }
438}
439
440fn generate_having_example(
442 commands: &mut Vec<String>,
443 opt_str_col: Option<&str>, opt_val_col: Option<&str>, ) {
446 if let (Some(cat_col), Some(val_col)) = (opt_str_col, opt_val_col) {
447 commands.push(format!(
448 "\
449-- Use HAVING to filter groups based on aggregate results
450SELECT
451 `{cat_col}`,
452 AVG(`{val_col}`) AS `{val_col} Average`
453FROM AllData
454GROUP BY
455 `{cat_col}`
456HAVING
457 -- Only show categories where the average > 1000
458 `{val_col} Average` > 1000
459ORDER BY
460 `{val_col} Average` DESC;
461"
462 ));
463 }
464}
465
466fn generate_strftime_example(commands: &mut Vec<String>, opt_date_col: Option<&str>) {
468 if let Some(date_col) = opt_date_col {
469 commands.push(format!(
470 "\
471-- Format a date column into DD/MM/YYYY using STRFTIME
472SELECT *, -- Select all original columns
473 STRFTIME(`{date_col}`, '%d/%m/%Y') AS `DMY (day/month/year)`
474FROM AllData
475ORDER BY
476 `{date_col}`;
477"
478 ));
479 }
480}
481
482fn generate_distinct_example(
484 commands: &mut Vec<String>,
485 opt_col1: Option<&str>, opt_col2: Option<&str>, ) {
488 if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
491 commands.push(format!(
492 "\
493-- Show distinct values of columns
494SELECT DISTINCT
495 `{col1}`,
496 `{col2}`
497FROM AllData
498ORDER BY
499 `{col2}`;
500"
501 ));
502 }
503}
504
505fn replace_cell_value(
508 commands: &mut Vec<String>,
509 opt_int_col: Option<&str>,
510 opt_str_col: Option<&str>,
511) {
512 match (opt_int_col, opt_str_col) {
513 (Some(int_col), _) => {
514 commands.push(format!(
515 "\
516-- Replaces cell values based on a condition using CASE WHEN in REPLACE
517SELECT *
518REPLACE (
519 CASE
520 -- Condition to identify the specific row (example uses an integer column)
521 WHEN `{int_col}` >= 10
522 -- The new value for cells (some integer or string value)
523 THEN 'New String Value'
524 -- Keep the original values for all other rows in this column
525 ELSE `{int_col}`
526 -- Apply the result back to the original column name or another column
527 END AS `{int_col}`
528)
529FROM AllData;
530"
531 ));
532 }
533 (None, Some(str_col)) => {
534 commands.push(format!(
535 "\
536-- Replaces cell values based on a condition using CASE WHEN in REPLACE
537SELECT *
538REPLACE (
539 CASE
540 -- Condition to identify the specific row (example uses a string column with ILIKE)
541 WHEN `{str_col}` ILIKE '%pattern%'
542 -- The new value for cells (some integer or string value)
543 THEN 'New String Value'
544 -- Keep the original values for all other rows in this column
545 ELSE `{str_col}`
546 -- Apply the result back to the original column name or another column
547 END AS `{str_col}`
548)
549FROM AllData;
550"
551 ));
552 }
553 (None, None) => (), }
555}
556
557pub fn sql_commands(schema: &Schema) -> Vec<String> {
562 let mut commands: Vec<String> = vec![DEFAULT_QUERY.to_string()];
564
565 let is_any = |dtype: &DataType| !dtype.is_null(); let opt_str_col = find_nth_col_name(schema, 0, |dtype| dtype.is_string());
572 let opt_int_col = find_nth_col_name(schema, 0, |dtype| dtype.is_integer());
573 let opt_float_col = find_nth_col_name(schema, 0, |dtype| dtype.is_float());
574 let opt_date_col = find_nth_col_name(schema, 0, |dtype| dtype.is_date());
575
576 let opt_any_col = find_nth_col_name(schema, 0, is_any); let opt_any_col_1 = find_nth_col_name(schema, 1, is_any); let opt_any_col_2 = find_nth_col_name(schema, 2, is_any); let opt_int_or_float_col = opt_int_col.or(opt_float_col);
585 let opt_date_int_str_or_any_col = opt_date_col.or(opt_int_col).or(opt_str_col).or(opt_any_col);
586
587 generate_limit_example(&mut commands);
591 generate_select_specific_columns(&mut commands, opt_str_col, opt_int_or_float_col);
592 generate_except_example(&mut commands, opt_any_col, opt_any_col_2); generate_rename_example(&mut commands, opt_any_col, opt_any_col_1); generate_replace_float_example(&mut commands, opt_float_col);
595 replace_cell_value(&mut commands, opt_int_col, opt_str_col); generate_add_new_column_example(&mut commands, opt_float_col, opt_int_col);
597 generate_combined_transform_example(&mut commands, opt_any_col, opt_any_col_1, opt_float_col); generate_cast_int_to_float_example(&mut commands, opt_int_col);
601
602 generate_case_when_example(&mut commands, opt_int_or_float_col);
604
605 generate_where_isnull_example(&mut commands, opt_any_col);
607 generate_where_int_equality_example(&mut commands, opt_int_col);
608 generate_where_string_filter_example(&mut commands, opt_str_col);
609 generate_where_combined_example(&mut commands, opt_int_col, opt_str_col);
610
611 generate_orderby_example(&mut commands, opt_str_col, opt_int_or_float_col);
613
614 generate_groupby_count_example(&mut commands, opt_date_int_str_or_any_col, opt_any_col_2); generate_groupby_sum_example(&mut commands, opt_str_col, opt_int_or_float_col); generate_groupby_multiple_aggregates_example(&mut commands, opt_str_col, opt_int_or_float_col); generate_having_example(&mut commands, opt_str_col, opt_int_or_float_col); generate_strftime_example(&mut commands, opt_date_col);
623
624 generate_distinct_example(&mut commands, opt_any_col, opt_any_col_2); commands
628}