Skip to main content

polars_view/
sqls.rs

1use crate::DEFAULT_INDEX_COLUMN_NAME;
2use polars::prelude::{DataType, Schema};
3
4// Constants for SQL Generation
5
6/// The default SQL query, selected when the application starts or when examples are unavailable.
7pub const DEFAULT_QUERY: &str = "\
8-- Select all columns and rows
9SELECT *
10FROM AllData;
11";
12
13/// Column names potentially generated by Polars or common aggregation results
14/// that are typically filtered out when searching for *source* columns for examples.
15const COLS_FILTER_OUT: [&str; 14] = [
16    DEFAULT_INDEX_COLUMN_NAME, // Default name from the index column feature
17    "Average",
18    "Frequency",
19    "Total",
20    "As Float",
21    "Category",
22    "Calculation Result",
23    "Row Count",
24    "Unique Values",
25    "Minimum",
26    "Maximum",
27    "New Name for",
28    "+ 10%",
29    "DMY (day/month/year)", // Output of STRFTIME example
30                            // Add other common aggregate/generated names here if needed
31];
32
33// --- Helper Functions for Column Finding ---
34
35/// Checks if a column name should be filtered out from examples, either because
36/// it's empty/whitespace or contains a known filtered substring.
37fn is_filtered_col(name: &str) -> bool {
38    let trimmed_name = name.trim();
39
40    if trimmed_name.is_empty() {
41        return true; // Filter empty/whitespace names
42    }
43
44    // Check if the column name CONTAINS any of the substrings to filter out.
45    // The `any()` method stops checking as soon as it finds a match.
46    COLS_FILTER_OUT
47        .iter()
48        .any(|substring| trimmed_name.contains(substring))
49}
50
51/// Finds the name of the Nth (0-based) column in the schema that matches a specified data type.
52/// Columns in `COLS_FILTER_OUT` are ignored.
53///
54/// # Arguments
55/// * `schema`: The DataFrame schema.
56/// * `n`: The 0-based index (0 for first, 1 for second, etc.).
57/// * `target_dtype`: A closure `Fn(&DataType) -> bool` that defines the type check logic.
58///
59/// # Returns
60/// * `Some(&'a str)`: The name of the Nth matching column, borrowing from the schema.
61/// * `None`: If fewer than N+1 columns satisfy the target_dtype and are not filtered out.
62fn find_nth_col_name(
63    schema: &Schema,
64    n: usize,
65    target_dtype: impl Fn(&DataType) -> bool,
66) -> Option<&str> {
67    schema
68        .iter()
69        .filter(|(name, dtype)| !is_filtered_col(name.as_str()) && target_dtype(dtype))
70        .nth(n) // nth(0) returns the first value, nth(1) the second, and so on.
71        .map(|(name, _dtype)| name.as_str())
72}
73
74// --- Functions to Generate Specific SQL Examples ---
75
76/// Generates an example SQL query demonstrating the LIMIT clause.
77fn generate_limit_example(commands: &mut Vec<String>) {
78    commands.push(
79        "\
80-- Limit the number of rows returned
81SELECT *
82FROM AllData
83LIMIT 50;
84"
85        .to_string(),
86    );
87}
88
89/// Generates an example SQL query demonstrating selecting specific columns.
90fn generate_select_specific_columns(
91    commands: &mut Vec<String>,
92    opt_str_col: Option<&str>,
93    opt_num_col: Option<&str>, // Represents an Option<Int or Float column>
94) {
95    if let (Some(col1), Some(col2)) = (opt_str_col, opt_num_col) {
96        commands.push(format!(
97            "\
98-- Select specific columns by name
99SELECT
100    `{col1}`,
101    `{col2}`
102FROM AllData;
103"
104        ));
105    }
106}
107
108/// Generates an example SQL query demonstrating the EXCEPT clause.
109fn generate_except_example(
110    commands: &mut Vec<String>,
111    opt_col1: Option<&str>, // Needs the first non-filtered column
112    opt_col2: Option<&str>, // Needs the third non-filtered column (index 2)
113) {
114    if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
115        commands.push(format!(
116            "\
117-- Select all columns EXCEPT specific ones
118-- Useful for dropping temporary or unwanted columns
119SELECT *
120EXCEPT (
121    `{col1}`,
122    `{col2}`
123)
124FROM AllData;
125"
126        ));
127    }
128}
129
130/// Generates an example SQL query demonstrating the RENAME clause.
131fn generate_rename_example(
132    commands: &mut Vec<String>,
133    opt_col1: Option<&str>, // Needs the first non-filtered column
134    opt_col2: Option<&str>, // Needs the second non-filtered column
135) {
136    if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
137        commands.push(format!(
138            "\
139-- Rename columns while preserving order
140SELECT *
141RENAME (
142    `{col1}` AS `New Name for {col1}`,
143    `{col2}` AS `New Name for {col2}`
144)
145FROM AllData;
146"
147        ));
148    }
149}
150
151/// Generates an example SQL query demonstrating the REPLACE clause for a float column.
152fn generate_replace_float_example(commands: &mut Vec<String>, opt_float_col: Option<&str>) {
153    if let Some(float_col) = opt_float_col {
154        commands.push(format!(
155            "\
156-- Replace values in a column with a calculation
157-- Example: Increase float column `{float_col}` by 10%
158SELECT *
159REPLACE (
160    `{float_col}` * 1.1
161    AS
162    `{float_col}`
163)
164FROM AllData;
165"
166        ));
167    }
168}
169
170/// Generates an example SQL query demonstrating adding a NEW calculated column.
171fn generate_add_new_column_example(
172    commands: &mut Vec<String>,
173    opt_float_col: Option<&str>,
174    opt_int_col: Option<&str>,
175) {
176    if let (Some(col_f), Some(col_i)) = (opt_float_col, opt_int_col) {
177        commands.push(format!(
178            "\
179-- Add a NEW calculated column 'Calculation Result'
180-- Selects all original columns PLUS the new one
181SELECT *,
182       `{col_f}` * `{col_i}`
183       AS
184       `Calculation Result`
185FROM AllData;
186"
187        ));
188    }
189}
190
191/// Generates an example SQL query combining EXCEPT, REPLACE, and RENAME.
192fn generate_combined_transform_example(
193    commands: &mut Vec<String>,
194    opt_col1: Option<&str>, // First non-filtered column
195    opt_col2: Option<&str>, // Second non-filtered column
196    opt_float_col: Option<&str>,
197) {
198    if let (Some(col1), Some(col2), Some(float_col)) = (opt_col1, opt_col2, opt_float_col) {
199        commands.push(format!(
200            "\
201-- Except, Replace and Rename
202SELECT *
203EXCEPT (`{col1}`)
204REPLACE (
205    `{float_col}` * 1.1
206    AS
207    `{float_col}`
208)
209RENAME (
210    `{float_col}` AS `{float_col} + 10%`,
211    `{col2}` AS `New Name for {col2}`
212)
213FROM AllData;
214"
215        ));
216    }
217}
218
219/// Generates an example SQL query demonstrating casting an integer column to float.
220fn generate_cast_int_to_float_example(commands: &mut Vec<String>, opt_int_col: Option<&str>) {
221    if let Some(int_col) = opt_int_col {
222        commands.push(format!(
223            "\
224-- Explicitly CAST an integer column to FLOAT
225SELECT *,
226    CAST(`{int_col}` AS DOUBLE) AS `{int_col} As Float`
227FROM AllData;
228"
229        ));
230    }
231}
232
233/// Generates an example SQL query demonstrating conditional logic with CASE WHEN.
234fn generate_case_when_example(
235    commands: &mut Vec<String>,
236    opt_num_col: Option<&str>, // Represents an Option<Int or Float column>
237) {
238    if let Some(num_col) = opt_num_col {
239        commands.push(format!(
240            "\
241-- Create a new column based on conditions using CASE WHEN
242SELECT *,
243    CASE
244        WHEN `{num_col}` > 100 THEN 'High'
245        WHEN `{num_col}` > 10 THEN 'Medium'
246        ELSE 'Low'
247    END AS `{num_col} Category`
248FROM AllData;
249"
250        ));
251    }
252}
253
254/// Generates an example SQL query demonstrating filtering with IS NULL / IS NOT NULL.
255fn generate_where_isnull_example(
256    commands: &mut Vec<String>,
257    opt_any_col: Option<&str>, // Any non-filtered column
258) {
259    if let Some(col) = opt_any_col {
260        commands.push(format!(
261            "\
262-- Filter rows where a column IS NULL or IS NOT NULL
263SELECT *
264FROM AllData
265WHERE
266    `{col}` IS NULL;
267-- Replace IS NULL with IS NOT NULL to get non-null rows
268"
269        ));
270    }
271}
272
273/// Generates an example SQL query demonstrating filtering by integer equality.
274fn generate_where_int_equality_example(commands: &mut Vec<String>, opt_int_col: Option<&str>) {
275    if let Some(int_col) = opt_int_col {
276        commands.push(format!(
277            "\
278-- Filter rows where a numeric column equals a specific value
279-- number comparisons: =, >, <, >=, <=, !=
280SELECT *
281FROM AllData
282WHERE
283    `{int_col}` = 2024;
284-- Or filter by an (inclusive) range of values:
285-- WHERE `{int_col}` BETWEEN 2020 AND 2024
286-- Or filter based on a list of values using IN:
287-- WHERE `{int_col}` IN (2020, 2022, 2024)
288"
289        ));
290    }
291}
292
293/// Generates an example SQL query demonstrating filtering by string content using = / LIKE / ILIKE.
294fn generate_where_string_filter_example(commands: &mut Vec<String>, opt_str_col: Option<&str>) {
295    if let Some(str_col) = opt_str_col {
296        commands.push(format!(
297            "\
298-- Filter rows based on string content
299SELECT *
300FROM AllData
301WHERE
302    `{str_col}` = 'Specific Value';
303-- Or using LIKE for pattern matching (case-sensitive)
304-- WHERE `{str_col}` LIKE 'Prefix%';
305-- WHERE `{str_col}` LIKE '%Middle%';
306-- WHERE `{str_col}` LIKE '%Suffix';
307-- Use ILIKE for case-insensitive matching
308-- WHERE `{str_col}` ILIKE 'pattern%';
309-- Or filter based on a list of values using IN:
310-- WHERE `{str_col}` IN ('Value A', 'Value B')
311"
312        ));
313    }
314}
315
316/// Generates an example SQL query demonstrating combined filtering with AND / OR.
317fn generate_where_combined_example(
318    commands: &mut Vec<String>,
319    opt_int_col: Option<&str>,
320    opt_str_col: Option<&str>,
321) {
322    if let (Some(int_col), Some(str_col)) = (opt_int_col, opt_str_col) {
323        commands.push(format!(
324            "\
325-- Combine multiple filter conditions using AND / OR
326SELECT *
327FROM AllData
328WHERE
329    `{int_col}` >= 2024
330AND
331    `{str_col}` != 'Exclude This'
332-- Or filter based on a list of values using IN:
333-- WHERE `{int_col}` IN (2020, 2022, 2024)
334-- WHERE `{str_col}` IN ('Value A', 'Value B')
335"
336        ));
337    }
338}
339
340/// Generates an example SQL query demonstrating ordering results by multiple columns.
341fn generate_orderby_example(
342    commands: &mut Vec<String>,
343    opt_str_col: Option<&str>,
344    opt_num_col: Option<&str>, // Represents an Option<Int or Float column>
345) {
346    if let (Some(col1), Some(col2)) = (opt_str_col, opt_num_col) {
347        commands.push(format!(
348            "\
349-- Sort results by a single column (ASC default, or DESC) or
350-- Sort results by multiple columns (precedence based on order)
351SELECT *
352FROM AllData
353ORDER BY
354    `{col1}` ASC, -- Primary sort key
355    `{col2}` DESC; -- Secondary sort key
356"
357        ));
358    }
359}
360
361/// Generates a basic GROUP BY example query demonstrating counting rows per category.
362fn generate_groupby_count_example(
363    commands: &mut Vec<String>,
364    opt_cat_col: Option<&str>, // Represents an Option<Date or Int or Str or Any non-filtered column>
365    opt_col2: Option<&str>,    // Represents the third non-filtered column (index 2)
366) {
367    // Needs a column suitable for grouping (like Date, Int, String, or just Any)
368    if let (Some(col1), Some(col2)) = (opt_cat_col, opt_col2) {
369        commands.push(format!(
370            "\
371-- Count rows per category (Frequency)
372SELECT
373    `{col1}`,
374    `{col2}`,
375    COUNT(*) AS Frequency
376FROM AllData
377GROUP BY
378    `{col1}`,
379    `{col2}`
380ORDER BY
381    Frequency DESC;
382"
383        ));
384    }
385}
386
387/// Generates a GROUP BY example query demonstrating summing a numeric column per category.
388fn generate_groupby_sum_example(
389    commands: &mut Vec<String>,
390    opt_str_col: Option<&str>, // Column for category
391    opt_num_col: Option<&str>, // Column for summing
392) {
393    if let (Some(cat_col), Some(num_col)) = (opt_str_col, opt_num_col) {
394        commands.push(format!(
395            "\
396-- Calculate SUM of a numeric column per category
397SELECT
398    `{cat_col}`,
399    SUM(`{num_col}`) AS `{num_col} Total`
400FROM AllData
401GROUP BY
402    `{cat_col}`
403ORDER BY
404    `{num_col} Total` DESC;
405"
406        ));
407    }
408}
409
410/// Generates a GROUP BY example query demonstrating multiple aggregate functions.
411fn generate_groupby_multiple_aggregates_example(
412    commands: &mut Vec<String>,
413    opt_str_col: Option<&str>, // Column for category
414    opt_val_col: Option<&str>, // Column for aggregation
415) {
416    if let (Some(cat_col), Some(val_col)) = (opt_str_col, opt_val_col) {
417        commands.push(format!(
418            "\
419-- Calculate multiple aggregate functions per category
420SELECT
421    `{cat_col}`,
422    -- Total rows per category
423    COUNT(*) AS `Row Count`,
424    -- Count unique values in `{val_col}`:
425    COUNT(DISTINCT `{val_col}`) AS `Unique Values`,
426    SUM(`{val_col}`) AS Total,     -- Sum of val_col
427    AVG(`{val_col}`) AS Average,   -- Average of val_col
428    MIN(`{val_col}`) AS Minimum,   -- Minimum value
429    MAX(`{val_col}`) AS Maximum    -- Maximum value
430FROM AllData
431GROUP BY
432    `{cat_col}`
433ORDER BY
434    `Row Count` DESC;
435"
436        ));
437    }
438}
439
440/// Generates a GROUP BY example query demonstrating filtering groups with HAVING.
441fn generate_having_example(
442    commands: &mut Vec<String>,
443    opt_str_col: Option<&str>, // Column for category
444    opt_val_col: Option<&str>, // Column for aggregation (must be numeric for AVG)
445) {
446    if let (Some(cat_col), Some(val_col)) = (opt_str_col, opt_val_col) {
447        commands.push(format!(
448            "\
449-- Use HAVING to filter groups based on aggregate results
450SELECT
451    `{cat_col}`,
452    AVG(`{val_col}`) AS `{val_col} Average`
453FROM AllData
454GROUP BY
455    `{cat_col}`
456HAVING
457    -- Only show categories where the average > 1000
458    `{val_col} Average` > 1000
459ORDER BY
460    `{val_col} Average` DESC;
461"
462        ));
463    }
464}
465
466/// Generates an example SQL query demonstrating formatting a date column using STRFTIME.
467fn generate_strftime_example(commands: &mut Vec<String>, opt_date_col: Option<&str>) {
468    if let Some(date_col) = opt_date_col {
469        commands.push(format!(
470            "\
471-- Format a date column into DD/MM/YYYY using STRFTIME
472SELECT *, -- Select all original columns
473    STRFTIME(`{date_col}`, '%d/%m/%Y') AS `DMY (day/month/year)`
474FROM AllData
475ORDER BY
476    `{date_col}`;
477"
478        ));
479    }
480}
481
482/// Generates an example SQL query demonstrating the DISTINCT clause.
483fn generate_distinct_example(
484    commands: &mut Vec<String>,
485    opt_col1: Option<&str>, // First non-filtered column
486    opt_col2: Option<&str>, // Third non-filtered column (index 2)
487) {
488    // Note: Original code used opt_any_col and opt_any_col_2 for Distinct.
489    // This matches the parameters needed here.
490    if let (Some(col1), Some(col2)) = (opt_col1, opt_col2) {
491        commands.push(format!(
492            "\
493-- Show distinct values of columns
494SELECT DISTINCT
495    `{col1}`,
496    `{col2}`
497FROM AllData
498ORDER BY
499    `{col2}`;
500"
501        ));
502    }
503}
504
505/// Generates an example SQL query demonstrating replacing a cell value using CASE WHEN in REPLACE.
506/// (Existing function kept for completeness, matches the pattern)
507fn replace_cell_value(
508    commands: &mut Vec<String>,
509    opt_int_col: Option<&str>,
510    opt_str_col: Option<&str>,
511) {
512    match (opt_int_col, opt_str_col) {
513        (Some(int_col), _) => {
514            commands.push(format!(
515                "\
516-- Replaces cell values based on a condition using CASE WHEN in REPLACE
517SELECT *
518REPLACE (
519    CASE
520        -- Condition to identify the specific row (example uses an integer column)
521        WHEN `{int_col}` >= 10
522        -- The new value for cells (some integer or string value)
523        THEN 'New String Value'
524        -- Keep the original values for all other rows in this column
525        ELSE `{int_col}`
526    -- Apply the result back to the original column name or another column
527    END AS `{int_col}`
528)
529FROM AllData;
530"
531            ));
532        }
533        (None, Some(str_col)) => {
534            commands.push(format!(
535                "\
536-- Replaces cell values based on a condition using CASE WHEN in REPLACE
537SELECT *
538REPLACE (
539    CASE
540        -- Condition to identify the specific row (example uses a string column with ILIKE)
541        WHEN `{str_col}` ILIKE '%pattern%'
542        -- The new value for cells (some integer or string value)
543        THEN 'New String Value'
544        -- Keep the original values for all other rows in this column
545        ELSE `{str_col}`
546    -- Apply the result back to the original column name or another column
547    END AS `{str_col}`
548)
549FROM AllData;
550"
551            ));
552        }
553        (None, None) => (), // No suitable column found
554    }
555}
556
557// --- Main SQL Command Generator ---
558
559/// Generates a list of example SQL commands based on the provided DataFrame schema.
560/// Uses helper functions to find suitable columns and generate diverse examples.
561pub fn sql_commands(schema: &Schema) -> Vec<String> {
562    // Start with the default query
563    let mut commands: Vec<String> = vec![DEFAULT_QUERY.to_string()];
564
565    // --- target_dtypes for finding usable columns ---
566    let is_any = |dtype: &DataType| !dtype.is_null(); // Find any non-null type column
567
568    // Find first few usable columns of different types for examples
569    // The index used for finding these columns corresponds to their potential
570    // use case in the examples.
571    let opt_str_col = find_nth_col_name(schema, 0, |dtype| dtype.is_string());
572    let opt_int_col = find_nth_col_name(schema, 0, |dtype| dtype.is_integer());
573    let opt_float_col = find_nth_col_name(schema, 0, |dtype| dtype.is_float());
574    let opt_date_col = find_nth_col_name(schema, 0, |dtype| dtype.is_date());
575
576    // Find the Nth available column of *any* type, filtering out specified names.
577    // These are used for examples that don't require a specific type,
578    // or need distinct columns for comparison/exclusion/etc.
579    let opt_any_col = find_nth_col_name(schema, 0, is_any); // First usable column
580    let opt_any_col_1 = find_nth_col_name(schema, 1, is_any); // Second usable column
581    let opt_any_col_2 = find_nth_col_name(schema, 2, is_any); // Third usable column
582
583    // Helper options combining types
584    let opt_int_or_float_col = opt_int_col.or(opt_float_col);
585    let opt_date_int_str_or_any_col = opt_date_col.or(opt_int_col).or(opt_str_col).or(opt_any_col);
586
587    // === Example Generation Calls (grouped by type/clause) ===
588
589    // SELECT Clause Examples
590    generate_limit_example(&mut commands);
591    generate_select_specific_columns(&mut commands, opt_str_col, opt_int_or_float_col);
592    generate_except_example(&mut commands, opt_any_col, opt_any_col_2); // Needs 1st and 3rd usable cols
593    generate_rename_example(&mut commands, opt_any_col, opt_any_col_1); // Needs 1st and 2nd usable cols
594    generate_replace_float_example(&mut commands, opt_float_col);
595    replace_cell_value(&mut commands, opt_int_col, opt_str_col); // Calls the pre-existing one
596    generate_add_new_column_example(&mut commands, opt_float_col, opt_int_col);
597    generate_combined_transform_example(&mut commands, opt_any_col, opt_any_col_1, opt_float_col); // Needs 1st, 2nd usable cols & float
598
599    // Data Type Casting Examples
600    generate_cast_int_to_float_example(&mut commands, opt_int_col);
601
602    // Conditional Logic Examples
603    generate_case_when_example(&mut commands, opt_int_or_float_col);
604
605    // WHERE Clause Examples
606    generate_where_isnull_example(&mut commands, opt_any_col);
607    generate_where_int_equality_example(&mut commands, opt_int_col);
608    generate_where_string_filter_example(&mut commands, opt_str_col);
609    generate_where_combined_example(&mut commands, opt_int_col, opt_str_col);
610
611    // ORDER BY Examples
612    generate_orderby_example(&mut commands, opt_str_col, opt_int_or_float_col);
613
614    // GROUP BY Examples
615    // Needs a column suitable for grouping and one for aggregation
616    generate_groupby_count_example(&mut commands, opt_date_int_str_or_any_col, opt_any_col_2); // Needs usable grouping col and 3rd usable col
617    generate_groupby_sum_example(&mut commands, opt_str_col, opt_int_or_float_col); // Needs str for grouping, int/float for summing
618    generate_groupby_multiple_aggregates_example(&mut commands, opt_str_col, opt_int_or_float_col); // Needs str for grouping, int/float for aggregates
619    generate_having_example(&mut commands, opt_str_col, opt_int_or_float_col); // Needs str for grouping, int/float for average
620
621    // Date/Time Functions Examples
622    generate_strftime_example(&mut commands, opt_date_col);
623
624    // Miscellaneous Examples
625    generate_distinct_example(&mut commands, opt_any_col, opt_any_col_2); // Needs 1st and 3rd usable cols
626
627    commands
628}