flowscope_core/generated/
functions.rs

1//! Function classification sets.
2//!
3//! Generated from functions.json
4//!
5//! This module provides sets of SQL function names categorized by their behavior
6//! (aggregate, window, table-generating). These classifications are used during
7//! lineage analysis to determine how expressions should be analyzed.
8
9use std::collections::HashSet;
10use std::sync::LazyLock;
11
/// Lowercase names of every SQL function classified as an aggregate (57 entries).
///
/// The set is built once, on first access, and shared for the rest of the
/// program's lifetime. Lookups must use lowercase names; see
/// `is_aggregate_function` for a case-insensitive check.
pub static AGGREGATE_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "agg_func",
        "ai_agg",
        "ai_summarize_agg",
        "any_value",
        "approx_distinct",
        "approx_quantile",
        "approx_quantiles",
        "approx_top_k",
        "approx_top_k_accumulate",
        "approx_top_k_combine",
        "approx_top_sum",
        "approximate_similarity",
        "arg_max",
        "arg_min",
        "array_agg",
        "array_concat_agg",
        "array_union_agg",
        "array_unique_agg",
        "avg",
        "bitmap_construct_agg",
        "bitmap_or_agg",
        "bitwise_and_agg",
        "bitwise_or_agg",
        "bitwise_xor_agg",
        "boolxor_agg",
        "combined_agg_func",
        "combined_parameterized_agg",
        "corr",
        "count",
        "count_if",
        "covar_pop",
        "covar_samp",
        "first",
        "group_concat",
        "grouping",
        "grouping_id",
        "hll",
        "json_object_agg",
        "jsonb_object_agg",
        "last",
        "logical_and",
        "logical_or",
        "max",
        "median",
        "min",
        "minhash",
        "minhash_combine",
        "object_agg",
        "parameterized_agg",
        "quantile",
        "skewness",
        "stddev",
        "stddev_pop",
        "stddev_samp",
        "sum",
        "variance",
        "variance_pop",
    ])
});
74
/// Lowercase names of every SQL function classified as a window function (13 entries).
///
/// The set is built once, on first access. Lookups must use lowercase names;
/// see `is_window_function` for a case-insensitive check.
pub static WINDOW_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "cume_dist",
        "dense_rank",
        "first_value",
        "lag",
        "last_value",
        "lead",
        "nth_value",
        "ntile",
        "percent_rank",
        "percentile_cont",
        "percentile_disc",
        "rank",
        "row_number",
    ])
});
93
/// Lowercase names of every table-generating function / UDTF (5 entries).
///
/// The set is built once, on first access. Lookups must use lowercase names;
/// see `is_udtf_function` for a case-insensitive check.
pub static UDTF_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    HashSet::from([
        "explode",
        "explode_outer",
        "posexplode",
        "posexplode_outer",
        "unnest",
    ])
});
104
105/// Checks if a function is an aggregate function (e.g., SUM, COUNT, AVG).
106///
107/// Aggregate functions combine multiple input rows into a single output value.
108/// This classification is used to detect aggregation in SELECT expressions
109/// and validate GROUP BY semantics.
110///
111/// The check is case-insensitive. Uses ASCII lowercase for performance since
112/// SQL function names are always ASCII.
113pub fn is_aggregate_function(name: &str) -> bool {
114    // SQL function names are ASCII, so we can use the faster ASCII lowercase
115    let lower = name.to_ascii_lowercase();
116    AGGREGATE_FUNCTIONS.contains(lower.as_str())
117}
118
119/// Checks if a function is a window function (e.g., ROW_NUMBER, RANK, LAG).
120///
121/// Window functions perform calculations across a set of rows related to
122/// the current row, without collapsing them into a single output.
123///
124/// The check is case-insensitive. Uses ASCII lowercase for performance since
125/// SQL function names are always ASCII.
126pub fn is_window_function(name: &str) -> bool {
127    // SQL function names are ASCII, so we can use the faster ASCII lowercase
128    let lower = name.to_ascii_lowercase();
129    WINDOW_FUNCTIONS.contains(lower.as_str())
130}
131
132/// Checks if a function is a table-generating function / UDTF (e.g., UNNEST, EXPLODE).
133///
134/// UDTFs return multiple rows for each input row, expanding the result set.
135/// This classification affects how lineage is tracked through these functions.
136///
137/// The check is case-insensitive. Uses ASCII lowercase for performance since
138/// SQL function names are always ASCII.
139pub fn is_udtf_function(name: &str) -> bool {
140    // SQL function names are ASCII, so we can use the faster ASCII lowercase
141    let lower = name.to_ascii_lowercase();
142    UDTF_FUNCTIONS.contains(lower.as_str())
143}