use indexmap::IndexMap;
use serde::Deserialize;
use std::collections::{BTreeMap, BTreeSet};
use std::env;
use std::error::Error;
use std::fs;
use std::io;
use std::path::Path;
const KNOWN_DIALECTS: &[&str] = &[
"bigquery",
"clickhouse",
"databricks",
"duckdb",
"hive",
"mssql",
"mysql",
"postgres",
"redshift",
"snowflake",
"sqlite",
];
fn main() {
if let Err(e) = run() {
eprintln!("Build script error: {e}");
std::process::exit(1);
}
}
fn run() -> Result<(), Box<dyn Error>> {
let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap_or_default();
let in_packaged_dir = manifest_dir.contains("/target/package/");
if env::var("CARGO_PUBLISH").is_ok() || in_packaged_dir {
println!("cargo:rerun-if-changed=specs/dialect-semantics/");
println!("cargo:rerun-if-changed=build.rs");
return Ok(());
}
let spec_dir = Path::new("specs/dialect-semantics");
if !spec_dir.exists() {
return Err(format!(
"Spec directory not found at {:?}. Expected at crates/flowscope-core/specs/dialect-semantics/",
spec_dir.canonicalize().unwrap_or_else(|_| spec_dir.to_path_buf())
).into());
}
let generated_dir = Path::new("src/generated");
fs::create_dir_all(generated_dir)
.map_err(|e| format!("Failed to create src/generated directory: {e}"))?;
let dialects = load_dialects_json(spec_dir)?;
let functions = load_functions_json(spec_dir)?;
let normalization_overrides = load_normalization_overrides(spec_dir)?;
let scoping_rules = load_scoping_rules(spec_dir)?;
let dialect_behavior = load_dialect_behavior(spec_dir)?;
let type_system = load_type_system(spec_dir)?;
validate_dialect_coverage(&dialects, &scoping_rules);
generate_mod_rs(generated_dir)?;
generate_case_sensitivity(generated_dir, &dialects, &normalization_overrides)?;
generate_scoping_rules(generated_dir, &scoping_rules)?;
generate_function_rules(generated_dir, &dialect_behavior)?;
generate_functions(generated_dir, &functions)?;
generate_type_system(generated_dir, &type_system)?;
println!("cargo:rerun-if-changed=specs/dialect-semantics/");
println!("cargo:rerun-if-changed=build.rs");
Ok(())
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)] struct DialectSpec {
normalization: String,
#[serde(default)]
pseudocolumns: Vec<String>,
#[serde(default)]
quote_chars: Option<QuoteChars>,
#[serde(default)]
parser_settings: Option<ParserSettings>,
#[serde(default)]
generator_settings: Option<GeneratorSettings>,
#[serde(default)]
type_mapping_count: Option<usize>,
}
#[derive(Debug, Deserialize, Default)]
#[allow(dead_code)] struct QuoteChars {
#[serde(default)]
identifier_quotes: Vec<serde_json::Value>,
#[serde(default)]
string_escapes: Vec<String>,
}
#[derive(Debug, Deserialize, Default)]
#[allow(dead_code)] struct ParserSettings {
#[serde(default)]
tablesample_csv: bool,
#[serde(default)]
log_defaults_to_ln: bool,
}
#[derive(Debug, Deserialize, Default)]
#[allow(dead_code)] struct GeneratorSettings {
#[serde(default)]
limit_fetch: Option<String>,
#[serde(default)]
tablesample_size_is_rows: bool,
#[serde(default)]
locking_reads_supported: bool,
#[serde(default)]
null_ordering_supported: Option<bool>,
#[serde(default)]
ignore_nulls_in_func: bool,
#[serde(default)]
can_implement_array_any: bool,
#[serde(default)]
supports_table_alias_columns: bool,
#[serde(default)]
unpivot_aliases_are_identifiers: bool,
#[serde(default)]
custom_transforms_count: Option<usize>,
}
fn load_dialects_json(spec_dir: &Path) -> Result<BTreeMap<String, DialectSpec>, Box<dyn Error>> {
let path = spec_dir.join("dialects.json");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
serde_json::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)] struct FunctionDef {
class: String,
categories: Vec<String>,
#[serde(default)]
sql_names: Vec<String>,
#[serde(default)]
arg_types: IndexMap<String, serde_json::Value>,
#[serde(default)]
dialects: Vec<String>,
#[serde(default)]
dialect_specific: bool,
#[serde(default)]
return_type: Option<ReturnTypeSpec>,
}
#[derive(Debug, Deserialize)]
struct ReturnTypeSpec {
rule: String,
}
fn load_functions_json(spec_dir: &Path) -> Result<BTreeMap<String, FunctionDef>, Box<dyn Error>> {
let path = spec_dir.join("functions.json");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
serde_json::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
#[derive(Debug, Deserialize)]
#[allow(dead_code)] struct NormalizationOverride {
normalization_strategy: String,
has_custom_normalization: bool,
#[serde(default)]
override_reason: Option<String>,
#[serde(default)]
udf_case_sensitive: Option<bool>,
#[serde(default)]
qualified_table_case_sensitive: Option<bool>,
}
fn load_normalization_overrides(
spec_dir: &Path,
) -> Result<BTreeMap<String, NormalizationOverride>, Box<dyn Error>> {
let path = spec_dir.join("normalization_overrides.toml");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
toml::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
#[derive(Debug, Deserialize)]
struct ScopingRule {
alias_in_group_by: bool,
alias_in_having: bool,
alias_in_order_by: bool,
lateral_column_alias: bool,
}
fn load_scoping_rules(spec_dir: &Path) -> Result<BTreeMap<String, ScopingRule>, Box<dyn Error>> {
let path = spec_dir.join("scoping_rules.toml");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
toml::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
#[derive(Debug, Deserialize)]
struct DialectBehavior {
value_table_functions: ValueTableFunctions,
null_ordering: BTreeMap<String, String>,
unnest: UnnestBehavior,
date_functions: BTreeMap<String, BTreeMap<String, toml::Value>>,
}
#[derive(Debug, Deserialize)]
struct ValueTableFunctions {
common: Vec<String>,
#[serde(default)]
postgres: Vec<String>,
#[serde(default)]
bigquery: Vec<String>,
#[serde(default)]
snowflake: Vec<String>,
#[serde(default)]
redshift: Vec<String>,
#[serde(default)]
mysql: Vec<String>,
#[serde(default)]
mssql: Vec<String>,
#[serde(default)]
duckdb: Vec<String>,
#[serde(default)]
clickhouse: Vec<String>,
#[serde(default)]
databricks: Vec<String>,
#[serde(default)]
hive: Vec<String>,
#[serde(default)]
sqlite: Vec<String>,
}
#[derive(Debug, Deserialize)]
struct UnnestBehavior {
implicit_unnest: Vec<String>,
}
fn load_dialect_behavior(spec_dir: &Path) -> Result<DialectBehavior, Box<dyn Error>> {
let path = spec_dir.join("dialect_behavior.toml");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
toml::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
#[derive(Debug, Deserialize)]
struct TypeSystem {
type_categories: BTreeMap<String, Vec<String>>,
implicit_casts: BTreeMap<String, Vec<String>>,
dialect_type_mapping: BTreeMap<String, BTreeMap<String, String>>,
}
fn load_type_system(spec_dir: &Path) -> Result<TypeSystem, Box<dyn Error>> {
let path = spec_dir.join("type_system.toml");
let content = fs::read_to_string(&path).map_err(|e| format!("Failed to read {path:?}: {e}"))?;
toml::from_str(&content).map_err(|e| format!("Failed to parse {path:?}: {e}").into())
}
fn validate_dialect_coverage(
dialects: &BTreeMap<String, DialectSpec>,
scoping: &BTreeMap<String, ScopingRule>,
) {
let mut warnings = Vec::new();
for dialect in KNOWN_DIALECTS {
if !dialects.contains_key(*dialect) {
warnings.push(format!("Dialect '{dialect}' missing from dialects.json"));
}
if !scoping.contains_key(*dialect) {
warnings.push(format!(
"Dialect '{dialect}' missing from scoping_rules.toml"
));
}
}
for warning in &warnings {
println!("cargo:warning={warning}");
}
}
fn write_if_changed(path: &Path, content: &str) -> Result<(), Box<dyn Error>> {
let write_needed = match fs::read_to_string(path) {
Ok(existing) => existing != content,
Err(err) if err.kind() == io::ErrorKind::NotFound => true,
Err(err) => return Err(format!("Failed to read {path:?}: {err}").into()),
};
if write_needed {
if let Err(err) = fs::write(path, content) {
return Err(format!("Failed to write {path:?}: {err}").into());
}
}
Ok(())
}
fn generate_mod_rs(dir: &Path) -> Result<(), Box<dyn Error>> {
let content = r#"//! Generated dialect semantic code.
//!
//! DO NOT EDIT MANUALLY - generated by build.rs from specs/dialect-semantics/
pub mod case_sensitivity;
pub mod function_rules;
pub mod functions;
mod scoping_rules;
pub mod type_system;
pub use case_sensitivity::*;
pub use function_rules::*;
pub use functions::*;
pub use type_system::*;
// scoping_rules adds methods to Dialect via impl, no re-export needed
"#;
write_if_changed(&dir.join("mod.rs"), content)
}
fn generate_case_sensitivity(
dir: &Path,
dialects: &BTreeMap<String, DialectSpec>,
overrides: &BTreeMap<String, NormalizationOverride>,
) -> Result<(), Box<dyn Error>> {
let mut code = String::from(
r#"//! Case sensitivity rules per dialect.
//!
//! Generated from dialects.json and normalization_overrides.toml
//!
//! This module defines how SQL identifiers (table names, column names, etc.)
//! should be normalized for comparison. Different SQL dialects have different
//! rules for identifier case sensitivity.
use std::borrow::Cow;
use crate::Dialect;
/// Normalization strategy for identifier handling.
///
/// SQL dialects differ in how they handle identifier case. This enum represents
/// the different strategies used for normalizing identifiers during analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizationStrategy {
/// Fold to lowercase (Postgres, Redshift)
Lowercase,
/// Fold to uppercase (Snowflake, Oracle)
Uppercase,
/// Case-insensitive comparison without folding
CaseInsensitive,
/// Case-sensitive, preserve exactly
CaseSensitive,
}
impl NormalizationStrategy {
/// Applies this normalization strategy to a string.
///
/// Returns a `Cow<str>` to avoid allocation when no transformation is needed
/// (i.e., for `CaseSensitive` strategy or when the string is already in the
/// correct case).
///
/// For `CaseInsensitive`, lowercase folding is used as the canonical form.
///
/// # Example
///
/// ```
/// use std::borrow::Cow;
/// use flowscope_core::generated::NormalizationStrategy;
///
/// let strategy = NormalizationStrategy::Lowercase;
/// assert_eq!(strategy.apply("MyTable"), "mytable");
///
/// // CaseSensitive returns a borrowed reference (no allocation)
/// let strategy = NormalizationStrategy::CaseSensitive;
/// assert!(matches!(strategy.apply("MyTable"), Cow::Borrowed(_)));
/// ```
pub fn apply<'a>(&self, s: &'a str) -> Cow<'a, str> {
match self {
Self::CaseSensitive => Cow::Borrowed(s),
Self::Lowercase | Self::CaseInsensitive => {
// Optimization: only allocate if the string contains uppercase chars
if s.chars().any(|c| c.is_uppercase()) {
Cow::Owned(s.to_lowercase())
} else {
Cow::Borrowed(s)
}
}
Self::Uppercase => {
// Optimization: only allocate if the string contains lowercase chars
if s.chars().any(|c| c.is_lowercase()) {
Cow::Owned(s.to_uppercase())
} else {
Cow::Borrowed(s)
}
}
}
}
}
impl Dialect {
/// Get the normalization strategy for this dialect.
pub const fn normalization_strategy(&self) -> NormalizationStrategy {
match self {
"#,
);
for (dialect, spec) in dialects {
if let Some(variant) = dialect_to_variant(dialect) {
let strategy = match spec.normalization.as_str() {
"lowercase" => "NormalizationStrategy::Lowercase",
"uppercase" => "NormalizationStrategy::Uppercase",
"case_insensitive" => "NormalizationStrategy::CaseInsensitive",
"case_sensitive" => "NormalizationStrategy::CaseSensitive",
other => {
println!(
"cargo:warning=Unknown normalization '{other}' for dialect '{dialect}', using CaseInsensitive"
);
"NormalizationStrategy::CaseInsensitive"
}
};
code.push_str(&format!(" Dialect::{variant} => {strategy},\n"));
}
}
code.push_str(" Dialect::Generic => NormalizationStrategy::CaseInsensitive,\n");
code.push_str(" Dialect::Ansi => NormalizationStrategy::Uppercase,\n");
code.push_str(" }\n }\n\n");
let custom_dialects: Vec<_> = overrides
.iter()
.filter(|(_, o)| o.has_custom_normalization)
.map(|(d, _)| d)
.collect();
if custom_dialects.is_empty() {
code.push_str(
r#" /// Returns true if this dialect has custom normalization logic
/// that cannot be captured by a simple strategy.
pub const fn has_custom_normalization(&self) -> bool {
false
}
"#,
);
} else {
let variants: Vec<_> = custom_dialects
.iter()
.filter_map(|d| dialect_to_variant(d))
.map(|v| format!("Dialect::{v}"))
.collect();
code.push_str(&format!(
r#" /// Returns true if this dialect has custom normalization logic
/// that cannot be captured by a simple strategy.
pub const fn has_custom_normalization(&self) -> bool {{
matches!(self, {})
}}
"#,
variants.join(" | ")
));
}
code.push_str(
r#"
/// Get pseudocolumns for this dialect (implicit columns like _PARTITIONTIME).
pub fn pseudocolumns(&self) -> &'static [&'static str] {
match self {
"#,
);
for (dialect, spec) in dialects {
if !spec.pseudocolumns.is_empty() {
if let Some(variant) = dialect_to_variant(dialect) {
let cols: Vec<_> = spec
.pseudocolumns
.iter()
.map(|s| format!("\"{s}\""))
.collect();
let cols_str = cols.join(", ");
code.push_str(&format!(
" Dialect::{variant} => &[{cols_str}],\n"
));
}
}
}
code.push_str(" _ => &[],\n");
code.push_str(" }\n }\n");
code.push_str(
r#"
/// Get the identifier quote characters for this dialect.
/// Note: Some dialects use paired quotes (like SQLite's []) which are represented
/// as single characters here - the opening bracket.
pub fn identifier_quotes(&self) -> &'static [&'static str] {
match self {
"#,
);
for (dialect, spec) in dialects {
if let Some(ref qc) = spec.quote_chars {
if !qc.identifier_quotes.is_empty() {
if let Some(variant) = dialect_to_variant(dialect) {
let quotes: Vec<_> = qc
.identifier_quotes
.iter()
.filter_map(|v| {
match v {
serde_json::Value::String(s) => {
let escaped = s.escape_default();
Some(format!("\"{escaped}\""))
}
serde_json::Value::Array(arr) => {
arr.first().and_then(|v| v.as_str()).map(|s| {
let escaped = s.escape_default();
format!("\"{escaped}\"")
})
}
_ => None,
}
})
.collect();
if !quotes.is_empty() {
let quotes_str = quotes.join(", ");
code.push_str(&format!(
" Dialect::{variant} => &[{quotes_str}],\n"
));
}
}
}
}
}
code.push_str(" _ => &[\"\\\"\"],\n"); code.push_str(" }\n }\n}\n");
write_if_changed(&dir.join("case_sensitivity.rs"), &code)
}
fn generate_scoping_rules(
dir: &Path,
rules: &BTreeMap<String, ScopingRule>,
) -> Result<(), Box<dyn Error>> {
let mut code = String::from(
r#"//! Alias visibility and scoping rules per dialect.
//!
//! Generated from scoping_rules.toml
use crate::Dialect;
impl Dialect {
/// Whether SELECT aliases can be referenced in GROUP BY.
pub const fn alias_in_group_by(&self) -> bool {
match self {
"#,
);
for (dialect, rule) in rules {
if let Some(variant) = dialect_to_variant(dialect) {
let val = rule.alias_in_group_by;
code.push_str(&format!(" Dialect::{variant} => {val},\n"));
}
}
code.push_str(" _ => false, // Default: strict (Postgres-like)\n");
code.push_str(" }\n }\n\n");
code.push_str(
r#" /// Whether SELECT aliases can be referenced in HAVING.
pub const fn alias_in_having(&self) -> bool {
match self {
"#,
);
for (dialect, rule) in rules {
if let Some(variant) = dialect_to_variant(dialect) {
let val = rule.alias_in_having;
code.push_str(&format!(" Dialect::{variant} => {val},\n"));
}
}
code.push_str(" _ => false,\n");
code.push_str(" }\n }\n\n");
code.push_str(
r#" /// Whether SELECT aliases can be referenced in ORDER BY.
pub const fn alias_in_order_by(&self) -> bool {
match self {
"#,
);
for (dialect, rule) in rules {
if let Some(variant) = dialect_to_variant(dialect) {
let val = rule.alias_in_order_by;
code.push_str(&format!(" Dialect::{variant} => {val},\n"));
}
}
code.push_str(" _ => true, // ORDER BY alias is widely supported\n");
code.push_str(" }\n }\n\n");
code.push_str(
r#" /// Whether lateral column aliases are supported (referencing earlier SELECT items).
pub const fn lateral_column_alias(&self) -> bool {
match self {
"#,
);
for (dialect, rule) in rules {
if let Some(variant) = dialect_to_variant(dialect) {
let val = rule.lateral_column_alias;
code.push_str(&format!(" Dialect::{variant} => {val},\n"));
}
}
code.push_str(" _ => false,\n");
code.push_str(" }\n }\n}\n");
write_if_changed(&dir.join("scoping_rules.rs"), &code)
}
fn generate_function_rules(dir: &Path, behavior: &DialectBehavior) -> Result<(), Box<dyn Error>> {
let mut code = String::from(
r#"//! Function argument handling rules per dialect.
//!
//! Generated from dialect_behavior.toml
//!
//! This module provides dialect-aware rules for function argument handling,
//! particularly for date/time functions where certain arguments are keywords
//! (like `YEAR`, `MONTH`) rather than column references.
use crate::Dialect;
/// Returns argument indices to skip when extracting column references from a function call.
///
/// Certain SQL functions take keyword arguments (e.g., `DATEDIFF(YEAR, start, end)` in Snowflake)
/// that should not be treated as column references during lineage analysis. This function
/// returns the indices of such arguments for the given function and dialect.
///
/// # Arguments
///
/// * `dialect` - The SQL dialect being analyzed
/// * `func_name` - The function name (case-insensitive, underscore-insensitive)
///
/// # Returns
///
/// A slice of argument indices (0-based) to skip. Returns an empty slice for
/// unknown functions or functions without skip rules.
///
/// # Example
///
/// ```ignore
/// // In Snowflake, DATEDIFF takes a unit as the first argument
/// let skip = skip_args_for_function(Dialect::Snowflake, "DATEDIFF");
/// assert_eq!(skip, &[0]); // Skip first argument (the unit)
///
/// // Both DATEADD and DATE_ADD match the same rules
/// let skip1 = skip_args_for_function(Dialect::Snowflake, "DATEADD");
/// let skip2 = skip_args_for_function(Dialect::Snowflake, "DATE_ADD");
/// assert_eq!(skip1, skip2);
/// ```
pub fn skip_args_for_function(dialect: Dialect, func_name: &str) -> &'static [usize] {
// Normalize: lowercase and remove underscores to handle both DATEADD and DATE_ADD variants
let func_normalized: String = func_name
.chars()
.filter(|c| *c != '_')
.map(|c| c.to_ascii_lowercase())
.collect();
match func_normalized.as_str() {
"#,
);
for (func_name, dialect_rules) in &behavior.date_functions {
let func_normalized: String = func_name
.chars()
.filter(|c| *c != '_')
.flat_map(|c| c.to_lowercase())
.collect();
let has_default = dialect_rules.contains_key("_default");
let dialect_specific_rules: Vec<_> = dialect_rules
.iter()
.filter(|(d, _)| *d != "_default" && dialect_to_variant(d).is_some())
.collect();
if dialect_specific_rules.is_empty() {
if has_default {
let default_indices = parse_skip_indices(dialect_rules.get("_default").unwrap());
if default_indices.is_empty() {
code.push_str(&format!(" \"{func_normalized}\" => &[],\n"));
} else {
let idx_str = default_indices
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(", ");
code.push_str(&format!(" \"{func_normalized}\" => &[{idx_str}],\n"));
}
} else {
code.push_str(&format!(" \"{func_normalized}\" => &[],\n"));
}
continue;
}
code.push_str(&format!(
" \"{func_normalized}\" => match dialect {{\n"
));
for (dialect, value) in dialect_rules {
if dialect == "_default" {
continue;
}
if let Some(variant) = dialect_to_variant(dialect) {
let indices = parse_skip_indices(value);
if indices.is_empty() {
code.push_str(&format!(" Dialect::{variant} => &[],\n"));
} else {
let idx_str = indices
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(", ");
code.push_str(&format!(
" Dialect::{variant} => &[{idx_str}],\n"
));
}
}
}
if has_default {
let default_indices = parse_skip_indices(dialect_rules.get("_default").unwrap());
if default_indices.is_empty() {
code.push_str(" _ => &[],\n");
} else {
let idx_str = default_indices
.iter()
.map(|i| i.to_string())
.collect::<Vec<_>>()
.join(", ");
code.push_str(&format!(" _ => &[{idx_str}],\n"));
}
} else {
code.push_str(" _ => &[],\n");
}
code.push_str(" },\n");
}
code.push_str(" _ => &[],\n");
code.push_str(" }\n}\n\n");
code.push_str(
r#"
/// NULL ordering behavior in ORDER BY.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullOrdering {
/// NULLs sort as larger than all other values (NULLS LAST for ASC)
NullsAreLarge,
/// NULLs sort as smaller than all other values (NULLS FIRST for ASC)
NullsAreSmall,
/// NULLs always sort last regardless of ASC/DESC
NullsAreLast,
}
impl Dialect {
/// Get the default NULL ordering behavior for this dialect.
pub const fn null_ordering(&self) -> NullOrdering {
match self {
"#,
);
for (dialect, ordering) in &behavior.null_ordering {
if let Some(variant) = dialect_to_variant(dialect) {
let ordering_variant = match ordering.as_str() {
"nulls_are_large" => "NullOrdering::NullsAreLarge",
"nulls_are_small" => "NullOrdering::NullsAreSmall",
"nulls_are_last" => "NullOrdering::NullsAreLast",
_ => "NullOrdering::NullsAreLast",
};
code.push_str(&format!(
" Dialect::{variant} => {ordering_variant},\n"
));
}
}
code.push_str(" _ => NullOrdering::NullsAreLast,\n");
code.push_str(" }\n }\n\n");
let implicit_variants: Vec<_> = behavior
.unnest
.implicit_unnest
.iter()
.filter_map(|d| dialect_to_variant(d))
.map(|v| format!("Dialect::{v}"))
.collect();
if implicit_variants.is_empty() {
code.push_str(
r#" /// Whether this dialect supports implicit UNNEST (no CROSS JOIN needed).
pub const fn supports_implicit_unnest(&self) -> bool {
false
}
}
"#,
);
} else {
code.push_str(&format!(
r#" /// Whether this dialect supports implicit UNNEST (no CROSS JOIN needed).
pub const fn supports_implicit_unnest(&self) -> bool {{
matches!(self, {})
}}
}}
"#,
implicit_variants.join(" | ")
));
}
generate_value_table_function(&mut code, behavior);
write_if_changed(&dir.join("function_rules.rs"), &code)
}
fn generate_value_table_function(code: &mut String, behavior: &DialectBehavior) {
let vtf = &behavior.value_table_functions;
let common_funcs: Vec<_> = vtf
.common
.iter()
.map(|s| format!("\"{}\"", s.to_ascii_uppercase()))
.collect();
code.push_str(
r#"
/// Checks if a function is a value table function (returns rows) for the given dialect.
///
/// Value table functions (like UNNEST, GENERATE_SERIES, FLATTEN) return rows/tables
/// rather than scalar values. This classification is used during lineage analysis
/// to determine how FROM clause function calls should be handled.
///
/// # Arguments
///
/// * `dialect` - The SQL dialect being analyzed
/// * `func_name` - The function name (case-insensitive)
///
/// # Returns
///
/// `true` if the function is a value table function for the given dialect.
///
/// # Example
///
/// ```ignore
/// use flowscope_core::generated::is_value_table_function;
/// use flowscope_core::Dialect;
///
/// assert!(is_value_table_function(Dialect::Postgres, "UNNEST"));
/// assert!(is_value_table_function(Dialect::Snowflake, "FLATTEN"));
/// assert!(!is_value_table_function(Dialect::Postgres, "COUNT"));
/// ```
pub fn is_value_table_function(dialect: Dialect, func_name: &str) -> bool {
let name = func_name.to_ascii_uppercase();
// Check common functions
"#,
);
if common_funcs.is_empty() {
code.push_str(" // No common value table functions defined\n");
} else {
code.push_str(&format!(
" if matches!(name.as_str(), {}) {{\n return true;\n }}\n",
common_funcs.join(" | ")
));
}
code.push_str(" // Check dialect-specific functions\n");
code.push_str(" match dialect {\n");
let dialect_configs = [
("postgres", &vtf.postgres, "Postgres"),
("bigquery", &vtf.bigquery, "Bigquery"),
("snowflake", &vtf.snowflake, "Snowflake"),
("redshift", &vtf.redshift, "Redshift"),
("mysql", &vtf.mysql, "Mysql"),
("mssql", &vtf.mssql, "Mssql"),
("duckdb", &vtf.duckdb, "Duckdb"),
("clickhouse", &vtf.clickhouse, "Clickhouse"),
("databricks", &vtf.databricks, "Databricks"),
("hive", &vtf.hive, "Hive"),
("sqlite", &vtf.sqlite, "Sqlite"),
];
for (_dialect_name, funcs, variant) in dialect_configs {
let dialect_specific: Vec<_> = funcs
.iter()
.filter(|f| !vtf.common.iter().any(|c| c.eq_ignore_ascii_case(f)))
.map(|s| format!("\"{}\"", s.to_ascii_uppercase()))
.collect();
if !dialect_specific.is_empty() {
code.push_str(&format!(
" Dialect::{} => matches!(name.as_str(), {}),\n",
variant,
dialect_specific.join(" | ")
));
}
}
code.push_str(" _ => false,\n");
code.push_str(" }\n}\n");
}
fn class_name_to_sql_name(class_name: &str) -> String {
let mut result = String::new();
let mut prev_was_upper = false;
let mut prev_was_letter = false;
for (i, c) in class_name.chars().enumerate() {
if c.is_uppercase() {
if i > 0 && prev_was_letter {
let next_is_lower = class_name
.chars()
.nth(i + 1)
.is_some_and(|nc| nc.is_lowercase());
if !prev_was_upper || next_is_lower {
result.push('_');
}
}
result.push(c.to_ascii_lowercase());
prev_was_upper = true;
} else {
result.push(c.to_ascii_lowercase());
prev_was_upper = false;
}
prev_was_letter = c.is_alphabetic();
}
result
}
struct FunctionSigData {
name: String,
display_name: String, params: Vec<(String, bool)>, return_rule: Option<String>,
category: String, }
fn generate_functions(
dir: &Path,
functions: &BTreeMap<String, FunctionDef>,
) -> Result<(), Box<dyn Error>> {
let mut aggregates: BTreeSet<String> = BTreeSet::new();
let mut windows: BTreeSet<String> = BTreeSet::new();
let mut udtfs: BTreeSet<String> = BTreeSet::new();
let mut return_types: BTreeMap<String, String> = BTreeMap::new();
let mut signatures: Vec<FunctionSigData> = Vec::new();
for def in functions.values() {
let sql_name = class_name_to_sql_name(&def.class);
let category = if def.categories.contains(&"aggregate".to_string()) {
"aggregate"
} else if def.categories.contains(&"window".to_string()) {
"window"
} else {
"scalar"
};
for cat in &def.categories {
match cat.as_str() {
"aggregate" => {
aggregates.insert(sql_name.clone());
}
"window" => {
windows.insert(sql_name.clone());
}
"udtf" => {
udtfs.insert(sql_name.clone());
}
_ => {}
}
}
if let Some(ref rt) = def.return_type {
return_types.insert(sql_name.clone(), rt.rule.clone());
}
let skip_params = [
"bracket_notation",
"struct_name_inheritance",
"ensure_variant",
"nulls_excluded",
];
let params: Vec<(String, bool)> = def
.arg_types
.iter()
.filter(|(k, _)| !skip_params.contains(&k.as_str()))
.map(|(k, v)| {
let required = v.as_bool().unwrap_or(false);
(k.clone(), required)
})
.collect();
signatures.push(FunctionSigData {
name: sql_name.clone(),
display_name: sql_name.to_ascii_uppercase(),
params,
return_rule: def.return_type.as_ref().map(|rt| rt.rule.clone()),
category: category.to_string(),
});
}
let mut code = String::from(
r#"//! Function classification sets.
//!
//! Generated from functions.json
//!
//! This module provides sets of SQL function names categorized by their behavior
//! (aggregate, window, table-generating). These classifications are used during
//! lineage analysis to determine how expressions should be analyzed.
use std::collections::HashSet;
use std::sync::LazyLock;
"#,
);
let agg_count = aggregates.len();
code.push_str(&format!("/// Aggregate functions ({agg_count} total).\n"));
code.push_str(
"pub static AGGREGATE_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {\n",
);
code.push_str(" let mut set = HashSet::new();\n");
for func in &aggregates {
code.push_str(&format!(" set.insert(\"{func}\");\n"));
}
code.push_str(" set\n});\n\n");
let win_count = windows.len();
code.push_str(&format!("/// Window functions ({win_count} total).\n"));
code.push_str(
"pub static WINDOW_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {\n",
);
code.push_str(" let mut set = HashSet::new();\n");
for func in &windows {
code.push_str(&format!(" set.insert(\"{func}\");\n"));
}
code.push_str(" set\n});\n\n");
let udtf_count = udtfs.len();
code.push_str(&format!(
"/// Table-generating functions / UDTFs ({udtf_count} total).\n"
));
code.push_str(
"pub static UDTF_FUNCTIONS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {\n",
);
code.push_str(" let mut set = HashSet::new();\n");
for func in &udtfs {
code.push_str(&format!(" set.insert(\"{func}\");\n"));
}
code.push_str(" set\n});\n\n");
code.push_str(
r#"/// Checks if a function is an aggregate function (e.g., SUM, COUNT, AVG).
///
/// Aggregate functions combine multiple input rows into a single output value.
/// This classification is used to detect aggregation in SELECT expressions
/// and validate GROUP BY semantics.
///
/// The check is case-insensitive. Uses ASCII lowercase for performance since
/// SQL function names are always ASCII.
pub fn is_aggregate_function(name: &str) -> bool {
// SQL function names are ASCII, so we can use the faster ASCII lowercase
let lower = name.to_ascii_lowercase();
AGGREGATE_FUNCTIONS.contains(lower.as_str())
}
/// Checks if a function is a window function (e.g., ROW_NUMBER, RANK, LAG).
///
/// Window functions perform calculations across a set of rows related to
/// the current row, without collapsing them into a single output.
///
/// The check is case-insensitive. Uses ASCII lowercase for performance since
/// SQL function names are always ASCII.
pub fn is_window_function(name: &str) -> bool {
// SQL function names are ASCII, so we can use the faster ASCII lowercase
let lower = name.to_ascii_lowercase();
WINDOW_FUNCTIONS.contains(lower.as_str())
}
/// Checks if a function is a table-generating function / UDTF (e.g., UNNEST, EXPLODE).
///
/// UDTFs return multiple rows for each input row, expanding the result set.
/// This classification affects how lineage is tracked through these functions.
///
/// The check is case-insensitive. Uses ASCII lowercase for performance since
/// SQL function names are always ASCII.
pub fn is_udtf_function(name: &str) -> bool {
// SQL function names are ASCII, so we can use the faster ASCII lowercase
let lower = name.to_ascii_lowercase();
UDTF_FUNCTIONS.contains(lower.as_str())
}
"#,
);
code.push_str(
r#"
/// Return type rule for function type inference.
///
/// This enum represents the different strategies for determining a function's
/// return type during type inference in SQL analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReturnTypeRule {
/// Returns Integer (e.g., COUNT, ROW_NUMBER)
Integer,
/// Returns Number (e.g., SUM, AVG)
Numeric,
/// Returns Text (e.g., CONCAT, SUBSTRING)
Text,
/// Returns Timestamp (e.g., NOW, CURRENT_TIMESTAMP)
Timestamp,
/// Returns Boolean (e.g., AND, OR)
Boolean,
/// Returns Date (e.g., CURRENT_DATE)
Date,
/// Returns same type as first argument (e.g., MIN, MAX, COALESCE)
MatchFirstArg,
}
/// Infers the return type rule for a SQL function.
///
/// This function returns the return type rule for known SQL functions,
/// enabling data-driven type inference. The check is case-insensitive.
///
/// # Arguments
///
/// * `name` - The function name (case-insensitive)
///
/// # Returns
///
/// `Some(ReturnTypeRule)` if the function has a known return type rule,
/// `None` otherwise (fallback to existing logic).
///
/// # Example
///
/// ```ignore
/// use flowscope_core::generated::infer_function_return_type;
///
/// assert_eq!(infer_function_return_type("COUNT"), Some(ReturnTypeRule::Integer));
/// assert_eq!(infer_function_return_type("MIN"), Some(ReturnTypeRule::MatchFirstArg));
/// assert_eq!(infer_function_return_type("UNKNOWN_FUNC"), None);
/// ```
pub fn infer_function_return_type(name: &str) -> Option<ReturnTypeRule> {
// SQL function names are ASCII, so we can use the faster ASCII lowercase
let lower = name.to_ascii_lowercase();
match lower.as_str() {
"#,
);
for (func_name, rule) in &return_types {
let variant = match rule.as_str() {
"integer" => "ReturnTypeRule::Integer",
"numeric" => "ReturnTypeRule::Numeric",
"text" => "ReturnTypeRule::Text",
"timestamp" => "ReturnTypeRule::Timestamp",
"boolean" => "ReturnTypeRule::Boolean",
"date" => "ReturnTypeRule::Date",
"match_first_arg" => "ReturnTypeRule::MatchFirstArg",
unknown => {
println!(
"cargo:warning=Unknown return_type rule '{unknown}' for function '{func_name}'"
);
continue;
}
};
code.push_str(&format!(" \"{func_name}\" => Some({variant}),\n"));
}
code.push_str(
r#" _ => None,
}
}
"#,
);
code.push_str(
r#"
/// Function category for completion context filtering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FunctionCategory {
/// Aggregate functions (SUM, COUNT, AVG, etc.)
Aggregate,
/// Window functions (ROW_NUMBER, RANK, LAG, etc.)
Window,
/// Scalar functions (LOWER, CONCAT, ABS, etc.)
Scalar,
}
/// Function parameter information for completion.
#[derive(Debug, Clone)]
pub struct FunctionParam {
/// Parameter name (e.g., "expression", "separator")
pub name: &'static str,
/// Whether this parameter is required
pub required: bool,
}
/// Function signature for smart completion.
///
/// Contains all metadata needed to display rich function completions
/// with parameter hints and return type information.
#[derive(Debug, Clone)]
pub struct FunctionSignature {
/// Function name in lowercase (for lookup)
pub name: &'static str,
/// Display name in uppercase (for completion label)
pub display_name: &'static str,
/// Function parameters
pub params: &'static [FunctionParam],
/// Return type rule, if known
pub return_type: Option<ReturnTypeRule>,
/// Function category
pub category: FunctionCategory,
}
impl FunctionSignature {
/// Formats the function signature as "NAME(params) → TYPE"
pub fn format_signature(&self) -> String {
let params_str = self.params
.iter()
.map(|p| {
if p.required {
p.name.to_string()
} else {
format!("[{}]", p.name)
}
})
.collect::<Vec<_>>()
.join(", ");
let return_str = self.return_type
.map(|rt| format!(" → {}", match rt {
ReturnTypeRule::Integer => "INTEGER",
ReturnTypeRule::Numeric => "NUMERIC",
ReturnTypeRule::Text => "TEXT",
ReturnTypeRule::Timestamp => "TIMESTAMP",
ReturnTypeRule::Boolean => "BOOLEAN",
ReturnTypeRule::Date => "DATE",
ReturnTypeRule::MatchFirstArg => "T",
}))
.unwrap_or_default();
format!("{}({}){}", self.display_name, params_str, return_str)
}
}
"#,
);
code.push_str("/// Static function parameter definitions.\n");
for sig in &signatures {
if sig.params.is_empty() {
continue;
}
let params_name = format!("PARAMS_{}", sig.name.to_ascii_uppercase());
code.push_str(&format!("static {params_name}: &[FunctionParam] = &[\n"));
for (param_name, required) in &sig.params {
code.push_str(&format!(
" FunctionParam {{ name: \"{param_name}\", required: {required} }},\n"
));
}
code.push_str("];\n");
}
code.push_str(
r#"
/// Looks up a function signature by name.
///
/// Returns the complete function signature including parameters, return type,
/// and category. The lookup is case-insensitive.
///
/// # Arguments
///
/// * `name` - The function name (case-insensitive)
///
/// # Returns
///
/// `Some(FunctionSignature)` if the function is known, `None` otherwise.
pub fn get_function_signature(name: &str) -> Option<FunctionSignature> {
let lower = name.to_ascii_lowercase();
match lower.as_str() {
"#,
);
for sig in &signatures {
let params_ref = if sig.params.is_empty() {
"&[]".to_string()
} else {
format!("PARAMS_{}", sig.name.to_ascii_uppercase())
};
let return_type = sig
.return_rule
.as_ref()
.and_then(|r| match r.as_str() {
"integer" => Some("Some(ReturnTypeRule::Integer)"),
"numeric" => Some("Some(ReturnTypeRule::Numeric)"),
"text" => Some("Some(ReturnTypeRule::Text)"),
"timestamp" => Some("Some(ReturnTypeRule::Timestamp)"),
"boolean" => Some("Some(ReturnTypeRule::Boolean)"),
"date" => Some("Some(ReturnTypeRule::Date)"),
"match_first_arg" => Some("Some(ReturnTypeRule::MatchFirstArg)"),
_ => None,
})
.unwrap_or("None");
let category = match sig.category.as_str() {
"aggregate" => "FunctionCategory::Aggregate",
"window" => "FunctionCategory::Window",
_ => "FunctionCategory::Scalar",
};
code.push_str(&format!(
r#" "{}" => Some(FunctionSignature {{
name: "{}",
display_name: "{}",
params: {},
return_type: {},
category: {},
}}),
"#,
sig.name, sig.name, sig.display_name, params_ref, return_type, category
));
}
code.push_str(
r#" _ => None,
}
}
/// Returns all function signatures for completion.
///
/// This provides access to all known SQL functions for populating
/// completion lists. Functions are returned in a static slice for efficiency.
pub fn all_function_signatures() -> impl Iterator<Item = FunctionSignature> {
static NAMES: &[&str] = &[
"#,
);
for sig in &signatures {
code.push_str(&format!(" \"{}\",\n", sig.name));
}
code.push_str(
r#" ];
NAMES.iter().filter_map(|name| get_function_signature(name))
}
"#,
);
write_if_changed(&dir.join("functions.rs"), &code)
}
fn generate_type_system(dir: &Path, type_system: &TypeSystem) -> Result<(), Box<dyn Error>> {
let mut code = String::from(
r#"//! SQL Type System for cross-dialect type normalization and compatibility.
//!
//! Generated from type_system.toml
//!
//! This module provides canonical SQL types, type normalization from dialect-specific
//! names to canonical types, implicit cast checking, and dialect-specific type name mapping.
use crate::Dialect;
use std::fmt;
/// Canonical SQL types for cross-dialect type system.
///
/// These represent the fundamental SQL type categories that can be mapped
/// from various dialect-specific type names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CanonicalType {
Integer,
Float,
Text,
Boolean,
Timestamp,
Date,
Time,
Binary,
Json,
Array,
}
impl CanonicalType {
/// Returns the canonical type name as a lowercase string.
pub const fn as_str(&self) -> &'static str {
match self {
CanonicalType::Integer => "integer",
CanonicalType::Float => "float",
CanonicalType::Text => "text",
CanonicalType::Boolean => "boolean",
CanonicalType::Timestamp => "timestamp",
CanonicalType::Date => "date",
CanonicalType::Time => "time",
CanonicalType::Binary => "binary",
CanonicalType::Json => "json",
CanonicalType::Array => "array",
}
}
/// Returns the canonical type name as an uppercase SQL-standard string.
///
/// This is more efficient than `to_string()` when you only need a static string,
/// as it avoids heap allocation.
pub const fn as_uppercase_str(&self) -> &'static str {
match self {
CanonicalType::Integer => "INTEGER",
CanonicalType::Float => "FLOAT",
CanonicalType::Text => "TEXT",
CanonicalType::Boolean => "BOOLEAN",
CanonicalType::Timestamp => "TIMESTAMP",
CanonicalType::Date => "DATE",
CanonicalType::Time => "TIME",
CanonicalType::Binary => "BINARY",
CanonicalType::Json => "JSON",
CanonicalType::Array => "ARRAY",
}
}
}
impl fmt::Display for CanonicalType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Use uppercase for SQL-standard output representation
match self {
CanonicalType::Integer => write!(f, "INTEGER"),
CanonicalType::Float => write!(f, "FLOAT"),
CanonicalType::Text => write!(f, "TEXT"),
CanonicalType::Boolean => write!(f, "BOOLEAN"),
CanonicalType::Timestamp => write!(f, "TIMESTAMP"),
CanonicalType::Date => write!(f, "DATE"),
CanonicalType::Time => write!(f, "TIME"),
CanonicalType::Binary => write!(f, "BINARY"),
CanonicalType::Json => write!(f, "JSON"),
CanonicalType::Array => write!(f, "ARRAY"),
}
}
}
/// Normalize a type name to its canonical type.
///
/// This function maps any dialect-specific type alias (e.g., "INT64", "VARCHAR",
/// "TIMESTAMPTZ") to its canonical type category.
///
/// # Arguments
///
/// * `type_name` - The type name to normalize (case-insensitive)
///
/// # Returns
///
/// `Some(CanonicalType)` if the type name is recognized, `None` otherwise.
///
/// # Example
///
/// ```ignore
/// use flowscope_core::generated::normalize_type_name;
/// use flowscope_core::generated::CanonicalType;
///
/// assert_eq!(normalize_type_name("INT64"), Some(CanonicalType::Integer));
/// assert_eq!(normalize_type_name("varchar"), Some(CanonicalType::Text));
/// assert_eq!(normalize_type_name("UNKNOWN_TYPE"), None);
/// ```
pub fn normalize_type_name(type_name: &str) -> Option<CanonicalType> {
let lower = type_name.to_ascii_lowercase();
match lower.as_str() {
"#,
);
for (canonical_type, aliases) in &type_system.type_categories {
let variant = canonical_type_to_variant(canonical_type);
let alias_patterns: Vec<_> = aliases.iter().map(|a| format!("\"{}\"", a)).collect();
let patterns = alias_patterns.join(" | ");
code.push_str(&format!(
" {patterns} => Some(CanonicalType::{variant}),\n"
));
}
code.push_str(
r#" _ => None,
}
}
/// Check if a type can be implicitly cast to another type.
///
/// Implicit casts are automatic type conversions that SQL engines perform
/// without requiring explicit CAST expressions. This function returns true
/// if the source type can be implicitly converted to the target type.
///
/// Note: A type can always be implicitly cast to itself (identity cast).
///
/// # Arguments
///
/// * `from` - The source canonical type
/// * `to` - The target canonical type
///
/// # Returns
///
/// `true` if implicit cast is allowed, `false` otherwise.
///
/// # Example
///
/// ```ignore
/// use flowscope_core::generated::{can_implicitly_cast, CanonicalType};
///
/// // Integer can be cast to Float
/// assert!(can_implicitly_cast(CanonicalType::Integer, CanonicalType::Float));
/// // Float cannot be cast to Integer implicitly
/// assert!(!can_implicitly_cast(CanonicalType::Float, CanonicalType::Integer));
/// // Any type can be cast to itself
/// assert!(can_implicitly_cast(CanonicalType::Text, CanonicalType::Text));
/// ```
pub fn can_implicitly_cast(from: CanonicalType, to: CanonicalType) -> bool {
if from == to {
return true;
}
match from {
"#,
);
for (from_type, to_types) in &type_system.implicit_casts {
let from_variant = canonical_type_to_variant(from_type);
if to_types.is_empty() {
code.push_str(&format!(
" CanonicalType::{from_variant} => false,\n"
));
} else {
let to_variants: Vec<_> = to_types
.iter()
.map(|t| format!("CanonicalType::{}", canonical_type_to_variant(t)))
.collect();
let matches_pattern = to_variants.join(" | ");
code.push_str(&format!(
" CanonicalType::{from_variant} => matches!(to, {matches_pattern}),\n"
));
}
}
code.push_str(
r#" _ => false,
}
}
/// Get the dialect-specific type name for a canonical type.
///
/// This function returns the preferred type name for a given canonical type
/// in a specific SQL dialect.
///
/// # Arguments
///
/// * `dialect` - The SQL dialect
/// * `canonical` - The canonical type
///
/// # Returns
///
/// The dialect-specific type name as a static string.
///
/// # Example
///
/// ```ignore
/// use flowscope_core::generated::{dialect_type_name, CanonicalType};
/// use flowscope_core::Dialect;
///
/// assert_eq!(dialect_type_name(Dialect::Bigquery, CanonicalType::Integer), "INT64");
/// assert_eq!(dialect_type_name(Dialect::Postgres, CanonicalType::Text), "text");
/// ```
pub fn dialect_type_name(dialect: Dialect, canonical: CanonicalType) -> &'static str {
match canonical {
"#,
);
for (canonical_type, dialect_mapping) in &type_system.dialect_type_mapping {
let variant = canonical_type_to_variant(canonical_type);
code.push_str(&format!(
" CanonicalType::{variant} => match dialect {{\n"
));
for (dialect_name, type_name) in dialect_mapping {
if let Some(dialect_variant) = dialect_to_variant(dialect_name) {
code.push_str(&format!(
" Dialect::{dialect_variant} => \"{type_name}\",\n"
));
}
}
let default_name = canonical_type.to_uppercase();
code.push_str(&format!(" _ => \"{default_name}\",\n"));
code.push_str(" },\n");
}
let mapped_types: BTreeSet<_> = type_system.dialect_type_mapping.keys().collect();
let all_types = [
"integer",
"float",
"text",
"boolean",
"timestamp",
"date",
"time",
"binary",
"json",
"array",
];
for type_name in all_types {
if !mapped_types.contains(&type_name.to_string()) {
let variant = canonical_type_to_variant(type_name);
let default_name = type_name.to_uppercase();
code.push_str(&format!(
" CanonicalType::{variant} => \"{default_name}\",\n"
));
}
}
code.push_str(
r#" }
}
"#,
);
write_if_changed(&dir.join("type_system.rs"), &code)
}
fn canonical_type_to_variant(type_name: &str) -> &'static str {
match type_name.to_lowercase().as_str() {
"integer" => "Integer",
"float" => "Float",
"text" => "Text",
"boolean" => "Boolean",
"timestamp" => "Timestamp",
"date" => "Date",
"time" => "Time",
"binary" => "Binary",
"json" => "Json",
"array" => "Array",
other => {
println!("cargo:warning=Unknown type category '{other}' in type_system.toml, defaulting to Text");
"Text"
}
}
}
fn dialect_to_variant(dialect: &str) -> Option<&'static str> {
match dialect.to_lowercase().as_str() {
"bigquery" => Some("Bigquery"),
"clickhouse" => Some("Clickhouse"),
"databricks" => Some("Databricks"),
"duckdb" => Some("Duckdb"),
"hive" => Some("Hive"),
"mssql" | "tsql" => Some("Mssql"),
"mysql" => Some("Mysql"),
"postgres" => Some("Postgres"),
"redshift" => Some("Redshift"),
"snowflake" => Some("Snowflake"),
"sqlite" => Some("Sqlite"),
"doris" | "drill" | "oracle" | "presto" | "spark" | "starrocks" | "tableau"
| "teradata" | "trino" => None,
_ => {
println!("cargo:warning=Unknown dialect '{dialect}' in specs");
None
}
}
}
fn parse_skip_indices(value: &toml::Value) -> Vec<usize> {
match value {
toml::Value::Array(arr) => arr
.iter()
.filter_map(|v| v.as_integer().map(|i| i as usize))
.collect(),
_ => vec![],
}
}