Skip to main content

flowscope_core/generated/
case_sensitivity.rs

//! Case sensitivity rules per dialect.
//!
//! Generated from `dialects.json` and `normalization_overrides.toml`.
//!
//! This module defines how SQL identifiers (table names, column names, etc.)
//! should be normalized for comparison. Different SQL dialects have different
//! rules for identifier case sensitivity.
8
9use std::borrow::Cow;
10
11use crate::Dialect;
12
/// How identifiers are canonicalized before they are compared.
///
/// SQL dialects disagree on identifier case handling; each variant names one
/// of the policies a dialect can be mapped to during analysis.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NormalizationStrategy {
    /// Identifiers are folded to lowercase (e.g. Postgres).
    Lowercase,
    /// Identifiers are folded to uppercase (e.g. Snowflake, Oracle).
    Uppercase,
    /// Identifiers compare without regard to case; no particular folding is
    /// implied by the dialect itself.
    CaseInsensitive,
    /// Identifiers are compared exactly as written.
    CaseSensitive,
}
28
29impl NormalizationStrategy {
30    /// Applies this normalization strategy to a string.
31    ///
32    /// Returns a `Cow<str>` to avoid allocation when no transformation is needed
33    /// (i.e., for `CaseSensitive` strategy or when the string is already in the
34    /// correct case).
35    ///
36    /// For `CaseInsensitive`, lowercase folding is used as the canonical form.
37    ///
38    /// # Example
39    ///
40    /// ```
41    /// use std::borrow::Cow;
42    /// use flowscope_core::generated::NormalizationStrategy;
43    ///
44    /// let strategy = NormalizationStrategy::Lowercase;
45    /// assert_eq!(strategy.apply("MyTable"), "mytable");
46    ///
47    /// // CaseSensitive returns a borrowed reference (no allocation)
48    /// let strategy = NormalizationStrategy::CaseSensitive;
49    /// assert!(matches!(strategy.apply("MyTable"), Cow::Borrowed(_)));
50    /// ```
51    pub fn apply<'a>(&self, s: &'a str) -> Cow<'a, str> {
52        match self {
53            Self::CaseSensitive => Cow::Borrowed(s),
54            Self::Lowercase | Self::CaseInsensitive => {
55                // Optimization: only allocate if the string contains uppercase chars
56                if s.chars().any(|c| c.is_uppercase()) {
57                    Cow::Owned(s.to_lowercase())
58                } else {
59                    Cow::Borrowed(s)
60                }
61            }
62            Self::Uppercase => {
63                // Optimization: only allocate if the string contains lowercase chars
64                if s.chars().any(|c| c.is_lowercase()) {
65                    Cow::Owned(s.to_uppercase())
66                } else {
67                    Cow::Borrowed(s)
68                }
69            }
70        }
71    }
72}
73
74impl Dialect {
75    /// Get the normalization strategy for this dialect.
76    pub const fn normalization_strategy(&self) -> NormalizationStrategy {
77        match self {
78            Dialect::Bigquery => NormalizationStrategy::CaseInsensitive,
79            Dialect::Clickhouse => NormalizationStrategy::CaseSensitive,
80            Dialect::Databricks => NormalizationStrategy::CaseInsensitive,
81            Dialect::Duckdb => NormalizationStrategy::CaseInsensitive,
82            Dialect::Hive => NormalizationStrategy::CaseInsensitive,
83            Dialect::Mssql => NormalizationStrategy::CaseInsensitive,
84            Dialect::Mysql => NormalizationStrategy::CaseSensitive,
85            Dialect::Oracle => NormalizationStrategy::Uppercase,
86            Dialect::Postgres => NormalizationStrategy::Lowercase,
87            Dialect::Redshift => NormalizationStrategy::CaseInsensitive,
88            Dialect::Snowflake => NormalizationStrategy::Uppercase,
89            Dialect::Sqlite => NormalizationStrategy::CaseInsensitive,
90            Dialect::Generic => NormalizationStrategy::CaseInsensitive,
91            Dialect::Ansi => NormalizationStrategy::Uppercase,
92        }
93    }
94
95    /// Returns true if this dialect has custom normalization logic
96    /// that cannot be captured by a simple strategy.
97    pub const fn has_custom_normalization(&self) -> bool {
98        matches!(self, Dialect::Bigquery)
99    }
100
101    /// Get pseudocolumns for this dialect (implicit columns like _PARTITIONTIME).
102    pub fn pseudocolumns(&self) -> &'static [&'static str] {
103        match self {
104            Dialect::Bigquery => &[
105                "_FILE_NAME",
106                "_PARTITIONDATE",
107                "_PARTITIONTIME",
108                "_TABLE_SUFFIX",
109            ],
110            Dialect::Oracle => &[
111                "LEVEL",
112                "OBJECT_ID",
113                "OBJECT_VALUE",
114                "ROWID",
115                "ROWNUM",
116                "SYSDATE",
117                "SYSTIMESTAMP",
118            ],
119            Dialect::Snowflake => &["LEVEL"],
120            _ => &[],
121        }
122    }
123
124    /// Get pseudo-tables for this dialect (e.g., Oracle DUAL).
125    /// These tables are implicit and should not appear in lineage output.
126    pub fn pseudo_tables(&self) -> &'static [&'static str] {
127        match self {
128            Dialect::Oracle => &["DUAL"],
129            _ => &[],
130        }
131    }
132
133    /// Get the identifier quote characters for this dialect.
134    /// Note: Some dialects use paired quotes (like SQLite's []) which are represented
135    /// as single characters here - the opening bracket.
136    pub fn identifier_quotes(&self) -> &'static [&'static str] {
137        match self {
138            Dialect::Bigquery => &["`"],
139            Dialect::Clickhouse => &["\"", "`"],
140            Dialect::Databricks => &["`"],
141            Dialect::Duckdb => &["\""],
142            Dialect::Hive => &["`"],
143            Dialect::Mssql => &["[", "\""],
144            Dialect::Mysql => &["`"],
145            Dialect::Oracle => &["\""],
146            Dialect::Postgres => &["\""],
147            Dialect::Redshift => &["\""],
148            Dialect::Snowflake => &["\""],
149            Dialect::Sqlite => &["\"", "[", "`"],
150            _ => &["\""],
151        }
152    }
153}