flowscope_core/generated/case_sensitivity.rs
1//! Case sensitivity rules per dialect.
2//!
3//! Generated from dialects.json and normalization_overrides.toml
4//!
5//! This module defines how SQL identifiers (table names, column names, etc.)
6//! should be normalized for comparison. Different SQL dialects have different
7//! rules for identifier case sensitivity.
8
9use std::borrow::Cow;
10
11use crate::Dialect;
12
/// Normalization strategy for identifier handling.
///
/// SQL dialects differ in how they treat identifier case. Each variant names
/// one way of bringing identifiers into a canonical form so that they can be
/// compared during analysis; see [`NormalizationStrategy::apply`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizationStrategy {
    /// Fold to lowercase (e.g. Postgres).
    Lowercase,
    /// Fold to uppercase (e.g. Snowflake, Oracle).
    Uppercase,
    /// Case-insensitive comparison. [`NormalizationStrategy::apply`] uses
    /// lowercase folding as the canonical form for this variant.
    CaseInsensitive,
    /// Case-sensitive: identifiers are preserved exactly.
    CaseSensitive,
}

impl NormalizationStrategy {
    /// Applies this normalization strategy to a string.
    ///
    /// Returns a `Cow<str>` so that no allocation happens when folding would
    /// leave the string unchanged (always the case for `CaseSensitive`).
    ///
    /// For `CaseInsensitive`, lowercase folding is used as the canonical form.
    ///
    /// The "does anything change?" check compares every character with its
    /// own case mapping instead of asking `char::is_uppercase` /
    /// `char::is_lowercase`. Those predicates miss titlecase letters such as
    /// `ǅ` (U+01C5), which are neither uppercase nor lowercase yet are changed
    /// by both `to_lowercase` and `to_uppercase`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::borrow::Cow;
    /// use flowscope_core::generated::NormalizationStrategy;
    ///
    /// let strategy = NormalizationStrategy::Lowercase;
    /// assert_eq!(strategy.apply("MyTable"), "mytable");
    ///
    /// // CaseSensitive returns a borrowed reference (no allocation)
    /// let strategy = NormalizationStrategy::CaseSensitive;
    /// assert!(matches!(strategy.apply("MyTable"), Cow::Borrowed(_)));
    /// ```
    pub fn apply<'a>(&self, s: &'a str) -> Cow<'a, str> {
        match self {
            Self::CaseSensitive => Cow::Borrowed(s),
            Self::Lowercase | Self::CaseInsensitive => {
                // Only allocate when at least one character really folds.
                if s.chars().any(Self::lowercase_differs) {
                    Cow::Owned(s.to_lowercase())
                } else {
                    Cow::Borrowed(s)
                }
            }
            Self::Uppercase => {
                // Only allocate when at least one character really folds.
                if s.chars().any(Self::uppercase_differs) {
                    Cow::Owned(s.to_uppercase())
                } else {
                    Cow::Borrowed(s)
                }
            }
        }
    }

    /// Returns `true` when the lowercase mapping of `c` is anything other
    /// than the single character `c` itself.
    fn lowercase_differs(c: char) -> bool {
        let mut mapped = c.to_lowercase();
        mapped.next() != Some(c) || mapped.next().is_some()
    }

    /// Returns `true` when the uppercase mapping of `c` is anything other
    /// than the single character `c` itself (this includes one-to-many
    /// mappings such as `ß` -> `SS`).
    fn uppercase_differs(c: char) -> bool {
        let mut mapped = c.to_uppercase();
        mapped.next() != Some(c) || mapped.next().is_some()
    }
}
73
74impl Dialect {
75 /// Get the normalization strategy for this dialect.
76 pub const fn normalization_strategy(&self) -> NormalizationStrategy {
77 match self {
78 Dialect::Bigquery => NormalizationStrategy::CaseInsensitive,
79 Dialect::Clickhouse => NormalizationStrategy::CaseSensitive,
80 Dialect::Databricks => NormalizationStrategy::CaseInsensitive,
81 Dialect::Duckdb => NormalizationStrategy::CaseInsensitive,
82 Dialect::Hive => NormalizationStrategy::CaseInsensitive,
83 Dialect::Mssql => NormalizationStrategy::CaseInsensitive,
84 Dialect::Mysql => NormalizationStrategy::CaseSensitive,
85 Dialect::Oracle => NormalizationStrategy::Uppercase,
86 Dialect::Postgres => NormalizationStrategy::Lowercase,
87 Dialect::Redshift => NormalizationStrategy::CaseInsensitive,
88 Dialect::Snowflake => NormalizationStrategy::Uppercase,
89 Dialect::Sqlite => NormalizationStrategy::CaseInsensitive,
90 Dialect::Generic => NormalizationStrategy::CaseInsensitive,
91 Dialect::Ansi => NormalizationStrategy::Uppercase,
92 }
93 }
94
95 /// Returns true if this dialect has custom normalization logic
96 /// that cannot be captured by a simple strategy.
97 pub const fn has_custom_normalization(&self) -> bool {
98 matches!(self, Dialect::Bigquery)
99 }
100
101 /// Get pseudocolumns for this dialect (implicit columns like _PARTITIONTIME).
102 pub fn pseudocolumns(&self) -> &'static [&'static str] {
103 match self {
104 Dialect::Bigquery => &[
105 "_FILE_NAME",
106 "_PARTITIONDATE",
107 "_PARTITIONTIME",
108 "_TABLE_SUFFIX",
109 ],
110 Dialect::Oracle => &[
111 "LEVEL",
112 "OBJECT_ID",
113 "OBJECT_VALUE",
114 "ROWID",
115 "ROWNUM",
116 "SYSDATE",
117 "SYSTIMESTAMP",
118 ],
119 Dialect::Snowflake => &["LEVEL"],
120 _ => &[],
121 }
122 }
123
124 /// Get pseudo-tables for this dialect (e.g., Oracle DUAL).
125 /// These tables are implicit and should not appear in lineage output.
126 pub fn pseudo_tables(&self) -> &'static [&'static str] {
127 match self {
128 Dialect::Oracle => &["DUAL"],
129 _ => &[],
130 }
131 }
132
133 /// Get the identifier quote characters for this dialect.
134 /// Note: Some dialects use paired quotes (like SQLite's []) which are represented
135 /// as single characters here - the opening bracket.
136 pub fn identifier_quotes(&self) -> &'static [&'static str] {
137 match self {
138 Dialect::Bigquery => &["`"],
139 Dialect::Clickhouse => &["\"", "`"],
140 Dialect::Databricks => &["`"],
141 Dialect::Duckdb => &["\""],
142 Dialect::Hive => &["`"],
143 Dialect::Mssql => &["[", "\""],
144 Dialect::Mysql => &["`"],
145 Dialect::Oracle => &["\""],
146 Dialect::Postgres => &["\""],
147 Dialect::Redshift => &["\""],
148 Dialect::Snowflake => &["\""],
149 Dialect::Sqlite => &["\"", "[", "`"],
150 _ => &["\""],
151 }
152 }
153}