use std::collections::{HashMap, HashSet};
use std::sync::OnceLock;
use sqlparser::keywords::{
Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX, RESERVED_FOR_COLUMN_ALIAS, RESERVED_FOR_TABLE_ALIAS,
};
use crate::sql::dialect::Dialect;
/// Reports whether `ident` is treated as a SQL keyword for `dialect`.
///
/// The identifier is ASCII-uppercased first, so the comparison against the
/// uppercase keyword tables is case-insensitive for ASCII input. The
/// dialect-independent set is consulted first; the dialect-specific set is
/// only checked when that lookup misses.
pub(super) fn is_keyword(ident: &str, dialect: &Dialect) -> bool {
    let upper = ident.to_ascii_uppercase();
    let candidate = upper.as_str();
    if sql_keywords().contains(candidate) {
        return true;
    }
    dialect_keywords(dialect).contains(candidate)
}
/// Returns the extra keyword set that applies only to `dialect`.
///
/// Only `Dialect::Redshift` has a dedicated set; every other dialect gets the
/// shared empty set.
fn dialect_keywords(dialect: &Dialect) -> &'static HashSet<&'static str> {
    if matches!(dialect, Dialect::Redshift) {
        redshift_keywords()
    } else {
        empty_keywords()
    }
}
/// Returns a shared, lazily created keyword set that contains no entries.
///
/// Used for dialects that add nothing on top of the common keyword set.
fn empty_keywords() -> &'static HashSet<&'static str> {
    static NO_KEYWORDS: OnceLock<HashSet<&'static str>> = OnceLock::new();
    NO_KEYWORDS.get_or_init(HashSet::default)
}
/// Returns the lazily built set of keywords from `REDSHIFT_KEYWORDS`.
///
/// The set is constructed on first use and the same instance is returned on
/// every later call.
fn redshift_keywords() -> &'static HashSet<&'static str> {
    static REDSHIFT_SET: OnceLock<HashSet<&'static str>> = OnceLock::new();
    REDSHIFT_SET.get_or_init(|| REDSHIFT_KEYWORDS.iter().copied().collect())
}
/// Returns the lazily built, dialect-independent keyword set.
///
/// The set is the union of the SQLite, Postgres, DuckDB and BigQuery tables
/// in this file plus the textual form of every keyword listed in sqlparser's
/// `RESERVED_FOR_COLUMN_ALIAS` and `RESERVED_FOR_TABLE_ALIAS`.
///
/// # Panics
///
/// Initialization panics if a reserved keyword is absent from
/// `ALL_KEYWORDS_INDEX`, or if its position is out of bounds for
/// `ALL_KEYWORDS`.
fn sql_keywords() -> &'static HashSet<&'static str> {
    static SQL_KEYWORDS: OnceLock<HashSet<&str>> = OnceLock::new();
    SQL_KEYWORDS.get_or_init(|| {
        let mut keywords: HashSet<&'static str> = HashSet::new();
        for list in [
            SQLITE_KEYWORDS,
            POSTGRES_KEYWORDS,
            DUCKDB_KEYWORDS,
            BIGQUERY_KEYWORDS,
        ] {
            keywords.extend(list);
        }

        // Map each `Keyword` to its position in `ALL_KEYWORDS_INDEX`, so the
        // string at the same position in `ALL_KEYWORDS` can be looked up.
        let mut position_of: HashMap<&Keyword, usize> = HashMap::new();
        for (position, keyword) in ALL_KEYWORDS_INDEX.iter().enumerate() {
            position_of.insert(keyword, position);
        }

        let keyword_name = |keyword: &Keyword| ALL_KEYWORDS[position_of[keyword]];
        keywords.extend(
            RESERVED_FOR_COLUMN_ALIAS
                .iter()
                .chain(RESERVED_FOR_TABLE_ALIAS.iter())
                .map(keyword_name),
        );
        keywords
    })
}
/// Uppercase keyword strings grouped under SQLite.
///
/// `sql_keywords` merges this list into the dialect-independent set, so every
/// entry is reported as a keyword by `is_keyword` whatever dialect is passed.
/// Entries must stay uppercase because `is_keyword` uppercases the identifier
/// before the lookup.
// NOTE(review): presumably mirrors SQLite's published keyword list — confirm against upstream when editing.
const SQLITE_KEYWORDS: &[&str] = &[
"ABORT",
"ACTION",
"ADD",
"AFTER",
"ALL",
"ALTER",
"ALWAYS",
"ANALYZE",
"AND",
"AS",
"ASC",
"ATTACH",
"AUTOINCREMENT",
"BEFORE",
"BEGIN",
"BETWEEN",
"BY",
"CASCADE",
"CASE",
"CAST",
"CHECK",
"COLLATE",
"COLUMN",
"COMMIT",
"CONFLICT",
"CONSTRAINT",
"CREATE",
"CROSS",
"CURRENT",
"CURRENT_DATE",
"CURRENT_TIME",
"CURRENT_TIMESTAMP",
"DATABASE",
"DEFAULT",
"DEFERRABLE",
"DEFERRED",
"DELETE",
"DESC",
"DETACH",
"DISTINCT",
"DO",
"DROP",
"EACH",
"ELSE",
"END",
"ESCAPE",
"EXCEPT",
"EXCLUDE",
"EXCLUSIVE",
"EXISTS",
"EXPLAIN",
"FAIL",
"FILTER",
"FIRST",
"FOLLOWING",
"FOR",
"FOREIGN",
"FROM",
"FULL",
"GENERATED",
"GLOB",
"GROUP",
"GROUPS",
"HAVING",
"IF",
"IGNORE",
"IMMEDIATE",
"IN",
"INDEX",
"INDEXED",
"INITIALLY",
"INNER",
"INSERT",
"INSTEAD",
"INTERSECT",
"INTO",
"IS",
"ISNULL",
"JOIN",
"KEY",
"LAST",
"LEFT",
"LIKE",
"LIMIT",
"MATCH",
"MATERIALIZED",
"NATURAL",
"NO",
"NOT",
"NOTHING",
"NOTNULL",
"NULL",
"NULLS",
"OF",
"OFFSET",
"ON",
"OR",
"ORDER",
"OTHERS",
"OUTER",
"OVER",
"PARTITION",
"PLAN",
"PRAGMA",
"PRECEDING",
"PRIMARY",
"QUERY",
"RAISE",
"RANGE",
"RECURSIVE",
"REFERENCES",
"REGEXP",
"REINDEX",
"RELEASE",
"RENAME",
"REPLACE",
"RESTRICT",
"RETURNING",
"RIGHT",
"ROLLBACK",
"ROW",
"ROWS",
"SAVEPOINT",
"SELECT",
"SET",
"TABLE",
"TEMP",
"TEMPORARY",
"THEN",
"TIES",
"TO",
"TRANSACTION",
"TRIGGER",
"UNBOUNDED",
"UNION",
"UNIQUE",
"UPDATE",
"USING",
"VACUUM",
"VALUES",
"VIEW",
"VIRTUAL",
"WHEN",
"WHERE",
"WINDOW",
"WITH",
"WITHOUT",
];
/// Uppercase keyword strings grouped under PostgreSQL.
///
/// `sql_keywords` merges this list into the dialect-independent set, so every
/// entry is reported as a keyword by `is_keyword` whatever dialect is passed.
/// Entries must stay uppercase because `is_keyword` uppercases the identifier
/// before the lookup.
// NOTE(review): presumably mirrors PostgreSQL's reserved-word list — confirm against upstream when editing.
const POSTGRES_KEYWORDS: &[&str] = &[
"ALL",
"ANALYSE",
"ANALYZE",
"AND",
"ANY",
"ARRAY",
"AS",
"ASC",
"ASYMMETRIC",
"AUTHORIZATION",
"BINARY",
"BOTH",
"CASE",
"CAST",
"CHECK",
"COLLATE",
"COLLATION",
"COLUMN",
"CONCURRENTLY",
"CONSTRAINT",
"CREATE",
"CROSS",
"CURRENT_CATALOG",
"CURRENT_DATE",
"CURRENT_ROLE",
"CURRENT_SCHEMA",
"CURRENT_TIME",
"CURRENT_TIMESTAMP",
"CURRENT_USER",
"DEFAULT",
"DEFERRABLE",
"DESC",
"DISTINCT",
"DO",
"ELSE",
"END",
"EXCEPT",
"FALSE",
"FETCH",
"FOR",
"FOREIGN",
"FREEZE",
"FROM",
"FULL",
"GRANT",
"GROUP",
"HAVING",
"ILIKE",
"IN",
"INITIALLY",
"INNER",
"INTERSECT",
"INTO",
"IS",
"ISNULL",
"JOIN",
"LATERAL",
"LEADING",
"LEFT",
"LIKE",
"LIMIT",
"LOCALTIME",
"LOCALTIMESTAMP",
"NATURAL",
"NOT",
"NOTNULL",
"NULL",
"OFFSET",
"ON",
"ONLY",
"OR",
"ORDER",
"OUTER",
"OVERLAPS",
"PLACING",
"PRIMARY",
"REFERENCES",
"RETURNING",
"RIGHT",
"SELECT",
"SESSION_USER",
"SIMILAR",
"SOME",
"SYMMETRIC",
"SYSTEM_USER",
"TABLE",
"TABLESAMPLE",
"THEN",
"TO",
"TRAILING",
"TRUE",
"UNION",
"UNIQUE",
"USER",
"USING",
"VARIADIC",
"VERBOSE",
"WHEN",
"WHERE",
"WINDOW",
"WITH",
];
/// Uppercase keyword strings grouped under DuckDB.
///
/// `sql_keywords` merges this list into the dialect-independent set, so every
/// entry is reported as a keyword by `is_keyword` whatever dialect is passed.
/// Entries must stay uppercase because `is_keyword` uppercases the identifier
/// before the lookup.
// NOTE(review): presumably mirrors DuckDB's reserved-word list — confirm against upstream when editing.
const DUCKDB_KEYWORDS: &[&str] = &[
"ALL",
"ANALYSE",
"ANALYZE",
"AND",
"ANY",
"ARRAY",
"AS",
"ASC",
"ASYMMETRIC",
"BOTH",
"CASE",
"CAST",
"CHECK",
"COLLATE",
"COLUMN",
"CONSTRAINT",
"CREATE",
"DEFAULT",
"DEFERRABLE",
"DESC",
"DESCRIBE",
"DISTINCT",
"DO",
"ELSE",
"END",
"EXCEPT",
"FALSE",
"FETCH",
"FOR",
"FOREIGN",
"FROM",
"GRANT",
"GROUP",
"HAVING",
"IN",
"INITIALLY",
"INTERSECT",
"INTO",
"LATERAL",
"LEADING",
"LIMIT",
"NOT",
"NULL",
"OFFSET",
"ON",
"ONLY",
"OR",
"ORDER",
"PIVOT",
"PIVOT_LONGER",
"PIVOT_WIDER",
"PLACING",
"PRIMARY",
"QUALIFY",
"REFERENCES",
"RETURNING",
"SELECT",
"SHOW",
"SOME",
"SUMMARIZE",
"SYMMETRIC",
"TABLE",
"THEN",
"TO",
"TRAILING",
"TRUE",
"UNION",
"UNIQUE",
"UNPIVOT",
"USING",
"VARIADIC",
"WHEN",
"WHERE",
"WINDOW",
"WITH",
];
/// Uppercase keyword strings grouped under BigQuery.
///
/// `sql_keywords` merges this list into the dialect-independent set, so every
/// entry is reported as a keyword by `is_keyword` whatever dialect is passed.
/// Entries must stay uppercase because `is_keyword` uppercases the identifier
/// before the lookup.
// NOTE(review): presumably mirrors BigQuery's reserved-keyword list — confirm against upstream when editing.
const BIGQUERY_KEYWORDS: &[&str] = &[
"ALL",
"AND",
"ANY",
"ARRAY",
"AS",
"ASC",
"ASSERT_ROWS_MODIFIED",
"AT",
"BETWEEN",
"BY",
"CASE",
"CAST",
"COLLATE",
"CONTAINS",
"CREATE",
"CROSS",
"CUBE",
"CURRENT",
"DEFAULT",
"DEFINE",
"DESC",
"DISTINCT",
"ELSE",
"END",
"ENUM",
"ESCAPE",
"EXCEPT",
"EXCLUDE",
"EXISTS",
"EXTRACT",
"FALSE",
"FETCH",
"FOLLOWING",
"FOR",
"FROM",
"FULL",
"GROUP",
"GROUPING",
"GROUPS",
"HASH",
"HAVING",
"IF",
"IGNORE",
"IN",
"INNER",
"INTERSECT",
"INTERVAL",
"INTO",
"IS",
"JOIN",
"LATERAL",
"LEFT",
"LIKE",
"LIMIT",
"LOOKUP",
"MERGE",
"NATURAL",
"NEW",
"NO",
"NOT",
"NULL",
"NULLS",
"OF",
"ON",
"OR",
"ORDER",
"OUTER",
"OVER",
"PARTITION",
"PRECEDING",
"PROTO",
"QUALIFY",
"RANGE",
"RECURSIVE",
"RESPECT",
"RIGHT",
"ROLLUP",
"ROWS",
"SELECT",
"SET",
"SOME",
"STRUCT",
"TABLESAMPLE",
"THEN",
"TO",
"TREAT",
"TRUE",
"UNBOUNDED",
"UNION",
"UNNEST",
"USING",
"WHEN",
"WHERE",
"WINDOW",
"WITH",
"WITHIN",
];
/// Uppercase keyword strings grouped under Redshift.
///
/// Unlike the other tables in this file, this list is not merged into the
/// dialect-independent set. `redshift_keywords` builds a set from it, and
/// `dialect_keywords` returns that set only for `Dialect::Redshift`.
/// Entries must stay uppercase because `is_keyword` uppercases the identifier
/// before the lookup.
// NOTE(review): presumably the Redshift-only additions to the reserved-word list — confirm against upstream when editing.
const REDSHIFT_KEYWORDS: &[&str] = &[
"AES128",
"AES256",
"ALLOWOVERWRITE",
"BACKUP",
"BLANKSASNULL",
"BYTEDICT",
"BZIP2",
"CREDENTIALS",
"DEFRAG",
"DEFLATE",
"DELTA",
"DELTA32K",
"EMPTYASNULL",
"ENCODE",
"ENCRYPT",
"ENCRYPTION",
"EXPLICIT",
"GLOBALDICT256",
"GLOBALDICT64K",
"GZIP",
"IDENTITY",
"LUN",
"LUNS",
"LZO",
"LZOP",
"MINUS",
"MOSTLY13",
"MOSTLY32",
"MOSTLY8",
"OFFLINE",
"OID",
"PARALLEL",
"PERCENT",
"PERMISSIONS",
"RAW",
"READRATIO",
"RECOVER",
"REJECTLOG",
"RESORT",
"RESTORE",
"SNAPSHOT",
"SYSDATE",
"SYSTEM",
"TAG",
"TDES",
"TEXT255",
"TEXT32K",
"TIME",
"TIMESTAMP",
"TOP",
"TRUNCATECOLUMNS",
"WALLET",
];
/// Checks case-insensitive lookup against the common set and that "time" is
/// a keyword only under the Redshift dialect.
#[test]
fn test_sql_keywords() {
    // Lowercase input must match entries of the dialect-independent set.
    for common in ["from", "user"] {
        assert!(is_keyword(common, &Dialect::Generic));
    }

    // "time" comes from the Redshift-only table.
    assert!(is_keyword("time", &Dialect::Redshift));
    for other in [Dialect::Postgres, Dialect::Generic] {
        assert!(!is_keyword("time", &other));
    }
}