use crate::is_ident_byte;
use crate::sql_sem::{SqlSemanticVerb, SqlStatementModel, TableUsageKind, TableUse};
#[must_use]
pub fn resolve_sql(raw: &str) -> SqlStatementModel {
let trimmed = raw.trim_start();
let upper = crate::fact_emit::mask_string_literals(&trimmed.to_ascii_uppercase());
let verb = classify_verb(&upper);
let mut model = SqlStatementModel {
verb,
..SqlStatementModel::default()
};
match verb {
SqlSemanticVerb::Select => {
collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
}
SqlSemanticVerb::Insert => {
for (s, t, a) in tables_after_keyword(&upper, trimmed, "INTO") {
add(&mut model, s, t, a, TableUsageKind::Write);
}
collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
}
SqlSemanticVerb::Update => {
for (s, t, a) in tables_after_keyword(&upper, trimmed, "UPDATE") {
add(&mut model, s, t, a, TableUsageKind::Write);
}
collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
}
SqlSemanticVerb::Delete => {
let target = delete_target(&upper, trimmed);
let target_key = target.as_ref().map(|(s, t, _)| (s.clone(), t.clone()));
if let Some((s, t, a)) = target {
add(&mut model, s, t, a, TableUsageKind::Write);
}
let mut target_consumed = false;
for (s, t, a) in tables_after_keyword(&upper, trimmed, "FROM") {
if !target_consumed && target_key.as_ref() == Some(&(s.clone(), t.clone())) {
target_consumed = true;
continue;
}
add(&mut model, s, t, a, TableUsageKind::Read);
}
for (s, t, a) in tables_after_keyword(&upper, trimmed, "JOIN") {
add(&mut model, s, t, a, TableUsageKind::Read);
}
}
SqlSemanticVerb::MergeUpdate
| SqlSemanticVerb::MergeInsert
| SqlSemanticVerb::MergeDelete => {
for (s, t, a) in tables_after_keyword(&upper, trimmed, "INTO") {
add(&mut model, s, t, a, TableUsageKind::ReadWrite);
}
for (s, t, a) in tables_after_keyword(&upper, trimmed, "USING") {
add(&mut model, s, t, a, TableUsageKind::Read);
}
}
}
model
}
fn classify_verb(upper: &str) -> SqlSemanticVerb {
if upper.starts_with("INSERT") {
SqlSemanticVerb::Insert
} else if upper.starts_with("UPDATE") {
SqlSemanticVerb::Update
} else if upper.starts_with("DELETE") {
SqlSemanticVerb::Delete
} else if upper.starts_with("MERGE") {
SqlSemanticVerb::MergeUpdate
} else {
SqlSemanticVerb::Select
}
}
fn collect_from_and_joins(
upper: &str,
raw: &str,
model: &mut SqlStatementModel,
usage: TableUsageKind,
) {
for (s, t, a) in tables_after_keyword(upper, raw, "FROM") {
add(model, s, t, a, usage);
}
for (s, t, a) in tables_after_keyword(upper, raw, "JOIN") {
add(model, s, t, a, usage);
}
}
fn add(
model: &mut SqlStatementModel,
schema: Option<String>,
table: String,
alias: String,
usage: TableUsageKind,
) {
if table.is_empty() || table == "DUAL" {
return;
}
let schema_str = schema.clone().unwrap_or_default();
let alias_key = if alias.is_empty() {
table.clone()
} else {
alias.clone()
};
model.alias_scope_bind(&alias_key, &schema_str, &table);
if !model
.tables
.iter()
.any(|tu| tu.schema == schema_str && tu.table == table && tu.usage == usage)
{
model.tables.push(TableUse {
schema: schema_str,
table,
alias,
usage,
});
}
}
fn tables_after_keyword(
upper: &str,
raw: &str,
keyword: &str,
) -> Vec<(Option<String>, String, String)> {
const STOP: &[&str] = &[
"WHERE",
"GROUP",
"ORDER",
"HAVING",
"SET",
"ON",
"USING",
"WHEN",
"VALUES",
"SELECT",
"CONNECT",
"START",
"UNION",
"MINUS",
"INTERSECT",
"FETCH",
"OFFSET",
];
let mut out = Vec::new();
let bytes = upper.as_bytes();
let kw = keyword.to_ascii_uppercase();
let mut search = 0;
while let Some(rel) = upper[search..].find(&kw) {
let abs = search + rel;
search = abs + kw.len();
let prev_ok = abs == 0 || !is_ident_byte(bytes[abs - 1]);
let after = abs + kw.len();
let next_ok = after >= bytes.len() || !is_ident_byte(bytes[after]);
if !(prev_ok && next_ok) {
continue;
}
let mut i = after;
loop {
while i < bytes.len() && (bytes[i].is_ascii_whitespace() || bytes[i] == b',') {
i += 1;
}
if i >= bytes.len() {
break;
}
let tok_start = i;
while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
i += 1;
}
if i == tok_start {
break;
}
let token = &raw[tok_start..i];
let token_upper = token.to_ascii_uppercase();
if STOP.contains(&token_upper.as_str()) || token.starts_with('(') {
break;
}
let (schema, table) = match token_upper.rsplit_once('.') {
Some((s, t)) if !t.is_empty() => (Some(s.to_string()), t.to_string()),
_ => (None, token_upper.clone()),
};
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
let mut alias = String::new();
if i < bytes.len() && is_ident_byte(bytes[i]) {
let a_start = i;
while i < bytes.len() && is_ident_byte(bytes[i]) {
i += 1;
}
let cand = raw[a_start..i].to_string();
let cand_upper = cand.to_ascii_uppercase();
if STOP.contains(&cand_upper.as_str())
|| cand_upper == "JOIN"
|| cand_upper == "INNER"
|| cand_upper == "LEFT"
|| cand_upper == "RIGHT"
|| cand_upper == "FULL"
|| cand_upper == "CROSS"
{
i = a_start;
} else if cand_upper == "AS" {
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
let real_start = i;
while i < bytes.len() && is_ident_byte(bytes[i]) {
i += 1;
}
alias = raw[real_start..i].to_string();
} else {
alias = cand;
}
}
out.push((schema, table, alias));
if keyword != "FROM" {
break;
}
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
if i >= bytes.len() || bytes[i] != b',' {
break;
}
}
}
out
}
fn delete_target(upper: &str, raw: &str) -> Option<(Option<String>, String, String)> {
let bytes = upper.as_bytes();
let mut i = 0;
while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
i += 1;
}
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
if bytes[i..]
.get(..4)
.is_some_and(|w| w.eq_ignore_ascii_case(b"FROM"))
&& (i + 4 >= bytes.len() || !is_ident_byte(bytes[i + 4]))
{
i += 4;
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
}
let start = i;
while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
i += 1;
}
if i == start {
return None;
}
let token_upper = upper[start..i].to_string();
while i < bytes.len() && bytes[i].is_ascii_whitespace() {
i += 1;
}
let mut alias = String::new();
if i < bytes.len() && is_ident_byte(bytes[i]) {
let a_start = i;
while i < bytes.len() && is_ident_byte(bytes[i]) {
i += 1;
}
let cand_upper = upper[a_start..i].to_string();
if cand_upper != "WHERE" && cand_upper != "SET" && cand_upper != "RETURNING" {
alias = raw[a_start..i].to_string();
}
}
let (schema, table) = match token_upper.rsplit_once('.') {
Some((s, t)) if !t.is_empty() => (Some(s.to_string()), t.to_string()),
_ => (None, token_upper),
};
Some((schema, table, alias))
}
impl SqlStatementModel {
fn alias_scope_bind(&mut self, alias: &str, schema: &str, table: &str) {
let mut scope = std::mem::take(&mut self.alias_scope);
scope.bind(alias, schema, table);
self.alias_scope = scope;
}
}
pub use crate::sql_sem::AliasScope as ResolvedAliasScope;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn select_from_single_table_with_alias() {
let m = resolve_sql("SELECT e.id INTO v FROM employees e WHERE e.id = 1");
assert_eq!(m.verb, SqlSemanticVerb::Select);
assert_eq!(m.tables.len(), 1);
assert_eq!(m.tables[0].table, "EMPLOYEES");
assert_eq!(m.tables[0].alias, "e");
assert_eq!(m.tables[0].usage, TableUsageKind::Read);
assert_eq!(m.alias_scope.resolve("e"), Some(("", "EMPLOYEES")));
}
#[test]
fn leading_whitespace_does_not_shift_table_and_alias_offsets() {
let m = resolve_sql(" SELECT id FROM Employees emp");
assert_eq!(m.tables.len(), 1, "{:?}", m.tables);
assert_eq!(m.tables[0].table, "EMPLOYEES");
assert_eq!(m.tables[0].alias, "emp");
}
#[test]
fn select_schema_qualified_table() {
let m = resolve_sql("SELECT 1 INTO v FROM hr.employees");
assert_eq!(m.tables[0].schema, "HR");
assert_eq!(m.tables[0].table, "EMPLOYEES");
}
#[test]
fn select_comma_joined_list() {
let m = resolve_sql("SELECT 1 INTO v FROM a, b, c WHERE a.x = b.x");
let names: Vec<&str> = m.tables.iter().map(|t| t.table.as_str()).collect();
assert!(names.contains(&"A"));
assert!(names.contains(&"B"));
assert!(names.contains(&"C"));
}
#[test]
fn join_tables_collected() {
let m = resolve_sql("SELECT 1 INTO v FROM employees e JOIN departments d ON e.dept = d.id");
let names: Vec<&str> = m.tables.iter().map(|t| t.table.as_str()).collect();
assert!(names.contains(&"EMPLOYEES"));
assert!(names.contains(&"DEPARTMENTS"));
assert!(m.tables.iter().all(|t| t.usage == TableUsageKind::Read));
}
#[test]
fn insert_into_is_write_subselect_is_read() {
let m = resolve_sql("INSERT INTO summary SELECT id FROM employees");
assert!(
m.tables
.iter()
.any(|t| t.table == "SUMMARY" && t.usage == TableUsageKind::Write)
);
assert!(
m.tables
.iter()
.any(|t| t.table == "EMPLOYEES" && t.usage == TableUsageKind::Read)
);
}
#[test]
fn update_with_alias_is_write() {
let m = resolve_sql("UPDATE employees e SET e.salary = e.salary * 1.1");
assert_eq!(m.verb, SqlSemanticVerb::Update);
assert!(
m.tables
.iter()
.any(|t| t.table == "EMPLOYEES" && t.usage == TableUsageKind::Write)
);
}
#[test]
fn delete_from_is_write() {
let m = resolve_sql("DELETE FROM stale WHERE id < 100");
assert_eq!(m.verb, SqlSemanticVerb::Delete);
assert_eq!(m.tables[0].table, "STALE");
assert_eq!(m.tables[0].usage, TableUsageKind::Write);
}
#[test]
fn clause_keyword_inside_string_literal_is_not_a_phantom_table() {
let m = resolve_sql("INSERT INTO log VALUES ('read FROM cache')");
assert!(
!m.tables.iter().any(|t| t.table == "CACHE"),
"FROM inside a literal must not mint a phantom CACHE read: {:?}",
m.tables
);
assert!(
m.tables
.iter()
.any(|t| t.table == "LOG" && t.usage == TableUsageKind::Write),
"the real INSERT target LOG must still be a Write: {:?}",
m.tables
);
}
#[test]
fn delete_with_multibyte_first_token_does_not_panic() {
let _ = resolve_sql("DELETE é★ WHERE x = 1");
let _ = resolve_sql("DELETE é★"); }
#[test]
fn delete_with_where_subquery_target_write_subquery_read() {
let m = resolve_sql("DELETE FROM t WHERE id IN (SELECT id FROM staging)");
assert_eq!(m.verb, SqlSemanticVerb::Delete);
assert!(
m.tables
.iter()
.any(|t| t.table == "T" && t.usage == TableUsageKind::Write),
"DELETE target T must be Write: {:?}",
m.tables
);
assert!(
m.tables
.iter()
.any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Read),
"WHERE sub-SELECT table STAGING must be Read: {:?}",
m.tables
);
assert!(
!m.tables
.iter()
.any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Write),
"STAGING must NEVER be Write: {:?}",
m.tables
);
}
#[test]
fn from_less_delete_resolves_write_target() {
let m = resolve_sql("DELETE employees WHERE id = 5");
assert_eq!(m.verb, SqlSemanticVerb::Delete);
assert_eq!(m.tables.len(), 1, "{:?}", m.tables);
assert_eq!(m.tables[0].table, "EMPLOYEES");
assert_eq!(m.tables[0].usage, TableUsageKind::Write);
}
#[test]
fn from_less_qualified_delete_resolves_schema() {
let m = resolve_sql("DELETE hr.audit_log WHERE ts < SYSDATE");
assert_eq!(m.verb, SqlSemanticVerb::Delete);
assert_eq!(m.tables[0].schema, "HR");
assert_eq!(m.tables[0].table, "AUDIT_LOG");
assert_eq!(m.tables[0].usage, TableUsageKind::Write);
}
#[test]
fn from_less_delete_subquery_target_write_subquery_read() {
let m = resolve_sql("DELETE t WHERE id IN (SELECT id FROM staging)");
assert!(
m.tables
.iter()
.any(|t| t.table == "T" && t.usage == TableUsageKind::Write),
"FROM-less DELETE target T must be Write: {:?}",
m.tables
);
assert!(
m.tables
.iter()
.any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Read),
"WHERE sub-SELECT table STAGING must be Read: {:?}",
m.tables
);
assert!(
!m.tables
.iter()
.any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Write),
"STAGING must NEVER be Write: {:?}",
m.tables
);
}
#[test]
fn merge_into_is_readwrite_using_is_read() {
let m = resolve_sql(
"MERGE INTO target t USING source s ON (t.id = s.id) WHEN MATCHED THEN UPDATE SET t.v = s.v",
);
assert!(
m.tables
.iter()
.any(|t| t.table == "TARGET" && t.usage == TableUsageKind::ReadWrite)
);
assert!(
m.tables
.iter()
.any(|t| t.table == "SOURCE" && t.usage == TableUsageKind::Read)
);
}
#[test]
fn as_alias_form_parsed() {
let m = resolve_sql("SELECT 1 INTO v FROM employees AS emp");
assert_eq!(m.tables[0].alias, "emp");
assert_eq!(m.alias_scope.resolve("emp"), Some(("", "EMPLOYEES")));
}
#[test]
fn dual_filtered_out() {
let m = resolve_sql("SELECT SYSDATE INTO v FROM dual");
assert!(m.tables.is_empty());
}
#[test]
fn alias_scope_resolves_qualifier() {
let m = resolve_sql("SELECT e.name INTO v FROM hr.employees e");
assert_eq!(m.alias_scope.resolve("e"), Some(("HR", "EMPLOYEES")));
}
#[test]
fn no_table_keyword_yields_empty_model() {
let m = resolve_sql("SELECT 1 INTO v FROM dual");
assert_eq!(m.verb, SqlSemanticVerb::Select);
assert!(m.tables.is_empty());
}
}