1use crate::is_ident_byte;
34use crate::sql_sem::{SqlSemanticVerb, SqlStatementModel, TableUsageKind, TableUse};
35
36#[must_use]
42pub fn resolve_sql(raw: &str) -> SqlStatementModel {
43 let trimmed = raw.trim_start();
52 let upper = crate::fact_emit::mask_string_literals(&trimmed.to_ascii_uppercase());
59 let verb = classify_verb(&upper);
60 let mut model = SqlStatementModel {
61 verb,
62 ..SqlStatementModel::default()
63 };
64
65 match verb {
66 SqlSemanticVerb::Select => {
67 collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
68 }
69 SqlSemanticVerb::Insert => {
70 for (s, t, a) in tables_after_keyword(&upper, trimmed, "INTO") {
71 add(&mut model, s, t, a, TableUsageKind::Write);
72 }
73 collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
75 }
76 SqlSemanticVerb::Update => {
77 for (s, t, a) in tables_after_keyword(&upper, trimmed, "UPDATE") {
78 add(&mut model, s, t, a, TableUsageKind::Write);
79 }
80 collect_from_and_joins(&upper, trimmed, &mut model, TableUsageKind::Read);
81 }
82 SqlSemanticVerb::Delete => {
83 let target = delete_target(&upper, trimmed);
92 let target_key = target.as_ref().map(|(s, t, _)| (s.clone(), t.clone()));
93 if let Some((s, t, a)) = target {
94 add(&mut model, s, t, a, TableUsageKind::Write);
95 }
96 let mut target_consumed = false;
97 for (s, t, a) in tables_after_keyword(&upper, trimmed, "FROM") {
98 if !target_consumed && target_key.as_ref() == Some(&(s.clone(), t.clone())) {
99 target_consumed = true;
100 continue;
101 }
102 add(&mut model, s, t, a, TableUsageKind::Read);
103 }
104 for (s, t, a) in tables_after_keyword(&upper, trimmed, "JOIN") {
105 add(&mut model, s, t, a, TableUsageKind::Read);
106 }
107 }
108 SqlSemanticVerb::MergeUpdate
109 | SqlSemanticVerb::MergeInsert
110 | SqlSemanticVerb::MergeDelete => {
111 for (s, t, a) in tables_after_keyword(&upper, trimmed, "INTO") {
112 add(&mut model, s, t, a, TableUsageKind::ReadWrite);
113 }
114 for (s, t, a) in tables_after_keyword(&upper, trimmed, "USING") {
115 add(&mut model, s, t, a, TableUsageKind::Read);
116 }
117 }
118 }
119 model
120}
121
122fn classify_verb(upper: &str) -> SqlSemanticVerb {
123 if upper.starts_with("INSERT") {
124 SqlSemanticVerb::Insert
125 } else if upper.starts_with("UPDATE") {
126 SqlSemanticVerb::Update
127 } else if upper.starts_with("DELETE") {
128 SqlSemanticVerb::Delete
129 } else if upper.starts_with("MERGE") {
130 SqlSemanticVerb::MergeUpdate
131 } else {
132 SqlSemanticVerb::Select
133 }
134}
135
136fn collect_from_and_joins(
137 upper: &str,
138 raw: &str,
139 model: &mut SqlStatementModel,
140 usage: TableUsageKind,
141) {
142 for (s, t, a) in tables_after_keyword(upper, raw, "FROM") {
143 add(model, s, t, a, usage);
144 }
145 for (s, t, a) in tables_after_keyword(upper, raw, "JOIN") {
146 add(model, s, t, a, usage);
147 }
148}
149
150fn add(
151 model: &mut SqlStatementModel,
152 schema: Option<String>,
153 table: String,
154 alias: String,
155 usage: TableUsageKind,
156) {
157 if table.is_empty() || table == "DUAL" {
158 return;
159 }
160 let schema_str = schema.clone().unwrap_or_default();
161 let alias_key = if alias.is_empty() {
164 table.clone()
165 } else {
166 alias.clone()
167 };
168 model.alias_scope_bind(&alias_key, &schema_str, &table);
169 if !model
171 .tables
172 .iter()
173 .any(|tu| tu.schema == schema_str && tu.table == table && tu.usage == usage)
174 {
175 model.tables.push(TableUse {
176 schema: schema_str,
177 table,
178 alias,
179 usage,
180 });
181 }
182}
183
184fn tables_after_keyword(
188 upper: &str,
189 raw: &str,
190 keyword: &str,
191) -> Vec<(Option<String>, String, String)> {
192 const STOP: &[&str] = &[
193 "WHERE",
194 "GROUP",
195 "ORDER",
196 "HAVING",
197 "SET",
198 "ON",
199 "USING",
200 "WHEN",
201 "VALUES",
202 "SELECT",
203 "CONNECT",
204 "START",
205 "UNION",
206 "MINUS",
207 "INTERSECT",
208 "FETCH",
209 "OFFSET",
210 ];
211 let mut out = Vec::new();
212 let bytes = upper.as_bytes();
213 let kw = keyword.to_ascii_uppercase();
214 let mut search = 0;
215 while let Some(rel) = upper[search..].find(&kw) {
216 let abs = search + rel;
217 search = abs + kw.len();
218 let prev_ok = abs == 0 || !is_ident_byte(bytes[abs - 1]);
219 let after = abs + kw.len();
220 let next_ok = after >= bytes.len() || !is_ident_byte(bytes[after]);
221 if !(prev_ok && next_ok) {
222 continue;
223 }
224 let mut i = after;
226 loop {
227 while i < bytes.len() && (bytes[i].is_ascii_whitespace() || bytes[i] == b',') {
228 i += 1;
229 }
230 if i >= bytes.len() {
231 break;
232 }
233 let tok_start = i;
235 while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
236 i += 1;
237 }
238 if i == tok_start {
239 break;
240 }
241 let token = &raw[tok_start..i];
242 let token_upper = token.to_ascii_uppercase();
243 if STOP.contains(&token_upper.as_str()) || token.starts_with('(') {
244 break;
245 }
246 let (schema, table) = match token_upper.rsplit_once('.') {
247 Some((s, t)) if !t.is_empty() => (Some(s.to_string()), t.to_string()),
248 _ => (None, token_upper.clone()),
249 };
250 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
253 i += 1;
254 }
255 let mut alias = String::new();
256 if i < bytes.len() && is_ident_byte(bytes[i]) {
257 let a_start = i;
258 while i < bytes.len() && is_ident_byte(bytes[i]) {
259 i += 1;
260 }
261 let cand = raw[a_start..i].to_string();
262 let cand_upper = cand.to_ascii_uppercase();
263 if STOP.contains(&cand_upper.as_str())
264 || cand_upper == "JOIN"
265 || cand_upper == "INNER"
266 || cand_upper == "LEFT"
267 || cand_upper == "RIGHT"
268 || cand_upper == "FULL"
269 || cand_upper == "CROSS"
270 {
271 i = a_start;
274 } else if cand_upper == "AS" {
275 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
278 i += 1;
279 }
280 let real_start = i;
281 while i < bytes.len() && is_ident_byte(bytes[i]) {
282 i += 1;
283 }
284 alias = raw[real_start..i].to_string();
285 } else {
286 alias = cand;
287 }
288 }
289 out.push((schema, table, alias));
290 if keyword != "FROM" {
292 break;
293 }
294 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
296 i += 1;
297 }
298 if i >= bytes.len() || bytes[i] != b',' {
299 break;
300 }
301 }
302 }
303 out
304}
305
306fn delete_target(upper: &str, raw: &str) -> Option<(Option<String>, String, String)> {
314 let bytes = upper.as_bytes();
315 let mut i = 0;
317 while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
318 i += 1;
319 }
320 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
321 i += 1;
322 }
323 if bytes[i..]
331 .get(..4)
332 .is_some_and(|w| w.eq_ignore_ascii_case(b"FROM"))
333 && (i + 4 >= bytes.len() || !is_ident_byte(bytes[i + 4]))
334 {
335 i += 4;
336 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
337 i += 1;
338 }
339 }
340 let start = i;
342 while i < bytes.len() && (is_ident_byte(bytes[i]) || bytes[i] == b'.') {
343 i += 1;
344 }
345 if i == start {
346 return None;
347 }
348 let token_upper = upper[start..i].to_string();
349 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
351 i += 1;
352 }
353 let mut alias = String::new();
354 if i < bytes.len() && is_ident_byte(bytes[i]) {
355 let a_start = i;
356 while i < bytes.len() && is_ident_byte(bytes[i]) {
357 i += 1;
358 }
359 let cand_upper = upper[a_start..i].to_string();
360 if cand_upper != "WHERE" && cand_upper != "SET" && cand_upper != "RETURNING" {
363 alias = raw[a_start..i].to_string();
364 }
365 }
366 let (schema, table) = match token_upper.rsplit_once('.') {
367 Some((s, t)) if !t.is_empty() => (Some(s.to_string()), t.to_string()),
368 _ => (None, token_upper),
369 };
370 Some((schema, table, alias))
371}
372
373impl SqlStatementModel {
374 fn alias_scope_bind(&mut self, alias: &str, schema: &str, table: &str) {
375 let mut scope = std::mem::take(&mut self.alias_scope);
377 scope.bind(alias, schema, table);
378 self.alias_scope = scope;
379 }
380}
381
382pub use crate::sql_sem::AliasScope as ResolvedAliasScope;
385
386#[cfg(test)]
387mod tests {
388 use super::*;
389
390 #[test]
391 fn select_from_single_table_with_alias() {
392 let m = resolve_sql("SELECT e.id INTO v FROM employees e WHERE e.id = 1");
393 assert_eq!(m.verb, SqlSemanticVerb::Select);
394 assert_eq!(m.tables.len(), 1);
395 assert_eq!(m.tables[0].table, "EMPLOYEES");
396 assert_eq!(m.tables[0].alias, "e");
397 assert_eq!(m.tables[0].usage, TableUsageKind::Read);
398 assert_eq!(m.alias_scope.resolve("e"), Some(("", "EMPLOYEES")));
399 }
400
401 #[test]
402 fn leading_whitespace_does_not_shift_table_and_alias_offsets() {
403 let m = resolve_sql(" SELECT id FROM Employees emp");
408 assert_eq!(m.tables.len(), 1, "{:?}", m.tables);
409 assert_eq!(m.tables[0].table, "EMPLOYEES");
410 assert_eq!(m.tables[0].alias, "emp");
411 }
412
413 #[test]
414 fn select_schema_qualified_table() {
415 let m = resolve_sql("SELECT 1 INTO v FROM hr.employees");
416 assert_eq!(m.tables[0].schema, "HR");
417 assert_eq!(m.tables[0].table, "EMPLOYEES");
418 }
419
420 #[test]
421 fn select_comma_joined_list() {
422 let m = resolve_sql("SELECT 1 INTO v FROM a, b, c WHERE a.x = b.x");
423 let names: Vec<&str> = m.tables.iter().map(|t| t.table.as_str()).collect();
424 assert!(names.contains(&"A"));
425 assert!(names.contains(&"B"));
426 assert!(names.contains(&"C"));
427 }
428
429 #[test]
430 fn join_tables_collected() {
431 let m = resolve_sql("SELECT 1 INTO v FROM employees e JOIN departments d ON e.dept = d.id");
432 let names: Vec<&str> = m.tables.iter().map(|t| t.table.as_str()).collect();
433 assert!(names.contains(&"EMPLOYEES"));
434 assert!(names.contains(&"DEPARTMENTS"));
435 assert!(m.tables.iter().all(|t| t.usage == TableUsageKind::Read));
436 }
437
438 #[test]
439 fn insert_into_is_write_subselect_is_read() {
440 let m = resolve_sql("INSERT INTO summary SELECT id FROM employees");
441 assert!(
442 m.tables
443 .iter()
444 .any(|t| t.table == "SUMMARY" && t.usage == TableUsageKind::Write)
445 );
446 assert!(
447 m.tables
448 .iter()
449 .any(|t| t.table == "EMPLOYEES" && t.usage == TableUsageKind::Read)
450 );
451 }
452
453 #[test]
454 fn update_with_alias_is_write() {
455 let m = resolve_sql("UPDATE employees e SET e.salary = e.salary * 1.1");
456 assert_eq!(m.verb, SqlSemanticVerb::Update);
457 assert!(
458 m.tables
459 .iter()
460 .any(|t| t.table == "EMPLOYEES" && t.usage == TableUsageKind::Write)
461 );
462 }
463
464 #[test]
465 fn delete_from_is_write() {
466 let m = resolve_sql("DELETE FROM stale WHERE id < 100");
467 assert_eq!(m.verb, SqlSemanticVerb::Delete);
468 assert_eq!(m.tables[0].table, "STALE");
469 assert_eq!(m.tables[0].usage, TableUsageKind::Write);
470 }
471
472 #[test]
473 fn clause_keyword_inside_string_literal_is_not_a_phantom_table() {
474 let m = resolve_sql("INSERT INTO log VALUES ('read FROM cache')");
477 assert!(
478 !m.tables.iter().any(|t| t.table == "CACHE"),
479 "FROM inside a literal must not mint a phantom CACHE read: {:?}",
480 m.tables
481 );
482 assert!(
483 m.tables
484 .iter()
485 .any(|t| t.table == "LOG" && t.usage == TableUsageKind::Write),
486 "the real INSERT target LOG must still be a Write: {:?}",
487 m.tables
488 );
489 }
490
491 #[test]
492 fn delete_with_multibyte_first_token_does_not_panic() {
493 let _ = resolve_sql("DELETE é★ WHERE x = 1");
500 let _ = resolve_sql("DELETE é★"); }
502
503 #[test]
507 fn delete_with_where_subquery_target_write_subquery_read() {
508 let m = resolve_sql("DELETE FROM t WHERE id IN (SELECT id FROM staging)");
509 assert_eq!(m.verb, SqlSemanticVerb::Delete);
510 assert!(
511 m.tables
512 .iter()
513 .any(|t| t.table == "T" && t.usage == TableUsageKind::Write),
514 "DELETE target T must be Write: {:?}",
515 m.tables
516 );
517 assert!(
518 m.tables
519 .iter()
520 .any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Read),
521 "WHERE sub-SELECT table STAGING must be Read: {:?}",
522 m.tables
523 );
524 assert!(
525 !m.tables
526 .iter()
527 .any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Write),
528 "STAGING must NEVER be Write: {:?}",
529 m.tables
530 );
531 }
532
533 #[test]
538 fn from_less_delete_resolves_write_target() {
539 let m = resolve_sql("DELETE employees WHERE id = 5");
540 assert_eq!(m.verb, SqlSemanticVerb::Delete);
541 assert_eq!(m.tables.len(), 1, "{:?}", m.tables);
542 assert_eq!(m.tables[0].table, "EMPLOYEES");
543 assert_eq!(m.tables[0].usage, TableUsageKind::Write);
544 }
545
546 #[test]
547 fn from_less_qualified_delete_resolves_schema() {
548 let m = resolve_sql("DELETE hr.audit_log WHERE ts < SYSDATE");
549 assert_eq!(m.verb, SqlSemanticVerb::Delete);
550 assert_eq!(m.tables[0].schema, "HR");
551 assert_eq!(m.tables[0].table, "AUDIT_LOG");
552 assert_eq!(m.tables[0].usage, TableUsageKind::Write);
553 }
554
555 #[test]
558 fn from_less_delete_subquery_target_write_subquery_read() {
559 let m = resolve_sql("DELETE t WHERE id IN (SELECT id FROM staging)");
560 assert!(
561 m.tables
562 .iter()
563 .any(|t| t.table == "T" && t.usage == TableUsageKind::Write),
564 "FROM-less DELETE target T must be Write: {:?}",
565 m.tables
566 );
567 assert!(
568 m.tables
569 .iter()
570 .any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Read),
571 "WHERE sub-SELECT table STAGING must be Read: {:?}",
572 m.tables
573 );
574 assert!(
575 !m.tables
576 .iter()
577 .any(|t| t.table == "STAGING" && t.usage == TableUsageKind::Write),
578 "STAGING must NEVER be Write: {:?}",
579 m.tables
580 );
581 }
582
583 #[test]
584 fn merge_into_is_readwrite_using_is_read() {
585 let m = resolve_sql(
586 "MERGE INTO target t USING source s ON (t.id = s.id) WHEN MATCHED THEN UPDATE SET t.v = s.v",
587 );
588 assert!(
589 m.tables
590 .iter()
591 .any(|t| t.table == "TARGET" && t.usage == TableUsageKind::ReadWrite)
592 );
593 assert!(
594 m.tables
595 .iter()
596 .any(|t| t.table == "SOURCE" && t.usage == TableUsageKind::Read)
597 );
598 }
599
600 #[test]
601 fn as_alias_form_parsed() {
602 let m = resolve_sql("SELECT 1 INTO v FROM employees AS emp");
603 assert_eq!(m.tables[0].alias, "emp");
604 assert_eq!(m.alias_scope.resolve("emp"), Some(("", "EMPLOYEES")));
605 }
606
607 #[test]
608 fn dual_filtered_out() {
609 let m = resolve_sql("SELECT SYSDATE INTO v FROM dual");
610 assert!(m.tables.is_empty());
611 }
612
613 #[test]
614 fn alias_scope_resolves_qualifier() {
615 let m = resolve_sql("SELECT e.name INTO v FROM hr.employees e");
616 assert_eq!(m.alias_scope.resolve("e"), Some(("HR", "EMPLOYEES")));
617 }
618
619 #[test]
620 fn no_table_keyword_yields_empty_model() {
621 let m = resolve_sql("SELECT 1 INTO v FROM dual");
622 assert_eq!(m.verb, SqlSemanticVerb::Select);
623 assert!(m.tables.is_empty());
624 }
625}