1use crate::sql_sem::{
32 ColumnResolution, ColumnUse, ProjectionItem, SqlSemanticModel, SqlSemanticVerb,
33 SqlStatementModel,
34};
35
36pub fn extract_columns(model: &mut SqlStatementModel, raw: &str) {
40 let single_table = model.tables.len() == 1;
41 match model.verb {
42 SqlSemanticVerb::Select => {
43 let proj = parse_select_list(raw);
44 for item in &proj {
45 classify_projection_reads(item, single_table, model);
46 }
47 model.projection = proj;
48 for c in predicate_columns(raw) {
49 push_read(model, c, single_table);
50 }
51 }
52 SqlSemanticVerb::Insert => {
53 for c in insert_target_columns(raw) {
54 push_write(model, c, single_table);
55 }
56 for item in parse_select_list(raw) {
58 if !item.is_star {
59 push_read_name(model, &item.expression_text, single_table);
60 }
61 }
62 }
63 SqlSemanticVerb::Update => {
64 for c in update_set_columns(raw) {
65 push_write(model, c, single_table);
66 }
67 for c in predicate_columns(raw) {
68 push_read(model, c, single_table);
69 }
70 }
71 SqlSemanticVerb::Delete => {
72 for c in predicate_columns(raw) {
73 push_read(model, c, single_table);
74 }
75 }
76 SqlSemanticVerb::MergeUpdate
77 | SqlSemanticVerb::MergeInsert
78 | SqlSemanticVerb::MergeDelete => {
79 for c in update_set_columns(raw) {
80 push_write(model, c, single_table);
81 }
82 for c in predicate_columns(raw) {
83 push_read(model, c, single_table);
84 }
85 }
86 }
87}
88
89pub fn extract_columns_for_model(model: &mut SqlSemanticModel, raws: &[String]) {
93 for (i, stmt) in model.statements.iter_mut().enumerate() {
94 if let Some(raw) = raws.get(i) {
95 extract_columns(stmt, raw);
96 }
97 }
98}
99
100fn parse_select_list(raw: &str) -> Vec<ProjectionItem> {
101 let upper = raw.to_ascii_uppercase();
102 let Some(sel) = upper.find("SELECT") else {
103 return Vec::new();
104 };
105 let after = sel + "SELECT".len();
106 let into = upper[after..].find(" INTO ").map(|p| after + p);
108 let from = upper[after..].find(" FROM ").map(|p| after + p);
109 let end = [into, from]
110 .into_iter()
111 .flatten()
112 .min()
113 .unwrap_or(raw.len());
114 let list = raw[after..end].trim();
115 split_top_level_commas(list)
116 .into_iter()
117 .map(|piece| parse_projection_item(piece.trim()))
118 .filter(|p| !p.expression_text.is_empty())
119 .collect()
120}
121
122fn parse_projection_item(piece: &str) -> ProjectionItem {
123 let is_star = piece == "*" || piece.ends_with(".*");
124 let upper = piece.to_ascii_uppercase();
126 if let Some(as_pos) = upper.rfind(" AS ") {
127 let expr = piece[..as_pos].trim().to_string();
128 let alias = piece[as_pos + 4..].trim().to_string();
129 return ProjectionItem {
130 alias,
131 expression_text: expr,
132 is_star,
133 };
134 }
135 if let Some(ws) = piece.rfind(char::is_whitespace) {
139 let head = piece[..ws].trim();
140 let tail = piece[ws..].trim();
141 if !head.is_empty()
142 && tail.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
143 && !head.ends_with(['(', ','])
144 && !is_star
145 {
146 return ProjectionItem {
147 alias: tail.to_string(),
148 expression_text: head.to_string(),
149 is_star,
150 };
151 }
152 }
153 ProjectionItem {
154 alias: String::new(),
155 expression_text: piece.to_string(),
156 is_star,
157 }
158}
159
160fn classify_projection_reads(
161 item: &ProjectionItem,
162 single_table: bool,
163 model: &mut SqlStatementModel,
164) {
165 if item.is_star {
166 let (qual, _col) = split_qualifier(&item.expression_text);
167 model.reads.push(ColumnUse {
168 qualifier: qual,
169 column: "*".to_string(),
170 resolution: ColumnResolution::StarExpansion,
171 });
172 return;
173 }
174 for ident in column_idents(&item.expression_text) {
176 push_read_name(model, &ident, single_table);
177 }
178}
179
180fn push_read(model: &mut SqlStatementModel, col: String, single_table: bool) {
181 push_read_name(model, &col, single_table);
182}
183
184fn push_read_name(model: &mut SqlStatementModel, name: &str, single_table: bool) {
185 if let Some(cu) = make_column_use(name, single_table, model) {
186 if !model.reads.contains(&cu) {
187 model.reads.push(cu);
188 }
189 }
190}
191
192fn push_write(model: &mut SqlStatementModel, col: String, single_table: bool) {
193 if let Some(cu) = make_column_use(&col, single_table, model) {
194 if !model.writes.contains(&cu) {
195 model.writes.push(cu);
196 }
197 }
198}
199
200fn make_column_use(name: &str, single_table: bool, model: &SqlStatementModel) -> Option<ColumnUse> {
201 let name = name.trim();
202 if name.is_empty() || is_sql_noise(name) {
203 return None;
204 }
205 let (qualifier, column) = split_qualifier(name);
206 if column.is_empty() || !column.chars().next()?.is_ascii_alphabetic() {
207 return None;
208 }
209 let resolution = if !qualifier.is_empty() {
210 if model.alias_scope.resolve(&qualifier).is_some() {
211 ColumnResolution::Resolved
212 } else {
213 ColumnResolution::Unresolved
214 }
215 } else if single_table {
216 ColumnResolution::Resolved
217 } else {
218 ColumnResolution::Unresolved
219 };
220 Some(ColumnUse {
221 qualifier,
222 column: column.to_ascii_uppercase(),
223 resolution,
224 })
225}
226
227fn split_qualifier(name: &str) -> (String, String) {
228 match name.rsplit_once('.') {
229 Some((q, c)) => (q.trim().to_string(), c.trim().to_string()),
230 None => (String::new(), name.trim().to_string()),
231 }
232}
233
234fn column_idents(expr: &str) -> Vec<String> {
235 let mut out = Vec::new();
236 let mut cur = String::new();
237 for ch in expr.chars() {
238 if ch.is_ascii_alphanumeric() || ch == '_' || ch == '$' || ch == '#' || ch == '.' {
239 cur.push(ch);
240 } else {
241 if !cur.is_empty() {
242 out.push(std::mem::take(&mut cur));
243 }
244 }
245 }
246 if !cur.is_empty() {
247 out.push(cur);
248 }
249 out.into_iter().filter(|w| !is_sql_noise(w)).collect()
250}
251
252fn insert_target_columns(raw: &str) -> Vec<String> {
253 let upper = raw.to_ascii_uppercase();
256 let Some(into) = upper.find("INTO") else {
257 return Vec::new();
258 };
259 let rest = &raw[into + 4..];
260 let Some(open) = rest.find('(') else {
261 return Vec::new();
262 };
263 let Some(close) = rest[open..].find(')') else {
264 return Vec::new();
265 };
266 split_top_level_commas(&rest[open + 1..open + close])
267 .into_iter()
268 .map(|s| s.trim().to_string())
269 .filter(|s| !s.is_empty())
270 .collect()
271}
272
273fn update_set_columns(raw: &str) -> Vec<String> {
274 let upper = raw.to_ascii_uppercase();
275 let Some(set) = upper.find(" SET ") else {
276 return Vec::new();
277 };
278 let after = set + 5;
279 let end = upper[after..]
280 .find(" WHERE ")
281 .map(|p| after + p)
282 .unwrap_or(raw.len());
283 split_top_level_commas(&raw[after..end])
284 .into_iter()
285 .filter_map(|assign| assign.split('=').next().map(|s| s.trim().to_string()))
286 .filter(|s| !s.is_empty())
287 .collect()
288}
289
290fn predicate_columns(raw: &str) -> Vec<String> {
291 let upper = raw.to_ascii_uppercase();
292 let Some(w) = upper.find(" WHERE ") else {
293 return Vec::new();
294 };
295 let pred = &raw[w + 7..];
296 let pu = pred.to_ascii_uppercase();
298 let stop = ["GROUP ", "ORDER ", "HAVING ", "CONNECT "]
299 .iter()
300 .filter_map(|kw| pu.find(kw))
301 .min()
302 .unwrap_or(pred.len());
303 column_idents(&pred[..stop])
304}
305
306fn split_top_level_commas(s: &str) -> Vec<String> {
307 let mut out = Vec::new();
308 let mut depth = 0i32;
309 let mut buf = String::new();
310 for ch in s.chars() {
311 match ch {
312 '(' => {
313 depth += 1;
314 buf.push(ch);
315 }
316 ')' => {
317 depth -= 1;
318 buf.push(ch);
319 }
320 ',' if depth == 0 => out.push(std::mem::take(&mut buf)),
321 _ => buf.push(ch),
322 }
323 }
324 if !buf.trim().is_empty() {
325 out.push(buf);
326 }
327 out
328}
329
330fn is_sql_noise(w: &str) -> bool {
331 let u = w.to_ascii_uppercase();
332 matches!(
333 u.as_str(),
334 "AND"
335 | "OR"
336 | "NOT"
337 | "NULL"
338 | "IS"
339 | "IN"
340 | "LIKE"
341 | "BETWEEN"
342 | "EXISTS"
343 | "TRUE"
344 | "FALSE"
345 | "FROM"
346 | "WHERE"
347 | "SELECT"
348 | "INTO"
349 | "VALUES"
350 | "SET"
351 | "DUAL"
352 | "SYSDATE"
353 | "COUNT"
354 | "SUM"
355 | "AVG"
356 | "MIN"
357 | "MAX"
358 | "DISTINCT"
359 | "AS"
360 | "ON"
361 | "USING"
362 | "CASE"
363 | "WHEN"
364 | "THEN"
365 | "ELSE"
366 | "END"
367 ) || u.chars().all(|c| c.is_ascii_digit() || c == '.')
368}
369
370#[cfg(test)]
371mod tests {
372 use super::*;
373 use crate::sql_resolve::resolve_sql;
374
375 #[test]
376 fn select_list_columns_become_reads() {
377 let raw = "SELECT e.id, e.name INTO a, b FROM employees e";
378 let mut m = resolve_sql(raw);
379 extract_columns(&mut m, raw);
380 assert_eq!(m.projection.len(), 2);
381 let cols: Vec<&str> = m.reads.iter().map(|c| c.column.as_str()).collect();
382 assert!(cols.contains(&"ID"));
383 assert!(cols.contains(&"NAME"));
384 assert!(
386 m.reads
387 .iter()
388 .all(|c| c.resolution == ColumnResolution::Resolved)
389 );
390 }
391
392 #[test]
393 fn star_projection_is_star_expansion() {
394 let raw = "SELECT * INTO r FROM employees";
395 let mut m = resolve_sql(raw);
396 extract_columns(&mut m, raw);
397 assert!(m.projection.iter().any(|p| p.is_star));
398 assert!(
399 m.reads
400 .iter()
401 .any(|c| c.resolution == ColumnResolution::StarExpansion)
402 );
403 }
404
405 #[test]
406 fn bare_column_single_table_resolved() {
407 let raw = "SELECT salary INTO v FROM employees";
408 let mut m = resolve_sql(raw);
409 extract_columns(&mut m, raw);
410 let c = m.reads.iter().find(|c| c.column == "SALARY").unwrap();
411 assert_eq!(c.resolution, ColumnResolution::Resolved);
412 }
413
414 #[test]
415 fn bare_column_multi_table_unresolved() {
416 let raw = "SELECT amount INTO v FROM orders o, payments p WHERE o.id = p.oid";
417 let mut m = resolve_sql(raw);
418 extract_columns(&mut m, raw);
419 let c = m.reads.iter().find(|c| c.column == "AMOUNT");
420 assert_eq!(c.map(|c| c.resolution), Some(ColumnResolution::Unresolved));
421 }
422
423 #[test]
424 fn qualified_unbound_alias_is_unresolved() {
425 let raw = "SELECT zzz.col INTO v FROM employees e";
426 let mut m = resolve_sql(raw);
427 extract_columns(&mut m, raw);
428 let c = m.reads.iter().find(|c| c.column == "COL").unwrap();
429 assert_eq!(c.resolution, ColumnResolution::Unresolved);
430 }
431
432 #[test]
433 fn insert_target_columns_become_writes() {
434 let raw = "INSERT INTO audit (event_id, ts) VALUES (1, SYSDATE)";
435 let mut m = resolve_sql(raw);
436 extract_columns(&mut m, raw);
437 let cols: Vec<&str> = m.writes.iter().map(|c| c.column.as_str()).collect();
438 assert!(cols.contains(&"EVENT_ID"));
439 assert!(cols.contains(&"TS"));
440 }
441
442 #[test]
443 fn update_set_columns_become_writes() {
444 let raw = "UPDATE employees e SET e.salary = e.salary * 1.1 WHERE e.id = 1";
445 let mut m = resolve_sql(raw);
446 extract_columns(&mut m, raw);
447 assert!(m.writes.iter().any(|c| c.column == "SALARY"));
448 assert!(m.reads.iter().any(|c| c.column == "ID"));
450 }
451
452 #[test]
453 fn delete_predicate_columns_are_reads() {
454 let raw = "DELETE FROM stale WHERE created < SYSDATE - 30";
455 let mut m = resolve_sql(raw);
456 extract_columns(&mut m, raw);
457 assert!(m.reads.iter().any(|c| c.column == "CREATED"));
458 assert!(m.writes.is_empty());
459 }
460
461 #[test]
462 fn projection_alias_split() {
463 let raw = "SELECT e.salary AS pay INTO v FROM employees e";
464 let mut m = resolve_sql(raw);
465 extract_columns(&mut m, raw);
466 let p = &m.projection[0];
467 assert_eq!(p.alias, "pay");
468 assert_eq!(p.expression_text, "e.salary");
469 }
470
471 #[test]
472 fn sql_noise_not_recorded_as_columns() {
473 let raw = "SELECT id INTO v FROM employees WHERE id IS NOT NULL AND id > 0";
474 let mut m = resolve_sql(raw);
475 extract_columns(&mut m, raw);
476 let cols: Vec<&str> = m.reads.iter().map(|c| c.column.as_str()).collect();
477 assert!(
478 !cols
479 .iter()
480 .any(|c| *c == "NULL" || *c == "AND" || *c == "NOT")
481 );
482 assert!(cols.contains(&"ID"));
483 }
484
485 #[test]
486 fn extract_columns_for_model_walks_all_statements() {
487 let mut model = SqlSemanticModel::default();
488 model.push(resolve_sql("SELECT id INTO v FROM t1"));
489 model.push(resolve_sql("SELECT name INTO v FROM t2"));
490 extract_columns_for_model(
491 &mut model,
492 &[
493 "SELECT id INTO v FROM t1".to_string(),
494 "SELECT name INTO v FROM t2".to_string(),
495 ],
496 );
497 assert!(model.statements[0].reads.iter().any(|c| c.column == "ID"));
498 assert!(model.statements[1].reads.iter().any(|c| c.column == "NAME"));
499 }
500
501 #[test]
502 fn serde_round_trip_preserves_column_resolution() {
503 let raw = "SELECT salary INTO v FROM employees";
504 let mut m = resolve_sql(raw);
505 extract_columns(&mut m, raw);
506 let json = serde_json::to_string(&m).unwrap();
507 let back: SqlStatementModel = serde_json::from_str(&json).unwrap();
508 assert_eq!(back, m);
509 }
510}