1use std::collections::HashMap;
29use std::sync::LazyLock;
30
31use regex::Regex;
32
33use crate::error::{Result, SqlcxError};
34use crate::ir::{ColumnDef, TableDef};
35
36#[derive(Debug)]
40pub struct AliasMap<'a> {
41 entries: HashMap<String, &'a TableDef>,
42}
43
44impl<'a> AliasMap<'a> {
45 pub fn new() -> Self {
46 Self {
47 entries: HashMap::new(),
48 }
49 }
50
51 pub fn insert(&mut self, qualifier: &str, table: &'a TableDef) {
52 self.entries.insert(qualifier.to_lowercase(), table);
53 }
54
55 pub fn lookup(&self, qualifier: &str) -> Option<&&'a TableDef> {
56 self.entries.get(&qualifier.to_lowercase())
57 }
58
59 pub fn len(&self) -> usize {
60 self.entries.len()
61 }
62
63 pub fn is_empty(&self) -> bool {
64 self.entries.is_empty()
65 }
66}
67
68impl<'a> Default for AliasMap<'a> {
69 fn default() -> Self {
70 Self::new()
71 }
72}
73
74static FROM_CLAUSE_RE: LazyLock<Regex> = LazyLock::new(|| {
77 Regex::new(
78 r"(?is)\bFROM\s+(.+?)(?:\bWHERE\b|\bGROUP\s+BY\b|\bORDER\s+BY\b|\bHAVING\b|\bLIMIT\b|\bRETURNING\b|;|$)",
79 )
80 .unwrap()
81});
82
83static TABLE_REF_RE: LazyLock<Regex> = LazyLock::new(|| {
85 Regex::new(r"(?is)^\s*([A-Za-z_][A-Za-z0-9_]*)(?:\s+(?:AS\s+)?([A-Za-z_][A-Za-z0-9_]*))?\s*$")
86 .unwrap()
87});
88
89static UNSUPPORTED_JOIN_RE: LazyLock<Regex> = LazyLock::new(|| {
95 Regex::new(
96 r"(?i)\b(LEFT|RIGHT|FULL|NATURAL|CROSS)\s+(?:(?:INNER|OUTER)\s+)?JOIN\b|\bOUTER\s+JOIN\b|\bUSING\s*\(",
97 )
98 .unwrap()
99});
100
101static ON_SEP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\s+ON\s+").unwrap());
106static AS_SEP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\s+AS\s+").unwrap());
107
108static JOIN_DETECT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\bJOIN\b").unwrap());
113
114pub fn has_outer_join(sql: &str) -> bool {
118 let Some(caps) = FROM_CLAUSE_RE.captures(sql) else {
119 return false;
120 };
121 let from_body = caps.get(1).unwrap().as_str();
122 JOIN_DETECT_RE.is_match(from_body)
123}
124
125pub fn resolve_multi_table_columns(
141 cols_part: &str,
142 sql: &str,
143 schema_tables: &[TableDef],
144 source_file: &str,
145) -> Result<Vec<ColumnDef>> {
146 if cols_part.trim() == "*" {
147 return Err(SqlcxError::ParseError {
148 file: source_file.to_string(),
149 message:
150 "SELECT * across multi-table JOINs is not supported in v1.1 — list qualified columns explicitly (users.id, orgs.slug). `SELECT *` across joins ships in v1.2."
151 .to_string(),
152 });
153 }
154
155 let alias_map = parse_join_clauses(sql, schema_tables, source_file)?;
156
157 let columns: Vec<ColumnDef> = cols_part
158 .split(',')
159 .map(|s| resolve_multi_table_select_column(s.trim(), &alias_map, source_file))
160 .collect::<Result<_>>()?;
161
162 reject_unaliased_collisions(&columns, source_file)?;
163
164 Ok(columns)
165}
166
167fn reject_unaliased_collisions(columns: &[ColumnDef], source_file: &str) -> Result<()> {
168 use std::collections::HashMap;
169
170 let mut first_seen: HashMap<String, (String, String, bool)> = HashMap::new();
176 for col in columns {
177 let effective = col.alias.as_deref().unwrap_or(&col.name).to_lowercase();
178 if let Some((prev_col, prev_source, prev_had_alias)) = first_seen.get(&effective) {
179 let this_source = col.source_table.as_deref().unwrap_or("?").to_string();
180 let this_col = &col.name;
181 let this_had_alias = col.alias.is_some();
182
183 let message = if *prev_had_alias || this_had_alias {
184 format!(
185 "two joined columns produce the same field name `{effective}` — one or \
186 both use an explicit AS alias that collides. Choose distinct aliases \
187 so the generated row type has unique fields."
188 )
189 } else {
190 format!(
191 "joined columns `{prev_source}.{prev_col}` and `{this_source}.{this_col}` \
192 produce the same field name. Add explicit `AS` aliases to disambiguate, \
193 e.g. `{prev_source}.{prev_col} AS {prev_source}_{prev_col}, \
194 {this_source}.{this_col} AS {this_source}_{this_col}`."
195 )
196 };
197 return Err(SqlcxError::ParseError {
198 file: source_file.to_string(),
199 message,
200 });
201 }
202 first_seen.insert(
203 effective,
204 (
205 col.name.clone(),
206 col.source_table.as_deref().unwrap_or("?").to_string(),
207 col.alias.is_some(),
208 ),
209 );
210 }
211 Ok(())
212}
213
214pub fn parse_join_clauses<'a>(
219 sql: &str,
220 schema_tables: &'a [TableDef],
221 source_file: &str,
222) -> Result<AliasMap<'a>> {
223 let mut map = AliasMap::new();
224
225 let Some(caps) = FROM_CLAUSE_RE.captures(sql) else {
226 return Ok(map);
227 };
228 let from_body = caps.get(1).unwrap().as_str();
229
230 if let Some(bad) = UNSUPPORTED_JOIN_RE.find(from_body) {
231 return Err(SqlcxError::ParseError {
232 file: source_file.to_string(),
233 message: format!(
234 "unsupported join flavor `{}`: v1.1 supports INNER JOIN only. \
235 OUTER JOIN nullability propagation, USING, NATURAL, and CROSS \
236 joins are on the v1.2 roadmap.",
237 bad.as_str().trim()
238 ),
239 });
240 }
241
242 let inner_join_re = Regex::new(r"(?i)\s+(?:INNER\s+)?JOIN\s+").unwrap();
243 let segments: Vec<&str> = inner_join_re.split(from_body).collect();
244
245 for segment in segments {
246 let ref_part = split_off_on_clause(segment);
248 insert_table_ref(&mut map, ref_part, schema_tables, source_file)?;
249 }
250
251 Ok(map)
252}
253
254fn split_off_on_clause(segment: &str) -> &str {
255 match ON_SEP_RE.find(segment) {
256 Some(m) => &segment[..m.start()],
257 None => segment,
258 }
259}
260
261fn insert_table_ref<'a>(
262 map: &mut AliasMap<'a>,
263 ref_part: &str,
264 schema_tables: &'a [TableDef],
265 source_file: &str,
266) -> Result<()> {
267 let caps = TABLE_REF_RE
268 .captures(ref_part)
269 .ok_or_else(|| SqlcxError::ParseError {
270 file: source_file.to_string(),
271 message: format!("could not parse table reference `{}`", ref_part.trim()),
272 })?;
273
274 let table_name = caps.get(1).unwrap().as_str();
275 let alias = caps.get(2).map(|m| m.as_str());
276
277 let table = schema_tables
278 .iter()
279 .find(|t| t.name.eq_ignore_ascii_case(table_name))
280 .ok_or_else(|| SqlcxError::ParseError {
281 file: source_file.to_string(),
282 message: format!(
283 "table `{}` referenced in FROM/JOIN but not defined in schema",
284 table_name
285 ),
286 })?;
287
288 map.insert(table_name, table);
289 if let Some(a) = alias {
290 map.insert(a, table);
291 }
292 Ok(())
293}
294
295pub fn resolve_multi_table_select_column(
299 expr: &str,
300 alias_map: &AliasMap<'_>,
301 source_file: &str,
302) -> Result<ColumnDef> {
303 let trimmed = expr.trim();
304
305 let (lhs, alias) = split_as_alias(trimmed);
307
308 let (qualifier, col_name) = lhs.split_once('.').ok_or_else(|| SqlcxError::ParseError {
309 file: source_file.to_string(),
310 message: format!(
311 "multi-table resolver requires qualified columns, got `{}`",
312 trimmed
313 ),
314 })?;
315
316 let qualifier = qualifier.trim();
317 let col_name = col_name.trim();
318
319 let table = *alias_map
320 .lookup(qualifier)
321 .ok_or_else(|| SqlcxError::ParseError {
322 file: source_file.to_string(),
323 message: format!(
324 "unknown table qualifier `{}` in expression `{}` — not in FROM/JOIN clause",
325 qualifier, trimmed
326 ),
327 })?;
328
329 let column = table
330 .columns
331 .iter()
332 .find(|c| c.name.eq_ignore_ascii_case(col_name))
333 .ok_or_else(|| SqlcxError::ParseError {
334 file: source_file.to_string(),
335 message: format!("column `{}` not found on table `{}`", col_name, table.name),
336 })?;
337
338 Ok(ColumnDef {
339 name: column.name.clone(),
340 alias: alias.map(|a| a.to_string()),
341 source_table: Some(table.name.clone()),
342 sql_type: column.sql_type.clone(),
343 nullable: column.nullable,
344 has_default: column.has_default,
345 })
346}
347
348fn split_as_alias(expr: &str) -> (&str, Option<&str>) {
349 if let Some(m) = AS_SEP_RE.find_iter(expr).last() {
353 let lhs = &expr[..m.start()];
354 let alias = expr[m.end()..].trim();
355 if !alias.is_empty() && alias.chars().all(|c| c.is_alphanumeric() || c == '_') {
356 return (lhs.trim(), Some(alias));
357 }
358 }
359 (expr.trim(), None)
360}
361
362#[cfg(test)]
363mod tests {
364 use super::*;
365 use crate::ir::{SqlType, SqlTypeCategory};
366
367 fn col(name: &str) -> ColumnDef {
368 ColumnDef {
369 name: name.to_string(),
370 alias: None,
371 source_table: None,
372 sql_type: SqlType {
373 raw: "integer".to_string(),
374 normalized: "integer".to_string(),
375 category: SqlTypeCategory::Number,
376 element_type: None,
377 enum_name: None,
378 enum_values: None,
379 json_shape: None,
380 },
381 nullable: false,
382 has_default: false,
383 }
384 }
385
386 fn table(name: &str, cols: &[&str]) -> TableDef {
387 TableDef {
388 name: name.to_string(),
389 columns: cols.iter().map(|c| col(c)).collect(),
390 primary_key: vec![],
391 unique_constraints: vec![],
392 }
393 }
394
395 #[test]
396 fn parse_join_clauses_single_table_no_alias() {
397 let tables = vec![table("users", &["id", "name"])];
398 let map = parse_join_clauses("SELECT id FROM users", &tables, "q.sql").unwrap();
399 assert_eq!(map.len(), 1);
400 assert!(map.lookup("users").is_some());
401 }
402
403 #[test]
404 fn parse_join_clauses_single_table_with_alias() {
405 let tables = vec![table("users", &["id"])];
406 let map = parse_join_clauses("SELECT u.id FROM users u", &tables, "q.sql").unwrap();
407 assert_eq!(map.len(), 2);
408 assert!(map.lookup("users").is_some());
409 assert!(map.lookup("u").is_some());
410 }
411
412 #[test]
413 fn parse_join_clauses_inner_join() {
414 let tables = vec![table("users", &["id"]), table("orgs", &["id", "slug"])];
415 let sql = "SELECT u.id, o.slug FROM users u INNER JOIN orgs o ON u.org_id = o.id";
416 let map = parse_join_clauses(sql, &tables, "q.sql").unwrap();
417 assert!(map.lookup("u").is_some());
418 assert!(map.lookup("o").is_some());
419 assert_eq!(map.lookup("u").unwrap().name, "users");
420 assert_eq!(map.lookup("o").unwrap().name, "orgs");
421 }
422
423 #[test]
424 fn parse_join_clauses_rejects_left_join() {
425 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
426 let sql = "SELECT u.id FROM users u LEFT JOIN orgs o ON u.org_id = o.id";
427 let err = parse_join_clauses(sql, &tables, "q.sql").unwrap_err();
428 assert!(err.to_string().contains("v1.1 supports INNER JOIN only"));
429 }
430
431 #[test]
432 fn parse_join_clauses_rejects_using() {
433 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
434 let sql = "SELECT * FROM users JOIN orgs USING (id)";
435 let err = parse_join_clauses(sql, &tables, "q.sql").unwrap_err();
436 assert!(err.to_string().contains("v1.1 supports INNER JOIN only"));
437 }
438
439 #[test]
440 fn parse_join_clauses_errors_on_unknown_table() {
441 let tables = vec![table("users", &["id"])];
442 let sql = "SELECT u.id FROM users u INNER JOIN ghost g ON u.x = g.x";
443 let err = parse_join_clauses(sql, &tables, "q.sql").unwrap_err();
444 assert!(err.to_string().contains("ghost"));
445 assert!(err.to_string().contains("not defined in schema"));
446 }
447
448 #[test]
449 fn resolve_multi_table_by_table_name() {
450 let tables = vec![table("users", &["id", "email"])];
451 let map = parse_join_clauses("SELECT * FROM users", &tables, "q.sql").unwrap();
452 let col = resolve_multi_table_select_column("users.email", &map, "q.sql").unwrap();
453 assert_eq!(col.name, "email");
454 assert_eq!(col.source_table.as_deref(), Some("users"));
455 assert_eq!(col.alias, None);
456 }
457
458 #[test]
459 fn resolve_multi_table_by_alias() {
460 let tables = vec![table("users", &["id"]), table("orgs", &["slug"])];
461 let sql = "SELECT u.id, o.slug FROM users u INNER JOIN orgs o ON u.org_id = o.id";
462 let map = parse_join_clauses(sql, &tables, "q.sql").unwrap();
463 let col = resolve_multi_table_select_column("o.slug", &map, "q.sql").unwrap();
464 assert_eq!(col.name, "slug");
465 assert_eq!(col.source_table.as_deref(), Some("orgs"));
466 }
467
468 #[test]
469 fn resolve_multi_table_with_as_alias() {
470 let tables = vec![table("users", &["id"])];
471 let map = parse_join_clauses("SELECT * FROM users u", &tables, "q.sql").unwrap();
472 let col = resolve_multi_table_select_column("u.id AS user_id", &map, "q.sql").unwrap();
473 assert_eq!(col.name, "id");
474 assert_eq!(col.alias.as_deref(), Some("user_id"));
475 assert_eq!(col.source_table.as_deref(), Some("users"));
476 }
477
478 #[test]
479 fn resolve_multi_table_with_mixed_case_as() {
480 let tables = vec![table("users", &["id"])];
481 let map = parse_join_clauses("SELECT * FROM users u", &tables, "q.sql").unwrap();
482 for form in ["u.id As user_id", "u.id aS user_id", "u.id as user_id"] {
483 let col = resolve_multi_table_select_column(form, &map, "q.sql").unwrap();
484 assert_eq!(col.name, "id", "form={form}");
485 assert_eq!(col.alias.as_deref(), Some("user_id"), "form={form}");
486 }
487 }
488
489 #[test]
490 fn split_as_alias_does_not_panic_on_non_ascii() {
491 let (lhs, alias) = split_as_alias("u.name_İ AS user_name");
496 assert_eq!(lhs, "u.name_İ");
497 assert_eq!(alias, Some("user_name"));
498 }
499
500 #[test]
501 fn parse_join_clauses_does_not_panic_on_non_ascii_on_clause() {
502 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
503 let sql = "SELECT u.id FROM users u INNER JOIN orgs o ON u.İd = o.id";
505 let map = parse_join_clauses(sql, &tables, "q.sql").unwrap();
506 assert!(map.lookup("u").is_some());
507 assert!(map.lookup("o").is_some());
508 }
509
510 #[test]
511 fn unsupported_join_rejects_natural_inner_join() {
512 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
513 let sql = "SELECT u.id FROM users u NATURAL INNER JOIN orgs o";
514 let err = parse_join_clauses(sql, &tables, "q.sql").unwrap_err();
515 assert!(err.to_string().contains("v1.1 supports INNER JOIN only"));
516 }
517
518 #[test]
519 fn unsupported_join_rejects_left_outer_join() {
520 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
521 let sql = "SELECT u.id FROM users u LEFT OUTER JOIN orgs o ON u.id = o.id";
522 let err = parse_join_clauses(sql, &tables, "q.sql").unwrap_err();
523 assert!(err.to_string().contains("v1.1 supports INNER JOIN only"));
524 }
525
526 #[test]
527 fn has_outer_join_true_when_from_contains_join() {
528 assert!(has_outer_join(
529 "SELECT u.id FROM users u INNER JOIN orgs o ON u.org_id = o.id"
530 ));
531 }
532
533 #[test]
534 fn has_outer_join_false_when_no_from() {
535 assert!(!has_outer_join("INSERT INTO users VALUES (1, 'foo')"));
536 }
537
538 #[test]
539 fn has_outer_join_false_when_join_only_in_subquery() {
540 let sql = "SELECT id FROM users WHERE id IN (SELECT user_id FROM orgs INNER JOIN something ON orgs.id = something.org_id)";
543 assert!(!has_outer_join(sql));
544 }
545
546 #[test]
547 fn resolve_multi_table_errors_on_unknown_qualifier() {
548 let tables = vec![table("users", &["id"])];
549 let map = parse_join_clauses("SELECT * FROM users", &tables, "q.sql").unwrap();
550 let err = resolve_multi_table_select_column("orgs.id", &map, "q.sql").unwrap_err();
551 assert!(err.to_string().contains("unknown table qualifier"));
552 }
553
554 #[test]
555 fn resolve_multi_table_errors_on_unknown_column() {
556 let tables = vec![table("users", &["id"])];
557 let map = parse_join_clauses("SELECT * FROM users", &tables, "q.sql").unwrap();
558 let err = resolve_multi_table_select_column("users.ghost", &map, "q.sql").unwrap_err();
559 assert!(err.to_string().contains("column `ghost` not found"));
560 }
561
562 #[test]
563 fn rejects_unaliased_collision() {
564 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
565 let sql = "SELECT users.id, orgs.id FROM users INNER JOIN orgs ON users.id = orgs.id";
566 let err =
567 resolve_multi_table_columns("users.id, orgs.id", sql, &tables, "q.sql").unwrap_err();
568 let msg = err.to_string();
569 assert!(msg.contains("produce the same field name"));
570 assert!(msg.contains("AS"));
571 assert!(msg.contains("users_id"));
572 }
573
574 #[test]
575 fn accepts_colliding_columns_with_aliases() {
576 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
577 let sql = "SELECT users.id AS user_id, orgs.id AS org_id FROM users INNER JOIN orgs ON users.id = orgs.id";
578 let cols = resolve_multi_table_columns(
579 "users.id AS user_id, orgs.id AS org_id",
580 sql,
581 &tables,
582 "q.sql",
583 )
584 .unwrap();
585 assert_eq!(cols.len(), 2);
586 assert_eq!(cols[0].alias.as_deref(), Some("user_id"));
587 assert_eq!(cols[1].alias.as_deref(), Some("org_id"));
588 }
589
590 #[test]
591 fn accepts_distinct_names_without_aliases() {
592 let tables = vec![table("users", &["id"]), table("orgs", &["slug"])];
593 let sql = "SELECT users.id, orgs.slug FROM users INNER JOIN orgs ON users.id = orgs.id";
594 let cols =
595 resolve_multi_table_columns("users.id, orgs.slug", sql, &tables, "q.sql").unwrap();
596 assert_eq!(cols.len(), 2);
597 }
598
599 #[test]
600 fn rejects_alias_collision_with_non_column_message() {
601 let tables = vec![table("users", &["id"]), table("orgs", &["id"])];
605 let sql = "SELECT users.id AS uid, orgs.id AS uid FROM users INNER JOIN orgs ON users.id = orgs.id";
606 let err =
607 resolve_multi_table_columns("users.id AS uid, orgs.id AS uid", sql, &tables, "q.sql")
608 .unwrap_err();
609 let msg = err.to_string();
610 assert!(msg.contains("field name `uid`"), "msg: {msg}");
611 assert!(
612 !msg.contains("users.uid"),
613 "must not reference alias as column: {msg}"
614 );
615 assert!(
616 !msg.contains("orgs.uid"),
617 "must not reference alias as column: {msg}"
618 );
619 }
620}