1use std::collections::HashMap;
2use std::sync::LazyLock;
3
4use regex::Regex;
5
6use crate::annotations::extract_annotations;
7use crate::error::Result;
8use crate::ir::{ColumnDef, EnumDef, QueryDef, SqlType, SqlTypeCategory, TableDef};
9use crate::parser::joins::{has_outer_join, resolve_multi_table_columns};
10use crate::parser::{
11 DatabaseParser, build_params, ensure_supported_select_expr, make_unknown_column,
12 split_column_defs, split_query_blocks,
13};
14
15static BASE_TYPE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)^(\w+)").unwrap());
18
19static CONSTRAINT_RE: LazyLock<Regex> = LazyLock::new(|| {
20 Regex::new(r"(?i)^(PRIMARY\s+KEY|CONSTRAINT|UNIQUE|CHECK|FOREIGN\s+KEY)").unwrap()
21});
22
23static COL_NAME_RE: LazyLock<Regex> =
24 LazyLock::new(|| Regex::new(r#"^(?:`(\w+)`|"(\w+)"|\[(\w+)\]|(\w+))\s+"#).unwrap());
25
26static COL_TYPE_RE: LazyLock<Regex> =
27 LazyLock::new(|| Regex::new(r"(?i)^(\w+(?:\s*\([^)]*\))?)").unwrap());
28
29static NOT_NULL_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\bNOT\s+NULL\b").unwrap());
30
31static DEFAULT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\bDEFAULT\b").unwrap());
32
33static PK_INLINE_RE: LazyLock<Regex> =
34 LazyLock::new(|| Regex::new(r"(?i)\bPRIMARY\s+KEY\b").unwrap());
35
36static UNIQUE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)\bUNIQUE\b").unwrap());
37
38static AUTO_INC_RE: LazyLock<Regex> =
39 LazyLock::new(|| Regex::new(r"(?i)\bAUTOINCREMENT\b").unwrap());
40
41static TABLE_RE: LazyLock<Regex> = LazyLock::new(|| {
42 Regex::new(r"(?is)CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?(?:`?(\w+)`?)\s*\(([\s\S]*?)\)\s*;")
43 .unwrap()
44});
45
46static TABLE_PK_RE: LazyLock<Regex> =
47 LazyLock::new(|| Regex::new(r"(?i)^PRIMARY\s+KEY\s*\(\s*([\w\s,`]+)\s*\)").unwrap());
48
49static INSERT_RE: LazyLock<Regex> = LazyLock::new(|| {
50 Regex::new(
51 r"(?i)INSERT\s+INTO\s+`?\w+`?\s*\(\s*([\w\s,`]+)\s*\)\s*VALUES\s*\(\s*([?,\s]+)\s*\)",
52 )
53 .unwrap()
54});
55
56static WHERE_PARAM_RE: LazyLock<Regex> = LazyLock::new(|| {
57 Regex::new(
58 r"(?i)(?:(\w+)\s*\(\s*(\w+)\s*\)|(\w+))\s*(?:=|!=|<>|<=?|>=?|(?:NOT\s+)?(?:I?LIKE|IN|IS))\s*\?",
59 )
60 .unwrap()
61});
62
63static FROM_TABLE_RE: LazyLock<Regex> =
64 LazyLock::new(|| Regex::new(r"(?i)(?:FROM|INTO|UPDATE)\s+`?(\w+)`?").unwrap());
65
66static SELECT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)^\s*SELECT\b").unwrap());
67
68static SELECT_COLS_RE: LazyLock<Regex> =
69 LazyLock::new(|| Regex::new(r"(?i)SELECT\s+([\s\S]+?)\s+FROM\b").unwrap());
70
71static ALIAS_RE: LazyLock<Regex> =
72 LazyLock::new(|| Regex::new(r"(?i)^`?(\w+)`?\s+as\s+`?(\w+)`?$").unwrap());
73
74fn type_category(normalized: &str) -> SqlTypeCategory {
77 match normalized {
78 "integer" | "int" | "tinyint" | "smallint" | "mediumint" | "bigint" => {
79 SqlTypeCategory::Number
80 }
81 "real" | "double" | "float" | "decimal" | "numeric" => SqlTypeCategory::Number,
82 "text" | "varchar" | "char" | "character" | "clob" | "nchar" | "nvarchar" => {
83 SqlTypeCategory::String
84 }
85 "blob" => SqlTypeCategory::Binary,
86 "boolean" | "bool" => SqlTypeCategory::Boolean,
87 "datetime" | "timestamp" | "date" | "time" => SqlTypeCategory::Date,
88 "json" => SqlTypeCategory::Json,
89 _ => SqlTypeCategory::Unknown,
90 }
91}
92
93fn resolve_sql_type(raw: &str) -> SqlType {
94 let trimmed = raw.trim();
95
96 let normalized = BASE_TYPE_RE
98 .captures(trimmed)
99 .map(|c| c[1].to_lowercase())
100 .unwrap_or_else(|| trimmed.to_lowercase());
101
102 let category = type_category(&normalized);
103
104 SqlType {
105 raw: trimmed.to_string(),
106 normalized,
107 category,
108 element_type: None,
109 enum_name: None,
110 enum_values: None,
111 json_shape: None,
112 }
113}
114
115struct ParsedColumn {
118 col: ColumnDef,
119 is_pk: bool,
120 is_unique: bool,
121}
122
123fn parse_column_line(line: &str) -> Option<ParsedColumn> {
124 let line = line.trim();
125 if line.is_empty() {
126 return None;
127 }
128
129 if CONSTRAINT_RE.is_match(line) {
131 return None;
132 }
133
134 let name_cap = COL_NAME_RE.captures(line)?;
136 let col_name = name_cap
137 .get(1)
138 .or_else(|| name_cap.get(2))
139 .or_else(|| name_cap.get(3))
140 .or_else(|| name_cap.get(4))?
141 .as_str()
142 .to_lowercase();
143 let after_name = &line[name_cap[0].len()..];
144
145 let (raw_type, rest) = if let Some(cap) = COL_TYPE_RE.captures(after_name) {
147 (cap[1].to_string(), &after_name[cap[0].len()..])
148 } else {
149 ("blob".to_string(), after_name)
150 };
151
152 let is_not_null = NOT_NULL_RE.is_match(rest);
153 let has_default_kw = DEFAULT_RE.is_match(rest);
154 let is_pk = PK_INLINE_RE.is_match(rest);
155 let is_unique = UNIQUE_RE.is_match(rest);
156 let is_auto_inc = AUTO_INC_RE.is_match(rest);
157
158 let sql_type = resolve_sql_type(&raw_type);
159
160 Some(ParsedColumn {
161 col: ColumnDef {
162 name: col_name,
163 alias: None,
164 source_table: None,
165 sql_type,
166 nullable: !is_not_null,
167 has_default: has_default_kw || is_auto_inc,
168 },
169 is_pk,
170 is_unique,
171 })
172}
173
174fn parse_schema_tables(sql: &str) -> Vec<TableDef> {
175 let mut tables = Vec::new();
176
177 for cap in TABLE_RE.captures_iter(sql) {
178 let table_name = cap[1].to_lowercase();
179 let body = &cap[2];
180
181 let mut columns = Vec::new();
182 let mut primary_key: Vec<String> = Vec::new();
183 let mut unique_constraints: Vec<Vec<String>> = Vec::new();
184
185 let raw_lines: Vec<&str> = body.lines().collect();
186 let mut pending_comment = String::new();
187 let mut non_comment_buf = String::new();
188 let mut comment_map: HashMap<usize, String> = HashMap::new();
189
190 for raw_line in &raw_lines {
191 let trimmed = raw_line.trim();
192 if trimmed.starts_with("--") {
193 if !pending_comment.is_empty() {
194 pending_comment.push('\n');
195 }
196 pending_comment.push_str(trimmed);
197 } else {
198 let before = split_column_defs(&non_comment_buf)
199 .iter()
200 .filter(|d| !d.is_empty())
201 .count();
202 if !non_comment_buf.is_empty() {
203 non_comment_buf.push('\n');
204 }
205 non_comment_buf.push_str(raw_line);
206 let after = split_column_defs(&non_comment_buf)
207 .iter()
208 .filter(|d| !d.is_empty())
209 .count();
210
211 if after > before && !pending_comment.is_empty() {
212 comment_map.insert(before, pending_comment.clone());
213 pending_comment.clear();
214 } else if after == before {
215 } else {
217 pending_comment.clear();
218 }
219 }
220 }
221
222 let lines = split_column_defs(&non_comment_buf);
223
224 for (i, line) in lines.iter().enumerate() {
225 let trimmed = line.trim();
226
227 if let Some(pk_cap) = TABLE_PK_RE.captures(trimmed) {
229 for col in pk_cap[1].split(',') {
230 primary_key.push(col.trim().trim_matches('`').to_lowercase());
231 }
232 continue;
233 }
234
235 let Some(mut parsed) = parse_column_line(trimmed) else {
236 continue;
237 };
238
239 if let Some(comment) = comment_map.get(&i) {
241 let (_, ann) = extract_annotations(comment);
242 if let Some(values) = ann.enums.get(&parsed.col.name) {
243 parsed.col.sql_type.category = SqlTypeCategory::Enum;
244 parsed.col.sql_type.enum_values = Some(values.clone());
245 }
246 if let Some(shape) = ann.json_shapes.get(&parsed.col.name) {
247 parsed.col.sql_type.json_shape = Some(shape.clone());
248 }
249 }
250
251 if parsed.is_pk {
252 primary_key.push(parsed.col.name.clone());
253 }
254 if parsed.is_unique {
255 unique_constraints.push(vec![parsed.col.name.clone()]);
256 }
257 columns.push(parsed.col);
258 }
259
260 for col in &mut columns {
262 if primary_key.contains(&col.name) {
263 col.nullable = false;
264 }
265 }
266
267 tables.push(TableDef {
268 name: table_name,
269 columns,
270 primary_key,
271 unique_constraints,
272 });
273 }
274
275 tables
276}
277
278fn extract_param_indices(sql: &str) -> Vec<u32> {
281 let mut count = 0u32;
282 let mut indices = Vec::new();
283 for ch in sql.chars() {
284 if ch == '?' {
285 count += 1;
286 indices.push(count);
287 }
288 }
289 indices
290}
291
292fn infer_param_columns(sql: &str) -> HashMap<u32, String> {
293 let mut result = HashMap::new();
294
295 if let Some(cap) = INSERT_RE.captures(sql) {
297 let cols: Vec<String> = cap[1]
298 .split(',')
299 .map(|s| s.trim().trim_matches('`').to_lowercase())
300 .collect();
301 let values_str = &cap[2];
302 let mut idx = 0u32;
303 for ch in values_str.chars() {
304 if ch == '?' {
305 idx += 1;
306 if (idx as usize) <= cols.len() {
307 result.insert(idx, cols[idx as usize - 1].clone());
308 }
309 }
310 }
311 return result;
312 }
313
314 let sql_keywords: std::collections::HashSet<&str> = [
316 "not", "and", "or", "where", "set", "when", "then", "else", "case", "between", "exists",
317 "any", "all", "some", "having",
318 ]
319 .into_iter()
320 .collect();
321
322 let mut question_positions: Vec<usize> = Vec::new();
323 for (i, ch) in sql.char_indices() {
324 if ch == '?' {
325 question_positions.push(i);
326 }
327 }
328
329 for cap in WHERE_PARAM_RE.captures_iter(sql) {
330 let match_end = cap.get(0).unwrap().end();
331 let q_pos = match_end - 1;
332 if let Some(idx_0based) = question_positions.iter().position(|&p| p == q_pos) {
333 let idx = (idx_0based + 1) as u32;
334 if cap.get(1).is_some() && cap.get(2).is_some() {
335 result.insert(idx, cap[2].to_lowercase());
336 } else if let Some(m) = cap.get(3) {
337 let word = m.as_str().to_lowercase();
338 if !sql_keywords.contains(word.as_str()) {
339 result.insert(idx, word);
340 }
341 }
342 }
343 }
344
345 result
346}
347
348fn find_from_table<'a>(sql: &str, tables: &'a [TableDef]) -> Option<&'a TableDef> {
349 let cap = FROM_TABLE_RE.captures(sql)?;
350 let table_name = cap[1].to_lowercase();
351 tables.iter().find(|t| t.name == table_name)
352}
353
354fn resolve_return_columns(
355 sql: &str,
356 table: Option<&TableDef>,
357 schema_tables: &[TableDef],
358 source_file: &str,
359) -> Result<Vec<ColumnDef>> {
360 if !SELECT_RE.is_match(sql) {
361 return Ok(Vec::new());
362 }
363
364 let Some(cap) = SELECT_COLS_RE.captures(sql) else {
365 return Ok(Vec::new());
366 };
367 let cols_part = cap[1].trim();
368
369 if has_outer_join(sql) {
373 return resolve_multi_table_columns(cols_part, sql, schema_tables, source_file);
374 }
375
376 if cols_part == "*" {
377 return Ok(table.map(|t| t.columns.clone()).unwrap_or_default());
378 }
379
380 let Some(table) = table else {
381 return Ok(Vec::new());
382 };
383
384 let col_names: Vec<&str> = cols_part.split(',').map(|s| s.trim()).collect();
385
386 col_names
387 .iter()
388 .map(|&col_expr| -> Result<ColumnDef> {
389 ensure_supported_select_expr(col_expr, source_file)?;
390 let expr_lower = col_expr.to_lowercase();
391 if let Some(alias_cap) = ALIAS_RE.captures(&expr_lower) {
392 let actual = &alias_cap[1];
393 let alias = alias_cap[2].to_string();
394 Ok(table
395 .columns
396 .iter()
397 .find(|c| c.name == actual)
398 .map(|c| {
399 let mut col = c.clone();
400 col.alias = Some(alias);
401 col
402 })
403 .unwrap_or_else(|| make_unknown_column(actual)))
404 } else {
405 let name = expr_lower.trim_matches('`');
406 Ok(table
407 .columns
408 .iter()
409 .find(|c| c.name == name)
410 .cloned()
411 .unwrap_or_else(|| make_unknown_column(name)))
412 }
413 })
414 .collect()
415}
416
417pub struct SqliteParser;
420
421impl SqliteParser {
422 pub fn new() -> Self {
423 Self
424 }
425}
426
427impl Default for SqliteParser {
428 fn default() -> Self {
429 Self::new()
430 }
431}
432
433impl DatabaseParser for SqliteParser {
434 fn parse_schema(&self, sql: &str) -> Result<(Vec<TableDef>, Vec<EnumDef>)> {
435 let tables = parse_schema_tables(sql);
437 Ok((tables, Vec::new()))
438 }
439
440 fn parse_queries(
441 &self,
442 sql: &str,
443 tables: &[TableDef],
444 enums: &[EnumDef],
445 source_file: &str,
446 ) -> Result<Vec<QueryDef>> {
447 let _ = enums;
448 let blocks = split_query_blocks(sql);
449 let mut queries = Vec::new();
450
451 for block in blocks {
452 let table = find_from_table(&block.sql, tables);
453 let param_indices = extract_param_indices(&block.sql);
454 let inferred_cols = infer_param_columns(&block.sql);
455 let params = build_params(&block.comments, table, param_indices, inferred_cols);
456 let returns = resolve_return_columns(&block.sql, table, tables, source_file)?;
457
458 let clean_sql = block
459 .sql
460 .trim_end()
461 .trim_end_matches(';')
462 .trim()
463 .to_string();
464
465 queries.push(QueryDef {
466 name: block.name,
467 command: block.command,
468 sql: clean_sql,
469 params,
470 returns,
471 source_file: source_file.to_string(),
472 });
473 }
474
475 Ok(queries)
476 }
477}
478
479#[cfg(test)]
482mod tests {
483 use super::*;
484 use crate::ir::{QueryCommand, SqlTypeCategory};
485 use crate::parser::DatabaseParser;
486
487 const SCHEMA_SQL: &str = include_str!("../../../../tests/fixtures/sqlite_schema.sql");
488 const QUERIES_SQL: &str = include_str!("../../../../tests/fixtures/sqlite_queries/users.sql");
489
490 #[test]
491 fn parses_users_table() {
492 let parser = SqliteParser::new();
493 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
494 let users = tables.iter().find(|t| t.name == "users").unwrap();
495 assert_eq!(users.columns.len(), 10);
496 assert_eq!(users.primary_key, vec!["id"]);
497 let id = &users.columns[0];
498 assert_eq!(id.sql_type.category, SqlTypeCategory::Number);
499 assert!(id.has_default); }
501
502 #[test]
503 fn parses_text_as_string() {
504 let parser = SqliteParser::new();
505 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
506 let users = tables.iter().find(|t| t.name == "users").unwrap();
507 let name = users.columns.iter().find(|c| c.name == "name").unwrap();
508 assert_eq!(name.sql_type.category, SqlTypeCategory::String);
509 }
510
511 #[test]
512 fn parses_boolean_convention() {
513 let parser = SqliteParser::new();
514 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
515 let users = tables.iter().find(|t| t.name == "users").unwrap();
516 let active = users
517 .columns
518 .iter()
519 .find(|c| c.name == "is_active")
520 .unwrap();
521 assert_eq!(active.sql_type.category, SqlTypeCategory::Boolean);
522 }
523
524 #[test]
525 fn parses_datetime_convention() {
526 let parser = SqliteParser::new();
527 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
528 let users = tables.iter().find(|t| t.name == "users").unwrap();
529 let created = users
530 .columns
531 .iter()
532 .find(|c| c.name == "created_at")
533 .unwrap();
534 assert_eq!(created.sql_type.category, SqlTypeCategory::Date);
535 }
536
537 #[test]
538 fn parses_blob_as_binary() {
539 let parser = SqliteParser::new();
540 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
541 let users = tables.iter().find(|t| t.name == "users").unwrap();
542 let avatar = users.columns.iter().find(|c| c.name == "avatar").unwrap();
543 assert_eq!(avatar.sql_type.category, SqlTypeCategory::Binary);
544 }
545
546 #[test]
547 fn parses_nullable_columns() {
548 let parser = SqliteParser::new();
549 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
550 let users = tables.iter().find(|t| t.name == "users").unwrap();
551 let bio = users.columns.iter().find(|c| c.name == "bio").unwrap();
552 assert!(bio.nullable);
553 let name = users.columns.iter().find(|c| c.name == "name").unwrap();
554 assert!(!name.nullable);
555 }
556
557 #[test]
558 fn parses_posts_table() {
559 let parser = SqliteParser::new();
560 let (tables, _) = parser.parse_schema(SCHEMA_SQL).unwrap();
561 let posts = tables.iter().find(|t| t.name == "posts").unwrap();
562 assert_eq!(posts.columns.len(), 6);
563 }
564
565 #[test]
566 fn no_enums_in_sqlite() {
567 let parser = SqliteParser::new();
568 let (_, enums) = parser.parse_schema(SCHEMA_SQL).unwrap();
569 assert!(enums.is_empty());
570 }
571
572 #[test]
573 fn parses_get_user_query() {
574 let parser = SqliteParser::new();
575 let (tables, enums) = parser.parse_schema(SCHEMA_SQL).unwrap();
576 let queries = parser
577 .parse_queries(QUERIES_SQL, &tables, &enums, "sqlite_queries/users.sql")
578 .unwrap();
579 let get_user = queries.iter().find(|q| q.name == "GetUser").unwrap();
580 assert_eq!(get_user.command, QueryCommand::One);
581 assert_eq!(get_user.params.len(), 1);
582 assert_eq!(get_user.params[0].name, "id");
583 assert_eq!(get_user.returns.len(), 10);
584 }
585
586 #[test]
587 fn parses_insert_params() {
588 let parser = SqliteParser::new();
589 let (tables, enums) = parser.parse_schema(SCHEMA_SQL).unwrap();
590 let queries = parser
591 .parse_queries(QUERIES_SQL, &tables, &enums, "sqlite_queries/users.sql")
592 .unwrap();
593 let create = queries.iter().find(|q| q.name == "CreateUser").unwrap();
594 assert_eq!(create.command, QueryCommand::Exec);
595 assert_eq!(create.params.len(), 3);
596 }
597
598 #[test]
599 fn parses_param_overrides() {
600 let parser = SqliteParser::new();
601 let (tables, enums) = parser.parse_schema(SCHEMA_SQL).unwrap();
602 let queries = parser
603 .parse_queries(QUERIES_SQL, &tables, &enums, "sqlite_queries/users.sql")
604 .unwrap();
605 let dr = queries
606 .iter()
607 .find(|q| q.name == "ListUsersByDateRange")
608 .unwrap();
609 assert_eq!(dr.params[0].name, "start_date");
610 assert_eq!(dr.params[1].name, "end_date");
611 }
612
613 fn join_schema() -> &'static str {
616 "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL, org_id INTEGER NOT NULL);\n\
617 CREATE TABLE orgs (id INTEGER PRIMARY KEY, slug TEXT NOT NULL);"
618 }
619
620 #[test]
621 fn inner_join_resolves_qualified_columns() {
622 let parser = SqliteParser::new();
623 let (tables, enums) = parser.parse_schema(join_schema()).unwrap();
624 let sql = "-- name: GetUserWithOrg :one\nSELECT users.name, orgs.slug FROM users INNER JOIN orgs ON users.org_id = orgs.id WHERE users.id = ?;";
625 let queries = parser.parse_queries(sql, &tables, &enums, "q.sql").unwrap();
626 assert_eq!(queries[0].returns.len(), 2);
627 assert_eq!(queries[0].returns[0].source_table.as_deref(), Some("users"));
628 assert_eq!(queries[0].returns[1].source_table.as_deref(), Some("orgs"));
629 }
630
631 #[test]
632 fn inner_join_rejects_select_star() {
633 let parser = SqliteParser::new();
634 let (tables, enums) = parser.parse_schema(join_schema()).unwrap();
635 let sql =
636 "-- name: All :many\nSELECT * FROM users INNER JOIN orgs ON users.org_id = orgs.id;";
637 let err = parser
638 .parse_queries(sql, &tables, &enums, "q.sql")
639 .unwrap_err();
640 assert!(
641 err.to_string()
642 .contains("SELECT * across multi-table JOINs")
643 );
644 }
645
646 #[test]
647 fn left_join_rejected_with_v12_pointer() {
648 let parser = SqliteParser::new();
649 let (tables, enums) = parser.parse_schema(join_schema()).unwrap();
650 let sql = "-- name: WithLeft :many\nSELECT users.id FROM users LEFT JOIN orgs ON users.org_id = orgs.id;";
651 let err = parser
652 .parse_queries(sql, &tables, &enums, "q.sql")
653 .unwrap_err();
654 assert!(err.to_string().contains("v1.1 supports INNER JOIN only"));
655 }
656}