use serde::{Deserialize, Serialize};
/// One statement recognised by the classifier in this file.
///
/// Serialized as an internally tagged object: the `kind` field carries the
/// snake_case variant name. Payload fields hold raw source text slices rather
/// than parsed expressions.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Statement {
/// A statement that starts with the `NULL` keyword.
Null,
/// `target := rhs_text`; both sides are trimmed and the right-hand side has
/// its trailing `;` removed.
Assignment { target: String, rhs_text: String },
/// An `IF` statement: one arm per `IF`/`ELSIF` condition, plus the text of
/// the `ELSE` branch when one was found.
If {
arms: Vec<IfArm>,
else_body_text: Option<String>,
},
/// A loop with no `FOR`/`WHILE` header; `body_text` is the loop body text.
BareLoop { body_text: String },
/// `FOR iterator IN range_text LOOP body_text END LOOP`.
ForLoop {
iterator: String,
range_text: String,
body_text: String,
},
/// `WHILE cond_text LOOP body_text END LOOP`.
WhileLoop {
cond_text: String,
body_text: String,
},
/// `RAISE`, with the text that follows the keyword (`None` when bare).
Raise { exception: Option<String> },
/// `RETURN`, with the text that follows the keyword (`None` when bare).
Return { value_text: Option<String> },
/// `EXIT`, with the condition text following `WHEN` when present.
Exit { when_text: Option<String> },
/// `EXECUTE IMMEDIATE`: `sql_literal` is the content of the first quoted
/// literal after the keywords (empty when there is none) and
/// `has_bind_variables` records whether a `USING` clause was detected.
ExecuteImmediate {
sql_literal: String,
has_bind_variables: bool,
},
/// A static SQL statement; `raw_text` is the whole statement text.
Sql { verb: SqlVerb, raw_text: String },
/// A statement starting with `BEGIN` or `DECLARE`; `body_text` is the whole
/// statement text.
NestedBlock { body_text: String },
/// `COMMIT`, `ROLLBACK` or `SAVEPOINT`; `verb` is the uppercased first
/// token of the statement.
TransactionControl { verb: String },
/// Anything the classifier could not place, with the reason why.
Unrecognized {
raw_text: String,
unknown_reason: UnknownStatementReason,
},
}
/// One `IF`/`ELSIF` arm: the condition text before `THEN` and the body text
/// after it, both trimmed.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct IfArm {
/// Text between the `IF`/`ELSIF` keyword and `THEN`.
pub cond_text: String,
/// Text between `THEN` and the next arm (or the end of the statement).
pub body_text: String,
}
/// Leading keyword of a static SQL statement; serialized in snake_case.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SqlVerb {
/// Statement starts with `SELECT`.
Select,
/// Statement starts with `INSERT`.
Insert,
/// Statement starts with `UPDATE`.
Update,
/// Statement starts with `DELETE`.
Delete,
/// Statement starts with `MERGE`.
Merge,
}
/// Why a chunk ended up as [`Statement::Unrecognized`]; serialized in
/// snake_case.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UnknownStatementReason {
/// No classification rule matched the statement's leading text.
UnrecognizedKeyword,
/// The input ended while a block opener was still waiting for its `END`.
UnterminatedBlock,
/// NOTE(review): never constructed by the code visible in this file;
/// presumably reserved for text that is not a statement at all — confirm.
NonStatement,
}
/// Lowers a statement sequence into a flat list of [`Statement`]s.
///
/// The source is cut into top-level chunks, comments are removed from each
/// chunk, and chunks that are empty afterwards are dropped. A chunk whose
/// block was still open at end of input is reported as
/// [`Statement::Unrecognized`] with
/// [`UnknownStatementReason::UnterminatedBlock`]; every other chunk is
/// classified by its leading keyword.
#[must_use]
pub fn lower_statement_body(source: &str) -> Vec<Statement> {
    split_statements(source)
        .into_iter()
        .filter_map(|chunk| {
            let stripped = strip_comments(&chunk.text).trim().to_string();
            if stripped.is_empty() {
                // Comment-only or blank chunk: nothing to report.
                return None;
            }
            let statement = if chunk.unterminated {
                Statement::Unrecognized {
                    raw_text: stripped,
                    unknown_reason: UnknownStatementReason::UnterminatedBlock,
                }
            } else {
                classify(&stripped)
            };
            Some(statement)
        })
        .collect()
}
/// One top-level slice of the source produced by `split_statements`.
struct StatementChunk {
/// Raw chunk text, comments and terminating `;` included.
text: String,
/// True when the input ended while block depth was still above zero.
unterminated: bool,
}
/// Cuts `source` into chunks at every `;` that sits at block depth zero.
///
/// String literals and comments are copied through verbatim so that their
/// contents never affect depth or act as a boundary. `BEGIN`, `IF`, `LOOP`
/// and `CASE` raise the depth; `END` (optionally followed by `IF`, `LOOP` or
/// `CASE`) lowers it, never below zero. Trailing text without a terminating
/// `;` becomes a final chunk, flagged as unterminated when a block is still
/// open.
fn split_statements(source: &str) -> Vec<StatementChunk> {
    const BLOCK_OPENERS: [&str; 4] = ["BEGIN", "IF", "LOOP", "CASE"];

    let raw: Vec<char> = source.chars().collect();
    // ASCII upper-casing is one-to-one per char, so indices line up with `raw`.
    let folded: Vec<char> = raw.iter().map(char::to_ascii_uppercase).collect();

    let mut chunks: Vec<StatementChunk> = Vec::new();
    let mut pending = String::new();
    let mut nesting: i32 = 0;
    let mut cursor = 0;

    while cursor < raw.len() {
        if let Some(stop) = opaque_span_end(&raw, cursor) {
            pending.extend(&raw[cursor..stop]);
            cursor = stop;
            continue;
        }

        // A closing keyword takes precedence over an opener at the same spot.
        let keyword = match consume_end_keyword(&folded, cursor) {
            Some(width) => Some((width, -1)),
            None => {
                consume_any_keyword(&folded, cursor, &BLOCK_OPENERS).map(|width| (width, 1))
            }
        };
        if let Some((width, delta)) = keyword {
            nesting = (nesting + delta).max(0);
            pending.extend(raw.iter().skip(cursor).take(width));
            cursor += width;
            continue;
        }

        let current = raw[cursor];
        pending.push(current);
        if current == ';' && nesting == 0 {
            chunks.push(StatementChunk {
                text: std::mem::take(&mut pending),
                unterminated: false,
            });
        }
        cursor += 1;
    }

    if !pending.trim().is_empty() {
        chunks.push(StatementChunk {
            text: pending,
            unterminated: nesting > 0,
        });
    }
    chunks
}
/// Returns the index one past the string literal that starts at `chars[i]`,
/// or `None` when no literal starts there.
///
/// Two forms are recognised:
/// * q-quoted literals (`q'[...]'`, optionally prefixed with `n`), unless the
///   prefix directly follows an ASCII letter, digit or `_`;
/// * ordinary `'...'` literals, where `''` is an escaped quote.
///
/// An unterminated literal runs to the end of the input.
fn string_literal_end(chars: &[char], i: usize) -> Option<usize> {
    let len = chars.len();
    let first = *chars.get(i)?;

    let follows_ident = i > 0 && {
        let before = chars[i - 1];
        before.is_ascii_alphanumeric() || before == '_'
    };
    // Skip an optional national-character prefix to find where `q` would be.
    let q_at = if first.eq_ignore_ascii_case(&'n') && i + 1 < len {
        i + 1
    } else {
        i
    };
    let is_q_quote = !follows_ident
        && chars[q_at].eq_ignore_ascii_case(&'q')
        && q_at + 2 < len
        && chars[q_at + 1] == '\'';

    if is_q_quote {
        let close = match chars[q_at + 2] {
            '[' => ']',
            '(' => ')',
            '{' => '}',
            '<' => '>',
            other => other,
        };
        let body_start = q_at + 3;
        // The literal ends at the first closing delimiter followed by a quote.
        let end = chars[body_start..]
            .windows(2)
            .position(|pair| pair[0] == close && pair[1] == '\'')
            .map_or(len, |offset| body_start + offset + 2);
        return Some(end);
    }

    if first != '\'' {
        return None;
    }
    let mut j = i + 1;
    loop {
        match (chars.get(j), chars.get(j + 1)) {
            (None, _) => return Some(len),
            // Doubled quote: escaped, stay inside the literal.
            (Some('\''), Some('\'')) => j += 2,
            (Some('\''), _) => return Some(j + 1),
            _ => j += 1,
        }
    }
}
/// Returns the index one past the string literal, `--` line comment or
/// `/* */` block comment that starts at `chars[i]`, or `None` when none does.
///
/// A line comment includes its terminating newline when there is one; an
/// unterminated block comment runs to the end of the input. `i` must be a
/// valid index into `chars`.
fn opaque_span_end(chars: &[char], i: usize) -> Option<usize> {
    if let Some(end) = string_literal_end(chars, i) {
        return Some(end);
    }
    let len = chars.len();
    match (chars[i], chars.get(i + 1)) {
        ('-', Some('-')) => {
            let body = i + 2;
            let newline = chars[body..].iter().position(|&ch| ch == '\n');
            Some(newline.map_or(len, |offset| body + offset + 1))
        }
        ('/', Some('*')) => {
            let body = i + 2;
            let terminator = chars[body..]
                .windows(2)
                .position(|pair| pair[0] == '*' && pair[1] == '/');
            Some(terminator.map_or(len, |offset| body + offset + 2))
        }
        _ => None,
    }
}
/// Matches `END` at `pos`, together with a following `IF`, `LOOP` or `CASE`
/// when one comes after optional whitespace.
///
/// `chars` is expected to be upper-cased already. Returns the number of
/// chars covered by the whole closing phrase, or `None` when `END` does not
/// start at `pos`.
fn consume_end_keyword(chars: &[char], pos: usize) -> Option<usize> {
    let end_len = consume_keyword(chars, pos, "END")?;
    let after_end = pos + end_len;
    let gap = chars[after_end..]
        .iter()
        .take_while(|ch| ch.is_whitespace())
        .count();
    let suffix_at = after_end + gap;

    let suffix_len = ["IF", "LOOP", "CASE"]
        .iter()
        .find_map(|suffix| consume_keyword(chars, suffix_at, suffix));

    Some(match suffix_len {
        Some(len) => suffix_at + len - pos,
        None => end_len,
    })
}
/// Tries each of `keywords` at `pos`, in order, and returns the length of
/// the first one that matches as a whole word.
fn consume_any_keyword(chars: &[char], pos: usize, keywords: &[&str]) -> Option<usize> {
    for candidate in keywords {
        if let Some(len) = consume_keyword(chars, pos, candidate) {
            return Some(len);
        }
    }
    None
}
/// Matches `keyword` at `chars[pos..]` as a whole word.
///
/// The comparison is exact, so callers pass upper-cased `chars` together
/// with an upper-case `keyword`. The match is rejected when the char before
/// or after it is an ASCII letter, digit, `_`, `$` or `#`, i.e. when the
/// keyword is only part of a longer identifier.
///
/// Returns the keyword length in chars on success. Out-of-range positions
/// yield `None` instead of panicking.
fn consume_keyword(chars: &[char], pos: usize, keyword: &str) -> Option<usize> {
    let is_ident = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '$' | '#');

    // Compare in place: this runs for every source position, so avoid
    // materialising the keyword as a fresh Vec on each call.
    let kw_len = keyword.chars().count();
    let end = pos.checked_add(kw_len)?;
    let window = chars.get(pos..end)?;
    if !window.iter().copied().eq(keyword.chars()) {
        return None;
    }

    if pos > 0 && is_ident(chars[pos - 1]) {
        return None;
    }
    if chars.get(end).copied().is_some_and(is_ident) {
        return None;
    }
    Some(kw_len)
}
/// Returns `s` with `--` line comments and `/* */` block comments removed.
///
/// String literals are copied through untouched, so comment markers inside
/// them survive. A line comment is replaced by its terminating newline (or
/// by nothing at end of input); a block comment is replaced by one space.
fn strip_comments(s: &str) -> String {
    let src: Vec<char> = s.chars().collect();
    let total = src.len();
    let mut cleaned = String::with_capacity(s.len());
    let mut at = 0;

    while at < total {
        if let Some(stop) = string_literal_end(&src, at) {
            cleaned.extend(&src[at..stop]);
            at = stop;
            continue;
        }
        match (src[at], src.get(at + 1)) {
            ('-', Some('-')) => {
                let body = at + 2;
                match src[body..].iter().position(|&ch| ch == '\n') {
                    Some(offset) => {
                        // Keep the line break so adjacent tokens stay apart.
                        cleaned.push('\n');
                        at = body + offset + 1;
                    }
                    None => at = total,
                }
            }
            ('/', Some('*')) => {
                let body = at + 2;
                at = src[body..]
                    .windows(2)
                    .position(|pair| pair[0] == '*' && pair[1] == '/')
                    .map_or(total, |offset| body + offset + 2);
                cleaned.push(' ');
            }
            (other, _) => {
                cleaned.push(other);
                at += 1;
            }
        }
    }
    cleaned
}
/// Reports whether `trimmed` begins with `keyword` as a whole word.
///
/// The prefix comparison is case-sensitive. The keyword must be followed by
/// the end of the string or by a char that is not an ASCII letter, digit,
/// `_`, `$` or `#`.
fn starts_with_keyword(trimmed: &str, keyword: &str) -> bool {
    trimmed.strip_prefix(keyword).is_some_and(|rest| {
        rest.chars().next().map_or(true, |follower| {
            !(follower.is_ascii_alphanumeric() || matches!(follower, '_' | '$' | '#'))
        })
    })
}
/// Returns the byte offset just past `EXECUTE IMMEDIATE` when `text` starts
/// with that phrase (after optional leading ASCII whitespace).
///
/// Both keywords are matched case-insensitively as whole words and must be
/// separated by at least one ASCII whitespace byte.
fn execute_immediate_body_offset(text: &str) -> Option<usize> {
    /// End offset of `kw` at `at` when it matches as a whole word.
    fn keyword_end(bytes: &[u8], at: usize, kw: &[u8]) -> Option<usize> {
        let stop = at + kw.len();
        let candidate = bytes.get(at..stop)?;
        if !candidate.eq_ignore_ascii_case(kw) {
            return None;
        }
        match bytes.get(stop) {
            Some(&next) if next.is_ascii_alphanumeric() || matches!(next, b'_' | b'$' | b'#') => {
                None
            }
            _ => Some(stop),
        }
    }

    /// Number of ASCII whitespace bytes starting at `at`.
    fn whitespace_run(bytes: &[u8], at: usize) -> usize {
        bytes[at..]
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count()
    }

    let bytes = text.as_bytes();
    let lead = whitespace_run(bytes, 0);
    let exec_end = keyword_end(bytes, lead, b"EXECUTE")?;
    let gap = whitespace_run(bytes, exec_end);
    if gap == 0 {
        return None;
    }
    keyword_end(bytes, exec_end + gap, b"IMMEDIATE")
}
/// Returns the byte offset of the first `:=` in `text` that is not inside a
/// string literal, or `None` when there is none.
fn top_level_assign_pos(text: &str) -> Option<usize> {
    let chars: Vec<char> = text.chars().collect();
    let byte_width = |span: &[char]| span.iter().map(|ch| ch.len_utf8()).sum::<usize>();

    let mut offset = 0;
    let mut idx = 0;
    while let Some(&here) = chars.get(idx) {
        let next = match string_literal_end(&chars, idx) {
            // Literals are skipped whole; a `:=` inside one does not count.
            Some(stop) => stop,
            None if here == ':' && chars.get(idx + 1) == Some(&'=') => return Some(offset),
            None => idx + 1,
        };
        offset += byte_width(&chars[idx..next]);
        idx = next;
    }
    None
}
fn classify(text: &str) -> Statement {
let upper = text.to_ascii_uppercase();
let trimmed = upper.trim();
if let Some(body_off) = execute_immediate_body_offset(text) {
let after = &text[body_off..];
let sql_literal = extract_quoted(after).unwrap_or_default();
let has_bind_variables = after.to_ascii_uppercase().contains("USING ");
return Statement::ExecuteImmediate {
sql_literal,
has_bind_variables,
};
}
if starts_with_keyword(trimmed, "IF") {
return classify_if(text);
}
if starts_with_keyword(trimmed, "LOOP")
|| starts_with_keyword(trimmed, "FOR")
|| starts_with_keyword(trimmed, "WHILE")
{
return classify_loop(text);
}
if starts_with_keyword(trimmed, "BEGIN") || starts_with_keyword(trimmed, "DECLARE") {
return Statement::NestedBlock {
body_text: text.to_string(),
};
}
if let Some(pos) = top_level_assign_pos(text) {
let lhs = &text[..pos];
let rhs = &text[pos + 2..];
return Statement::Assignment {
target: lhs.trim().to_string(),
rhs_text: rhs.trim().trim_end_matches(';').trim().to_string(),
};
}
if starts_with_keyword(trimmed, "NULL") {
return Statement::Null;
}
if starts_with_keyword(trimmed, "COMMIT")
|| starts_with_keyword(trimmed, "ROLLBACK")
|| starts_with_keyword(trimmed, "SAVEPOINT")
{
let verb = trimmed.split_whitespace().next().unwrap_or("").to_string();
return Statement::TransactionControl { verb };
}
if starts_with_keyword(trimmed, "RAISE") {
let rest = text[5..].trim().trim_end_matches(';').trim();
let exception = if rest.is_empty() {
None
} else {
Some(rest.to_string())
};
return Statement::Raise { exception };
}
if starts_with_keyword(trimmed, "RETURN") {
let rest = text[6..].trim().trim_end_matches(';').trim();
let value_text = if rest.is_empty() {
None
} else {
Some(rest.to_string())
};
return Statement::Return { value_text };
}
if starts_with_keyword(trimmed, "EXIT") {
let rest = text[4..].trim().trim_end_matches(';').trim();
let when_text = rest
.strip_prefix("WHEN")
.or_else(|| rest.strip_prefix("when"))
.map(|s| s.trim().to_string());
return Statement::Exit { when_text };
}
for verb in ["SELECT", "INSERT", "UPDATE", "DELETE", "MERGE"] {
if starts_with_keyword(trimmed, verb) {
let kind = match verb {
"SELECT" => SqlVerb::Select,
"INSERT" => SqlVerb::Insert,
"UPDATE" => SqlVerb::Update,
"DELETE" => SqlVerb::Delete,
"MERGE" => SqlVerb::Merge,
_ => unreachable!(),
};
return Statement::Sql {
verb: kind,
raw_text: text.to_string(),
};
}
}
Statement::Unrecognized {
raw_text: text.to_string(),
unknown_reason: UnknownStatementReason::UnrecognizedKeyword,
}
}
fn classify_if(text: &str) -> Statement {
let upper = text.to_ascii_uppercase();
let end_pos = upper.rfind("END IF").unwrap_or(upper.len());
let body = &text[..end_pos];
let after_if = body.get(2..).unwrap_or("").trim_start();
let mut arms: Vec<IfArm> = Vec::new();
let mut else_body_text: Option<String> = None;
let mut cond_start = 0usize;
while let Some(then_pos) = find_keyword(after_if, "THEN", cond_start) {
let cond_text = after_if[cond_start..then_pos].trim().to_string();
let body_start = then_pos + 4;
let next_arm = find_any_keyword(after_if, &["ELSIF", "ELSE"], body_start);
let body_end = next_arm.map_or(after_if.len(), |(p, _)| p);
let body_text = after_if
.get(body_start..body_end)
.unwrap_or("")
.trim()
.to_string();
arms.push(IfArm {
cond_text,
body_text,
});
match next_arm {
Some((pos, "ELSIF")) => cond_start = pos + 5,
Some((pos, _)) => {
let else_text = after_if.get(pos + 4..).unwrap_or("").trim().to_string();
else_body_text = Some(else_text);
break;
}
None => break,
}
}
Statement::If {
arms,
else_body_text,
}
}
fn classify_loop(text: &str) -> Statement {
let upper = text.to_ascii_uppercase();
if upper.starts_with("FOR ") {
let in_pos = find_keyword(text, "IN", 4);
let loop_pos = find_keyword(text, "LOOP", in_pos.unwrap_or(0));
let end_loop = upper.rfind("END LOOP").unwrap_or(text.len());
if let (Some(in_p), Some(loop_p)) = (in_pos, loop_pos) {
let iterator = text.get(4..in_p).unwrap_or("").trim().to_string();
let range_text = text.get(in_p + 2..loop_p).unwrap_or("").trim().to_string();
let body = text
.get(loop_p + 4..end_loop)
.unwrap_or("")
.trim()
.to_string();
return Statement::ForLoop {
iterator,
range_text,
body_text: body,
};
}
}
if upper.starts_with("WHILE ") {
let loop_pos = find_keyword(text, "LOOP", 6);
let end_loop = upper.rfind("END LOOP").unwrap_or(text.len());
if let Some(loop_p) = loop_pos {
let cond_text = text.get(6..loop_p).unwrap_or("").trim().to_string();
let body = text
.get(loop_p + 4..end_loop)
.unwrap_or("")
.trim()
.to_string();
return Statement::WhileLoop {
cond_text,
body_text: body,
};
}
}
let upper = text.to_ascii_uppercase();
let body = if let Some(end_pos) = upper.rfind("END LOOP") {
text.get(4..end_pos).unwrap_or("").trim().to_string()
} else {
text.trim_start_matches("LOOP")
.trim_start_matches("loop")
.trim()
.to_string()
};
Statement::BareLoop { body_text: body }
}
/// Returns the content of the first quoted literal in `text`, or `None` when
/// `text` contains no single quote.
///
/// Two literal forms are understood:
/// * ordinary `'...'` literals, where a doubled quote `''` is unescaped to a
///   single `'`;
/// * q-quoted literals such as `q'[...]'` (optionally prefixed with `n`),
///   whose content is taken verbatim up to the closing delimiter and quote.
///
/// An unterminated literal yields everything up to the end of `text`.
fn extract_quoted(text: &str) -> Option<String> {
    let chars: Vec<char> = text.chars().collect();
    let open = chars.iter().position(|&ch| ch == '\'')?;

    let is_ident = |ch: char| ch.is_ascii_alphanumeric() || ch == '_';
    let free_before = |idx: usize| idx == 0 || !is_ident(chars[idx - 1]);
    // `q'` only introduces a q-quote when the prefix (`q` or `nq`) is not the
    // tail of a longer identifier and a delimiter char follows the quote.
    let q_quoted = open > 0
        && open + 1 < chars.len()
        && chars[open - 1].eq_ignore_ascii_case(&'q')
        && (free_before(open - 1)
            || (chars[open - 2].eq_ignore_ascii_case(&'n') && free_before(open - 2)));

    if q_quoted {
        let close = match chars[open + 1] {
            '[' => ']',
            '(' => ')',
            '{' => '}',
            '<' => '>',
            other => other,
        };
        let body = &chars[open + 2..];
        let content_len = body
            .windows(2)
            .position(|pair| pair[0] == close && pair[1] == '\'')
            .unwrap_or(body.len());
        return Some(body[..content_len].iter().collect());
    }

    let mut content = String::new();
    let mut rest = chars[open + 1..].iter().copied().peekable();
    while let Some(ch) = rest.next() {
        if ch != '\'' {
            content.push(ch);
        } else if rest.peek() == Some(&'\'') {
            // Doubled quote: an escaped `'` inside the literal.
            rest.next();
            content.push('\'');
        } else {
            break;
        }
    }
    Some(content)
}
/// Finds the first whole-word, case-insensitive occurrence of `keyword` in
/// `text` at or after byte offset `start`.
///
/// Returns the byte offset of the match. A `start` that falls inside a
/// multi-byte char is rounded up to the next char boundary.
fn find_keyword(text: &str, keyword: &str, start: usize) -> Option<usize> {
    let haystack = text.to_ascii_uppercase();
    let needle = keyword.to_ascii_uppercase();

    let mut from = (start..haystack.len())
        .find(|&idx| haystack.is_char_boundary(idx))
        .unwrap_or(haystack.len());

    loop {
        // `get` yields None once `from` has moved past the end of the text.
        let hit = from + haystack.get(from..)?.find(&needle)?;
        if is_word_boundary(&haystack, hit, hit + needle.len()) {
            return Some(hit);
        }
        // Not a whole word: resume one char after the start of this hit.
        let step = haystack[hit..].chars().next().map_or(1, char::len_utf8);
        from = hit + step;
    }
}
fn find_any_keyword(text: &str, keywords: &[&str], start: usize) -> Option<(usize, &'static str)> {
static ELSIF: &str = "ELSIF";
static ELSE: &str = "ELSE";
let upper = text.to_ascii_uppercase();
let mut best: Option<(usize, &'static str)> = None;
for kw in keywords {
let kw_upper = kw.to_ascii_uppercase();
let mut search_from = upper
.char_indices()
.map(|(i, _)| i)
.find(|&i| i >= start)
.unwrap_or(upper.len());
while search_from <= upper.len() {
let Some(rel) = upper[search_from..].find(&kw_upper) else {
break;
};
let abs = search_from + rel;
if is_word_boundary(&upper, abs, abs + kw_upper.len()) {
let tag: &'static str = match kw_upper.as_str() {
"ELSIF" => ELSIF,
"ELSE" => ELSE,
_ => continue,
};
if best.is_none_or(|(b, _)| abs < b) {
best = Some((abs, tag));
}
break;
}
search_from = abs + upper[abs..].chars().next().map_or(1, char::len_utf8);
}
}
best
}
/// Reports whether the byte range `start..end` of `text` is delimited like a
/// whole word.
///
/// The byte before `start` and the byte at `end`, when they exist, must not
/// be an ASCII letter, digit, `_`, `$` or `#`.
fn is_word_boundary(text: &str, start: usize, end: usize) -> bool {
    let bytes = text.as_bytes();
    let is_ident = |byte: u8| byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'$' | b'#');

    let before = start.checked_sub(1).map(|idx| bytes[idx]);
    let after = bytes.get(end).copied();

    !before.is_some_and(is_ident) && !after.is_some_and(is_ident)
}
// Unit tests for the statement lowering pipeline. Every test drives the
// public entry point `lower_statement_body` and inspects the resulting
// `Statement` values.
#[cfg(test)]
mod tests {
use super::*;
// --- simple statements ---------------------------------------------------
#[test]
fn null_statement_classified() {
let r = lower_statement_body("NULL;");
assert_eq!(r.len(), 1);
assert_eq!(r[0], Statement::Null);
}
#[test]
fn assignment_captures_target_and_rhs() {
let r = lower_statement_body("v_x := 42;");
match &r[0] {
Statement::Assignment { target, rhs_text } => {
assert_eq!(target, "v_x");
assert_eq!(rhs_text, "42");
}
other => panic!("expected Assignment, got {other:?}"),
}
}
// Identifiers that merely begin with a statement keyword must still be
// treated as assignment targets.
#[test]
fn verb_prefixed_assignment_is_an_assignment_not_a_keyword() {
for (input, want_target, want_rhs) in [
("return_val := p_user;", "return_val", "p_user"),
("null_count := 5;", "null_count", "5"),
("update_x := p_user;", "update_x", "p_user"),
("delete_flag := 1;", "delete_flag", "1"),
("commit_count := 1;", "commit_count", "1"),
("exit_code := 0;", "exit_code", "0"),
("raise_amount := 100;", "raise_amount", "100"),
("select_idx := 7;", "select_idx", "7"),
("merge_key := p_user;", "merge_key", "p_user"),
("insert_seq := 3;", "insert_seq", "3"),
("savepoint_id := 2;", "savepoint_id", "2"),
("rollback_count := 9;", "rollback_count", "9"),
] {
let r = lower_statement_body(input);
match &r[0] {
Statement::Assignment { target, rhs_text } => {
assert_eq!(target, want_target, "target for {input:?}");
assert_eq!(rhs_text, want_rhs, "rhs for {input:?}");
}
other => panic!("expected Assignment for {input:?}, got {other:?}"),
}
}
}
// Counterpart of the test above: the genuine keyword statements keep
// their classification.
#[test]
fn real_keyword_statements_still_classify() {
assert_eq!(lower_statement_body("NULL;")[0], Statement::Null);
assert!(matches!(
lower_statement_body("RETURN 1;")[0],
Statement::Return { .. }
));
assert!(matches!(
lower_statement_body("DELETE FROM t WHERE id = 1;")[0],
Statement::Sql {
verb: SqlVerb::Delete,
..
}
));
assert!(matches!(
lower_statement_body("COMMIT;")[0],
Statement::TransactionControl { .. }
));
}
#[test]
fn raise_with_named_exception() {
let r = lower_statement_body("RAISE no_data_found;");
assert!(
matches!(&r[0], Statement::Raise { exception } if exception.as_deref() == Some("no_data_found"))
);
}
#[test]
fn bare_raise_classified() {
let r = lower_statement_body("RAISE;");
assert!(matches!(&r[0], Statement::Raise { exception: None }));
}
#[test]
fn return_with_value() {
let r = lower_statement_body("RETURN v_sum;");
assert!(
matches!(&r[0], Statement::Return { value_text } if value_text.as_deref() == Some("v_sum"))
);
}
#[test]
fn return_without_value() {
let r = lower_statement_body("RETURN;");
assert!(matches!(&r[0], Statement::Return { value_text: None }));
}
#[test]
fn exit_when_cond() {
let r = lower_statement_body("EXIT WHEN i > 10;");
assert!(
matches!(&r[0], Statement::Exit { when_text } if when_text.as_deref() == Some("i > 10"))
);
}
// --- dynamic SQL ---------------------------------------------------------
#[test]
fn execute_immediate_with_binds_detected() {
let r = lower_statement_body("EXECUTE IMMEDIATE 'UPDATE t SET a = :1' USING v_a;");
match &r[0] {
Statement::ExecuteImmediate {
sql_literal,
has_bind_variables,
} => {
assert_eq!(sql_literal, "UPDATE t SET a = :1");
assert!(*has_bind_variables);
}
other => panic!("expected ExecuteImmediate, got {other:?}"),
}
}
#[test]
fn execute_immediate_honors_doubled_quote_escape() {
let r = lower_statement_body("EXECUTE IMMEDIATE 'SELECT ''x'' FROM dual';");
match &r[0] {
Statement::ExecuteImmediate { sql_literal, .. } => {
assert_eq!(sql_literal, "SELECT 'x' FROM dual");
}
other => panic!("expected ExecuteImmediate, got {other:?}"),
}
}
#[test]
fn execute_immediate_without_binds() {
let r = lower_statement_body("EXECUTE IMMEDIATE 'ALTER SESSION SET …';");
if let Statement::ExecuteImmediate {
has_bind_variables, ..
} = &r[0]
{
assert!(!has_bind_variables);
} else {
panic!("{r:?}");
}
}
// Any whitespace (or a comment, which is stripped to a space) may separate
// EXECUTE from IMMEDIATE.
#[test]
fn execute_immediate_recognised_with_non_canonical_whitespace() {
for src in [
"EXECUTE IMMEDIATE 'DROP TABLE t';", "EXECUTE\tIMMEDIATE 'DROP TABLE t';", "EXECUTE\nIMMEDIATE 'DROP TABLE t';", "EXECUTE/**/IMMEDIATE 'DROP TABLE t';", ] {
let r = lower_statement_body(src);
assert!(
matches!(r.first(), Some(Statement::ExecuteImmediate { sql_literal, .. }) if sql_literal == "DROP TABLE t"),
"non-canonical EXECUTE IMMEDIATE must classify as dynamic SQL: {src:?} -> {r:?}"
);
}
assert!(
!matches!(
lower_statement_body("executable_flag := 1;").first(),
Some(Statement::ExecuteImmediate { .. })
),
"an identifier starting with EXECUTE must not match"
);
}
// --- static SQL and transaction control ----------------------------------
#[test]
fn sql_verbs_classified() {
for (verb, src) in [
("SELECT", "SELECT * INTO v_row FROM t;"),
("INSERT", "INSERT INTO t VALUES (1);"),
("UPDATE", "UPDATE t SET x = 1;"),
("DELETE", "DELETE FROM t WHERE id = 1;"),
(
"MERGE",
"MERGE INTO t USING s ON (t.id = s.id) WHEN MATCHED THEN UPDATE SET x = s.x;",
),
] {
let r = lower_statement_body(src);
assert!(matches!(&r[0], Statement::Sql { .. }), "{verb}: {r:?}");
}
}
#[test]
fn transaction_control_classified() {
for src in ["COMMIT;", "ROLLBACK;", "SAVEPOINT s1;"] {
let r = lower_statement_body(src);
assert!(
matches!(&r[0], Statement::TransactionControl { .. }),
"{src}: {r:?}"
);
}
}
#[test]
fn comment_only_chunks_dropped() {
let r = lower_statement_body("-- header\n-- still here\nNULL;");
assert_eq!(r.len(), 1);
assert!(matches!(r[0], Statement::Null));
}
#[test]
fn unrecognised_line_surfaces_with_typed_reason() {
let r = lower_statement_body("xyz_unknown_keyword;");
match &r[0] {
Statement::Unrecognized {
unknown_reason: UnknownStatementReason::UnrecognizedKeyword,
..
} => {}
other => panic!("{other:?}"),
}
}
#[test]
fn nested_block_passes_through() {
let r = lower_statement_body("BEGIN NULL; END;");
assert!(matches!(r[0], Statement::NestedBlock { .. }));
}
// --- statement splitting -------------------------------------------------
#[test]
fn multiple_statements_split_at_top_level_semicolons() {
let src = "v_x := 1; v_y := 2; NULL;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 3);
}
#[test]
fn semicolon_inside_string_literal_is_not_a_boundary() {
let r = lower_statement_body("v_msg := 'a; b; c'; NULL;");
assert_eq!(r.len(), 2, "the assignment (with its literal) + NULL");
}
#[test]
fn block_keywords_inside_string_literal_do_not_move_depth() {
let r = lower_statement_body("v_msg := 'BEGIN x END;'; v_y := 2;");
assert_eq!(r.len(), 2);
}
#[test]
fn q_quote_with_embedded_end_and_semicolon_is_opaque() {
let r = lower_statement_body("v_sql := q'{SELECT 1; END;}'; NULL;");
assert_eq!(r.len(), 2);
}
#[test]
fn semicolon_inside_line_comment_is_not_a_boundary() {
let r = lower_statement_body("v_x := 1; -- trailing; comment; here\nNULL;");
assert_eq!(r.len(), 2);
}
#[test]
fn semicolon_inside_block_comment_is_not_a_boundary() {
let r = lower_statement_body("v_x := 1 /* a; b; c */ + 2; NULL;");
assert_eq!(r.len(), 2);
}
#[test]
fn comment_markers_inside_string_literal_are_preserved() {
let r = lower_statement_body("v_msg := 'keep -- this and /* this */ too';");
assert_eq!(r.len(), 1);
let dbg = format!("{:?}", r[0]);
assert!(
dbg.contains("keep -- this") && dbg.contains("/* this */"),
"comment-like content inside the literal must survive strip_comments: {dbg}"
);
}
// --- compound statements -------------------------------------------------
#[test]
fn for_loop_captures_iterator_and_range() {
let r = lower_statement_body("FOR i IN 1..10 LOOP NULL; END LOOP;");
match &r[0] {
Statement::ForLoop {
iterator,
range_text,
..
} => {
assert_eq!(iterator, "i");
assert_eq!(range_text, "1..10");
}
other => panic!("{other:?}"),
}
}
#[test]
fn multi_statement_if_body_is_one_statement() {
let src = "IF p_flag = 1 THEN \
INSERT INTO audit_log VALUES (1); \
UPDATE accounts SET bal = 0; \
END IF;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1, "IF body must not be torn apart: {r:?}");
match &r[0] {
Statement::If { arms, .. } => {
assert_eq!(arms.len(), 1);
assert!(arms[0].body_text.to_ascii_uppercase().contains("INSERT"));
assert!(arms[0].body_text.to_ascii_uppercase().contains("UPDATE"));
}
other => panic!("expected If, got {other:?}"),
}
}
#[test]
fn multi_statement_loop_body_is_one_statement() {
let src = "FOR r IN 1..10 LOOP \
INSERT INTO dst VALUES (r); \
DELETE FROM stale WHERE id = r; \
END LOOP;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1, "LOOP body must not be torn apart: {r:?}");
match &r[0] {
Statement::ForLoop { body_text, .. } => {
assert!(body_text.to_ascii_uppercase().contains("INSERT"));
assert!(body_text.to_ascii_uppercase().contains("DELETE"));
}
other => panic!("expected ForLoop, got {other:?}"),
}
}
#[test]
fn multi_statement_bare_loop_body_is_one_statement() {
let src = "LOOP v_x := 1; v_y := 2; EXIT WHEN v_x > 5; END LOOP;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1, "bare LOOP body must not be torn apart: {r:?}");
assert!(matches!(r[0], Statement::BareLoop { .. }));
}
#[test]
fn nested_if_inside_loop_stays_one_statement() {
let src = "FOR i IN 1..3 LOOP \
IF i > 1 THEN do_a(i); ELSE do_b(i); END IF; \
log_iter(i); \
END LOOP;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1, "nested IF/LOOP must not be torn apart: {r:?}");
assert!(matches!(r[0], Statement::ForLoop { .. }));
}
// An IF with no END IF leaves the block depth above zero at end of input.
#[test]
fn unterminated_if_block_degrades_with_typed_reason() {
let src = "IF a THEN foo(); bar();";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1, "unterminated IF stays one chunk: {r:?}");
match &r[0] {
Statement::Unrecognized {
unknown_reason: UnknownStatementReason::UnterminatedBlock,
..
} => {}
other => panic!("expected Unrecognized/UnterminatedBlock, got {other:?}"),
}
}
#[test]
fn multi_elsif_if_has_no_phantom_arms() {
let src = "IF a THEN NULL ELSIF b THEN NULL ELSIF c THEN NULL ELSE NULL END IF";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1);
match &r[0] {
Statement::If {
arms,
else_body_text,
} => {
let conds: Vec<&str> = arms.iter().map(|a| a.cond_text.as_str()).collect();
assert_eq!(
conds,
vec!["a", "b", "c"],
"expected exactly 3 arms a/b/c, got {arms:?}"
);
assert_eq!(else_body_text.as_deref(), Some("NULL"));
}
other => panic!("expected If, got {other:?}"),
}
}
#[test]
fn multi_elsif_if_keeps_bodies_with_conditions() {
let src = "IF a THEN s1; ELSIF b THEN s2; ELSIF c THEN s3; ELSE s4; END IF;";
let r = lower_statement_body(src);
assert_eq!(r.len(), 1);
match &r[0] {
Statement::If { arms, .. } => {
assert_eq!(arms.len(), 3);
assert_eq!(arms[0].cond_text, "a");
assert_eq!(arms[0].body_text, "s1;");
assert_eq!(arms[1].cond_text, "b");
assert_eq!(arms[1].body_text, "s2;");
assert_eq!(arms[2].cond_text, "c");
assert_eq!(arms[2].body_text, "s3;");
}
other => panic!("expected If, got {other:?}"),
}
}
// --- multi-byte safety ---------------------------------------------------
// A non-ASCII char right after the keyword must not cause a slice at a
// non-char-boundary byte offset.
#[test]
fn if_keyword_followed_by_multibyte_char_does_not_panic() {
let r = lower_statement_body("IFé THEN x := 1; END IF;");
assert_eq!(r.len(), 1, "expected a single classified statement");
assert!(
matches!(&r[0], Statement::If { .. }),
"expected If, got {:?}",
r[0]
);
}
#[test]
fn loop_keywords_followed_by_multibyte_char_do_not_panic() {
for input in [
"FORé LOOP NULL; END LOOP;",
"WHILEé LOOP NULL; END LOOP;",
"FORé i IN 1..3 LOOP NULL; END LOOP;",
] {
let r = lower_statement_body(input);
assert_eq!(r.len(), 1, "expected one statement for {input:?}");
}
}
}