use plsql_core::{FileId, Position, Span};
use plsql_parser::tokens::{Token, TokenKind, TokenTape, Trivia, TriviaTable};
use proptest::prelude::*;
fn keyword_strategy() -> impl Strategy<Value = &'static str> {
prop_oneof![
Just("SELECT"),
Just("FROM"),
Just("WHERE"),
Just("BEGIN"),
Just("END"),
Just("IF"),
Just("THEN"),
Just("ELSE"),
Just("LOOP"),
Just("CREATE"),
Just("REPLACE"),
Just("PACKAGE"),
Just("PROCEDURE"),
Just("FUNCTION"),
Just("RETURN"),
Just("IS"),
Just("AS"),
Just("DECLARE"),
Just("TYPE"),
Just("TABLE"),
Just("NUMBER"),
Just("VARCHAR2"),
Just("BOOLEAN"),
]
}
fn identifier_strategy() -> impl Strategy<Value = String> {
"[a-zA-Z_][a-zA-Z0-9_]{0,15}"
}
fn punct_strategy() -> impl Strategy<Value = &'static str> {
prop_oneof![
Just(";"),
Just("("),
Just(")"),
Just(","),
Just("."),
Just(":="),
Just("=>"),
Just("||"),
Just("+"),
Just("-"),
Just("*"),
Just("="),
Just("<"),
Just(">"),
Just("/"),
]
}
fn ws_char_strategy() -> impl Strategy<Value = char> {
prop_oneof![Just(' '), Just('\t'), Just('\n'), Just('\r')]
}
fn whitespace_strategy() -> impl Strategy<Value = String> {
prop::collection::vec(ws_char_strategy(), 1..4).prop_map(|v| v.into_iter().collect())
}
fn comment_strategy() -> impl Strategy<Value = String> {
prop_oneof![
("--", "[^\n]{0,30}").prop_map(|(p, c)| format!("{p}{c}")),
("/*", "[^*]{0,30}").prop_map(|(o, c)| format!("{o}{c}*/")),
]
}
fn token_element_strategy() -> impl Strategy<Value = (String, TokenKind)> {
prop_oneof![
keyword_strategy().prop_map(|k| (k.to_string(), TokenKind::Keyword)),
identifier_strategy().prop_map(|i| (i, TokenKind::Identifier)),
punct_strategy().prop_map(|p| (p.to_string(), TokenKind::Operator)),
]
}
fn trivia_element_strategy() -> impl Strategy<Value = Trivia> {
prop_oneof![
whitespace_strategy().prop_map(Trivia::Whitespace),
comment_strategy().prop_map(Trivia::LineComment),
]
}
fn source_strategy() -> impl Strategy<Value = String> {
prop::collection::vec(
(
prop::option::of(trivia_element_strategy()),
token_element_strategy(),
prop::option::of(trivia_element_strategy()),
),
1..20,
)
.prop_map(|elements| {
let mut out = String::new();
for (pre, (text, _kind), post) in elements {
if let Some(t) = pre {
push_trivia(&mut out, &t);
}
out.push_str(&text);
if let Some(t) = post {
push_trivia(&mut out, &t);
}
}
out
})
}
fn push_trivia(out: &mut String, t: &Trivia) {
match t {
Trivia::Whitespace(s) | Trivia::LineComment(s) | Trivia::BlockComment(s) => {
out.push_str(s);
}
}
}
fn simple_lex(source: &str) -> (TokenTape, TriviaTable) {
let mut tape = TokenTape::new();
let mut trivia = TriviaTable::new();
let bytes = source.as_bytes();
let len = bytes.len();
let mut pos = 0;
let mut token_index = 0;
while pos < len {
if bytes[pos].is_ascii_whitespace() {
let start = pos;
while pos < len && bytes[pos].is_ascii_whitespace() {
pos += 1;
}
let text = &source[start..pos];
trivia.push(token_index, Trivia::Whitespace(text.to_string()));
continue;
}
let start = pos;
while pos < len && !bytes[pos].is_ascii_whitespace() {
pos += 1;
}
let text = &source[start..pos];
let kind = classify_token(text);
let span = Span::new(
FileId::new(0),
Position::new(1, start as u32 + 1, start as u32),
Position::new(1, pos as u32 + 1, pos as u32),
);
tape.push(Token::new(kind, span, text));
token_index += 1;
}
(tape, trivia)
}
fn classify_token(text: &str) -> TokenKind {
match text.to_uppercase().as_str() {
"SELECT" | "FROM" | "WHERE" | "BEGIN" | "END" | "IF" | "THEN" | "ELSE" | "LOOP"
| "CREATE" | "REPLACE" | "PACKAGE" | "PROCEDURE" | "FUNCTION" | "RETURN" | "IS" | "AS"
| "DECLARE" | "TYPE" | "TABLE" | "NUMBER" | "VARCHAR2" | "BOOLEAN" => TokenKind::Keyword,
";" => TokenKind::Semicolon,
"/" => TokenKind::Slash,
"." => TokenKind::Dot,
"," => TokenKind::Comma,
"(" => TokenKind::LParen,
")" => TokenKind::RParen,
":=" => TokenKind::Assign,
"=>" => TokenKind::Arrow,
"||" => TokenKind::Concat,
_ => TokenKind::Identifier,
}
}
proptest! {
#[test]
fn round_trip_reconstruct_equals_source(source in source_strategy()) {
let (tape, trivia) = simple_lex(&source);
let reconstructed = tape.reconstruct(&trivia);
prop_assert_eq!(
&reconstructed, &source,
"\nRound-trip failed.\nOriginal: {:?}\nReconstructed: {:?}\nTokens: {}",
source, reconstructed, tape.len()
);
}
#[test]
fn token_count_bounded_by_source_length(source in source_strategy()) {
let (tape, _trivia) = simple_lex(&source);
prop_assert!(
tape.len() <= source.len(),
"Token count {} exceeds source length {}",
tape.len(), source.len()
);
}
#[test]
fn empty_source_produces_empty_tape(_seed in 0u32..1) {
let (tape, trivia) = simple_lex("");
prop_assert!(tape.is_empty());
prop_assert_eq!(trivia.total_count(), 0);
prop_assert_eq!(tape.reconstruct(&trivia), "");
}
#[test]
fn single_char_round_trips(ch in "[a-zA-Z0-9;.,()+\\-*/]") {
let (tape, trivia) = simple_lex(&ch);
prop_assert_eq!(tape.reconstruct(&trivia), ch);
}
#[test]
fn whitespace_only_round_trips(ws in "[ \t\n\r]+") {
let (tape, trivia) = simple_lex(&ws);
prop_assert_eq!(tape.reconstruct(&trivia), ws);
}
#[test]
fn trailing_whitespace_preserved(
tokens in prop::collection::vec("[a-zA-Z]+", 1..5),
trailing in "[ \t\n]+"
) {
let mut source = tokens.join(" ");
source.push_str(&trailing);
let (tape, trivia) = simple_lex(&source);
prop_assert_eq!(tape.reconstruct(&trivia), source);
}
#[test]
fn leading_whitespace_preserved(
leading in "[ \t\n]+",
tokens in prop::collection::vec("[a-zA-Z]+", 1..5)
) {
let mut source = leading;
source.push_str(&tokens.join(" "));
let (tape, trivia) = simple_lex(&source);
prop_assert_eq!(tape.reconstruct(&trivia), source);
}
#[test]
fn multi_whitespace_preserved(
w1 in "[a-zA-Z]+",
ws in "[ \t\n]{2,5}",
w2 in "[a-zA-Z]+"
) {
let source = format!("{w1}{ws}{w2}");
let (tape, trivia) = simple_lex(&source);
prop_assert_eq!(tape.reconstruct(&trivia), source);
}
}