iridium_core 0.1.12

SQL Server-compatible Rust engine core for Iridium SQL
Documentation
use crate::parser::ast::Token;
use crate::parser::token::Keyword;
use winnow::ascii::float;
use winnow::combinator::{alt, opt, repeat};
use winnow::prelude::*;
use winnow::token::{any, take_while};

pub fn lex(input: &mut &str, quoted_identifier: bool) -> ModalResult<Vec<Token>> {
    repeat(
        0..,
        alt((
            parse_whitespace.map(|_| None),
            parse_comment.map(|_| None),
            parse_binary_literal.map(|hex| Some(Token::BinaryLiteral(hex.to_string()))),
            |input: &mut _| {
                let start = *input;
                parse_number(input).map(|n| {
                    let consumed = &start[..start.len() - input.len()];
                    let is_float =
                        consumed.contains('.') || consumed.contains('e') || consumed.contains('E');
                    Some(Token::Number {
                        value: n,
                        is_float,
                        raw: consumed.to_string(),
                    })
                })
            },
            |input: &mut _| {
                parse_string(input).map(|s| {
                    let is_n = s.starts_with('N');
                    let unescaped = unescape_string(s);
                    Some(if is_n {
                        Token::NString(unescaped)
                    } else {
                        Token::String(unescaped)
                    })
                })
            },
            |i: &mut _| {
                if !quoted_identifier {
                    parse_quoted_identifier(i)
                        .map(|s| Some(Token::String(unescape_quoted_identifier(s))))
                } else {
                    Err(winnow::error::ErrMode::Backtrack(
                        winnow::error::ContextError::new(),
                    ))
                }
            },
            parse_variable.map(|v| Some(Token::Variable(v.to_string()))),
            |i: &mut _| {
                if quoted_identifier {
                    parse_quoted_identifier(i)
                        .map(|id| Some(Token::Identifier(unescape_quoted_identifier(id))))
                } else {
                    Err(winnow::error::ErrMode::Backtrack(
                        winnow::error::ContextError::new(),
                    ))
                }
            },
            parse_identifier
                .map(|id| {
                    if id.eq_ignore_ascii_case("GO") {
                        Token::Go
                    } else if let Some(kw) = Keyword::parse(id) {
                        Token::Keyword(kw)
                    } else {
                        Token::Identifier(id.to_string())
                    }
                })
                .map(Some),
            parse_bracketed_identifier
                .map(|id| Some(Token::Identifier(unescape_bracketed_identifier(id)))),
            parse_operator_token.map(Some),
            parse_punctuation,
        )),
    )
    .map(|v: Vec<Option<Token>>| v.into_iter().flatten().collect())
    .parse_next(input)
}

fn parse_whitespace<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    take_while(1.., |c: char| c.is_whitespace()).parse_next(input)
}

fn parse_comment(input: &mut &str) -> ModalResult<()> {
    if input.starts_with("--") {
        let rest = &input[2..];
        let next_is_ws = rest
            .chars()
            .next()
            .map(|c| c.is_whitespace())
            .unwrap_or(true);
        if rest.is_empty() || next_is_ws {
            *input = rest;
            take_while(0.., |c| c != '\n').parse_next(input)?;
            opt('\n').parse_next(input)?;
            return Ok(());
        }
    }
    if input.starts_with("/*") {
        ("/*", winnow::token::take_until(0.., "*/"), "*/")
            .map(|_| ())
            .parse_next(input)
    } else {
        Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ))
    }
}

fn parse_number(input: &mut &str) -> ModalResult<f64> {
    let start = *input;
    let result: ModalResult<f64> = float.parse_next(input);
    if let Ok(val) = result {
        if val.is_infinite() || val.is_nan() {
            *input = start;
            return Err(winnow::error::ErrMode::Backtrack(
                winnow::error::ContextError::new(),
            ));
        }
    }
    result
}

fn parse_string<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    if input.starts_with('N') {
        *input = &input[1..];
    }
    if !input.starts_with('\'') {
        return Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ));
    }
    *input = &input[1..];
    loop {
        let _ = take_while(0.., |c| c != '\'').parse_next(input)?;
        if input.starts_with('\'') {
            *input = &input[1..];
            if input.starts_with('\'') {
                *input = &input[1..];
                continue;
            } else {
                break;
            }
        } else {
            return Err(winnow::error::ErrMode::Backtrack(
                winnow::error::ContextError::new(),
            ));
        }
    }
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_identifier<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    let _: char = any
        .verify(|c: &char| c.is_ascii_alphabetic() || *c == '_' || *c == '#' || *c == '@')
        .parse_next(input)?;
    let _: &str = take_while(0.., |c: char| {
        c.is_ascii_alphanumeric() || c == '_' || c == '#' || c == '$'
    })
    .parse_next(input)?;
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_bracketed_identifier<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    if !input.starts_with('[') {
        return Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ));
    }
    *input = &input[1..];
    loop {
        let _ = take_while(0.., |c| c != ']').parse_next(input)?;
        if input.starts_with(']') {
            *input = &input[1..];
            if input.starts_with(']') {
                *input = &input[1..];
                continue;
            } else {
                break;
            }
        } else {
            return Err(winnow::error::ErrMode::Backtrack(
                winnow::error::ContextError::new(),
            ));
        }
    }
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_variable<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    let _: &str = alt(("@@", "@")).parse_next(input)?;
    let _: &str =
        take_while(1.., |c: char| c.is_ascii_alphanumeric() || c == '_').parse_next(input)?;
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_binary_literal<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    let _: &str = alt(("0x", "0X")).parse_next(input)?;
    let _: &str = take_while(0.., |c: char| c.is_ascii_hexdigit()).parse_next(input)?;
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_quoted_identifier<'a>(input: &mut &'a str) -> ModalResult<&'a str> {
    let start = *input;
    if !input.starts_with('"') {
        return Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ));
    }
    *input = &input[1..];
    loop {
        let _ = take_while(0.., |c| c != '"').parse_next(input)?;
        if input.starts_with('"') {
            *input = &input[1..];
            if input.starts_with('"') {
                *input = &input[1..];
                continue;
            } else {
                break;
            }
        } else {
            return Err(winnow::error::ErrMode::Backtrack(
                winnow::error::ContextError::new(),
            ));
        }
    }
    let len = start.len() - input.len();
    Ok(&start[..len])
}

fn parse_operator_token(input: &mut &str) -> ModalResult<Token> {
    alt((
        "~".map(|_| Token::Tilde),
        alt((
            alt(("<=", ">=", "<>", "!=")),
            alt(("=", "<", ">", "+", "-", "*")),
            alt(("/", "%", "&", "|", "^")),
        ))
        .map(|op: &str| {
            if op == "*" {
                Token::Star
            } else {
                Token::Operator(op.to_string())
            }
        }),
    ))
    .parse_next(input)
}

fn parse_punctuation(input: &mut &str) -> ModalResult<Option<Token>> {
    alt((
        "(".map(|_| Some(Token::LParen)),
        ")".map(|_| Some(Token::RParen)),
        ",".map(|_| Some(Token::Comma)),
        ";".map(|_| Some(Token::Semicolon)),
        ".".map(|_| Some(Token::Dot)),
    ))
    .parse_next(input)
}

pub fn unescape_string(s: &str) -> String {
    let mut s_slice = s;
    if s_slice.starts_with('N') {
        s_slice = &s_slice[1..];
    }
    if s_slice.starts_with('\'') {
        s_slice = &s_slice[1..];
    }
    if s_slice.ends_with('\'') {
        s_slice = &s_slice[..s_slice.len() - 1];
    }
    s_slice.replace("''", "'")
}

pub fn unescape_bracketed_identifier(s: &str) -> String {
    let mut s_slice = s;
    if s_slice.starts_with('[') {
        s_slice = &s_slice[1..];
    }
    if s_slice.ends_with(']') {
        s_slice = &s_slice[..s_slice.len() - 1];
    }
    s_slice.replace("]]", "]")
}

pub fn unescape_quoted_identifier(s: &str) -> String {
    let mut s_slice = s;
    if s_slice.starts_with('"') {
        s_slice = &s_slice[1..];
    }
    if s_slice.ends_with('"') {
        s_slice = &s_slice[..s_slice.len() - 1];
    }
    s_slice.replace("\"\"", "\"")
}