#![allow(non_camel_case_types)]
use logos::{Lexer, Logos};
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u16)]
pub enum SyntaxKind {
#[regex(r"([ \t])+")]
WHITESPACE = 0,
#[regex(r"(\n|\r\n)+")]
NEWLINE,
#[regex(r"#[^\n\r]*")]
COMMENT,
#[regex(r"[A-Za-z0-9_-]+", priority = 2)]
IDENT,
#[regex(r"[*?A-Za-z0-9_-]+")]
IDENT_WITH_GLOB,
#[token(".")]
PERIOD,
#[token(",")]
COMMA,
#[token("=")]
EQ,
#[regex(r#"""#, lex_string)]
STRING,
#[regex(r#"""""#, lex_multi_line_string)]
MULTI_LINE_STRING,
#[regex(r#"'"#, lex_string_literal)]
STRING_LITERAL,
#[regex(r#"'''"#, lex_multi_line_string_literal)]
MULTI_LINE_STRING_LITERAL,
#[regex(r"[+-]?[0-9_]+", priority = 4)]
INTEGER,
#[regex(r"0x[0-9A-Fa-f_]+")]
INTEGER_HEX,
#[regex(r"0o[0-7_]+")]
INTEGER_OCT,
#[regex(r"0b(0|1|_)+")]
INTEGER_BIN,
#[regex(r"[-+]?([0-9_]+(\.[0-9_]+)?([eE][+-]?[0-9_]+)?|nan|inf)", priority = 3)]
FLOAT,
#[regex(r"true|false")]
BOOL,
#[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?(?:[Zz]|[+-][01]\d:[0-5]\d)"#)]
DATE_TIME_OFFSET,
#[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)(?:T|t| )(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)]
DATE_TIME_LOCAL,
#[regex(r#"(?:[1-9]\d\d\d-(?:(?:0[1-9]|1[0-2])-(?:0[1-9]|1\d|2[0-8])|(?:0[13-9]|1[0-2])-(?:29|30)|(?:0[13578]|1[02])-31)|(?:[1-9]\d(?:0[48]|[2468][048]|[13579][26])|(?:[2468][048]|[13579][26])00)-02-29)"#)]
DATE,
#[regex(r#"(?:[01]\d|2[0-3]):[0-5]\d:[0-5]\d(?:(?:\.|,)\d+)?"#)]
TIME,
#[token("[")]
BRACKET_START,
#[token("]")]
BRACKET_END,
#[token("{")]
BRACE_START,
#[token("}")]
BRACE_END,
#[error]
ERROR,
KEY, VALUE, TABLE_HEADER, TABLE_ARRAY_HEADER, ENTRY, ARRAY, INLINE_TABLE,
ROOT, }
impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Lang {}
impl rowan::Language for Lang {
type Kind = SyntaxKind;
fn kind_from_raw(raw: rowan::SyntaxKind) -> Self::Kind {
assert!(raw.0 <= SyntaxKind::ROOT as u16);
unsafe { std::mem::transmute::<u16, SyntaxKind>(raw.0) }
}
fn kind_to_raw(kind: Self::Kind) -> rowan::SyntaxKind {
kind.into()
}
}
pub type SyntaxNode = rowan::SyntaxNode<Lang>;
pub type SyntaxToken = rowan::SyntaxToken<Lang>;
pub type SyntaxElement = rowan::NodeOrToken<SyntaxNode, SyntaxToken>;
fn lex_string(lex: &mut Lexer<SyntaxKind>) -> bool {
let remainder: &str = lex.remainder();
let mut escaped = false;
let mut total_len = 0;
for c in remainder.chars() {
total_len += c.len_utf8();
if c == '\\' {
escaped = !escaped;
continue;
}
if c == '"' && !escaped {
lex.bump(total_len);
return true;
}
escaped = false;
}
false
}
fn lex_multi_line_string(lex: &mut Lexer<SyntaxKind>) -> bool {
let remainder: &str = lex.remainder();
let mut total_len = 0;
let mut quote_count = 0;
let mut escaped = false;
let mut quotes_found = false;
for c in remainder.chars() {
if quotes_found {
if c != '"' {
if quote_count >= 6 {
return false;
}
lex.bump(total_len);
return true;
} else {
quote_count += 1;
total_len += c.len_utf8();
continue;
}
}
total_len += c.len_utf8();
if c == '\\' {
escaped = true;
continue;
}
if c == '"' && !escaped {
quote_count += 1;
} else {
quote_count = 0;
}
if quote_count == 3 {
quotes_found = true;
}
escaped = false;
}
if quotes_found {
if quote_count >= 6 {
return false;
}
lex.bump(total_len);
true
} else {
false
}
}
fn lex_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
let remainder: &str = lex.remainder();
let mut total_len = 0;
for c in remainder.chars() {
total_len += c.len_utf8();
if c == '\'' {
lex.bump(total_len);
return true;
}
}
false
}
fn lex_multi_line_string_literal(lex: &mut Lexer<SyntaxKind>) -> bool {
let remainder: &str = lex.remainder();
let mut total_len = 0;
let mut quote_count = 0;
let mut quotes_found = false;
for c in remainder.chars() {
if quotes_found {
if c != '\'' {
lex.bump(total_len);
return true;
} else {
if quote_count > 4 {
return false;
}
quote_count += 1;
total_len += c.len_utf8();
continue;
}
}
total_len += c.len_utf8();
if c == '\'' {
quote_count += 1;
} else {
quote_count = 0;
}
if quote_count == 3 {
quotes_found = true;
}
}
if quotes_found {
lex.bump(total_len);
true
} else {
false
}
}