#![doc = include_str!("readme.md")]
use oak_core::{
Lexer, LexerCache, LexerState, OakError, Source,
lexer::{CommentConfig, LexOutput, StringConfig, WhitespaceConfig},
};
pub mod token_type;
use crate::{language::IniLanguage, lexer::token_type::IniTokenType};
/// Crate-internal shorthand for `LexerState` fixed to `IniLanguage`, generic over the source type `S`.
pub(crate) type State<'a, S> = LexerState<'a, S, IniLanguage>;
// NOTE(review): the three configuration statics below are underscore-prefixed and are not
// referenced by any code visible in this file — confirm whether they are still needed.

/// Whitespace configuration with Unicode whitespace enabled.
// NOTE(review): `skip_whitespace` in this file only matches space, tab and carriage return.
static _INI_WHITESPACE: WhitespaceConfig = WhitespaceConfig { unicode_whitespace: true };
/// Comment configuration: `;` as the line marker, empty block delimiters, no nesting.
// NOTE(review): `skip_comment` in this file also accepts `#`, which is not listed here.
static _INI_COMMENT: CommentConfig = CommentConfig { line_marker: ";", block_start: "", block_end: "", nested_blocks: false };
/// String configuration: double or single quotes, with backslash as the escape character.
static _INI_STRING: StringConfig = StringConfig { quotes: &['"', '\''], escape: Some('\\') };
/// Lexer for INI sources that borrows an [`IniLanguage`] configuration for the lifetime `'config`.
#[derive(Clone, Debug)]
pub struct IniLexer<'config> {
/// Language configuration handed to [`IniLexer::new`].
// NOTE(review): none of the methods visible in this file read this field — confirm it is used elsewhere.
config: &'config IniLanguage,
}
impl<'config> Lexer<IniLanguage> for IniLexer<'config> {
    /// Tokenizes `source` with a freshly created lexer state and finishes it against `cache`.
    ///
    /// `_edits` is part of the signature but is not consulted by this implementation.
    /// An EOF token is appended only when the scan itself reported success; the scan
    /// result is then passed on to `finish_with_cache` together with `cache`.
    fn lex<'a, S: Source + ?Sized>(&self, source: &S, _edits: &[oak_core::TextEdit], cache: &'a mut impl LexerCache<IniLanguage>) -> LexOutput<IniLanguage> {
        let mut lexer_state: State<'_, S> = State::new(source);
        let outcome = self.run(&mut lexer_state);
        match &outcome {
            Ok(_) => {
                lexer_state.add_eof();
            }
            Err(_) => {}
        }
        lexer_state.finish_with_cache(outcome, cache)
    }
}
impl<'config> IniLexer<'config> {
pub fn new(config: &'config IniLanguage) -> Self {
Self { config }
}
/// Drives the scan until the state reports the end of input.
///
/// On every iteration the sub-lexers are tried in a fixed order — whitespace, newline,
/// comment, string, number, identifier, punctuation — and the first one that reports
/// success ends the iteration. When none of them matches, `advance_if_dead_lock` is
/// called with the position recorded at the top of the iteration.
///
/// This function never constructs an error itself; it always returns `Ok(())`.
fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
    while state.not_at_end() {
        let iteration_start = state.get_position();
        // `||` short-circuits, so later sub-lexers are only tried when earlier ones declined.
        let matched = self.skip_whitespace(state)
            || self.lex_newline(state)
            || self.skip_comment(state)
            || self.lex_string_literal(state)
            || self.lex_number_literal(state)
            || self.lex_identifier(state)
            || self.lex_punctuation(state);
        if !matched {
            state.advance_if_dead_lock(iteration_start);
        }
    }
    Ok(())
}
/// Consumes a run of spaces, tabs and carriage returns and emits it as one `Whitespace` token.
///
/// Returns `true` when at least one character was consumed, `false` otherwise
/// (in which case no token is added and the position is unchanged).
fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let begin = state.get_position();
    while let Some(c) = state.peek() {
        if !matches!(c, ' ' | '\t' | '\r') {
            break;
        }
        state.advance(c.len_utf8());
    }
    let finish = state.get_position();
    if finish > begin {
        state.add_token(IniTokenType::Whitespace, begin, finish);
        true
    }
    else {
        false
    }
}
/// Emits a `Newline` token for a single `'\n'` at the current position.
///
/// Returns `false` without consuming anything when the current character is not `'\n'`.
fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    if state.current() != Some('\n') {
        return false;
    }
    let begin = state.get_position();
    state.advance(1);
    let finish = state.get_position();
    state.add_token(IniTokenType::Newline, begin, finish);
    true
}
/// Lexes a line comment introduced by `;` or `#` and emits it as a `Comment` token.
///
/// The token covers the marker and everything up to, but not including, the next
/// `'\n'` (or the end of input). Returns `false` when the current character is not
/// a comment marker.
fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let begin = state.get_position();
    match state.current() {
        Some(';') | Some('#') => {}
        _ => return false,
    }
    // Both markers are single-byte ASCII characters.
    state.advance(1);
    while let Some(c) = state.peek() {
        if c == '\n' {
            break;
        }
        state.advance(c.len_utf8());
    }
    let finish = state.get_position();
    state.add_token(IniTokenType::Comment, begin, finish);
    true
}
/// Lexes a string delimited by `"` or `'` and emits it as a `String` token.
///
/// The token starts at the opening quote and ends after the matching closing quote.
/// A backslash escapes the character that follows it, so an escaped quote does not
/// terminate the string. If the input ends before a closing quote is found, the token
/// extends to the end of the input.
///
/// Returns `false` without consuming anything when the current character is not a quote.
fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let start = state.get_position();
    let quote_char = match state.current() {
        Some(c) if c == '"' || c == '\'' => c,
        _ => return false,
    };
    // Quotes and the backslash are single-byte ASCII characters.
    state.advance(1);
    while let Some(ch) = state.peek() {
        if ch == quote_char {
            state.advance(1);
            break;
        }
        if ch == '\\' {
            state.advance(1);
            // Skip the escaped character by its full encoded width: advancing by a single
            // byte would leave the cursor inside a multi-byte UTF-8 sequence.
            if let Some(escaped) = state.peek() {
                state.advance(escaped.len_utf8());
            }
        }
        else {
            state.advance(ch.len_utf8());
        }
    }
    let end = state.get_position();
    state.add_token(IniTokenType::String, start, end);
    true
}
/// Lexes an integer or floating-point literal.
///
/// Accepted shape: an optional `+`/`-` that is immediately followed by a digit, a run of
/// ASCII digits, at most one fractional part (`.` followed by digits, only before any
/// exponent) and at most one exponent (`e`/`E`, optional sign, digits).
///
/// A `.` or an exponent marker is consumed only when at least one digit follows it;
/// otherwise the literal ends before that character and it is left for the other
/// sub-lexers (so `10em` yields the integer `10`, not a float `10e`).
///
/// Emits `Float` when a fractional part or exponent was consumed, `Integer` otherwise.
/// Returns `false` without consuming anything when no number starts at the current position.
fn lex_number_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let start = state.get_position();
    let first = match state.current() {
        Some(c) => c,
        None => return false,
    };
    if first == '-' || first == '+' {
        // A sign only starts a number when a digit follows directly.
        match state.peek_next_n(1) {
            Some(next) if next.is_ascii_digit() => {}
            _ => return false,
        }
    }
    else if !first.is_ascii_digit() {
        return false;
    }
    state.advance(1);
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(ch) = state.peek() {
        if ch.is_ascii_digit() {
            state.advance(1);
        }
        else if ch == '.' && !has_dot && !has_exp {
            // Tentatively step over the dot; rewind if it is not followed by a digit.
            let mark = state.get_position();
            state.advance(1);
            if !state.peek().map_or(false, |d| d.is_ascii_digit()) {
                state.set_position(mark);
                break;
            }
            has_dot = true;
        }
        else if (ch == 'e' || ch == 'E') && !has_exp {
            // Tentatively step over the marker and optional sign; rewind if no digit follows.
            let mark = state.get_position();
            state.advance(1);
            if let Some(sign) = state.peek() {
                if sign == '+' || sign == '-' {
                    state.advance(1);
                }
            }
            if !state.peek().map_or(false, |d| d.is_ascii_digit()) {
                state.set_position(mark);
                break;
            }
            has_exp = true;
        }
        else {
            break;
        }
    }
    // At this point at least one digit has been consumed (the first character is a digit,
    // or a sign that was verified to precede one), so the span is never a bare sign or dot.
    let end = state.get_position();
    let kind = if has_dot || has_exp { IniTokenType::Float } else { IniTokenType::Integer };
    state.add_token(kind, start, end);
    true
}
/// Lexes a bare word and classifies it.
///
/// A word starts with an ASCII letter or `_` and continues with ASCII letters, digits,
/// `_` or `-`. The words `true` and `false` (compared case-insensitively) become
/// `Boolean` tokens; a word accepted by `is_datetime_like` becomes `DateTime`;
/// everything else becomes `Identifier`.
///
/// Returns `false` without consuming anything when the current character cannot start a word.
fn lex_identifier<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let start = state.get_position();
    match state.current() {
        Some(c) if c.is_ascii_alphabetic() || c == '_' => {}
        _ => return false,
    }
    state.advance(1);
    while let Some(c) = state.current() {
        if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
            state.advance(1);
        }
        else {
            break;
        }
    }
    let end = state.get_position();
    let text = state.get_text_in((start..end).into());
    let word: &str = text.as_ref();
    // The word is ASCII-only by construction, so an ASCII case-insensitive comparison is
    // exact and avoids allocating a lowercased copy for every identifier.
    let kind = if word.eq_ignore_ascii_case("true") || word.eq_ignore_ascii_case("false") {
        IniTokenType::Boolean
    }
    // NOTE(review): the loop above never consumes ':', so a check that requires a ':' in
    // the word cannot succeed here — confirm where date-times are meant to be lexed.
    else if self.is_datetime_like(word) {
        IniTokenType::DateTime
    }
    else {
        IniTokenType::Identifier
    };
    state.add_token(kind, start, end);
    true
}
/// Lexes one punctuation token: `[[`, `]]`, `{`, `}`, `[`, `]`, `,`, `.` or `=`.
///
/// The two-character brackets are tried first so that `[[` and `]]` are not split into
/// two single-bracket tokens. Returns `false` without consuming anything when the input
/// at the current position is none of the above.
fn lex_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
    let begin = state.get_position();
    // Pair each recognised token with its width in bytes; all of them are ASCII.
    let (kind, width) = if state.starts_with("[[") {
        (IniTokenType::DoubleLeftBracket, 2)
    }
    else if state.starts_with("]]") {
        (IniTokenType::DoubleRightBracket, 2)
    }
    else {
        match state.current() {
            Some('{') => (IniTokenType::LeftBrace, 1),
            Some('}') => (IniTokenType::RightBrace, 1),
            Some('[') => (IniTokenType::LeftBracket, 1),
            Some(']') => (IniTokenType::RightBracket, 1),
            Some(',') => (IniTokenType::Comma, 1),
            Some('.') => (IniTokenType::Dot, 1),
            Some('=') => (IniTokenType::Equal, 1),
            _ => return false,
        }
    };
    state.advance(width);
    let finish = state.get_position();
    state.add_token(kind, begin, finish);
    true
}
/// Returns `true` when `text` contains both a `-` and a `:`.
fn is_datetime_like(&self, text: &str) -> bool {
    ['-', ':'].iter().all(|&marker| text.contains(marker))
}
}