use super::ConfError;
use std::ops::Range;
/// The lexical category of a [`Token`].
///
/// The enum carries no data, so it also derives `Eq` and `Hash`; this lets
/// callers use token types as set members or map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenType {
    /// End of input: the lexer position reached the end of the source text.
    Eof,
    /// A `#` line comment, or a `/* ... */` comment when C-style comments are enabled.
    Comment,
    /// Horizontal whitespace. `Lexer::next_token` in this file skips
    /// whitespace instead of emitting this variant.
    Whitespace,
    /// A line terminator (a `\r\n` pair is reported as a single token).
    Newline,
    /// A quoted, triple-quoted, or unquoted argument.
    Argument,
    /// A backslash immediately followed by a line terminator.
    Continuation,
    /// The `;` punctuator.
    Semicolon,
    /// The `{` punctuator.
    LeftCurlyBrace,
    /// The `}` punctuator.
    RightCurlyBrace,
}
/// A single lexical token produced by `Lexer::next_token`.
///
/// The token does not own any text; `span` locates it inside the lexer input.
#[derive(Debug, Clone)]
pub struct Token {
/// The lexical category of this token.
pub token_type: TokenType,
/// Byte range of the token within the lexer input. For a `Continuation`
/// token the range covers only the backslash, even though the lexer also
/// consumes the line terminator and the whitespace that follows it.
pub span: Range<usize>,
/// `true` when the argument was scanned starting at a `"` character.
pub is_quoted: bool,
/// `true` when the argument was opened with `"""`.
pub is_triple_quoted: bool,
/// `true` when expression arguments are enabled in the lexer options and
/// the character immediately following the argument is `(`.
pub is_expression: bool,
}
/// A lexer over a borrowed source string; tokens are pulled one at a time
/// with `next_token`.
pub struct Lexer<'a> {
// The complete source text being tokenised.
input: &'a str,
// Byte offset of the next character to examine; advanced by whole UTF-8
// characters.
position: usize,
// Options consulted while scanning (bidi characters, C-style comments,
// expression arguments).
options: super::ConfOptions,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str, options: super::ConfOptions) -> Self {
Self {
input,
position: 0,
options,
}
}
/// Returns the complete source text this lexer was constructed with.
///
/// The returned slice has the input's own lifetime, so it can outlive the
/// borrow of the lexer.
pub fn input(&self) -> &'a str {
    let source: &'a str = self.input;
    source
}
/// Scans and returns the next token, advancing the lexer past it.
///
/// Horizontal whitespace before the token is skipped. At the end of input
/// an `Eof` token with an empty span is returned (repeatedly, if called
/// again).
///
/// # Errors
///
/// Returns `ConfError::LexerError` when the character at the entry position
/// is forbidden, or when scanning a comment or an argument fails.
pub fn next_token(&mut self) -> Result<Token, ConfError> {
    // Builds a token whose argument flags are all cleared.
    let unflagged = |token_type, span| Token {
        token_type,
        span,
        is_quoted: false,
        is_triple_quoted: false,
        is_expression: false,
    };

    // Only the character at the entry position is vetted here; characters
    // further along are checked by the individual scanners.
    if let Some(offender) = self.current_char().filter(|&c| self.is_forbidden_char(c)) {
        return Err(ConfError::LexerError {
            position: self.position,
            message: format!("Forbidden character: U+{:04X}", offender as u32),
        });
    }

    // `is_whitespace` is false at end of input and on line terminators, so
    // this stops exactly where a token (or EOF) begins.
    while self.is_whitespace() {
        self.advance();
    }

    let start = self.position;
    if start >= self.input.len() {
        return Ok(unflagged(TokenType::Eof, start..start));
    }

    if self.is_comment() {
        self.scan_comment()?;
        return Ok(unflagged(TokenType::Comment, start..self.position));
    }

    let first = self.current_char();

    // Single-character punctuators.
    let punctuator = match first {
        Some(';') => Some(TokenType::Semicolon),
        Some('{') => Some(TokenType::LeftCurlyBrace),
        Some('}') => Some(TokenType::RightCurlyBrace),
        _ => None,
    };
    if let Some(token_type) = punctuator {
        self.advance();
        return Ok(unflagged(token_type, start..self.position));
    }

    // Line terminators; a CR LF pair is folded into one Newline token.
    if let Some(terminator) = first.filter(|&c| self.is_line_terminator(c)) {
        self.advance();
        if terminator == '\r' && self.current_char() == Some('\n') {
            self.advance();
        }
        return Ok(unflagged(TokenType::Newline, start..self.position));
    }

    if first == Some('\\') {
        self.advance();
        if let Some(terminator) = self.current_char().filter(|&c| self.is_line_terminator(c)) {
            // Line continuation: swallow the terminator (CR LF counts as
            // one) and the indentation of the continued line.
            self.advance();
            if terminator == '\r' && self.current_char() == Some('\n') {
                self.advance();
            }
            while self.is_whitespace() {
                self.advance();
            }
            // The reported span covers only the backslash itself.
            return Ok(unflagged(TokenType::Continuation, start..start + 1));
        }
        // An ordinary escape: rewind so the argument scanner sees the
        // backslash as part of the argument.
        self.position = start;
    }

    let (is_quoted, is_triple_quoted, is_expression) = if first == Some('"') {
        let (triple, expression) = self.scan_quoted_argument()?;
        (true, triple, expression)
    } else {
        (false, false, self.scan_argument()?)
    };

    Ok(Token {
        token_type: TokenType::Argument,
        span: start..self.position,
        is_quoted,
        is_triple_quoted,
        is_expression,
    })
}
/// Returns the character at the current position, or `None` at end of input.
fn current_char(&self) -> Option<char> {
    if self.position >= self.input.len() {
        return None;
    }
    let remaining = &self.input[self.position..];
    remaining.chars().next()
}
/// Returns the character after the current one without consuming anything.
///
/// Yields `None` when there is no current character or when the current
/// character is the last one in the input.
fn next_char(&self) -> Option<char> {
    let current = self.current_char()?;
    let following = self.position + current.len_utf8();
    if following >= self.input.len() {
        return None;
    }
    self.input[following..].chars().next()
}
/// Moves the position past the current character (by its full UTF-8 width).
///
/// Does nothing at end of input.
fn advance(&mut self) {
    let width = self.current_char().map_or(0, char::len_utf8);
    self.position += width;
}
/// Reports whether the current character is whitespace that is *not* a line
/// terminator. Returns `false` at end of input.
fn is_whitespace(&self) -> bool {
    match self.current_char() {
        Some(c) => c.is_whitespace() && !self.is_line_terminator(c),
        None => false,
    }
}
/// Reports whether `c` is one of the characters this lexer treats as a line
/// terminator: LF, VT, FF, CR, NEL (U+0085), LINE SEPARATOR (U+2028) or
/// PARAGRAPH SEPARATOR (U+2029).
fn is_line_terminator(&self, c: char) -> bool {
    const LINE_TERMINATORS: [char; 7] = [
        '\u{000A}', '\u{000B}', '\u{000C}', '\u{000D}', '\u{0085}', '\u{2028}', '\u{2029}',
    ];
    LINE_TERMINATORS.contains(&c)
}
/// Reports whether the current character is a line terminator.
/// Returns `false` at end of input.
fn is_newline(&self) -> bool {
    match self.current_char() {
        Some(c) => self.is_line_terminator(c),
        None => false,
    }
}
/// Reports whether `c` may not appear in the input.
///
/// Control characters that are not whitespace are always forbidden.
/// Bidirectional formatting characters are forbidden unless the
/// `allow_bidi` option is set.
fn is_forbidden_char(&self, c: char) -> bool {
    if c.is_control() && !c.is_whitespace() {
        return true;
    }
    if self.options.allow_bidi {
        return false;
    }
    // U+061C, U+200E-U+200F, U+202A-U+202E and U+2066-U+2069.
    matches!(
        c,
        '\u{061C}'
            | '\u{200E}'..='\u{200F}'
            | '\u{202A}'..='\u{202E}'
            | '\u{2066}'..='\u{2069}'
    )
}
/// Reports whether a comment starts at the current position: either `#`,
/// or `/*` when the `allow_c_style_comments` option is set.
fn is_comment(&self) -> bool {
    match self.current_char() {
        Some('#') => true,
        Some('/') => self.options.allow_c_style_comments && self.next_char() == Some('*'),
        _ => false,
    }
}
/// Consumes the comment that starts at the current position.
///
/// A `#` comment runs up to, but not including, the next line terminator
/// (or end of input). A `/* ... */` comment, recognised only when
/// `allow_c_style_comments` is set, runs through its closing `*/`.
///
/// # Errors
///
/// Returns `ConfError::LexerError` when no comment starts here, when the
/// comment contains a forbidden character, or when a `/*` comment is never
/// closed.
fn scan_comment(&mut self) -> Result<(), ConfError> {
    // Error for a forbidden character found inside a comment body.
    fn forbidden(position: usize, c: char) -> ConfError {
        ConfError::LexerError {
            position,
            message: format!("Forbidden character in comment: U+{:04X}", c as u32),
        }
    }

    let start = self.position;
    let opener = self.current_char();

    if opener == Some('#') {
        self.advance();
        loop {
            let c = match self.current_char() {
                Some(c) if !self.is_line_terminator(c) => c,
                // End of line or end of input finishes the comment; the
                // terminator itself is left for the caller.
                _ => return Ok(()),
            };
            if self.is_forbidden_char(c) {
                return Err(forbidden(self.position, c));
            }
            self.advance();
        }
    }

    let opens_block = opener == Some('/')
        && self.next_char() == Some('*')
        && self.options.allow_c_style_comments;
    if !opens_block {
        return Err(ConfError::LexerError {
            position: start,
            message: "Expected comment".to_string(),
        });
    }

    // Skip the `/*` opener.
    self.advance();
    self.advance();
    loop {
        let Some(c) = self.current_char() else {
            // Ran out of input before seeing `*/`.
            return Err(ConfError::LexerError {
                position: start,
                message: "Unterminated multi-line comment".to_string(),
            });
        };
        if self.is_forbidden_char(c) {
            return Err(forbidden(self.position, c));
        }
        if c == '*' && self.next_char() == Some('/') {
            self.advance();
            self.advance();
            return Ok(());
        }
        self.advance();
    }
}
/// Consumes a quoted (`"..."`) or triple-quoted (`"""..."""`) argument that
/// starts at the current position, including its closing delimiter.
///
/// The current character is assumed to be the opening `"`; it is consumed
/// without being checked.
///
/// Returns `(is_triple_quoted, is_expression)`. `is_expression` is `true`
/// only when `allow_expression_arguments` is set and the character right
/// after the closing delimiter is `(`.
///
/// A triple-quoted argument may contain line terminators and `"`
/// characters; it ends at the first run of three consecutive quotes.
///
/// # Errors
///
/// Returns `ConfError::LexerError` for a forbidden character, a backslash
/// at end of input, an unescaped line terminator inside a single-quoted
/// argument, or a missing closing delimiter.
fn scan_quoted_argument(&mut self) -> Result<(bool, bool), ConfError> {
    const TRIPLE_QUOTE: &str = "\"\"\"";

    let start = self.position;
    // Skip the opening quote.
    self.advance();
    let is_triple_quoted = self.current_char() == Some('"') && self.next_char() == Some('"');
    if is_triple_quoted {
        // Skip the remaining two quotes of the `"""` opener.
        self.advance();
        self.advance();
    }

    let mut found_end = false;
    while let Some(c) = self.current_char() {
        if self.is_forbidden_char(c) && !(is_triple_quoted && self.is_line_terminator(c)) {
            return Err(ConfError::LexerError {
                position: self.position,
                message: format!("Forbidden character in quoted argument: U+{:04X}", c as u32),
            });
        }

        if c == '\\' {
            self.advance();
            let Some(escaped) = self.current_char() else {
                return Err(ConfError::LexerError {
                    position: self.position,
                    message: "Unterminated escape sequence".to_string(),
                });
            };
            // The escaped character is always consumed; in a triple-quoted
            // argument an escaped CR also takes the LF of a CR LF pair.
            self.advance();
            if is_triple_quoted && escaped == '\r' && self.current_char() == Some('\n') {
                self.advance();
            }
        } else if c == '"' {
            if !is_triple_quoted {
                self.advance();
                found_end = true;
                break;
            }
            if self.input[self.position..].starts_with(TRIPLE_QUOTE) {
                self.position += TRIPLE_QUOTE.len();
                found_end = true;
                break;
            }
            // A quote that does not begin `"""` is ordinary content. It must
            // be consumed so the loop always makes progress; stepping back
            // onto it would re-read the same quote forever.
            self.advance();
        } else {
            if !is_triple_quoted && self.is_line_terminator(c) {
                return Err(ConfError::LexerError {
                    position: self.position,
                    message: "Newline in quoted string".to_string(),
                });
            }
            self.advance();
        }
    }

    if !found_end {
        let message = if is_triple_quoted {
            "Unterminated triple-quoted string"
        } else {
            "Unterminated quoted string"
        };
        return Err(ConfError::LexerError {
            position: start,
            message: message.to_string(),
        });
    }

    let is_expression =
        self.options.allow_expression_arguments && self.current_char() == Some('(');
    Ok((is_triple_quoted, is_expression))
}
/// Consumes an unquoted argument that starts at the current position.
///
/// The argument ends at whitespace (including line terminators), at one of
/// `;`, `{`, `}`, `"`, `#`, or at end of input. `(` ends the argument only
/// when `allow_expression_arguments` is set; with that option off it is an
/// ordinary argument character, so input such as `foo(bar)` lexes as one
/// argument instead of failing on the parenthesis.
///
/// A backslash escapes the following character. A backslash followed by a
/// line terminator continues the argument on the next line, skipping that
/// line's leading whitespace.
///
/// Returns `true` when expression arguments are enabled and the character
/// immediately after the argument is `(`.
///
/// # Errors
///
/// Returns `ConfError::LexerError` for a forbidden character, a backslash
/// at end of input, or when no character could be consumed at all.
fn scan_argument(&mut self) -> Result<bool, ConfError> {
    let start = self.position;
    let expressions_enabled = self.options.allow_expression_arguments;

    while let Some(c) = self.current_char() {
        let ends_argument = c.is_whitespace()
            || matches!(c, ';' | '{' | '}' | '"' | '#')
            // Parentheses delimit only when the expression extension is on.
            || (c == '(' && expressions_enabled);
        if ends_argument {
            break;
        }

        if self.is_forbidden_char(c) {
            return Err(ConfError::LexerError {
                position: self.position,
                message: format!("Forbidden character in argument: U+{:04X}", c as u32),
            });
        }

        if c != '\\' {
            self.advance();
            continue;
        }

        // Escape sequence: consume the backslash, then the escaped character.
        self.advance();
        let Some(escaped) = self.current_char() else {
            return Err(ConfError::LexerError {
                position: self.position,
                message: "Unterminated escape sequence".to_string(),
            });
        };
        self.advance();
        if self.is_line_terminator(escaped) {
            // Line continuation inside the argument: CR LF counts as one
            // terminator, and the next line's indentation is skipped.
            if escaped == '\r' && self.current_char() == Some('\n') {
                self.advance();
            }
            while self.is_whitespace() {
                self.advance();
            }
        }
    }

    if self.position == start {
        return Err(ConfError::LexerError {
            position: start,
            message: "Expected argument".to_string(),
        });
    }

    Ok(expressions_enabled && self.current_char() == Some('('))
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the options type declared in the parent module.
    type Options = super::super::ConfOptions;

    /// Lexer over `input` with default options.
    fn default_lexer(input: &str) -> Lexer<'_> {
        Lexer::new(input, Options::default())
    }

    /// Lexer over `input` with C-style comments enabled.
    fn c_comment_lexer(input: &str) -> Lexer<'_> {
        let options = Options {
            allow_c_style_comments: true,
            ..Default::default()
        };
        Lexer::new(input, options)
    }

    /// Lexer over `input` with expression arguments enabled.
    fn expression_lexer(input: &str) -> Lexer<'_> {
        let options = Options {
            allow_expression_arguments: true,
            ..Default::default()
        };
        Lexer::new(input, options)
    }

    /// Checks every field of `token` against the expected values.
    fn assert_token(
        token: &Token,
        expected_type: TokenType,
        span_end: usize,
        quoted: bool,
        triple_quoted: bool,
        expression: bool,
    ) {
        assert_eq!(token.token_type, expected_type);
        assert_eq!(token.span, 0..span_end);
        assert_eq!(token.is_quoted, quoted);
        assert_eq!(token.is_triple_quoted, triple_quoted);
        assert_eq!(token.is_expression, expression);
    }

    /// Lexes the first token of `input` with default options and expects an
    /// unflagged token of `expected_type` spanning `0..span_end`.
    fn assert_first_plain_token(input: &str, expected_type: TokenType, span_end: usize) {
        let token = default_lexer(input).next_token().unwrap();
        assert_token(&token, expected_type, span_end, false, false, false);
    }

    #[test]
    fn test_lexer_new() {
        let source = "test";
        let lexer = default_lexer(source);
        assert_eq!(lexer.input, source);
        assert_eq!(lexer.position, 0);
    }

    #[test]
    fn test_lexer_current_char() {
        assert_eq!(default_lexer("test").current_char(), Some('t'));
    }

    #[test]
    fn test_lexer_next_char() {
        assert_eq!(default_lexer("test").next_char(), Some('e'));
    }

    #[test]
    fn test_lexer_advance() {
        let mut lexer = default_lexer("test");
        lexer.advance();
        assert_eq!(lexer.position, 1);
    }

    #[test]
    fn test_lexer_is_whitespace() {
        assert!(default_lexer(" ").is_whitespace());
    }

    #[test]
    fn test_lexer_is_newline() {
        assert!(default_lexer("\n").is_newline());
    }

    #[test]
    fn test_lexer_is_comment() {
        assert!(c_comment_lexer("#").is_comment());
    }

    #[test]
    fn test_lexer_is_comment_multi_line() {
        assert!(c_comment_lexer("/*").is_comment());
    }

    #[test]
    fn test_lexer_scan_comment_single_line() {
        let source = "# This is a comment\n";
        let mut lexer = c_comment_lexer(source);
        assert!(lexer.scan_comment().is_ok());
        // The trailing newline is not part of the comment.
        assert_eq!(lexer.position, source.len() - 1);
    }

    #[test]
    fn test_lexer_scan_comment_multi_line() {
        let source = "/* This is a\nmulti-line\ncomment */";
        let mut lexer = c_comment_lexer(source);
        assert!(lexer.scan_comment().is_ok());
        assert_eq!(lexer.position, source.len());
    }

    #[test]
    fn test_lexer_scan_comment_multi_line_unterminated() {
        let mut lexer = c_comment_lexer("/* This is an unterminated comment");
        assert!(lexer.scan_comment().is_err());
    }

    #[test]
    fn test_lexer_scan_quoted_argument() {
        let source = "\"test\"";
        let mut lexer = default_lexer(source);
        let (triple_quoted, expression) = lexer.scan_quoted_argument().unwrap();
        assert!(!triple_quoted);
        assert!(!expression);
        assert_eq!(lexer.position, source.len());
    }

    #[test]
    fn test_lexer_scan_quoted_argument_with_escape() {
        let source = "\"test\\n\"";
        let mut lexer = default_lexer(source);
        let (triple_quoted, expression) = lexer.scan_quoted_argument().unwrap();
        assert!(!triple_quoted);
        assert!(!expression);
        assert_eq!(lexer.position, source.len());
    }

    #[test]
    fn test_lexer_scan_quoted_argument_unterminated() {
        let mut lexer = default_lexer("\"test");
        assert!(lexer.scan_quoted_argument().is_err());
    }

    #[test]
    fn test_lexer_scan_quoted_argument_triple() {
        let source = "\"\"\"test\"\"\"";
        let mut lexer = default_lexer(source);
        let (triple_quoted, expression) = lexer.scan_quoted_argument().unwrap();
        assert!(triple_quoted);
        assert!(!expression);
        assert_eq!(lexer.position, source.len());
    }

    #[test]
    fn test_lexer_scan_quoted_argument_triple_unterminated() {
        let mut lexer = default_lexer("\"\"\"test");
        assert!(lexer.scan_quoted_argument().is_err());
    }

    #[test]
    fn test_lexer_scan_argument() {
        let source = "test";
        let mut lexer = default_lexer(source);
        let expression = lexer.scan_argument().unwrap();
        assert!(!expression);
        assert_eq!(lexer.position, source.len());
    }

    #[test]
    fn test_lexer_scan_argument_with_escape() {
        let mut lexer = default_lexer("test\\n");
        lexer.scan_argument().unwrap();
        assert_eq!(lexer.position, 6);
    }

    #[test]
    fn test_lexer_scan_argument_with_space() {
        let source = "test ";
        let mut lexer = default_lexer(source);
        let expression = lexer.scan_argument().unwrap();
        assert!(!expression);
        // The trailing space is not part of the argument.
        assert_eq!(lexer.position, source.len() - 1);
    }

    #[test]
    fn test_lexer_scan_argument_with_expression() {
        let mut lexer = expression_lexer("test(");
        let expression = lexer.scan_argument().unwrap();
        assert!(expression);
        assert_eq!(lexer.position, 4);
    }

    #[test]
    fn test_lexer_next_token_eof() {
        assert_first_plain_token("", TokenType::Eof, 0);
    }

    #[test]
    fn test_lexer_next_token_newline() {
        assert_first_plain_token("\n", TokenType::Newline, 1);
    }

    #[test]
    fn test_lexer_next_token_semicolon() {
        assert_first_plain_token(";", TokenType::Semicolon, 1);
    }

    #[test]
    fn test_lexer_next_token_left_curly_brace() {
        assert_first_plain_token("{", TokenType::LeftCurlyBrace, 1);
    }

    #[test]
    fn test_lexer_next_token_right_curly_brace() {
        assert_first_plain_token("}", TokenType::RightCurlyBrace, 1);
    }

    #[test]
    fn test_lexer_next_token_continuation() {
        // The span covers only the backslash, not the newline after it.
        assert_first_plain_token("\\\n", TokenType::Continuation, 1);
    }

    #[test]
    fn test_lexer_next_token_quoted_argument() {
        let source = "\"test\"";
        let token = default_lexer(source).next_token().unwrap();
        assert_token(&token, TokenType::Argument, source.len(), true, false, false);
    }

    #[test]
    fn test_lexer_next_token_triple_quoted_argument() {
        let source = "\"\"\"test\"\"\"";
        let token = default_lexer(source).next_token().unwrap();
        assert_token(&token, TokenType::Argument, source.len(), true, true, false);
    }

    #[test]
    fn test_lexer_next_token_argument() {
        let source = "test";
        assert_first_plain_token(source, TokenType::Argument, source.len());
    }

    #[test]
    fn test_lexer_next_token_argument_with_expression() {
        let token = expression_lexer("test(").next_token().unwrap();
        assert_token(&token, TokenType::Argument, 4, false, false, true);
    }
}