#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[allow(non_camel_case_types)]
#[repr(u16)]
pub enum SyntaxKind {
LEFT_BRACKET = 0,
RIGHT_BRACKET,
EQUALS,
KEY,
SECTION_NAME,
LOCALE,
VALUE,
COMMENT,
NEWLINE,
WHITESPACE,
ERROR,
ROOT,
GROUP,
GROUP_HEADER,
ENTRY,
BLANK_LINE,
}
impl From<SyntaxKind> for rowan::SyntaxKind {
fn from(kind: SyntaxKind) -> Self {
Self(kind as u16)
}
}
#[inline]
fn is_valid_initial_key_char(c: char) -> bool {
c.is_ascii_alphanumeric()
}
#[inline]
fn is_valid_key_char(c: char) -> bool {
c.is_ascii_alphanumeric() || c == '-'
}
#[inline]
fn is_newline(c: char) -> bool {
c == '\n' || c == '\r'
}
#[inline]
fn is_whitespace(c: char) -> bool {
c == ' ' || c == '\t'
}
fn lex_impl(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> + '_ {
let mut remaining = input;
let mut at_line_start = true;
let mut in_section_header = false;
let mut in_locale = false;
std::iter::from_fn(move || {
if remaining.is_empty() {
return None;
}
let c = remaining.chars().next()?;
match c {
_ if is_newline(c) => {
let char_len = c.len_utf8();
if c == '\r' && remaining.get(1..2) == Some("\n") {
let (token, rest) = remaining.split_at(2);
remaining = rest;
at_line_start = true;
in_section_header = false;
in_locale = false;
Some((SyntaxKind::NEWLINE, token))
} else {
let (token, rest) = remaining.split_at(char_len);
remaining = rest;
at_line_start = true;
in_section_header = false;
in_locale = false;
Some((SyntaxKind::NEWLINE, token))
}
}
'#' if at_line_start => {
let end = remaining.find(is_newline).unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
Some((SyntaxKind::COMMENT, token))
}
'[' if at_line_start => {
remaining = &remaining[1..]; at_line_start = false;
in_section_header = true;
Some((SyntaxKind::LEFT_BRACKET, "["))
}
'[' => {
remaining = &remaining[1..]; in_locale = true;
Some((SyntaxKind::LEFT_BRACKET, "["))
}
']' => {
remaining = &remaining[1..]; in_section_header = false;
in_locale = false;
Some((SyntaxKind::RIGHT_BRACKET, "]"))
}
_ if is_whitespace(c) && at_line_start => {
let end = remaining
.find(|c| !is_whitespace(c))
.unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
Some((SyntaxKind::WHITESPACE, token))
}
_ if is_whitespace(c) => {
let end = remaining
.find(|c| !is_whitespace(c))
.unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
Some((SyntaxKind::WHITESPACE, token))
}
'=' => {
remaining = &remaining[1..];
Some((SyntaxKind::EQUALS, "="))
}
_ if is_valid_initial_key_char(c) && at_line_start => {
let end = remaining
.find(|c: char| !is_valid_key_char(c))
.unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
at_line_start = false;
Some((SyntaxKind::KEY, token))
}
_ if in_section_header || in_locale => {
let end = remaining.find(']').unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
Some((SyntaxKind::VALUE, token))
}
_ if !at_line_start => {
let end = remaining.find(is_newline).unwrap_or(remaining.len());
let (token, rest) = remaining.split_at(end);
remaining = rest;
Some((SyntaxKind::VALUE, token))
}
_ => {
let char_len = c.len_utf8();
let (token, rest) = remaining.split_at(char_len);
remaining = rest;
at_line_start = false;
Some((SyntaxKind::ERROR, token))
}
}
})
}
pub(crate) fn lex(input: &str) -> impl Iterator<Item = (SyntaxKind, &str)> {
lex_impl(input)
}
#[cfg(test)]
mod tests {
use super::SyntaxKind::*;
use super::*;
#[test]
fn test_empty() {
assert_eq!(lex("").collect::<Vec<_>>(), vec![]);
}
#[test]
fn test_simple_section() {
let input = "[Desktop Entry]\n";
assert_eq!(
lex(input).collect::<Vec<_>>(),
vec![
(LEFT_BRACKET, "["),
(VALUE, "Desktop Entry"),
(RIGHT_BRACKET, "]"),
(NEWLINE, "\n"),
]
);
}
#[test]
fn test_key_value() {
let input = "Name=Example\n";
assert_eq!(
lex(input).collect::<Vec<_>>(),
vec![
(KEY, "Name"),
(EQUALS, "="),
(VALUE, "Example"),
(NEWLINE, "\n"),
]
);
}
#[test]
fn test_key_value_with_spaces() {
let input = "Name = Example Application\n";
assert_eq!(
lex(input).collect::<Vec<_>>(),
vec![
(KEY, "Name"),
(WHITESPACE, " "),
(EQUALS, "="),
(WHITESPACE, " "),
(VALUE, "Example Application"),
(NEWLINE, "\n"),
]
);
}
#[test]
fn test_comment() {
let input = "# This is a comment\n";
assert_eq!(
lex(input).collect::<Vec<_>>(),
vec![(COMMENT, "# This is a comment"), (NEWLINE, "\n"),]
);
}
#[test]
fn test_full_desktop_file() {
let input = r#"[Desktop Entry]
Name=Example
Type=Application
Exec=example
# Comment
Icon=example.png
[Desktop Action Play]
Name=Play
Exec=example --play
"#;
let tokens: Vec<_> = lex(input).collect();
assert_eq!(tokens[0].0, LEFT_BRACKET);
assert_eq!(tokens[1].0, VALUE); assert_eq!(tokens[2].0, RIGHT_BRACKET);
assert_eq!(tokens[3].0, NEWLINE);
let name_idx = tokens
.iter()
.position(|(k, t)| *k == KEY && *t == "Name")
.unwrap();
assert_eq!(tokens[name_idx + 1].0, EQUALS);
assert_eq!(tokens[name_idx + 2].0, VALUE);
assert_eq!(tokens[name_idx + 2].1, "Example");
}
#[test]
fn test_blank_lines() {
let input = "Key=Value\n\nKey2=Value2\n";
let tokens: Vec<_> = lex(input).collect();
let first_newline = tokens.iter().position(|(k, _)| *k == NEWLINE).unwrap();
assert_eq!(tokens[first_newline + 1].0, NEWLINE);
}
}