use super::error::{ParseError, ParseErrorKind};
/// Coarse category of a token produced by [`Tokenizer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    /// End of input reached.
    Eof,
    /// A field name; see [`NameKind`] for the sub-category.
    Name,
    /// A scalar value; see [`ScalarKind`] for the sub-category.
    Scalar,
    /// `{` or `<` opening a nested message value.
    MessageOpen,
    /// `}` or `>` closing a nested message value.
    MessageClose,
    /// `[` opening a list value (only in value position; `[` at field-name
    /// position starts a bracketed type name instead).
    ListOpen,
    /// `]` closing a list value.
    ListClose,
}
/// Sub-category of a [`TokenKind::Name`] token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NameKind {
    /// A plain identifier field name, e.g. `foo`.
    Ident,
    /// A bracketed extension/`Any` type name, e.g. `[pkg.ext]`; the
    /// brackets are included in `Token::raw`.
    TypeName,
    /// A numeric field number, e.g. `42`; must be a non-negative decimal
    /// that fits in `i32`.
    FieldNumber,
}
/// Sub-category of a [`TokenKind::Scalar`] token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScalarKind {
    /// A numeric literal (decimal, hex, octal, or float).
    Number,
    /// A quoted string, possibly a run of adjacent concatenated strings.
    String,
    /// A bare identifier literal such as `true`, `inf`, or an enum value.
    Literal,
}
/// One lexed token, borrowing its raw text from the tokenizer input.
#[derive(Debug, Clone, Copy)]
pub struct Token<'a> {
    /// Coarse token category.
    pub kind: TokenKind,
    /// Exact input slice the token was lexed from.
    pub raw: &'a str,
    /// Byte offset of the token start within the input.
    pub pos: usize,
    /// Only meaningful when `kind == TokenKind::Name`.
    pub name_kind: NameKind,
    /// Only meaningful when `kind == TokenKind::Scalar`.
    pub scalar_kind: ScalarKind,
    /// For name tokens: whether a `:` separator followed (and was consumed).
    pub has_separator: bool,
}
/// The kind of bracketed scope the tokenizer is currently inside.
#[derive(Clone, Copy, PartialEq, Eq)]
enum OpenKind {
    /// Not inside any delimiter (top level of the document).
    Top,
    /// Inside a message; the payload is the expected closing byte
    /// (`}` for `{`, `>` for `<`).
    Message(u8),
    /// Inside a `[...]` list value.
    List,
}
/// What was last emitted (or consumed); this drives the state machine in
/// `parse_next`, which decides what token may legally come next.
#[derive(Clone, Copy, PartialEq, Eq)]
enum LastKind {
    /// Beginning of input; nothing emitted yet.
    Bof,
    Name,
    Scalar,
    MessageOpen,
    MessageClose,
    ListOpen,
    ListClose,
    /// A `,` separator was just consumed (separators never surface as tokens).
    Comma,
    /// A `;` separator was just consumed (separators never surface as tokens).
    Semicolon,
}
/// Streaming textproto tokenizer with one token of lookahead.
pub struct Tokenizer<'a> {
    input: &'a str,
    /// Byte offset of the next unread input byte.
    cursor: usize,
    /// State-machine memory of the last emitted token / consumed separator.
    last_kind: LastKind,
    /// Stack of open delimiter bytes (`{`, `<`, `[`); only the first
    /// `open_depth` entries are live.
    open_stack: [u8; crate::message::RECURSION_LIMIT as usize],
    open_depth: usize,
    /// Token buffered by `peek`, together with the post-parse
    /// `last_kind`/`cursor` state that `read` must restore on consumption.
    peeked: Option<(Token<'a>, LastKind, usize)>,
}
impl<'a> Tokenizer<'a> {
    /// Creates a tokenizer over `input`, skipping a leading UTF-8 BOM.
    pub fn new(input: &'a str) -> Self {
        // U+FEFF encodes to 3 bytes in UTF-8.
        let cursor = if input.starts_with('\u{FEFF}') { 3 } else { 0 };
        Tokenizer {
            input,
            cursor,
            last_kind: LastKind::Bof,
            open_stack: [0u8; crate::message::RECURSION_LIMIT as usize],
            open_depth: 0,
            peeked: None,
        }
    }

    /// Returns the next token without consuming it; repeated calls return
    /// the same token until `read` is called.
    pub fn peek(&mut self) -> Result<Token<'a>, ParseError> {
        if let Some((tok, _, _)) = self.peeked {
            return Ok(tok);
        }
        // Parse one token ahead, then roll the observable state back and
        // stash the post-parse state for `read` to replay later.
        let save_cursor = self.cursor;
        let save_depth = self.open_depth;
        let save_last = self.last_kind;
        // NOTE(review): on Err the saved state is NOT restored, so the
        // tokenizer is left mid-parse after a failed peek — presumably
        // callers abort on the first error; confirm.
        let tok = self.parse_next()?;
        let new_last = self.last_kind;
        let new_cursor = self.cursor;
        self.cursor = save_cursor;
        self.open_depth = save_depth;
        self.last_kind = save_last;
        self.peeked = Some((tok, new_last, new_cursor));
        Ok(tok)
    }

    /// Consumes and returns the next token.
    pub fn read(&mut self) -> Result<Token<'a>, ParseError> {
        if let Some((tok, last, cur)) = self.peeked.take() {
            // Replay the transition computed during `peek`. The delimiter
            // byte written into `open_stack` back then is still in place,
            // so adjusting `open_depth` alone re-creates the stack state.
            self.last_kind = last;
            self.cursor = cur;
            match tok.kind {
                TokenKind::MessageOpen | TokenKind::ListOpen => self.open_depth += 1,
                TokenKind::MessageClose | TokenKind::ListClose => self.open_depth -= 1,
                _ => {}
            }
            return Ok(tok);
        }
        self.parse_next()
    }

    /// Converts byte offset `pos` to 1-based (line, column); the column is
    /// counted in chars, not bytes. Offsets past the end clamp to the end.
    pub fn line_col(&self, pos: usize) -> (u32, u32) {
        let pos = pos.min(self.input.len());
        let before = &self.input[..pos];
        let line = before.bytes().filter(|&b| b == b'\n').count() as u32 + 1;
        let line_start = before.rfind('\n').map(|i| i + 1).unwrap_or(0);
        let col = before[line_start..].chars().count() as u32 + 1;
        (line, col)
    }

    /// Builds a [`ParseError`] located at byte offset `pos`.
    fn err(&self, pos: usize, kind: ParseErrorKind) -> ParseError {
        let (line, col) = self.line_col(pos);
        ParseError::new(line, col, kind)
    }

    /// Builds a [`ParseError`] located at the current cursor.
    fn err_here(&self, kind: ParseErrorKind) -> ParseError {
        self.err(self.cursor, kind)
    }

    /// Unconsumed input, as bytes.
    #[inline]
    fn rest(&self) -> &'a [u8] {
        &self.input.as_bytes()[self.cursor..]
    }

    /// The innermost open scope, derived from the top of the delimiter stack.
    fn current_open(&self) -> OpenKind {
        if self.open_depth == 0 {
            return OpenKind::Top;
        }
        match self.open_stack[self.open_depth - 1] {
            b'{' => OpenKind::Message(b'}'),
            b'<' => OpenKind::Message(b'>'),
            b'[' => OpenKind::List,
            _ => unreachable!("open_stack holds only {{, <, ["),
        }
    }

    /// Pushes an opening delimiter byte, enforcing the recursion limit.
    fn push_open(&mut self, ch: u8) -> Result<(), ParseError> {
        if self.open_depth >= self.open_stack.len() {
            return Err(self.err_here(ParseErrorKind::RecursionLimitExceeded));
        }
        self.open_stack[self.open_depth] = ch;
        self.open_depth += 1;
        Ok(())
    }

    fn pop_open(&mut self) {
        debug_assert!(self.open_depth > 0);
        self.open_depth -= 1;
    }

    /// Advances the cursor by `n` bytes, then skips any trailing
    /// whitespace and `#` line comments.
    fn consume(&mut self, n: usize) {
        self.cursor += n;
        loop {
            match self.rest().first() {
                Some(&c) if is_textproto_ws(c) => self.cursor += 1,
                Some(b'#') => {
                    // A comment extends to the next newline or end of input.
                    let rest = self.rest();
                    match rest.iter().position(|&b| b == b'\n') {
                        Some(i) => self.cursor += i + 1,
                        None => self.cursor = self.input.len(),
                    }
                }
                _ => break,
            }
        }
    }

    /// Consumes `c` (plus any following whitespace/comments) if it is the
    /// next byte; returns whether it was consumed.
    fn try_consume_char(&mut self, c: u8) -> bool {
        if self.rest().first() == Some(&c) {
            self.consume(1);
            true
        } else {
            false
        }
    }

    /// Core state machine: decides what may come next from the previously
    /// emitted token (`last_kind`) and the innermost open scope. `,` and
    /// `;` separators are consumed in this loop and never surface as tokens.
    fn parse_next(&mut self) -> Result<Token<'a>, ParseError> {
        loop {
            // Skip leading whitespace/comments before inspecting input.
            self.consume(0);
            let at_eof = self.rest().is_empty();
            let open = self.current_open();
            match self.last_kind {
                // Start of input: expect a field name, or empty input.
                LastKind::Bof => {
                    if at_eof {
                        return self.emit_eof();
                    }
                    return self.parse_field_name();
                }
                // After a field name a value must follow:
                // `{…}`/`<…>` message, `[…]` list, or a scalar.
                LastKind::Name => {
                    if at_eof {
                        return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                    }
                    let ch = self.rest()[0];
                    match ch {
                        b'{' | b'<' => {
                            self.push_open(ch)?;
                            return self.emit(TokenKind::MessageOpen, 1);
                        }
                        b'[' => {
                            self.push_open(ch)?;
                            return self.emit(TokenKind::ListOpen, 1);
                        }
                        _ => return self.parse_scalar(),
                    }
                }
                // After a complete value: optional separator, a close
                // delimiter, or the next field.
                LastKind::Scalar | LastKind::MessageClose | LastKind::ListClose => {
                    match open {
                        OpenKind::Top => {
                            if at_eof {
                                return self.emit_eof();
                            }
                            match self.rest()[0] {
                                b',' => {
                                    self.consume(1);
                                    self.last_kind = LastKind::Comma;
                                    continue;
                                }
                                b';' => {
                                    self.consume(1);
                                    self.last_kind = LastKind::Semicolon;
                                    continue;
                                }
                                _ => return self.parse_field_name(),
                            }
                        }
                        OpenKind::Message(close) => {
                            if at_eof {
                                return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                            }
                            let ch = self.rest()[0];
                            if ch == close {
                                self.pop_open();
                                return self.emit(TokenKind::MessageClose, 1);
                            }
                            // Closing with the wrong bracket style: `{…>` or `<…}`.
                            if ch == other_close(close) {
                                return Err(self.err_here(ParseErrorKind::DelimiterMismatch));
                            }
                            match ch {
                                b',' => {
                                    self.consume(1);
                                    self.last_kind = LastKind::Comma;
                                    continue;
                                }
                                b';' => {
                                    self.consume(1);
                                    self.last_kind = LastKind::Semicolon;
                                    continue;
                                }
                                _ => return self.parse_field_name(),
                            }
                        }
                        // Inside a list, elements must be comma-separated.
                        OpenKind::List => {
                            if at_eof {
                                return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                            }
                            let ch = self.rest()[0];
                            match ch {
                                b']' => {
                                    self.pop_open();
                                    return self.emit(TokenKind::ListClose, 1);
                                }
                                b',' => {
                                    self.consume(1);
                                    self.last_kind = LastKind::Comma;
                                    continue;
                                }
                                _ => {
                                    return Err(self.err_here(ParseErrorKind::UnexpectedToken {
                                        expected: "',' or ']'",
                                    }));
                                }
                            }
                        }
                    }
                }
                // Just opened a message: a field name or an immediate close.
                LastKind::MessageOpen => {
                    if at_eof {
                        return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                    }
                    let OpenKind::Message(close) = open else {
                        unreachable!("MessageOpen always pushes a Message frame")
                    };
                    let ch = self.rest()[0];
                    if ch == close {
                        self.pop_open();
                        return self.emit(TokenKind::MessageClose, 1);
                    }
                    if ch == other_close(close) {
                        return Err(self.err_here(ParseErrorKind::DelimiterMismatch));
                    }
                    return self.parse_field_name();
                }
                // Just opened a list: first element or an immediate close.
                LastKind::ListOpen => {
                    if at_eof {
                        return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                    }
                    let ch = self.rest()[0];
                    match ch {
                        b']' => {
                            self.pop_open();
                            return self.emit(TokenKind::ListClose, 1);
                        }
                        b'{' | b'<' => {
                            self.push_open(ch)?;
                            return self.emit(TokenKind::MessageOpen, 1);
                        }
                        _ => return self.parse_scalar(),
                    }
                }
                // After a separator: next field, list element, or close.
                LastKind::Comma | LastKind::Semicolon => {
                    match open {
                        OpenKind::Top => {
                            if at_eof {
                                return self.emit_eof();
                            }
                            return self.parse_field_name();
                        }
                        OpenKind::Message(close) => {
                            if at_eof {
                                return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                            }
                            let ch = self.rest()[0];
                            if ch == close {
                                self.pop_open();
                                return self.emit(TokenKind::MessageClose, 1);
                            }
                            if ch == other_close(close) {
                                return Err(self.err_here(ParseErrorKind::DelimiterMismatch));
                            }
                            return self.parse_field_name();
                        }
                        OpenKind::List => {
                            if at_eof {
                                return Err(self.err_here(ParseErrorKind::UnexpectedEof));
                            }
                            let ch = self.rest()[0];
                            match ch {
                                b'{' | b'<' => {
                                    self.push_open(ch)?;
                                    return self.emit(TokenKind::MessageOpen, 1);
                                }
                                _ => return self.parse_scalar(),
                            }
                        }
                    }
                }
            }
        }
    }

    /// Emits a fixed-width delimiter token starting at the cursor and
    /// records it as the new `last_kind`.
    fn emit(&mut self, kind: TokenKind, len: usize) -> Result<Token<'a>, ParseError> {
        let pos = self.cursor;
        let raw = &self.input[pos..pos + len];
        self.consume(len);
        self.last_kind = match kind {
            TokenKind::Name => LastKind::Name,
            TokenKind::Scalar => LastKind::Scalar,
            TokenKind::MessageOpen => LastKind::MessageOpen,
            TokenKind::MessageClose => LastKind::MessageClose,
            TokenKind::ListOpen => LastKind::ListOpen,
            TokenKind::ListClose => LastKind::ListClose,
            // Unused arm: EOF is produced by `emit_eof`, never by `emit`.
            TokenKind::Eof => LastKind::Bof,
        };
        Ok(Token {
            kind,
            raw,
            pos,
            // Sub-kind fields are placeholders for delimiter tokens.
            name_kind: NameKind::Ident,
            scalar_kind: ScalarKind::Number,
            has_separator: false,
        })
    }

    /// Emits the EOF token (an empty slice at end of input); does not
    /// update `last_kind`.
    fn emit_eof(&mut self) -> Result<Token<'a>, ParseError> {
        Ok(Token {
            kind: TokenKind::Eof,
            raw: &self.input[self.input.len()..],
            pos: self.input.len(),
            name_kind: NameKind::Ident,
            scalar_kind: ScalarKind::Number,
            has_separator: false,
        })
    }

    /// Lexes a field name: a bracketed extension/`Any` type name, a plain
    /// identifier, or a numeric field number (non-negative decimal in i32
    /// range). Also consumes an optional trailing `:` separator, reported
    /// via `has_separator`.
    fn parse_field_name(&mut self) -> Result<Token<'a>, ParseError> {
        let start = self.cursor;
        let rest = self.rest();
        if rest[0] == b'[' {
            // Bracketed name: scan to the matching `]`; the brackets are
            // kept in `raw`.
            let mut i = 1;
            while i < rest.len() && rest[i] != b']' {
                i += 1;
            }
            if i >= rest.len() {
                return Err(self.err(start, ParseErrorKind::UnexpectedEof));
            }
            let len = i + 1;
            let raw = &self.input[start..start + len];
            self.consume(len);
            self.last_kind = LastKind::Name;
            let has_separator = self.try_consume_char(b':');
            return Ok(Token {
                kind: TokenKind::Name,
                raw,
                pos: start,
                name_kind: NameKind::TypeName,
                scalar_kind: ScalarKind::Number,
                has_separator,
            });
        }
        let ilen = parse_ident(rest, false);
        if ilen > 0 {
            let raw = &self.input[start..start + ilen];
            self.consume(ilen);
            self.last_kind = LastKind::Name;
            let has_separator = self.try_consume_char(b':');
            return Ok(Token {
                kind: TokenKind::Name,
                raw,
                pos: start,
                name_kind: NameKind::Ident,
                scalar_kind: ScalarKind::Number,
                has_separator,
            });
        }
        // Field numbers must be plain non-negative decimals in i32 range.
        if let Some(num) = lex_number(rest) {
            if !num.neg && num.kind == NumKind::Dec {
                let s = &self.input[start..start + num.len];
                if s.parse::<i32>().is_ok() {
                    let raw = s;
                    self.consume(num.len);
                    self.last_kind = LastKind::Name;
                    let has_separator = self.try_consume_char(b':');
                    return Ok(Token {
                        kind: TokenKind::Name,
                        raw,
                        pos: start,
                        name_kind: NameKind::FieldNumber,
                        scalar_kind: ScalarKind::Number,
                        has_separator,
                    });
                }
            }
        }
        Err(self.err(
            start,
            ParseErrorKind::UnexpectedToken {
                expected: "field name",
            },
        ))
    }

    /// Lexes a scalar value: a quoted string run, a bare literal
    /// identifier (`true`, `inf`, enum names, …), or a number.
    fn parse_scalar(&mut self) -> Result<Token<'a>, ParseError> {
        let start = self.cursor;
        let rest = self.rest();
        let first = rest[0];
        if first == b'"' || first == b'\'' {
            // Adjacent quoted strings merge into one token; `raw` keeps the
            // quotes and the whitespace between parts.
            let len = lex_string_run(rest).ok_or_else(|| {
                self.err(start, ParseErrorKind::InvalidString("unterminated string"))
            })?;
            let raw = &self.input[start..start + len];
            self.consume(len);
            self.last_kind = LastKind::Scalar;
            return Ok(Token {
                kind: TokenKind::Scalar,
                raw,
                pos: start,
                name_kind: NameKind::Ident,
                scalar_kind: ScalarKind::String,
                has_separator: false,
            });
        }
        // Identifiers are tried before numbers, so `-inf` / `- inf` lex as
        // signed literals rather than malformed numbers.
        let ilen = parse_ident(rest, true);
        if ilen > 0 {
            let raw = &self.input[start..start + ilen];
            self.consume(ilen);
            self.last_kind = LastKind::Scalar;
            return Ok(Token {
                kind: TokenKind::Scalar,
                raw,
                pos: start,
                name_kind: NameKind::Ident,
                scalar_kind: ScalarKind::Literal,
                has_separator: false,
            });
        }
        if let Some(num) = lex_number(rest) {
            let raw = &self.input[start..start + num.len];
            self.consume(num.len);
            self.last_kind = LastKind::Scalar;
            return Ok(Token {
                kind: TokenKind::Scalar,
                raw,
                pos: start,
                name_kind: NameKind::Ident,
                scalar_kind: ScalarKind::Number,
                has_separator: false,
            });
        }
        Err(self.err(
            start,
            ParseErrorKind::UnexpectedToken {
                expected: "scalar value",
            },
        ))
    }
}
/// Maps a message-closing delimiter to its counterpart: `}` <-> `>`.
/// Any other byte is a caller bug.
#[inline]
fn other_close(c: u8) -> u8 {
    if c == b'}' {
        b'>'
    } else if c == b'>' {
        b'}'
    } else {
        unreachable!()
    }
}
/// True when `c` cannot continue a number/identifier token, i.e. the
/// token ends cleanly before `c`.
#[inline]
fn is_delim(c: u8) -> bool {
    let continues_token = matches!(c, b'-' | b'+' | b'.' | b'_') || c.is_ascii_alphanumeric();
    !continues_token
}
/// Whitespace per the textproto grammar: space plus the ASCII controls
/// TAB, LF, VT, FF, CR (exactly the range 0x09..=0x0D).
#[inline]
pub(super) const fn is_textproto_ws(c: u8) -> bool {
    c == b' ' || matches!(c, b'\t'..=b'\r')
}
/// Lexes an identifier at the start of `s`, returning its byte length or
/// 0 when `s` does not begin with a valid, cleanly-delimited identifier.
///
/// With `allow_neg`, a leading `-` (plus any whitespace/comments after
/// it) is included in the length, supporting signed literals like
/// `- inf`; the returned length then covers sign, gap, and word.
fn parse_ident(s: &[u8], allow_neg: bool) -> usize {
    let mut len = 0;
    if allow_neg && s.first() == Some(&b'-') {
        // Count the sign and the whitespace/comment gap that follows it.
        let after_sign = consume_ws(&s[1..]);
        if after_sign.is_empty() {
            return 0;
        }
        len = s.len() - after_sign.len();
    }
    // First character: ASCII letter or underscore.
    match s.get(len) {
        Some(&c) if c == b'_' || c.is_ascii_alphabetic() => len += 1,
        _ => return 0,
    }
    // Continuation: letters, digits, underscores.
    while s.get(len).is_some_and(|&c| c == b'_' || c.is_ascii_alphanumeric()) {
        len += 1;
    }
    // The identifier must stop at a clean delimiter (or end of input);
    // e.g. `foo.bar` is not a plain identifier.
    match s.get(len) {
        Some(&c) if !is_delim(c) => 0,
        _ => len,
    }
}
/// Numeric literal family detected by [`lex_number`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub(super) enum NumKind {
    /// Decimal integer.
    Dec,
    /// `0x`/`0X` hexadecimal integer.
    Hex,
    /// Leading-`0` octal integer.
    Oct,
    /// Floating point: has a fraction, an exponent, and/or an `f`/`F` suffix.
    Float,
}
/// Result of lexing one numeric literal.
#[derive(Clone, Copy, Debug)]
pub(super) struct LexNumber {
    pub kind: NumKind,
    /// Whether a leading `-` was present.
    pub neg: bool,
    /// Total byte length of the literal, including sign and separator gap.
    pub len: usize,
    /// Bytes of whitespace/comments between a leading `-` and the digits.
    pub sep: usize,
}
/// Lexes one numeric literal at the start of `input`.
///
/// Accepts decimal, `0x`/`0X` hex, leading-`0` octal, and floats
/// (fractional part, exponent, and/or trailing `f`/`F` suffix). A leading
/// `-` may be separated from the digits by whitespace/comments; the gap
/// length is reported in [`LexNumber::sep`]. Returns `None` when no
/// valid, cleanly-delimited number starts here.
pub(super) fn lex_number(input: &[u8]) -> Option<LexNumber> {
    let mut s = input;
    let mut len = 0;
    let mut neg = false;
    let mut sep = 0;
    let mut kind = NumKind::Dec;
    if s.is_empty() {
        return None;
    }
    if s[0] == b'-' {
        neg = true;
        s = &s[1..];
        len += 1;
        // Whitespace/comments may sit between the sign and the digits.
        let before = s.len();
        s = consume_ws(s);
        sep = before - s.len();
        len += sep;
        if s.is_empty() {
            return None;
        }
    }
    match s[0] {
        b'0' => {
            if s.len() > 1 {
                match s[1] {
                    b'x' | b'X' => {
                        kind = NumKind::Hex;
                        let mut n = 2;
                        while s.get(n).is_some_and(|c| c.is_ascii_hexdigit()) {
                            n += 1;
                        }
                        // `0x` with no digits is invalid.
                        if n == 2 {
                            return None;
                        }
                        len += n;
                        s = &s[n..];
                        return finish_number(s, kind, neg, len, sep);
                    }
                    b'0'..=b'7' => {
                        kind = NumKind::Oct;
                        let mut n = 2;
                        while s.get(n).is_some_and(|&c| (b'0'..=b'7').contains(&c)) {
                            n += 1;
                        }
                        len += n;
                        s = &s[n..];
                        return finish_number(s, kind, neg, len, sep);
                    }
                    // A lone `0` (possibly followed by `.`/`e`/`f`) falls
                    // through to the decimal/float handling below.
                    _ => {}
                }
            }
            s = &s[1..];
            len += 1;
        }
        b'1'..=b'9' => {
            let mut n = 1;
            while s.get(n).is_some_and(u8::is_ascii_digit) {
                n += 1;
            }
            s = &s[n..];
            len += n;
        }
        // `.5`-style float: no integer part.
        b'.' => {
            kind = NumKind::Float;
        }
        _ => return None,
    }
    if s.first() == Some(&b'.') {
        let mut n = 1;
        // When there was no integer part, the fraction must have digits
        // (a bare `.` is not a number).
        let had_digits = kind != NumKind::Float;
        while s.get(n).is_some_and(u8::is_ascii_digit) {
            n += 1;
        }
        if !had_digits && n == 1 {
            return None;
        }
        s = &s[n..];
        len += n;
        kind = NumKind::Float;
    }
    // Optional exponent: `e`/`E`, optional sign, at least one digit.
    if s.len() >= 2 && matches!(s[0], b'e' | b'E') {
        kind = NumKind::Float;
        let mut n = 1;
        if matches!(s[1], b'+' | b'-') {
            n = 2;
            if s.len() <= 2 {
                return None;
            }
        }
        let start = n;
        while s.get(n).is_some_and(u8::is_ascii_digit) {
            n += 1;
        }
        if n == start {
            return None;
        }
        s = &s[n..];
        len += n;
    }
    // Optional C-style float suffix.
    if matches!(s.first(), Some(b'f' | b'F')) {
        kind = NumKind::Float;
        s = &s[1..];
        len += 1;
    }
    finish_number(s, kind, neg, len, sep)
}
/// Finalizes a lexed number: it is only valid when followed by a clean
/// delimiter or end of input — otherwise (e.g. `42abc`) reject it.
#[inline]
fn finish_number(
    rest: &[u8],
    kind: NumKind,
    neg: bool,
    len: usize,
    sep: usize,
) -> Option<LexNumber> {
    match rest.first() {
        Some(&c) if !is_delim(c) => None,
        _ => Some(LexNumber { kind, neg, len, sep }),
    }
}
/// Normalizes a raw numeric token for `str::parse`: drops a trailing
/// `f`/`F` float suffix, and for `- 42`-style literals removes the gap
/// between the sign and the digits. Allocates only in the gap case.
pub(super) fn number_for_parse<'a>(raw: &'a str, num: &LexNumber) -> alloc::borrow::Cow<'a, str> {
    let has_suffix = num.kind == NumKind::Float
        && raw.as_bytes().last().is_some_and(|&b| b == b'f' || b == b'F');
    let end = raw.len() - usize::from(has_suffix);
    if !(num.neg && num.sep > 0) {
        return alloc::borrow::Cow::Borrowed(&raw[..end]);
    }
    // Rebuild `-<digits>` without the `sep` bytes that followed the sign.
    let mut out = alloc::string::String::with_capacity(end - num.sep);
    out.push('-');
    out.push_str(&raw[1 + num.sep..end]);
    alloc::borrow::Cow::Owned(out)
}
/// Strips leading textproto whitespace and `#` line comments from `s`.
fn consume_ws(mut s: &[u8]) -> &[u8] {
    while let Some(&c) = s.first() {
        if is_textproto_ws(c) {
            s = &s[1..];
        } else if c == b'#' {
            // A comment runs to the next newline; a comment with no
            // newline consumes the rest of the input.
            match s.iter().position(|&b| b == b'\n') {
                Some(nl) => s = &s[nl + 1..],
                None => return &[],
            }
        } else {
            break;
        }
    }
    s
}
/// Lexes a run of one or more adjacent quoted strings (textproto string
/// concatenation), returning the total byte length including the quotes
/// and the whitespace between parts, or `None` for an unterminated
/// string. The caller guarantees `s` starts with `"` or `'`.
fn lex_string_run(s: &[u8]) -> Option<usize> {
    let mut i = 0;
    loop {
        let quote = *s.get(i)?;
        debug_assert!(quote == b'"' || quote == b'\'');
        i += 1;
        // Scan one quoted string body.
        loop {
            match s.get(i)? {
                &c if c == quote => {
                    i += 1;
                    break;
                }
                // Raw newlines and NUL bytes may not appear in a string.
                b'\n' | 0 => return None,
                b'\\' => {
                    // Skip the escaped byte, so an escaped quote does not
                    // close the string; a lone trailing `\` is rejected.
                    i += 2;
                    if i > s.len() {
                        return None;
                    }
                }
                _ => i += 1,
            }
        }
        // If only whitespace separates this string from another quote, the
        // next string belongs to the same run.
        // NOTE(review): `#` comments between adjacent strings are not
        // skipped here, so a comment breaks the run — confirm this matches
        // the intended grammar.
        let mut j = i;
        while s.get(j).is_some_and(|&c| is_textproto_ws(c)) {
            j += 1;
        }
        if matches!(s.get(j), Some(b'"') | Some(b'\'')) {
            i = j;
            continue;
        }
        return Some(i);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Lexes `input` to completion, collecting `(kind, raw)` pairs up to
    /// (but excluding) the EOF token.
    fn drain(input: &str) -> Result<Vec<(TokenKind, &str)>, ParseError> {
        let mut t = Tokenizer::new(input);
        let mut out = Vec::new();
        loop {
            let tok = t.read()?;
            if tok.kind == TokenKind::Eof {
                return Ok(out);
            }
            out.push((tok.kind, tok.raw));
        }
    }

    #[test]
    fn tokenize_simple_field() {
        let toks = drain("foo: 42").unwrap();
        assert_eq!(toks, [(TokenKind::Name, "foo"), (TokenKind::Scalar, "42")]);
    }

    #[test]
    fn tokenize_nested_message() {
        let toks = drain("child { a: 1 b: 2 }").unwrap();
        assert_eq!(
            toks,
            [
                (TokenKind::Name, "child"),
                (TokenKind::MessageOpen, "{"),
                (TokenKind::Name, "a"),
                (TokenKind::Scalar, "1"),
                (TokenKind::Name, "b"),
                (TokenKind::Scalar, "2"),
                (TokenKind::MessageClose, "}"),
            ]
        );
    }

    #[test]
    fn tokenize_angle_delimiters() {
        let toks = drain("m < x: 1 >").unwrap();
        assert_eq!(toks[1], (TokenKind::MessageOpen, "<"));
        assert_eq!(toks[4], (TokenKind::MessageClose, ">"));
    }

    #[test]
    fn tokenize_list() {
        let toks = drain("r: [1, 2, 3]").unwrap();
        assert_eq!(
            toks,
            [
                (TokenKind::Name, "r"),
                (TokenKind::ListOpen, "["),
                (TokenKind::Scalar, "1"),
                (TokenKind::Scalar, "2"),
                (TokenKind::Scalar, "3"),
                (TokenKind::ListClose, "]"),
            ]
        );
    }

    #[test]
    fn tokenize_empty_list() {
        let toks = drain("r: []").unwrap();
        assert_eq!(
            toks,
            [
                (TokenKind::Name, "r"),
                (TokenKind::ListOpen, "["),
                (TokenKind::ListClose, "]"),
            ]
        );
    }

    #[test]
    fn tokenize_message_list() {
        let toks = drain("r: [{a: 1}, {a: 2}]").unwrap();
        assert_eq!(toks[1], (TokenKind::ListOpen, "["));
        assert_eq!(toks[2], (TokenKind::MessageOpen, "{"));
        assert_eq!(toks[5], (TokenKind::MessageClose, "}"));
        assert_eq!(toks[6], (TokenKind::MessageOpen, "{"));
        assert_eq!(toks[9], (TokenKind::MessageClose, "}"));
        assert_eq!(toks[10], (TokenKind::ListClose, "]"));
    }

    // `,` and `;` field separators are consumed silently, so all three
    // spellings must produce the same token stream.
    #[test]
    fn separators_consumed() {
        let a = drain("a: 1 b: 2").unwrap();
        let b = drain("a: 1, b: 2").unwrap();
        let c = drain("a: 1; b: 2").unwrap();
        assert_eq!(a, b);
        assert_eq!(a, c);
    }

    #[test]
    fn comments_skipped() {
        let toks = drain("# leading\nfoo: 1 # trailing\nbar: 2").unwrap();
        assert_eq!(
            toks,
            [
                (TokenKind::Name, "foo"),
                (TokenKind::Scalar, "1"),
                (TokenKind::Name, "bar"),
                (TokenKind::Scalar, "2"),
            ]
        );
    }

    #[test]
    fn empty_input() {
        assert_eq!(drain("").unwrap(), []);
        assert_eq!(drain(" \n\t ").unwrap(), []);
        assert_eq!(drain("# just a comment").unwrap(), []);
    }

    #[test]
    fn name_kinds() {
        // (input, expected name kind, expected raw, expected has_separator)
        #[rustfmt::skip]
        let cases: &[(&str, NameKind, &str, bool)] = &[
            ("foo: 1", NameKind::Ident, "foo", true),
            ("foo_bar: 1", NameKind::Ident, "foo_bar", true),
            ("_private: 1", NameKind::Ident, "_private", true),
            ("msg {}", NameKind::Ident, "msg", false),
            ("[pkg.ext]: 1", NameKind::TypeName, "[pkg.ext]", true),
            ("[a.b.c/d.e]: 1", NameKind::TypeName, "[a.b.c/d.e]", true),
            ("[type.googleapis.com/Foo] {}", NameKind::TypeName, "[type.googleapis.com/Foo]", false),
            ("42: 1", NameKind::FieldNumber, "42", true),
        ];
        for &(input, want_kind, want_raw, want_sep) in cases {
            let mut t = Tokenizer::new(input);
            let tok = t.read().unwrap();
            assert_eq!(tok.kind, TokenKind::Name, "input: {input}");
            assert_eq!(tok.name_kind, want_kind, "input: {input}");
            assert_eq!(tok.raw, want_raw, "input: {input}");
            assert_eq!(tok.has_separator, want_sep, "input: {input}");
        }
    }

    #[test]
    fn scalar_kinds() {
        // (input, expected scalar kind, expected raw of the value token)
        #[rustfmt::skip]
        let cases: &[(&str, ScalarKind, &str)] = &[
            ("f: 42", ScalarKind::Number, "42"),
            ("f: -7", ScalarKind::Number, "-7"),
            ("f: 0x1F", ScalarKind::Number, "0x1F"),
            ("f: 0777", ScalarKind::Number, "0777"),
            ("f: 1.5", ScalarKind::Number, "1.5"),
            ("f: 1.5e-3", ScalarKind::Number, "1.5e-3"),
            ("f: .5", ScalarKind::Number, ".5"),
            ("f: 1f", ScalarKind::Number, "1f"),
            ("f: 1.5F", ScalarKind::Number, "1.5F"),
            (r#"f: "hello""#, ScalarKind::String, r#""hello""#),
            (r#"f: 'world'"#, ScalarKind::String, r#"'world'"#),
            (r#"f: "a" "b""#, ScalarKind::String, r#""a" "b""#),
            ("f: true", ScalarKind::Literal, "true"),
            ("f: False", ScalarKind::Literal, "False"),
            ("f: FOO_BAR", ScalarKind::Literal, "FOO_BAR"),
            ("f: inf", ScalarKind::Literal, "inf"),
            ("f: -inf", ScalarKind::Literal, "-inf"),
            ("f: nan", ScalarKind::Literal, "nan"),
        ];
        for &(input, want_kind, want_raw) in cases {
            let mut t = Tokenizer::new(input);
            // Skip the `f` name token, then inspect the value token.
            t.read().unwrap();
            let tok = t.read().unwrap();
            assert_eq!(tok.kind, TokenKind::Scalar, "input: {input}");
            assert_eq!(tok.scalar_kind, want_kind, "input: {input}");
            assert_eq!(tok.raw, want_raw, "input: {input}");
        }
    }

    #[test]
    fn string_escape_not_closing() {
        let mut t = Tokenizer::new(r#"f: "say \"hi\"""#);
        t.read().unwrap();
        let tok = t.read().unwrap();
        assert_eq!(tok.scalar_kind, ScalarKind::String);
        assert_eq!(tok.raw, r#""say \"hi\"""#);
    }

    #[test]
    fn adjacent_strings_no_whitespace() {
        let mut t = Tokenizer::new(r#"f: "a"'b'"c""#);
        t.read().unwrap();
        let tok = t.read().unwrap();
        assert_eq!(tok.raw, r#""a"'b'"c""#);
    }

    #[test]
    fn lex_number_table() {
        // (input, expected (kind, total length) or None for a reject)
        #[rustfmt::skip]
        let cases: &[(&str, Option<(NumKind, usize)>)] = &[
            ("0", Some((NumKind::Dec, 1))),
            ("42", Some((NumKind::Dec, 2))),
            ("-7", Some((NumKind::Dec, 2))),
            ("- 7", Some((NumKind::Dec, 3))),
            ("0x1F", Some((NumKind::Hex, 4))),
            ("0XFF", Some((NumKind::Hex, 4))),
            ("-0x1", Some((NumKind::Hex, 4))),
            ("0777", Some((NumKind::Oct, 4))),
            ("1.5", Some((NumKind::Float, 3))),
            (".5", Some((NumKind::Float, 2))),
            ("1.", Some((NumKind::Float, 2))),
            ("1e3", Some((NumKind::Float, 3))),
            ("1.5e-3", Some((NumKind::Float, 6))),
            ("1.5E+3", Some((NumKind::Float, 6))),
            ("1f", Some((NumKind::Float, 2))),
            ("1.5F", Some((NumKind::Float, 4))),
            ("", None),
            ("abc", None),
            ("0x", None),
            (".", None),
            ("1e", None),
            ("1e+", None),
            ("-", None),
            ("0x1g", None),
            ("42abc", None),
        ];
        for &(input, want) in cases {
            let got = lex_number(input.as_bytes()).map(|n| (n.kind, n.len));
            assert_eq!(got, want, "input: {input:?}");
        }
    }

    #[test]
    fn number_for_parse_strips_suffix() {
        let n = lex_number(b"1.5f").unwrap();
        assert_eq!(number_for_parse("1.5f", &n), "1.5");
    }

    #[test]
    fn number_for_parse_strips_separator() {
        let n = lex_number(b"- 42").unwrap();
        assert_eq!(n.sep, 1);
        assert_eq!(number_for_parse("- 42", &n), "-42");
    }

    #[test]
    fn number_for_parse_neg_hex_with_sep() {
        let n = lex_number(b"- 0xFF").unwrap();
        assert_eq!(n.kind, NumKind::Hex);
        assert!(n.neg);
        assert_eq!(n.sep, 1);
        assert_eq!(number_for_parse("- 0xFF", &n), "-0xFF");
    }

    #[test]
    fn delimiter_mismatch() {
        let err = drain("m { a: 1 >").unwrap_err();
        assert_eq!(err.kind, ParseErrorKind::DelimiterMismatch);
    }

    #[test]
    fn delimiter_mismatch_angle() {
        let err = drain("m < a: 1 }").unwrap_err();
        assert_eq!(err.kind, ParseErrorKind::DelimiterMismatch);
    }

    #[test]
    fn unexpected_eof_in_message() {
        let err = drain("m { a: 1").unwrap_err();
        assert_eq!(err.kind, ParseErrorKind::UnexpectedEof);
    }

    #[test]
    fn unexpected_eof_in_list() {
        let err = drain("r: [1, 2").unwrap_err();
        assert_eq!(err.kind, ParseErrorKind::UnexpectedEof);
    }

    #[test]
    fn unterminated_string() {
        let err = drain(r#"f: "oops"#).unwrap_err();
        assert!(matches!(err.kind, ParseErrorKind::InvalidString(_)));
    }

    #[test]
    fn list_missing_comma() {
        let err = drain("r: [1 2]").unwrap_err();
        assert!(matches!(
            err.kind,
            ParseErrorKind::UnexpectedToken {
                expected: "',' or ']'"
            }
        ));
    }

    #[test]
    fn recursion_limit() {
        // One level past the limit must fail with RecursionLimitExceeded.
        let depth = crate::message::RECURSION_LIMIT as usize + 1;
        let mut s = alloc::string::String::new();
        for _ in 0..depth {
            s.push_str("m { ");
        }
        let err = drain(&s).unwrap_err();
        assert_eq!(err.kind, ParseErrorKind::RecursionLimitExceeded);
    }

    #[test]
    fn peek_does_not_advance() {
        let mut t = Tokenizer::new("a: 1 b: 2");
        let p1 = t.peek().unwrap();
        let p2 = t.peek().unwrap();
        assert_eq!(p1.raw, "a");
        assert_eq!(p2.raw, "a");
        let r = t.read().unwrap();
        assert_eq!(r.raw, "a");
        let next = t.read().unwrap();
        assert_eq!(next.raw, "1");
    }

    #[test]
    fn peek_then_read_preserves_nesting() {
        let mut t = Tokenizer::new("m { a: 1 }");
        t.read().unwrap();
        let p = t.peek().unwrap();
        assert_eq!(p.kind, TokenKind::MessageOpen);
        t.read().unwrap();
        assert_eq!(t.read().unwrap().raw, "a");
        assert_eq!(t.read().unwrap().raw, "1");
        assert_eq!(t.read().unwrap().kind, TokenKind::MessageClose);
    }

    #[test]
    fn line_col_table() {
        let input = "ab\ncde\nfg";
        let t = Tokenizer::new(input);
        // (byte offset, expected 1-based (line, col)); out-of-range
        // offsets clamp to the end of input.
        #[rustfmt::skip]
        let cases: &[(usize, (u32, u32))] = &[
            (0, (1, 1)),
            (1, (1, 2)),
            (2, (1, 3)),
            (3, (2, 1)),
            (5, (2, 3)),
            (7, (3, 1)),
            (999, (3, 3)),
        ];
        for &(pos, want) in cases {
            assert_eq!(t.line_col(pos), want, "pos: {pos}");
        }
    }

    #[test]
    fn line_col_unicode() {
        // 'é' is two bytes but one char; columns count chars.
        let t = Tokenizer::new("éx");
        assert_eq!(t.line_col(0), (1, 1));
        assert_eq!(t.line_col(2), (1, 2));
    }

    #[test]
    fn error_has_correct_position() {
        let err = drain("a: 1\nb: 2\nm { x: 1 >").unwrap_err();
        assert_eq!(err.line, 3);
        assert_eq!(err.col, 10);
    }

    #[test]
    fn bom_is_skipped() {
        let toks = drain("\u{FEFF}a: 1").unwrap();
        assert_eq!(toks, &[(TokenKind::Name, "a"), (TokenKind::Scalar, "1")]);
    }

    #[test]
    fn bom_only_is_empty() {
        let toks = drain("\u{FEFF}").unwrap();
        assert!(toks.is_empty());
    }

    #[test]
    fn vertical_tab_and_form_feed_are_whitespace() {
        let toks = drain("a:\x0B1\x0Cb: 2").unwrap();
        assert_eq!(
            toks,
            &[
                (TokenKind::Name, "a"),
                (TokenKind::Scalar, "1"),
                (TokenKind::Name, "b"),
                (TokenKind::Scalar, "2"),
            ]
        );
    }

    #[test]
    fn signed_literal_with_whitespace() {
        let toks = drain("f: - inf").unwrap();
        assert_eq!(
            toks,
            &[(TokenKind::Name, "f"), (TokenKind::Scalar, "- inf")]
        );
    }
}