use std::collections::HashMap;
use smol_str::SmolStr;
use crate::semantic::signature::{ArgKind, ArgSpec, builtin};
use crate::syntax::SyntaxKind;
/// A single lexed token: a syntactic kind paired with the source text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
/// Syntactic category the lexer assigned to this span.
pub kind: SyntaxKind,
/// The slice of input this token was built from, copied into a `SmolStr`.
pub text: SmolStr,
}
/// Kind of LaTeX source being lexed; it decides how the lexer initially treats `@`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LatexFlavor {
    /// The default flavor: `@` does not start out as a letter.
    #[default]
    Document,
    /// Flavor for which lexing starts with `@` already treated as a letter.
    Package,
}

impl LatexFlavor {
    /// Reports whether lexing begins with `@` counted as a letter in control words.
    fn letter_mode_start(self) -> bool {
        match self {
            LatexFlavor::Package => true,
            LatexFlavor::Document => false,
        }
    }
}
/// Options controlling a single lexing run.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct LexConfig {
    /// Source flavor; determines whether `@` starts out as a letter.
    pub flavor: LatexFlavor,
    /// Enables the line-start handling for margins, guards and macrocode frames.
    pub dtx: bool,
}

impl From<LatexFlavor> for LexConfig {
    /// Builds a configuration for `flavor` with `dtx` handling switched off.
    fn from(flavor: LatexFlavor) -> Self {
        LexConfig { dtx: false, flavor }
    }
}
/// Registry of user-declared verbatim commands and environments consulted by the lexer.
#[derive(Debug, Default, Clone)]
pub struct VerbCtx {
    /// Command name (looked up without its backslash) mapped to the argument
    /// specs that precede the command's verbatim argument.
    commands: HashMap<SmolStr, Vec<ArgSpec>>,
    /// Environment name mapped to the argument specs that follow `\begin{name}`.
    environments: HashMap<SmolStr, Vec<ArgSpec>>,
}

impl VerbCtx {
    /// Returns `true` when neither a command nor an environment has been registered.
    pub fn is_empty(&self) -> bool {
        [&self.commands, &self.environments]
            .iter()
            .all(|table| table.is_empty())
    }

    /// Registers (or replaces) a verbatim command together with its leading argument specs.
    pub(crate) fn insert(&mut self, name: SmolStr, leading: Vec<ArgSpec>) {
        self.commands.insert(name, leading);
    }

    /// Registers (or replaces) a verbatim environment together with its argument specs.
    pub(crate) fn insert_environment(&mut self, name: SmolStr, args: Vec<ArgSpec>) {
        self.environments.insert(name, args);
    }

    /// Leading argument specs of a registered command, or `None` if `name` is unknown here.
    fn leading_args(&self, name: &str) -> Option<&[ArgSpec]> {
        let specs = self.commands.get(name)?;
        Some(specs.as_slice())
    }

    /// Argument specs of a registered environment, or `None` if `name` is unknown here.
    fn verbatim_environment_args(&self, name: &str) -> Option<&[ArgSpec]> {
        let specs = self.environments.get(name)?;
        Some(specs.as_slice())
    }

    /// Whether `name` is a verbatim environment, either registered in this
    /// context or flagged `verbatim_body` in the built-in table.
    pub(crate) fn is_verbatim_environment(&self, name: &str) -> bool {
        if self.environments.contains_key(name) {
            return true;
        }
        matches!(builtin().environment(name), Some(env) if env.verbatim_body)
    }
}
/// Whether the built-in signature table marks the environment `name` as `block`.
///
/// Environments missing from the built-in table are reported as non-block.
pub(crate) fn is_block_environment(name: &str) -> bool {
    match builtin().environment(name) {
        Some(env) => env.block,
        None => false,
    }
}
/// Whether `text` (a control word including its backslash) is one of the
/// macro-defining commands the lexer recognizes.
fn is_definition_keyword(text: &str) -> bool {
    const DEFINITION_KEYWORDS: &[&str] = &[
        "\\newcommand",
        "\\renewcommand",
        "\\providecommand",
        "\\DeclareRobustCommand",
        "\\NewDocumentCommand",
        "\\RenewDocumentCommand",
        "\\ProvideDocumentCommand",
        "\\DeclareDocumentCommand",
        "\\def",
        "\\edef",
        "\\gdef",
        "\\xdef",
        "\\let",
    ];
    DEFINITION_KEYWORDS.iter().any(|keyword| *keyword == text)
}
/// Direction in which a command switches expl3 syntax, as classified by `expl_toggle`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ExplToggle {
/// The command switches expl3 syntax on.
On,
/// The command switches expl3 syntax off.
Off,
}
/// Classifies a control word (including its backslash) as an expl3 syntax switch.
///
/// Returns `Some(ExplToggle::On)` for `\ExplSyntaxOn` and the
/// `\ProvidesExpl{Package,Class,File}` commands, `Some(ExplToggle::Off)` for
/// `\ExplSyntaxOff`, and `None` for everything else.
pub(crate) fn expl_toggle(text: &str) -> Option<ExplToggle> {
    const TURNS_ON: [&str; 4] = [
        "\\ExplSyntaxOn",
        "\\ProvidesExplPackage",
        "\\ProvidesExplClass",
        "\\ProvidesExplFile",
    ];
    if TURNS_ON.iter().any(|command| *command == text) {
        Some(ExplToggle::On)
    } else if text == "\\ExplSyntaxOff" {
        Some(ExplToggle::Off)
    } else {
        None
    }
}
/// Lexes `input` with the default configuration and no user-declared
/// verbatim commands or environments.
pub fn lex(input: &str) -> Vec<Token> {
    let no_user_verbatim = VerbCtx::default();
    let config = LexConfig::default();
    lex_with(input, &no_user_verbatim, config)
}
/// Lexes `input` into a flat list of tokens in source order.
///
/// `ctx` supplies user-declared verbatim commands and environments; `config`
/// selects the starting `@` mode (via its flavor) and whether the `.dtx`
/// line-start rules (macrocode frames, `%<...>` guards, `%` margins) apply.
///
/// At every position the branches below are tried in order, and the first
/// one that matches consumes input and restarts the loop:
/// macrocode frame, guard, doc margin, verbatim environment, verbatim
/// command, and finally a single ordinary token from `next_token`.
pub fn lex_with(input: &str, ctx: &VerbCtx, config: LexConfig) -> Vec<Token> {
let mut out = Vec::new();
let mut pos = 0;
// `at_letter`: `@` counts as a letter in control words (toggled by
// `\makeatletter` / `\makeatother`). `expl_syntax`: `_` and `:` count as
// letters (toggled by the commands `expl_toggle` recognizes).
let mut at_letter = config.flavor.letter_mode_start(); let mut expl_syntax = false;
// True at the very start of the input and right after a NEWLINE token.
let mut at_line_start = true;
// True between a macrocode begin frame and its matching end frame (dtx only).
let mut in_macrocode = false;
// `at_letter` value to restore once the current macrocode frame ends.
let mut saved_at_letter = at_letter;
// Set after `\left` / `\right`; makes the next WORD token exactly one char long.
let mut pending_delim = false;
// Set after a definition keyword; while set, verbatim-command lexing is
// skipped — presumably so the name being defined is not lexed as a verbatim call.
let mut pending_def = false;
while pos < input.len() {
let rest = &input[pos..];
// dtx: a `% \begin{macrocode}` / `% \end{macrocode}` frame at line start.
// Only a begin frame is looked for outside macrocode, only an end frame inside.
if config.dtx
&& at_line_start
&& let Some(consumed) = lex_macrocode_frame(rest, !in_macrocode, &mut out)
{
if in_macrocode {
// Leaving macrocode: restore the `@` mode that was active before it.
in_macrocode = false;
at_letter = saved_at_letter;
} else {
// Entering macrocode: remember the current `@` mode and force letter mode.
in_macrocode = true;
saved_at_letter = at_letter;
at_letter = true;
}
pos += consumed;
at_line_start = false;
pending_delim = false;
pending_def = false;
continue;
}
// dtx: a `%<...>` guard at line start. The `>` must appear before the end
// of the line; otherwise this branch does not match.
if config.dtx
&& at_line_start
&& rest.starts_with("%<")
&& let Some(rel) = rest[2..].find(['>', '\n', '\r'])
&& rest.as_bytes()[2 + rel] == b'>'
{
// `%<` + guard text + `>`.
let len = 2 + rel + 1;
out.push(Token {
kind: SyntaxKind::GUARD,
text: SmolStr::new(&rest[..len]),
});
pos += len;
at_line_start = false;
continue;
}
// dtx: outside macrocode, a line-leading `%` (not starting `%<`) is a
// one-character margin token; the rest of the line is lexed normally.
if config.dtx
&& at_line_start
&& !in_macrocode
&& rest.starts_with('%')
&& !rest.starts_with("%<")
{
out.push(Token {
kind: SyntaxKind::DOC_MARGIN,
text: SmolStr::new("%"),
});
pos += 1;
at_line_start = false;
continue;
}
// `\begin{name}` of a verbatim environment: header, arguments and raw body
// are pushed by the helper.
if let Some(consumed) = lex_verbatim_environment(rest, ctx, &mut out) {
pos += consumed;
pending_delim = false;
pending_def = false;
at_line_start = false;
continue;
}
// A verbatim command call, unless a definition keyword is still pending.
if !pending_def
&& let Some(consumed) =
lex_verbatim_command(rest, at_letter, expl_syntax, ctx, &mut out)
{
pos += consumed;
pending_delim = false;
at_line_start = false;
continue;
}
// Ordinary token.
let (kind, mut len) = next_token(rest, at_letter, expl_syntax);
// After `\left` / `\right`, split off just the first character of a WORD.
if pending_delim && kind == SyntaxKind::WORD {
len = rest.chars().next().expect("rest is non-empty").len_utf8();
}
debug_assert!(len > 0, "lexer made no progress at byte {pos}");
let text = &rest[..len];
// Control words that change how later control words are delimited.
if kind == SyntaxKind::CONTROL_WORD {
match text {
"\\makeatletter" => at_letter = true,
"\\makeatother" => at_letter = false,
_ => {
if let Some(toggle) = expl_toggle(text) {
expl_syntax = matches!(toggle, ExplToggle::On);
}
}
}
}
// `pending_delim` survives whitespace and newlines, is set by `\left` /
// `\right`, and is cleared by any other token.
pending_delim = match kind {
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE => pending_delim,
SyntaxKind::CONTROL_WORD if text == "\\left" || text == "\\right" => true,
_ => false,
};
// `pending_def` is set by a definition keyword, survives whitespace,
// newlines and `{`, and is cleared by any other token.
pending_def = match kind {
SyntaxKind::CONTROL_WORD if is_definition_keyword(text) => true,
SyntaxKind::WHITESPACE | SyntaxKind::NEWLINE | SyntaxKind::L_BRACE => pending_def,
_ => false,
};
out.push(Token {
kind,
text: SmolStr::new(text),
});
at_line_start = kind == SyntaxKind::NEWLINE;
pos += len;
}
out
}
/// Classifies the token at the start of `rest` and returns its kind and byte length.
///
/// `rest` must be non-empty. `at_letter` and `expl_syntax` are forwarded to
/// control-sequence lexing; `expl_syntax` additionally makes `_` part of a
/// WORD instead of an UNDERSCORE token.
fn next_token(rest: &str, at_letter: bool, expl_syntax: bool) -> (SyntaxKind, usize) {
    let first = rest.chars().next().expect("rest is non-empty");
    // Every punctuation token handled here is a single ASCII byte.
    let single = |kind: SyntaxKind| (kind, 1usize);
    match first {
        '\\' => lex_control(rest, at_letter, expl_syntax),
        '%' => {
            // A comment runs up to, but not including, the line break.
            let len = run_len(rest, |ch| !matches!(ch, '\n' | '\r'));
            (SyntaxKind::COMMENT, len)
        }
        '{' => single(SyntaxKind::L_BRACE),
        '}' => single(SyntaxKind::R_BRACE),
        '[' => single(SyntaxKind::L_BRACKET),
        ']' => single(SyntaxKind::R_BRACKET),
        '$' => single(SyntaxKind::DOLLAR),
        '&' => single(SyntaxKind::AMPERSAND),
        '#' => single(SyntaxKind::HASH),
        '^' => single(SyntaxKind::CARET),
        '_' if !expl_syntax => single(SyntaxKind::UNDERSCORE),
        '~' => single(SyntaxKind::TILDE),
        '\n' => single(SyntaxKind::NEWLINE),
        // `\r\n` is one NEWLINE token; a lone `\r` is one as well.
        '\r' if rest.starts_with("\r\n") => (SyntaxKind::NEWLINE, 2),
        '\r' => single(SyntaxKind::NEWLINE),
        ' ' | '\t' => {
            let len = run_len(rest, |ch| matches!(ch, ' ' | '\t'));
            (SyntaxKind::WHITESPACE, len)
        }
        _ => {
            let len = run_len(rest, |ch| (expl_syntax && ch == '_') || is_word_char(ch));
            (SyntaxKind::WORD, len)
        }
    }
}
/// Lexes a control sequence; `rest` must start with a backslash.
///
/// Yields a CONTROL_WORD for a backslash followed by letters, a VERB when
/// that word is exactly `\verb` and a well-formed delimited argument follows,
/// and otherwise a CONTROL_SYMBOL covering the backslash plus at most one
/// character.
fn lex_control(rest: &str, at_letter: bool, expl_syntax: bool) -> (SyntaxKind, usize) {
    let after_backslash = &rest[1..];
    let Some(first) = after_backslash.chars().next() else {
        // A backslash at the very end of the input.
        return (SyntaxKind::CONTROL_SYMBOL, 1);
    };
    if !is_letter(first, at_letter, expl_syntax) {
        return (SyntaxKind::CONTROL_SYMBOL, 1 + first.len_utf8());
    }
    let word_len = 1 + run_len(after_backslash, |ch| is_letter(ch, at_letter, expl_syntax));
    let (word, tail) = rest.split_at(word_len);
    if word == "\\verb" {
        if let Some(arg_len) = verb_len(tail) {
            return (SyntaxKind::VERB, word_len + arg_len);
        }
    }
    (SyntaxKind::CONTROL_WORD, word_len)
}
/// Byte length of a `\verb` argument at the start of `after`: an optional `*`
/// followed by a delimited span. `None` when the delimited span is malformed.
fn verb_len(after: &str) -> Option<usize> {
    if let Some(unstarred) = after.strip_prefix('*') {
        // One extra byte for the star itself.
        return delimited_len(unstarred).map(|len| len + 1);
    }
    delimited_len(after)
}
/// Byte length of a span that opens with an arbitrary delimiter character and
/// closes at the next occurrence of that same character.
///
/// Returns `None` when `after` is empty, the delimiter is whitespace, a line
/// break is reached before the closing delimiter, or the input ends first.
fn delimited_len(after: &str) -> Option<usize> {
    let delim = after.chars().next().filter(|ch| !ch.is_whitespace())?;
    let open_len = delim.len_utf8();
    for (offset, ch) in after[open_len..].char_indices() {
        match ch {
            '\n' | '\r' => return None,
            _ if ch == delim => return Some(open_len + offset + ch.len_utf8()),
            _ => {}
        }
    }
    None
}
/// Lexes a verbatim environment opening at the start of `rest`.
///
/// Matches `\begin{name}` where `name` is registered in `ctx` or flagged
/// `verbatim_body` in the built-in table; otherwise returns `None` without
/// touching `out`. On a match it pushes the four header tokens, the tokens of
/// any arguments, and a single VERBATIM_BODY token (omitted when empty)
/// reaching up to, but not including, the matching `\end{name}` — or to the
/// end of `rest` if there is none. Returns the number of bytes consumed.
fn lex_verbatim_environment(rest: &str, ctx: &VerbCtx, out: &mut Vec<Token>) -> Option<usize> {
    const OPEN: &str = "\\begin{";
    let after_begin = rest.strip_prefix(OPEN)?;
    let (name, _) = after_begin.split_once('}')?;
    // User-declared environments take precedence over the built-in table.
    let args: &[ArgSpec] = match ctx.verbatim_environment_args(name) {
        Some(declared) => declared,
        None => {
            &builtin()
                .environment(name)
                .filter(|e| e.verbatim_body)?
                .args
        }
    };

    let header = [
        (SyntaxKind::CONTROL_WORD, "\\begin"),
        (SyntaxKind::L_BRACE, "{"),
        (SyntaxKind::WORD, name),
        (SyntaxKind::R_BRACE, "}"),
    ];
    out.extend(header.into_iter().map(|(kind, text)| Token {
        kind,
        text: SmolStr::new(text),
    }));

    let header_len = OPEN.len() + name.len() + 1;
    let after_header = &rest[header_len..];
    let (arg_text, body_region) = after_header.split_at(scan_verbatim_args(after_header, args));
    lex_into(arg_text, out);

    let end_marker = format!("\\end{{{name}}}");
    let body = match body_region.find(&end_marker) {
        Some(at) => &body_region[..at],
        None => body_region,
    };
    if !body.is_empty() {
        out.push(Token {
            kind: SyntaxKind::VERBATIM_BODY,
            text: SmolStr::new(body),
        });
    }
    Some(header_len + arg_text.len() + body.len())
}
/// Lexes a macrocode frame line: `%`, optional blanks, then
/// `\begin{macrocode}` (when `want_begin`) or `\end{macrocode}` (otherwise),
/// with only blanks allowed before the end of the line. The starred name
/// `macrocode*` is accepted too.
///
/// On a match the margin, blanks and frame tokens are pushed to `out` and the
/// consumed byte count is returned; trailing blanks and the line break are
/// left for the caller. On a mismatch `out` is untouched and `None` returned.
fn lex_macrocode_frame(rest: &str, want_begin: bool, out: &mut Vec<Token>) -> Option<usize> {
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_pct = rest.strip_prefix('%')?;
    let ws_len = after_pct.len() - after_pct.trim_start_matches(BLANKS).len();
    let (indent, body) = after_pct.split_at(ws_len);

    let control = if want_begin { "\\begin" } else { "\\end" };
    let after_open = body.strip_prefix(control)?.strip_prefix('{')?;
    let (name, after_close) = after_open.split_once('}')?;
    if !matches!(name, "macrocode" | "macrocode*") {
        return None;
    }

    // Nothing but blanks may follow the closing brace on this line.
    let tail = after_close.trim_start_matches(BLANKS);
    let ends_line = tail.is_empty() || tail.starts_with(['\n', '\r']);
    if !ends_line {
        return None;
    }

    out.push(Token {
        kind: SyntaxKind::DOC_MARGIN,
        text: SmolStr::new("%"),
    });
    if !indent.is_empty() {
        out.push(Token {
            kind: SyntaxKind::WHITESPACE,
            text: SmolStr::new(indent),
        });
    }
    let frame = [
        (SyntaxKind::CONTROL_WORD, control),
        (SyntaxKind::L_BRACE, "{"),
        (SyntaxKind::WORD, name),
        (SyntaxKind::R_BRACE, "}"),
    ];
    out.extend(frame.into_iter().map(|(kind, text)| Token {
        kind,
        text: SmolStr::new(text),
    }));
    Some(1 + ws_len + control.len() + 1 + name.len() + 1)
}
/// Lexes a call to a verbatim command at the start of `rest`.
///
/// The command must be a control word other than `\verb` that is either
/// registered in `ctx` or flagged `verbatim` in the built-in table. After its
/// leading arguments and optional blanks, the verbatim argument is either a
/// balanced `{...}` group or a span delimited by a repeated character.
///
/// On success pushes the control word, the leading-argument tokens, an
/// optional WHITESPACE token and one VERB token holding the raw argument, and
/// returns the bytes consumed. Returns `None`, leaving `out` untouched, when
/// the command is not a verbatim one or its argument is missing or malformed.
fn lex_verbatim_command(
    rest: &str,
    at_letter: bool,
    expl_syntax: bool,
    ctx: &VerbCtx,
    out: &mut Vec<Token>,
) -> Option<usize> {
    if !rest.starts_with('\\') {
        return None;
    }
    let letters = run_len(&rest[1..], |c| is_letter(c, at_letter, expl_syntax));
    if letters == 0 {
        return None;
    }
    let word_len = 1 + letters;
    let name = &rest[1..word_len];
    // `\verb` is excluded here; `lex_control` recognizes it.
    if name == "verb" {
        return None;
    }
    // User-declared commands take precedence over the built-in table.
    let leading = match ctx.leading_args(name) {
        Some(args) => args,
        None => &builtin().command(name).filter(|c| c.verbatim)?.args,
    };
    let after_word = &rest[word_len..];
    let args_len = scan_verbatim_args(after_word, leading);
    let region = &after_word[args_len..];
    let ws_len = region
        .bytes()
        .take_while(|&b| b == b' ' || b == b'\t')
        .count();
    let arg_region = &region[ws_len..];
    // Measure the verbatim argument before emitting anything, so a malformed
    // call leaves `out` unchanged.
    let arg_len = match arg_region.bytes().next() {
        Some(b'{') => balanced_group_len(arg_region, b'}')?,
        Some(_) => delimited_len(arg_region)?,
        None => return None,
    };
    out.push(Token {
        kind: SyntaxKind::CONTROL_WORD,
        text: SmolStr::new(&rest[..word_len]),
    });
    lex_into(&after_word[..args_len], out);
    if ws_len > 0 {
        out.push(Token {
            kind: SyntaxKind::WHITESPACE,
            text: SmolStr::new(&region[..ws_len]),
        });
    }
    out.push(Token {
        kind: SyntaxKind::VERB,
        text: SmolStr::new(&arg_region[..arg_len]),
    });
    Some(word_len + args_len + ws_len + arg_len)
}
/// Returns the byte length of the prefix of `region` occupied by the
/// arguments described by `args`.
///
/// Specs are tried in order. For each one, spaces and tabs are skipped and,
/// if the spec's opening delimiter follows, the balanced group is consumed.
/// A spec whose opener is absent is skipped without consuming anything (the
/// probed blanks included). An unbalanced group ends the scan. Line breaks
/// are never skipped, so a group starting on a later line is not an argument.
fn scan_verbatim_args(region: &str, args: &[ArgSpec]) -> usize {
    let bytes = region.as_bytes();
    let mut pos = 0;
    for arg in args {
        // Look past blanks without committing to them yet.
        let mut probe = pos;
        while matches!(bytes.get(probe), Some(b' ' | b'\t')) {
            probe += 1;
        }
        let (open, close) = match arg.kind {
            ArgKind::Bracket => (b'[', b']'),
            ArgKind::Brace => (b'{', b'}'),
        };
        if bytes.get(probe) != Some(&open) {
            continue;
        }
        match balanced_group_len(&region[probe..], close) {
            Some(len) => pos = probe + len,
            None => break,
        }
    }
    pos
}
/// Byte length of the balanced group at the start of `s`, closing delimiter
/// included, or `None` if the group never closes.
///
/// The first byte of `s` is taken to be the opening delimiter matching
/// `close` and is not inspected. Nested `{}` and `[]` groups are tracked, a
/// closer that does not match the innermost open group is ignored, and a
/// backslash skips the byte after it.
fn balanced_group_len(s: &str, close: u8) -> Option<usize> {
    let bytes = s.as_bytes();
    // Closers still owed, innermost last.
    let mut expected = vec![close];
    let mut at = 1;
    while let Some(&byte) = bytes.get(at) {
        at += 1;
        match byte {
            // Skip the escaped byte as well.
            b'\\' => at += 1,
            b'{' => expected.push(b'}'),
            b'[' => expected.push(b']'),
            b'}' | b']' if expected.last() == Some(&byte) => {
                expected.pop();
                if expected.is_empty() {
                    return Some(at);
                }
            }
            _ => {}
        }
    }
    None
}
/// Lexes all of `region` with the ordinary token rules and appends the
/// tokens to `out`.
///
/// Both `@`-as-letter mode and expl3 syntax are off here, whatever mode the
/// caller is in.
fn lex_into(region: &str, out: &mut Vec<Token>) {
    let mut pos = 0;
    while pos < region.len() {
        let (kind, len) = next_token(&region[pos..], false, false);
        debug_assert!(len > 0, "lexer made no progress in verbatim args");
        out.push(Token {
            kind,
            text: SmolStr::new(&region[pos..pos + len]),
        });
        pos += len;
    }
}
/// Byte length of the longest prefix of `s` whose characters all satisfy `pred`.
fn run_len(s: &str, pred: impl Fn(char) -> bool) -> usize {
    s.chars()
        .take_while(|&ch| pred(ch))
        .map(char::len_utf8)
        .sum()
}
/// Whether `c` may be part of a control word's name.
///
/// ASCII letters always qualify; `@` only when `at_letter` is set; `_` and
/// `:` only when `expl_syntax` is set.
fn is_letter(c: char, at_letter: bool, expl_syntax: bool) -> bool {
    match c {
        '@' => at_letter,
        '_' | ':' => expl_syntax,
        _ => c.is_ascii_alphabetic(),
    }
}
/// Whether `c` can be part of a WORD token, i.e. it is none of the
/// characters that the lexer gives a token kind of their own:
/// `\ % { } [ ] $ & # ^ _ ~`, space, tab, `\n` and `\r`.
pub(crate) fn is_word_char(c: char) -> bool {
    const WORD_BREAKERS: &str = "\\%{}[]$&#^_~ \t\n\r";
    !WORD_BREAKERS.contains(c)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pairs every token's kind with its text so whole sequences can be compared.
    fn pairs(toks: &[Token]) -> Vec<(SyntaxKind, &str)> {
        toks.iter()
            .map(|tok| (tok.kind, tok.text.as_str()))
            .collect()
    }

    /// Whether `toks` contains a token with exactly this kind and text.
    fn has(toks: &[Token], kind: SyntaxKind, text: &str) -> bool {
        toks.iter()
            .any(|tok| tok.kind == kind && tok.text.as_str() == text)
    }

    /// Document-flavor configuration with `.dtx` handling enabled.
    fn dtx_config() -> LexConfig {
        LexConfig {
            flavor: LatexFlavor::Document,
            dtx: true,
        }
    }

    /// Asserts that the token texts of `lex(input)` concatenate back to `input`.
    fn assert_lossless(input: &str) {
        let mut rebuilt = String::new();
        for tok in lex(input) {
            rebuilt.push_str(tok.text.as_str());
        }
        assert_eq!(rebuilt, input);
    }

    #[test]
    fn block_environment_classification() {
        assert!(is_block_environment("figure"));
        assert!(is_block_environment("itemize"));
        assert!(!is_block_environment("myenv"));
    }

    #[test]
    fn lossless_on_assorted_inputs() {
        let inputs = [
            "",
            "plain text",
            r"\section{Hi}[x]",
            "$a^2_b$",
            "a%c\n\nb",
            "café ∑ \\\\ \\{ \\,",
            "tab\tand spaces",
            "trailing\\",
            r"\verb|$x$|",
            "\\begin{verbatim}\n$x$ %not a comment\n\\end{verbatim}",
            "\\begin{lstlisting}[language=C]\nint a[3]; % raw\n\\end{lstlisting}",
            "\\begin{minted}[frame=single]{python}\nprint(\"$x$\")\n\\end{minted}",
            "\\begin{lstlisting}\n[1,2,3]\n\\end{lstlisting}",
            r"\makeatletter\a@b\makeatother\a@b",
            r"\ExplSyntaxOn\seq_new:N \g_@@_x_tl a_b\ExplSyntaxOff\seq_new:N",
            r"$\left(x+y\right)^2 \left.\frac{a}{b}\right|_0$",
        ];
        for input in inputs {
            assert_lossless(input);
        }
    }

    #[test]
    fn control_word_stops_at_non_letter() {
        let toks = lex(r"\alpha2");
        assert_eq!(toks[0].kind, SyntaxKind::CONTROL_WORD);
        assert_eq!(toks[0].text, "\\alpha");
        assert_eq!(toks[1].kind, SyntaxKind::WORD);
        assert_eq!(toks[1].text, "2");
    }

    #[test]
    fn double_backslash_is_one_control_symbol() {
        let toks = lex(r"\\");
        assert_eq!(toks.len(), 1);
        assert_eq!(toks[0].kind, SyntaxKind::CONTROL_SYMBOL);
        assert_eq!(toks[0].text, r"\\");
    }

    #[test]
    fn comment_stops_before_newline() {
        let toks = lex("% hi\nx");
        assert_eq!(toks[0].kind, SyntaxKind::COMMENT);
        assert_eq!(toks[0].text, "% hi");
        assert_eq!(toks[1].kind, SyntaxKind::NEWLINE);
    }

    #[test]
    fn crlf_is_a_single_newline() {
        let toks = lex("a\r\nb");
        assert_eq!(toks[1].kind, SyntaxKind::NEWLINE);
        assert_eq!(toks[1].text, "\r\n");
    }

    #[test]
    fn verb_inline_is_one_token() {
        let toks = lex(r"\verb|$x$|");
        assert_eq!(toks.len(), 1);
        assert_eq!(toks[0].kind, SyntaxKind::VERB);
        assert_eq!(toks[0].text, r"\verb|$x$|");
    }

    #[test]
    fn verb_star_with_plus_delimiter() {
        let toks = lex(r"a\verb*+b+c");
        assert_eq!(toks[1].kind, SyntaxKind::VERB);
        assert_eq!(toks[1].text, r"\verb*+b+");
        assert_eq!(toks[2].text, "c");
    }

    #[test]
    fn verb_without_closing_delimiter_is_a_plain_control_word() {
        let toks = lex(r"\verb|x");
        assert_eq!(toks[0].kind, SyntaxKind::CONTROL_WORD);
        assert_eq!(toks[0].text, r"\verb");
    }

    #[test]
    fn left_right_isolate_word_delimiter() {
        let toks = lex(r"\left(x+y\right)");
        assert_eq!(
            pairs(&toks),
            [
                (SyntaxKind::CONTROL_WORD, "\\left"),
                (SyntaxKind::WORD, "("),
                (SyntaxKind::WORD, "x+y"),
                (SyntaxKind::CONTROL_WORD, "\\right"),
                (SyntaxKind::WORD, ")"),
            ]
        );
    }

    #[test]
    fn left_delimiter_carries_across_whitespace() {
        let toks = lex(r"\left ( a");
        assert_eq!(
            pairs(&toks),
            [
                (SyntaxKind::CONTROL_WORD, "\\left"),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::WORD, "("),
                (SyntaxKind::WHITESPACE, " "),
                (SyntaxKind::WORD, "a"),
            ]
        );
    }

    #[test]
    fn left_non_word_delimiters_are_untouched() {
        for input in [r"\left\{", r"\left\langle", r"\left["] {
            assert_lossless(input);
        }
        let toks = lex(r"\left\langle x \right\rangle");
        assert!(toks.iter().any(|t| t.text == "\\langle"));
        assert!(toks.iter().any(|t| t.text == "\\rangle"));
    }

    #[test]
    fn leftarrow_is_not_left() {
        let toks = lex(r"\leftarrow(x)");
        assert_eq!(toks[0].text, "\\leftarrow");
        assert_eq!(toks[1].text, "(x)");
    }

    #[test]
    fn makeatletter_makes_at_a_letter() {
        let toks = lex(r"\makeatletter\foo@bar\makeatother\foo@bar");
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\foo@bar"));
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\foo"));
    }

    #[test]
    fn expl_syntax_makes_underscore_and_colon_letters() {
        let toks = lex(r"\ExplSyntaxOn\seq_new:N\ExplSyntaxOff\seq_new:N");
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\seq_new:N"));
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\seq"));
    }

    #[test]
    fn expl_syntax_lexes_internal_double_underscore_name() {
        let toks = lex(r"\ExplSyntaxOn\__module_internal:nn");
        assert_eq!(toks[1].kind, SyntaxKind::CONTROL_WORD);
        assert_eq!(toks[1].text, "\\__module_internal:nn");
    }

    #[test]
    fn provides_expl_package_turns_on_expl_syntax() {
        let toks = lex(r"\ProvidesExplPackage{p}{2026/01/01}{1.0}{d}\tl_set:Nn");
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\tl_set:Nn"));
    }

    #[test]
    fn expl_syntax_composes_with_makeatletter() {
        let toks = lex(r"\makeatletter\ExplSyntaxOn\g_@@_frame_title_tl");
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\g_@@_frame_title_tl"));
    }

    #[test]
    fn expl_syntax_makes_bare_underscore_a_word_not_subscript() {
        let toks = lex(r"\ExplSyntaxOn a_b");
        assert!(has(&toks, SyntaxKind::WORD, "a_b"));
        assert!(!toks.iter().any(|t| t.kind == SyntaxKind::UNDERSCORE));
    }

    #[test]
    fn package_flavor_starts_in_letter_mode() {
        let toks = lex_with(
            r"\foo@bar",
            &VerbCtx::default(),
            LatexFlavor::Package.into(),
        );
        assert_eq!(pairs(&toks), vec![(SyntaxKind::CONTROL_WORD, "\\foo@bar")]);
    }

    #[test]
    fn package_flavor_respects_trailing_makeatother() {
        let toks = lex_with(
            r"\foo@bar\makeatother\foo@bar",
            &VerbCtx::default(),
            LatexFlavor::Package.into(),
        );
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\foo@bar"));
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\foo"));
    }

    #[test]
    fn document_flavor_keeps_at_non_letter() {
        let toks = lex(r"\foo@bar");
        assert!(has(&toks, SyntaxKind::CONTROL_WORD, "\\foo"));
        assert!(!has(&toks, SyntaxKind::CONTROL_WORD, "\\foo@bar"));
    }

    #[test]
    fn dtx_mode_lexes_line_leading_percent_as_a_margin() {
        let toks = lex_with("% \\foo\nbar % tail\n", &VerbCtx::default(), dtx_config());
        let seen = pairs(&toks);
        assert_eq!(seen[0], (SyntaxKind::DOC_MARGIN, "%"));
        assert!(seen.contains(&(SyntaxKind::CONTROL_WORD, "\\foo")));
        assert!(seen.contains(&(SyntaxKind::COMMENT, "% tail")));
        let margins = seen
            .iter()
            .filter(|(kind, _)| *kind == SyntaxKind::DOC_MARGIN)
            .count();
        assert_eq!(margins, 1);
    }

    #[test]
    fn dtx_mode_is_off_by_default_for_margins_and_guards() {
        let plain = lex("% \\foo\n");
        assert_eq!(plain[0].kind, SyntaxKind::COMMENT);
        let plain_guard = lex("%<*driver>\n");
        assert_eq!(plain_guard[0].kind, SyntaxKind::COMMENT);
        assert_eq!(plain_guard[0].text, "%<*driver>");
    }

    #[test]
    fn dtx_mode_lexes_line_leading_guards() {
        let ctx = VerbCtx::default();

        let block = lex_with("%<*driver>\n%</driver>\n", &ctx, dtx_config());
        assert_eq!(block[0].kind, SyntaxKind::GUARD);
        assert_eq!(block[0].text, "%<*driver>");
        assert!(has(&block, SyntaxKind::GUARD, "%</driver>"));

        let inline = lex_with("%<plain>\\RequirePackage{x}\n", &ctx, dtx_config());
        assert_eq!(inline[0].kind, SyntaxKind::GUARD);
        assert_eq!(inline[0].text, "%<plain>");
        assert!(has(&inline, SyntaxKind::CONTROL_WORD, "\\RequirePackage"));

        let expr = lex_with("%<*package|driver>\n", &ctx, dtx_config());
        assert_eq!(expr[0].kind, SyntaxKind::GUARD);
        assert_eq!(expr[0].text, "%<*package|driver>");

        let midline = lex_with("a %<x>\n", &ctx, dtx_config());
        assert!(has(&midline, SyntaxKind::COMMENT, "%<x>"));
        assert!(!midline.iter().any(|t| t.kind == SyntaxKind::GUARD));

        let malformed = lex_with("%<unterminated\n", &ctx, dtx_config());
        assert_eq!(malformed[0].kind, SyntaxKind::COMMENT);
        assert_eq!(malformed[0].text, "%<unterminated");
    }

    #[test]
    fn verbatim_environment_body_is_one_raw_token() {
        let toks = lex("\\begin{verbatim}\n$not$ %literal\n\\end{verbatim}");
        assert_eq!(toks[0].text, "\\begin");
        assert_eq!(toks[2].text, "verbatim");
        let body_is_raw = toks
            .iter()
            .any(|t| t.kind == SyntaxKind::VERBATIM_BODY && t.text.contains("$not$ %literal"));
        assert!(body_is_raw);
        assert!(!toks.iter().any(|t| t.kind == SyntaxKind::DOLLAR));
        assert!(!toks.iter().any(|t| t.kind == SyntaxKind::COMMENT));
    }

    #[test]
    fn argument_taking_verbatim_separates_args_from_body() {
        let toks = lex("\\begin{minted}[frame=single]{python}\nprint(\"$x$\")\n\\end{minted}");
        let kinds: Vec<_> = toks.iter().map(|t| t.kind).collect();
        for expected in [
            SyntaxKind::L_BRACKET,
            SyntaxKind::R_BRACKET,
            SyntaxKind::L_BRACE,
        ] {
            assert!(kinds.contains(&expected));
        }
        let body_is_raw = toks
            .iter()
            .any(|t| t.kind == SyntaxKind::VERBATIM_BODY && t.text.contains("print(\"$x$\")"));
        assert!(body_is_raw);
        assert!(!toks.iter().any(|t| t.kind == SyntaxKind::DOLLAR));
    }

    #[test]
    fn verbatim_body_starting_with_bracket_is_not_an_argument() {
        let toks = lex("\\begin{lstlisting}\n[1,2,3]\n\\end{lstlisting}");
        let bracket_before_body = toks
            .iter()
            .take_while(|t| t.kind != SyntaxKind::VERBATIM_BODY)
            .any(|t| t.kind == SyntaxKind::L_BRACKET);
        assert!(
            !bracket_before_body,
            "the bracket on the body's first line must not be lexed as an argument"
        );
        assert!(
            toks.iter()
                .any(|t| t.kind == SyntaxKind::VERBATIM_BODY && t.text.contains("[1,2,3]"))
        );
    }
}