use std::collections::{HashMap, HashSet};
use crate::ast::{command_name, group_command_name, group_inner_source, nth_group, nth_group_text};
use crate::semantic::signature::{ArgKind, ArgSpec, CommandSig, EnvironmentSig, SignatureDb};
use crate::semantic::xparse;
use crate::syntax::{SyntaxKind, SyntaxNode};
use rowan::NodeOrToken;
use smol_str::SmolStr;
/// Builds a [`SignatureDb`] from every macro and environment definition found
/// anywhere under `root`.
///
/// Each `COMMAND` node whose name is a known definer (see [`DefKind::of`]) is
/// handed to the matching scanner, which records both a signature and a
/// summary of the definition body. Once the whole tree has been visited, the
/// recorded bodies are used to mark verbatim-like commands and environments.
pub fn scan_definitions(root: &SyntaxNode) -> SignatureDb {
    let mut signatures = SignatureDb::default();
    let mut command_bodies = HashMap::<SmolStr, DefBody>::new();
    let mut environment_bodies = HashMap::<SmolStr, DefBody>::new();

    for node in root.descendants() {
        if node.kind() != SyntaxKind::COMMAND {
            continue;
        }
        // Nameless commands and non-definers are both simply skipped.
        let Some(kind) = command_name(&node).and_then(|name| DefKind::of(&name)) else {
            continue;
        };
        match kind {
            DefKind::Command => scan_newcommand(&node, &mut signatures, &mut command_bodies),
            DefKind::Def => scan_def(&node, &mut signatures, &mut command_bodies),
            DefKind::Environment => {
                scan_newenvironment(&node, &mut signatures, &mut environment_bodies)
            }
            DefKind::XparseCommand => {
                scan_xparse_command(&node, &mut signatures, &mut command_bodies)
            }
            DefKind::XparseEnvironment => {
                scan_xparse_environment(&node, &mut signatures, &mut environment_bodies)
            }
        }
    }

    // Post-passes: these need the complete body map so helper chains resolve
    // regardless of definition order.
    apply_verbatim_flags(&mut signatures, &command_bodies);
    apply_verbatim_env_flags(&mut signatures, &environment_bodies, &command_bodies);
    signatures
}
/// Summary of one definition body, as recorded by `record_body`.
struct DefBody {
// True when the body text itself matched `catcode_signal`.
signal: bool,
// Macro names found in the body text by `called_macros`.
called: Vec<SmolStr>,
}
/// Marks commands whose body reaches a catcode signal as verbatim.
///
/// A command qualifies when its recorded signature has at least one argument
/// and its body either contains a signal itself or calls (transitively) a
/// recorded macro that does. For each qualifying command the last argument is
/// removed from the signature and `verbatim` is set.
fn apply_verbatim_flags(db: &mut SignatureDb, bodies: &HashMap<SmolStr, DefBody>) {
    // Collect first: the database cannot be mutated while it is being queried.
    let mut flagged: Vec<SmolStr> = Vec::new();
    for name in bodies.keys() {
        let takes_args = db.command(name).is_some_and(|sig| !sig.args.is_empty());
        if takes_args && reaches_signal(name, bodies, &mut HashSet::new()) {
            flagged.push(name.clone());
        }
    }

    for name in flagged {
        let Some(existing) = db.command(&name) else {
            continue;
        };
        let mut updated = existing.clone();
        updated.args.pop();
        updated.verbatim = true;
        db.insert_command(name, updated);
    }
}
/// Marks environments whose recorded body reaches a catcode signal.
///
/// `env_bodies` holds the bodies recorded for environments; `bodies` holds
/// command bodies and is used to follow macro calls made from an environment
/// body. Every matching environment gets `verbatim_body = true` and
/// `reflow = false`.
fn apply_verbatim_env_flags(
    db: &mut SignatureDb,
    env_bodies: &HashMap<SmolStr, DefBody>,
    bodies: &HashMap<SmolStr, DefBody>,
) {
    // Collect first: the database cannot be mutated while it is being queried.
    let mut flagged: Vec<SmolStr> = Vec::new();
    for (name, body) in env_bodies {
        if db.environment(name).is_none() {
            continue;
        }
        if reaches_signal_body(body, bodies, &mut HashSet::new()) {
            flagged.push(name.clone());
        }
    }

    for name in flagged {
        let Some(existing) = db.environment(&name) else {
            continue;
        };
        let mut updated = existing.clone();
        updated.verbatim_body = true;
        updated.reflow = false;
        db.insert_environment(name, updated);
    }
}
/// Reports whether the macro `name` reaches a catcode signal, either in its
/// own recorded body or through the macros that body calls.
///
/// `visited` collects every name already explored in this search; a repeated
/// name yields `false`, which is what makes call cycles terminate. Names with
/// no recorded body also yield `false`.
fn reaches_signal(
    name: &str,
    bodies: &HashMap<SmolStr, DefBody>,
    visited: &mut HashSet<SmolStr>,
) -> bool {
    // `insert` returns false when the name was already present.
    visited.insert(SmolStr::new(name))
        && bodies
            .get(name)
            .is_some_and(|body| reaches_signal_body(body, bodies, visited))
}
/// Reports whether `body` carries a catcode signal itself or calls a macro
/// that reaches one (see [`reaches_signal`]).
///
/// Callees are tried in recorded order and the search stops at the first hit.
fn reaches_signal_body(
    body: &DefBody,
    bodies: &HashMap<SmolStr, DefBody>,
    visited: &mut HashSet<SmolStr>,
) -> bool {
    if body.signal {
        return true;
    }
    for callee in &body.called {
        if reaches_signal(callee, bodies, visited) {
            return true;
        }
    }
    false
}
/// Textual heuristic: does `body` look like it changes category codes?
///
/// Returns `true` when the text contains `\@makeother`, `\@sanitize` or
/// `\dospecials`, or when it contains both `\catcode` and the substring `12`
/// anywhere (not necessarily adjacent).
fn catcode_signal(body: &str) -> bool {
    const DIRECT_MARKERS: [&str; 3] = ["\\@makeother", "\\@sanitize", "\\dospecials"];

    if DIRECT_MARKERS.iter().any(|marker| body.contains(*marker)) {
        return true;
    }
    body.contains("\\catcode") && body.contains("12")
}
/// Extracts the names of control words appearing in `body`.
///
/// A name is the run of ASCII letters and `@` that immediately follows a
/// backslash. Backslashes followed by anything else contribute nothing.
/// Names are returned in order of appearance and duplicates are kept.
fn called_macros(body: &str) -> Vec<SmolStr> {
    let mut names = Vec::new();
    // Everything after the first split piece starts right after a backslash.
    for segment in body.split('\\').skip(1) {
        let end = segment
            .find(|c: char| !(c.is_ascii_alphabetic() || c == '@'))
            .unwrap_or(segment.len());
        if end > 0 {
            names.push(SmolStr::new(&segment[..end]));
        }
    }
    names
}
/// The family of definition command a control-sequence name belongs to,
/// as classified by `DefKind::of`.
enum DefKind {
// `\newcommand`-style definers; handled by `scan_newcommand`.
Command,
// `\def`-style primitives; handled by `scan_def`.
Def,
// `\newenvironment`-style definers; handled by `scan_newenvironment`.
Environment,
// `\NewDocumentCommand`-style definers; handled by `scan_xparse_command`.
XparseCommand,
// `\NewDocumentEnvironment`-style definers; handled by `scan_xparse_environment`.
XparseEnvironment,
}
impl DefKind {
    /// Classifies a command name (without the leading backslash) as one of
    /// the recognised definition commands, or returns `None` for any other
    /// name.
    ///
    /// The xparse `…ExpandableDocumentCommand` definers take the same
    /// `{cmd}{arg spec}{code}` arguments as `\NewDocumentCommand`, so they
    /// are classified as [`DefKind::XparseCommand`] as well.
    fn of(name: &str) -> Option<Self> {
        let kind = match name {
            "newcommand" | "renewcommand" | "providecommand" | "DeclareRobustCommand" => {
                DefKind::Command
            }
            "def" | "edef" | "gdef" | "xdef" => DefKind::Def,
            "newenvironment" | "renewenvironment" => DefKind::Environment,
            "NewDocumentCommand"
            | "RenewDocumentCommand"
            | "ProvideDocumentCommand"
            | "DeclareDocumentCommand"
            | "NewExpandableDocumentCommand"
            | "RenewExpandableDocumentCommand"
            | "ProvideExpandableDocumentCommand"
            | "DeclareExpandableDocumentCommand" => DefKind::XparseCommand,
            "NewDocumentEnvironment"
            | "RenewDocumentEnvironment"
            | "ProvideDocumentEnvironment"
            | "DeclareDocumentEnvironment" => DefKind::XparseEnvironment,
            _ => return None,
        };
        Some(kind)
    }
}
fn scan_newcommand(
command: &SyntaxNode,
db: &mut SignatureDb,
bodies: &mut HashMap<SmolStr, DefBody>,
) {
let Some(def) = resolve_command_def(command) else {
return;
};
let (arity, first_optional) = newcommand_arity(&def.host);
record_body(
bodies,
&def.name,
nth_group(&def.host, def.first_arg_group).as_ref(),
);
db.insert_command(
def.name,
CommandSig {
args: latex2e_args(arity, first_optional),
sectioning: None,
verbatim: false,
rule: false,
inline: false,
},
);
}
/// Records a `\def`-style definition.
///
/// The defined macro is the command node that directly follows `command`
/// (trivia aside). Its parameter count and body come from
/// [`def_params_and_body`], and every parameter is registered as a required
/// brace argument. Nothing is recorded when no such command follows.
fn scan_def(command: &SyntaxNode, db: &mut SignatureDb, bodies: &mut HashMap<SmolStr, DefBody>) {
    let Some(target) = adjacent_sibling_command(command) else {
        return;
    };
    let Some(name) = command_name(&target) else {
        return;
    };

    let (param_count, body) = def_params_and_body(&target);
    record_body(bodies, &name, body.as_ref());

    let sig = CommandSig {
        args: latex2e_args(param_count, false),
        sectioning: None,
        verbatim: false,
        rule: false,
        inline: false,
    };
    db.insert_command(name, sig);
}
/// Finds the parameter count and body group of a `\def`-style definition
/// whose defined macro is `name_node`.
///
/// If `name_node` already has a group child, that group is the body and the
/// count is 0. Otherwise the siblings after `name_node` are walked: each
/// `HASH` token counts one parameter, trivia and `WORD` tokens are skipped,
/// and the first `GROUP` node is the body. Any other element ends the walk
/// without a body. The count is capped at 9.
fn def_params_and_body(name_node: &SyntaxNode) -> (usize, Option<SyntaxNode>) {
    if let Some(attached) = nth_group(name_node, 0) {
        return (0, Some(attached));
    }

    let mut hashes = 0usize;
    let mut body = None;
    let mut cursor = name_node.next_sibling_or_token();
    while let Some(element) = cursor {
        cursor = match element {
            NodeOrToken::Token(token) => {
                if token.kind() == SyntaxKind::HASH {
                    hashes += 1;
                } else if !is_trivia(token.kind()) && token.kind() != SyntaxKind::WORD {
                    // Unexpected token: stop without a body.
                    break;
                }
                token.next_sibling_or_token()
            }
            NodeOrToken::Node(node) => {
                // The first node ends the walk; only a group counts as a body.
                if node.kind() == SyntaxKind::GROUP {
                    body = Some(node);
                }
                break;
            }
        };
    }
    (hashes.min(9), body)
}
fn record_body(bodies: &mut HashMap<SmolStr, DefBody>, name: &str, body: Option<&SyntaxNode>) {
let text = body.map(group_inner_source).unwrap_or_default();
bodies.insert(
SmolStr::new(name),
DefBody {
signal: catcode_signal(&text),
called: called_macros(&text),
},
);
}
/// Records a `\newenvironment`-style definition.
///
/// The environment name is the trimmed text of the first brace group; blank
/// names are ignored. The second brace group is recorded as the body, and the
/// argument list is derived from the bracket groups on `command`.
fn scan_newenvironment(
    command: &SyntaxNode,
    db: &mut SignatureDb,
    env_bodies: &mut HashMap<SmolStr, DefBody>,
) {
    let Some(raw_name) = nth_group_text(command, 0) else {
        return;
    };
    let env_name = raw_name.trim();
    if env_name.is_empty() {
        return;
    }

    let second_group = nth_group(command, 1);
    record_body(env_bodies, env_name, second_group.as_ref());

    let (arity, first_optional) = newcommand_arity(command);
    let args = latex2e_args(arity, first_optional);
    db.insert_environment(env_name, environment_sig(args));
}
fn scan_xparse_command(
command: &SyntaxNode,
db: &mut SignatureDb,
bodies: &mut HashMap<SmolStr, DefBody>,
) {
let Some(def) = resolve_command_def(command) else {
return;
};
let Some(spec) = nth_group(&def.host, def.first_arg_group) else {
return;
};
record_body(
bodies,
&def.name,
nth_group(&def.host, def.first_arg_group + 1).as_ref(),
);
db.insert_command(
def.name,
CommandSig {
args: xparse::parse_spec(&group_inner_source(&spec)),
sectioning: None,
verbatim: false,
rule: false,
inline: false,
},
);
}
/// A resolved command definition, as produced by `resolve_command_def`.
struct CommandDef {
// Name of the command being defined.
name: String,
// Node whose groups hold the definition's arguments: the definer itself in
// the braced form, the following sibling command in the unbraced form.
host: SyntaxNode,
// Index of the first group on `host` that comes after the name
// (1 in the braced form, 0 in the unbraced form).
first_arg_group: usize,
}
fn resolve_command_def(command: &SyntaxNode) -> Option<CommandDef> {
if command.children().any(|c| c.kind() == SyntaxKind::GROUP) {
let name = nth_group(command, 0)
.as_ref()
.and_then(group_command_name)?;
return Some(CommandDef {
name,
host: command.clone(),
first_arg_group: 1,
});
}
let sibling = adjacent_sibling_command(command)?;
let name = command_name(&sibling)?;
Some(CommandDef {
name,
host: sibling,
first_arg_group: 0,
})
}
/// Returns the `COMMAND` node that follows `command` among its siblings,
/// looking past trivia tokens only.
///
/// Any other intervening element, or running out of siblings, gives `None`.
fn adjacent_sibling_command(command: &SyntaxNode) -> Option<SyntaxNode> {
    let mut cursor = command.next_sibling_or_token();
    loop {
        // `?` ends the search when there are no more siblings.
        match cursor? {
            NodeOrToken::Token(token) if is_trivia(token.kind()) => {
                cursor = token.next_sibling_or_token();
            }
            NodeOrToken::Node(node) if node.kind() == SyntaxKind::COMMAND => {
                return Some(node);
            }
            _ => return None,
        }
    }
}
/// True for the token kinds that are skipped when looking for the next
/// meaningful sibling: whitespace, newlines and comments.
fn is_trivia(kind: SyntaxKind) -> bool {
    [
        SyntaxKind::WHITESPACE,
        SyntaxKind::NEWLINE,
        SyntaxKind::COMMENT,
    ]
    .contains(&kind)
}
/// Records a `\NewDocumentEnvironment`-style definition.
///
/// Group 0 is the environment name (trimmed; blank names are ignored),
/// group 1 is the xparse argument spec and group 2 is recorded as the body.
/// Nothing is recorded when the spec group is missing.
fn scan_xparse_environment(
    command: &SyntaxNode,
    db: &mut SignatureDb,
    env_bodies: &mut HashMap<SmolStr, DefBody>,
) {
    let Some(raw_name) = nth_group_text(command, 0) else {
        return;
    };
    let env_name = raw_name.trim();
    if env_name.is_empty() {
        return;
    }
    let Some(spec) = nth_group(command, 1) else {
        return;
    };

    let third_group = nth_group(command, 2);
    record_body(env_bodies, env_name, third_group.as_ref());

    let args = xparse::parse_spec(&group_inner_source(&spec));
    db.insert_environment(env_name, environment_sig(args));
}
fn newcommand_arity(command: &SyntaxNode) -> (usize, bool) {
let optionals: Vec<SyntaxNode> = command
.children()
.filter(|child| child.kind() == SyntaxKind::OPTIONAL)
.collect();
let arity = optionals
.first()
.and_then(optional_number)
.unwrap_or(0)
.min(9); (arity, optionals.len() >= 2)
}
/// Parses the text of a bracket group as an unsigned integer.
///
/// One leading `[` and one trailing `]` are removed if present, the rest is
/// trimmed, and `None` is returned when what remains is not a valid `usize`.
fn optional_number(node: &SyntaxNode) -> Option<usize> {
    let raw = node.text().to_string();
    let mut digits = raw.as_str();
    if let Some(rest) = digits.strip_prefix('[') {
        digits = rest;
    }
    if let Some(rest) = digits.strip_suffix(']') {
        digits = rest;
    }
    digits.trim().parse().ok()
}
/// Builds the argument list for a LaTeX2e-style definition with `arity`
/// arguments.
///
/// Every argument is a required brace argument, except that the first one
/// becomes a non-required bracket argument when `first_optional` is set.
/// `prose` and `collapse` are always false.
fn latex2e_args(arity: usize, first_optional: bool) -> Vec<ArgSpec> {
    let mut args = Vec::with_capacity(arity);
    for index in 0..arity {
        let optional = first_optional && index == 0;
        args.push(ArgSpec {
            required: !optional,
            kind: if optional {
                ArgKind::Bracket
            } else {
                ArgKind::Brace
            },
            prose: false,
            collapse: false,
        });
    }
    args
}
/// Builds the signature given to every scanned environment: the supplied
/// `args`, `reflow` switched on, every other flag off and no outline.
fn environment_sig(args: Vec<ArgSpec>) -> EnvironmentSig {
    let sig = EnvironmentSig {
        args,
        // The only flag that starts out enabled.
        reflow: true,
        // Everything else stays off for user-defined environments.
        verbatim_body: false,
        math: false,
        code: false,
        align: false,
        no_indent: false,
        list: false,
        block: false,
        outline: None,
    };
    sig
}
// Unit tests for `scan_definitions`: each test parses a small LaTeX snippet
// and checks the signatures recorded for the macros/environments it defines.
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::{parse, reconstruct};
// Scans `src` for definitions, after asserting that `reconstruct` returns
// the input unchanged.
fn db_of(src: &str) -> SignatureDb {
assert_eq!(reconstruct(src), src, "reconstruct must round-trip");
scan_definitions(&SyntaxNode::new_root(parse(src).green))
}
// Projects an argument list onto just the kind of each argument.
fn arg_kinds(args: &[ArgSpec]) -> Vec<ArgKind> {
args.iter().map(|a| a.kind).collect()
}
// --- Braced `\newcommand` family -------------------------------------
#[test]
fn newcommand_counts_mandatory_args() {
let db = db_of("\\newcommand{\\foo}[2]{#1#2}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace, ArgKind::Brace]);
assert!(sig.args.iter().all(|a| a.required));
}
// A second bracket group makes the first argument an optional bracket one.
#[test]
fn newcommand_optional_first_arg() {
let db = db_of("\\newcommand{\\foo}[2][d]{#1#2}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Bracket, ArgKind::Brace]);
assert!(!sig.args[0].required);
assert!(sig.args[1].required);
}
#[test]
fn newcommand_zero_args() {
let db = db_of("\\newcommand{\\foo}{bar}\n");
assert!(db.command("foo").expect("foo defined").args.is_empty());
}
#[test]
fn renew_and_provide_recognized() {
let db = db_of("\\renewcommand{\\a}[1]{x}\\providecommand{\\b}[1]{y}\n");
assert_eq!(db.command("a").unwrap().args.len(), 1);
assert_eq!(db.command("b").unwrap().args.len(), 1);
}
// --- Environments and xparse specs -----------------------------------
#[test]
fn newenvironment_args() {
let db = db_of("\\newenvironment{thm}[1]{begin #1}{end}\n");
let sig = db.environment("thm").expect("thm defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace]);
assert!(sig.reflow);
assert!(!sig.verbatim_body);
assert!(!sig.math);
}
#[test]
fn xparse_command_spec() {
let db = db_of("\\NewDocumentCommand{\\foo}{m O{d} m}{x}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(
arg_kinds(&sig.args),
vec![ArgKind::Brace, ArgKind::Bracket, ArgKind::Brace]
);
}
#[test]
fn xparse_environment_spec() {
let db = db_of("\\NewDocumentEnvironment{env}{O{x} m}{a}{b}\n");
let sig = db.environment("env").expect("env defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Bracket, ArgKind::Brace]);
}
// --- Unbraced form: `\newcommand\foo...` ------------------------------
#[test]
fn unbraced_newcommand_extracted() {
let db = db_of("\\newcommand\\foo[2]{#1#2}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace, ArgKind::Brace]);
assert!(sig.args.iter().all(|a| a.required));
}
#[test]
fn unbraced_optional_first_arg() {
let db = db_of("\\newcommand\\foo[2][d]{#1#2}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Bracket, ArgKind::Brace]);
assert!(!sig.args[0].required);
assert!(sig.args[1].required);
}
#[test]
fn unbraced_zero_args() {
let db = db_of("\\newcommand\\foo{x}\n");
assert!(db.command("foo").expect("foo defined").args.is_empty());
}
// Whitespace between the definer and the name does not break the binding.
#[test]
fn unbraced_spaced_binds() {
let db = db_of("\\newcommand \\foo[1]{x}\n");
assert_eq!(db.command("foo").unwrap().args.len(), 1);
}
#[test]
fn unbraced_renewcommand() {
let db = db_of("\\renewcommand\\foo[1]{x}\n");
assert_eq!(db.command("foo").unwrap().args.len(), 1);
}
#[test]
fn unbraced_xparse_command() {
let db = db_of("\\NewDocumentCommand\\foo{m O{d} m}{x}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(
arg_kinds(&sig.args),
vec![ArgKind::Brace, ArgKind::Bracket, ArgKind::Brace]
);
}
// Plain text between the definer and the next command: nothing is defined.
#[test]
fn unbraced_stray_text_not_bound() {
let db = db_of("\\newcommand foo \\bar{x}\n");
assert!(db.command("foo").is_none());
assert!(db.command("bar").is_none());
}
// --- General scanning behaviour ----------------------------------------
#[test]
fn redefinition_last_wins() {
let db = db_of("\\newcommand{\\foo}[1]{x}\\renewcommand{\\foo}[3]{y}\n");
assert_eq!(db.command("foo").unwrap().args.len(), 3);
}
#[test]
fn garbage_definition_degrades_to_no_insert() {
let db = db_of("\\newcommand\n");
assert!(db.command("foo").is_none());
}
#[test]
fn nested_definition_collected() {
let db = db_of("\\begin{document}\n\\newcommand{\\foo}[1]{x}\n\\end{document}\n");
assert_eq!(db.command("foo").unwrap().args.len(), 1);
}
#[test]
fn commented_definition_ignored() {
let db = db_of("% \\newcommand{\\foo}[1]{x}\n");
assert!(db.command("foo").is_none());
}
// --- Verbatim detection for commands -----------------------------------
// A flagged command has `verbatim` set and its last argument removed.
#[test]
fn verbatim_makeother_flagged() {
let db = db_of("\\newcommand\\shellcmd[1]{\\@makeother\\$#1}\n");
let sig = db.command("shellcmd").expect("shellcmd defined");
assert!(sig.verbatim);
assert!(sig.args.is_empty());
}
#[test]
fn verbatim_catcode_flagged() {
let db = db_of("\\newcommand\\shellcmd[1]{\\catcode 36=12 #1}\n");
assert!(db.command("shellcmd").expect("shellcmd defined").verbatim);
}
#[test]
fn verbatim_dospecials_flagged() {
let db = db_of("\\newcommand\\shellcmd[1]{\\let\\do\\@makeother\\dospecials #1}\n");
assert!(db.command("shellcmd").expect("shellcmd defined").verbatim);
}
// Only the last argument is dropped; earlier ones stay in the signature.
#[test]
fn verbatim_keeps_leading_args() {
let db = db_of("\\newcommand\\mycode[2]{\\@makeother\\$#1#2}\n");
let sig = db.command("mycode").expect("mycode defined");
assert!(sig.verbatim);
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace]);
}
// The signal may live in a helper macro called from the body.
#[test]
fn verbatim_via_chained_helper() {
let db =
db_of("\\newcommand\\setup{\\@makeother\\$}\\newcommand\\shellcmd[1]{\\setup#1}\n");
assert!(db.command("shellcmd").expect("shellcmd defined").verbatim);
assert!(!db.command("setup").expect("setup defined").verbatim);
}
// Mutually recursive macros without a signal must not loop forever.
#[test]
fn verbatim_chain_cycle_terminates() {
let db = db_of("\\newcommand\\a[1]{\\b#1}\\newcommand\\b[1]{\\a#1}\n");
assert!(!db.command("a").expect("a defined").verbatim);
assert!(!db.command("b").expect("b defined").verbatim);
}
#[test]
fn ordinary_command_not_verbatim() {
let db = db_of("\\newcommand\\foo[1]{\\emph{#1}}\n");
assert!(!db.command("foo").expect("foo defined").verbatim);
}
// A signal in a zero-argument command does not flag that command.
#[test]
fn verbatim_needs_an_argument() {
let db = db_of("\\newcommand\\setup{\\@makeother\\$}\n");
assert!(!db.command("setup").expect("setup defined").verbatim);
}
// --- `\def` family -----------------------------------------------------
#[test]
fn def_helper_chain_followed() {
let db = db_of("\\def\\setup{\\@makeother\\$}\\newcommand\\shellcmd[1]{\\setup#1}\n");
assert!(db.command("shellcmd").expect("shellcmd defined").verbatim);
assert!(!db.command("setup").expect("setup defined").verbatim);
}
#[test]
fn def_direct_verbatim_flagged() {
let db = db_of("\\def\\shellcmd#1{\\@makeother\\$#1}\n");
let sig = db.command("shellcmd").expect("shellcmd defined");
assert!(sig.verbatim);
assert!(sig.args.is_empty());
}
#[test]
fn def_zero_params() {
let db = db_of("\\def\\foo{x}\n");
let sig = db.command("foo").expect("foo defined");
assert!(sig.args.is_empty());
assert!(!sig.verbatim);
}
#[test]
fn def_counts_params() {
let db = db_of("\\def\\foo#1#2{#1#2}\n");
let sig = db.command("foo").expect("foo defined");
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace, ArgKind::Brace]);
}
#[test]
fn def_variants_scanned() {
let db = db_of("\\edef\\a#1{x}\\gdef\\b{y}\\xdef\\c#1{\\@makeother\\$#1}\n");
assert_eq!(db.command("a").expect("a defined").args.len(), 1);
assert!(db.command("b").expect("b defined").args.is_empty());
let c = db.command("c").expect("c defined");
assert!(c.verbatim);
assert!(c.args.is_empty());
}
#[test]
fn def_chain_through_def_helpers() {
let db = db_of(
"\\def\\inner{\\@makeother\\$}\\def\\outer{\\inner}\\newcommand\\cmd[1]{\\outer#1}\n",
);
assert!(db.command("cmd").expect("cmd defined").verbatim);
}
#[test]
fn verbatim_xparse_flagged() {
let db = db_of("\\NewDocumentCommand\\shellcmd{m}{\\@makeother\\$#1}\n");
let sig = db.command("shellcmd").expect("shellcmd defined");
assert!(sig.verbatim);
assert!(sig.args.is_empty());
}
// --- Verbatim detection for environments -------------------------------
// A flagged environment gets `verbatim_body` set and `reflow` cleared.
#[test]
fn env_makeother_flagged() {
let db = db_of("\\newenvironment{shellenv}{\\@makeother\\$}{}\n");
let sig = db.environment("shellenv").expect("shellenv defined");
assert!(sig.verbatim_body);
assert!(!sig.reflow); }
// Environment arguments are kept when the environment is flagged.
#[test]
fn env_catcode_flagged() {
let db = db_of("\\newenvironment{shellenv}[1]{\\catcode 36=12 }{}\n");
let sig = db.environment("shellenv").expect("shellenv defined");
assert!(sig.verbatim_body);
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Brace]);
}
#[test]
fn env_via_chained_helper() {
let db =
db_of("\\newcommand\\setup{\\@makeother\\$}\\newenvironment{shellenv}{\\setup}{}\n");
assert!(
db.environment("shellenv")
.expect("shellenv defined")
.verbatim_body
);
}
#[test]
fn env_without_signal_not_flagged() {
let db = db_of("\\newenvironment{remark}{\\par\\noindent\\textbf{Remark.}}{\\par}\n");
let sig = db.environment("remark").expect("remark defined");
assert!(!sig.verbatim_body);
assert!(sig.reflow);
}
#[test]
fn xparse_env_makeother_flagged() {
let db = db_of("\\NewDocumentEnvironment{shellenv}{O{x}}{\\dospecials}{}\n");
let sig = db.environment("shellenv").expect("shellenv defined");
assert!(sig.verbatim_body);
assert_eq!(arg_kinds(&sig.args), vec![ArgKind::Bracket]);
}
}