use super::token::{TexToken, TokenList};
use super::utils;
#[derive(Debug, Clone, PartialEq)]
pub enum PatternPart {
Argument(u8),
Literal(Vec<TexToken>),
}
#[derive(Debug, Clone, PartialEq)]
pub enum MacroSignature {
Simple(u8),
Pattern(Vec<PatternPart>),
}
impl MacroSignature {
pub fn num_args(&self) -> u8 {
match self {
MacroSignature::Simple(n) => *n,
MacroSignature::Pattern(parts) => parts
.iter()
.filter_map(|p| match p {
PatternPart::Argument(n) => Some(*n),
_ => None,
})
.max()
.unwrap_or(0),
}
}
}
#[derive(Debug, Clone)]
pub enum DefinitionKind {
NewCommand {
name: String,
num_args: u8,
default: Option<TokenList>,
body: TokenList,
},
RenewCommand {
name: String,
num_args: u8,
default: Option<TokenList>,
body: TokenList,
},
ProvideCommand {
name: String,
num_args: u8,
default: Option<TokenList>,
body: TokenList,
},
Def {
name: String,
signature: MacroSignature,
body: TokenList,
},
Edef {
name: String,
signature: MacroSignature,
body: TokenList,
},
Let { name: String, target: String },
NewEnvironment {
name: String,
num_args: u8,
default: Option<TokenList>,
begin_body: TokenList,
end_body: TokenList,
},
RenewEnvironment {
name: String,
num_args: u8,
default: Option<TokenList>,
begin_body: TokenList,
end_body: TokenList,
},
NewIf {
base_name: String,
},
DeclareMathOperator {
name: String,
body: TokenList,
is_starred: bool,
},
}
pub fn is_definition_command(name: &str) -> bool {
matches!(
name,
"newcommand"
| "renewcommand"
| "providecommand"
| "NewDocumentCommand"
| "RenewDocumentCommand"
| "DeclareMathOperator"
| "DeclareRobustCommand"
| "newenvironment"
| "renewenvironment"
| "def"
| "edef"
| "gdef"
| "xdef"
| "let"
| "newif"
)
}
pub fn parse_definition(
cmd_name: &str,
tokens: TokenList,
) -> Result<(DefinitionKind, TokenList), TokenList> {
match cmd_name {
"newcommand" | "renewcommand" | "providecommand" | "DeclareRobustCommand" => {
parse_newcommand_style(cmd_name, tokens)
}
"NewDocumentCommand" | "RenewDocumentCommand" => parse_xparse_style(cmd_name, tokens),
"DeclareMathOperator" => parse_declare_math_operator(tokens),
"newenvironment" | "renewenvironment" => parse_newenvironment(cmd_name, tokens),
"def" | "gdef" => parse_def(tokens, false),
"edef" | "xdef" => parse_def(tokens, true),
"let" => parse_let(tokens),
"newif" => parse_newif(tokens),
_ => Err(tokens),
}
}
pub fn parse_definitions(tokens: TokenList) -> (Vec<DefinitionKind>, TokenList) {
let mut definitions = Vec::new();
let mut result = Vec::new();
let mut iter = tokens.into_inner().into_iter().peekable();
while let Some(token) = iter.next() {
match &token {
TexToken::ControlSeq(name) if is_definition_command(name) => {
let remaining: Vec<TexToken> = iter.collect();
let remaining_list = TokenList::from_vec(remaining);
match parse_definition(name, remaining_list) {
Ok((def, rest)) => {
definitions.push(def);
iter = rest.into_inner().into_iter().peekable();
}
Err(rest) => {
result.push(token);
iter = rest.into_inner().into_iter().peekable();
}
}
}
_ => {
result.push(token);
}
}
}
(definitions, TokenList::from_vec(result))
}
fn parse_newcommand_style(
cmd_name: &str,
tokens: TokenList,
) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
if matches!(iter.peek(), Some(TexToken::Char('*'))) {
iter.next();
utils::skip_spaces(&mut iter);
}
let macro_name = match iter.peek() {
Some(TexToken::BeginGroup) => {
iter.next();
let name = match utils::read_control_seq_name(&mut iter) {
Ok(n) => n,
Err(_) => return Err(TokenList::from_vec(iter.collect())),
};
match iter.next() {
Some(TexToken::EndGroup) => {}
_ => return Err(TokenList::from_vec(iter.collect())),
}
name
}
Some(TexToken::ControlSeq(_)) => {
if let Some(TexToken::ControlSeq(name)) = iter.next() {
name
} else {
return Err(TokenList::from_vec(iter.collect()));
}
}
_ => return Err(TokenList::from_vec(iter.collect())),
};
utils::skip_spaces(&mut iter);
let num_args = if matches!(iter.peek(), Some(TexToken::Char('['))) {
iter.next();
let num = utils::read_number(&mut iter).unwrap_or_default();
utils::skip_until_char(&mut iter, ']');
num
} else {
0
};
utils::skip_spaces(&mut iter);
let default = if matches!(iter.peek(), Some(TexToken::Char('['))) {
iter.next();
let def_tokens = utils::read_until_char(&mut iter, ']');
Some(def_tokens)
} else {
None
};
utils::skip_spaces(&mut iter);
let body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
let def = match cmd_name {
"newcommand" => DefinitionKind::NewCommand {
name: macro_name,
num_args,
default,
body,
},
"renewcommand" => DefinitionKind::RenewCommand {
name: macro_name,
num_args,
default,
body,
},
"providecommand" => DefinitionKind::ProvideCommand {
name: macro_name,
num_args,
default,
body,
},
"DeclareRobustCommand" => DefinitionKind::NewCommand {
name: macro_name,
num_args,
default,
body,
},
_ => unreachable!(),
};
Ok((def, TokenList::from_vec(iter.collect())))
}
fn parse_xparse_style(
cmd_name: &str,
tokens: TokenList,
) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
let macro_name = match iter.peek() {
Some(TexToken::BeginGroup) => {
iter.next();
let name = match utils::read_control_seq_name(&mut iter) {
Ok(n) => n,
Err(_) => return Err(TokenList::from_vec(iter.collect())),
};
match iter.next() {
Some(TexToken::EndGroup) => {}
_ => return Err(TokenList::from_vec(iter.collect())),
}
name
}
Some(TexToken::ControlSeq(_)) => {
if let Some(TexToken::ControlSeq(name)) = iter.next() {
name
} else {
return Err(TokenList::from_vec(iter.collect()));
}
}
_ => return Err(TokenList::from_vec(iter.collect())),
};
utils::skip_spaces(&mut iter);
let num_args = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
let spec = utils::read_balanced_group(&mut iter);
count_mandatory_args(&spec)
} else {
0
};
utils::skip_spaces(&mut iter);
let body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
let def = match cmd_name {
"NewDocumentCommand" => DefinitionKind::NewCommand {
name: macro_name,
num_args,
default: None,
body,
},
"RenewDocumentCommand" => DefinitionKind::RenewCommand {
name: macro_name,
num_args,
default: None,
body,
},
_ => unreachable!(),
};
Ok((def, TokenList::from_vec(iter.collect())))
}
fn parse_def(tokens: TokenList, is_edef: bool) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
let macro_name = match iter.next() {
Some(TexToken::ControlSeq(name)) => name,
_ => return Err(TokenList::from_vec(iter.collect())),
};
let mut parts: Vec<PatternPart> = Vec::new();
let mut literal_acc: Vec<TexToken> = Vec::new();
while let Some(token) = iter.peek() {
match token {
TexToken::BeginGroup => break,
TexToken::Param(n) => {
if !literal_acc.is_empty() {
parts.push(PatternPart::Literal(std::mem::take(&mut literal_acc)));
}
parts.push(PatternPart::Argument(*n));
iter.next();
}
_ => {
literal_acc.push(
iter.next()
.expect("iterator guaranteed non-empty by loop condition"),
);
}
}
}
if !literal_acc.is_empty() {
parts.push(PatternPart::Literal(literal_acc));
}
let is_simple_sequence = parts
.iter()
.enumerate()
.all(|(i, p)| matches!(p, PatternPart::Argument(n) if *n as usize == i + 1));
let signature = if is_simple_sequence {
MacroSignature::Simple(parts.len() as u8)
} else {
MacroSignature::Pattern(parts)
};
let body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
let def = if is_edef {
DefinitionKind::Edef {
name: macro_name,
signature,
body,
}
} else {
DefinitionKind::Def {
name: macro_name,
signature,
body,
}
};
Ok((def, TokenList::from_vec(iter.collect())))
}
fn parse_let(tokens: TokenList) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
let new_name = match iter.next() {
Some(TexToken::ControlSeq(name)) => name,
_ => return Err(TokenList::from_vec(iter.collect())),
};
utils::skip_spaces(&mut iter);
if matches!(iter.peek(), Some(TexToken::Char('='))) {
iter.next();
utils::skip_spaces(&mut iter);
}
let target = match iter.next() {
Some(TexToken::ControlSeq(name)) => name,
_ => return Err(TokenList::from_vec(iter.collect())),
};
Ok((
DefinitionKind::Let {
name: new_name,
target,
},
TokenList::from_vec(iter.collect()),
))
}
fn parse_newif(tokens: TokenList) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
let full_name = match iter.next() {
Some(TexToken::ControlSeq(name)) => name,
_ => return Err(TokenList::from_vec(iter.collect())),
};
if !full_name.starts_with("if") {
return Err(TokenList::from_vec(iter.collect()));
}
let base_name = full_name
.strip_prefix("if")
.unwrap_or(&full_name)
.to_string();
Ok((
DefinitionKind::NewIf { base_name },
TokenList::from_vec(iter.collect()),
))
}
fn parse_declare_math_operator(
tokens: TokenList,
) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
let is_starred = if matches!(iter.peek(), Some(TexToken::Char('*'))) {
iter.next();
utils::skip_spaces(&mut iter);
true
} else {
false
};
let macro_name = match iter.peek() {
Some(TexToken::BeginGroup) => {
iter.next();
let name = match utils::read_control_seq_name(&mut iter) {
Ok(n) => n,
Err(_) => return Err(TokenList::from_vec(iter.collect())),
};
match iter.next() {
Some(TexToken::EndGroup) => {}
_ => return Err(TokenList::from_vec(iter.collect())),
}
name
}
Some(TexToken::ControlSeq(_)) => {
if let Some(TexToken::ControlSeq(name)) = iter.next() {
name
} else {
return Err(TokenList::from_vec(iter.collect()));
}
}
_ => return Err(TokenList::from_vec(iter.collect())),
};
utils::skip_spaces(&mut iter);
let body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
Ok((
DefinitionKind::DeclareMathOperator {
name: macro_name,
body,
is_starred,
},
TokenList::from_vec(iter.collect()),
))
}
fn parse_newenvironment(
cmd_name: &str,
tokens: TokenList,
) -> Result<(DefinitionKind, TokenList), TokenList> {
let mut iter = tokens.into_inner().into_iter().peekable();
utils::skip_spaces(&mut iter);
if matches!(iter.peek(), Some(TexToken::Char('*'))) {
iter.next();
utils::skip_spaces(&mut iter);
}
let env_name = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
let name_tokens = utils::read_balanced_group(&mut iter);
name_tokens
.as_slice()
.iter()
.filter_map(|t| match t {
TexToken::Char(c) => Some(*c),
_ => None,
})
.collect::<String>()
} else {
return Err(TokenList::from_vec(iter.collect()));
};
if env_name.is_empty() {
return Err(TokenList::from_vec(iter.collect()));
}
utils::skip_spaces(&mut iter);
let num_args = if matches!(iter.peek(), Some(TexToken::Char('['))) {
iter.next();
let num = utils::read_number(&mut iter).unwrap_or_default();
utils::skip_until_char(&mut iter, ']');
num
} else {
0
};
utils::skip_spaces(&mut iter);
let default = if matches!(iter.peek(), Some(TexToken::Char('['))) {
iter.next();
let def_tokens = utils::read_until_char(&mut iter, ']');
Some(def_tokens)
} else {
None
};
utils::skip_spaces(&mut iter);
let begin_body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
utils::skip_spaces(&mut iter);
let end_body = if matches!(iter.peek(), Some(TexToken::BeginGroup)) {
iter.next();
utils::read_balanced_group(&mut iter)
} else {
return Err(TokenList::from_vec(iter.collect()));
};
let def = match cmd_name {
"newenvironment" => DefinitionKind::NewEnvironment {
name: env_name,
num_args,
default,
begin_body,
end_body,
},
"renewenvironment" => DefinitionKind::RenewEnvironment {
name: env_name,
num_args,
default,
begin_body,
end_body,
},
_ => unreachable!(),
};
Ok((def, TokenList::from_vec(iter.collect())))
}
fn count_mandatory_args(spec: &TokenList) -> u8 {
spec.as_slice()
.iter()
.filter(|t| matches!(t, TexToken::Char('m') | TexToken::Char('M')))
.count() as u8
}
#[cfg(test)]
mod tests {
use super::*;
use crate::core::latex2typst::engine::lexer::tokenize;
#[test]
fn test_parse_newcommand_simple() {
let tokens = tokenize("{\\foo}{bar}");
let result = parse_newcommand_style("newcommand", tokens);
assert!(result.is_ok());
let (def, _rest) = result.unwrap();
match def {
DefinitionKind::NewCommand {
name,
num_args,
body,
..
} => {
assert_eq!(name, "foo");
assert_eq!(num_args, 0);
assert_eq!(format!("{}", body), "bar");
}
_ => panic!("Wrong definition kind"),
}
}
#[test]
fn test_parse_newcommand_with_args() {
let tokens = tokenize("{\\pair}[2]{\\langle #1, #2\\rangle}");
let result = parse_newcommand_style("newcommand", tokens);
assert!(result.is_ok());
let (def, _rest) = result.unwrap();
match def {
DefinitionKind::NewCommand {
name,
num_args,
body,
..
} => {
assert_eq!(name, "pair");
assert_eq!(num_args, 2);
assert!(format!("{}", body).contains("#1"));
assert!(format!("{}", body).contains("#2"));
}
_ => panic!("Wrong definition kind"),
}
}
#[test]
fn test_parse_def() {
let tokens = tokenize("\\foo#1#2{#1 and #2}");
let result = parse_def(tokens, false);
assert!(result.is_ok());
let (def, _rest) = result.unwrap();
match def {
DefinitionKind::Def {
name,
signature,
body,
} => {
assert_eq!(name, "foo");
assert_eq!(signature.num_args(), 2);
assert!(format!("{}", body).contains("#1"));
}
_ => panic!("Wrong definition kind"),
}
}
#[test]
fn test_parse_edef() {
let tokens = tokenize("\\bar{expanded content}");
let result = parse_def(tokens, true);
assert!(result.is_ok());
let (def, _rest) = result.unwrap();
match def {
DefinitionKind::Edef {
name,
signature,
body,
} => {
assert_eq!(name, "bar");
assert_eq!(signature.num_args(), 0);
assert!(format!("{}", body).contains("expanded"));
}
_ => panic!("Wrong definition kind: {:?}", def),
}
}
#[test]
fn test_parse_let() {
let tokens = tokenize("\\foo=\\bar");
let result = parse_let(tokens);
assert!(result.is_ok());
let (def, _rest) = result.unwrap();
match def {
DefinitionKind::Let { name, target } => {
assert_eq!(name, "foo");
assert_eq!(target, "bar");
}
_ => panic!("Wrong definition kind"),
}
}
}