1use std::{
90 borrow::Cow,
91 ops::{Deref, Range},
92 sync::Arc,
93};
94
95use crate::{
96 snapshot_map::{self, SnapshotMap},
97 BraceKind, CommandName, IfCommandName, MacroifyStream, StreamContext, Tok, Token, TokenStream,
98};
99use mitex_spec::CommandSpec;
100
101pub type Checkpoint = (snapshot_map::Snapshot,);
102
103type MacroMap<'a> = SnapshotMap<&'a str, Macro<'a>>;
104
105#[derive(Debug)]
106pub struct CmdMacro<'a> {
107 pub name: String,
108 pub num_args: u8,
109 pub opt: Option<Vec<Tok<'a>>>,
110 pub def: Vec<Tok<'a>>,
111}
112
113#[derive(Debug)]
114pub struct EnvMacro<'a> {
115 pub name: String,
116 pub num_args: u8,
117 pub opt: Option<Vec<Tok<'a>>>,
118 pub begin_def: Vec<Tok<'a>>,
119 pub end_def: Vec<Tok<'a>>,
120}
121
/// The declaration commands that define a new command or environment.
///
/// The `star` flag records whether the starred spelling was used; the
/// visible engine code ignores it when building the macro.
#[derive(Debug, Clone)]
pub enum DeclareCmdOrEnv {
    /// `\newcommand` (`renew: false`) or `\renewcommand` (`renew: true`).
    NewCommand { renew: bool, star: bool },
    /// `\providecommand`: defines the command only if it is not defined yet.
    ProvideCommand { star: bool },
    /// `\DeclareRobustCommand`: treated like a new command definition.
    DeclareRobustCommand { star: bool },
    /// `\newenvironment` (`renew: false`) or `\renewenvironment` (`renew: true`).
    NewEnvironment { renew: bool, star: bool },
}
153
154#[derive(Debug, Clone)]
155pub enum DeclareMacro {
156 CmdOrEnv(DeclareCmdOrEnv),
157 DeclareTextCommand,
164 DeclareTextCommandDefault,
168 ProvideTextCommand,
175 ProvideTextCommandDefault,
179 AtEndOfClass,
183 AtEndOfPackage,
187 AtBeginDocument,
191 AtEndDocument,
195}
196
197fn define_declarative_macros(macros: &mut MacroMap) {
198 for (name, value) in [
199 (
200 "newcommand",
201 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewCommand {
202 renew: false,
203 star: false,
204 }),
205 ),
206 (
207 "newcommand*",
208 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewCommand {
209 renew: false,
210 star: true,
211 }),
212 ),
213 (
214 "renewcommand",
215 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewCommand {
216 renew: true,
217 star: false,
218 }),
219 ),
220 (
221 "renewcommand*",
222 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewCommand {
223 renew: true,
224 star: true,
225 }),
226 ),
227 (
228 "providecommand",
229 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::ProvideCommand { star: false }),
230 ),
231 (
232 "providecommand*",
233 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::ProvideCommand { star: true }),
234 ),
235 (
236 "DeclareRobustCommand",
237 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::DeclareRobustCommand { star: false }),
238 ),
239 (
240 "DeclareRobustCommand*",
241 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::DeclareRobustCommand { star: true }),
242 ),
243 ("DeclareTextCommand", DeclareMacro::DeclareTextCommand),
244 (
245 "DeclareTextCommandDefault",
246 DeclareMacro::DeclareTextCommandDefault,
247 ),
248 ("ProvideTextCommand", DeclareMacro::ProvideTextCommand),
249 (
250 "ProvideTextCommandDefault",
251 DeclareMacro::ProvideTextCommandDefault,
252 ),
253 (
254 "newenvironment",
255 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewEnvironment {
256 renew: false,
257 star: false,
258 }),
259 ),
260 (
261 "newenvironment*",
262 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewEnvironment {
263 renew: false,
264 star: true,
265 }),
266 ),
267 (
268 "renewenvironment",
269 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewEnvironment {
270 renew: true,
271 star: false,
272 }),
273 ),
274 (
275 "renewenvironment*",
276 DeclareMacro::CmdOrEnv(DeclareCmdOrEnv::NewEnvironment {
277 renew: true,
278 star: true,
279 }),
280 ),
281 ("AtEndOfClass", DeclareMacro::AtEndOfClass),
282 ("AtEndOfPackage", DeclareMacro::AtEndOfPackage),
283 ("AtBeginDocument", DeclareMacro::AtBeginDocument),
284 ("AtEndDocument", DeclareMacro::AtEndDocument),
285 ]
286 .into_iter()
287 {
288 macros.insert(name, Macro::Declare(value));
289 }
290}
291
292static DEFAULT_MACROS: once_cell::sync::Lazy<MacroMap<'static>> =
293 once_cell::sync::Lazy::new(|| {
294 let mut macros = MacroMap::default();
295 define_declarative_macros(&mut macros);
296 macros
297 });
298
299#[derive(Debug, Clone)]
300pub enum Macro<'a> {
301 Declare(DeclareMacro),
303 Cmd(Arc<CmdMacro<'a>>),
305 Env(Arc<EnvMacro<'a>>),
307}
308
/// Bookkeeping for a macro of type `T` that is in the middle of being read.
///
/// NOTE(review): none of the code visible in this file reads or writes these
/// fields, so the per-field descriptions below are inferred from the names —
/// confirm against the users of `MacroNode`.
#[derive(Debug)]
pub struct MacroState<T> {
    /// The macro definition being read.
    pub reading: Arc<T>,
    /// Presumably a counter that protects arguments while reading — TODO confirm.
    pub arg_protect: u8,
    /// Presumably the number of tokens consumed so far — TODO confirm.
    pub has_read_tokens: u32,
}
317
318#[derive(Debug)]
319pub enum MacroNode<'a> {
320 Cmd(MacroState<CmdMacro<'a>>),
321 EnvBegin(MacroState<EnvMacro<'a>>),
322 EnvEnd(MacroState<EnvMacro<'a>>),
323 ArgSlot(Range<usize>),
324 HalfReadingTok(Range<usize>),
325}
326
/// How a declaration should be applied to the macro table.
enum UpdateAction {
    /// Define the macro; an error token is emitted if it already exists.
    New,
    /// Redefine the macro; an error token is emitted if it does not exist.
    Renew,
    /// Define the macro only if it does not exist yet.
    Provide,
}
332
/// State of one conditional on the `reading_if` stack.
#[derive(Clone, Copy, PartialEq)]
enum IfState {
    /// Inside the first branch of an `IfFalse` conditional, whose opening
    /// token was forwarded to the output.
    LitFalse,
    /// Inside the first branch of an `IfTypst` conditional.
    TypstTrue,
    /// Inside the `\else` branch of an `IfTypst` conditional.
    TypstFalse,
    /// Inside the `\else` branch of a conditional that started as `True`.
    False,
    /// Inside a branch that is expanded normally: the first branch of an
    /// `IfTrue`, or the `\else` branch of a `LitFalse`/`False` conditional.
    True,
}
341
342pub struct MacroEngine<'a> {
347 pub spec: CommandSpec,
349 macros: Cow<'a, MacroMap<'a>>,
351 env_stack: Vec<EnvMacro<'a>>,
353 pub reading_macro: Vec<MacroNode<'a>>,
355 reading_if: Vec<Option<IfState>>,
360 pub scanned_tokens: Vec<Tok<'a>>,
362}
363
364impl<'a> TokenStream<'a> for MacroEngine<'a> {
365 fn bump(&mut self, ctx: &mut StreamContext<'a>) {
366 self.do_bump(ctx);
367 }
368}
369
370impl<'a> MacroifyStream<'a> for MacroEngine<'a> {
371 fn get_macro(&self, name: &str) -> Option<Macro<'a>> {
372 self.macros.get(name).cloned()
373 }
374}
375
376impl<'a> MacroEngine<'a> {
377 pub fn new(spec: CommandSpec) -> Self {
379 Self {
380 spec,
381 macros: std::borrow::Cow::Borrowed(DEFAULT_MACROS.deref()),
382 env_stack: Vec::new(),
383 reading_macro: Vec::new(),
384 reading_if: Vec::new(),
385 scanned_tokens: Vec::new(),
386 }
387 }
388
389 fn do_bump(&mut self, ctx: &mut StreamContext<'a>) {
391 const PAGE_SIZE: usize = 4096;
394 const PEEK_CACHE_SIZE: usize = (PAGE_SIZE - 16) / std::mem::size_of::<Tok<'static>>();
396 const PEEK_CACHE_SIZE_M1: usize = PEEK_CACHE_SIZE - 1;
398
399 ctx.next_token();
400 while ctx.peek_outer.buf.len() < PEEK_CACHE_SIZE_M1 {
401 let Some(token) = ctx.peek_full() else {
402 break;
403 };
404
405 match token.0 {
406 Token::CommandName(CommandName::If(i)) => {
408 self.trapped_by_if(ctx, token, i);
409 }
410 Token::CommandName(CommandName::Else) => {
412 self.trapped_by_else(ctx, token);
413 }
414 Token::CommandName(CommandName::EndIf) => {
416 self.trapped_by_endif(ctx, token);
417 }
418 Token::CommandName(CommandName::Generic) => {
423 self.trapped_by_macro(ctx, token, &token.1[1..], false);
424 }
425 Token::CommandName(CommandName::BeginEnvironment) => {
427 self.trapped_by_macro(ctx, token, token.1, true);
428 }
429 _ => {
431 ctx.push_outer(token);
432 ctx.next_token();
433 }
434 }
435 }
436
437 if let Some(e) = ctx.peek_full() {
439 ctx.push_outer(e);
440 }
441
442 ctx.peek_outer.buf.reverse();
444
445 ctx.peek_outer.peeked = ctx.peek_outer.buf.pop();
447 }
448
449 fn skip_false_tokens(&mut self, ctx: &mut StreamContext<'a>) {
451 let mut nested = 0;
452 while let Some(kind) = ctx.peek() {
453 match kind {
454 Token::CommandName(CommandName::If(..)) => {
455 ctx.next_token();
456 nested += 1;
457 }
458 Token::CommandName(CommandName::Else) | Token::CommandName(CommandName::EndIf) => {
459 if nested == 0 {
460 break;
461 }
462 ctx.next_token();
463 nested -= 1;
464 }
465 _ => {
466 ctx.next_token();
467 }
468 }
469 }
470 }
471
472 #[inline]
474 fn trapped_by_if(&mut self, ctx: &mut StreamContext<'a>, token: Tok<'a>, i: IfCommandName) {
475 ctx.next_token();
476 match i {
477 IfCommandName::IfFalse => {
478 ctx.push_outer(token);
479 self.reading_if.push(Some(IfState::LitFalse));
480 }
481 IfCommandName::IfTrue => {
482 self.reading_if.push(Some(IfState::True));
483 }
484 IfCommandName::IfTypst => {
485 ctx.push_outer(token);
486 self.reading_if.push(Some(IfState::TypstTrue));
487 }
488 _ => {
489 ctx.push_outer(token);
490 self.reading_if.push(None);
491 }
492 }
493 }
494
495 #[inline]
497 fn trapped_by_else(&mut self, ctx: &mut StreamContext<'a>, token: Tok<'a>) {
498 ctx.next_token();
499 let last_if = self.reading_if.last().cloned().unwrap_or(None);
500 match last_if {
501 Some(IfState::TypstTrue) => {
502 self.reading_if
503 .last_mut()
504 .unwrap()
505 .replace(IfState::TypstFalse);
506 self.skip_false_tokens(ctx);
507 }
508 Some(IfState::True) => {
509 self.reading_if.last_mut().unwrap().replace(IfState::False);
510 self.skip_false_tokens(ctx);
511 }
512 Some(IfState::False) => {
513 self.reading_if.last_mut().unwrap().replace(IfState::True);
514 }
515 Some(IfState::TypstFalse) => {
516 self.reading_if
517 .last_mut()
518 .unwrap()
519 .replace(IfState::TypstTrue);
520 }
521 Some(IfState::LitFalse) => {
522 self.reading_if.last_mut().unwrap().replace(IfState::True);
523 ctx.push_outer((Token::CommandName(CommandName::EndIf), "\\fi"));
524 }
525 None => {
526 ctx.push_outer(token);
527 }
528 }
529 }
530
531 #[inline]
533 fn trapped_by_endif(&mut self, ctx: &mut StreamContext<'a>, token: Tok<'a>) {
534 ctx.next_token();
535 let last_if = self.reading_if.pop().unwrap_or(None);
536 match last_if {
537 Some(IfState::True | IfState::False) => {}
538 Some(IfState::TypstFalse | IfState::TypstTrue | IfState::LitFalse) | None => {
539 ctx.push_outer(token);
540 }
541 }
542 }
543
544 #[inline]
545 fn trapped_by_macro(
546 &mut self,
547 ctx: &mut StreamContext<'a>,
548 token: Tok<'a>,
549 name: &'a str,
550 is_env: bool,
551 ) -> Option<()> {
552 let Some(m) = self.macros.get(name) else {
554 ctx.push_outer(token);
555 ctx.next_token();
556 return None;
557 };
558
559 let cmd_is_env = matches!(m, Macro::Env(_));
562 if is_env != cmd_is_env {
563 ctx.push_outer(token);
564 ctx.next_token();
565 return None;
566 }
567
568 use DeclareMacro::*;
569 match m {
570 Macro::Declare(CmdOrEnv(c)) => {
571 let (name, action, m) = Self::identify_macro_update(ctx, c)?;
572
573 match action {
575 UpdateAction::New => {
576 if self.get_macro(name).is_some() {
577 ctx.push_outer((Token::Error, name));
578 }
579
580 self.add_macro(name, m);
581 }
582 UpdateAction::Renew => {
583 if self.get_macro(name).is_none() {
584 ctx.push_outer((Token::Error, name));
585 }
586
587 self.add_macro(name, m);
588 }
589 UpdateAction::Provide => {
590 if self.get_macro(name).is_none() {
591 self.add_macro(name, m);
592 }
593 }
594 }
595
596 None
597 }
598 Macro::Declare(
599 DeclareTextCommand
600 | ProvideTextCommand
601 | DeclareTextCommandDefault
602 | ProvideTextCommandDefault,
603 ) => {
604 ctx.push_outer(token);
606 ctx.next_token();
607 None
608 }
609 Macro::Declare(AtEndOfClass | AtEndOfPackage | AtBeginDocument | AtEndDocument) => {
610 ctx.push_outer(token);
612 ctx.next_token();
613 None
614 }
615 Macro::Cmd(cmd) => {
616 ctx.next_token();
617
618 let args = Self::read_macro_args(ctx, cmd.num_args, cmd.opt.clone())?;
620 let expanded = Self::expand_tokens(&args, &cmd.def);
622
623 ctx.extend_inner(expanded.into_iter().rev());
625 if ctx.peek_inner.peeked.is_none() {
628 ctx.next_token();
629 }
630
631 None
632 }
633 Macro::Env(env) => {
634 ctx.next_token();
635
636 let args = Self::read_macro_args(ctx, env.num_args, env.opt.clone())?;
638 let body = Self::read_env_body(ctx, &env.name)?;
639 let expanded_begin = Self::expand_tokens(&args, &env.begin_def);
640 let expanded_end = Self::expand_tokens(&args, &env.end_def);
641
642 ctx.extend_inner(
643 expanded_end
644 .into_iter()
645 .rev()
646 .chain(body.into_iter().rev())
647 .chain(expanded_begin.into_iter().rev()),
648 );
649
650 if ctx.peek_inner.peeked.is_none() {
653 ctx.next_token();
654 }
655
656 None
657 }
658 }
659 }
660
661 fn identify_macro_update(
662 ctx: &mut StreamContext<'a>,
663 c: &DeclareCmdOrEnv,
664 ) -> Option<(&'a str, UpdateAction, Macro<'a>)> {
665 ctx.next_not_trivia()
668 .filter(|nx| *nx == Token::Left(BraceKind::Curly))?;
669 ctx.next_not_trivia();
670
671 let name = if matches!(c, DeclareCmdOrEnv::NewEnvironment { .. }) {
672 ctx.peek_word_opt(BraceKind::Curly)?.1
673 } else {
674 ctx.peek_cmd_name_opt(BraceKind::Curly)?
675 .1
676 .strip_prefix('\\')
677 .unwrap()
678 };
679
680 #[derive(Clone, Copy, PartialEq)]
681 enum MatchState {
682 NArgs,
683 OptArgDefault,
684 DefN,
685 }
686
687 let mut state = MatchState::NArgs;
688
689 let mut num_args: u8 = 0;
690 let mut opt = None;
691 let def;
692 'match_loop: loop {
693 let nx = ctx.peek()?;
694
695 match (state, nx) {
696 (MatchState::NArgs, Token::Left(BraceKind::Bracket)) => {
697 ctx.next_not_trivia();
698 num_args = ctx.peek_u8_opt(BraceKind::Bracket).filter(|e| *e <= 9)?;
699 state = MatchState::OptArgDefault;
700 }
701 (MatchState::OptArgDefault, Token::Left(BraceKind::Bracket)) => {
702 ctx.next_token();
703 opt = Some(ctx.read_until_balanced(BraceKind::Bracket));
704 state = MatchState::DefN;
705 }
706 (_, Token::Left(BraceKind::Curly)) => {
707 ctx.next_token();
708 def = ctx.read_until_balanced(BraceKind::Curly);
709 break 'match_loop;
710 }
711 _ => {
712 def = vec![ctx.peek_full().unwrap()];
713 ctx.next_token();
714 break 'match_loop;
715 }
716 }
717 }
718
719 let mut is_env = false;
720 let mut end_def = None;
721 let action = match c {
722 DeclareCmdOrEnv::NewCommand { renew, star: _ } => {
723 if *renew {
724 UpdateAction::Renew
725 } else {
726 UpdateAction::New
727 }
728 }
729 DeclareCmdOrEnv::DeclareRobustCommand { star: _ } => UpdateAction::New,
730 DeclareCmdOrEnv::ProvideCommand { star: _ } => UpdateAction::Provide,
731 DeclareCmdOrEnv::NewEnvironment { renew, star: _ } => {
732 is_env = true;
733
734 if matches!(ctx.peek()?, Token::Left(BraceKind::Curly)) {
735 ctx.next_token();
736 end_def = Some(ctx.read_until_balanced(BraceKind::Curly));
737 }
738
739 if *renew {
740 UpdateAction::Renew
741 } else {
742 UpdateAction::New
743 }
744 }
745 };
746
747 let def = Self::process_macro_def(def);
748
749 let m = if is_env {
750 let end_def = end_def.map(|e| Self::process_macro_def(e))?;
751 Macro::Env(Arc::new(EnvMacro {
752 name: name.to_owned(),
753 num_args,
754 opt,
755 begin_def: def,
756 end_def,
757 }))
758 } else {
759 Macro::Cmd(Arc::new(CmdMacro {
760 name: name.to_owned(),
761 num_args,
762 opt,
763 def,
764 }))
765 };
766
767 Some((name, action, m))
768 }
769
770 fn read_macro_args(
772 ctx: &mut StreamContext<'a>,
773 num_args: u8,
774 opt: Option<Vec<Tok<'a>>>,
775 ) -> Option<Vec<Vec<Tok<'a>>>> {
776 let mut args = Vec::with_capacity(num_args as usize);
777
778 if num_args == 0 {
779 return Some(args);
780 }
781
782 let mut num_of_read: u8 = 0;
783 loop {
784 match ctx.peek_not_trivia() {
785 Some(Token::Left(BraceKind::Curly)) => {
786 ctx.next_token();
787 args.push(ctx.read_until_balanced(BraceKind::Curly));
788 }
789 Some(Token::Word) => {
790 let t = ctx.peek_full().unwrap().1;
791 let mut split_cnt = 0;
792 for c in t.chars() {
793 args.push(vec![(Token::Word, &t[split_cnt..split_cnt + c.len_utf8()])]);
794 split_cnt += c.len_utf8();
795 num_of_read += 1;
796 if num_of_read == num_args {
797 break;
798 }
799 }
800 if split_cnt < t.len() {
801 ctx.peek_inner.peeked.as_mut().unwrap().1 = &t[split_cnt..];
802 } else {
803 ctx.next_token();
804 }
805 if num_of_read == num_args {
806 break;
807 }
808 }
809 Some(_) => {
810 args.push(vec![ctx.peek_full().unwrap()]);
811 ctx.next_token();
812 }
813 None => {
814 break;
815 }
816 }
817
818 num_of_read += 1;
819 if num_of_read == num_args {
820 break;
821 }
822 }
823
824 if num_of_read != num_args {
825 let mut ok = false;
826 if num_args - num_of_read == 1 {
827 if let Some(opt) = opt {
828 args.push(opt);
829 ok = true;
830 }
831 }
832
833 if !ok {
834 ctx.push_outer((Token::Error, "invalid number of arguments"));
835 return None;
836 }
837 }
838
839 Some(args)
840 }
841
842 fn read_env_body(ctx: &mut StreamContext<'a>, name: &str) -> Option<Vec<Tok<'a>>> {
843 let mut bc = 0;
844 let mut body = Vec::new();
845 loop {
846 let e = ctx.peek_full()?;
847 if e.0 == Token::CommandName(CommandName::EndEnvironment) {
848 if bc == 0 {
849 if e.1 != name {
850 ctx.push_outer((Token::Error, "unmatched environment"));
851 return None;
852 }
853
854 ctx.next_token();
855 break;
856 } else {
857 body.push(e);
858 bc -= 1;
859 }
860 } else if e.0 == Token::CommandName(CommandName::BeginEnvironment) {
861 body.push(e);
862 bc += 1;
863 } else {
864 body.push(e);
865 }
866 ctx.next_token();
867 }
868
869 if bc != 0 {
870 ctx.push_outer((Token::Error, "invalid environment"));
871 return None;
872 }
873
874 Some(body)
875 }
876
877 fn expand_tokens(args: &[Vec<Tok<'a>>], tokens: &[Tok<'a>]) -> Vec<Tok<'a>> {
878 let mut result = vec![];
880 if tokens.is_empty() {
881 return result;
882 }
883
884 let mut i = 0;
885 let mut bc = 0;
886 while i < tokens.len() {
887 let e = &tokens[i];
888 match e.0 {
889 Token::MacroArg(num) => {
890 if let Some(arg) = args.get(num as usize - 1) {
891 result.extend(arg.iter().cloned());
892 }
893 }
894 Token::CommandName(CommandName::Generic) => {
895 let name = e.1.strip_prefix('\\').unwrap();
896 match name {
897 "mitexrecurse" => loop {
898 i += 1;
899 if i >= tokens.len() {
900 break;
901 }
902 let e = &tokens[i];
903 if e.0 == Token::Left(BraceKind::Curly) {
904 if bc > 0 {
905 result.push(*e);
906 }
907 bc += 1;
908 } else if e.0 == Token::Right(BraceKind::Curly) {
909 bc -= 1;
910 if bc == 0 {
911 break;
912 } else {
913 result.push(*e);
914 }
915 } else if bc != 0 {
916 result.push(*e);
917 } else if !e.0.is_trivia() {
918 result.push(*e);
919 break;
920 }
921 },
922 _ => result.push(*e),
923 }
924 }
925 _ => result.push(*e),
926 }
927 i += 1;
928 }
929
930 result
931 }
932
933 pub fn create_scope(&mut self) -> Checkpoint {
935 let _ = self.env_stack;
936
937 (self.macros.to_mut().snapshot(),)
939 }
940
941 pub fn restore(&mut self, (snapshot,): Checkpoint) {
943 let _ = self.env_stack;
944
945 self.macros.to_mut().rollback_to(snapshot);
946 }
947
948 pub fn add_macro(&mut self, name: &'a str, value: Macro<'a>) {
950 self.macros.to_mut().insert(name, value);
951 }
952
953 fn process_macro_def(mut def: Vec<(Token, &str)>) -> Vec<(Token, &str)> {
954 let mut empty_texts = false;
956 for i in 0..def.len() {
957 if def[i].0 == Token::Hash {
958 let next = def.get_mut(i + 1).unwrap();
959 if next.0 == Token::Word {
960 let Some(first_char) = next.1.chars().next() else {
961 continue;
962 };
963
964 if first_char.is_ascii_digit() {
965 let Some(num) = first_char.to_digit(10) else {
966 continue;
967 };
968 if num == 0 || num > 9 {
969 continue;
970 }
971 next.1 = &next.1[1..];
972 if next.1.is_empty() {
973 empty_texts = true;
974 }
975 def[i].0 = Token::MacroArg(num as u8);
976 }
977 }
978 }
979 }
980
981 if !empty_texts {
982 return def;
983 }
984
985 def.retain(|e| e.0 != Token::Word || !e.1.is_empty());
986 def
987 }
988}