texlang_stdlib/
prefix.rs

//! The `\global`, `\long` and `\outer` prefix commands
//!
//! The `\long` and `\outer` commands are designed to place restrictions on macros
//!    to avoid performance problems.
//! These restrictions are described in the TeXBook.
//! Texlang does not currently impose these restrictions because hardware is much better than in the 70s,
//!   and minimizing the complexity of Texlang's code is more important than stopping TeX users
//!   from writing slow TeX code.
//! However, Texlang does enforce the rule that `\long` and `\outer` can only come before
//!  `\def`, `\gdef`, `\edef` and `\xdef`.
//!
//! # Developer notes on `\global`
//!
//! The `\global` command here is pretty much a nightmare to implement.
//! One of the core principles of the Texlang implementation is to remove global state,
//!   but `\global` makes this really hard.
//! The reason is that it changes the behavior, at run time, of a bunch of
//!   other commands like `\def` and `\advance`, and it also changes the semantics
//!   of variable assignment.
//! It is impossible to scope `\global` tightly because of its wide effects.
//!
//! The approach here has two parts.
//!
//! First, for variable assignment, we just reimplement what happens in the VM
//! except we pass a flag that makes the assignment global. This is not too bad
//! as it's only a few lines of code.
//!
//! For commands, it's a little messier.
//! We maintain a component which has a flag `global` that is set to true by
//!   the `\global` command.
//! Commands that can be prefixed with `\global` read the flag and act accordingly.
//! The problem is that we need the global flag to be reset to false
//!   at some point; otherwise, `\global` would make *all* subsequent assignments global.
//! To do this we introduce a convention: any command which can be prefixed
//!   by `\global` reads the flag a single time using the [Component::read_and_reset_global]
//!   method.
//! This method returns the flag value and resets the flag to false.
//!
//! In order for the convention to work correctly it is essential that *all* code
//!   paths within the command call [read_and_reset_global](Component::read_and_reset_global) -
//!   even if they don't use the result!
//! For example `\gdef` always creates a macro in the global scope, but it still needs to
//!   call [read_and_reset_global](Component::read_and_reset_global).
//! This behavior should be verified with unit tests, and this module provides
//!   an [assert_global_is_false](get_assert_global_is_false) execution command
//!   to make this easy.
//!
//! Finally, commands which can be prefixed with `\global` are manually added
//!   to the hash set inside the [Component].
//! This set is used to validate that the command that follows `\global` is
//!   allowed to be prefixed by it.
52
53use crate::alias;
54use crate::def;
55use crate::math;
56use std::collections::HashSet;
57use texcraft_stdext::collections::groupingmap;
58use texlang::token::trace;
59use texlang::traits::*;
60use texlang::*;
61
62/// Component for the prefix commands.
63#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
64pub struct Component {
65    scope: groupingmap::Scope,
66    global_defs_value: i32,
67    #[cfg_attr(feature = "serde", serde(skip))]
68    tags: Tags,
69}
70
71impl Default for Component {
72    fn default() -> Self {
73        Component {
74            scope: groupingmap::Scope::Local,
75            global_defs_value: 0,
76            tags: Default::default(),
77        }
78    }
79}
80
81struct Tags {
82    can_be_prefixed_with_any: HashSet<command::Tag>,
83    can_be_prefixed_with_global: HashSet<command::Tag>,
84    global_tag: command::Tag,
85    long_tag: command::Tag,
86    outer_tag: command::Tag,
87}
88
89impl Default for Tags {
90    fn default() -> Self {
91        Self {
92            can_be_prefixed_with_any: vec![def::def_tag()].into_iter().collect(),
93            can_be_prefixed_with_global: vec![math::get_variable_op_tag(), alias::let_tag()]
94                .into_iter()
95                .collect(),
96            global_tag: GLOBAL_TAG.get(),
97            long_tag: LONG_TAG.get(),
98            outer_tag: OUTER_TAG.get(),
99        }
100    }
101}
102
103impl Component {
104    /// Read the value of the global flag and reset the flag to false.
105    ///
106    /// See the module documentation for correct usage of this method.
107    #[inline]
108    pub fn read_and_reset_global(&mut self) -> groupingmap::Scope {
109        match self.global_defs_value.cmp(&0) {
110            std::cmp::Ordering::Less => groupingmap::Scope::Local,
111            std::cmp::Ordering::Equal => {
112                std::mem::replace(&mut self.scope, groupingmap::Scope::Local)
113            }
114            std::cmp::Ordering::Greater => groupingmap::Scope::Global,
115        }
116    }
117
118    fn set_scope(&mut self, scope: groupingmap::Scope) {
119        self.scope = if self.global_defs_value == 0 {
120            scope
121        } else {
122            // If either globaldefs override is enabled we skip setting the scope here so that
123            // we can avoid setting it in the (hotter) variable assignment path.
124            groupingmap::Scope::Local
125        }
126    }
127}
128
129#[derive(Default, Clone, Copy)]
130struct Prefix {
131    global: Option<token::Token>,
132    long: Option<token::Token>,
133    outer: Option<token::Token>,
134}
135
136impl Prefix {
137    fn get_one(&self) -> (token::Token, Kind) {
138        if let Some(global_token) = self.global {
139            (global_token, Kind::Global)
140        } else if let Some(long_token) = self.long {
141            (long_token, Kind::Long)
142        } else if let Some(outer_token) = self.outer {
143            (outer_token, Kind::Outer)
144        } else {
145            panic!("")
146        }
147    }
148}
149
150/// Get the `\globaldefs` command.
151pub fn get_globaldefs<S: HasComponent<Component>>() -> command::BuiltIn<S> {
152    command::BuiltIn::new_variable(variable::Command::new_singleton(
153        |s, _| &s.component().global_defs_value,
154        |s, _| &mut s.component_mut().global_defs_value,
155    ))
156}
157
158/// Get the `\global` command.
159pub fn get_global<S: HasComponent<Component>>() -> command::BuiltIn<S> {
160    command::BuiltIn::new_execution(global_primitive_fn).with_tag(GLOBAL_TAG.get())
161}
162
163static GLOBAL_TAG: command::StaticTag = command::StaticTag::new();
164
165#[inline]
166pub fn variable_assignment_scope_hook<S: HasComponent<Component>>(
167    state: &mut S,
168) -> groupingmap::Scope {
169    state.component_mut().read_and_reset_global()
170}
171
172// Get the `\long` command.
173pub fn get_long<S: HasComponent<Component>>() -> command::BuiltIn<S> {
174    command::BuiltIn::new_execution(long_primitive_fn).with_tag(LONG_TAG.get())
175}
176
177static LONG_TAG: command::StaticTag = command::StaticTag::new();
178
179/// Get the `\outer` command.
180pub fn get_outer<S: HasComponent<Component>>() -> command::BuiltIn<S> {
181    command::BuiltIn::new_execution(outer_primitive_fn).with_tag(OUTER_TAG.get())
182}
183
184static OUTER_TAG: command::StaticTag = command::StaticTag::new();
185
186fn global_primitive_fn<S: HasComponent<Component>>(
187    global_token: token::Token,
188    input: &mut vm::ExecutionInput<S>,
189) -> command::Result<()> {
190    process_prefixes(
191        Prefix {
192            global: Some(global_token),
193            long: None,
194            outer: None,
195        },
196        input,
197    )
198}
199
200fn long_primitive_fn<S: HasComponent<Component>>(
201    long_token: token::Token,
202    input: &mut vm::ExecutionInput<S>,
203) -> command::Result<()> {
204    process_prefixes(
205        Prefix {
206            global: None,
207            long: Some(long_token),
208            outer: None,
209        },
210        input,
211    )
212}
213
214fn outer_primitive_fn<S: HasComponent<Component>>(
215    outer_token: token::Token,
216    input: &mut vm::ExecutionInput<S>,
217) -> command::Result<()> {
218    process_prefixes(
219        Prefix {
220            global: None,
221            long: None,
222            outer: Some(outer_token),
223        },
224        input,
225    )
226}
227
228fn process_prefixes<S: HasComponent<Component>>(
229    mut prefix: Prefix,
230    input: &mut vm::ExecutionInput<S>,
231) -> command::Result<()> {
232    complete_prefix(&mut prefix, input)?;
233    match input.peek()? {
234        None => Err(error::SimpleEndOfInputError::new(
235            input.vm(),
236            "end of input while looking for a command to prefix",
237        )
238        .with_note(
239            r"prefix commands (\global, \long, \outer) must be followed by a command to prefix",
240        )
241        .into()),
242        Some(&t) => match t.value() {
243            token::Value::ControlSequence(name) => {
244                // First check if it's a variable command.
245                if let Some(command::Command::Variable(_)) =
246                    input.commands_map_mut().get_command(&name)
247                {
248                    assert_only_global_prefix(t, prefix, input)?;
249                    if prefix.global.is_some() {
250                        input
251                            .state_mut()
252                            .component_mut()
253                            .set_scope(groupingmap::Scope::Global);
254                    }
255                    return Ok(());
256                }
257                // Next check if it's a command that can be prefixed by any of the prefix command.
258                let component = input.state().component();
259                let tag = input.commands_map().get_tag(&name);
260                if let Some(tag) = tag {
261                    if component.tags.can_be_prefixed_with_any.contains(&tag) {
262                        if prefix.global.is_some() {
263                            input
264                                .state_mut()
265                                .component_mut()
266                                .set_scope(groupingmap::Scope::Global);
267                        }
268                        return Ok(());
269                    }
270                    // Next check if it's a command that can be prefixed by global only. In this case we check
271                    // that no other prefixes are present.
272                    if component.tags.can_be_prefixed_with_global.contains(&tag) {
273                        assert_only_global_prefix(t, prefix, input)?;
274                        if prefix.global.is_some() {
275                            input
276                                .state_mut()
277                                .component_mut()
278                                .set_scope(groupingmap::Scope::Global);
279                        }
280                        return Ok(());
281                    }
282                }
283                // If we make it to here, this is not a valid target for the prefix command.
284                let (prefix_token, kind) = prefix.get_one();
285                Err(Error {
286                    kind,
287                    got: input.vm().trace(t),
288                    prefix: input.vm().trace(prefix_token),
289                }
290                .into())
291            }
292            _ => {
293                let (prefix_token, kind) = prefix.get_one();
294                Err(Error {
295                    kind,
296                    got: input.vm().trace(t),
297                    prefix: input.vm().trace(prefix_token),
298                }
299                .into())
300            }
301        },
302    }
303}
304
305fn complete_prefix<S: HasComponent<Component>>(
306    prefix: &mut Prefix,
307    input: &mut vm::ExecutionInput<S>,
308) -> command::Result<()> {
309    // BUG: spaces and \relax are allowed after prefixes per TeX source sections 1211 and 404.
310    let found_prefix = match input.peek()? {
311        None => false,
312        Some(&t) => match t.value() {
313            token::Value::ControlSequence(name) => {
314                let tag = input.commands_map().get_tag(&name);
315                if tag == Some(input.state().component().tags.global_tag) {
316                    prefix.global = Some(t);
317                    true
318                } else if tag == Some(input.state().component().tags.outer_tag) {
319                    prefix.outer = Some(t);
320                    true
321                } else if tag == Some(input.state().component().tags.long_tag) {
322                    prefix.long = Some(t);
323                    true
324                } else {
325                    false
326                }
327            }
328            _ => false,
329        },
330    };
331    if !found_prefix {
332        return Ok(());
333    }
334    input.consume()?;
335    complete_prefix(prefix, input)
336}
337
338fn assert_only_global_prefix<S: TexlangState>(
339    token: token::Token,
340    prefix: Prefix,
341    input: &vm::ExecutionInput<S>,
342) -> command::Result<()> {
343    if let Some(outer_token) = prefix.outer {
344        Err(Error {
345            kind: Kind::Outer,
346            got: input.vm().trace(token),
347            prefix: input.vm().trace(outer_token),
348        }
349        .into())
350    } else if let Some(long_token) = prefix.long {
351        Err(Error {
352            kind: Kind::Long,
353            got: input.vm().trace(token),
354            prefix: input.vm().trace(long_token),
355        }
356        .into())
357    } else {
358        Ok(())
359    }
360}
361
362#[derive(Debug, Clone, Copy)]
363enum Kind {
364    Global,
365    Long,
366    Outer,
367}
368
369#[derive(Debug)]
370struct Error {
371    kind: Kind,
372    got: trace::SourceCodeTrace,
373    prefix: trace::SourceCodeTrace,
374}
375
376impl error::TexError for Error {
377    fn kind(&self) -> error::Kind {
378        error::Kind::Token(&self.got)
379    }
380
381    fn title(&self) -> String {
382        match self.got.token.unwrap().value() {
383            token::Value::ControlSequence(_) => {
384                format!["this command cannot be prefixed by {}", self.prefix.value]
385            }
386            _ => format![
387                "character tokens cannot be prefixed by {}",
388                self.prefix.value
389            ],
390        }
391    }
392
393    fn source_annotation(&self) -> String {
394        format!["cannot by prefixed by {}", self.prefix.value]
395    }
396
397    fn notes(&self) -> Vec<error::display::Note> {
398        let guidance = match self.kind {
399            Kind::Global => {
400                r"see the documentation for \global for the list of commands it can be used with"
401            }
402            Kind::Long => {
403                r"the \long prefix can only be used with \def, \gdef, \edef and \xdef (or their aliases)"
404            }
405            Kind::Outer => {
406                r"the \outer prefix can only be used with \def, \gdef, \edef and \xdef (or their aliases)"
407            }
408        };
409        vec![
410            guidance.into(),
411            error::display::Note::SourceCodeTrace("the prefix appeared here:".into(), &self.prefix),
412        ]
413    }
414}
415
416/// Get an execution command that checks that the global flag is off.
417///
418/// This command is used for unit testing Texlang.
419/// It tests that functions that can be prefixed with `\global`
420/// are following the convention described in the module docs.
421/// To use it, create a test for the following TeX snippet:
422/// ```tex
423/// \global \command <input to command> \assertGlobalIsFalse
424/// ```
425pub fn get_assert_global_is_false<S: HasComponent<Component>>() -> command::BuiltIn<S> {
426    fn noop_execution_cmd_fn<S: HasComponent<Component>>(
427        token: token::Token,
428        input: &mut vm::ExecutionInput<S>,
429    ) -> command::Result<()> {
430        match input.state_mut().component_mut().read_and_reset_global() {
431            groupingmap::Scope::Global => Err(error::SimpleTokenError::new(
432                input.vm(),
433                token,
434                "assertion failed: global is true",
435            )
436            .into()),
437            groupingmap::Scope::Local => Ok(()),
438        }
439    }
440    command::BuiltIn::new_execution(noop_execution_cmd_fn)
441}
442
#[cfg(test)]
mod test {
    use super::*;
    use crate::{script, testing::*, the};
    use std::collections::HashMap;
    use texlang::variable;
    use texlang::vm::implement_has_component;

    /// Test state: the script and prefix components plus a single integer
    /// that backs the `\i` variable command.
    #[derive(Default)]
    struct State {
        script: script::Component,
        prefix: Component,
        integer: i32,
    }

    impl TexlangState for State {
        // Route variable assignments through the prefix component so that
        // `\global` and `\globaldefs` take effect.
        fn variable_assignment_scope_hook(state: &mut Self) -> groupingmap::Scope {
            variable_assignment_scope_hook(state)
        }
    }

    implement_has_component![State, (script::Component, script), (Component, prefix),];

    fn initial_commands() -> HashMap<&'static str, command::BuiltIn<State>> {
        HashMap::from([
            ("global", get_global()),
            ("globaldefs", get_globaldefs()),
            ("long", get_long()),
            ("outer", get_outer()),
            ("i", get_integer()),
            ("the", the::get_the()),
            ("def", def::get_def()),
            ("advance", math::get_advance()),
            (
                "noOpExpansion",
                command::BuiltIn::new_expansion(|_, _| Ok(vec![])),
            ),
            (
                "noOpExecution",
                command::BuiltIn::new_execution(|_, _| Ok(())),
            ),
        ])
    }

    /// The `\i` variable command, backed by `State::integer`.
    fn get_integer() -> command::BuiltIn<State> {
        variable::Command::new_singleton(
            |state: &State, _: variable::Index| -> &i32 { &state.integer },
            |state: &mut State, _: variable::Index| -> &mut i32 { &mut state.integer },
        )
        .into()
    }

    test_suite![
        expansion_equality_tests(
            (non_global, r"\i=5{\i=8}\the\i", "5"),
            (non_global_2, r"\i=5\i=6{\i=8}\the\i", "6"),
            (non_global_3, r"\i=5{\i=6{\i=8 \the\i}\the\i}\the\i", "865"),
            (global, r"\i=5{\global\i=8}\the\i", "8"),
            (global_squared, r"\i=5{\global\global\i=8}\the\i", "8"),
            (long, r"\long\def\A{Hello}\A", "Hello"),
            (outer, r"\outer\def\A{Hello}\A", "Hello"),
            (
                many_prefixes,
                r"\long\outer\global\long\global\outer\def\A{Hello}\A",
                "Hello"
            ),
            (global_defs_1, r"\i=5{\globaldefs=1 \i=8}\the\i", "8"),
            (global_defs_2, r"\i=5{\globaldefs=-1\global\i=8}\the\i", "5"),
        ),
        failure_tests(
            (global_end_of_input, r"\global"),
            (global_with_character, r"\global a"),
            (global_with_undefined_command, r"\global \undefinedCommand"),
            (
                global_with_no_op_expansion_command,
                r"\global \noOpExpansion"
            ),
            (
                global_with_no_op_execution_command,
                r"\global \noOpExecution"
            ),
            (long_prefix_when_global_allowed, r"\long\advance\i 0"),
            (outer_prefix_when_global_allowed, r"\outer\advance\i 0"),
            // Variable commands accept `\global` only.
            (long_prefix_with_variable, r"\long\i=5"),
            (outer_prefix_with_variable, r"\outer\i=5"),
        ),
    ];
}