// texform_argspec/lib.rs

1use std::borrow::Cow;
2use std::ops::Deref;
3
4pub use texform_interface::syntax_node::ContentMode;
5
/// A single delimiter token referenced by the delimited and paired argument
/// forms of an argspec.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DelimiterToken {
    /// A literal character used as the delimiter.
    Char(char),
    /// A control sequence. The argspec parser stores the name that follows
    /// the backslash, without the backslash itself.
    ControlSeq(Cow<'static, str>),
}
11
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub enum ArgForm {
14    Standard,
15    Star,
16    Group,
17    Delimited {
18        open: DelimiterToken,
19        close: DelimiterToken,
20    },
21    Paired {
22        pairs: Cow<'static, [(DelimiterToken, DelimiterToken)]>,
23    },
24}
25
26#[derive(Debug, Clone, Copy, PartialEq, Eq)]
27pub enum ValueKind {
28    Content { mode: ContentMode },
29    Delimiter,
30    CSName,
31    Dimension,
32    Integer,
33    KeyVal,
34    Column,
35    Star,
36}
37
38impl ValueKind {
39    pub const fn is_content(&self) -> bool {
40        matches!(self, ValueKind::Content { .. })
41    }
42
43    pub const fn is_delimiter(&self) -> bool {
44        matches!(self, ValueKind::Delimiter)
45    }
46
47    pub const fn is_cs_name(&self) -> bool {
48        matches!(self, ValueKind::CSName)
49    }
50
51    pub const fn is_dimension(&self) -> bool {
52        matches!(self, ValueKind::Dimension)
53    }
54
55    pub const fn is_integer(&self) -> bool {
56        matches!(self, ValueKind::Integer)
57    }
58
59    pub const fn is_keyval(&self) -> bool {
60        matches!(self, ValueKind::KeyVal)
61    }
62
63    pub const fn is_column(&self) -> bool {
64        matches!(self, ValueKind::Column)
65    }
66
67    pub const fn is_star(&self) -> bool {
68        matches!(self, ValueKind::Star)
69    }
70
71    pub const fn content_mode(&self) -> Option<ContentMode> {
72        match self {
73            ValueKind::Content { mode } => Some(*mode),
74            _ => None,
75        }
76    }
77}
78
79#[derive(Debug, Clone, PartialEq, Eq)]
80pub struct ArgSpec {
81    pub required: bool,
82    pub no_leading_space: bool,
83    pub nullable: bool,
84    pub kind: ValueKind,
85    pub form: ArgForm,
86}
87
88impl ArgSpec {
89    pub const fn new(required: bool, kind: ValueKind) -> Self {
90        ArgSpec {
91            required,
92            no_leading_space: false,
93            nullable: false,
94            kind,
95            form: ArgForm::Standard,
96        }
97    }
98
99    pub const fn with_form(
100        required: bool,
101        no_leading_space: bool,
102        kind: ValueKind,
103        form: ArgForm,
104    ) -> Self {
105        ArgSpec {
106            required,
107            no_leading_space,
108            nullable: false,
109            kind,
110            form,
111        }
112    }
113
114    pub const fn mandatory(mode: ContentMode) -> Self {
115        ArgSpec {
116            required: true,
117            no_leading_space: false,
118            nullable: false,
119            kind: ValueKind::Content { mode },
120            form: ArgForm::Standard,
121        }
122    }
123
124    pub const fn optional(mode: ContentMode) -> Self {
125        ArgSpec {
126            required: false,
127            no_leading_space: false,
128            nullable: false,
129            kind: ValueKind::Content { mode },
130            form: ArgForm::Standard,
131        }
132    }
133
134    pub const fn is_required(&self) -> bool {
135        self.required
136    }
137
138    pub const fn is_optional(&self) -> bool {
139        !self.required
140    }
141}
142
143/// A parsed argspec: the structured argument list together with the source
144/// string it was parsed from. Produced by the `argspec!` compile-time macro.
145#[derive(Debug, Clone, Copy, PartialEq, Eq)]
146pub struct ParsedArgSpec {
147    pub args: &'static [ArgSpec],
148    pub source: &'static str,
149}
150
151impl Deref for ParsedArgSpec {
152    type Target = [ArgSpec];
153    fn deref(&self) -> &[ArgSpec] {
154        self.args
155    }
156}
157
158/// Owned counterpart of [`ParsedArgSpec`] for runtime-loaded specs (e.g. YAML).
159#[derive(Debug, Clone, PartialEq, Eq)]
160pub struct OwnedArgSpec {
161    pub args: Vec<ArgSpec>,
162    pub source: String,
163}
164
165impl From<ParsedArgSpec> for OwnedArgSpec {
166    fn from(value: ParsedArgSpec) -> Self {
167        Self {
168            args: value.args.to_vec(),
169            source: value.source.to_string(),
170        }
171    }
172}
173
/// Error returned when an argspec string cannot be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ArgSpecParseError {
    /// The context label that was passed to `parse_arg_specs`.
    pub context: String,
    /// Parser cursor position, counted in characters, at the moment the
    /// error was created.
    pub char_index: usize,
    /// Description of what went wrong.
    pub message: String,
}

impl ArgSpecParseError {
    /// Builds an error, taking owned copies of `context` and `message`.
    fn new(context: &str, char_index: usize, message: impl Into<String>) -> Self {
        let context = context.to_owned();
        let message = message.into();
        Self {
            context,
            char_index,
            message,
        }
    }
}

/// Formats as `invalid argspec (<context>) at char <index>: <message>`.
impl std::fmt::Display for ArgSpecParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            context,
            char_index,
            message,
        } = self;
        write!(
            f,
            "invalid argspec ({}) at char {}: {}",
            context, char_index, message
        )
    }
}

impl std::error::Error for ArgSpecParseError {}
202
203pub fn parse_arg_specs(spec: &str, context: &str) -> Result<Vec<ArgSpec>, ArgSpecParseError> {
204    ArgSpecParser::new(spec, context).parse()
205}
206
207struct ArgSpecParser<'a> {
208    chars: Vec<char>,
209    cursor: usize,
210    context: &'a str,
211}
212
213impl<'a> ArgSpecParser<'a> {
214    fn new(spec: &str, context: &'a str) -> Self {
215        ArgSpecParser {
216            chars: spec.chars().collect(),
217            cursor: 0,
218            context,
219        }
220    }
221
222    fn parse(mut self) -> Result<Vec<ArgSpec>, ArgSpecParseError> {
223        let mut specs = Vec::new();
224
225        loop {
226            self.skip_whitespace();
227            if self.eof() {
228                break;
229            }
230            specs.push(self.parse_one()?);
231        }
232
233        Ok(specs)
234    }
235
236    fn parse_one(&mut self) -> Result<ArgSpec, ArgSpecParseError> {
237        let no_leading_space = self.consume_if('!');
238        let kind_token = self
239            .next_char()
240            .ok_or_else(|| self.err("expected argument token"))?;
241
242        let (required, form, has_ignored_default) = match kind_token {
243            'm' => (true, self.parse_mandatory_form()?, false),
244            'o' => (false, ArgForm::Standard, false),
245            'O' => (false, ArgForm::Standard, true),
246            's' => (false, ArgForm::Star, false),
247            'g' => (false, ArgForm::Group, false),
248            'G' => (false, ArgForm::Group, true),
249            'r' => {
250                if self.peek_char() == Some('<') {
251                    let pairs = self.parse_pair_list()?;
252                    (true, ArgForm::Paired { pairs }, false)
253                } else {
254                    let open = self.parse_delimiter_token()?;
255                    let close = self.parse_delimiter_token()?;
256                    (true, ArgForm::Delimited { open, close }, false)
257                }
258            }
259            'R' => {
260                if self.peek_char() == Some('<') {
261                    let pairs = self.parse_pair_list()?;
262                    (true, ArgForm::Paired { pairs }, true)
263                } else {
264                    let open = self.parse_delimiter_token()?;
265                    let close = self.parse_delimiter_token()?;
266                    (true, ArgForm::Delimited { open, close }, true)
267                }
268            }
269            'd' => {
270                if self.peek_char() == Some('<') {
271                    let pairs = self.parse_pair_list()?;
272                    (false, ArgForm::Paired { pairs }, false)
273                } else {
274                    let open = self.parse_delimiter_token()?;
275                    let close = self.parse_delimiter_token()?;
276                    (false, ArgForm::Delimited { open, close }, false)
277                }
278            }
279            'D' => {
280                if self.peek_char() == Some('<') {
281                    let pairs = self.parse_pair_list()?;
282                    (false, ArgForm::Paired { pairs }, true)
283                } else {
284                    let open = self.parse_delimiter_token()?;
285                    let close = self.parse_delimiter_token()?;
286                    (false, ArgForm::Delimited { open, close }, true)
287                }
288            }
289            other => {
290                return Err(self.err(format!("unsupported argument token `{other}`")));
291            }
292        };
293
294        if has_ignored_default {
295            self.parse_ignored_default_block(kind_token)?;
296        }
297
298        let (kind, nullable) = if matches!(&form, ArgForm::Star) {
299            if self.peek_char() == Some(':') {
300                return Err(self.err("`s` does not accept value type annotation"));
301            }
302            (ValueKind::Star, false)
303        } else {
304            self.parse_value_kind_annotation()?
305        };
306
307        let spec = ArgSpec {
308            required,
309            no_leading_space,
310            nullable,
311            kind,
312            form,
313        };
314        self.validate_spec(spec)
315    }
316
317    fn parse_mandatory_form(&mut self) -> Result<ArgForm, ArgSpecParseError> {
318        if !self.consume_if('{') {
319            return Ok(ArgForm::Standard);
320        }
321
322        if !self.consume_if('}') {
323            return Err(self.err("`m` only supports required braced group syntax `m{}`"));
324        }
325
326        Ok(ArgForm::Group)
327    }
328
329    fn parse_ignored_default_block(&mut self, token: char) -> Result<(), ArgSpecParseError> {
330        if !self.consume_if('{') {
331            return Err(self.err(format!("`{token}` requires a default block like `{{...}}`")));
332        }
333
334        let mut brace_depth = 1usize;
335        while let Some(ch) = self.next_char() {
336            match ch {
337                '\\' => {
338                    if self.peek_char().is_some() {
339                        self.cursor += 1;
340                    }
341                }
342                '{' => brace_depth += 1,
343                '}' => {
344                    brace_depth -= 1;
345                    if brace_depth == 0 {
346                        return Ok(());
347                    }
348                }
349                _ => {}
350            }
351        }
352
353        Err(self.err(format!("unterminated default block for `{token}`")))
354    }
355
356    fn parse_value_kind_annotation(&mut self) -> Result<(ValueKind, bool), ArgSpecParseError> {
357        if !self.consume_if(':') {
358            return Ok((
359                ValueKind::Content {
360                    mode: ContentMode::Math,
361                },
362                false,
363            ));
364        }
365
366        let annotation = self
367            .next_char()
368            .ok_or_else(|| self.err("missing value kind annotation after `:`"))?;
369        let kind = match annotation {
370            'T' => ValueKind::Content {
371                mode: ContentMode::Text,
372            },
373            'D' => ValueKind::Delimiter,
374            'N' => ValueKind::CSName,
375            'L' => ValueKind::Dimension,
376            'I' => ValueKind::Integer,
377            'K' => ValueKind::KeyVal,
378            'C' => ValueKind::Column,
379            other => {
380                return Err(self.err(format!("unsupported value kind annotation `:{other}`")));
381            }
382        };
383        let nullable = self.consume_if('?');
384        Ok((kind, nullable))
385    }
386
387    fn parse_delimiter_token(&mut self) -> Result<DelimiterToken, ArgSpecParseError> {
388        match self.next_char() {
389            Some('\\') => Ok(DelimiterToken::ControlSeq(Cow::Owned(
390                self.parse_control_sequence_name()?,
391            ))),
392            Some(c) if c.is_whitespace() => Err(self.err("delimiter token cannot be whitespace")),
393            Some(c) => Ok(DelimiterToken::Char(c)),
394            None => Err(self.err("missing delimiter token")),
395        }
396    }
397
398    fn parse_pair_list(
399        &mut self,
400    ) -> Result<Cow<'static, [(DelimiterToken, DelimiterToken)]>, ArgSpecParseError> {
401        let mut pairs = Vec::new();
402
403        while self.consume_if('<') {
404            let open = self.parse_pair_delimiter_token()?;
405            self.expect_char(',')?;
406            let close = self.parse_pair_delimiter_token()?;
407            self.expect_char('>')?;
408            pairs.push((open, close));
409        }
410
411        if pairs.is_empty() {
412            return Err(self.err("paired form requires at least one `<open,close>` block"));
413        }
414
415        Ok(Cow::Owned(pairs))
416    }
417
418    fn parse_pair_delimiter_token(&mut self) -> Result<DelimiterToken, ArgSpecParseError> {
419        match self.next_char() {
420            Some('\\') => Ok(DelimiterToken::ControlSeq(Cow::Owned(
421                self.parse_control_sequence_name()?,
422            ))),
423            Some(c) if c.is_whitespace() => Err(self.err("pair delimiter cannot be whitespace")),
424            Some('<') | Some('>') | Some(',') => {
425                Err(self.err("`<`, `>`, `,` are reserved in pair syntax"))
426            }
427            Some(c) => Ok(DelimiterToken::Char(c)),
428            None => Err(self.err("missing pair delimiter token")),
429        }
430    }
431
432    fn parse_control_sequence_name(&mut self) -> Result<String, ArgSpecParseError> {
433        let first = self
434            .next_char()
435            .ok_or_else(|| self.err("expected control sequence name after `\\`"))?;
436
437        let mut name = String::new();
438        name.push(first);
439
440        if first.is_ascii_alphabetic() {
441            while let Some(c) = self.peek_char() {
442                if c.is_ascii_alphabetic() {
443                    name.push(c);
444                    self.cursor += 1;
445                } else {
446                    break;
447                }
448            }
449        }
450
451        Ok(name)
452    }
453
454    fn validate_spec(&self, spec: ArgSpec) -> Result<ArgSpec, ArgSpecParseError> {
455        if spec.no_leading_space && spec.required {
456            return Err(self.err("`!` prefix is only valid for optional argument forms"));
457        }
458        if spec.nullable && (spec.kind.is_star() || spec.kind.is_column()) {
459            return Err(self.err("`?` is not supported for star or column annotations"));
460        }
461
462        match &spec.form {
463            ArgForm::Standard => {
464                if spec.kind.is_star() {
465                    return Err(self.err("star value kind requires `s` form"));
466                }
467            }
468            ArgForm::Star => {
469                if spec.required {
470                    return Err(self.err("star form must be optional"));
471                }
472                if !spec.kind.is_star() {
473                    return Err(self.err("star form must use star value kind"));
474                }
475            }
476            ArgForm::Group => {
477                if spec.kind.is_star() {
478                    return Err(self.err("group form cannot use star value kind"));
479                }
480            }
481            ArgForm::Delimited { .. } | ArgForm::Paired { .. } => {
482                if spec.kind.is_star() {
483                    return Err(self.err("delimited/paired form cannot use star value kind"));
484                }
485                if spec.kind.is_delimiter() {
486                    return Err(self.err("delimiter kind cannot use delimited/paired form"));
487                }
488            }
489        }
490
491        Ok(spec)
492    }
493
494    fn skip_whitespace(&mut self) {
495        while matches!(self.peek_char(), Some(c) if c.is_whitespace()) {
496            self.cursor += 1;
497        }
498    }
499
500    fn expect_char(&mut self, target: char) -> Result<(), ArgSpecParseError> {
501        let got = self
502            .next_char()
503            .ok_or_else(|| self.err(format!("expected `{target}`")))?;
504        if got != target {
505            return Err(self.err(format!("expected `{target}`, found `{got}`")));
506        }
507        Ok(())
508    }
509
510    fn consume_if(&mut self, target: char) -> bool {
511        if self.peek_char() == Some(target) {
512            self.cursor += 1;
513            true
514        } else {
515            false
516        }
517    }
518
519    fn next_char(&mut self) -> Option<char> {
520        let ch = self.peek_char()?;
521        self.cursor += 1;
522        Some(ch)
523    }
524
525    fn peek_char(&self) -> Option<char> {
526        self.chars.get(self.cursor).copied()
527    }
528
529    fn eof(&self) -> bool {
530        self.cursor >= self.chars.len()
531    }
532
533    fn err(&self, msg: impl Into<String>) -> ArgSpecParseError {
534        ArgSpecParseError::new(self.context, self.cursor, msg)
535    }
536}