Skip to main content

texform_core/
column_parser.rs

1//! Column template parser aligned with MathJax ColumnParser semantics.
2//!
3//! This module parses array column templates like `c|c|c` into a structured
4//! `ColumnSpec` value.
5//!
6//! Note:
7//! - Built-in column handlers are implemented.
8//! - `\newcolumntype` runtime extensions are intentionally not supported.
9
10use std::fmt;
11
12use crate::dimension::is_valid_dimension_unit;
13use texform_interface::column::{
14    ArrayPadding, ColumnAlign, ColumnSpec, FrameLine, FrameSide, LineStyle, RowAlign, VerticalAlign,
15};
16
/// Step budget for the main loop of `parse_column_template`; once the loop has
/// run more than this many times the parse fails with
/// `ColumnParseError::MaxColumns`.
const MAX_COLUMNS: usize = 10_000;
18
/// Errors produced while parsing an array column template.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ColumnParseError {
    /// The parsing loop ran out of its step budget.
    MaxColumns,
    /// The given character is not a recognised column specifier.
    BadPreamToken(char),
    /// The width argument of the given specifier is not a valid dimension.
    MissingColumnDimOrUnits(char),
    /// The template ended before the argument of the given specifier.
    MissingArgForColumn(char),
    /// A `{` was opened but never closed.
    MissingCloseBrace,
    /// The repeat count of `*` is not a plain number, or a `#` parameter
    /// reference in a macro template could not be resolved.
    ColArgNotNum,
}

impl fmt::Display for ColumnParseError {
    /// Writes the human-readable message associated with each variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::MaxColumns => {
                f.write_str("Too many column specifiers (perhaps looping column definitions?)")
            }
            Self::BadPreamToken(token) => write!(f, "Illegal pream-token ({token})"),
            Self::MissingColumnDimOrUnits(spec) => write!(
                f,
                "Missing dimension or its units for {spec} column declaration"
            ),
            Self::MissingArgForColumn(spec) => {
                write!(f, "Missing argument for {spec} column declaration")
            }
            Self::MissingCloseBrace => f.write_str("Missing close brace"),
            Self::ColArgNotNum => {
                f.write_str("First argument to * column specifier must be a number")
            }
        }
    }
}
53
54pub fn parse_column_template(template: &str) -> Result<ColumnSpec, ColumnParseError> {
55    let mut state = ColumnState::new(template);
56    let mut n = 0usize;
57
58    while state.cursor < state.template.len() {
59        if n > MAX_COLUMNS {
60            return Err(ColumnParseError::MaxColumns);
61        }
62        n += 1;
63        let current_char = state
64            .next_char()
65            .ok_or(ColumnParseError::MissingCloseBrace)?;
66        state.current_char = current_char;
67        handle_column_char(current_char, &mut state)?;
68    }
69
70    let mut spec = ColumnSpec::new(template.to_string(), state.template.clone());
71    set_column_aligns(&state, &mut spec);
72    set_column_widths(&state, &mut spec);
73    set_column_spacing(&state, &mut spec);
74    set_column_lines(&state, &mut spec);
75    set_padding(&state, &mut spec);
76    set_column_extras(&state, &mut spec);
77
78    Ok(spec)
79}
80
81#[derive(Clone)]
82struct ColumnState {
83    template: String,
84    cursor: usize,
85    current_char: char,
86    column_index: usize,
87    column_aligns: Vec<Option<ColumnAlign>>,
88    column_widths: Vec<Option<String>>,
89    column_spacing: Vec<Option<String>>,
90    column_lines: Vec<Option<LineStyle>>,
91    column_starts: Vec<Option<String>>,
92    column_ends: Vec<Option<String>>,
93    column_extras: Vec<bool>,
94    row_aligns: Vec<Option<RowAlign>>,
95}
96
97impl ColumnState {
98    fn new(template: &str) -> Self {
99        ColumnState {
100            template: template.to_string(),
101            cursor: 0,
102            current_char: '\0',
103            column_index: 0,
104            column_aligns: Vec::new(),
105            column_widths: Vec::new(),
106            column_spacing: Vec::new(),
107            column_lines: Vec::new(),
108            column_starts: Vec::new(),
109            column_ends: Vec::new(),
110            column_extras: Vec::new(),
111            row_aligns: Vec::new(),
112        }
113    }
114
115    fn next_char(&mut self) -> Option<char> {
116        let rest = self.template.get(self.cursor..)?;
117        let mut chars = rest.chars();
118        let current_char = chars.next()?;
119        self.cursor += current_char.len_utf8();
120        Some(current_char)
121    }
122
123    fn peek_char(&self) -> Option<char> {
124        self.template.get(self.cursor..)?.chars().next()
125    }
126}
127
128fn handle_column_char(c: char, state: &mut ColumnState) -> Result<(), ColumnParseError> {
129    match c {
130        'l' => {
131            set_column_align(state, state.column_index, ColumnAlign::Left);
132            state.column_index += 1;
133            Ok(())
134        }
135        'c' => {
136            set_column_align(state, state.column_index, ColumnAlign::Center);
137            state.column_index += 1;
138            Ok(())
139        }
140        'r' => {
141            set_column_align(state, state.column_index, ColumnAlign::Right);
142            state.column_index += 1;
143            Ok(())
144        }
145        'p' => get_column(state, VerticalAlign::Top, Some(ColumnAlign::Left)),
146        'm' => get_column(state, VerticalAlign::Middle, Some(ColumnAlign::Left)),
147        'b' => get_column(state, VerticalAlign::Bottom, Some(ColumnAlign::Left)),
148        'w' | 'W' => get_column(state, VerticalAlign::Top, None),
149        '|' => {
150            add_rule(state, LineStyle::Solid);
151            Ok(())
152        }
153        ':' => {
154            add_rule(state, LineStyle::Dashed);
155            Ok(())
156        }
157        '>' => {
158            let value = get_braces(state)?;
159            append_column_start(state, state.column_index, &value);
160            Ok(())
161        }
162        '<' => {
163            let idx = state.column_index.saturating_sub(1);
164            let value = get_braces(state)?;
165            append_column_end(state, idx, &value);
166            Ok(())
167        }
168        '@' => {
169            let value = get_braces(state)?;
170            add_at(state, value);
171            Ok(())
172        }
173        '!' => {
174            let value = get_braces(state)?;
175            add_bang(state, value);
176            Ok(())
177        }
178        '*' => repeat(state),
179        'P' => macro_column(state, ">{$}p{#1}<{$}", 1),
180        'M' => macro_column(state, ">{$}m{#1}<{$}", 1),
181        'B' => macro_column(state, ">{$}b{#1}<{$}", 1),
182        ' ' => Ok(()),
183        _ => Err(ColumnParseError::BadPreamToken(c)),
184    }
185}
186
187fn get_column(
188    state: &mut ColumnState,
189    vertical: VerticalAlign,
190    default_align: Option<ColumnAlign>,
191) -> Result<(), ColumnParseError> {
192    let align = if let Some(align) = default_align {
193        align
194    } else {
195        get_align(state)?
196    };
197    let width = get_dimen(state)?;
198    set_column_align(state, state.column_index, align);
199    set_option_string(&mut state.column_widths, state.column_index, width.clone());
200    set_option(
201        &mut state.row_aligns,
202        state.column_index,
203        RowAlign {
204            vertical,
205            width,
206            align,
207        },
208    );
209    state.column_index += 1;
210    Ok(())
211}
212
213fn get_dimen(state: &mut ColumnState) -> Result<String, ColumnParseError> {
214    let dim = get_braces(state)?;
215    if !is_valid_dimension(&dim) {
216        return Err(ColumnParseError::MissingColumnDimOrUnits(
217            state.current_char,
218        ));
219    }
220    Ok(dim)
221}
222
223fn get_align(state: &mut ColumnState) -> Result<ColumnAlign, ColumnParseError> {
224    let align = get_braces(state)?;
225    let lowered = align.to_lowercase();
226    Ok(match lowered.as_str() {
227        "l" => ColumnAlign::Left,
228        "c" => ColumnAlign::Center,
229        "r" => ColumnAlign::Right,
230        _ => ColumnAlign::Empty,
231    })
232}
233
234fn get_braces(state: &mut ColumnState) -> Result<String, ColumnParseError> {
235    while matches!(state.peek_char(), Some(' ')) {
236        state.next_char();
237    }
238
239    if state.cursor >= state.template.len() {
240        return Err(ColumnParseError::MissingArgForColumn(state.current_char));
241    }
242
243    if state.peek_char() != Some('{') {
244        return Ok(state.next_char().unwrap().to_string());
245    }
246
247    state.next_char(); // consume '{'
248    let start = state.cursor;
249    let mut braces = 1usize;
250
251    while state.cursor < state.template.len() {
252        let ch = state.next_char().unwrap();
253        match ch {
254            '\\' => {
255                // Keep escaped content verbatim while skipping brace matching.
256                if state.cursor < state.template.len() {
257                    state.next_char();
258                }
259            }
260            '{' => braces += 1,
261            '}' => {
262                braces -= 1;
263                if braces == 0 {
264                    let end = state.cursor - 1; // consumed '}' is one byte
265                    return Ok(state.template[start..end].to_string());
266                }
267            }
268            _ => {}
269        }
270    }
271
272    Err(ColumnParseError::MissingCloseBrace)
273}
274
275fn macro_column(
276    state: &mut ColumnState,
277    macro_template: &str,
278    n: usize,
279) -> Result<(), ColumnParseError> {
280    let mut args = Vec::with_capacity(n);
281    for _ in 0..n {
282        args.push(get_braces(state)?);
283    }
284    let expansion = substitute_args(&args, macro_template)?;
285    let rest = state.template[state.cursor..].to_string();
286    state.template = format!("{expansion}{rest}");
287    state.cursor = 0;
288    Ok(())
289}
290
291fn add_rule(state: &mut ColumnState, style: LineStyle) {
292    if get_option(&state.column_lines, state.column_index).is_some() {
293        add_at(state, r"\,".to_string());
294    }
295    set_option(&mut state.column_lines, state.column_index, style);
296    if get_option(&state.column_spacing, state.column_index).as_deref() == Some("0") {
297        set_option_string(
298            &mut state.column_starts,
299            state.column_index,
300            r"\hspace{.5em}".to_string(),
301        );
302    }
303}
304
305fn add_at(state: &mut ColumnState, macro_text: String) {
306    let column_index = state.column_index;
307    set_column_extra(state, column_index, true);
308    set_column_align(state, column_index, ColumnAlign::Center);
309
310    if get_option(&state.column_lines, column_index).is_some() {
311        if get_option(&state.column_spacing, column_index).as_deref() == Some(".5em") {
312            if column_index > 0 {
313                append_column_start(state, column_index - 1, r"\hspace{.25em}");
314            }
315        } else if get_option(&state.column_spacing, column_index).is_none() && column_index > 0 {
316            append_column_end(state, column_index - 1, r"\hspace{.5em}");
317        }
318    }
319
320    set_option_string(&mut state.column_starts, column_index, macro_text);
321    set_option_string(&mut state.column_spacing, column_index, "0".to_string());
322    state.column_index += 1;
323    set_option_string(
324        &mut state.column_spacing,
325        state.column_index,
326        "0".to_string(),
327    );
328}
329
330fn add_bang(state: &mut ColumnState, macro_text: String) {
331    let column_index = state.column_index;
332    set_column_extra(state, column_index, true);
333    set_column_align(state, column_index, ColumnAlign::Center);
334
335    let prefix = if get_option(&state.column_spacing, column_index).as_deref() == Some("0")
336        && get_option(&state.column_lines, column_index).is_some()
337    {
338        r"\hspace{.25em}"
339    } else {
340        ""
341    };
342    set_option_string(
343        &mut state.column_starts,
344        column_index,
345        format!("{prefix}{macro_text}"),
346    );
347    if get_option(&state.column_spacing, column_index).is_none() {
348        set_option_string(&mut state.column_spacing, column_index, ".5em".to_string());
349    }
350
351    state.column_index += 1;
352    set_option_string(
353        &mut state.column_spacing,
354        state.column_index,
355        ".5em".to_string(),
356    );
357}
358
359fn repeat(state: &mut ColumnState) -> Result<(), ColumnParseError> {
360    let num = get_braces(state)?;
361    let cols = get_braces(state)?;
362    let parsed = num.parse::<isize>().ok();
363    if parsed.is_none() || parsed.unwrap() < 0 || parsed.unwrap().to_string() != num {
364        return Err(ColumnParseError::ColArgNotNum);
365    }
366    let n = parsed.unwrap() as usize;
367    let rest = state.template[state.cursor..].to_string();
368    state.template = format!("{}{}", cols.repeat(n), rest);
369    state.cursor = 0;
370    Ok(())
371}
372
373fn substitute_args(args: &[String], text: &str) -> Result<String, ColumnParseError> {
374    let mut out = String::new();
375    let chars: Vec<char> = text.chars().collect();
376    let mut cursor = 0usize;
377
378    while cursor < chars.len() {
379        let current_char = chars[cursor];
380        if current_char == '\\' {
381            out.push(current_char);
382            cursor += 1;
383            if cursor < chars.len() {
384                out.push(chars[cursor]);
385                cursor += 1;
386            }
387            continue;
388        }
389        if current_char == '#' {
390            cursor += 1;
391            if cursor >= chars.len() {
392                return Err(ColumnParseError::ColArgNotNum);
393            }
394            let marker = chars[cursor];
395            if marker == '#' {
396                out.push('#');
397                cursor += 1;
398                continue;
399            }
400            if !marker.is_ascii_digit() || marker == '0' {
401                return Err(ColumnParseError::ColArgNotNum);
402            }
403            let idx = (marker as u8 - b'1') as usize;
404            if idx >= args.len() {
405                return Err(ColumnParseError::ColArgNotNum);
406            }
407            out.push_str(&args[idx]);
408            cursor += 1;
409            continue;
410        }
411        out.push(current_char);
412        cursor += 1;
413    }
414
415    Ok(out)
416}
417
418fn set_column_aligns(state: &ColumnState, spec: &mut ColumnSpec) {
419    spec.column_align = state
420        .column_aligns
421        .iter()
422        .map(|a| a.unwrap_or(ColumnAlign::Center))
423        .collect();
424}
425
426fn set_column_widths(state: &ColumnState, spec: &mut ColumnSpec) {
427    if !state.column_widths.iter().any(|w| w.is_some()) {
428        return;
429    }
430    let mut widths = state.column_widths.clone();
431    if widths.len() < state.column_aligns.len() {
432        widths.push(Some("auto".to_string()));
433    }
434    spec.column_width = widths
435        .into_iter()
436        .map(|w| w.unwrap_or_else(|| "auto".to_string()))
437        .collect();
438}
439
440fn set_column_spacing(state: &ColumnState, spec: &mut ColumnSpec) {
441    if !state.column_spacing.iter().any(|s| s.is_some()) {
442        return;
443    }
444    let mut spacing = state.column_spacing.clone();
445    if spacing.len() < state.column_aligns.len() {
446        spacing.push(Some("1em".to_string()));
447    }
448    spec.column_spacing = spacing
449        .into_iter()
450        .skip(1)
451        .map(|s| s.unwrap_or_else(|| "1em".to_string()))
452        .collect();
453}
454
455fn set_column_lines(state: &ColumnState, spec: &mut ColumnSpec) {
456    if !state.column_lines.iter().any(|l| l.is_some()) {
457        return;
458    }
459    let mut lines = state.column_lines.clone();
460    if let Some(Some(style)) = lines.first().copied() {
461        spec.frame.push(FrameLine {
462            side: FrameSide::Left,
463            style,
464        });
465    }
466    if lines.len() > state.column_aligns.len() {
467        if let Some(Some(style)) = lines.pop() {
468            spec.frame.push(FrameLine {
469                side: FrameSide::Right,
470                style,
471            });
472        }
473    } else if lines.len() < state.column_aligns.len() {
474        lines.push(Some(LineStyle::None));
475    }
476    if lines.len() > 1 {
477        spec.column_lines = lines
478            .into_iter()
479            .skip(1)
480            .map(|l| l.unwrap_or(LineStyle::None))
481            .collect();
482    }
483}
484
485fn set_padding(state: &ColumnState, spec: &mut ColumnSpec) {
486    if state.column_aligns.is_empty() {
487        return;
488    }
489    let left_extra = state.column_extras.first().copied().unwrap_or(false);
490    let last_column_index = state.column_aligns.len() - 1;
491    let right_extra = state
492        .column_extras
493        .get(last_column_index)
494        .copied()
495        .unwrap_or(false);
496    if !left_extra && !right_extra {
497        return;
498    }
499
500    let left = get_option(&state.column_spacing, 0).unwrap_or_else(|| ".5em".to_string());
501    let right = if right_extra {
502        get_option(&state.column_spacing, last_column_index).unwrap_or_else(|| ".5em".to_string())
503    } else {
504        ".5em".to_string()
505    };
506    spec.array_padding = Some(ArrayPadding { left, right });
507}
508
509fn set_column_extras(state: &ColumnState, spec: &mut ColumnSpec) {
510    let n = [
511        state.column_aligns.len(),
512        state.column_starts.len(),
513        state.column_ends.len(),
514        state.column_extras.len(),
515        state.row_aligns.len(),
516    ]
517    .into_iter()
518    .max()
519    .unwrap_or(0);
520    spec.column_start = (0..n)
521        .map(|index| get_option(&state.column_starts, index).unwrap_or_default())
522        .collect();
523    spec.column_end = (0..n)
524        .map(|index| get_option(&state.column_ends, index).unwrap_or_default())
525        .collect();
526    spec.column_extra = (0..n)
527        .map(|index| state.column_extras.get(index).copied().unwrap_or(false))
528        .collect();
529    spec.row_align = (0..n)
530        .map(|index| get_option(&state.row_aligns, index))
531        .collect();
532}
533
534fn is_valid_dimension(raw: &str) -> bool {
535    let s = raw.trim();
536    if s.is_empty() {
537        return false;
538    }
539
540    let chars: Vec<char> = s.chars().collect();
541    let mut cursor = 0usize;
542
543    if matches!(chars.get(cursor), Some('+') | Some('-')) {
544        cursor += 1;
545    }
546
547    let mut int_digits = 0usize;
548    while matches!(chars.get(cursor), Some(ch) if ch.is_ascii_digit()) {
549        cursor += 1;
550        int_digits += 1;
551    }
552
553    let mut frac_digits = 0usize;
554    if matches!(chars.get(cursor), Some('.') | Some(',')) {
555        cursor += 1;
556        while matches!(chars.get(cursor), Some(ch) if ch.is_ascii_digit()) {
557            cursor += 1;
558            frac_digits += 1;
559        }
560    }
561
562    if int_digits == 0 && frac_digits == 0 {
563        return false;
564    }
565
566    while matches!(chars.get(cursor), Some(ch) if ch.is_whitespace()) {
567        cursor += 1;
568    }
569
570    let unit_start = cursor;
571    while matches!(chars.get(cursor), Some(ch) if ch.is_ascii_alphabetic()) {
572        cursor += 1;
573    }
574    if unit_start == cursor {
575        return false;
576    }
577    let unit: String = chars[unit_start..cursor].iter().collect();
578    if !is_valid_dimension_unit(&unit) {
579        return false;
580    }
581
582    while matches!(chars.get(cursor), Some(ch) if ch.is_whitespace()) {
583        cursor += 1;
584    }
585
586    cursor == chars.len()
587}
588
/// Stores `value` at `index`, growing `vec` with `None` entries as needed.
fn set_option<T: Clone>(vec: &mut Vec<Option<T>>, index: usize, value: T) {
    match vec.get_mut(index) {
        Some(slot) => *slot = Some(value),
        None => {
            // Pad up to `index`, then place the value at `index` itself.
            vec.resize(index, None);
            vec.push(Some(value));
        }
    }
}
595
596fn set_option_string(vec: &mut Vec<Option<String>>, index: usize, value: String) {
597    set_option(vec, index, value);
598}
599
600fn set_column_align(state: &mut ColumnState, index: usize, value: ColumnAlign) {
601    set_option(&mut state.column_aligns, index, value);
602}
603
604fn set_column_extra(state: &mut ColumnState, index: usize, value: bool) {
605    if state.column_extras.len() <= index {
606        state.column_extras.resize(index + 1, false);
607    }
608    state.column_extras[index] = value;
609}
610
/// Returns a clone of the value stored at `index`, or `None` when the index is
/// out of range or the slot is unset.
fn get_option<T: Clone>(vec: &[Option<T>], index: usize) -> Option<T> {
    match vec.get(index) {
        Some(Some(value)) => Some(value.clone()),
        _ => None,
    }
}
614
615fn append_column_start(state: &mut ColumnState, index: usize, value: &str) {
616    let mut cur = get_option(&state.column_starts, index).unwrap_or_default();
617    cur.push_str(value);
618    set_option_string(&mut state.column_starts, index, cur);
619}
620
621fn append_column_end(state: &mut ColumnState, index: usize, value: &str) {
622    let mut cur = get_option(&state.column_ends, index).unwrap_or_default();
623    cur.push_str(value);
624    set_option_string(&mut state.column_ends, index, cur);
625}