carta-core 0.0.3

Shared conversion options, error types, and text/attribute helpers.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
//! Parsing the `$`-delimited template language into a [`Template`] tree.
//!
//! Three passes: a lexer splits the source into literal text and directive tokens (handling `$$`
//! escapes and `$-- …` comments inline); a whitespace pass strips the lines occupied by the
//! directives of a block construct; and a tree builder folds the flat token list into nested
//! `$if$`/`$for$` nodes.
//!
//! ## Comments
//!
//! `$-- …` runs to the end of its line. When the comment begins at the very start of a line (column
//! zero, no preceding character on the line) the line's newline is swallowed with it; otherwise the
//! preceding content and the newline survive.
//!
//! ## Block control directives
//!
//! Whether a `$if$…$endif$` or `$for$…$endfor$` construct is laid out as a block is decided by its
//! opening directive: if `$if$`/`$for$` is the last non-whitespace on its line, the construct is a
//! block. In a block construct, the opening's trailing newline is swallowed, and every other
//! directive of that same construct (`$elseif$`/`$else$`/`$sep$` and the closing `$endif$`/
//! `$endfor$`) that likewise ends its own line has its trailing newline removed. Any indentation
//! *before* the directive survives and prefixes onto the following content, so an indented control
//! line shifts its body rightward. When the opening shares its line with other content the construct
//! is inline: every one of its directives — even a closing one alone on its line — keeps its
//! surrounding whitespace and newline verbatim, so an inline `$for$` whose `$endfor$` sits on its
//! own line emits the blank line that follows it.

use super::TemplateError;
use super::node::{Align, Expr, Node, Pipe, Template};

impl Template {
    /// Build a [`Template`] from template source text.
    ///
    /// The source is lexed into tokens, the standalone-line whitespace pass runs over them, and
    /// the token list is then folded into a node tree.
    ///
    /// # Errors
    /// Returns a [`TemplateError`] when lexing or tree building fails (for example an
    /// unterminated directive, an unknown pipe, or an `$if$`/`$for$` without its closing
    /// directive), and when a control directive such as `$else$`, `$endif$`, or `$endfor$` is
    /// left over at the top level with no construct to close.
    pub fn parse(source: &str) -> Result<Template, TemplateError> {
        let tokens = {
            let mut lexed = lex(source)?;
            trim_standalone(&mut lexed);
            lexed
        };
        let mut builder = Builder {
            tokens: &tokens,
            pos: 0,
        };
        let nodes = builder.sequence()?;
        // `sequence` only stops short of the end at a control directive that nothing opened.
        if builder.pos == tokens.len() {
            Ok(Template { nodes })
        } else {
            Err(TemplateError::new("unexpected control directive"))
        }
    }
}

/// A lexer token: literal text, or a single directive.
///
/// Produced by `lex`, adjusted by the whitespace pass, and consumed by `Builder`.
#[derive(Debug, Clone)]
enum Token {
    /// Literal text between directives; `$$` escapes are already collapsed to `$` and `$-- …`
    /// comments are already removed.
    Text(String),
    /// A variable interpolation such as `$name$`, including any `/pipe` filters.
    Var(Expr),
    /// A partial reference: `$name()$`, or `$target:name()$` when mapped over a value.
    Partial {
        /// The partial's name (the text before `()`).
        name: String,
        /// The expression before `:` that the partial is applied over; `None` for a plain
        /// `$name()$` partial.
        map_over: Option<Expr>,
        /// The `[…]` separator literal written after `()`, if any.
        sep: Option<String>,
    },
    /// `$if(expr)$` — opens a conditional.
    If(Expr),
    /// `$elseif(expr)$` — a further branch of the enclosing conditional.
    ElseIf(Expr),
    /// `$else$` — the fallback branch of the enclosing conditional.
    Else,
    /// `$endif$` — closes a conditional.
    EndIf,
    /// `$for(expr)$` — opens a loop.
    For(Expr),
    /// `$sep$` — starts the separator section of the enclosing loop.
    Sep,
    /// `$endfor$` — closes a loop.
    EndFor,
}

/// Whether `c` counts as horizontal whitespace for the standalone-line and comment rules.
///
/// Only space, tab, and carriage return qualify; a newline is never "blank".
fn is_blank(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\r')
}

/// Split template source into a flat list of literal-text and directive tokens.
///
/// `$$` becomes a literal `$` in the surrounding text, `$-- …` comments are discarded up to the
/// end of their line, and anything else starting with `$` is handed to `directive`.
///
/// # Errors
/// Propagates the [`TemplateError`] from `directive` when a directive cannot be parsed.
fn lex(source: &str) -> Result<Vec<Token>, TemplateError> {
    let chars: Vec<char> = source.chars().collect();
    let mut tokens = Vec::new();
    // Literal text accumulated since the last directive token was emitted.
    let mut pending = String::new();
    // True while nothing (no character, no directive) has been seen on the current line; decides
    // whether a `$-- …` comment takes its newline with it.
    let mut at_line_start = true;
    let mut pos = 0;

    while let Some(&ch) = chars.get(pos) {
        if ch != '$' {
            pending.push(ch);
            at_line_start = ch == '\n';
            pos += 1;
            continue;
        }

        let lookahead = (chars.get(pos + 1).copied(), chars.get(pos + 2).copied());
        match lookahead {
            // `$$` escape: one literal dollar sign.
            (Some('$'), _) => {
                pending.push('$');
                at_line_start = false;
                pos += 2;
            }
            // `$-- …` comment: skip to the end of the line.
            (Some('-'), Some('-')) => {
                let body_start = pos + 3;
                let line_end = chars
                    .get(body_start..)
                    .unwrap_or_default()
                    .iter()
                    .position(|&d| d == '\n')
                    .map_or(chars.len(), |offset| body_start + offset);
                // A column-zero comment also drops its newline (when there is one), so the line
                // stays clean; otherwise the newline is read as ordinary text next.
                let swallow_newline = at_line_start && line_end < chars.len();
                pos = if swallow_newline {
                    line_end + 1
                } else {
                    line_end
                };
            }
            // Any other `$` opens a directive; flush the pending text first.
            _ => {
                if !pending.is_empty() {
                    tokens.push(Token::Text(std::mem::take(&mut pending)));
                }
                let (token, next) = directive(&chars, pos + 1)?;
                tokens.push(token);
                at_line_start = false;
                pos = next;
            }
        }
    }

    if !pending.is_empty() {
        tokens.push(Token::Text(pending));
    }
    Ok(tokens)
}

/// Parse the single `$…$` directive whose interior starts at index `start` (just past the opening
/// `$`).
///
/// Returns the classified token together with the index immediately after the closing `$`.
///
/// # Errors
/// A [`TemplateError`] when no closing `$` is found, or when the interior fails to classify.
fn directive(chars: &[char], start: usize) -> Result<(Token, usize), TemplateError> {
    let Some(close) = close_index(chars, start) else {
        return Err(TemplateError::new(
            "unterminated directive (missing closing `$`)",
        ));
    };
    let interior = chars
        .get(start..close)
        .map(|span| span.iter().collect::<String>())
        .unwrap_or_default();
    let token = interior_token(&interior)?;
    Ok((token, close + 1))
}

/// Find the `$` that closes a directive whose interior begins at `start`.
///
/// A `$` inside a `"…"` pipe argument or a `[…]` separator literal does not count. Quotes are
/// ignored while inside brackets, and brackets are ignored while inside quotes. Reaching a newline
/// or the end of input first yields `None` (the directive is unterminated).
fn close_index(chars: &[char], start: usize) -> Option<usize> {
    let mut quoted = false;
    let mut bracketed = false;
    for (idx, &ch) in chars.iter().enumerate().skip(start) {
        if ch == '\n' {
            return None;
        }
        if ch == '"' {
            // A quote only toggles quoting when it is not part of a `[…]` literal.
            if !bracketed {
                quoted = !quoted;
            }
            continue;
        }
        if quoted {
            continue;
        }
        match ch {
            '[' => bracketed = true,
            ']' => bracketed = false,
            '$' if !bracketed => return Some(idx),
            _ => {}
        }
    }
    None
}

/// Turn the text between a directive's `$` delimiters into a token.
///
/// The bare keywords `else`, `endif`, `sep`, and `endfor` are recognised first, then the
/// `if(…)`, `elseif(…)`, and `for(…)` forms; everything else is treated as a value directive
/// (variable or partial).
///
/// # Errors
/// A [`TemplateError`] when the keyword argument or the value directive fails to parse.
fn interior_token(interior: &str) -> Result<Token, TemplateError> {
    let body = interior.trim();

    let bare = match body {
        "else" => Some(Token::Else),
        "endif" => Some(Token::EndIf),
        "sep" => Some(Token::Sep),
        "endfor" => Some(Token::EndFor),
        _ => None,
    };
    if let Some(token) = bare {
        return Ok(token);
    }

    let token = if let Some(arg) = keyword_arg(body, "if") {
        Token::If(parse_expr(arg)?)
    } else if let Some(arg) = keyword_arg(body, "elseif") {
        Token::ElseIf(parse_expr(arg)?)
    } else if let Some(arg) = keyword_arg(body, "for") {
        Token::For(parse_expr(arg)?)
    } else {
        return value_token(body);
    };
    Ok(token)
}

/// Extract `<arg>` from text of the form `keyword(<arg>)`.
///
/// Whitespace is allowed between the keyword and the opening parenthesis, and the returned
/// argument is trimmed. Returns `None` when `text` does not start with `keyword`, is not followed
/// by `(`, or does not end with `)`.
fn keyword_arg<'a>(text: &'a str, keyword: &str) -> Option<&'a str> {
    text.strip_prefix(keyword)
        .map(str::trim_start)
        .and_then(|rest| rest.strip_prefix('('))
        .and_then(|rest| rest.strip_suffix(')'))
        .map(str::trim)
}

/// Parse a directive that is not a control keyword.
///
/// The forms are tried in this order:
/// - text containing `:` is a mapped partial, `target:name()`, split at the first `:`;
/// - text containing `()` is a plain partial, `name()`;
/// - anything else is a variable interpolation.
///
/// # Errors
/// A [`TemplateError`] when the partial reference or the expression is malformed.
fn value_token(text: &str) -> Result<Token, TemplateError> {
    let (target, reference) = match text.split_once(':') {
        Some((target, rest)) => (Some(target), rest),
        None if text.contains("()") => (None, text),
        None => return Ok(Token::Var(parse_expr(text)?)),
    };
    // The partial reference is validated before the mapped-over expression.
    let (name, sep) = partial_parts(reference)?;
    let map_over = target.map(|t| parse_expr(t.trim())).transpose()?;
    Ok(Token::Partial {
        name,
        map_over,
        sep,
    })
}

/// Break a partial reference, `name()` or `name()[sep]`, into its trimmed name and the optional
/// separator literal (the text between the brackets, taken verbatim).
///
/// # Errors
/// A [`TemplateError`] when `()` is missing, or when text follows `()` that is not wrapped in
/// `[` … `]`.
fn partial_parts(text: &str) -> Result<(String, Option<String>), TemplateError> {
    let Some((name, tail)) = text.trim().split_once("()") else {
        return Err(TemplateError::new("malformed partial (expected `name()`)"));
    };
    let sep = match tail.trim() {
        "" => None,
        bracketed => {
            let literal = bracketed
                .strip_prefix('[')
                .and_then(|inner| inner.strip_suffix(']'));
            match literal {
                Some(inner) => Some(inner.to_string()),
                None => {
                    return Err(TemplateError::new(
                        "malformed partial separator (expected `[…]`)",
                    ));
                }
            }
        }
    };
    Ok((name.trim().to_string(), sep))
}

/// Parse a variable expression of the form `a.b.c/pipe/pipe …`.
///
/// The text is split on `/`: the first piece is the dotted path (empty segments are discarded)
/// and every remaining piece is parsed as a pipe, in order.
///
/// # Errors
/// The [`TemplateError`] of the first pipe that fails to parse.
fn parse_expr(text: &str) -> Result<Expr, TemplateError> {
    let mut pieces = text.split('/');
    let path: Vec<String> = pieces
        .next()
        .unwrap_or("")
        .trim()
        .split('.')
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .map(str::to_string)
        .collect();
    let pipes = pieces
        .map(|piece| parse_pipe(piece.trim()))
        .collect::<Result<Vec<_>, _>>()?;
    Ok(Expr { path, pipes })
}

/// Parse one pipe (the text after a `/` in an expression) into a [`Pipe`].
///
/// The first argument is the pipe name. `left`, `right`, and `center` are block pipes: they
/// require a numeric width as the second argument and accept optional left and right border
/// strings as the third and fourth (each defaulting to the empty string).
///
/// # Errors
/// A [`TemplateError`] for an unrecognised pipe name, or for a block pipe whose width is missing
/// or not a valid `usize`.
fn parse_pipe(text: &str) -> Result<Pipe, TemplateError> {
    let args = pipe_args(text);
    let name = args.first().map_or("", String::as_str);

    // Simple pipes return straight away; only the block pipes fall through with an alignment.
    let align = match name {
        "uppercase" => return Ok(Pipe::Uppercase),
        "lowercase" => return Ok(Pipe::Lowercase),
        "length" => return Ok(Pipe::Length),
        "reverse" => return Ok(Pipe::Reverse),
        "first" => return Ok(Pipe::First),
        "last" => return Ok(Pipe::Last),
        "rest" => return Ok(Pipe::Rest),
        "allbutlast" => return Ok(Pipe::AllButLast),
        "pairs" => return Ok(Pipe::Pairs),
        "alpha" => return Ok(Pipe::Alpha),
        "roman" => return Ok(Pipe::Roman),
        "chomp" => return Ok(Pipe::Chomp),
        "nowrap" => return Ok(Pipe::Nowrap),
        "left" => Align::Left,
        "right" => Align::Right,
        "center" => Align::Center,
        other => return Err(TemplateError::new(format!("unknown pipe: {other}"))),
    };

    let Some(width) = args.get(1).and_then(|w| w.parse::<usize>().ok()) else {
        return Err(TemplateError::new("block pipe requires a width"));
    };
    let border = |index: usize| args.get(index).cloned().unwrap_or_default();
    Ok(Pipe::Block {
        align,
        width,
        left: border(2),
        right: border(3),
    })
}

/// Split a pipe's text into its whitespace-separated arguments.
///
/// An argument that starts with `"` runs to the next `"` (or to the end of the text when the
/// quote is never closed) and may therefore contain spaces; the quotes themselves are not part of
/// the argument. Any other argument runs up to the next whitespace character.
fn pipe_args(text: &str) -> Vec<String> {
    let mut args = Vec::new();
    let mut rest = text.trim_start();
    while !rest.is_empty() {
        let (arg, tail) = match rest.strip_prefix('"') {
            // Quoted: everything up to the closing quote, or all that remains.
            Some(quoted) => quoted.split_once('"').unwrap_or((quoted, "")),
            // Bare: everything up to the next whitespace.
            None => {
                let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
                rest.split_at(end)
            }
        };
        args.push(arg.to_string());
        rest = tail.trim_start();
    }
    args
}

/// Remove the line endings that belong to block control directives and standalone partials.
///
/// A `$if$`/`$for$` followed only by blanks up to the end of its line opens a *block* construct:
/// the rest of that line, newline included, is removed. Each later directive of the same
/// construct (`$elseif$`/`$else$`/`$sep$` and the closing `$endif$`/`$endfor$`) that likewise
/// ends its own line has the rest of its line removed too. Whether a construct is a block is kept
/// on a nesting stack, so interior directives consult the construct they belong to rather than
/// whichever directive came last.
///
/// Indentation *in front of* a directive is never touched: it stays directly ahead of the
/// following content, so an indented control line shifts its body rightward.
///
/// A plain `$name()$` partial follows a stricter rule: when only blanks precede it back to the
/// previous newline and a newline follows it immediately, that single newline is removed. Blanks
/// between the partial and the newline, or any non-blank, leave the text as it is.
///
/// Every decision is taken against the original token text before anything is modified, so
/// trimming one directive's line never changes the analysis of its neighbours.
fn trim_standalone(tokens: &mut [Token]) {
    // Decision pass. `drop_line[i]`: remove the rest of the line after control directive `i`.
    // `drop_newline[i]`: remove just the newline glued to standalone partial `i`.
    let mut open_blocks: Vec<bool> = Vec::new();
    let mut drop_line: Vec<bool> = Vec::with_capacity(tokens.len());
    let mut drop_newline: Vec<bool> = Vec::with_capacity(tokens.len());
    for (idx, token) in tokens.iter().enumerate() {
        let ends_block_line = match token {
            Token::If(_) | Token::For(_) => {
                let is_block = forward_blank(tokens, idx);
                open_blocks.push(is_block);
                is_block
            }
            Token::ElseIf(_) | Token::Else | Token::Sep => {
                matches!(open_blocks.last(), Some(true)) && forward_blank(tokens, idx)
            }
            Token::EndIf | Token::EndFor => {
                // The construct is closed (popped) whether or not its line gets trimmed.
                let in_block = open_blocks.pop().unwrap_or(false);
                in_block && forward_blank(tokens, idx)
            }
            _ => false,
        };
        drop_line.push(ends_block_line);

        let standalone_partial = matches!(token, Token::Partial { map_over: None, .. })
            && blank_before(tokens, idx)
            && matches!(tokens.get(idx + 1), Some(Token::Text(t)) if t.starts_with('\n'));
        drop_newline.push(standalone_partial);
    }

    // Apply pass. Each decision edits the text token right after its own directive, and a token
    // is either a control directive or a partial, so the two kinds never hit the same text.
    for (idx, (line, newline)) in drop_line.into_iter().zip(drop_newline).enumerate() {
        if !line && !newline {
            continue;
        }
        let Some(Token::Text(following)) = tokens.get_mut(idx + 1) else {
            continue;
        };
        if line {
            trim_leading_line(following);
        } else if let Some(rest) = following.strip_prefix('\n') {
            *following = rest.to_string();
        }
    }
}

/// Whether token `i` is preceded, back to the previous newline, by blanks only.
///
/// True for the very first token. Otherwise the previous token must be text: either what follows
/// its last newline is all blanks, or — when it contains no newline — it is the first token and
/// entirely blank. Any other preceding token gives `false`.
fn blank_before(tokens: &[Token], i: usize) -> bool {
    let Some(prev) = i.checked_sub(1) else {
        return true;
    };
    let Some(Token::Text(text)) = tokens.get(prev) else {
        return false;
    };
    match text.rsplit_once('\n') {
        Some((_, tail)) => tail.chars().all(is_blank),
        // No newline: the line started before this text, so only the start of input qualifies.
        None => prev == 0 && text.chars().all(is_blank),
    }
}

/// Whether token `i` is followed, up to the next newline, by blanks only.
///
/// True when `i` is the last token. Otherwise the next token must be text: either what precedes
/// its first newline is all blanks, or — when it contains no newline — it is the final token and
/// entirely blank. Any other following token gives `false`.
fn forward_blank(tokens: &[Token], i: usize) -> bool {
    let next = i + 1;
    match tokens.get(next) {
        None => true,
        Some(Token::Text(text)) => match text.split_once('\n') {
            Some((head, _)) => head.chars().all(is_blank),
            // No newline: the line continues past this text unless it is the end of input.
            None => next + 1 == tokens.len() && text.chars().all(is_blank),
        },
        Some(_) => false,
    }
}

/// Remove everything up to and including the first newline of `text`; when `text` has no newline
/// it is emptied entirely.
///
/// Callers use this on text whose first line holds only blanks, so in practice it removes the
/// leading blanks and the newline that ends them.
fn trim_leading_line(text: &mut String) {
    if let Some(newline) = text.find('\n') {
        text.drain(..=newline);
    } else {
        text.clear();
    }
}

/// Folds the flat token list into a node tree, matching `$if$`/`$for$` with their terminators.
///
/// A cursor over a borrowed token slice: the parsing methods advance `pos` as they consume tokens.
struct Builder<'a> {
    /// The complete token list being parsed.
    tokens: &'a [Token],
    /// Index of the next token to consume; equals `tokens.len()` once everything is consumed.
    pos: usize,
}

impl Builder<'_> {
    fn peek(&self) -> Option<Token> {
        self.tokens.get(self.pos).cloned()
    }

    /// Parse nodes until a terminator (`elseif`/`else`/`endif`/`sep`/`endfor`) or end of input; the
    /// terminator is left unconsumed for the caller.
    fn sequence(&mut self) -> Result<Vec<Node>, TemplateError> {
        let mut nodes = Vec::new();
        while let Some(token) = self.peek() {
            match token {
                Token::Text(s) => {
                    self.pos += 1;
                    nodes.push(Node::Literal(s));
                }
                Token::Var(expr) => {
                    self.pos += 1;
                    nodes.push(Node::Var(expr));
                }
                Token::Partial {
                    name,
                    map_over,
                    sep,
                } => {
                    self.pos += 1;
                    nodes.push(Node::Partial {
                        name,
                        map_over,
                        sep,
                    });
                }
                Token::If(_) => nodes.push(self.conditional()?),
                Token::For(_) => nodes.push(self.loop_node()?),
                Token::ElseIf(_) | Token::Else | Token::EndIf | Token::Sep | Token::EndFor => break,
            }
        }
        Ok(nodes)
    }

    fn conditional(&mut self) -> Result<Node, TemplateError> {
        let Some(Token::If(cond)) = self.peek() else {
            return Err(TemplateError::new("expected `if`"));
        };
        self.pos += 1;
        let mut branches = vec![(cond, self.sequence()?)];
        loop {
            match self.peek() {
                Some(Token::ElseIf(cond)) => {
                    self.pos += 1;
                    branches.push((cond, self.sequence()?));
                }
                Some(Token::Else) => {
                    self.pos += 1;
                    let otherwise = self.sequence()?;
                    self.expect(&Token::EndIf, "endif")?;
                    return Ok(Node::If {
                        branches,
                        otherwise,
                    });
                }
                Some(Token::EndIf) => {
                    self.pos += 1;
                    return Ok(Node::If {
                        branches,
                        otherwise: Vec::new(),
                    });
                }
                _ => return Err(TemplateError::new("unterminated `if` (missing `endif`)")),
            }
        }
    }

    fn loop_node(&mut self) -> Result<Node, TemplateError> {
        let Some(Token::For(expr)) = self.peek() else {
            return Err(TemplateError::new("expected `for`"));
        };
        self.pos += 1;
        // A single-segment loop expression also binds that name to the current element (so
        // `$for(xs)$…$xs$` works, as does `$for(m/pairs)$…$m.key$`); a pipe on the segment does not
        // change the name. `$it$` always refers to the element regardless.
        let bind = match expr.path.as_slice() {
            [only] => Some(only.clone()),
            _ => None,
        };
        let body = self.sequence()?;
        let mut sep = Vec::new();
        match self.peek() {
            Some(Token::Sep) => {
                self.pos += 1;
                sep = self.sequence()?;
                self.expect(&Token::EndFor, "endfor")?;
            }
            Some(Token::EndFor) => {
                self.pos += 1;
            }
            _ => return Err(TemplateError::new("unterminated `for` (missing `endfor`)")),
        }
        Ok(Node::For {
            expr,
            bind,
            body,
            sep,
        })
    }

    fn expect(&mut self, want: &Token, label: &str) -> Result<(), TemplateError> {
        match self.peek() {
            Some(ref got) if std::mem::discriminant(got) == std::mem::discriminant(want) => {
                self.pos += 1;
                Ok(())
            }
            _ => Err(TemplateError::new(format!("expected `{label}`"))),
        }
    }
}