Skip to main content

doing_template/
parser.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4
5use crate::colors;
6
7static COLOR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"%((?:[fb]g?)?#[a-fA-F0-9]{6}|[a-zA-Z_]+)").unwrap());
8static PLACEHOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
9  Regex::new(concat!(
10    r"%(?P<width>-?\d+)?",
11    r"(?:\^(?P<marker>.))?",
12    r"(?:(?P<ichar>[ _t]|[^a-zA-Z0-9\s])(?P<icount>\d+))?",
13    r"(?P<prefix>.[ _t]?)?",
14    r"(?P<kind>shortdate|date|title|section|odnote|idnote|chompnote|note",
15    r"|interval|duration|tags|hr_under|hr|n|t)\b",
16  ))
17  .unwrap()
18});
19
/// Stand-in for an escaped percent sign (`\%`) while a template is being parsed.
///
/// `parse` swaps every `\%` for this value before running the token regexes, and
/// `unescape` turns it back into `%` when literal text is emitted. U+E000 is the
/// first code point of the Unicode Private Use Area.
///
/// NOTE: a template that already contains U+E000 will have that character
/// rewritten to `%` as well, because `unescape` replaces every occurrence.
const ESCAPE_SENTINEL: &str = "\u{E000}";
21
/// Indentation specification for wrapped/continuation lines.
///
/// Built by `parse` from the indent modifier of a placeholder, i.e. an indent
/// character immediately followed by a decimal count (for example `_14` or `t2`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Indent {
  /// The count written after the indent character. `parse` stores `u32::MAX` when
  /// the digits cannot be parsed as a `u32`.
  pub count: u32,
  /// Which character the indent is made of.
  pub kind: IndentChar,
}
28
/// The type of indent character used for wrapped/continuation lines.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IndentChar {
  /// Any indent character other than space, `_`, or `t`, stored as written.
  Custom(char),
  /// Written in the template as a space or as `_`.
  Space,
  /// Written in the template as `t`.
  Tab,
}
36
/// A parsed template element — either literal text, a color token, or a placeholder token.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
  /// A color token such as `%cyan` or `%#FF5500`, already resolved to a color value.
  Color(colors::Color),
  /// Text copied from the template as-is, with escaped `\%` restored to `%`.
  Literal(String),
  /// A `%` placeholder such as `%date` or `%80title`, with its optional modifiers.
  Placeholder {
    /// Indent modifier (indent character plus count), if one was written.
    indent: Option<Indent>,
    /// Which placeholder this is.
    kind: TokenKind,
    /// The character that followed `^` in the template, if any.
    marker: Option<char>,
    /// Prefix modifier: one character, optionally followed by a space, `_`, or `t`.
    prefix: Option<String>,
    /// Width modifier as written, sign included; `parse` stores `i32::MAX` when the
    /// digits cannot be parsed as an `i32`.
    // NOTE(review): a negative width presumably selects the opposite alignment —
    // confirm against the renderer.
    width: Option<i32>,
  },
}
50
/// The type of placeholder token in a template string.
///
/// Each variant corresponds to exactly one placeholder name recognized by `parse`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TokenKind {
  /// `%chompnote`
  Chompnote,
  /// `%date`
  Date,
  /// `%duration`
  Duration,
  /// `%hr`
  Hr,
  /// `%hr_under`
  HrUnder,
  /// `%idnote`
  Idnote,
  /// `%interval`
  Interval,
  /// `%n`
  Newline,
  /// `%note`
  Note,
  /// `%odnote`
  Odnote,
  /// `%section`
  Section,
  /// `%shortdate`
  Shortdate,
  /// `%t`
  Tab,
  /// `%tags`
  Tags,
  /// `%title`
  Title,
}
70
/// A token located in the escaped template, recorded with its byte span so that
/// color and placeholder matches can be merged and ordered by position.
enum TokenMatch<'a> {
  /// A validated color token.
  Color {
    /// The resolved color value.
    color: colors::Color,
    /// Byte offset one past the last byte of the token.
    end: usize,
    /// Byte offset of the leading `%`.
    start: usize,
  },
  /// A placeholder token; its modifiers are read from the captures later.
  Placeholder {
    /// Regex captures for the match, borrowing from the escaped template.
    caps: regex::Captures<'a>,
    /// Byte offset one past the last byte of the token.
    end: usize,
    /// Byte offset of the leading `%`.
    start: usize,
  },
}
83
84impl TokenMatch<'_> {
85  fn span(&self) -> (usize, usize) {
86    match self {
87      Self::Color {
88        end,
89        start,
90        ..
91      } => (*start, *end),
92      Self::Placeholder {
93        end,
94        start,
95        ..
96      } => (*start, *end),
97    }
98  }
99}
100
101/// Parse a template string into a sequence of tokens.
102///
103/// Template strings contain literal text interspersed with `%` placeholder tokens.
104/// Recognized tokens include `%date`, `%title`, `%note`, etc., with optional
105/// width, alignment, marker, indent, and prefix modifiers. Color tokens like
106/// `%cyan`, `%boldwhite`, `%reset`, and `%#FF5500` are also recognized.
107///
108/// Escaped percent signs (`\%`) become literal `%` in the output. Unrecognized
109/// `%` sequences are preserved as literal text.
110pub fn parse(template: &str) -> Vec<Token> {
111  let escaped = template.replace("\\%", ESCAPE_SENTINEL);
112
113  // Build a combined list of all matches sorted by position
114  let mut matches: Vec<TokenMatch> = Vec::new();
115
116  for caps in PLACEHOLDER_RE.captures_iter(&escaped) {
117    let m = caps.get(0).unwrap();
118    matches.push(TokenMatch::Placeholder {
119      caps,
120      end: m.end(),
121      start: m.start(),
122    });
123  }
124
125  for caps in COLOR_RE.captures_iter(&escaped) {
126    let m = caps.get(0).unwrap();
127    let color_str = caps.get(1).unwrap().as_str();
128    if let Some((valid, orig_len)) = colors::validate_color(color_str) {
129      // Only add if not overlapping with a placeholder match
130      let start = m.start();
131      let end = start + 1 + orig_len; // +1 for the % prefix
132      let overlaps = matches.iter().any(|tm| {
133        let (ts, te) = tm.span();
134        start < te && end > ts
135      });
136      if !overlaps && let Some(color) = colors::Color::parse(&valid) {
137        matches.push(TokenMatch::Color {
138          color,
139          end,
140          start,
141        });
142      }
143    }
144  }
145
146  matches.sort_by_key(|m| m.span().0);
147
148  let mut tokens = Vec::new();
149  let mut last_end = 0;
150
151  for tm in &matches {
152    let (start, end) = tm.span();
153
154    if start > last_end {
155      tokens.push(Token::Literal(unescape(&escaped[last_end..start])));
156    }
157
158    match tm {
159      TokenMatch::Color {
160        color, ..
161      } => {
162        tokens.push(Token::Color(color.clone()));
163      }
164      TokenMatch::Placeholder {
165        caps, ..
166      } => {
167        let width = caps
168          .name("width")
169          .map(|m| m.as_str().parse::<i32>().unwrap_or(i32::MAX));
170        let marker = caps.name("marker").and_then(|m| m.as_str().chars().next());
171
172        let indent = caps.name("ichar").and_then(|ic| {
173          caps.name("icount").map(|cnt| {
174            let count = cnt.as_str().parse::<u32>().unwrap_or(u32::MAX);
175            let kind = match ic.as_str().chars().next().unwrap() {
176              ' ' | '_' => IndentChar::Space,
177              't' => IndentChar::Tab,
178              c => IndentChar::Custom(c),
179            };
180            Indent {
181              count,
182              kind,
183            }
184          })
185        });
186
187        let prefix = caps.name("prefix").map(|m| m.as_str().to_string());
188
189        let kind = match caps.name("kind").unwrap().as_str() {
190          "chompnote" => TokenKind::Chompnote,
191          "date" => TokenKind::Date,
192          "duration" => TokenKind::Duration,
193          "hr" => TokenKind::Hr,
194          "hr_under" => TokenKind::HrUnder,
195          "idnote" => TokenKind::Idnote,
196          "interval" => TokenKind::Interval,
197          "n" => TokenKind::Newline,
198          "note" => TokenKind::Note,
199          "odnote" => TokenKind::Odnote,
200          "section" => TokenKind::Section,
201          "shortdate" => TokenKind::Shortdate,
202          "t" => TokenKind::Tab,
203          "tags" => TokenKind::Tags,
204          "title" => TokenKind::Title,
205          _ => unreachable!(),
206        };
207
208        tokens.push(Token::Placeholder {
209          indent,
210          kind,
211          marker,
212          prefix,
213          width,
214        });
215      }
216    }
217
218    last_end = end;
219  }
220
221  if last_end < escaped.len() {
222    tokens.push(Token::Literal(unescape(&escaped[last_end..])));
223  }
224
225  tokens
226}
227
228fn unescape(s: &str) -> String {
229  s.replace(ESCAPE_SENTINEL, "%")
230}
231
#[cfg(test)]
mod test {
  use super::*;

  /// Builds a placeholder token of the given kind with no modifiers set.
  fn placeholder(kind: TokenKind) -> Token {
    Token::Placeholder {
      indent: None,
      kind,
      marker: None,
      prefix: None,
      width: None,
    }
  }

  /// Tests for the `parse` function.
  mod parse {
    use pretty_assertions::assert_eq;

    use super::*;

    // `\%` must not start a token; it comes back as a plain `%` in the literal.
    #[test]
    fn it_handles_escaped_percent() {
      let tokens = parse("\\%date is literal");

      assert_eq!(tokens, vec![Token::Literal("%date is literal".into())]);
    }

    // Every placeholder name, on its own, yields exactly one token of the matching kind.
    #[test]
    fn it_parses_all_token_kinds() {
      for (input, expected) in [
        ("%chompnote", TokenKind::Chompnote),
        ("%date", TokenKind::Date),
        ("%duration", TokenKind::Duration),
        ("%hr", TokenKind::Hr),
        ("%hr_under", TokenKind::HrUnder),
        ("%idnote", TokenKind::Idnote),
        ("%interval", TokenKind::Interval),
        ("%n", TokenKind::Newline),
        ("%note", TokenKind::Note),
        ("%odnote", TokenKind::Odnote),
        ("%section", TokenKind::Section),
        ("%shortdate", TokenKind::Shortdate),
        ("%t", TokenKind::Tab),
        ("%tags", TokenKind::Tags),
        ("%title", TokenKind::Title),
      ] {
        let tokens = parse(input);

        assert_eq!(tokens.len(), 1, "expected one token for {input}");
        match &tokens[0] {
          Token::Placeholder {
            kind, ..
          } => {
            assert_eq!(*kind, expected, "wrong kind for {input}")
          }
          _ => panic!("expected placeholder for {input}"),
        }
      }
    }

    // Colors and placeholders can sit back to back with no literal text between them.
    #[test]
    fn it_parses_color_tokens() {
      let tokens = parse("%cyan%date%reset");

      assert_eq!(
        tokens,
        vec![
          Token::Color(colors::Color::Named(colors::NamedColor::Cyan)),
          placeholder(TokenKind::Date),
          Token::Color(colors::Color::Named(colors::NamedColor::Reset)),
        ]
      );
    }

    // `%bold_white` resolves to the same color as the underscore-free spelling.
    #[test]
    fn it_parses_color_with_underscores() {
      let tokens = parse("%bold_white%title");

      assert_eq!(
        tokens,
        vec![
          Token::Color(colors::Color::Named(colors::NamedColor::BoldWhite)),
          placeholder(TokenKind::Title),
        ]
      );
    }

    // Width (80), indent (`_14`) and prefix (U+2503 plus a space) in one token.
    #[test]
    fn it_parses_combined_width_indent_and_prefix() {
      let tokens = parse("%80_14\u{2503} note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 14,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: Some("\u{2503} ".into()),
          width: Some(80),
        }]
      );
    }

    #[test]
    fn it_parses_empty_string() {
      let tokens = parse("");

      assert_eq!(tokens, vec![]);
    }

    // Marker (`^>`), space indent of 8, and prefix `: ` in one token.
    #[test]
    fn it_parses_full_note_modifiers() {
      let tokens = parse("%^> 8: note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 8,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: Some('>'),
          prefix: Some(": ".into()),
          width: None,
        }]
      );
    }

    // Text directly after the six hex digits is left as a literal.
    #[test]
    fn it_parses_hex_color_tokens() {
      let tokens = parse("%#FF5500hello");

      assert_eq!(
        tokens,
        vec![
          Token::Color(colors::Color::Hex {
            background: false,
            b: 0x00,
            g: 0x55,
            r: 0xFF,
          }),
          Token::Literal("hello".into()),
        ]
      );
    }

    #[test]
    fn it_parses_literal_text() {
      let tokens = parse("hello world");

      assert_eq!(tokens, vec![Token::Literal("hello world".into())]);
    }

    #[test]
    fn it_parses_marker_modifier() {
      let tokens = parse("%^>note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Note,
          marker: Some('>'),
          prefix: None,
          width: None,
        }]
      );
    }

    // Whitespace around a placeholder stays attached to the neighbouring literals.
    #[test]
    fn it_parses_mixed_literals_and_placeholders() {
      let tokens = parse("hello %title world");

      assert_eq!(
        tokens,
        vec![
          Token::Literal("hello ".into()),
          placeholder(TokenKind::Title),
          Token::Literal(" world".into()),
        ]
      );
    }

    // A width too large for `i32` saturates to `i32::MAX` instead of panicking.
    #[test]
    fn it_handles_overflow_width_gracefully() {
      let tokens = parse("%999999999999title");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Title,
          marker: None,
          prefix: None,
          width: Some(i32::MAX),
        }]
      );
    }

    #[test]
    fn it_parses_negative_width_modifier() {
      let tokens = parse("%-10section");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Section,
          marker: None,
          prefix: None,
          width: Some(-10),
        }]
      );
    }

    #[test]
    fn it_parses_positive_width_modifier() {
      let tokens = parse("%80title");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Title,
          marker: None,
          prefix: None,
          width: Some(80),
        }]
      );
    }

    // `:` is not followed by a digit, so it is read as a prefix, not an indent character.
    #[test]
    fn it_parses_prefix_modifier() {
      let tokens = parse("%: note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Note,
          marker: None,
          prefix: Some(": ".into()),
          width: None,
        }]
      );
    }

    // A non-ASCII prefix character (U+2551) followed by the optional space separator.
    #[test]
    fn it_parses_prefix_with_separator() {
      let tokens = parse("%80\u{2551} title");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Title,
          marker: None,
          prefix: Some("\u{2551} ".into()),
          width: Some(80),
        }]
      );
    }

    #[test]
    fn it_parses_space_indent_modifier() {
      let tokens = parse("% 4note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 4,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    // `t` followed by a digit is the tab indent modifier, not the `%t` placeholder.
    #[test]
    fn it_parses_tab_indent_modifier() {
      let tokens = parse("%t2note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 2,
            kind: IndentChar::Tab,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    #[test]
    fn it_handles_control_characters_in_input() {
      // Entries containing old sentinel characters (\x01, \x02) should
      // render correctly without corruption now that we use PUA codepoints.
      let tokens = parse("hello \x01 and \x02 world");

      assert_eq!(tokens, vec![Token::Literal("hello \x01 and \x02 world".into())]);
    }

    // `_` is an alternative spelling of the space indent character.
    #[test]
    fn it_parses_underscore_indent_modifier() {
      let tokens = parse("%_14note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 14,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    // `%xyz` is neither a placeholder nor a valid color, so it stays literal.
    #[test]
    fn it_preserves_unknown_percent_sequences() {
      let tokens = parse("%xyz%date");

      assert_eq!(
        tokens,
        vec![Token::Literal("%xyz".into()), placeholder(TokenKind::Date),]
      );
    }
  }
}