Skip to main content

doing_template/
parser.rs

1use std::sync::LazyLock;
2
3use regex::Regex;
4
5use crate::colors;
6
7static COLOR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"%((?:[fb]g?)?#[a-fA-F0-9]{6}|[a-zA-Z_]+)").unwrap());
8static PLACEHOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
9  Regex::new(concat!(
10    r"%(?P<width>-?\d+)?",
11    r"(?:\^(?P<marker>.))?",
12    r"(?:(?P<ichar>[ _t]|[^a-zA-Z0-9\s])(?P<icount>\d+))?",
13    r"(?P<prefix>.[ _t]?)?",
14    r"(?P<kind>shortdate|date|title|section|odnote|idnote|chompnote|note",
15    r"|interval|duration|tags|hr_under|hr|n|t)\b",
16  ))
17  .unwrap()
18});
19
/// Stand-in for an escaped percent sign (`\%`) while a template is being scanned.
///
/// `parse` swaps every `\%` for this private-use code point so the regexes do not
/// see a `%`, and `unescape` turns it back into `%` afterwards. A template that
/// already contains U+E000 is therefore rendered with `%` in its place.
const ESCAPE_SENTINEL: &str = "\u{E000}";
21
/// Indentation specification for wrapped/continuation lines.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Indent {
  /// Count parsed from the digits that follow the indent character in the
  /// template (presumably the number of repetitions — confirm against the renderer).
  pub count: u32,
  /// Which character the indent is built from.
  pub kind: IndentChar,
}
28
/// The type of indent character used for wrapped/continuation lines.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IndentChar {
  /// Any indent character other than a space, `_`, or `t`; carries that character.
  Custom(char),
  /// Written in the template as a space or as `_`.
  Space,
  /// Written in the template as `t`.
  Tab,
}
36
/// A parsed template element — either literal text, a color token, or a placeholder token.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
  /// A color token such as `%cyan` or `%#FF5500`, already parsed into a `colors::Color`.
  Color(colors::Color),
  /// Text between tokens, with `\%` escapes already turned back into `%`.
  Literal(String),
  /// A `%` placeholder and the modifiers written between the `%` and its name.
  Placeholder {
    /// Indent modifier (indent character followed by a count), if present.
    indent: Option<Indent>,
    /// Which placeholder was named.
    kind: TokenKind,
    /// The character following `^`, if the marker modifier is present.
    marker: Option<char>,
    /// Text captured directly before the placeholder name: one character,
    /// optionally followed by a space, `_`, or `t`.
    prefix: Option<String>,
    /// Signed number written right after the `%`, if present (presumably the
    /// sign selects alignment — confirm against the renderer).
    width: Option<i32>,
  },
}
50
/// The type of placeholder token in a template string.
///
/// Each variant corresponds to exactly one placeholder name recognized by `parse`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TokenKind {
  /// `%chompnote`
  Chompnote,
  /// `%date`
  Date,
  /// `%duration`
  Duration,
  /// `%hr`
  Hr,
  /// `%hr_under`
  HrUnder,
  /// `%idnote`
  Idnote,
  /// `%interval`
  Interval,
  /// `%n`
  Newline,
  /// `%note`
  Note,
  /// `%odnote`
  Odnote,
  /// `%section`
  Section,
  /// `%shortdate`
  Shortdate,
  /// `%t`
  Tab,
  /// `%tags`
  Tags,
  /// `%title`
  Title,
}
70
/// A token candidate found while scanning a template, tagged with the byte
/// range (`start..end`) it occupies in the escaped template string.
///
/// Lets color and placeholder matches be collected into one list and sorted
/// by position before tokens are emitted.
enum TokenMatch<'a> {
  /// A validated color token.
  Color {
    /// The parsed color.
    color: colors::Color,
    /// Byte offset one past the end of the accepted color text.
    end: usize,
    /// Byte offset of the leading `%`.
    start: usize,
  },
  /// A placeholder match whose modifiers are still held as regex captures.
  Placeholder {
    /// Captures from `PLACEHOLDER_RE`, borrowed from the escaped template.
    caps: regex::Captures<'a>,
    /// Byte offset one past the end of the whole match.
    end: usize,
    /// Byte offset of the leading `%`.
    start: usize,
  },
}
83
84impl TokenMatch<'_> {
85  fn span(&self) -> (usize, usize) {
86    match self {
87      Self::Color {
88        end,
89        start,
90        ..
91      } => (*start, *end),
92      Self::Placeholder {
93        end,
94        start,
95        ..
96      } => (*start, *end),
97    }
98  }
99}
100
101/// Parse a template string into a sequence of tokens.
102///
103/// Template strings contain literal text interspersed with `%` placeholder tokens.
104/// Recognized tokens include `%date`, `%title`, `%note`, etc., with optional
105/// width, alignment, marker, indent, and prefix modifiers. Color tokens like
106/// `%cyan`, `%boldwhite`, `%reset`, and `%#FF5500` are also recognized.
107///
108/// Escaped percent signs (`\%`) become literal `%` in the output. Unrecognized
109/// `%` sequences are preserved as literal text.
110pub fn parse(template: &str) -> Vec<Token> {
111  let escaped = template.replace("\\%", ESCAPE_SENTINEL);
112
113  // Build a combined list of all matches sorted by position
114  let mut matches: Vec<TokenMatch> = Vec::new();
115
116  for caps in PLACEHOLDER_RE.captures_iter(&escaped) {
117    let m = caps.get(0).unwrap();
118    matches.push(TokenMatch::Placeholder {
119      caps,
120      end: m.end(),
121      start: m.start(),
122    });
123  }
124
125  for caps in COLOR_RE.captures_iter(&escaped) {
126    let m = caps.get(0).unwrap();
127    let color_str = caps.get(1).unwrap().as_str();
128    if let Some(valid) = colors::validate_color(color_str) {
129      // Only add if not overlapping with a placeholder match
130      let start = m.start();
131      let end = start + 1 + valid.len(); // +1 for the % prefix
132      let overlaps = matches.iter().any(|tm| {
133        let (ts, te) = tm.span();
134        start < te && end > ts
135      });
136      if !overlaps && let Some(color) = colors::Color::parse(&valid) {
137        matches.push(TokenMatch::Color {
138          color,
139          end,
140          start,
141        });
142      }
143    }
144  }
145
146  matches.sort_by_key(|m| m.span().0);
147
148  let mut tokens = Vec::new();
149  let mut last_end = 0;
150
151  for tm in &matches {
152    let (start, end) = tm.span();
153
154    if start > last_end {
155      tokens.push(Token::Literal(unescape(&escaped[last_end..start])));
156    }
157
158    match tm {
159      TokenMatch::Color {
160        color, ..
161      } => {
162        tokens.push(Token::Color(color.clone()));
163      }
164      TokenMatch::Placeholder {
165        caps, ..
166      } => {
167        let width = caps.name("width").map(|m| m.as_str().parse::<i32>().unwrap());
168        let marker = caps.name("marker").and_then(|m| m.as_str().chars().next());
169
170        let indent = caps.name("ichar").and_then(|ic| {
171          caps.name("icount").map(|cnt| {
172            let count = cnt.as_str().parse::<u32>().unwrap();
173            let kind = match ic.as_str().chars().next().unwrap() {
174              ' ' | '_' => IndentChar::Space,
175              't' => IndentChar::Tab,
176              c => IndentChar::Custom(c),
177            };
178            Indent {
179              count,
180              kind,
181            }
182          })
183        });
184
185        let prefix = caps.name("prefix").map(|m| m.as_str().to_string());
186
187        let kind = match caps.name("kind").unwrap().as_str() {
188          "chompnote" => TokenKind::Chompnote,
189          "date" => TokenKind::Date,
190          "duration" => TokenKind::Duration,
191          "hr" => TokenKind::Hr,
192          "hr_under" => TokenKind::HrUnder,
193          "idnote" => TokenKind::Idnote,
194          "interval" => TokenKind::Interval,
195          "n" => TokenKind::Newline,
196          "note" => TokenKind::Note,
197          "odnote" => TokenKind::Odnote,
198          "section" => TokenKind::Section,
199          "shortdate" => TokenKind::Shortdate,
200          "t" => TokenKind::Tab,
201          "tags" => TokenKind::Tags,
202          "title" => TokenKind::Title,
203          _ => unreachable!(),
204        };
205
206        tokens.push(Token::Placeholder {
207          indent,
208          kind,
209          marker,
210          prefix,
211          width,
212        });
213      }
214    }
215
216    last_end = end;
217  }
218
219  if last_end < escaped.len() {
220    tokens.push(Token::Literal(unescape(&escaped[last_end..])));
221  }
222
223  tokens
224}
225
226fn unescape(s: &str) -> String {
227  s.replace(ESCAPE_SENTINEL, "%")
228}
229
#[cfg(test)]
mod test {
  use super::*;

  /// Builds a placeholder token of the given kind with no modifiers set.
  fn placeholder(kind: TokenKind) -> Token {
    Token::Placeholder {
      indent: None,
      kind,
      marker: None,
      prefix: None,
      width: None,
    }
  }

  /// Tests for the `parse` function.
  mod parse {
    use pretty_assertions::assert_eq;

    use super::*;

    // `\%` must suppress placeholder recognition and come out as a plain `%`.
    #[test]
    fn it_handles_escaped_percent() {
      let tokens = parse("\\%date is literal");

      assert_eq!(tokens, vec![Token::Literal("%date is literal".into())]);
    }

    // Every placeholder name, on its own, yields exactly one token of the matching kind.
    #[test]
    fn it_parses_all_token_kinds() {
      for (input, expected) in [
        ("%chompnote", TokenKind::Chompnote),
        ("%date", TokenKind::Date),
        ("%duration", TokenKind::Duration),
        ("%hr", TokenKind::Hr),
        ("%hr_under", TokenKind::HrUnder),
        ("%idnote", TokenKind::Idnote),
        ("%interval", TokenKind::Interval),
        ("%n", TokenKind::Newline),
        ("%note", TokenKind::Note),
        ("%odnote", TokenKind::Odnote),
        ("%section", TokenKind::Section),
        ("%shortdate", TokenKind::Shortdate),
        ("%t", TokenKind::Tab),
        ("%tags", TokenKind::Tags),
        ("%title", TokenKind::Title),
      ] {
        let tokens = parse(input);

        assert_eq!(tokens.len(), 1, "expected one token for {input}");
        match &tokens[0] {
          Token::Placeholder {
            kind, ..
          } => {
            assert_eq!(*kind, expected, "wrong kind for {input}")
          }
          _ => panic!("expected placeholder for {input}"),
        }
      }
    }

    // Named colors and placeholders can sit back to back with no literal between them.
    #[test]
    fn it_parses_color_tokens() {
      let tokens = parse("%cyan%date%reset");

      assert_eq!(
        tokens,
        vec![
          Token::Color(colors::Color::Named(colors::NamedColor::Cyan)),
          placeholder(TokenKind::Date),
          Token::Color(colors::Color::Named(colors::NamedColor::Reset)),
        ]
      );
    }

    // Width (80), underscore indent (14), and a two-character prefix all on one placeholder.
    #[test]
    fn it_parses_combined_width_indent_and_prefix() {
      let tokens = parse("%80_14\u{2503} note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 14,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: Some("\u{2503} ".into()),
          width: Some(80),
        }]
      );
    }

    // An empty template produces no tokens at all (not an empty literal).
    #[test]
    fn it_parses_empty_string() {
      let tokens = parse("");

      assert_eq!(tokens, vec![]);
    }

    // Marker (`^>`), space indent (8), and prefix (`: `) combined without a width.
    #[test]
    fn it_parses_full_note_modifiers() {
      let tokens = parse("%^> 8: note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 8,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: Some('>'),
          prefix: Some(": ".into()),
          width: None,
        }]
      );
    }

    // A hex color ends after its six digits; the text that follows is a literal.
    #[test]
    fn it_parses_hex_color_tokens() {
      let tokens = parse("%#FF5500hello");

      assert_eq!(
        tokens,
        vec![
          Token::Color(colors::Color::Hex {
            background: false,
            b: 0x00,
            g: 0x55,
            r: 0xFF,
          }),
          Token::Literal("hello".into()),
        ]
      );
    }

    // Text without any `%` passes through as a single literal.
    #[test]
    fn it_parses_literal_text() {
      let tokens = parse("hello world");

      assert_eq!(tokens, vec![Token::Literal("hello world".into())]);
    }

    // `^` followed by one character sets the marker.
    #[test]
    fn it_parses_marker_modifier() {
      let tokens = parse("%^>note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Note,
          marker: Some('>'),
          prefix: None,
          width: None,
        }]
      );
    }

    // Literal text on both sides of a placeholder is preserved, including spaces.
    #[test]
    fn it_parses_mixed_literals_and_placeholders() {
      let tokens = parse("hello %title world");

      assert_eq!(
        tokens,
        vec![
          Token::Literal("hello ".into()),
          placeholder(TokenKind::Title),
          Token::Literal(" world".into()),
        ]
      );
    }

    // A leading `-` on the width is kept as a negative number.
    #[test]
    fn it_parses_negative_width_modifier() {
      let tokens = parse("%-10section");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Section,
          marker: None,
          prefix: None,
          width: Some(-10),
        }]
      );
    }

    // Digits directly after `%` are read as the width.
    #[test]
    fn it_parses_positive_width_modifier() {
      let tokens = parse("%80title");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Title,
          marker: None,
          prefix: None,
          width: Some(80),
        }]
      );
    }

    // A character plus trailing space before the name is captured as the prefix.
    #[test]
    fn it_parses_prefix_modifier() {
      let tokens = parse("%: note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Note,
          marker: None,
          prefix: Some(": ".into()),
          width: None,
        }]
      );
    }

    // A non-ASCII prefix character after a width is a prefix, not an indent (no count follows it).
    #[test]
    fn it_parses_prefix_with_separator() {
      let tokens = parse("%80\u{2551} title");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: None,
          kind: TokenKind::Title,
          marker: None,
          prefix: Some("\u{2551} ".into()),
          width: Some(80),
        }]
      );
    }

    // A space followed by a count is a space indent.
    #[test]
    fn it_parses_space_indent_modifier() {
      let tokens = parse("% 4note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 4,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    // `t` followed by a count is a tab indent, not the `%t` placeholder.
    #[test]
    fn it_parses_tab_indent_modifier() {
      let tokens = parse("%t2note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 2,
            kind: IndentChar::Tab,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    #[test]
    fn it_handles_control_characters_in_input() {
      // Entries containing old sentinel characters (\x01, \x02) should
      // render correctly without corruption now that we use PUA codepoints.
      let tokens = parse("hello \x01 and \x02 world");

      assert_eq!(tokens, vec![Token::Literal("hello \x01 and \x02 world".into())]);
    }

    // `_` followed by a count is an alternate spelling of a space indent.
    #[test]
    fn it_parses_underscore_indent_modifier() {
      let tokens = parse("%_14note");

      assert_eq!(
        tokens,
        vec![Token::Placeholder {
          indent: Some(Indent {
            count: 14,
            kind: IndentChar::Space,
          }),
          kind: TokenKind::Note,
          marker: None,
          prefix: None,
          width: None,
        }]
      );
    }

    // A `%` sequence that is neither a placeholder nor a valid color stays literal.
    #[test]
    fn it_preserves_unknown_percent_sequences() {
      let tokens = parse("%xyz%date");

      assert_eq!(
        tokens,
        vec![Token::Literal("%xyz".into()), placeholder(TokenKind::Date),]
      );
    }
  }
}