1use std::sync::LazyLock;
2
3use regex::Regex;
4
5use crate::colors;
6
7static COLOR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"%((?:[fb]g?)?#[a-fA-F0-9]{6}|[a-zA-Z_]+)").unwrap());
8static PLACEHOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
9 Regex::new(concat!(
10 r"%(?P<width>-?\d+)?",
11 r"(?:\^(?P<marker>.))?",
12 r"(?:(?P<ichar>[ _t]|[^a-zA-Z0-9\s])(?P<icount>\d+))?",
13 r"(?P<prefix>.[ _t]?)?",
14 r"(?P<kind>shortdate|date|title|section|odnote|idnote|chompnote|note",
15 r"|interval|duration|tags|hr_under|hr|n|t)\b",
16 ))
17 .unwrap()
18});
19
/// Stand-in that `parse` substitutes for every escaped percent sign (`\%`)
/// before the regexes run; `unescape` turns it back into `%`.
///
/// U+E000 is the first code point of the Unicode Private Use Area.
/// NOTE(review): a template that already contains U+E000 will have that
/// character rewritten to `%` as well — confirm this is acceptable.
const ESCAPE_SENTINEL: &str = "\u{E000}";
21
/// Indent modifier of a placeholder: an indent character followed by a count.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Indent {
    /// Number written after the indent character; `parse` clamps it to
    /// `u32::MAX` when the digits do not fit in a `u32`.
    pub count: u32,
    /// Which indent character the modifier used.
    pub kind: IndentChar,
}
28
/// The character selected by an indent modifier, as classified by `parse`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IndentChar {
    /// Any accepted indent character other than space, `_` or `t`.
    Custom(char),
    /// Written as a space or as `_`.
    Space,
    /// Written as `t`.
    Tab,
}
36
/// One element of a parsed template, in template order.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// A `%color` directive accepted by `colors::validate_color` and
    /// `colors::Color::parse`.
    Color(colors::Color),
    /// Text between directives, with every escaped `\%` restored to `%`.
    Literal(String),
    /// A `%kind` placeholder together with its optional modifiers.
    Placeholder {
        /// Indent modifier (indent character plus count), if present.
        indent: Option<Indent>,
        /// Which placeholder was named.
        kind: TokenKind,
        /// Character written after `^`, if present.
        marker: Option<char>,
        /// Text captured between the modifiers and the placeholder name, if any.
        prefix: Option<String>,
        /// Signed number written directly after `%`, if present; values that
        /// do not fit in an `i32` are clamped by `parse`.
        width: Option<i32>,
    },
}
50
/// The placeholder names recognized by `parse`; each variant corresponds to
/// exactly one keyword in the template syntax.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TokenKind {
    /// `%chompnote`
    Chompnote,
    /// `%date`
    Date,
    /// `%duration`
    Duration,
    /// `%hr`
    Hr,
    /// `%hr_under`
    HrUnder,
    /// `%idnote`
    Idnote,
    /// `%interval`
    Interval,
    /// `%n`
    Newline,
    /// `%note`
    Note,
    /// `%odnote`
    Odnote,
    /// `%section`
    Section,
    /// `%shortdate`
    Shortdate,
    /// `%t`
    Tab,
    /// `%tags`
    Tags,
    /// `%title`
    Title,
}
70
/// A regex hit collected by `parse`, stored with its byte span in the escaped
/// template so hits from both patterns can be sorted before tokens are emitted.
enum TokenMatch<'a> {
    /// A color directive that passed validation and parsing.
    Color {
        /// The parsed color.
        color: colors::Color,
        /// Exclusive end offset: `start + 1 + ` the length reported by
        /// `colors::validate_color`.
        end: usize,
        /// Offset of the leading `%`.
        start: usize,
    },
    /// A placeholder hit whose capture groups are decoded later.
    Placeholder {
        /// Capture groups of the placeholder pattern for this hit.
        caps: regex::Captures<'a>,
        /// Exclusive end offset of the whole match.
        end: usize,
        /// Offset of the leading `%`.
        start: usize,
    },
}
83
84impl TokenMatch<'_> {
85 fn span(&self) -> (usize, usize) {
86 match self {
87 Self::Color {
88 end,
89 start,
90 ..
91 } => (*start, *end),
92 Self::Placeholder {
93 end,
94 start,
95 ..
96 } => (*start, *end),
97 }
98 }
99}
100
101pub fn parse(template: &str) -> Vec<Token> {
111 let escaped = template.replace("\\%", ESCAPE_SENTINEL);
112
113 let mut matches: Vec<TokenMatch> = Vec::new();
115
116 for caps in PLACEHOLDER_RE.captures_iter(&escaped) {
117 let m = caps.get(0).unwrap();
118 matches.push(TokenMatch::Placeholder {
119 caps,
120 end: m.end(),
121 start: m.start(),
122 });
123 }
124
125 for caps in COLOR_RE.captures_iter(&escaped) {
126 let m = caps.get(0).unwrap();
127 let color_str = caps.get(1).unwrap().as_str();
128 if let Some((valid, orig_len)) = colors::validate_color(color_str) {
129 let start = m.start();
131 let end = start + 1 + orig_len; let overlaps = matches.iter().any(|tm| {
133 let (ts, te) = tm.span();
134 start < te && end > ts
135 });
136 if !overlaps && let Some(color) = colors::Color::parse(&valid) {
137 matches.push(TokenMatch::Color {
138 color,
139 end,
140 start,
141 });
142 }
143 }
144 }
145
146 matches.sort_by_key(|m| m.span().0);
147
148 let mut tokens = Vec::new();
149 let mut last_end = 0;
150
151 for tm in &matches {
152 let (start, end) = tm.span();
153
154 if start > last_end {
155 tokens.push(Token::Literal(unescape(&escaped[last_end..start])));
156 }
157
158 match tm {
159 TokenMatch::Color {
160 color, ..
161 } => {
162 tokens.push(Token::Color(color.clone()));
163 }
164 TokenMatch::Placeholder {
165 caps, ..
166 } => {
167 let width = caps
168 .name("width")
169 .map(|m| m.as_str().parse::<i32>().unwrap_or(i32::MAX));
170 let marker = caps.name("marker").and_then(|m| m.as_str().chars().next());
171
172 let indent = caps.name("ichar").and_then(|ic| {
173 caps.name("icount").map(|cnt| {
174 let count = cnt.as_str().parse::<u32>().unwrap_or(u32::MAX);
175 let kind = match ic.as_str().chars().next().unwrap() {
176 ' ' | '_' => IndentChar::Space,
177 't' => IndentChar::Tab,
178 c => IndentChar::Custom(c),
179 };
180 Indent {
181 count,
182 kind,
183 }
184 })
185 });
186
187 let prefix = caps.name("prefix").map(|m| m.as_str().to_string());
188
189 let kind = match caps.name("kind").unwrap().as_str() {
190 "chompnote" => TokenKind::Chompnote,
191 "date" => TokenKind::Date,
192 "duration" => TokenKind::Duration,
193 "hr" => TokenKind::Hr,
194 "hr_under" => TokenKind::HrUnder,
195 "idnote" => TokenKind::Idnote,
196 "interval" => TokenKind::Interval,
197 "n" => TokenKind::Newline,
198 "note" => TokenKind::Note,
199 "odnote" => TokenKind::Odnote,
200 "section" => TokenKind::Section,
201 "shortdate" => TokenKind::Shortdate,
202 "t" => TokenKind::Tab,
203 "tags" => TokenKind::Tags,
204 "title" => TokenKind::Title,
205 _ => unreachable!(),
206 };
207
208 tokens.push(Token::Placeholder {
209 indent,
210 kind,
211 marker,
212 prefix,
213 width,
214 });
215 }
216 }
217
218 last_end = end;
219 }
220
221 if last_end < escaped.len() {
222 tokens.push(Token::Literal(unescape(&escaped[last_end..])));
223 }
224
225 tokens
226}
227
228fn unescape(s: &str) -> String {
229 s.replace(ESCAPE_SENTINEL, "%")
230}
231
#[cfg(test)]
mod test {
    use super::*;

    /// Fluent description of an expected `Token::Placeholder`; every modifier
    /// starts out absent and is filled in by the chained setters.
    struct Expected {
        indent: Option<Indent>,
        kind: TokenKind,
        marker: Option<char>,
        prefix: Option<String>,
        width: Option<i32>,
    }

    impl Expected {
        fn of(kind: TokenKind) -> Self {
            Self {
                indent: None,
                kind,
                marker: None,
                prefix: None,
                width: None,
            }
        }

        fn indent(mut self, count: u32, kind: IndentChar) -> Self {
            self.indent = Some(Indent {
                count,
                kind,
            });
            self
        }

        fn marker(mut self, marker: char) -> Self {
            self.marker = Some(marker);
            self
        }

        fn prefix(mut self, prefix: &str) -> Self {
            self.prefix = Some(prefix.into());
            self
        }

        fn width(mut self, width: i32) -> Self {
            self.width = Some(width);
            self
        }

        fn token(self) -> Token {
            Token::Placeholder {
                indent: self.indent,
                kind: self.kind,
                marker: self.marker,
                prefix: self.prefix,
                width: self.width,
            }
        }
    }

    /// A placeholder token without any modifiers.
    fn placeholder(kind: TokenKind) -> Token {
        Expected::of(kind).token()
    }

    /// A literal token holding `text`.
    fn literal(text: &str) -> Token {
        Token::Literal(text.into())
    }

    mod parse {
        use pretty_assertions::assert_eq;

        use super::*;

        #[test]
        fn it_handles_escaped_percent() {
            assert_eq!(parse("\\%date is literal"), vec![literal("%date is literal")]);
        }

        #[test]
        fn it_parses_all_token_kinds() {
            let cases = [
                ("%chompnote", TokenKind::Chompnote),
                ("%date", TokenKind::Date),
                ("%duration", TokenKind::Duration),
                ("%hr", TokenKind::Hr),
                ("%hr_under", TokenKind::HrUnder),
                ("%idnote", TokenKind::Idnote),
                ("%interval", TokenKind::Interval),
                ("%n", TokenKind::Newline),
                ("%note", TokenKind::Note),
                ("%odnote", TokenKind::Odnote),
                ("%section", TokenKind::Section),
                ("%shortdate", TokenKind::Shortdate),
                ("%t", TokenKind::Tab),
                ("%tags", TokenKind::Tags),
                ("%title", TokenKind::Title),
            ];

            for (input, expected) in cases {
                let tokens = parse(input);

                assert_eq!(tokens.len(), 1, "expected one token for {input}");
                let Token::Placeholder {
                    kind, ..
                } = &tokens[0]
                else {
                    panic!("expected placeholder for {input}");
                };
                assert_eq!(*kind, expected, "wrong kind for {input}");
            }
        }

        #[test]
        fn it_parses_color_tokens() {
            let expected = vec![
                Token::Color(colors::Color::Named(colors::NamedColor::Cyan)),
                placeholder(TokenKind::Date),
                Token::Color(colors::Color::Named(colors::NamedColor::Reset)),
            ];

            assert_eq!(parse("%cyan%date%reset"), expected);
        }

        #[test]
        fn it_parses_color_with_underscores() {
            let expected = vec![
                Token::Color(colors::Color::Named(colors::NamedColor::BoldWhite)),
                placeholder(TokenKind::Title),
            ];

            assert_eq!(parse("%bold_white%title"), expected);
        }

        #[test]
        fn it_parses_combined_width_indent_and_prefix() {
            let expected = Expected::of(TokenKind::Note)
                .indent(14, IndentChar::Space)
                .prefix("\u{2503} ")
                .width(80)
                .token();

            assert_eq!(parse("%80_14\u{2503} note"), vec![expected]);
        }

        #[test]
        fn it_parses_empty_string() {
            assert_eq!(parse(""), Vec::<Token>::new());
        }

        #[test]
        fn it_parses_full_note_modifiers() {
            let expected = Expected::of(TokenKind::Note)
                .indent(8, IndentChar::Space)
                .marker('>')
                .prefix(": ")
                .token();

            assert_eq!(parse("%^> 8: note"), vec![expected]);
        }

        #[test]
        fn it_parses_hex_color_tokens() {
            let orange = colors::Color::Hex {
                background: false,
                b: 0x00,
                g: 0x55,
                r: 0xFF,
            };

            assert_eq!(parse("%#FF5500hello"), vec![Token::Color(orange), literal("hello")]);
        }

        #[test]
        fn it_parses_literal_text() {
            assert_eq!(parse("hello world"), vec![literal("hello world")]);
        }

        #[test]
        fn it_parses_marker_modifier() {
            let expected = Expected::of(TokenKind::Note).marker('>').token();

            assert_eq!(parse("%^>note"), vec![expected]);
        }

        #[test]
        fn it_parses_mixed_literals_and_placeholders() {
            let expected = vec![literal("hello "), placeholder(TokenKind::Title), literal(" world")];

            assert_eq!(parse("hello %title world"), expected);
        }

        #[test]
        fn it_handles_overflow_width_gracefully() {
            let expected = Expected::of(TokenKind::Title).width(i32::MAX).token();

            assert_eq!(parse("%999999999999title"), vec![expected]);
        }

        #[test]
        fn it_parses_negative_width_modifier() {
            let expected = Expected::of(TokenKind::Section).width(-10).token();

            assert_eq!(parse("%-10section"), vec![expected]);
        }

        #[test]
        fn it_parses_positive_width_modifier() {
            let expected = Expected::of(TokenKind::Title).width(80).token();

            assert_eq!(parse("%80title"), vec![expected]);
        }

        #[test]
        fn it_parses_prefix_modifier() {
            let expected = Expected::of(TokenKind::Note).prefix(": ").token();

            assert_eq!(parse("%: note"), vec![expected]);
        }

        #[test]
        fn it_parses_prefix_with_separator() {
            let expected = Expected::of(TokenKind::Title).prefix("\u{2551} ").width(80).token();

            assert_eq!(parse("%80\u{2551} title"), vec![expected]);
        }

        #[test]
        fn it_parses_space_indent_modifier() {
            let expected = Expected::of(TokenKind::Note).indent(4, IndentChar::Space).token();

            assert_eq!(parse("% 4note"), vec![expected]);
        }

        #[test]
        fn it_parses_tab_indent_modifier() {
            let expected = Expected::of(TokenKind::Note).indent(2, IndentChar::Tab).token();

            assert_eq!(parse("%t2note"), vec![expected]);
        }

        #[test]
        fn it_handles_control_characters_in_input() {
            assert_eq!(
                parse("hello \x01 and \x02 world"),
                vec![literal("hello \x01 and \x02 world")]
            );
        }

        #[test]
        fn it_parses_underscore_indent_modifier() {
            let expected = Expected::of(TokenKind::Note).indent(14, IndentChar::Space).token();

            assert_eq!(parse("%_14note"), vec![expected]);
        }

        #[test]
        fn it_preserves_unknown_percent_sequences() {
            let expected = vec![literal("%xyz"), placeholder(TokenKind::Date)];

            assert_eq!(parse("%xyz%date"), expected);
        }
    }
}