1use std::sync::LazyLock;
2
3use regex::Regex;
4
5use crate::colors;
6
/// Matches a candidate color directive: `%` followed by either a six-digit hex color
/// (`#RRGGBB`, optionally preceded by `f`, `b`, `fg` or `bg`) or a run of ASCII letters and
/// underscores. Capture group 1 holds everything after the `%`.
///
/// A match is only a candidate; `parse` accepts it as a color when `colors::validate_color`
/// returns a value for the captured text.
static COLOR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"%((?:[fb]g?)?#[a-fA-F0-9]{6}|[a-zA-Z_]+)").unwrap());
/// Matches a placeholder directive and exposes its optional modifiers as named groups.
///
/// In order after the leading `%`:
/// - `width`: an optionally negative integer,
/// - `marker`: the single character following a `^`,
/// - `ichar` + `icount`: an indent character (space, `_`, `t`, or any character that is neither
///   ASCII alphanumeric nor whitespace) followed by a decimal count,
/// - `prefix`: any one character, optionally followed by a space, `_` or `t`,
/// - `kind`: the placeholder keyword, which must end on a word boundary.
static PLACEHOLDER_RE: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(concat!(
        r"%(?P<width>-?\d+)?",
        r"(?:\^(?P<marker>.))?",
        r"(?:(?P<ichar>[ _t]|[^a-zA-Z0-9\s])(?P<icount>\d+))?",
        r"(?P<prefix>.[ _t]?)?",
        // Longer keywords come before their prefixes (`note` before `n`, `hr_under` before `hr`).
        r"(?P<kind>shortdate|date|title|section|odnote|idnote|chompnote|note",
        r"|interval|duration|tags|hr_under|hr|n|t)\b",
    ))
    .unwrap()
});
19
/// Stand-in that `parse` substitutes for every escaped percent sign (`\%`) before running the
/// regexes, and that `unescape` later turns back into `%`.
///
/// U+E000 is a Unicode private-use code point. NOTE(review): a template that already contains
/// U+E000 cannot be told apart from an escaped percent and will come back with `%` in its place.
const ESCAPE_SENTINEL: &str = "\u{E000}";
21
/// Indent modifier attached to a placeholder, e.g. the `_14` in `%_14note`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Indent {
    /// The decimal number written after the indent character.
    pub count: u32,
    /// Which indent character was requested.
    pub kind: IndentChar,
}
28
/// The character class selected by an indent modifier.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IndentChar {
    /// Any indent character other than space, `_` or `t`, carried verbatim.
    Custom(char),
    /// Written in the template as a space or as `_`.
    Space,
    /// Written in the template as `t`.
    Tab,
}
36
/// One element of a parsed template, in the order it appeared in the source string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// A color directive such as `%cyan` or `%#FF5500`.
    Color(colors::Color),
    /// Text between directives, with every escaped `\%` restored to `%`.
    Literal(String),
    /// A placeholder directive such as `%title`, together with its optional modifiers.
    Placeholder {
        /// Indent modifier (indent character plus count), if one was written.
        indent: Option<Indent>,
        /// Which placeholder keyword was used.
        kind: TokenKind,
        /// The character that followed `^`, if any.
        marker: Option<char>,
        /// The one- or two-character prefix written directly before the keyword, if any.
        prefix: Option<String>,
        /// The signed integer written directly after `%`, if any.
        /// NOTE(review): the meaning of a negative width is decided by the consumer — confirm there.
        width: Option<i32>,
    },
}
50
/// The placeholder keywords recognized by the template parser.
///
/// Each variant corresponds to exactly one keyword accepted after `%`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TokenKind {
    /// `%chompnote`
    Chompnote,
    /// `%date`
    Date,
    /// `%duration`
    Duration,
    /// `%hr`
    Hr,
    /// `%hr_under`
    HrUnder,
    /// `%idnote`
    Idnote,
    /// `%interval`
    Interval,
    /// `%n`
    Newline,
    /// `%note`
    Note,
    /// `%odnote`
    Odnote,
    /// `%section`
    Section,
    /// `%shortdate`
    Shortdate,
    /// `%t`
    Tab,
    /// `%tags`
    Tags,
    /// `%title`
    Title,
}
70
/// A directive located in the escaped template, recorded before tokens are emitted.
///
/// `start` and `end` are byte offsets into the escaped string (the template after `\%` has been
/// replaced by the sentinel), forming a half-open range.
enum TokenMatch<'a> {
    /// A validated color directive.
    Color {
        color: colors::Color,
        end: usize,
        start: usize,
    },
    /// A placeholder directive; the captures are kept so its modifiers can be read later.
    Placeholder {
        caps: regex::Captures<'a>,
        end: usize,
        start: usize,
    },
}
83
84impl TokenMatch<'_> {
85 fn span(&self) -> (usize, usize) {
86 match self {
87 Self::Color {
88 end,
89 start,
90 ..
91 } => (*start, *end),
92 Self::Placeholder {
93 end,
94 start,
95 ..
96 } => (*start, *end),
97 }
98 }
99}
100
101pub fn parse(template: &str) -> Vec<Token> {
111 let escaped = template.replace("\\%", ESCAPE_SENTINEL);
112
113 let mut matches: Vec<TokenMatch> = Vec::new();
115
116 for caps in PLACEHOLDER_RE.captures_iter(&escaped) {
117 let m = caps.get(0).unwrap();
118 matches.push(TokenMatch::Placeholder {
119 caps,
120 end: m.end(),
121 start: m.start(),
122 });
123 }
124
125 for caps in COLOR_RE.captures_iter(&escaped) {
126 let m = caps.get(0).unwrap();
127 let color_str = caps.get(1).unwrap().as_str();
128 if let Some(valid) = colors::validate_color(color_str) {
129 let start = m.start();
131 let end = start + 1 + valid.len(); let overlaps = matches.iter().any(|tm| {
133 let (ts, te) = tm.span();
134 start < te && end > ts
135 });
136 if !overlaps && let Some(color) = colors::Color::parse(&valid) {
137 matches.push(TokenMatch::Color {
138 color,
139 end,
140 start,
141 });
142 }
143 }
144 }
145
146 matches.sort_by_key(|m| m.span().0);
147
148 let mut tokens = Vec::new();
149 let mut last_end = 0;
150
151 for tm in &matches {
152 let (start, end) = tm.span();
153
154 if start > last_end {
155 tokens.push(Token::Literal(unescape(&escaped[last_end..start])));
156 }
157
158 match tm {
159 TokenMatch::Color {
160 color, ..
161 } => {
162 tokens.push(Token::Color(color.clone()));
163 }
164 TokenMatch::Placeholder {
165 caps, ..
166 } => {
167 let width = caps.name("width").map(|m| m.as_str().parse::<i32>().unwrap());
168 let marker = caps.name("marker").and_then(|m| m.as_str().chars().next());
169
170 let indent = caps.name("ichar").and_then(|ic| {
171 caps.name("icount").map(|cnt| {
172 let count = cnt.as_str().parse::<u32>().unwrap();
173 let kind = match ic.as_str().chars().next().unwrap() {
174 ' ' | '_' => IndentChar::Space,
175 't' => IndentChar::Tab,
176 c => IndentChar::Custom(c),
177 };
178 Indent {
179 count,
180 kind,
181 }
182 })
183 });
184
185 let prefix = caps.name("prefix").map(|m| m.as_str().to_string());
186
187 let kind = match caps.name("kind").unwrap().as_str() {
188 "chompnote" => TokenKind::Chompnote,
189 "date" => TokenKind::Date,
190 "duration" => TokenKind::Duration,
191 "hr" => TokenKind::Hr,
192 "hr_under" => TokenKind::HrUnder,
193 "idnote" => TokenKind::Idnote,
194 "interval" => TokenKind::Interval,
195 "n" => TokenKind::Newline,
196 "note" => TokenKind::Note,
197 "odnote" => TokenKind::Odnote,
198 "section" => TokenKind::Section,
199 "shortdate" => TokenKind::Shortdate,
200 "t" => TokenKind::Tab,
201 "tags" => TokenKind::Tags,
202 "title" => TokenKind::Title,
203 _ => unreachable!(),
204 };
205
206 tokens.push(Token::Placeholder {
207 indent,
208 kind,
209 marker,
210 prefix,
211 width,
212 });
213 }
214 }
215
216 last_end = end;
217 }
218
219 if last_end < escaped.len() {
220 tokens.push(Token::Literal(unescape(&escaped[last_end..])));
221 }
222
223 tokens
224}
225
226fn unescape(s: &str) -> String {
227 s.replace(ESCAPE_SENTINEL, "%")
228}
229
// Unit tests for the template parser.
#[cfg(test)]
mod test {
    use super::*;

    /// Builds a placeholder token of the given kind with no modifiers set.
    fn placeholder(kind: TokenKind) -> Token {
        Token::Placeholder {
            indent: None,
            kind,
            marker: None,
            prefix: None,
            width: None,
        }
    }

    // Tests for `parse`.
    mod parse {
        use pretty_assertions::assert_eq;

        use super::*;

        // `\%` must not start a directive and must come back as a plain `%`.
        #[test]
        fn it_handles_escaped_percent() {
            let tokens = parse("\\%date is literal");

            assert_eq!(tokens, vec![Token::Literal("%date is literal".into())]);
        }

        // Every keyword on its own yields exactly one placeholder of the matching kind.
        #[test]
        fn it_parses_all_token_kinds() {
            for (input, expected) in [
                ("%chompnote", TokenKind::Chompnote),
                ("%date", TokenKind::Date),
                ("%duration", TokenKind::Duration),
                ("%hr", TokenKind::Hr),
                ("%hr_under", TokenKind::HrUnder),
                ("%idnote", TokenKind::Idnote),
                ("%interval", TokenKind::Interval),
                ("%n", TokenKind::Newline),
                ("%note", TokenKind::Note),
                ("%odnote", TokenKind::Odnote),
                ("%section", TokenKind::Section),
                ("%shortdate", TokenKind::Shortdate),
                ("%t", TokenKind::Tab),
                ("%tags", TokenKind::Tags),
                ("%title", TokenKind::Title),
            ] {
                let tokens = parse(input);

                assert_eq!(tokens.len(), 1, "expected one token for {input}");
                match &tokens[0] {
                    Token::Placeholder {
                        kind, ..
                    } => {
                        assert_eq!(*kind, expected, "wrong kind for {input}")
                    }
                    _ => panic!("expected placeholder for {input}"),
                }
            }
        }

        // Named colors and placeholders can sit back to back without literal text between them.
        #[test]
        fn it_parses_color_tokens() {
            let tokens = parse("%cyan%date%reset");

            assert_eq!(
                tokens,
                vec![
                    Token::Color(colors::Color::Named(colors::NamedColor::Cyan)),
                    placeholder(TokenKind::Date),
                    Token::Color(colors::Color::Named(colors::NamedColor::Reset)),
                ]
            );
        }

        // Width, indent and a non-ASCII prefix are all picked up from a single directive.
        #[test]
        fn it_parses_combined_width_indent_and_prefix() {
            let tokens = parse("%80_14\u{2503} note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: Some(Indent {
                        count: 14,
                        kind: IndentChar::Space,
                    }),
                    kind: TokenKind::Note,
                    marker: None,
                    prefix: Some("\u{2503} ".into()),
                    width: Some(80),
                }]
            );
        }

        // An empty template produces no tokens at all.
        #[test]
        fn it_parses_empty_string() {
            let tokens = parse("");

            assert_eq!(tokens, vec![]);
        }

        // Marker, space indent and prefix combined in one directive.
        #[test]
        fn it_parses_full_note_modifiers() {
            let tokens = parse("%^> 8: note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: Some(Indent {
                        count: 8,
                        kind: IndentChar::Space,
                    }),
                    kind: TokenKind::Note,
                    marker: Some('>'),
                    prefix: Some(": ".into()),
                    width: None,
                }]
            );
        }

        // A hex color ends after its six digits; the text that follows is a literal.
        #[test]
        fn it_parses_hex_color_tokens() {
            let tokens = parse("%#FF5500hello");

            assert_eq!(
                tokens,
                vec![
                    Token::Color(colors::Color::Hex {
                        background: false,
                        b: 0x00,
                        g: 0x55,
                        r: 0xFF,
                    }),
                    Token::Literal("hello".into()),
                ]
            );
        }

        // Text without any `%` is returned as one literal.
        #[test]
        fn it_parses_literal_text() {
            let tokens = parse("hello world");

            assert_eq!(tokens, vec![Token::Literal("hello world".into())]);
        }

        // `^` followed by one character sets the marker.
        #[test]
        fn it_parses_marker_modifier() {
            let tokens = parse("%^>note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: None,
                    kind: TokenKind::Note,
                    marker: Some('>'),
                    prefix: None,
                    width: None,
                }]
            );
        }

        // Literal text on both sides of a placeholder is kept, including surrounding spaces.
        #[test]
        fn it_parses_mixed_literals_and_placeholders() {
            let tokens = parse("hello %title world");

            assert_eq!(
                tokens,
                vec![
                    Token::Literal("hello ".into()),
                    placeholder(TokenKind::Title),
                    Token::Literal(" world".into()),
                ]
            );
        }

        // A leading `-` makes the width negative.
        #[test]
        fn it_parses_negative_width_modifier() {
            let tokens = parse("%-10section");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: None,
                    kind: TokenKind::Section,
                    marker: None,
                    prefix: None,
                    width: Some(-10),
                }]
            );
        }

        // Digits directly after `%` set the width.
        #[test]
        fn it_parses_positive_width_modifier() {
            let tokens = parse("%80title");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: None,
                    kind: TokenKind::Title,
                    marker: None,
                    prefix: None,
                    width: Some(80),
                }]
            );
        }

        // A character plus trailing space right before the keyword is captured as the prefix.
        #[test]
        fn it_parses_prefix_modifier() {
            let tokens = parse("%: note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: None,
                    kind: TokenKind::Note,
                    marker: None,
                    prefix: Some(": ".into()),
                    width: None,
                }]
            );
        }

        // A width followed by a non-ASCII prefix and a space separator.
        #[test]
        fn it_parses_prefix_with_separator() {
            let tokens = parse("%80\u{2551} title");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: None,
                    kind: TokenKind::Title,
                    marker: None,
                    prefix: Some("\u{2551} ".into()),
                    width: Some(80),
                }]
            );
        }

        // A space followed by a count is a space indent.
        #[test]
        fn it_parses_space_indent_modifier() {
            let tokens = parse("% 4note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: Some(Indent {
                        count: 4,
                        kind: IndentChar::Space,
                    }),
                    kind: TokenKind::Note,
                    marker: None,
                    prefix: None,
                    width: None,
                }]
            );
        }

        // `t` followed by a count is a tab indent, not the `%t` placeholder.
        #[test]
        fn it_parses_tab_indent_modifier() {
            let tokens = parse("%t2note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: Some(Indent {
                        count: 2,
                        kind: IndentChar::Tab,
                    }),
                    kind: TokenKind::Note,
                    marker: None,
                    prefix: None,
                    width: None,
                }]
            );
        }

        // Control characters in plain text pass through untouched inside a single literal.
        #[test]
        fn it_handles_control_characters_in_input() {
            let tokens = parse("hello \x01 and \x02 world");

            assert_eq!(tokens, vec![Token::Literal("hello \x01 and \x02 world".into())]);
        }

        // `_` followed by a count is treated as a space indent.
        #[test]
        fn it_parses_underscore_indent_modifier() {
            let tokens = parse("%_14note");

            assert_eq!(
                tokens,
                vec![Token::Placeholder {
                    indent: Some(Indent {
                        count: 14,
                        kind: IndentChar::Space,
                    }),
                    kind: TokenKind::Note,
                    marker: None,
                    prefix: None,
                    width: None,
                }]
            );
        }

        // A `%` sequence that is neither a placeholder nor a color stays in the output as text.
        #[test]
        fn it_preserves_unknown_percent_sequences() {
            let tokens = parse("%xyz%date");

            assert_eq!(
                tokens,
                vec![Token::Literal("%xyz".into()), placeholder(TokenKind::Date),]
            );
        }
    }
}