Skip to main content

doing_taskpaper/
parser.rs

1use std::sync::LazyLock;
2
3use chrono::{Local, NaiveDateTime, TimeZone};
4use regex::Regex;
5
6use crate::{Document, Entry, Note, Section, Tag, Tags};
7
/// The default section name used when entries appear before any section header.
pub const DEFAULT_SECTION: &str = "Uncategorized";

/// Matches an entry line: one leading tab, `- `, a `YYYY-MM-DD HH:MM` timestamp, ` | `, and the
/// rest of the line.
///
/// Capture groups:
/// 1. the timestamp text,
/// 2. the title (lazily matched, still containing any `@tags`),
/// 3. optionally, a trailing `<id>` of exactly 32 lowercase hex characters.
///
/// The pattern only checks the *shape* of the timestamp (digit counts); whether it is a real
/// date/time is decided later when it is parsed.
static ENTRY_RX: LazyLock<Regex> =
  LazyLock::new(|| Regex::new(r"^\t- (\d{4}-\d{2}-\d{2} \d{2}:\d{2}) \| (.*?)(?:\s+<([a-f0-9]{32})>)?\s*$").unwrap());
/// Matches a section header: a line that starts with a non-whitespace character and ends with
/// `:` (optionally followed by whitespace).
///
/// Capture group 1 is the section name. Because of `\S[\S ]+`, the name is at least two
/// characters long and may contain spaces but no other whitespace.
static SECTION_RX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\S[\S ]+):\s*$").unwrap());
/// Matches a tag that sits at the start of the text or directly after a single space.
///
/// Capture groups:
/// 1. the whole tag as written (`@name` or `@name(value)`),
/// 2. the tag name (everything after `@` up to whitespace or `(`),
/// 3. optionally, the non-empty value between the parentheses.
static TAG_RX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:^| )(@([^\s(]+)(?:\(([^)]+)\))?)").unwrap());
15
16/// Flush the current entry (with accumulated note lines) into the current section.
17fn flush_entry(current_section: &mut Option<Section>, current_entry: &mut Option<(Entry, Vec<String>)>) {
18  if let Some((mut entry, note_lines)) = current_entry.take() {
19    if !note_lines.is_empty() {
20      *entry.note_mut() = Note::from_lines(note_lines);
21    }
22    if let Some(section) = current_section.as_mut() {
23      section.add_entry(entry);
24    }
25  }
26}
27
28/// Flush the current section into the document.
29fn flush_section(doc: &mut Document, current_section: &mut Option<Section>) {
30  if let Some(section) = current_section.take() {
31    doc.add_section(section);
32  }
33}
34
35/// Parse a doing file string into a structured `Document`.
36///
37/// Recognizes section headers, entries with dates/tags/IDs, and notes.
38/// Non-entry, non-section content is preserved as other content.
39pub fn parse(content: &str) -> Document {
40  let mut doc = Document::new();
41  let mut current_section: Option<Section> = None;
42  let mut current_entry: Option<(Entry, Vec<String>)> = None;
43  let mut found_first_section = false;
44
45  for line in content.lines() {
46    if let Some(caps) = SECTION_RX.captures(line) {
47      flush_entry(&mut current_section, &mut current_entry);
48      flush_section(&mut doc, &mut current_section);
49      found_first_section = true;
50      current_section = Some(Section::new(&caps[1]));
51      continue;
52    }
53
54    if let Some(caps) = ENTRY_RX.captures(line) {
55      flush_entry(&mut current_section, &mut current_entry);
56
57      if !found_first_section {
58        found_first_section = true;
59        current_section = Some(Section::new(DEFAULT_SECTION));
60      }
61
62      let date_str = &caps[1];
63      let raw_title = caps[2].trim();
64      let id = caps.get(3).map(|m| m.as_str());
65
66      let section_name = current_section
67        .as_ref()
68        .map(|s| s.title().to_string())
69        .unwrap_or_default();
70
71      if let Ok(naive) = NaiveDateTime::parse_from_str(date_str, "%Y-%m-%d %H:%M")
72        && let Some(date) = Local
73          .from_local_datetime(&naive)
74          .single()
75          .or_else(|| Local.from_local_datetime(&naive).earliest())
76          .or_else(|| Local.from_local_datetime(&naive).latest())
77          .or_else(|| Some(naive.and_utc().with_timezone(&Local)))
78      {
79        let (title, tags) = parse_tags(raw_title);
80        let entry = Entry::new(date, title, tags, Note::new(), &section_name, id);
81        current_entry = Some((entry, Vec::new()));
82      }
83      continue;
84    }
85
86    if let Some(note_text) = line.strip_prefix("\t\t") {
87      if let Some(ref mut entry) = current_entry {
88        entry.1.push(note_text.to_string());
89      }
90      continue;
91    }
92
93    if !found_first_section {
94      doc.other_content_top_mut().push(line.to_string());
95    } else if let Some(section) = current_section.as_mut() {
96      section.trailing_content_mut().push(line.to_string());
97    } else {
98      doc.other_content_bottom_mut().push(line.to_string());
99    }
100  }
101
102  flush_entry(&mut current_section, &mut current_entry);
103  flush_section(&mut doc, &mut current_section);
104
105  doc
106}
107
108/// Extract tags from a title string, returning the tag-free title and a `Tags` collection.
109fn parse_tags(title: &str) -> (String, Tags) {
110  let mut tags = Vec::new();
111
112  // Collect tag match byte ranges so we can build the cleaned title in one pass
113  let mut tag_ranges: Vec<(usize, usize)> = Vec::new();
114  for caps in TAG_RX.captures_iter(title) {
115    let m = caps.get(1).unwrap();
116    tag_ranges.push((m.start(), m.end()));
117    let name = &caps[2];
118    let value = caps.get(3).map(|m| m.as_str().to_string());
119    tags.push(Tag::new(name, value));
120  }
121
122  // Build cleaned title by skipping tag ranges
123  let mut clean = String::with_capacity(title.len());
124  let mut pos = 0;
125  for (start, end) in &tag_ranges {
126    clean.push_str(&title[pos..*start]);
127    pos = *end;
128  }
129  clean.push_str(&title[pos..]);
130
131  let clean_title = clean.split_whitespace().collect::<Vec<_>>().join(" ");
132  (clean_title, Tags::from_iter(tags))
133}
134
#[cfg(test)]
mod test {
  use super::*;

  /// Tests for the top-level `parse` function.
  mod parse {
    use chrono::TimeZone;
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn it_handles_entries_without_section() {
      let input = "\t- 2024-03-17 14:30 | Orphan task";
      let parsed = parse(input);

      // An entry with no header above it lands in the default section.
      assert!(parsed.has_section(DEFAULT_SECTION));
      assert_eq!(parsed.entries_in_section(DEFAULT_SECTION).count(), 1);
    }

    #[test]
    fn it_parses_empty_content() {
      let parsed = parse("");

      assert!(parsed.is_empty());
    }

    #[test]
    fn it_parses_empty_sections() {
      let input = "Currently:\nArchive:";
      let parsed = parse(input);

      assert_eq!(parsed.entries_in_section("Currently").count(), 0);
      assert_eq!(parsed.entries_in_section("Archive").count(), 0);
    }

    #[test]
    fn it_parses_entries_with_dates_and_titles() {
      let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      assert_eq!(items.len(), 1);
      assert_eq!(items[0].title(), "Working on feature");
      assert_eq!(
        items[0].date(),
        Local.with_ymd_and_hms(2024, 3, 17, 14, 30, 0).unwrap()
      );
    }

    #[test]
    fn it_parses_entries_with_ids() {
      let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature <aaaabbbbccccddddeeeeffffaaaabbbb>";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      assert_eq!(items[0].id(), "aaaabbbbccccddddeeeeffffaaaabbbb");
    }

    #[test]
    fn it_parses_entries_with_tags() {
      let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature @coding @done(2024-03-17 15:00)";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      // Tags are stripped from the title and exposed through `tags()`.
      assert_eq!(items[0].title(), "Working on feature");
      assert!(items[0].tags().has("coding"));
      assert!(items[0].tags().has("done"));
      assert_eq!(
        items[0].tags().iter().find(|t| t.name() == "done").unwrap().value(),
        Some("2024-03-17 15:00")
      );
    }

    #[test]
    fn it_parses_entry_with_tags_and_id() {
      let input =
        "Currently:\n\t- 2024-03-17 14:30 | My task @flag @done(2024-03-17 15:00) <aaaabbbbccccddddeeeeffffaaaabbbb>";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      assert_eq!(items[0].title(), "My task");
      assert!(items[0].tags().has("flag"));
      assert!(items[0].tags().has("done"));
      assert_eq!(items[0].id(), "aaaabbbbccccddddeeeeffffaaaabbbb");
    }

    #[test]
    fn it_parses_multiple_sections_with_entries() {
      let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A @coding
\t- 2024-03-17 15:00 | Task B
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00)";
      let parsed = parse(input);

      assert_eq!(parsed.len(), 2);
      assert_eq!(parsed.entries_in_section("Currently").count(), 2);
      assert_eq!(parsed.entries_in_section("Archive").count(), 1);
    }

    #[test]
    fn it_parses_notes() {
      let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature\n\t\tA note line\n\t\tAnother note";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      // Note lines are stored without their two leading tabs.
      assert_eq!(items[0].note().len(), 2);
      assert_eq!(items[0].note().lines(), &["A note line", "Another note"]);
    }

    #[test]
    fn it_parses_section_headers() {
      let input = "Currently:\nArchive:";
      let parsed = parse(input);

      assert_eq!(parsed.len(), 2);
      let titles: Vec<&str> = parsed.sections().iter().map(|s| s.title()).collect();
      assert_eq!(titles, vec!["Currently", "Archive"]);
    }

    #[test]
    fn it_preserves_other_content_top() {
      let input = "# My Doing File\n\nCurrently:";
      let parsed = parse(input);

      assert_eq!(parsed.other_content_top(), &["# My Doing File", ""]);
      assert!(parsed.has_section("Currently"));
    }

    #[test]
    fn it_generates_id_when_none_present() {
      let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      // A generated id has the same shape as a written one: 32 hex characters.
      assert_eq!(items[0].id().len(), 32);
      assert!(items[0].id().chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn it_round_trips_a_document() {
      let input = "\
Currently:
\t- 2024-03-17 14:30 | Working on feature @coding <aaaabbbbccccddddeeeeffffaaaabbbb>
\t\tA note about the work
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00) <bbbbccccddddeeeeffffaaaabbbbcccc>";
      let parsed = parse(input);
      let rendered = format!("{parsed}");

      assert_eq!(rendered, input);
    }

    #[test]
    fn it_round_trips_with_other_content() {
      let input = "\
# My Doing File
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>";
      let parsed = parse(input);

      assert_eq!(parsed.other_content_top(), &["# My Doing File"]);

      let rendered = format!("{parsed}");

      assert_eq!(
        rendered,
        "# My Doing File\n\nCurrently:\n\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>"
      );
    }

    #[test]
    fn it_merges_duplicate_section_headers() {
      let input = "\
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00)
Archive:
\t- 2024-03-17 09:00 | Another old task @done(2024-03-17 10:00)";
      let parsed = parse(input);

      assert_eq!(parsed.len(), 1);
      assert_eq!(parsed.entries_in_section("Archive").count(), 2);
    }

    #[test]
    fn it_preserves_entries_with_dst_ambiguous_timestamps() {
      // 2024-03-10 02:30 is inside the US spring-forward gap (2:00 AM jumps to 3:00 AM).
      // 2024-11-03 01:30 is inside the US fall-back fold (1:00 AM happens twice).
      // Whatever timezone the test machine uses, none of these entries may be dropped.
      let input = "\
Currently:
\t- 2024-03-10 02:30 | Spring forward task
\t- 2024-11-03 01:30 | Fall back task
\t- 2024-06-15 14:00 | Normal task";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      assert_eq!(items.len(), 3);
      assert_eq!(items[0].title(), "Spring forward task");
      assert_eq!(items[1].title(), "Fall back task");
      assert_eq!(items[2].title(), "Normal task");
    }

    #[test]
    fn it_preserves_inter_section_content_position() {
      let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
# A comment between sections
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
      let parsed = parse(input);

      // The comment belongs to the section it follows, not to the end of the document.
      let first_section = &parsed.sections()[0];
      assert_eq!(first_section.trailing_content(), &["# A comment between sections"]);
      assert!(parsed.other_content_bottom().is_empty());
    }

    #[test]
    fn it_round_trips_document_with_comments_between_sections() {
      let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
# A comment between sections
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
      let parsed = parse(input);
      let rendered = format!("{parsed}");

      assert_eq!(rendered, input);
    }

    #[test]
    fn it_only_puts_actual_bottom_content_in_other_content_bottom() {
      let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
      let parsed = parse(input);

      assert!(parsed.other_content_bottom().is_empty());
    }

    #[test]
    fn it_skips_malformed_lines_gracefully() {
      let input = "Currently:\n\t- not a valid entry\n\t- 2024-03-17 14:30 | Valid task";
      let parsed = parse(input);

      let items: Vec<_> = parsed.entries_in_section("Currently").collect();
      assert_eq!(items.len(), 1);
      assert_eq!(items[0].title(), "Valid task");
    }
  }

  /// Tests for the private `parse_tags` helper.
  mod parse_tags {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn it_extracts_multiple_tags() {
      let (plain, found) = parse_tags("My task @coding @important @done(2024-03-17 15:00)");

      assert_eq!(plain, "My task");
      assert_eq!(found.len(), 3);
      assert!(found.has("coding"));
      assert!(found.has("important"));
      assert!(found.has("done"));
    }

    #[test]
    fn it_extracts_simple_tags() {
      let (plain, found) = parse_tags("Working on feature @coding");

      assert_eq!(plain, "Working on feature");
      assert_eq!(found.len(), 1);
      assert!(found.has("coding"));
    }

    #[test]
    fn it_extracts_tags_with_values() {
      let (plain, found) = parse_tags("Task @done(2024-03-17 15:00)");

      assert_eq!(plain, "Task");
      assert_eq!(found.len(), 1);
      assert_eq!(found.iter().next().unwrap().value(), Some("2024-03-17 15:00"));
    }

    #[test]
    fn it_handles_tags_in_middle_of_title() {
      let (plain, found) = parse_tags("Start @flag end");

      // Removing a mid-title tag must not leave a double space behind.
      assert_eq!(plain, "Start end");
      assert!(found.has("flag"));
    }

    #[test]
    fn it_returns_empty_tags_for_no_tags() {
      let (plain, found) = parse_tags("Just a plain title");

      assert_eq!(plain, "Just a plain title");
      assert!(found.is_empty());
    }
  }
}