1use std::sync::LazyLock;
2
3use chrono::{Local, NaiveDateTime, TimeZone};
4use regex::Regex;
5
6use crate::{Document, Entry, Note, Section, Tag, Tags};
7
8pub const DEFAULT_SECTION: &str = "Uncategorized";
10
11static ENTRY_RX: LazyLock<Regex> =
12 LazyLock::new(|| Regex::new(r"^\t- (\d{4}-\d{2}-\d{2} \d{2}:\d{2}) \| (.*?)(?:\s+<([a-f0-9]{32})>)?\s*$").unwrap());
13static SECTION_RX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\S[\S ]+):\s*$").unwrap());
14static TAG_RX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?:^| )(@([^\s(]+)(?:\(([^)]+)\))?)").unwrap());
15
16fn flush_entry(current_section: &mut Option<Section>, current_entry: &mut Option<(Entry, Vec<String>)>) {
18 if let Some((mut entry, note_lines)) = current_entry.take() {
19 if !note_lines.is_empty() {
20 *entry.note_mut() = Note::from_lines(note_lines);
21 }
22 if let Some(section) = current_section.as_mut() {
23 section.add_entry(entry);
24 }
25 }
26}
27
28fn flush_section(doc: &mut Document, current_section: &mut Option<Section>) {
30 if let Some(section) = current_section.take() {
31 doc.add_section(section);
32 }
33}
34
35pub fn parse(content: &str) -> Document {
40 let mut doc = Document::new();
41 let mut current_section: Option<Section> = None;
42 let mut current_entry: Option<(Entry, Vec<String>)> = None;
43 let mut found_first_section = false;
44
45 for line in content.lines() {
46 if let Some(caps) = SECTION_RX.captures(line) {
47 flush_entry(&mut current_section, &mut current_entry);
48 flush_section(&mut doc, &mut current_section);
49 found_first_section = true;
50 current_section = Some(Section::new(&caps[1]));
51 continue;
52 }
53
54 if let Some(caps) = ENTRY_RX.captures(line) {
55 flush_entry(&mut current_section, &mut current_entry);
56
57 if !found_first_section {
58 found_first_section = true;
59 current_section = Some(Section::new(DEFAULT_SECTION));
60 }
61
62 let date_str = &caps[1];
63 let raw_title = caps[2].trim();
64 let id = caps.get(3).map(|m| m.as_str());
65
66 let section_name = current_section
67 .as_ref()
68 .map(|s| s.title().to_string())
69 .unwrap_or_default();
70
71 if let Ok(naive) = NaiveDateTime::parse_from_str(date_str, "%Y-%m-%d %H:%M")
72 && let Some(date) = Local
73 .from_local_datetime(&naive)
74 .single()
75 .or_else(|| Local.from_local_datetime(&naive).earliest())
76 .or_else(|| Local.from_local_datetime(&naive).latest())
77 .or_else(|| Some(naive.and_utc().with_timezone(&Local)))
78 {
79 let (title, tags) = parse_tags(raw_title);
80 let entry = Entry::new(date, title, tags, Note::new(), §ion_name, id);
81 current_entry = Some((entry, Vec::new()));
82 }
83 continue;
84 }
85
86 if let Some(note_text) = line.strip_prefix("\t\t") {
87 if let Some(ref mut entry) = current_entry {
88 entry.1.push(note_text.to_string());
89 }
90 continue;
91 }
92
93 if !found_first_section {
94 doc.other_content_top_mut().push(line.to_string());
95 } else if let Some(section) = current_section.as_mut() {
96 section.trailing_content_mut().push(line.to_string());
97 } else {
98 doc.other_content_bottom_mut().push(line.to_string());
99 }
100 }
101
102 flush_entry(&mut current_section, &mut current_entry);
103 flush_section(&mut doc, &mut current_section);
104
105 doc
106}
107
108fn parse_tags(title: &str) -> (String, Tags) {
110 let mut tags = Vec::new();
111
112 let mut tag_ranges: Vec<(usize, usize)> = Vec::new();
114 for caps in TAG_RX.captures_iter(title) {
115 let m = caps.get(1).unwrap();
116 tag_ranges.push((m.start(), m.end()));
117 let name = &caps[2];
118 let value = caps.get(3).map(|m| m.as_str().to_string());
119 tags.push(Tag::new(name, value));
120 }
121
122 let mut clean = String::with_capacity(title.len());
124 let mut pos = 0;
125 for (start, end) in &tag_ranges {
126 clean.push_str(&title[pos..*start]);
127 pos = *end;
128 }
129 clean.push_str(&title[pos..]);
130
131 let clean_title = clean.split_whitespace().collect::<Vec<_>>().join(" ");
132 (clean_title, Tags::from_iter(tags))
133}
134
#[cfg(test)]
mod test {
    use super::*;

    // Tests for the `parse` function.
    mod parse {
        use chrono::TimeZone;
        use pretty_assertions::assert_eq;

        use super::*;

        // An entry with no preceding header ends up in the default section.
        #[test]
        fn it_handles_entries_without_section() {
            let parsed = parse("\t- 2024-03-17 14:30 | Orphan task");

            assert!(parsed.has_section(DEFAULT_SECTION));
            assert_eq!(parsed.entries_in_section(DEFAULT_SECTION).count(), 1);
        }

        // Empty input produces an empty document.
        #[test]
        fn it_parses_empty_content() {
            assert!(parse("").is_empty());
        }

        // Headers without entries produce sections with no entries.
        #[test]
        fn it_parses_empty_sections() {
            let parsed = parse("Currently:\nArchive:");

            for name in ["Currently", "Archive"] {
                assert_eq!(parsed.entries_in_section(name).count(), 0);
            }
        }

        // The timestamp and title of an entry line are both picked up.
        #[test]
        fn it_parses_entries_with_dates_and_titles() {
            let parsed = parse("Currently:\n\t- 2024-03-17 14:30 | Working on feature");
            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();

            assert_eq!(items.len(), 1);
            assert_eq!(items[0].title(), "Working on feature");
            assert_eq!(
                items[0].date(),
                Local.with_ymd_and_hms(2024, 3, 17, 14, 30, 0).unwrap()
            );
        }

        // A trailing `<id>` becomes the entry's id.
        #[test]
        fn it_parses_entries_with_ids() {
            let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature <aaaabbbbccccddddeeeeffffaaaabbbb>";
            let parsed = parse(input);

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items[0].id(), "aaaabbbbccccddddeeeeffffaaaabbbb");
        }

        // Tags are removed from the title and recorded, values included.
        #[test]
        fn it_parses_entries_with_tags() {
            let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature @coding @done(2024-03-17 15:00)";
            let parsed = parse(input);

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items[0].title(), "Working on feature");
            for name in ["coding", "done"] {
                assert!(items[0].tags().has(name));
            }
            assert_eq!(
                items[0].tags().iter().find(|tag| tag.name() == "done").unwrap().value(),
                Some("2024-03-17 15:00")
            );
        }

        // Tags and an id can appear on the same entry line.
        #[test]
        fn it_parses_entry_with_tags_and_id() {
            let input =
                "Currently:\n\t- 2024-03-17 14:30 | My task @flag @done(2024-03-17 15:00) <aaaabbbbccccddddeeeeffffaaaabbbb>";
            let parsed = parse(input);

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items[0].title(), "My task");
            for name in ["flag", "done"] {
                assert!(items[0].tags().has(name));
            }
            assert_eq!(items[0].id(), "aaaabbbbccccddddeeeeffffaaaabbbb");
        }

        // Entries are attributed to the section whose header precedes them.
        #[test]
        fn it_parses_multiple_sections_with_entries() {
            let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A @coding
\t- 2024-03-17 15:00 | Task B
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00)";
            let parsed = parse(input);

            assert_eq!(parsed.len(), 2);
            assert_eq!(parsed.entries_in_section("Currently").count(), 2);
            assert_eq!(parsed.entries_in_section("Archive").count(), 1);
        }

        // Double-tab lines following an entry become its note lines.
        #[test]
        fn it_parses_notes() {
            let input = "Currently:\n\t- 2024-03-17 14:30 | Working on feature\n\t\tA note line\n\t\tAnother note";
            let parsed = parse(input);

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items[0].note().len(), 2);
            assert_eq!(items[0].note().lines(), &["A note line", "Another note"]);
        }

        // Section headers are recorded in file order.
        #[test]
        fn it_parses_section_headers() {
            let parsed = parse("Currently:\nArchive:");

            assert_eq!(parsed.len(), 2);
            let titles: Vec<&str> = parsed.sections().iter().map(|section| section.title()).collect();
            assert_eq!(titles, vec!["Currently", "Archive"]);
        }

        // Lines before the first header are kept as top content.
        #[test]
        fn it_preserves_other_content_top() {
            let parsed = parse("# My Doing File\n\nCurrently:");

            assert_eq!(parsed.other_content_top(), &["# My Doing File", ""]);
            assert!(parsed.has_section("Currently"));
        }

        // An entry line without an id still ends up with a 32-character hex id.
        #[test]
        fn it_generates_id_when_none_present() {
            let parsed = parse("Currently:\n\t- 2024-03-17 14:30 | Working on feature");

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items[0].id().len(), 32);
            assert!(items[0].id().chars().all(|ch| ch.is_ascii_hexdigit()));
        }

        // Parsing and then formatting reproduces the input exactly.
        #[test]
        fn it_round_trips_a_document() {
            let input = "\
Currently:
\t- 2024-03-17 14:30 | Working on feature @coding <aaaabbbbccccddddeeeeffffaaaabbbb>
\t\tA note about the work
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00) <bbbbccccddddeeeeffffaaaabbbbcccc>";
            let rendered = parse(input).to_string();

            assert_eq!(rendered, input);
        }

        // Top content survives a round trip, separated from the sections by a blank line.
        #[test]
        fn it_round_trips_with_other_content() {
            let input = "\
# My Doing File
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>";
            let parsed = parse(input);

            assert_eq!(parsed.other_content_top(), &["# My Doing File"]);

            let rendered = parsed.to_string();

            assert_eq!(
                rendered,
                "# My Doing File\n\nCurrently:\n\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>"
            );
        }

        // A header that repeats contributes to a single section.
        #[test]
        fn it_merges_duplicate_section_headers() {
            let input = "\
Archive:
\t- 2024-03-16 10:00 | Old task @done(2024-03-16 11:00)
Archive:
\t- 2024-03-17 09:00 | Another old task @done(2024-03-17 10:00)";
            let parsed = parse(input);

            assert_eq!(parsed.len(), 1);
            assert_eq!(parsed.entries_in_section("Archive").count(), 2);
        }

        // Entries whose local time may be skipped or repeated by DST are still kept.
        #[test]
        fn it_preserves_entries_with_dst_ambiguous_timestamps() {
            let input = "\
Currently:
\t- 2024-03-10 02:30 | Spring forward task
\t- 2024-11-03 01:30 | Fall back task
\t- 2024-06-15 14:00 | Normal task";
            let parsed = parse(input);

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items.len(), 3);
            let expected_titles = ["Spring forward task", "Fall back task", "Normal task"];
            for (item, expected) in items.iter().zip(expected_titles) {
                assert_eq!(item.title(), expected);
            }
        }

        // An unrecognized line between sections stays with the section above it.
        #[test]
        fn it_preserves_inter_section_content_position() {
            let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
# A comment between sections
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
            let parsed = parse(input);

            let first_section = &parsed.sections()[0];
            assert_eq!(first_section.trailing_content(), &["# A comment between sections"]);
            assert!(parsed.other_content_bottom().is_empty());
        }

        // A comment between sections is written back in the same place.
        #[test]
        fn it_round_trips_document_with_comments_between_sections() {
            let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
# A comment between sections
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
            let rendered = parse(input).to_string();

            assert_eq!(rendered, input);
        }

        // A document made only of sections and entries has no bottom content.
        #[test]
        fn it_only_puts_actual_bottom_content_in_other_content_bottom() {
            let input = "\
Currently:
\t- 2024-03-17 14:30 | Task A <aaaabbbbccccddddeeeeffffaaaabbbb>
Archive:
\t- 2024-03-16 10:00 | Task B <bbbbccccddddeeeeffffaaaabbbbcccc>";
            let parsed = parse(input);

            assert!(parsed.other_content_bottom().is_empty());
        }

        // A line that is not a well-formed entry does not become an entry.
        #[test]
        fn it_skips_malformed_lines_gracefully() {
            let parsed = parse("Currently:\n\t- not a valid entry\n\t- 2024-03-17 14:30 | Valid task");

            let items = parsed.entries_in_section("Currently").collect::<Vec<_>>();
            assert_eq!(items.len(), 1);
            assert_eq!(items[0].title(), "Valid task");
        }
    }

    // Tests for the `parse_tags` helper.
    mod parse_tags {
        use pretty_assertions::assert_eq;

        use super::*;

        // Several tags, with and without values, are all extracted.
        #[test]
        fn it_extracts_multiple_tags() {
            let (clean, found) = parse_tags("My task @coding @important @done(2024-03-17 15:00)");

            assert_eq!(clean, "My task");
            assert_eq!(found.len(), 3);
            for name in ["coding", "important", "done"] {
                assert!(found.has(name));
            }
        }

        // A single bare tag is removed from the title and recorded.
        #[test]
        fn it_extracts_simple_tags() {
            let (clean, found) = parse_tags("Working on feature @coding");

            assert_eq!(clean, "Working on feature");
            assert_eq!(found.len(), 1);
            assert!(found.has("coding"));
        }

        // The parenthesized part of a tag becomes its value.
        #[test]
        fn it_extracts_tags_with_values() {
            let (clean, found) = parse_tags("Task @done(2024-03-17 15:00)");

            assert_eq!(clean, "Task");
            assert_eq!(found.len(), 1);
            assert_eq!(found.iter().next().unwrap().value(), Some("2024-03-17 15:00"));
        }

        // Removing a tag from the middle leaves a single space between the words.
        #[test]
        fn it_handles_tags_in_middle_of_title() {
            let (clean, found) = parse_tags("Start @flag end");

            assert_eq!(clean, "Start end");
            assert!(found.has("flag"));
        }

        // A title without tags is returned unchanged with an empty tag set.
        #[test]
        fn it_returns_empty_tags_for_no_tags() {
            let (clean, found) = parse_tags("Just a plain title");

            assert_eq!(clean, "Just a plain title");
            assert!(found.is_empty());
        }
    }
}