Skip to main content

todo_tree/
parser.rs

1use regex::{Regex, RegexBuilder};
2use std::path::Path;
3use todo_tree_core::{DEFAULT_REGEX, Priority, TodoItem};
4
5#[derive(Debug, Clone)]
6pub struct TodoParser {
7    pattern: Option<Regex>,
8    tags: Vec<String>,
9    case_sensitive: bool,
10}
11
12impl TodoParser {
13    pub fn new(tags: &[String], case_sensitive: bool) -> Self {
14        Self::with_options(tags, case_sensitive, true, None)
15    }
16
17    pub fn with_options(
18        tags: &[String],
19        case_sensitive: bool,
20        require_colon: bool,
21        custom_regex: Option<&str>,
22    ) -> Self {
23        let pattern = Self::build_pattern(tags, case_sensitive, require_colon, custom_regex);
24        Self {
25            pattern,
26            tags: tags.to_vec(),
27            case_sensitive,
28        }
29    }
30
31    fn build_pattern(
32        tags: &[String],
33        case_sensitive: bool,
34        require_colon: bool,
35        custom_regex: Option<&str>,
36    ) -> Option<Regex> {
37        if tags.is_empty() {
38            return None;
39        }
40
41        let escaped_tags: Vec<String> = tags.iter().map(|t| regex::escape(t)).collect();
42        let tags_alternation = escaped_tags.join("|");
43
44        let mut base_pattern = custom_regex.unwrap_or(DEFAULT_REGEX).to_string();
45        if custom_regex.is_none() && !require_colon {
46            base_pattern = base_pattern.replace(":(.*)", r"(?:\s*$|(?:(?::|\s+)(.*)))");
47        }
48
49        let pattern_string = base_pattern.replace("$TAGS", &tags_alternation);
50        let regex = RegexBuilder::new(&pattern_string)
51            .case_insensitive(!case_sensitive)
52            .multi_line(true)
53            .build()
54            .expect("Failed to build regex pattern");
55
56        Some(regex)
57    }
58
59    pub fn parse_line(&self, line: &str, line_number: usize) -> Option<TodoItem> {
60        let pattern = self.pattern.as_ref()?;
61        if let Some(captures) = pattern.captures(line) {
62            let tag_match = captures.get(2)?;
63            let author = captures.get(3).map(|m| m.as_str().to_string());
64            let message = captures
65                .get(4)
66                .map(|m| m.as_str().trim().to_string())
67                .unwrap_or_default();
68
69            let tag = tag_match.as_str().to_string();
70            let column = tag_match.start() + 1;
71
72            let normalized_tag = if self.case_sensitive {
73                tag
74            } else {
75                self.tags
76                    .iter()
77                    .find(|t| t.eq_ignore_ascii_case(&tag))
78                    .cloned()
79                    .unwrap_or(tag)
80            };
81
82            let priority = Priority::from_tag(&normalized_tag);
83
84            return Some(TodoItem {
85                tag: normalized_tag,
86                message,
87                line: line_number,
88                column,
89                line_content: Some(line.to_string()),
90                author,
91                priority,
92            });
93        }
94
95        None
96    }
97
98    pub fn parse_content(&self, content: &str) -> Vec<TodoItem> {
99        content
100            .lines()
101            .enumerate()
102            .filter_map(|(idx, line)| self.parse_line(line, idx + 1))
103            .collect()
104    }
105
106    pub fn parse_file(&self, path: &Path) -> std::io::Result<Vec<TodoItem>> {
107        let content = std::fs::read_to_string(path)?;
108        Ok(self.parse_content(&content))
109    }
110
111    pub fn tags(&self) -> &[String] {
112        &self.tags
113    }
114}
115
/// Unit tests for `TodoParser`: custom-regex parsing, case handling,
/// colon requirements and file input.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Tag set shared by most tests.
    fn tags() -> Vec<String> {
        vec!["TODO".to_string(), "FIXME".to_string(), "BUG".to_string()]
    }

    /// Builds a parser around a small custom regex so the tests do not depend
    /// on the exact shape of `DEFAULT_REGEX`.
    fn custom_parser(tags: &[String], case_sensitive: bool) -> TodoParser {
        // Capture layout must match parse_line():
        // 1 = prefix
        // 2 = tag
        // 3 = author
        // 4 = message
        TodoParser::with_options(
            tags,
            case_sensitive,
            true,
            Some(r"(^|\s)($TAGS)(?:\(([^)]+)\))?(?::(.*))?$"),
        )
    }

    #[test]
    fn new_uses_default_options() {
        let parser = TodoParser::new(&tags(), true);
        assert_eq!(parser.tags(), &tags());
        assert!(parser.pattern.is_some());
    }

    #[test]
    fn empty_tags_disable_parsing() {
        let parser = TodoParser::new(&[], true);

        assert!(parser.pattern.is_none());
        assert!(parser.parse_line("// TODO: message", 1).is_none());
        assert!(parser.parse_content("// TODO: message").is_empty());
    }

    #[test]
    fn parse_line_with_custom_regex_extracts_basic_fields() {
        let parser = custom_parser(&tags(), true);
        let item = parser
            .parse_line(" TODO: write more tests", 7)
            .expect("expected TODO item");

        assert_eq!(item.tag, "TODO");
        assert_eq!(item.message, "write more tests");
        assert_eq!(item.author, None);
        assert_eq!(item.line, 7);
        assert_eq!(item.column, 2);
        assert_eq!(
            item.line_content.as_deref(),
            Some(" TODO: write more tests")
        );
        assert_eq!(item.priority, Priority::from_tag("TODO"));
    }

    #[test]
    fn parse_line_with_custom_regex_extracts_author() {
        let parser = custom_parser(&tags(), true);
        let item = parser
            .parse_line(" FIXME(alice): handle edge case", 3)
            .expect("expected FIXME item");

        assert_eq!(item.tag, "FIXME");
        assert_eq!(item.author.as_deref(), Some("alice"));
        assert_eq!(item.message, "handle edge case");
        assert_eq!(item.line, 3);
        assert_eq!(item.column, 2);
        assert_eq!(item.priority, Priority::from_tag("FIXME"));
    }

    #[test]
    fn parse_line_trims_message() {
        let parser = custom_parser(&tags(), true);
        let item = parser
            .parse_line(" TODO:   message with spaces   ", 1)
            .expect("expected TODO item");

        assert_eq!(item.message, "message with spaces");
    }

    #[test]
    fn case_sensitive_parser_rejects_wrong_case() {
        let parser = custom_parser(&tags(), true);

        assert!(parser.parse_line(" todo: lower-case tag", 1).is_none());
        assert!(parser.parse_line(" TODO: upper-case tag", 1).is_some());
    }

    #[test]
    fn case_insensitive_parser_accepts_and_normalizes_tag() {
        let parser = custom_parser(&tags(), false);
        let item = parser
            .parse_line(" todo: lower-case tag", 1)
            .expect("expected TODO item");

        // In case-insensitive mode, the tag should be normalized back
        // to the configured spelling from self.tags.
        assert_eq!(item.tag, "TODO");
        assert_eq!(item.message, "lower-case tag");
        assert_eq!(item.priority, Priority::from_tag("TODO"));
    }

    #[test]
    fn case_insensitive_parser_uses_first_matching_configured_tag_spelling() {
        let tags = vec!["ToDo".to_string(), "FixMe".to_string()];
        let parser = custom_parser(&tags, false);

        let item = parser
            .parse_line(" todo: mixed case normalization", 1)
            .expect("expected ToDo item");

        assert_eq!(item.tag, "ToDo");
        assert_eq!(item.priority, Priority::from_tag("ToDo"));
    }

    #[test]
    fn parse_content_collects_multiple_items_with_correct_line_numbers() {
        let parser = custom_parser(&tags(), false);
        let content = "\
first line
 TODO: first task
nothing here
 fixme(bob): second task
 BUG: third task";

        let items = parser.parse_content(content);

        assert_eq!(items.len(), 3);

        assert_eq!(items[0].tag, "TODO");
        assert_eq!(items[0].message, "first task");
        assert_eq!(items[0].line, 2);

        assert_eq!(items[1].tag, "FIXME");
        assert_eq!(items[1].author.as_deref(), Some("bob"));
        assert_eq!(items[1].message, "second task");
        assert_eq!(items[1].line, 4);

        assert_eq!(items[2].tag, "BUG");
        assert_eq!(items[2].message, "third task");
        assert_eq!(items[2].line, 5);
    }

    #[test]
    fn parse_file_reads_and_parses_content() {
        let parser = custom_parser(&tags(), false);

        // Combine the process id with a timestamp so concurrent test runs on
        // the same machine are unlikely to pick the same fixture path.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let path = std::env::temp_dir().join(format!(
            "todo_parser_test_{}_{unique}.txt",
            std::process::id()
        ));

        fs::write(
            &path,
            "\
ignore
 TODO: from file
 FIXME(jane): also from file",
        )
        .unwrap();

        // Remove the fixture before unwrapping, so a failing parse_file does
        // not leave the temporary file behind.
        let parsed = parser.parse_file(&path);
        let _ = fs::remove_file(&path);
        let items = parsed.unwrap();

        assert_eq!(items.len(), 2);

        assert_eq!(items[0].tag, "TODO");
        assert_eq!(items[0].message, "from file");
        assert_eq!(items[0].line, 2);

        assert_eq!(items[1].tag, "FIXME");
        assert_eq!(items[1].author.as_deref(), Some("jane"));
        assert_eq!(items[1].message, "also from file");
        assert_eq!(items[1].line, 3);
    }

    #[test]
    fn require_colon_true_does_not_match_default_pattern_without_colon() {
        let parser = TodoParser::with_options(&tags(), false, true, None);

        assert!(parser.parse_line("// TODO missing colon", 1).is_none());
        assert!(parser.parse_line("// TODO: has colon", 1).is_some());
    }

    #[test]
    fn require_colon_false_matches_default_pattern_with_or_without_colon() {
        let parser = TodoParser::with_options(&tags(), false, false, None);

        let with_colon = parser.parse_line("// TODO: with colon", 1);
        let with_space = parser.parse_line("// TODO with space", 2);
        let bare_tag = parser.parse_line("// TODO", 3);

        assert!(with_colon.is_some(), "should match with colon");
        assert!(
            with_space.is_some(),
            "should match with space when colon is optional"
        );
        assert!(
            bare_tag.is_some(),
            "should match bare tag when colon is optional"
        );

        let with_space = with_space.unwrap();
        assert_eq!(with_space.tag, "TODO");
        assert_eq!(with_space.message, "with space");

        let bare_tag = bare_tag.unwrap();
        assert_eq!(bare_tag.tag, "TODO");
        assert_eq!(bare_tag.message, "");
    }

    #[test]
    fn require_colon_false_rejects_false_positives() {
        let parser = TodoParser::with_options(&tags(), false, false, None);

        assert!(
            parser.parse_line("// TODO.complete()", 4).is_none(),
            "tag followed by '.' must not match"
        );
        assert!(
            parser.parse_line("// todoList", 5).is_none(),
            "tag embedded in a word must not match"
        );
    }

    #[test]
    fn require_colon_false_documents_double_colon_behavior() {
        let parser = TodoParser::with_options(&tags(), false, false, None);

        let item = parser
            .parse_line("* TODO::module::fn", 6)
            .expect("double-colon form should match current default regex behavior");

        assert_eq!(item.tag, "TODO");
        assert_eq!(item.message, ":module::fn");
    }

    #[test]
    fn custom_regex_can_support_non_default_syntax() {
        let tags = vec!["TODO".to_string(), "FIXME".to_string()];
        let parser = TodoParser::with_options(
            &tags,
            false,
            true,
            // Matches e.g. "[TODO]{alice}: message"
            // 1 = prefix
            // 2 = tag
            // 3 = author
            // 4 = message
            Some(r"(^|\s)\[($TAGS)\](?:\{([^}]+)\})?:(.*)$"),
        );

        let item = parser
            .parse_line("[todo]{alice}: custom format works", 10)
            .expect("expected custom format to match");

        assert_eq!(item.tag, "TODO");
        assert_eq!(item.author.as_deref(), Some("alice"));
        assert_eq!(item.message, "custom format works");
        assert_eq!(item.line, 10);
        assert_eq!(item.priority, Priority::from_tag("TODO"));
    }

    #[test]
    fn default_regex_smoke_test_common_comment_styles() {
        let parser = TodoParser::with_options(&tags(), false, true, None);

        let slash = parser.parse_line("// TODO: implement feature", 1);
        let hash = parser.parse_line("# FIXME: fix the bug", 2);

        assert!(slash.is_some(), "default regex should match // TODO: ...");
        assert!(hash.is_some(), "default regex should match # FIXME: ...");

        let slash = slash.unwrap();
        assert_eq!(slash.tag, "TODO");
        assert_eq!(slash.message, "implement feature");

        let hash = hash.unwrap();
        assert_eq!(hash.tag, "FIXME");
        assert_eq!(hash.message, "fix the bug");
    }

    #[test]
    fn tags_accessor_returns_configured_tags() {
        let tags = vec!["TODO".to_string(), "FIXME".to_string()];
        let parser = TodoParser::new(&tags, true);

        assert_eq!(parser.tags(), &tags);
    }
}