1use regex::{Regex, RegexBuilder};
2use std::path::Path;
3use todo_tree_core::{DEFAULT_REGEX, Priority, TodoItem};
4
5#[derive(Debug, Clone)]
6pub struct TodoParser {
7 pattern: Option<Regex>,
8 tags: Vec<String>,
9 case_sensitive: bool,
10}
11
12impl TodoParser {
13 pub fn new(tags: &[String], case_sensitive: bool) -> Self {
14 Self::with_options(tags, case_sensitive, true, None)
15 }
16
17 pub fn with_options(
18 tags: &[String],
19 case_sensitive: bool,
20 require_colon: bool,
21 custom_regex: Option<&str>,
22 ) -> Self {
23 let pattern = Self::build_pattern(tags, case_sensitive, require_colon, custom_regex);
24 Self {
25 pattern,
26 tags: tags.to_vec(),
27 case_sensitive,
28 }
29 }
30
31 fn build_pattern(
32 tags: &[String],
33 case_sensitive: bool,
34 require_colon: bool,
35 custom_regex: Option<&str>,
36 ) -> Option<Regex> {
37 if tags.is_empty() {
38 return None;
39 }
40
41 let escaped_tags: Vec<String> = tags.iter().map(|t| regex::escape(t)).collect();
42 let tags_alternation = escaped_tags.join("|");
43
44 let mut base_pattern = custom_regex.unwrap_or(DEFAULT_REGEX).to_string();
45 if custom_regex.is_none() && !require_colon {
46 base_pattern = base_pattern.replace(":(.*)", r"(?:\s*$|(?:(?::|\s+)(.*)))");
47 }
48
49 let pattern_string = base_pattern.replace("$TAGS", &tags_alternation);
50 let regex = RegexBuilder::new(&pattern_string)
51 .case_insensitive(!case_sensitive)
52 .multi_line(true)
53 .build()
54 .expect("Failed to build regex pattern");
55
56 Some(regex)
57 }
58
59 pub fn parse_line(&self, line: &str, line_number: usize) -> Option<TodoItem> {
60 let pattern = self.pattern.as_ref()?;
61 if let Some(captures) = pattern.captures(line) {
62 let tag_match = captures.get(2)?;
63 let author = captures.get(3).map(|m| m.as_str().to_string());
64 let message = captures
65 .get(4)
66 .map(|m| m.as_str().trim().to_string())
67 .unwrap_or_default();
68
69 let tag = tag_match.as_str().to_string();
70 let column = tag_match.start() + 1;
71
72 let normalized_tag = if self.case_sensitive {
73 tag
74 } else {
75 self.tags
76 .iter()
77 .find(|t| t.eq_ignore_ascii_case(&tag))
78 .cloned()
79 .unwrap_or(tag)
80 };
81
82 let priority = Priority::from_tag(&normalized_tag);
83
84 return Some(TodoItem {
85 tag: normalized_tag,
86 message,
87 line: line_number,
88 column,
89 line_content: Some(line.to_string()),
90 author,
91 priority,
92 });
93 }
94
95 None
96 }
97
98 pub fn parse_content(&self, content: &str) -> Vec<TodoItem> {
99 content
100 .lines()
101 .enumerate()
102 .filter_map(|(idx, line)| self.parse_line(line, idx + 1))
103 .collect()
104 }
105
106 pub fn parse_file(&self, path: &Path) -> std::io::Result<Vec<TodoItem>> {
107 let content = std::fs::read_to_string(path)?;
108 Ok(self.parse_content(&content))
109 }
110
111 pub fn tags(&self) -> &[String] {
112 &self.tags
113 }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Turns string literals into the owned tag list the parser expects.
    fn owned(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// Tag set shared by most tests.
    fn tags() -> Vec<String> {
        owned(&["TODO", "FIXME", "BUG"])
    }

    /// Builds a parser around a custom pattern that accepts a tag at the start
    /// of the line or after whitespace, with optional author and message.
    fn custom_parser(tags: &[String], case_sensitive: bool) -> TodoParser {
        let pattern = r"(^|\s)($TAGS)(?:\(([^)]+)\))?(?::(.*))?$";
        TodoParser::with_options(tags, case_sensitive, true, Some(pattern))
    }

    /// `new` keeps the tags and compiles a pattern.
    #[test]
    fn new_uses_default_options() {
        let configured = tags();
        let parser = TodoParser::new(&configured, true);

        assert_eq!(parser.tags(), &configured);
        assert!(parser.pattern.is_some());
    }

    /// Without tags there is no pattern and nothing is ever matched.
    #[test]
    fn empty_tags_disable_parsing() {
        let sample = "// TODO: message";
        let parser = TodoParser::new(&[], true);

        assert!(parser.pattern.is_none());
        assert!(parser.parse_line(sample, 1).is_none());
        assert!(parser.parse_content(sample).is_empty());
    }

    /// Tag, message, position and priority are all filled in for a plain item.
    #[test]
    fn parse_line_with_custom_regex_extracts_basic_fields() {
        let input = " TODO: write more tests";
        let found = custom_parser(&tags(), true)
            .parse_line(input, 7)
            .expect("expected TODO item");

        assert_eq!(found.tag, "TODO");
        assert_eq!(found.message, "write more tests");
        assert_eq!(found.author, None);
        assert_eq!(found.line, 7);
        assert_eq!(found.column, 2);
        assert_eq!(found.line_content.as_deref(), Some(input));
        assert_eq!(found.priority, Priority::from_tag("TODO"));
    }

    /// The parenthesised name after the tag is reported as the author.
    #[test]
    fn parse_line_with_custom_regex_extracts_author() {
        let found = custom_parser(&tags(), true)
            .parse_line(" FIXME(alice): handle edge case", 3)
            .expect("expected FIXME item");

        assert_eq!(found.tag, "FIXME");
        assert_eq!(found.author.as_deref(), Some("alice"));
        assert_eq!(found.message, "handle edge case");
        assert_eq!(found.line, 3);
        assert_eq!(found.column, 2);
        assert_eq!(found.priority, Priority::from_tag("FIXME"));
    }

    /// Surrounding whitespace is stripped from the message.
    #[test]
    fn parse_line_trims_message() {
        let found = custom_parser(&tags(), true)
            .parse_line(" TODO: message with spaces ", 1)
            .expect("expected TODO item");

        assert_eq!(found.message, "message with spaces");
    }

    /// In case-sensitive mode only the exact configured casing matches.
    #[test]
    fn case_sensitive_parser_rejects_wrong_case() {
        let parser = custom_parser(&tags(), true);

        let lower = parser.parse_line(" todo: lower-case tag", 1);
        assert!(lower.is_none());

        let upper = parser.parse_line(" TODO: upper-case tag", 1);
        assert!(upper.is_some());
    }

    /// In case-insensitive mode the reported tag uses the configured spelling.
    #[test]
    fn case_insensitive_parser_accepts_and_normalizes_tag() {
        let found = custom_parser(&tags(), false)
            .parse_line(" todo: lower-case tag", 1)
            .expect("expected TODO item");

        assert_eq!(found.tag, "TODO");
        assert_eq!(found.message, "lower-case tag");
        assert_eq!(found.priority, Priority::from_tag("TODO"));
    }

    /// Mixed-case configured tags are reported exactly as configured.
    #[test]
    fn case_insensitive_parser_uses_first_matching_configured_tag_spelling() {
        let configured = owned(&["ToDo", "FixMe"]);
        let found = custom_parser(&configured, false)
            .parse_line(" todo: mixed case normalization", 1)
            .expect("expected ToDo item");

        assert_eq!(found.tag, "ToDo");
        assert_eq!(found.priority, Priority::from_tag("ToDo"));
    }

    /// Every matching line is reported with its 1-based line number.
    #[test]
    fn parse_content_collects_multiple_items_with_correct_line_numbers() {
        let content = [
            "first line",
            " TODO: first task",
            "nothing here",
            " fixme(bob): second task",
            " BUG: third task",
        ]
        .join("\n");

        let items = custom_parser(&tags(), false).parse_content(&content);
        assert_eq!(items.len(), 3);

        let expected = [
            ("TODO", "first task", 2),
            ("FIXME", "second task", 4),
            ("BUG", "third task", 5),
        ];
        for (item, &(tag, message, line)) in items.iter().zip(expected.iter()) {
            assert_eq!(item.tag, tag);
            assert_eq!(item.message, message);
            assert_eq!(item.line, line);
        }
        assert_eq!(items[1].author.as_deref(), Some("bob"));
    }

    /// `parse_file` reads the file and yields the same items as parsing its text.
    #[test]
    fn parse_file_reads_and_parses_content() {
        let parser = custom_parser(&tags(), false);

        // Timestamp-based name keeps concurrent test runs from colliding.
        let unique = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        let file_name = format!("todo_parser_test_{unique}.txt");
        let path = std::env::temp_dir().join(file_name);

        let body = "ignore\n TODO: from file\n FIXME(jane): also from file";
        fs::write(&path, body).unwrap();

        let items = parser.parse_file(&path).unwrap();
        // Best-effort cleanup; a failure to delete must not fail the test.
        let _ = fs::remove_file(&path);

        assert_eq!(items.len(), 2);

        let (first, second) = (&items[0], &items[1]);
        assert_eq!(first.tag, "TODO");
        assert_eq!(first.message, "from file");
        assert_eq!(first.line, 2);

        assert_eq!(second.tag, "FIXME");
        assert_eq!(second.author.as_deref(), Some("jane"));
        assert_eq!(second.message, "also from file");
        assert_eq!(second.line, 3);
    }

    /// With the default pattern and a mandatory colon, a bare tag is ignored.
    #[test]
    fn require_colon_true_does_not_match_default_pattern_without_colon() {
        let parser = TodoParser::with_options(&tags(), false, true, None);

        let without_colon = parser.parse_line("// TODO missing colon", 1);
        assert!(without_colon.is_none());

        let with_colon = parser.parse_line("// TODO: has colon", 1);
        assert!(with_colon.is_some());
    }

    /// With an optional colon, colon, whitespace and bare-tag forms all match.
    #[test]
    fn require_colon_false_matches_default_pattern_with_or_without_colon() {
        let parser = TodoParser::with_options(&tags(), false, false, None);

        let colon_form = parser.parse_line("// TODO: with colon", 1);
        let space_form = parser.parse_line("// TODO with space", 2);
        let bare_form = parser.parse_line("// TODO", 3);

        assert!(colon_form.is_some(), "should match with colon");
        assert!(
            space_form.is_some(),
            "should match with space when colon is optional"
        );
        assert!(
            bare_form.is_some(),
            "should match bare tag when colon is optional"
        );

        let space_item = space_form.unwrap();
        assert_eq!(space_item.tag, "TODO");
        assert_eq!(space_item.message, "with space");

        let bare_item = bare_form.unwrap();
        assert_eq!(bare_item.tag, "TODO");
        assert_eq!(bare_item.message, "");
    }

    /// An optional colon must not make identifiers that contain a tag match.
    #[test]
    fn require_colon_false_rejects_false_positives() {
        let parser = TodoParser::with_options(&tags(), false, false, None);

        let rejected = [
            ("// TODO.complete()", 4, "tag followed by '.' must not match"),
            ("// todoList", 5, "tag embedded in a word must not match"),
        ];
        for &(input, number, reason) in rejected.iter() {
            assert!(parser.parse_line(input, number).is_none(), "{}", reason);
        }
    }

    /// Pins the current behaviour for a tag directly followed by `::`.
    #[test]
    fn require_colon_false_documents_double_colon_behavior() {
        let found = TodoParser::with_options(&tags(), false, false, None)
            .parse_line("* TODO::module::fn", 6)
            .expect("double-colon form should match current default regex behavior");

        assert_eq!(found.tag, "TODO");
        assert_eq!(found.message, ":module::fn");
    }

    /// A custom pattern may use bracket/brace syntax as long as it keeps the
    /// tag, author and message in capture groups 2, 3 and 4.
    #[test]
    fn custom_regex_can_support_non_default_syntax() {
        let configured = owned(&["TODO", "FIXME"]);
        let pattern = r"(^|\s)\[($TAGS)\](?:\{([^}]+)\})?:(.*)$";
        let parser = TodoParser::with_options(&configured, false, true, Some(pattern));

        let found = parser
            .parse_line("[todo]{alice}: custom format works", 10)
            .expect("expected custom format to match");

        assert_eq!(found.tag, "TODO");
        assert_eq!(found.author.as_deref(), Some("alice"));
        assert_eq!(found.message, "custom format works");
        assert_eq!(found.line, 10);
        assert_eq!(found.priority, Priority::from_tag("TODO"));
    }

    /// The default pattern recognises `//` and `#` comment styles.
    #[test]
    fn default_regex_smoke_test_common_comment_styles() {
        let parser = TodoParser::with_options(&tags(), false, true, None);

        let slash_hit = parser.parse_line("// TODO: implement feature", 1);
        let hash_hit = parser.parse_line("# FIXME: fix the bug", 2);

        assert!(
            slash_hit.is_some(),
            "default regex should match // TODO: ..."
        );
        assert!(hash_hit.is_some(), "default regex should match # FIXME: ...");

        let slash_item = slash_hit.unwrap();
        assert_eq!(slash_item.tag, "TODO");
        assert_eq!(slash_item.message, "implement feature");

        let hash_item = hash_hit.unwrap();
        assert_eq!(hash_item.tag, "FIXME");
        assert_eq!(hash_item.message, "fix the bug");
    }

    /// `tags()` hands back exactly what was configured.
    #[test]
    fn tags_accessor_returns_configured_tags() {
        let configured = owned(&["TODO", "FIXME"]);
        let parser = TodoParser::new(&configured, true);

        assert_eq!(parser.tags(), &configured);
    }
}