1use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use crate::error::Error;
11
/// Location of a tag inside its source file, as parsed from the address field of a tag line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TagAddress {
    /// An address that parsed as an unsigned integer, i.e. a line number.
    Line(usize),
    /// The text found between matching `/…/` or `?…?` delimiters, stored without the
    /// delimiters.
    Pattern(String),
    /// Two or more `;`-separated addresses, kept in the order they were written.
    Chained(Vec<TagAddress>),
    /// Raw text of an address that is neither a number nor a delimited pattern.
    Unsupported(String),
}
28
/// One definition site recorded for a tag name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagEntry {
    /// Source file holding the definition: the tag file's directory joined with the file
    /// field of the tag line.
    pub file: PathBuf,
    /// Where inside `file` the definition is located.
    pub address: TagAddress,
}
34
/// In-memory index of a loaded tag file, keyed by tag name.
#[derive(Debug, Clone)]
pub struct TagFile {
    /// Directory that file names inside the tag file are joined onto.
    base_dir: PathBuf,
    /// Path the tag file was loaded from; read again by `reload_if_changed`.
    path: PathBuf,
    /// Modification time recorded at load (`UNIX_EPOCH` when it could not be read); compared
    /// against the file's current mtime to detect changes.
    mtime: SystemTime,
    /// Every entry for each tag name, in the order the entries appear in the tag file.
    by_name: HashMap<String, Vec<TagEntry>>,
}
46
47impl TagFile {
48 pub fn load(path: &Path) -> Result<Self, Error> {
49 let bytes = fs::read(path).map_err(|_| Error::TagFileNotFound)?;
50 let base_dir = path
51 .parent()
52 .map(|p| p.to_path_buf())
53 .unwrap_or_else(|| PathBuf::from("."));
54 let mtime = fs::metadata(path)
55 .and_then(|m| m.modified())
56 .unwrap_or(SystemTime::UNIX_EPOCH);
57
58 let by_name = if bytes.first().copied() == Some(b'\x0c') {
59 parse_etags(&bytes, &base_dir, path)?
60 } else {
61 let text = std::str::from_utf8(&bytes).map_err(|_| {
62 Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
63 })?;
64 parse_ctags(text, &base_dir)
65 };
66
67 Ok(TagFile { base_dir, path: path.to_path_buf(), mtime, by_name })
68 }
69
70 pub fn reload_if_changed(&mut self) -> Result<bool, Error> {
76 let new_mtime = match fs::metadata(&self.path).and_then(|m| m.modified()) {
77 Ok(t) => t,
78 Err(_) => return Ok(false),
79 };
80 if new_mtime == self.mtime {
81 return Ok(false);
82 }
83 let fresh = Self::load(&self.path)?;
84 self.mtime = fresh.mtime;
85 self.by_name = fresh.by_name;
86 Ok(true)
87 }
88
89 pub fn lookup(&self, name: &str) -> &[TagEntry] {
90 self.by_name
91 .get(name)
92 .map(Vec::as_slice)
93 .unwrap_or(&[])
94 }
95
96 pub fn names(&self) -> impl Iterator<Item = &str> {
97 self.by_name.keys().map(String::as_str)
98 }
99
100 pub fn base_dir(&self) -> &Path {
101 &self.base_dir
102 }
103
104 pub fn find_walking_up(start: &Path) -> Option<PathBuf> {
108 let mut cur = if start.is_file() {
109 start.parent()?.to_path_buf()
110 } else {
111 start.to_path_buf()
112 };
113 loop {
114 let candidate = cur.join("tags");
115 if candidate.is_file() {
116 return Some(candidate);
117 }
118 if !cur.pop() {
119 return None;
120 }
121 }
122 }
123}
124
125fn parse_ctags(text: &str, base_dir: &Path) -> HashMap<String, Vec<TagEntry>> {
126 let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
127 for line in text.lines() {
128 if line.is_empty() || line.starts_with("!_TAG_") {
129 continue;
130 }
131 let mut parts = line.splitn(3, '\t');
132 let (Some(name), Some(file_field), Some(rest)) =
133 (parts.next(), parts.next(), parts.next())
134 else {
135 continue;
136 };
137 let Some(address) = parse_ctags_address(rest) else {
138 continue;
139 };
140 let file = base_dir.join(file_field);
141 by_name
142 .entry(name.to_string())
143 .or_default()
144 .push(TagEntry { file, address });
145 }
146 by_name
147}
148
149fn parse_ctags_address(s: &str) -> Option<TagAddress> {
158 let body = match s.find(";\"") {
159 Some(idx) => &s[..idx],
160 None => s,
161 };
162 let body = body.trim();
163 if body.is_empty() {
164 return None;
165 }
166 let parts = split_chain(body);
167 let parsed: Vec<TagAddress> = parts
168 .iter()
169 .map(|p| parse_single_address(p.trim()))
170 .collect();
171 if parsed.is_empty() {
172 return None;
173 }
174 Some(if parsed.len() == 1 {
175 parsed.into_iter().next().unwrap()
176 } else {
177 TagAddress::Chained(parsed)
178 })
179}
180
/// Splits an address on the `;` characters that separate chained addresses.
///
/// A `;` inside a `/…/` or `?…?` pattern, or directly after a backslash, does not split.
/// Segments are returned verbatim (delimiters and escapes included); a trailing empty
/// segment is dropped, other empty segments are kept.
fn split_chain(body: &str) -> Vec<String> {
    let mut segments = Vec::new();
    let mut segment_start = 0;
    let mut open_delim: Option<char> = None;
    let mut skip_next = false;

    for (pos, ch) in body.char_indices() {
        if skip_next {
            // Character following a backslash: never a delimiter or separator.
            skip_next = false;
            continue;
        }
        if ch == '\\' {
            skip_next = true;
            continue;
        }
        match open_delim {
            Some(delim) => {
                if ch == delim {
                    open_delim = None;
                }
            }
            None => match ch {
                '/' | '?' => open_delim = Some(ch),
                ';' => {
                    segments.push(body[segment_start..pos].to_string());
                    // `;` is a single byte, so the next segment starts right after it.
                    segment_start = pos + 1;
                }
                _ => {}
            },
        }
    }

    if segment_start < body.len() {
        segments.push(body[segment_start..].to_string());
    }
    segments
}
221
222fn parse_single_address(body: &str) -> TagAddress {
223 if body.is_empty() {
224 return TagAddress::Unsupported(String::new());
225 }
226 if let Ok(n) = body.parse::<usize>() {
227 return TagAddress::Line(n);
228 }
229 let bytes = body.as_bytes();
230 let first = *bytes.first().unwrap();
231 let last = *bytes.last().unwrap();
232 if (first == b'/' || first == b'?') && first == last && bytes.len() >= 2 {
233 let inner = &body[1..body.len() - 1];
234 return TagAddress::Pattern(inner.to_string());
235 }
236 TagAddress::Unsupported(body.to_string())
237}
238
239fn parse_etags(
240 bytes: &[u8],
241 base_dir: &Path,
242 path: &Path,
243) -> Result<HashMap<String, Vec<TagEntry>>, Error> {
244 let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
245 let text = std::str::from_utf8(bytes).map_err(|_| {
246 Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
247 })?;
248 for section in text.split("\x0c\n").skip(1) {
249 let mut lines = section.lines();
250 let Some(header) = lines.next() else { continue };
251 let Some((file_field, _size)) = header.rsplit_once(',') else {
252 continue;
253 };
254 let file = base_dir.join(file_field);
255 for line in lines {
256 let Some((_src, after_del)) = line.split_once('\x7f') else {
257 continue;
258 };
259 let Some((tag, after_soh)) = after_del.split_once('\x01') else {
260 continue;
261 };
262 let Some((line_str, _offset)) = after_soh.split_once(',') else {
263 continue;
264 };
265 let Ok(line_num) = line_str.parse::<usize>() else {
266 continue;
267 };
268 by_name.entry(tag.to_string()).or_default().push(TagEntry {
269 file: file.clone(),
270 address: TagAddress::Line(line_num),
271 });
272 }
273 }
274 Ok(by_name)
275}
276
277pub fn pattern_to_regex(pattern: &str) -> String {
282 let (anchor_start, body) = if let Some(rest) = pattern.strip_prefix('^') {
283 ("^", rest)
284 } else {
285 ("", pattern)
286 };
287 let (body, anchor_end) = if let Some(stripped) = body.strip_suffix('$') {
288 (stripped, "$")
289 } else {
290 (body, "")
291 };
292 format!("{anchor_start}{}{anchor_end}", regex::escape(body))
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `TagFile` from ctags text without touching the filesystem.
    fn tf_from_ctags(text: &str) -> TagFile {
        let root = Path::new("/proj");
        TagFile {
            base_dir: root.to_path_buf(),
            path: PathBuf::from("/proj/tags"),
            mtime: SystemTime::UNIX_EPOCH,
            by_name: parse_ctags(text, root),
        }
    }

    /// Parses `text` as ctags and returns the address of the first entry named `foo`.
    fn foo_address(text: &str) -> TagAddress {
        tf_from_ctags(text).lookup("foo")[0].address.clone()
    }

    /// Parses `bytes` as an etags file rooted at `/proj`.
    fn etags_index(bytes: &[u8]) -> HashMap<String, Vec<TagEntry>> {
        parse_etags(bytes, Path::new("/proj"), Path::new("/proj/TAGS")).unwrap()
    }

    #[test]
    fn ctags_three_column_line_parses() {
        let tags = tf_from_ctags("foo\tsrc/lib.rs\t42\n");
        let [entry] = tags.lookup("foo") else {
            panic!("expected exactly one entry for foo");
        };
        assert_eq!(entry.file, PathBuf::from("/proj/src/lib.rs"));
        assert_eq!(entry.address, TagAddress::Line(42));
    }

    #[test]
    fn ctags_exuberant_suffix_is_stripped() {
        let address = foo_address("foo\tsrc/lib.rs\t42;\"\tf\tfile:\n");
        assert_eq!(address, TagAddress::Line(42));
    }

    #[test]
    fn ctags_metadata_line_is_skipped() {
        let tags =
            tf_from_ctags("!_TAG_FILE_FORMAT\t2\t/extended format/\nfoo\tsrc/lib.rs\t1\n");
        assert!(tags.lookup("!_TAG_FILE_FORMAT").is_empty());
        assert_eq!(tags.lookup("foo").len(), 1);
    }

    #[test]
    fn ctags_forward_slash_pattern_parses() {
        let address = foo_address("foo\tsrc/lib.rs\t/^fn foo()$/\n");
        assert_eq!(address, TagAddress::Pattern("^fn foo()$".into()));
    }

    #[test]
    fn ctags_question_mark_pattern_parses() {
        let address = foo_address("foo\tsrc/lib.rs\t?pattern?\n");
        assert_eq!(address, TagAddress::Pattern("pattern".into()));
    }

    #[test]
    fn ctags_pattern_with_suffix_strips_suffix() {
        let address = foo_address("foo\tsrc/lib.rs\t/^pat$/;\"\tf\n");
        assert_eq!(address, TagAddress::Pattern("^pat$".into()));
    }

    #[test]
    fn ctags_chained_patterns_parse_as_chained() {
        let address = foo_address("foo\tsrc/a.rs\t/^anchor$/;/secondary/\n");
        let TagAddress::Chained(parts) = &address else {
            panic!("expected Chained, got {address:?}");
        };
        assert_eq!(parts.len(), 2);
        assert_eq!(parts[0], TagAddress::Pattern("^anchor$".into()));
        assert_eq!(parts[1], TagAddress::Pattern("secondary".into()));
    }

    #[test]
    fn ctags_chained_pattern_then_line() {
        let address = foo_address("foo\tsrc/a.rs\t/^anchor$/;42\n");
        let TagAddress::Chained(parts) = &address else {
            panic!("expected Chained, got {address:?}");
        };
        assert_eq!(parts.len(), 2);
        assert_eq!(parts[0], TagAddress::Pattern("^anchor$".into()));
        assert_eq!(parts[1], TagAddress::Line(42));
    }

    #[test]
    fn ctags_unsupported_ex_command_is_captured() {
        let address = foo_address("foo\tsrc/a.rs\t:s/foo/bar/g\n");
        let TagAddress::Unsupported(raw) = &address else {
            panic!("expected Unsupported, got {address:?}");
        };
        assert!(
            raw.contains(":s/foo/bar"),
            "raw should contain the bad address, got {raw:?}"
        );
    }

    #[test]
    fn ctags_pattern_with_internal_semicolon_is_preserved() {
        // The `;` sits inside the delimiters, so it must not split the address into a chain.
        let address = foo_address("foo\tsrc/a.rs\t/^a;b$/\n");
        assert_eq!(address, TagAddress::Pattern("^a;b$".into()));
    }

    #[test]
    fn multiple_entries_for_same_name_accumulate() {
        let tags = tf_from_ctags("foo\ta.rs\t1\nfoo\tb.rs\t2\n");
        assert_eq!(tags.lookup("foo").len(), 2);
    }

    #[test]
    fn malformed_ctags_line_is_skipped() {
        let tags = tf_from_ctags("oneword\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("foo").len(), 1);
        assert!(tags.lookup("oneword").is_empty());
    }

    #[test]
    fn empty_address_is_skipped() {
        let tags = tf_from_ctags("foo\tsrc/lib.rs\t\n");
        assert!(tags.lookup("foo").is_empty());
    }

    #[test]
    fn etags_single_section_parses() {
        let index = etags_index(b"\x0c\nsrc/lib.rs,42\n\x7ffoo\x01100,0\n");
        let entries = index.get("foo").unwrap();
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].file, PathBuf::from("/proj/src/lib.rs"));
        assert_eq!(entries[0].address, TagAddress::Line(100));
    }

    #[test]
    fn etags_multiple_sections_accumulate() {
        let index =
            etags_index(b"\x0c\na.rs,10\n\x7ffoo\x011,0\n\x0c\nb.rs,10\n\x7fbar\x012,0\n");
        assert_eq!(index.len(), 2);
        for name in ["foo", "bar"] {
            assert!(index.contains_key(name));
        }
    }

    #[test]
    fn etags_malformed_line_is_skipped() {
        let index = etags_index(b"\x0c\nsrc/lib.rs,42\nno-delimiters\n\x7ffoo\x011,0\n");
        assert_eq!(index.get("foo").unwrap().len(), 1);
    }

    #[test]
    fn pattern_to_regex_preserves_anchors() {
        let cases = [
            ("^fn foo()$", "^fn foo\\(\\)$"),
            ("foo", "foo"),
            ("^foo", "^foo"),
            ("foo$", "foo$"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn pattern_to_regex_escapes_metacharacters() {
        let cases = [("a.b", "a\\.b"), ("^a[b]c$", "^a\\[b\\]c$")];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn find_walking_up_finds_in_same_directory() {
        let dir = tempfile::tempdir().unwrap();
        let tags_path = dir.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        assert_eq!(TagFile::find_walking_up(dir.path()), Some(tags_path));
    }

    #[test]
    fn find_walking_up_finds_two_directories_up() {
        let root = tempfile::tempdir().unwrap();
        let tags_path = root.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        let nested = root.path().join("a").join("b");
        std::fs::create_dir_all(&nested).unwrap();
        assert_eq!(TagFile::find_walking_up(&nested), Some(tags_path));
    }

    #[test]
    fn find_walking_up_returns_none_when_missing() {
        let dir = tempfile::tempdir().unwrap();
        assert_eq!(TagFile::find_walking_up(dir.path()), None);
    }

    #[test]
    fn reload_if_changed_picks_up_new_entries() {
        use std::{thread, time::Duration};

        let dir = tempfile::tempdir().unwrap();
        let tags_path = dir.path().join("tags");
        std::fs::write(&tags_path, "foo\tsrc/a.rs\t1\n").unwrap();
        let mut tags = TagFile::load(&tags_path).unwrap();
        assert_eq!(tags.lookup("bar").len(), 0);

        // Wait long enough that the rewrite below gets a different mtime from the first
        // write even on filesystems with coarse timestamps.
        thread::sleep(Duration::from_millis(1100));
        std::fs::write(&tags_path, "foo\tsrc/a.rs\t1\nbar\tsrc/b.rs\t2\n").unwrap();

        assert!(tags.reload_if_changed().unwrap());
        assert_eq!(tags.lookup("bar").len(), 1);
        // Nothing changed since the reload, so a second call must be a no-op.
        assert!(!tags.reload_if_changed().unwrap());
    }
}