Skip to main content

tess/
tags.rs

//! Tag-file parsing and lookup. Supports the ctags format (traditional
//! addresses, with the exuberant `;"` suffix tolerated) and the etags format.
//! Main entry points: `TagFile::load`, `TagFile::lookup`,
//! `TagFile::find_walking_up`, `TagFile::reload_if_changed`.
4
5use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use crate::error::Error;
11
/// Location of a tag inside its file, as written in the tag file's address
/// column.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum TagAddress {
    /// A line number copied from the tags file; tag files count lines from 1.
    Line(usize),
    /// The text of a ctags search address (`/pattern/` or `?pattern?`) with
    /// the surrounding delimiters removed.
    Pattern(String),
    /// Two or more addresses that were separated by `;`, in file order.
    /// The resolver (not part of this file) is expected to apply each step
    /// starting from the line the previous step landed on, so
    /// `/^anchor$/;/secondary/` means "find `secondary` after the line that
    /// matches `^anchor$`".
    Chained(Vec<TagAddress>),
    /// An address form this parser does not understand (for example
    /// `:s/foo/bar/` or other ex commands); the raw text is kept. Per the
    /// original design note, resolution falls back to line 1 and shows a
    /// `[tag address not supported: <raw>]` status hint.
    Unsupported(String),
}
28
29#[derive(Debug, Clone, PartialEq, Eq)]
30pub struct TagEntry {
31    pub file: PathBuf,
32    pub address: TagAddress,
33}
34
35#[derive(Debug, Clone)]
36pub struct TagFile {
37    base_dir: PathBuf,
38    /// On-disk path of the tag file, retained so `reload_if_changed` can
39    /// re-stat and re-parse without a separate handle.
40    path: PathBuf,
41    /// Mtime captured at last load. `UNIX_EPOCH` if the filesystem couldn't
42    /// report one (treated as "unknown" — any next mtime triggers reload).
43    mtime: SystemTime,
44    by_name: HashMap<String, Vec<TagEntry>>,
45}
46
47impl TagFile {
48    pub fn load(path: &Path) -> Result<Self, Error> {
49        let bytes = fs::read(path).map_err(|_| Error::TagFileNotFound)?;
50        let base_dir = path
51            .parent()
52            .map(|p| p.to_path_buf())
53            .unwrap_or_else(|| PathBuf::from("."));
54        let mtime = fs::metadata(path)
55            .and_then(|m| m.modified())
56            .unwrap_or(SystemTime::UNIX_EPOCH);
57
58        let by_name = if bytes.first().copied() == Some(b'\x0c') {
59            parse_etags(&bytes, &base_dir, path)?
60        } else {
61            let text = std::str::from_utf8(&bytes).map_err(|_| {
62                Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
63            })?;
64            parse_ctags(text, &base_dir)
65        };
66
67        Ok(TagFile { base_dir, path: path.to_path_buf(), mtime, by_name })
68    }
69
70    /// Re-stat the on-disk tag file and, if its mtime changed, re-parse it
71    /// in place. Returns `Ok(true)` when a reload happened, `Ok(false)`
72    /// otherwise. Stat or parse errors that occur during reload are
73    /// surfaced as `Err`; callers may choose to surface them as a status
74    /// hint and keep using the previously-loaded state.
75    pub fn reload_if_changed(&mut self) -> Result<bool, Error> {
76        let new_mtime = match fs::metadata(&self.path).and_then(|m| m.modified()) {
77            Ok(t) => t,
78            Err(_) => return Ok(false),
79        };
80        if new_mtime == self.mtime {
81            return Ok(false);
82        }
83        let fresh = Self::load(&self.path)?;
84        self.mtime = fresh.mtime;
85        self.by_name = fresh.by_name;
86        Ok(true)
87    }
88
89    pub fn lookup(&self, name: &str) -> &[TagEntry] {
90        self.by_name
91            .get(name)
92            .map(Vec::as_slice)
93            .unwrap_or(&[])
94    }
95
96    pub fn names(&self) -> impl Iterator<Item = &str> {
97        self.by_name.keys().map(String::as_str)
98    }
99
100    pub fn base_dir(&self) -> &Path {
101        &self.base_dir
102    }
103
104    /// Walk up from `start` looking for a `tags` file. If `start` is a
105    /// regular file, begin at its parent directory. Returns the first
106    /// `tags` found, or `None` at the filesystem root.
107    pub fn find_walking_up(start: &Path) -> Option<PathBuf> {
108        let mut cur = if start.is_file() {
109            start.parent()?.to_path_buf()
110        } else {
111            start.to_path_buf()
112        };
113        loop {
114            let candidate = cur.join("tags");
115            if candidate.is_file() {
116                return Some(candidate);
117            }
118            if !cur.pop() {
119                return None;
120            }
121        }
122    }
123}
124
125fn parse_ctags(text: &str, base_dir: &Path) -> HashMap<String, Vec<TagEntry>> {
126    let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
127    for line in text.lines() {
128        if line.is_empty() || line.starts_with("!_TAG_") {
129            continue;
130        }
131        let mut parts = line.splitn(3, '\t');
132        let (Some(name), Some(file_field), Some(rest)) =
133            (parts.next(), parts.next(), parts.next())
134        else {
135            continue;
136        };
137        let Some(address) = parse_ctags_address(rest) else {
138            continue;
139        };
140        let file = base_dir.join(file_field);
141        by_name
142            .entry(name.to_string())
143            .or_default()
144            .push(TagEntry { file, address });
145    }
146    by_name
147}
148
149/// Address column has shape:
150///   "42"                                → Line(42)
151///   "42;\""                             → Line(42) (exuberant suffix stripped)
152///   "/^pattern$/"  or  "/pat/;\""       → Pattern("^pattern$") / Pattern("pat")
153///   "?pattern?"                         → Pattern("pattern")
154///   "/foo/;/bar/"                       → Chained([Pattern("foo"), Pattern("bar")])
155///   ":s/...", ":call ..."               → Unsupported(raw)
156///   anything else                       → None (line skipped silently)
157fn parse_ctags_address(s: &str) -> Option<TagAddress> {
158    let body = match s.find(";\"") {
159        Some(idx) => &s[..idx],
160        None => s,
161    };
162    let body = body.trim();
163    if body.is_empty() {
164        return None;
165    }
166    let parts = split_chain(body);
167    let parsed: Vec<TagAddress> = parts
168        .iter()
169        .map(|p| parse_single_address(p.trim()))
170        .collect();
171    if parsed.is_empty() {
172        return None;
173    }
174    Some(if parsed.len() == 1 {
175        parsed.into_iter().next().unwrap()
176    } else {
177        TagAddress::Chained(parsed)
178    })
179}
180
/// Cut a ctags address body into its `;`-separated steps.
///
/// A `;` is a separator only when it is outside a `/.../` or `?...?` pattern
/// and not preceded by a backslash; backslash-escaped characters (including
/// escaped delimiters) are copied through untouched. Delimiters and
/// backslashes stay in the returned pieces.
///
/// Empty leading and interior segments are kept so steps stay positional
/// (`";42"` → `["", "42"]`); an empty *trailing* segment is dropped
/// (`"42;"` → `["42"]`), and an empty body yields no segments at all.
fn split_chain(body: &str) -> Vec<String> {
    let mut segments: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut open_delim: Option<char> = None;
    let mut after_backslash = false;

    for ch in body.chars() {
        // The separator is the only character that is not copied.
        if ch == ';' && !after_backslash && open_delim.is_none() {
            segments.push(std::mem::take(&mut current));
            continue;
        }

        current.push(ch);

        if after_backslash {
            // The escaped character has been copied; it changes no state.
            after_backslash = false;
        } else if ch == '\\' {
            after_backslash = true;
        } else if let Some(delim) = open_delim {
            if ch == delim {
                open_delim = None;
            }
        } else if ch == '/' || ch == '?' {
            open_delim = Some(ch);
        }
    }

    if !current.is_empty() {
        segments.push(current);
    }
    segments
}
221
222fn parse_single_address(body: &str) -> TagAddress {
223    if body.is_empty() {
224        return TagAddress::Unsupported(String::new());
225    }
226    if let Ok(n) = body.parse::<usize>() {
227        return TagAddress::Line(n);
228    }
229    let bytes = body.as_bytes();
230    let first = *bytes.first().unwrap();
231    let last = *bytes.last().unwrap();
232    if (first == b'/' || first == b'?') && first == last && bytes.len() >= 2 {
233        let inner = &body[1..body.len() - 1];
234        return TagAddress::Pattern(inner.to_string());
235    }
236    TagAddress::Unsupported(body.to_string())
237}
238
239fn parse_etags(
240    bytes: &[u8],
241    base_dir: &Path,
242    path: &Path,
243) -> Result<HashMap<String, Vec<TagEntry>>, Error> {
244    let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
245    let text = std::str::from_utf8(bytes).map_err(|_| {
246        Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
247    })?;
248    for section in text.split("\x0c\n").skip(1) {
249        let mut lines = section.lines();
250        let Some(header) = lines.next() else { continue };
251        let Some((file_field, _size)) = header.rsplit_once(',') else {
252            continue;
253        };
254        let file = base_dir.join(file_field);
255        for line in lines {
256            let Some((_src, after_del)) = line.split_once('\x7f') else {
257                continue;
258            };
259            let Some((tag, after_soh)) = after_del.split_once('\x01') else {
260                continue;
261            };
262            let Some((line_str, _offset)) = after_soh.split_once(',') else {
263                continue;
264            };
265            let Ok(line_num) = line_str.parse::<usize>() else {
266                continue;
267            };
268            by_name.entry(tag.to_string()).or_default().push(TagEntry {
269                file: file.clone(),
270                address: TagAddress::Line(line_num),
271            });
272        }
273    }
274    Ok(by_name)
275}
276
277/// Convert a ctags pattern body to a regex pattern. Vi-style `^` / `$`
278/// anchors at the boundaries are preserved as regex anchors; the inner
279/// text is regex-escaped so literal metacharacters in source don't
280/// mis-match.
281pub fn pattern_to_regex(pattern: &str) -> String {
282    let (anchor_start, body) = if let Some(rest) = pattern.strip_prefix('^') {
283        ("^", rest)
284    } else {
285        ("", pattern)
286    };
287    let (body, anchor_end) = if let Some(stripped) = body.strip_suffix('$') {
288        (stripped, "$")
289    } else {
290        (body, "")
291    };
292    format!("{anchor_start}{}{anchor_end}", regex::escape(body))
293}
294
/// Unit tests for the ctags/etags parsers, pattern conversion, tag-file
/// discovery and reload.
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `TagFile` from ctags text without touching the disk; entries
    /// resolve against `/proj`.
    fn tf_from_ctags(text: &str) -> TagFile {
        TagFile {
            base_dir: PathBuf::from("/proj"),
            path: PathBuf::from("/proj/tags"),
            mtime: std::time::SystemTime::UNIX_EPOCH,
            by_name: parse_ctags(text, Path::new("/proj")),
        }
    }

    /// Address of the first entry recorded under `name` (panics if none).
    fn first_address(text: &str, name: &str) -> TagAddress {
        tf_from_ctags(text).lookup(name)[0].address.clone()
    }

    /// Parse etags bytes as if they came from `/proj/TAGS`.
    fn etags_index(bytes: &[u8]) -> HashMap<String, Vec<TagEntry>> {
        parse_etags(bytes, Path::new("/proj"), Path::new("/proj/TAGS")).unwrap()
    }

    #[test]
    fn ctags_three_column_line_parses() {
        let tags = tf_from_ctags("foo\tsrc/lib.rs\t42\n");
        assert_eq!(
            tags.lookup("foo"),
            [TagEntry {
                file: PathBuf::from("/proj/src/lib.rs"),
                address: TagAddress::Line(42),
            }]
        );
    }

    #[test]
    fn ctags_exuberant_suffix_is_stripped() {
        let address = first_address("foo\tsrc/lib.rs\t42;\"\tf\tfile:\n", "foo");
        assert_eq!(address, TagAddress::Line(42));
    }

    #[test]
    fn ctags_metadata_line_is_skipped() {
        let tags =
            tf_from_ctags("!_TAG_FILE_FORMAT\t2\t/extended format/\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("!_TAG_FILE_FORMAT").len(), 0);
        assert_eq!(tags.lookup("foo").len(), 1);
    }

    #[test]
    fn ctags_forward_slash_pattern_parses() {
        let address = first_address("foo\tsrc/lib.rs\t/^fn foo()$/\n", "foo");
        assert_eq!(address, TagAddress::Pattern("^fn foo()$".into()));
    }

    #[test]
    fn ctags_question_mark_pattern_parses() {
        let address = first_address("foo\tsrc/lib.rs\t?pattern?\n", "foo");
        assert_eq!(address, TagAddress::Pattern("pattern".into()));
    }

    #[test]
    fn ctags_pattern_with_suffix_strips_suffix() {
        let address = first_address("foo\tsrc/lib.rs\t/^pat$/;\"\tf\n", "foo");
        assert_eq!(address, TagAddress::Pattern("^pat$".into()));
    }

    #[test]
    fn ctags_chained_patterns_parse_as_chained() {
        let other = first_address("foo\tsrc/a.rs\t/^anchor$/;/secondary/\n", "foo");
        let TagAddress::Chained(parts) = &other else {
            panic!("expected Chained, got {other:?}");
        };
        assert_eq!(
            parts.as_slice(),
            [
                TagAddress::Pattern("^anchor$".into()),
                TagAddress::Pattern("secondary".into()),
            ]
        );
    }

    #[test]
    fn ctags_chained_pattern_then_line() {
        let other = first_address("foo\tsrc/a.rs\t/^anchor$/;42\n", "foo");
        let TagAddress::Chained(parts) = &other else {
            panic!("expected Chained, got {other:?}");
        };
        assert_eq!(
            parts.as_slice(),
            [TagAddress::Pattern("^anchor$".into()), TagAddress::Line(42)]
        );
    }

    #[test]
    fn ctags_unsupported_ex_command_is_captured() {
        let other = first_address("foo\tsrc/a.rs\t:s/foo/bar/g\n", "foo");
        let TagAddress::Unsupported(raw) = &other else {
            panic!("expected Unsupported, got {other:?}");
        };
        assert!(
            raw.contains(":s/foo/bar"),
            "raw should contain the bad address, got {raw:?}"
        );
    }

    #[test]
    fn ctags_pattern_with_internal_semicolon_is_preserved() {
        // A `;` between the slashes belongs to the pattern: the chain
        // splitter must not treat it as a step separator.
        let address = first_address("foo\tsrc/a.rs\t/^a;b$/\n", "foo");
        assert_eq!(address, TagAddress::Pattern("^a;b$".into()));
    }

    #[test]
    fn multiple_entries_for_same_name_accumulate() {
        let tags = tf_from_ctags("foo\ta.rs\t1\nfoo\tb.rs\t2\n");
        assert_eq!(tags.lookup("foo").len(), 2);
    }

    #[test]
    fn malformed_ctags_line_is_skipped() {
        let tags = tf_from_ctags("oneword\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("foo").len(), 1);
        assert_eq!(tags.lookup("oneword").len(), 0);
    }

    #[test]
    fn empty_address_is_skipped() {
        let tags = tf_from_ctags("foo\tsrc/lib.rs\t\n");
        assert_eq!(tags.lookup("foo").len(), 0);
    }

    #[test]
    fn etags_single_section_parses() {
        let index = etags_index(b"\x0c\nsrc/lib.rs,42\n\x7ffoo\x01100,0\n");
        assert_eq!(
            index["foo"],
            [TagEntry {
                file: PathBuf::from("/proj/src/lib.rs"),
                address: TagAddress::Line(100),
            }]
        );
    }

    #[test]
    fn etags_multiple_sections_accumulate() {
        let index =
            etags_index(b"\x0c\na.rs,10\n\x7ffoo\x011,0\n\x0c\nb.rs,10\n\x7fbar\x012,0\n");
        let mut names: Vec<&str> = index.keys().map(String::as_str).collect();
        names.sort_unstable();
        assert_eq!(names, ["bar", "foo"]);
    }

    #[test]
    fn etags_malformed_line_is_skipped() {
        let index = etags_index(b"\x0c\nsrc/lib.rs,42\nno-delimiters\n\x7ffoo\x011,0\n");
        assert_eq!(index["foo"].len(), 1);
    }

    #[test]
    fn pattern_to_regex_preserves_anchors() {
        let cases = [
            ("^fn foo()$", "^fn foo\\(\\)$"),
            ("foo", "foo"),
            ("^foo", "^foo"),
            ("foo$", "foo$"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn pattern_to_regex_escapes_metacharacters() {
        let cases = [("a.b", "a\\.b"), ("^a[b]c$", "^a\\[b\\]c$")];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn find_walking_up_finds_in_same_directory() {
        let tmp = tempfile::tempdir().unwrap();
        let tags_path = tmp.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        assert_eq!(TagFile::find_walking_up(tmp.path()), Some(tags_path));
    }

    #[test]
    fn find_walking_up_finds_two_directories_up() {
        let tmp = tempfile::tempdir().unwrap();
        let tags_path = tmp.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        let deep = tmp.path().join("a").join("b");
        std::fs::create_dir_all(&deep).unwrap();
        assert_eq!(TagFile::find_walking_up(&deep), Some(tags_path));
    }

    #[test]
    fn find_walking_up_returns_none_when_missing() {
        let tmp = tempfile::tempdir().unwrap();
        assert_eq!(TagFile::find_walking_up(tmp.path()), None);
    }

    #[test]
    fn reload_if_changed_picks_up_new_entries() {
        use std::{thread, time::Duration};

        let tmp = tempfile::tempdir().unwrap();
        let tags_path = tmp.path().join("tags");
        std::fs::write(&tags_path, "foo\tsrc/a.rs\t1\n").unwrap();
        let mut tags = TagFile::load(&tags_path).unwrap();
        assert_eq!(tags.lookup("bar").len(), 0);

        // Wait out coarse mtime granularity (HFS+ / APFS = 1s) so the
        // rewrite below is guaranteed to produce a different mtime.
        thread::sleep(Duration::from_millis(1100));
        std::fs::write(&tags_path, "foo\tsrc/a.rs\t1\nbar\tsrc/b.rs\t2\n").unwrap();

        let reloaded = tags.reload_if_changed().unwrap();
        assert!(reloaded);
        assert_eq!(tags.lookup("bar").len(), 1);

        // Nothing changed since the reload, so the next call is a no-op.
        let reloaded_again = tags.reload_if_changed().unwrap();
        assert!(!reloaded_again);
    }
}