Skip to main content

tess/
tags.rs

1//! Tag-file parsing and lookup. Supports ctags (traditional + exuberant
2//! suffix) and etags formats. Public API: `TagFile::load`, `TagFile::lookup`,
3//! `TagFile::find_walking_up`.
4
5use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8
9use crate::error::Error;
10
/// Where a tag's definition is located inside its source file, exactly as
/// the tags file recorded it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TagAddress {
    /// 1-based line number, as stored in the tags file.
    Line(usize),
    /// ctags `/pattern/` or `?pattern?` with the delimiters stripped.
    /// The text between the delimiters is stored as written in the file.
    Pattern(String),
}
18
/// One definition site for a tag: the source file plus the address inside it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagEntry {
    /// Source file containing the definition; the parsers build it by
    /// joining the file column of the tags file onto the tags file's
    /// directory.
    pub file: PathBuf,
    /// Line number or search pattern that locates the definition in `file`.
    pub address: TagAddress,
}
24
/// An in-memory index of a parsed tags file, keyed by tag name.
#[derive(Debug, Clone)]
pub struct TagFile {
    /// Directory the tags file was loaded from; entry paths are joined
    /// onto it.
    base_dir: PathBuf,
    /// Tag name → every entry recorded under that name, in the order the
    /// entries appeared in the tags file.
    by_name: HashMap<String, Vec<TagEntry>>,
}
30
31impl TagFile {
32    pub fn load(path: &Path) -> Result<Self, Error> {
33        let bytes = fs::read(path).map_err(|_| Error::TagFileNotFound)?;
34        let base_dir = path
35            .parent()
36            .map(|p| p.to_path_buf())
37            .unwrap_or_else(|| PathBuf::from("."));
38
39        let by_name = if bytes.first().copied() == Some(b'\x0c') {
40            parse_etags(&bytes, &base_dir, path)?
41        } else {
42            let text = std::str::from_utf8(&bytes).map_err(|_| {
43                Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
44            })?;
45            parse_ctags(text, &base_dir)
46        };
47
48        Ok(TagFile { base_dir, by_name })
49    }
50
51    pub fn lookup(&self, name: &str) -> &[TagEntry] {
52        self.by_name
53            .get(name)
54            .map(Vec::as_slice)
55            .unwrap_or(&[])
56    }
57
58    pub fn names(&self) -> impl Iterator<Item = &str> {
59        self.by_name.keys().map(String::as_str)
60    }
61
62    pub fn base_dir(&self) -> &Path {
63        &self.base_dir
64    }
65
66    /// Walk up from `start` looking for a `tags` file. If `start` is a
67    /// regular file, begin at its parent directory. Returns the first
68    /// `tags` found, or `None` at the filesystem root.
69    pub fn find_walking_up(start: &Path) -> Option<PathBuf> {
70        let mut cur = if start.is_file() {
71            start.parent()?.to_path_buf()
72        } else {
73            start.to_path_buf()
74        };
75        loop {
76            let candidate = cur.join("tags");
77            if candidate.is_file() {
78                return Some(candidate);
79            }
80            if !cur.pop() {
81                return None;
82            }
83        }
84    }
85}
86
87fn parse_ctags(text: &str, base_dir: &Path) -> HashMap<String, Vec<TagEntry>> {
88    let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
89    for line in text.lines() {
90        if line.is_empty() || line.starts_with("!_TAG_") {
91            continue;
92        }
93        let mut parts = line.splitn(3, '\t');
94        let (Some(name), Some(file_field), Some(rest)) =
95            (parts.next(), parts.next(), parts.next())
96        else {
97            continue;
98        };
99        let Some(address) = parse_ctags_address(rest) else {
100            continue;
101        };
102        let file = base_dir.join(file_field);
103        by_name
104            .entry(name.to_string())
105            .or_default()
106            .push(TagEntry { file, address });
107    }
108    by_name
109}
110
111/// Address column has shape:
112///   "42"                            → Line(42)
113///   "42;\""                         → Line(42) (exuberant suffix stripped)
114///   "/^pattern$/"  or  "/pat/;\""   → Pattern("^pattern$") / Pattern("pat")
115///   "?pattern?"                     → Pattern("pattern")
116///   anything else                   → None (line skipped silently)
117fn parse_ctags_address(s: &str) -> Option<TagAddress> {
118    let body = match s.find(";\"") {
119        Some(idx) => &s[..idx],
120        None => s,
121    };
122    let body = body.trim();
123    if body.is_empty() {
124        return None;
125    }
126    if let Ok(n) = body.parse::<usize>() {
127        return Some(TagAddress::Line(n));
128    }
129    let bytes = body.as_bytes();
130    let first = *bytes.first()?;
131    let last = *bytes.last()?;
132    if (first == b'/' || first == b'?') && first == last && bytes.len() >= 2 {
133        let inner = &body[1..body.len() - 1];
134        return Some(TagAddress::Pattern(inner.to_string()));
135    }
136    None
137}
138
139fn parse_etags(
140    bytes: &[u8],
141    base_dir: &Path,
142    path: &Path,
143) -> Result<HashMap<String, Vec<TagEntry>>, Error> {
144    let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
145    let text = std::str::from_utf8(bytes).map_err(|_| {
146        Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
147    })?;
148    for section in text.split("\x0c\n").skip(1) {
149        let mut lines = section.lines();
150        let Some(header) = lines.next() else { continue };
151        let Some((file_field, _size)) = header.rsplit_once(',') else {
152            continue;
153        };
154        let file = base_dir.join(file_field);
155        for line in lines {
156            let Some((_src, after_del)) = line.split_once('\x7f') else {
157                continue;
158            };
159            let Some((tag, after_soh)) = after_del.split_once('\x01') else {
160                continue;
161            };
162            let Some((line_str, _offset)) = after_soh.split_once(',') else {
163                continue;
164            };
165            let Ok(line_num) = line_str.parse::<usize>() else {
166                continue;
167            };
168            by_name.entry(tag.to_string()).or_default().push(TagEntry {
169                file: file.clone(),
170                address: TagAddress::Line(line_num),
171            });
172        }
173    }
174    Ok(by_name)
175}
176
177/// Convert a ctags pattern body to a regex pattern. Vi-style `^` / `$`
178/// anchors at the boundaries are preserved as regex anchors; the inner
179/// text is regex-escaped so literal metacharacters in source don't
180/// mis-match.
181pub fn pattern_to_regex(pattern: &str) -> String {
182    let (anchor_start, body) = if let Some(rest) = pattern.strip_prefix('^') {
183        ("^", rest)
184    } else {
185        ("", pattern)
186    };
187    let (body, anchor_end) = if let Some(stripped) = body.strip_suffix('$') {
188        (stripped, "$")
189    } else {
190        (body, "")
191    };
192    format!("{anchor_start}{}{anchor_end}", regex::escape(body))
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory every in-memory fixture pretends its tags file lives in.
    const PROJECT_ROOT: &str = "/proj";

    /// Parse `source` as ctags text and wrap it in a `TagFile` rooted at
    /// `/proj`, without touching the filesystem.
    fn ctags_fixture(source: &str) -> TagFile {
        let base_dir = PathBuf::from(PROJECT_ROOT);
        let by_name = parse_ctags(source, &base_dir);
        TagFile { base_dir, by_name }
    }

    /// Parse `raw` as the contents of an etags file located at `/proj/TAGS`.
    fn etags_fixture(raw: &[u8]) -> HashMap<String, Vec<TagEntry>> {
        parse_etags(raw, Path::new(PROJECT_ROOT), Path::new("/proj/TAGS")).unwrap()
    }

    /// Address of the first entry stored under `name`; panics if none exists.
    fn first_address<'a>(tags: &'a TagFile, name: &str) -> &'a TagAddress {
        &tags.lookup(name)[0].address
    }

    // ---- ctags -----------------------------------------------------------

    #[test]
    fn ctags_three_column_line_parses() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t42\n");
        let found = tags.lookup("foo");
        assert_eq!(found.len(), 1);
        let entry = &found[0];
        assert_eq!(entry.file, PathBuf::from("/proj/src/lib.rs"));
        assert_eq!(entry.address, TagAddress::Line(42));
    }

    #[test]
    fn ctags_exuberant_suffix_is_stripped() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t42;\"\tf\tfile:\n");
        assert_eq!(first_address(&tags, "foo"), &TagAddress::Line(42));
    }

    #[test]
    fn ctags_metadata_line_is_skipped() {
        let tags =
            ctags_fixture("!_TAG_FILE_FORMAT\t2\t/extended format/\nfoo\tsrc/lib.rs\t1\n");
        assert!(tags.lookup("!_TAG_FILE_FORMAT").is_empty());
        assert_eq!(tags.lookup("foo").len(), 1);
    }

    #[test]
    fn ctags_forward_slash_pattern_parses() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t/^fn foo()$/\n");
        let expected = TagAddress::Pattern("^fn foo()$".into());
        assert_eq!(first_address(&tags, "foo"), &expected);
    }

    #[test]
    fn ctags_question_mark_pattern_parses() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t?pattern?\n");
        let expected = TagAddress::Pattern("pattern".into());
        assert_eq!(first_address(&tags, "foo"), &expected);
    }

    #[test]
    fn ctags_pattern_with_suffix_strips_suffix() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t/^pat$/;\"\tf\n");
        let expected = TagAddress::Pattern("^pat$".into());
        assert_eq!(first_address(&tags, "foo"), &expected);
    }

    #[test]
    fn multiple_entries_for_same_name_accumulate() {
        let tags = ctags_fixture("foo\ta.rs\t1\nfoo\tb.rs\t2\n");
        assert_eq!(tags.lookup("foo").len(), 2);
    }

    #[test]
    fn malformed_ctags_line_is_skipped() {
        let tags = ctags_fixture("oneword\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("foo").len(), 1);
        assert!(tags.lookup("oneword").is_empty());
    }

    #[test]
    fn empty_address_is_skipped() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t\n");
        assert!(tags.lookup("foo").is_empty());
    }

    // ---- etags -----------------------------------------------------------

    #[test]
    fn etags_single_section_parses() {
        let index = etags_fixture(b"\x0c\nsrc/lib.rs,42\n\x7ffoo\x01100,0\n");
        let found = index.get("foo").unwrap();
        assert_eq!(found.len(), 1);
        let entry = &found[0];
        assert_eq!(entry.file, PathBuf::from("/proj/src/lib.rs"));
        assert_eq!(entry.address, TagAddress::Line(100));
    }

    #[test]
    fn etags_multiple_sections_accumulate() {
        let index =
            etags_fixture(b"\x0c\na.rs,10\n\x7ffoo\x011,0\n\x0c\nb.rs,10\n\x7fbar\x012,0\n");
        assert_eq!(index.len(), 2);
        for name in ["foo", "bar"] {
            assert!(index.contains_key(name));
        }
    }

    #[test]
    fn etags_malformed_line_is_skipped() {
        let index = etags_fixture(b"\x0c\nsrc/lib.rs,42\nno-delimiters\n\x7ffoo\x011,0\n");
        assert_eq!(index.get("foo").unwrap().len(), 1);
    }

    // ---- pattern_to_regex ------------------------------------------------

    #[test]
    fn pattern_to_regex_preserves_anchors() {
        let cases = [
            ("^fn foo()$", "^fn foo\\(\\)$"),
            ("foo", "foo"),
            ("^foo", "^foo"),
            ("foo$", "foo$"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn pattern_to_regex_escapes_metacharacters() {
        let cases = [("a.b", "a\\.b"), ("^a[b]c$", "^a\\[b\\]c$")];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    // ---- find_walking_up -------------------------------------------------

    #[test]
    fn find_walking_up_finds_in_same_directory() {
        let scratch = tempfile::tempdir().unwrap();
        let tags_path = scratch.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        assert_eq!(TagFile::find_walking_up(scratch.path()), Some(tags_path));
    }

    #[test]
    fn find_walking_up_finds_two_directories_up() {
        let scratch = tempfile::tempdir().unwrap();
        let tags_path = scratch.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        let deep = scratch.path().join("a").join("b");
        std::fs::create_dir_all(&deep).unwrap();
        assert_eq!(TagFile::find_walking_up(&deep), Some(tags_path));
    }

    #[test]
    fn find_walking_up_returns_none_when_missing() {
        let scratch = tempfile::tempdir().unwrap();
        let outcome = TagFile::find_walking_up(scratch.path());
        assert_eq!(outcome, None);
    }
}