1use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8
9use crate::error::Error;
10
/// Where a tag points inside its file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TagAddress {
    /// A line number, stored exactly as it was parsed from the tag file.
    Line(usize),
    /// The text of a search pattern with its surrounding `/…/` or `?…?`
    /// delimiters removed.
    Pattern(String),
}
18
/// One location recorded for a tag name: a file plus an address inside it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TagEntry {
    /// Path of the file containing the tag. The parsers in this module build
    /// it by joining the tag file's directory with the file field.
    pub file: PathBuf,
    /// Line number or search pattern locating the tag within `file`.
    pub address: TagAddress,
}
24
/// An in-memory index of one parsed tag file.
#[derive(Debug, Clone)]
pub struct TagFile {
    /// Directory that file names found in the tag file are resolved against.
    base_dir: PathBuf,
    /// Every entry recorded for a tag name, in the order the parser met them.
    by_name: HashMap<String, Vec<TagEntry>>,
}
30
31impl TagFile {
32 pub fn load(path: &Path) -> Result<Self, Error> {
33 let bytes = fs::read(path).map_err(|_| Error::TagFileNotFound)?;
34 let base_dir = path
35 .parent()
36 .map(|p| p.to_path_buf())
37 .unwrap_or_else(|| PathBuf::from("."));
38
39 let by_name = if bytes.first().copied() == Some(b'\x0c') {
40 parse_etags(&bytes, &base_dir, path)?
41 } else {
42 let text = std::str::from_utf8(&bytes).map_err(|_| {
43 Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
44 })?;
45 parse_ctags(text, &base_dir)
46 };
47
48 Ok(TagFile { base_dir, by_name })
49 }
50
51 pub fn lookup(&self, name: &str) -> &[TagEntry] {
52 self.by_name
53 .get(name)
54 .map(Vec::as_slice)
55 .unwrap_or(&[])
56 }
57
58 pub fn names(&self) -> impl Iterator<Item = &str> {
59 self.by_name.keys().map(String::as_str)
60 }
61
62 pub fn base_dir(&self) -> &Path {
63 &self.base_dir
64 }
65
66 pub fn find_walking_up(start: &Path) -> Option<PathBuf> {
70 let mut cur = if start.is_file() {
71 start.parent()?.to_path_buf()
72 } else {
73 start.to_path_buf()
74 };
75 loop {
76 let candidate = cur.join("tags");
77 if candidate.is_file() {
78 return Some(candidate);
79 }
80 if !cur.pop() {
81 return None;
82 }
83 }
84 }
85}
86
87fn parse_ctags(text: &str, base_dir: &Path) -> HashMap<String, Vec<TagEntry>> {
88 let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
89 for line in text.lines() {
90 if line.is_empty() || line.starts_with("!_TAG_") {
91 continue;
92 }
93 let mut parts = line.splitn(3, '\t');
94 let (Some(name), Some(file_field), Some(rest)) =
95 (parts.next(), parts.next(), parts.next())
96 else {
97 continue;
98 };
99 let Some(address) = parse_ctags_address(rest) else {
100 continue;
101 };
102 let file = base_dir.join(file_field);
103 by_name
104 .entry(name.to_string())
105 .or_default()
106 .push(TagEntry { file, address });
107 }
108 by_name
109}
110
111fn parse_ctags_address(s: &str) -> Option<TagAddress> {
118 let body = match s.find(";\"") {
119 Some(idx) => &s[..idx],
120 None => s,
121 };
122 let body = body.trim();
123 if body.is_empty() {
124 return None;
125 }
126 if let Ok(n) = body.parse::<usize>() {
127 return Some(TagAddress::Line(n));
128 }
129 let bytes = body.as_bytes();
130 let first = *bytes.first()?;
131 let last = *bytes.last()?;
132 if (first == b'/' || first == b'?') && first == last && bytes.len() >= 2 {
133 let inner = &body[1..body.len() - 1];
134 return Some(TagAddress::Pattern(inner.to_string()));
135 }
136 None
137}
138
139fn parse_etags(
140 bytes: &[u8],
141 base_dir: &Path,
142 path: &Path,
143) -> Result<HashMap<String, Vec<TagEntry>>, Error> {
144 let mut by_name: HashMap<String, Vec<TagEntry>> = HashMap::new();
145 let text = std::str::from_utf8(bytes).map_err(|_| {
146 Error::TagFileParse("not UTF-8".into(), path.to_path_buf(), 0)
147 })?;
148 for section in text.split("\x0c\n").skip(1) {
149 let mut lines = section.lines();
150 let Some(header) = lines.next() else { continue };
151 let Some((file_field, _size)) = header.rsplit_once(',') else {
152 continue;
153 };
154 let file = base_dir.join(file_field);
155 for line in lines {
156 let Some((_src, after_del)) = line.split_once('\x7f') else {
157 continue;
158 };
159 let Some((tag, after_soh)) = after_del.split_once('\x01') else {
160 continue;
161 };
162 let Some((line_str, _offset)) = after_soh.split_once(',') else {
163 continue;
164 };
165 let Ok(line_num) = line_str.parse::<usize>() else {
166 continue;
167 };
168 by_name.entry(tag.to_string()).or_default().push(TagEntry {
169 file: file.clone(),
170 address: TagAddress::Line(line_num),
171 });
172 }
173 }
174 Ok(by_name)
175}
176
177pub fn pattern_to_regex(pattern: &str) -> String {
182 let (anchor_start, body) = if let Some(rest) = pattern.strip_prefix('^') {
183 ("^", rest)
184 } else {
185 ("", pattern)
186 };
187 let (body, anchor_end) = if let Some(stripped) = body.strip_suffix('$') {
188 (stripped, "$")
189 } else {
190 (body, "")
191 };
192 format!("{anchor_start}{}{anchor_end}", regex::escape(body))
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory every in-memory fixture pretends its tag file lives in.
    const ROOT: &str = "/proj";

    /// Builds a `TagFile` straight from ctags text, bypassing the filesystem.
    fn ctags_fixture(text: &str) -> TagFile {
        let root = Path::new(ROOT);
        TagFile {
            base_dir: root.to_path_buf(),
            by_name: parse_ctags(text, root),
        }
    }

    /// Address of the first `foo` entry parsed from `text`.
    fn first_foo_address(text: &str) -> TagAddress {
        ctags_fixture(text).lookup("foo")[0].address.clone()
    }

    /// Parses etags bytes as if they came from `/proj/TAGS`.
    fn etags_fixture(bytes: &[u8]) -> HashMap<String, Vec<TagEntry>> {
        parse_etags(bytes, Path::new(ROOT), Path::new("/proj/TAGS")).unwrap()
    }

    #[test]
    fn ctags_three_column_line_parses() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t42\n");
        let expected = TagEntry {
            file: PathBuf::from("/proj/src/lib.rs"),
            address: TagAddress::Line(42),
        };
        assert_eq!(tags.lookup("foo"), &[expected][..]);
    }

    #[test]
    fn ctags_exuberant_suffix_is_stripped() {
        let address = first_foo_address("foo\tsrc/lib.rs\t42;\"\tf\tfile:\n");
        assert_eq!(address, TagAddress::Line(42));
    }

    #[test]
    fn ctags_metadata_line_is_skipped() {
        let tags =
            ctags_fixture("!_TAG_FILE_FORMAT\t2\t/extended format/\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("!_TAG_FILE_FORMAT").len(), 0);
        assert_eq!(tags.lookup("foo").len(), 1);
    }

    #[test]
    fn ctags_forward_slash_pattern_parses() {
        let address = first_foo_address("foo\tsrc/lib.rs\t/^fn foo()$/\n");
        assert_eq!(address, TagAddress::Pattern("^fn foo()$".into()));
    }

    #[test]
    fn ctags_question_mark_pattern_parses() {
        let address = first_foo_address("foo\tsrc/lib.rs\t?pattern?\n");
        assert_eq!(address, TagAddress::Pattern("pattern".into()));
    }

    #[test]
    fn ctags_pattern_with_suffix_strips_suffix() {
        let address = first_foo_address("foo\tsrc/lib.rs\t/^pat$/;\"\tf\n");
        assert_eq!(address, TagAddress::Pattern("^pat$".into()));
    }

    #[test]
    fn multiple_entries_for_same_name_accumulate() {
        let tags = ctags_fixture("foo\ta.rs\t1\nfoo\tb.rs\t2\n");
        assert_eq!(tags.lookup("foo").len(), 2);
    }

    #[test]
    fn malformed_ctags_line_is_skipped() {
        let tags = ctags_fixture("oneword\nfoo\tsrc/lib.rs\t1\n");
        assert_eq!(tags.lookup("foo").len(), 1);
        assert_eq!(tags.lookup("oneword").len(), 0);
    }

    #[test]
    fn empty_address_is_skipped() {
        let tags = ctags_fixture("foo\tsrc/lib.rs\t\n");
        assert_eq!(tags.lookup("foo").len(), 0);
    }

    #[test]
    fn etags_single_section_parses() {
        let index = etags_fixture(b"\x0c\nsrc/lib.rs,42\n\x7ffoo\x01100,0\n");
        let expected = TagEntry {
            file: PathBuf::from("/proj/src/lib.rs"),
            address: TagAddress::Line(100),
        };
        assert_eq!(index["foo"], vec![expected]);
    }

    #[test]
    fn etags_multiple_sections_accumulate() {
        let index =
            etags_fixture(b"\x0c\na.rs,10\n\x7ffoo\x011,0\n\x0c\nb.rs,10\n\x7fbar\x012,0\n");
        assert_eq!(index.len(), 2);
        for name in ["foo", "bar"] {
            assert!(index.contains_key(name));
        }
    }

    #[test]
    fn etags_malformed_line_is_skipped() {
        let index = etags_fixture(b"\x0c\nsrc/lib.rs,42\nno-delimiters\n\x7ffoo\x011,0\n");
        assert_eq!(index["foo"].len(), 1);
    }

    #[test]
    fn pattern_to_regex_preserves_anchors() {
        let cases = [
            ("^fn foo()$", "^fn foo\\(\\)$"),
            ("foo", "foo"),
            ("^foo", "^foo"),
            ("foo$", "foo$"),
        ];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn pattern_to_regex_escapes_metacharacters() {
        let cases = [("a.b", "a\\.b"), ("^a[b]c$", "^a\\[b\\]c$")];
        for (pattern, expected) in cases {
            assert_eq!(pattern_to_regex(pattern), expected);
        }
    }

    #[test]
    fn find_walking_up_finds_in_same_directory() {
        let dir = tempfile::tempdir().unwrap();
        let tags_path = dir.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();
        assert_eq!(TagFile::find_walking_up(dir.path()), Some(tags_path));
    }

    #[test]
    fn find_walking_up_finds_two_directories_up() {
        let root = tempfile::tempdir().unwrap();
        let tags_path = root.path().join("tags");
        std::fs::write(&tags_path, b"").unwrap();

        let nested = root.path().join("a").join("b");
        std::fs::create_dir_all(&nested).unwrap();

        assert_eq!(TagFile::find_walking_up(&nested), Some(tags_path));
    }

    #[test]
    fn find_walking_up_returns_none_when_missing() {
        let dir = tempfile::tempdir().unwrap();
        assert!(TagFile::find_walking_up(dir.path()).is_none());
    }
}