nix_doc/
tags.rs

1use std::env::current_dir;
2use std::fmt::Write;
3use std::sync::mpsc::channel;
4use std::time::Instant;
5use std::{
6    fmt, fs, io,
7    iter::FromIterator,
8    path::{Path, PathBuf},
9};
10
11use rnix::types::Inherit;
12use rnix::SyntaxNode;
13use rnix::{
14    types::{AttrSet, EntryHolder, Ident, TokenWrapper, TypedNode},
15    SmolStr,
16    SyntaxKind::*,
17    AST,
18};
19use walkdir::WalkDir;
20
21use crate::threadpool::ThreadPool;
22use crate::{is_ignored, is_searchable};
23
/// Compile-time switch for the timing diagnostics printed by `Timer::debug_print`.
const DEBUG_TIMERS: bool = false;

/// Stopwatch that records the instant it was created.
struct Timer(Instant);

impl Timer {
    /// Starts a new timer at the current instant.
    fn new() -> Self {
        Self(Instant::now())
    }

    /// Converts `elapsed` into milliseconds, keeping the sub-millisecond fraction.
    ///
    /// The previous formula added `subsec_millis() / 1000` (a fraction of a *second*) to the
    /// total number of milliseconds, which double-counted part of the elapsed time.
    fn fractional_millis(elapsed: std::time::Duration) -> f64 {
        elapsed.as_secs_f64() * 1000.0
    }

    /// Prints the time elapsed since construction to stderr, labelled with `name`.
    ///
    /// Does nothing unless `DEBUG_TIMERS` is enabled.
    fn debug_print(&self, name: &str) {
        if DEBUG_TIMERS {
            eprintln!(
                "{}: {:0.4} ms",
                name,
                Self::fractional_millis(self.0.elapsed())
            );
        }
    }
}
43
/// A lazily computed value that also remembers when the computation failed, so the
/// computation is attempted at most once.
#[derive(Clone, Debug)]
enum MemoValue<T> {
    /// The computation has not been attempted yet.
    Uncomputed,
    /// The computation was attempted and produced nothing.
    Failed,
    /// The computation succeeded with this value.
    Value(T),
}

impl<T> Default for MemoValue<T> {
    /// A fresh memo cell starts out uncomputed.
    fn default() -> Self {
        MemoValue::Uncomputed
    }
}

impl<T> MemoValue<T> {
    /// Returns the memoized value, running `f` first if nothing has been computed yet.
    ///
    /// `f` is invoked at most once over the lifetime of the cell: if it returns `None` the
    /// failure is recorded and every later call returns `None` without calling `f` again.
    fn get_or_compute<F>(&mut self, f: F) -> Option<&T>
    where
        F: FnOnce() -> Option<T>,
    {
        // Fill the cell on first use; afterwards it is either `Value` or `Failed`.
        if let MemoValue::Uncomputed = self {
            *self = match f() {
                Some(computed) => MemoValue::Value(computed),
                None => MemoValue::Failed,
            };
        }

        match &*self {
            MemoValue::Value(stored) => Some(stored),
            MemoValue::Uncomputed | MemoValue::Failed => None,
        }
    }
}
76
/// The kind of definition a tag refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Kind {
    /// An attribute whose value is a lambda.
    Function,
    /// Any other attribute, including inherited names.
    Member,
}
81
/// A path represented by its index into a shared list of all collected paths, so that tags
/// can refer to their file without each owning a copy of the path.
#[derive(Debug, Clone, Copy)]
struct InternedPath(usize);
85
/// Generates `impl From<$source> for $target` that wraps the incoming value in the
/// `$target::$variant` tuple variant.
macro_rules! impl_from {
    ($target:ty, $variant:ident, $source:ty) => {
        impl From<$source> for $target {
            fn from(value: $source) -> Self {
                <$target>::$variant(value)
            }
        }
    };
}
95
96#[derive(Debug)]
97pub enum Error {
98    Io(io::Error),
99}
100
101impl_from!(Error, Io, io::Error);
102
103/// One ctags file entry
104struct Tag {
105    /// Name of the identifier
106    name: SmolStr,
107
108    /// Path relative to the tags file parent dir
109    path: InternedPath,
110
111    /// "address" of the tag, the line it's on, basically.
112    addr: SmolStr,
113
114    /// Kind of tag
115    kind: Kind,
116}
117
118impl fmt::Display for Kind {
119    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120        match self {
121            Kind::Function => write!(f, "f"),
122            Kind::Member => write!(f, "m"),
123        }
124    }
125}
126
/// Returns `a` with a backslash inserted before every character that is special inside a
/// tag search pattern: `\`, `/`, `$` and `^`.
fn escape(a: &str) -> String {
    a.chars().fold(String::new(), |mut escaped, ch| {
        if matches!(ch, '\\' | '/' | '$' | '^') {
            escaped.push('\\');
        }
        escaped.push(ch);
        escaped
    })
}
139
140fn make_addr(a: &str) -> SmolStr {
141    // FIXME: delete this cloned malarkey when we can tell everyone with old nixpkgs to go eat a
142    // nixpkgs-unstable cookie
143    SmolStr::from_iter(["/^", &escape(a), "$/"].iter().cloned())
144}
145
146impl Tag {
147    fn to_string_relative_to(
148        &self,
149        paths: &[PathBuf],
150        p: &Path,
151        memo: &mut Vec<MemoValue<PathBuf>>,
152        out: &mut String,
153    ) -> Option<()> {
154        let relpath =
155            memo[self.path.0].get_or_compute(|| pathdiff::diff_paths(&paths[self.path.0], p))?;
156
157        write!(
158            out,
159            "{}\t{}\t{};\"\t{}",
160            self.name,
161            relpath.display(),
162            make_addr(&self.addr),
163            self.kind
164        )
165        .ok()?;
166        Some(())
167    }
168}
169
170struct FileJob<'a> {
171    file: InternedPath,
172    source: &'a str,
173    results: &'a mut Vec<Tag>,
174}
175
176impl<'a> FileJob<'a> {
177    fn get_source_line(&self, node: &SyntaxNode) -> &str {
178        let defined_at_start = node.text_range().start().to_usize();
179        let prior = &self.source[..defined_at_start];
180        let line_start = prior.rfind('\n').unwrap_or(0);
181        let after = &self.source[defined_at_start..];
182        let line_end = after
183            .find('\n')
184            .unwrap_or(self.source.len() - defined_at_start);
185        let source_line = &self.source[line_start..defined_at_start + line_end];
186        source_line.strip_prefix('\n').unwrap_or(source_line)
187    }
188
189    fn visit_attrset(&mut self, set: &AttrSet) {
190        for ent in set.entries() {
191            let tag = (|| {
192                let val = ent.value()?;
193                let key = ent.key()?;
194
195                let kind = match val.kind() {
196                    NODE_LAMBDA => Kind::Function,
197                    _ => Kind::Member,
198                };
199
200                let source_line = self.get_source_line(key.node());
201
202                let ident = key.path().last().and_then(Ident::cast);
203                let ident_name = ident.as_ref().map(|id| id.as_str())?;
204
205                Some(Tag {
206                    name: ident_name.into(),
207                    path: self.file.clone(),
208                    addr: source_line.into(),
209                    kind,
210                })
211            })();
212
213            if let Some(tag) = tag {
214                self.results.push(tag);
215            }
216        }
217    }
218
219    fn visit_inherit(&mut self, inh: &Inherit) {
220        for id in inh.idents() {
221            (|| {
222                let name = id.as_str();
223                self.results.push(Tag {
224                    name: name.into(),
225                    path: self.file.clone(),
226                    addr: self.get_source_line(id.node()).into(),
227                    kind: Kind::Member,
228                });
229            })();
230        }
231    }
232
233    fn exec(&mut self, ast: &AST) {
234        for evt in ast.node().preorder_with_tokens() {
235            match evt {
236                rnix::WalkEvent::Enter(ent) => {
237                    ent.into_node().map(|n| match n.kind() {
238                        NODE_ATTR_SET => self.visit_attrset(&AttrSet::cast(n).unwrap()),
239                        NODE_INHERIT => self.visit_inherit(&Inherit::cast(n).unwrap()),
240                        _ => (),
241                    });
242                }
243                rnix::WalkEvent::Leave(_) => (),
244            }
245        }
246    }
247
248    /// Runs a file job collecting tags for a path.
249    ///
250    /// `p` must be absolute.
251    pub fn run(p_interned: InternedPath, p: &Path) -> Result<Vec<Tag>, Error> {
252        assert!(p.is_absolute());
253        let contents = fs::read_to_string(p)?;
254        let parsed = rnix::parse(&contents);
255        let mut results = Vec::new();
256
257        let mut job = FileJob {
258            file: p_interned,
259            source: &contents,
260            results: &mut results,
261        };
262
263        job.exec(&parsed);
264
265        // we sort here because the rust sorting algo is supposedly good at a bunch of concatenated
266        // sorted lists, and parallel compute is effectively free
267        results.sort_unstable_by(|e1, e2| e1.name.as_str().cmp(e2.name.as_str()));
268
269        Ok(results)
270    }
271}
272
273/// Writes out the header of the tags file to the writer.
274fn write_header(mut writer: impl io::Write) -> Result<(), Error> {
275    /*
276    !_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
277    !_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
278    !_TAG_OUTPUT_EXCMD	mixed	/number, pattern, mixed, or combineV2/
279    !_TAG_OUTPUT_FILESEP	slash	/slash or backslash/
280    !_TAG_OUTPUT_MODE	u-ctags	/u-ctags or e-ctags/
281    !_TAG_PATTERN_LENGTH_LIMIT	96	/0 for no limit/
282    !_TAG_PROC_CWD	/home/jade/co/neovim/	//
283    !_TAG_PROGRAM_AUTHOR	Universal Ctags Team	//
284    !_TAG_PROGRAM_NAME	Universal Ctags	/Derived from Exuberant Ctags/
285    !_TAG_PROGRAM_URL	https://ctags.io/	/official site/
286    !_TAG_PROGRAM_VERSION	5.9.0	/e70d5a8f3/
287         */
288    writeln!(writer, "!_TAG_FILE_FORMAT\t2\t/extended format/")?;
289    writeln!(
290        writer,
291        "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/"
292    )?;
293    writeln!(writer, "!_TAG_FILE_ENCODING\tutf-8\t//")?;
294    writeln!(writer, "!_TAG_PROGRAM_NAME\tnix-doc tags\t//")?;
295    writeln!(
296        writer,
297        "!_TAG_PROGRAM_URL\thttps://github.com/lf-/nix-doc\t//"
298    )?;
299    Ok(())
300}
301
302/// Builds a tags database into the given writer with paths relative to the current directory, with
303/// the nix files in `dir`
304pub fn run_on_dir(dir: &Path, mut writer: impl io::Write) -> Result<(), Error> {
305    let pool = ThreadPool::default();
306    let (tx, rx) = channel();
307
308    let mut paths_interned = Vec::new();
309    let curdir = current_dir()?;
310
311    //println!("searching {}", dir.display());
312    let walk_t = Timer::new();
313    for direntry in WalkDir::new(dir)
314        .into_iter()
315        .filter_entry(|e| !is_ignored(e))
316        .filter_map(|e| e.ok())
317        .filter(|e| is_searchable(e.path()) && e.path().is_file())
318    {
319        let path = curdir.join(direntry.into_path());
320        let path_ = path.clone();
321        paths_interned.push(path);
322        let path_interned = InternedPath(paths_interned.len() - 1);
323
324        let my_tx = tx.clone();
325        pool.push(move || {
326            let results = FileJob::run(path_interned, &path_);
327            let results = match results {
328                Ok(v) => v,
329                Err(e) => {
330                    eprintln!("Error processing {}: {:?}", &path_.display(), e);
331                    return;
332                }
333            };
334
335            if !results.is_empty() {
336                my_tx.send(results).expect("failed to send tags");
337            }
338        });
339    }
340
341    drop(tx);
342    pool.done();
343    walk_t.debug_print("walk time");
344
345    let mut out = Vec::new();
346    while let Ok(set) = rx.recv() {
347        out.extend(set);
348    }
349
350    let sort_t = Timer::new();
351    out.sort_by(|e1, e2| e1.name.as_str().cmp(e2.name.as_str()));
352    sort_t.debug_print("final sort time");
353
354    let write_t = Timer::new();
355    write_header(&mut writer)?;
356
357    let mut memo = vec![MemoValue::Uncomputed; paths_interned.len()];
358    let mut out_s = String::new();
359
360    for tag in out {
361        out_s.clear();
362        match tag.to_string_relative_to(&paths_interned, &curdir, &mut memo, &mut out_s) {
363            Some(_) => (),
364            None => continue,
365        };
366        writer.write(out_s.as_bytes())?;
367        writer.write(b"\n")?;
368    }
369    write_t.debug_print("write time");
370
371    Ok(())
372}
373
#[cfg(test)]
mod tests {
    use std::{
        env::{current_dir, set_current_dir},
        path::PathBuf,
    };

    use super::*;
    use expect_test::{expect, Expect};

    /// Generates tags with `dir` as the working directory and compares the trimmed output
    /// against `expected`, restoring the previous working directory afterwards.
    fn check(dir: &str, expected: Expect) {
        let data_dir = PathBuf::from(dir);
        let original_dir = current_dir().unwrap();

        println!("datadir: {}", &data_dir.display());
        set_current_dir(data_dir).unwrap();

        let mut buffer = Vec::new();
        run_on_dir(&PathBuf::from("."), &mut buffer).unwrap();

        let text = std::str::from_utf8(&buffer).unwrap();
        println!("{}", text);

        expected.assert_eq(text.trim());

        set_current_dir(original_dir).unwrap();
    }

    /// End-to-end snapshot of the tags produced for the files in `testdata`.
    #[test]
    fn smoke() {
        check(
            "testdata",
            expect![[r#"
                !_TAG_FILE_FORMAT	2	/extended format/
                !_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
                !_TAG_FILE_ENCODING	utf-8	//
                !_TAG_PROGRAM_NAME	nix-doc tags	//
                !_TAG_PROGRAM_URL	https://github.com/lf-/nix-doc	//
                c	test.nix	/^   a.b.c = a: 1;$/;"	f
                ff	test.nix	/^   inherit ff;$/;"	m
                fixedWidthString	regression-11.nix	/^  fixedWidthString = width: filler: str:$/;"	f
                grub	test.nix	/^   inherit (n) grub hello;$/;"	m
                hello	test.nix	/^   inherit (n) grub hello;$/;"	m
                the-fn	test.nix	/^   the-fn = a: b: {z = a; y = b;};$/;"	f
                the-snd-fn	test.nix	/^   the-snd-fn = {b, \/* doc *\/ c}: {};$/;"	f
                withFeature	regression-11.nix	/^  withFeature = with_: feat: "--\${if with_ then "with" else "without"}-\${feat}";$/;"	f
                withFeatureAs	regression-11.nix	/^  withFeatureAs = with_: feat: value: withFeature with_ feat + optionalString with_ "=\${value}";$/;"	f
                y	test.nix	/^   the-fn = a: b: {z = a; y = b;};$/;"	m
                z	test.nix	/^   the-fn = a: b: {z = a; y = b;};$/;"	m"#]],
        );
    }
}