nix_doc/
lib.rs

1//! library components of nix-doc
2pub mod pprint;
3pub mod tags;
4pub mod threadpool;
5
6use crate::pprint::pprint_args;
7use crate::threadpool::ThreadPool;
8
9use colorful::{Color, Colorful};
10use regex::Regex;
11use rnix::types::{AttrSet, EntryHolder, Ident, Lambda, TokenWrapper, TypedNode};
12use rnix::SyntaxKind::*;
13use rnix::{NodeOrToken, SyntaxNode, TextUnit, WalkEvent, AST};
14use walkdir::{DirEntry, WalkDir};
15
16use std::fs;
17use std::iter;
18use std::path::Path;
19use std::sync::mpsc::channel;
20use std::{fmt::Display, str};
21
22pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
23
/// Number of spaces doc comments are indented by when printed in a search result
const DOC_INDENT: usize = 3;

/// Max size of files we will consider searching. It takes a long time to parse 300k lines of nix
/// in hackage-packages.nix and no files this big will have search results in them as they
/// categorically do not contain functions. 200k bytes is ~7.5k lines
const MAX_FILE_SIZE: u64 = 200_000;
30
/// One function definition found by a search, carrying everything needed to
/// render it for display.
struct SearchResult {
    /// Name of the function
    identifier: String,

    /// Dedented documentation comments
    doc: String,

    /// Parameter block for the function
    param_block: String,

    /// Start of the definition of the function, as an offset into the source
    /// file (fed to `find_line` to recover a line number)
    defined_at_start: usize,
}
44
/// Returns the line number on which byte offset `pos` falls, computed as the
/// number of lines contained in `file[..pos]`.
fn find_line(file: &str, pos: usize) -> usize {
    let prefix = &file[..pos];
    prefix.lines().count()
}
48
/// Converts a (`line`, `col`) position into a byte offset into `file`.
///
/// Column convention (inherited from how callers pass positions): on the first
/// line columns are 0-based; on later lines they are measured from the
/// preceding newline character, i.e. effectively 1-based.
///
/// # Panics
/// Panics (`unreachable!`) if the requested position does not exist in `file`.
fn find_pos(file: &str, line: usize, col: usize) -> usize {
    let mut lines = 1;
    // char index of the most recent '\n' (0 until the first newline is seen)
    let mut line_start = 0;
    for (count, (byte_pos, ch)) in file.char_indices().enumerate() {
        if ch == '\n' {
            lines += 1;
            line_start = count;
        }
        if lines == line && count - line_start == col {
            // Return the *byte* offset: callers slice the file with this value
            // and build rowan TextUnits from it. The previous implementation
            // returned the char index, which is wrong (and can panic) for
            // files containing multi-byte UTF-8 characters. For pure-ASCII
            // input the two are identical.
            return byte_pos;
        }
    }
    unreachable!();
}
63
64impl SearchResult {
65    fn format<P: Display>(&self, filename: P, line: usize) -> String {
66        format!(
67            "{}\n{} = {}\n# {}",
68            indented(&self.doc, DOC_INDENT),
69            self.identifier.as_str().white().bold(),
70            self.param_block,
71            format!("{}:{}", filename, line).as_str(),
72        )
73    }
74}
75
/// Should the given path be searched?
/// TODO: support globbing for files e.g. with lib in their name to improve perf significantly
///       or avoid looking in absurdly large files like hackage.nix
pub fn is_searchable(fname: &Path) -> bool {
    // non-UTF-8 paths are never searchable
    matches!(fname.to_str(), Some(name) if name.ends_with(".nix"))
}
82
83/// Runs a search for files matching the regex `matching`. Returns a list of such results with the
84/// associated file contents
85fn search_file(file: &Path, matching: &Regex) -> Result<Vec<(SearchResult, usize)>> {
86    // don't bother searching files that are so large they must be generated
87    let length = fs::metadata(file)?.len();
88    if length > MAX_FILE_SIZE {
89        return Ok(Vec::new());
90    }
91
92    let content = fs::read_to_string(file)?;
93    let ast = rnix::parse(&content).as_result()?;
94    let results = search_ast(&matching, &ast);
95
96    Ok(results
97        .into_iter()
98        .map(|res| {
99            let line = find_line(&content, res.defined_at_start);
100            (res, line)
101        })
102        .collect::<Vec<_>>())
103}
104
105/// Is a file hidden or a unicode decode error?
106/// Let's not consider it.
107pub fn is_ignored(entry: &DirEntry) -> bool {
108    entry
109        .file_name()
110        .to_str()
111        .map(|s| s != "." && s.starts_with('.') || s == "target")
112        .unwrap_or(true)
113}
114
/// Search the `dir` for files with function definitions matching `matching`.
///
/// Walks `dir` recursively (skipping entries rejected by `is_ignored`),
/// parses each file accepted by `should_search` on a worker thread, and
/// prints formatted results to stdout, separated by a horizontal rule.
/// Per-file failures are reported on stderr without aborting the search.
pub fn search<F>(dir: &Path, matching: Regex, should_search: F)
where
    F: Fn(&Path) -> bool,
{
    let pool = ThreadPool::default();
    // workers send batches of formatted result strings; printed below
    let (tx, rx) = channel();

    for direntry in WalkDir::new(dir)
        .into_iter()
        // prune hidden files/dirs and `target` before descending into them
        .filter_entry(|e| !is_ignored(e))
        .filter_map(|e| e.ok())
        .filter(|e| should_search(e.path()) && e.path().is_file())
    {
        // each task needs its own sender and its own copy of the regex
        let my_tx = tx.clone();
        let matching = matching.clone();
        pool.push(move || {
            let results = search_file(direntry.path(), &matching);
            if let Err(err) = results {
                eprintln!("Failure handling {}: {}", direntry.path().display(), err);
                return;
            }
            let results = results.unwrap();

            let formatted = results
                .iter()
                .map(|(result, line)| result.format(direntry.path().display(), *line))
                .collect::<Vec<_>>();
            if !formatted.is_empty() {
                my_tx
                    .send(formatted)
                    .expect("failed to send messages to display");
            }
        });
    }

    // drop our own sender so rx.recv() disconnects once every worker's clone
    // is gone; done() waits for the pool to finish queued work
    drop(tx);
    pool.done();

    // grey horizontal rule printed between consecutive results
    let line = iter::repeat("─")
        .take(45)
        .collect::<String>()
        .color(Color::Grey27);
    let mut is_first = true;

    // drain everything the workers produced (recv errors out when all
    // senders are dropped, ending the loop)
    while let Ok(results) = rx.recv() {
        for result in results {
            if !is_first {
                println!("{}", &line);
            } else {
                is_first = false;
            }
            println!("{}", result);
        }
    }
}
173
174/// Searches the given AST for functions called `identifier`
175fn search_ast(identifier: &Regex, ast: &AST) -> Vec<SearchResult> {
176    let mut results = Vec::new();
177    for ev in ast.node().preorder_with_tokens() {
178        match ev {
179            WalkEvent::Enter(enter) => {
180                //println!("enter {:?}", &enter);
181                if let Some(set) = enter.into_node().and_then(AttrSet::cast) {
182                    results.extend(visit_attrset(identifier, &set));
183                }
184            }
185            WalkEvent::Leave(_leave) => {
186                //println!("leave {:?}", &leave);
187            }
188        }
189    }
190    results
191}
192
/// Emits a string `s` with every line indented by `indent` spaces
fn indented(s: &str, indent: usize) -> String {
    let pad = " ".repeat(indent);
    let padded: Vec<String> = s.split('\n').map(|line| format!("{}{}", pad, line)).collect();
    padded.join("\n")
}
201
/// Cleans up a single line, erasing prefix single line comments (`#`) and
/// multiline-continuation markers (`*`) but preserving indentation.
///
/// A line whose first non-whitespace character is not a comment marker is
/// returned unchanged; its indentation is handled by `dedent_comment` later.
fn cleanup_single_line(s: &str) -> &str {
    // NOTE: the explicit `<'a>` lifetime the original carried was redundant
    // (clippy::needless_lifetimes); elision produces the same signature.
    let mut content_start = 0;
    for (idx, ch) in s.char_indices() {
        // found a comment marker: the content starts right after it
        // (both markers are ASCII, so +1 is a valid byte offset)
        if ch == '#' || ch == '*' {
            content_start = idx + 1;
            break;
        }
        // a non-whitespace, non-marker character means this line has no
        // leading comment syntax to strip
        if !ch.is_whitespace() {
            break;
        }
    }
    &s[content_start..]
}
219
/// Erases indents in comments. This is *almost* a normal dedent function, but it starts by looking
/// at the second line if it can.
fn dedent_comment(s: &str) -> String {
    fn leading_ws(line: &str) -> usize {
        line.chars().take_while(|ch| ch.is_whitespace()).count()
    }
    fn is_blank(line: &str) -> bool {
        leading_ws(line) == line.len()
    }

    // Seed the indent from the first non-blank line, preferring lines after
    // the first (the first line of a comment often has its marker stripped
    // and so carries no indentation of its own).
    let mut rest = s.lines();
    let head = rest.next();
    let mut indent = rest
        .chain(head)
        .find(|line| !is_blank(line))
        .map(leading_ws)
        .unwrap_or(0);

    // The seed line may be indented deeper than its siblings; shrink to the
    // smallest indent among the non-blank lines after the first. A one-line
    // comment is left alone.
    if let Some(narrowest) = s
        .lines()
        .skip(1)
        .filter(|line| !is_blank(line))
        .map(leading_ws)
        .min()
    {
        indent = indent.min(narrowest);
    }

    // Strip at most `indent` whitespace characters from each line, then drop
    // any trailing newlines from the reassembled string.
    let dedented = s
        .lines()
        .map(|line| {
            let content = line.find(|ch: char| !ch.is_whitespace()).unwrap_or(0);
            &line[content.min(indent)..]
        })
        .collect::<Vec<_>>()
        .join("\n");
    dedented.trim_end_matches('\n').to_string()
}
259
260/// Deletes whitespace and leading comment characters
261///
262/// Oversight we are choosing to ignore: if you put # characters at the beginning of lines in a
263/// multiline comment, they will be deleted.
264fn cleanup_comments<S: AsRef<str>, I: DoubleEndedIterator<Item = S>>(comment: &mut I) -> String {
265    dedent_comment(
266        &comment
267            .rev()
268            .map(|small_comment| {
269                small_comment
270                    .as_ref()
271                    // space before multiline start
272                    .trim_start()
273                    // multiline starts
274                    .trim_start_matches("/*")
275                    // trailing so we can grab multiline end
276                    .trim_end()
277                    // multiline ends
278                    .trim_end_matches("*/")
279                    // extra space that was in the multiline
280                    .trim()
281                    .split('\n')
282                    // erase single line comments and such
283                    .map(cleanup_single_line)
284                    .collect::<Vec<_>>()
285                    .join("\n")
286            })
287            .collect::<Vec<_>>()
288            .join("\n"),
289    )
290}
291
292/// Get the docs for a specific function
293pub fn get_function_docs(filename: &str, line: usize, col: usize) -> Option<String> {
294    let content = fs::read(filename).ok()?;
295    let decoded = str::from_utf8(&content).ok()?;
296    let pos = find_pos(&decoded, line, col);
297    let rowan_pos = TextUnit::from_usize(pos);
298    let tree = rnix::parse(decoded);
299
300    let mut lambda = None;
301    for node in tree.node().preorder() {
302        match node {
303            WalkEvent::Enter(n) => {
304                if n.text_range().start() >= rowan_pos && n.kind() == NODE_LAMBDA {
305                    lambda = Lambda::cast(n);
306                    break;
307                }
308            }
309            WalkEvent::Leave(_) => (),
310        }
311    }
312    let lambda = lambda?;
313    let res = visit_lambda("func".to_string(), pos, &lambda);
314    Some(res.format(filename, line))
315}
316
317fn visit_lambda(name: String, defined_at_start: usize, lambda: &Lambda) -> SearchResult {
318    // grab the arguments
319    let param_block = pprint_args(&lambda);
320
321    // find the doc comment
322    let comment = find_comment(lambda.node().clone()).unwrap_or_else(|| "".to_string());
323
324    SearchResult {
325        identifier: name,
326        doc: comment,
327        param_block,
328        defined_at_start,
329    }
330}
331
332fn visit_attrset(id_needle: &Regex, set: &AttrSet) -> Vec<SearchResult> {
333    let mut results = Vec::new();
334    for entry in set.entries() {
335        if let Some(lambda) = entry.value().and_then(Lambda::cast) {
336            if let Some(attr) = entry.key() {
337                let ident = attr.path().last().and_then(Ident::cast);
338                let defined_at_start = ident
339                    .as_ref()
340                    .map(|i| i.node().text_range().start().to_usize());
341
342                let ident_name = ident.as_ref().map(|id| id.as_str());
343
344                if ident_name.map(|id| id_needle.is_match(id)) != Some(true) {
345                    // rejected, not matching our pattern
346                    continue;
347                }
348                let ident_name = ident_name.unwrap();
349
350                let res = visit_lambda(ident_name.to_string(), defined_at_start.unwrap(), &lambda);
351                if !res.doc.is_empty() {
352                    results.push(res);
353                }
354            }
355        }
356    }
357    results
358}
359
/// Collects the comment tokens immediately preceding `node`, walking
/// backwards through previous siblings and climbing to the parent whenever a
/// node has no previous sibling.
///
/// Comments are gathered in reverse (closest-first) order; `cleanup_comments`
/// re-reverses them. Returns `None` when nothing but whitespace/assignment
/// syntax precedes the node, or when the cleaned-up comment is empty.
fn find_comment(node: SyntaxNode) -> Option<String> {
    let mut node = NodeOrToken::Node(node);
    let mut comments = Vec::new();
    loop {
        // step to the previous sibling, climbing out of the current scope if
        // there is none; `?` bails out once we run off the top of the tree
        loop {
            if let Some(new) = node.prev_sibling_or_token() {
                node = new;
                break;
            } else {
                node = NodeOrToken::Node(node.parent()?);
            }
        }

        match node.kind() {
            TOKEN_COMMENT => match &node {
                NodeOrToken::Token(token) => comments.push(token.text().clone()),
                // preorder guarantees TOKEN_COMMENT kinds are tokens
                NodeOrToken::Node(_) => unreachable!(),
            },
            // This stuff is found as part of `the-fn = f: ...`
            // here:                           ^^^^^^^^
            NODE_KEY | TOKEN_ASSIGN => (),
            // skip whitespace and other trivia between comment and definition
            t if t.is_trivia() => (),
            // any other syntax ends the comment block
            _ => break,
        }
    }
    let doc = cleanup_comments(&mut comments.iter().map(|c| c.as_str()));
    Some(doc).filter(|it| !it.is_empty())
}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// `find_pos` translates (line, col) to an offset; columns on lines after
    /// the first are measured from the preceding newline character.
    #[test]
    fn test_bytepos() {
        let fakefile = "abc\ndef\nghi";
        assert_eq!(find_pos(fakefile, 2, 2), 5);
    }

    /// Multiline `/* ... */` markers and `#` prefixes are stripped, and the
    /// bottom-up comment order is reversed into document order.
    #[test]
    fn test_comment_stripping() {
        let ex1 = ["/* blah blah blah\n      foooo baaar\n   blah */"];
        assert_eq!(
            cleanup_comments(&mut ex1.iter()),
            "blah blah blah\n   foooo baaar\nblah"
        );

        let ex2 = ["# a1", "#    a2", "# aa"];
        assert_eq!(cleanup_comments(&mut ex2.iter()), "aa\n   a2\na1");
    }

    /// Dedent prefers the second line's indent, preserves relative indent,
    /// and leaves blank lines intact.
    #[test]
    fn test_dedent() {
        let ex1 = "a\n   b\n   c\n     d";
        assert_eq!(dedent_comment(ex1), "a\nb\nc\n  d");
        let ex2 = "a\nb\nc";
        assert_eq!(dedent_comment(ex2), ex2);
        let ex3 = "   a\n   b\n\n     c";
        assert_eq!(dedent_comment(ex3), "a\nb\n\n  c");
    }

    /// Leading `*` / `#` markers are erased but indentation before them is
    /// not; marker-free lines pass through untouched.
    #[test]
    fn test_single_line_comment_stripping() {
        let ex1 = "    * a";
        let ex2 = "    # a";
        let ex3 = "   a";
        assert_eq!(cleanup_single_line(ex1), " a");
        assert_eq!(cleanup_single_line(ex2), " a");
        assert_eq!(cleanup_single_line(ex3), ex3);
    }

    /// Regression test for issue #11: doc comment extraction from the
    /// `fixedWidthString` definition in the bundled testdata file.
    #[test]
    fn test_regression_11() {
        let out = r#"Create a fixed width string with additional prefix to match
required width.

This function will fail if the input string is longer than the
requested length.

Type: fixedWidthString :: int -> string -> string

Example:
  fixedWidthString 5 "0" (toString 15)
  => "00015""#;
        let ast = rnix::parse(include_str!("../testdata/regression-11.nix"))
            .as_result()
            .unwrap();
        let results = search_ast(&regex::Regex::new("fixedWidthString").unwrap(), &ast);
        assert_eq!(results.len(), 1);

        assert_eq!(results[0].doc, out);
    }
}