grepdef/
lib.rs

1#![warn(missing_docs)]
2//! Quick search for symbol definitions in various programming languages
3//!
4//! Currently this supports Rust, JS (or TypeScript), and PHP.
5//!
6//! This can be used like "Go to definition" in an IDE, except that instead of using a language
7//! server, it just searches for the definition using text parsing. This is less accurate but often
8//! faster in projects with lots of files or where a language server won't work or hasn't yet
9//! started.
10//!
11//! grepdef since 3.0 is written in Rust and is designed to be extremely fast.
12//!
13//! This can also be used as a library crate for other Rust programs.
14//!
15//! # Example
16//!
17//! The syntax of the CLI is similar to that of `grep` or `ripgrep`: first put the symbol you want
18//! to search for (eg: a function name, class name, etc.) and then list the file(s) or directories
19//! over which you want to search.
20//!
21//! ```text
22//! $ grepdef parseQuery ./src
23//! ./src/queries.js:function parseQuery {
24//! ```
25//!
26//! Just like `grep`, you can add the `-n` option to include line numbers.
27//!
28//! ```text
29//! $ grepdef -n parseQuery ./src
30//! ./src/queries.js:17:function parseQuery {
31//! ```
32//!
33//! The search will be faster if you specify what type of file you are searching for using the
34//! `--type` option.
35//!
36//! ```text
37//! $ grepdef --type js -n parseQuery ./src
38//! ./src/queries.js:17:function parseQuery {
39//! ```
40//!
41//! To use the crate from other Rust code, use [Searcher].
42//!
43//! ```
44//! use grepdef::{Args, Searcher};
45//!
46//! for result in Searcher::new(Args::from_query("parseQuery")).unwrap().search().unwrap() {
47//!     println!("{}", result.to_grep());
48//! }
49//! ```
50
51use clap::Parser;
52use colored::Colorize;
53use ignore::Walk;
54use regex::Regex;
55use serde::Serialize;
56use std::error::Error;
57use std::fs;
58use std::io::{self, BufRead, Seek};
59use std::num::NonZero;
60use std::sync::mpsc;
61use std::time;
62use strum_macros::Display;
63use strum_macros::EnumString;
64
65mod file_type;
66mod query_regex;
67mod threads;
68
69/// The command-line arguments to be used by [Searcher]
70///
71/// Can be passed to [Searcher::new].
72///
73/// The only required property is [Args::query].
74///
75/// # Example
76///
77/// ```
78/// use grepdef::Args;
79/// let config = Args::from_query("parseQuery");
80/// assert_eq!(config.query, String::from("parseQuery"));
81/// assert_eq!(config.file_path, None); // The current directory
82/// assert_eq!(config.file_type, None); // Auto-detect the file type
83/// assert_eq!(config.line_number, false); // Do not print line numbers
84/// ```
85#[derive(Parser, Debug, Default)]
86#[command(
87    version,
88    arg_required_else_help = true,
89    about = "Quick search for symbol definitions in various programming languages",
90    long_about = "Quick search for symbol definitions in various programming languages"
91)]
92pub struct Args {
93    /// (Required) The symbol name (function, class, etc.) to search for
94    pub query: String,
95
96    /// The file path(s) to search; recursively searches directories and respects .gitignore
97    pub file_path: Option<Vec<String>>,
98
99    /// The file type to search (js, php, rs); will guess if not set but this is slower
100    #[arg(short = 't', long = "type")]
101    pub file_type: Option<String>,
102
103    /// Show line numbers of matches if set
104    #[arg(short = 'n', long = "line-number")]
105    pub line_number: bool,
106
107    /// Control color output ("never", "always", "auto"); default "auto"
108    #[arg(long = "color")]
109    pub color: Option<String>,
110
111    /// Disable color (also supports NO_COLOR env)
112    #[arg(long = "no-color")]
113    pub no_color: bool,
114
115    /// Limit the number of results
116    #[arg(short = 'l', long = "limit")]
117    pub limit: Option<usize>,
118
119    /// (Advanced) Print debugging information
120    #[arg(long = "debug")]
121    pub debug: bool,
122
123    /// (Advanced) The searching method
124    #[arg(long = "search-method")]
125    pub search_method: Option<SearchMethod>,
126
127    /// (Advanced) The number of threads to use
128    #[arg(short = 'j', long = "threads")]
129    pub threads: Option<NonZero<usize>>,
130
131    /// The output format; defaults to 'grep'
132    #[arg(long = "format")]
133    pub format: Option<SearchResultFormat>,
134}
135
136impl Args {
137    /// Create a new set of arguments for [Searcher] with the minimal configuration
138    pub fn from_query(query: &str) -> Args {
139        Args {
140            query: query.into(),
141            ..Args::default()
142        }
143    }
144
145    /// Create a new set of arguments for [Searcher]
146    pub fn new(
147        query: String,
148        file_type: Option<String>,
149        file_path: Option<Vec<String>>,
150        line_number: bool,
151    ) -> Args {
152        Args {
153            query,
154            file_type,
155            file_path,
156            line_number,
157            ..Args::default()
158        }
159    }
160}
161
162/// (Advanced) The type of underlying search algorithm to use
163///
164/// In general, a pre-scan is a good idea to quickly skip files that don't have a match, which
165/// should be most files. You shouldn't need to change this from the default.
166#[derive(clap::ValueEnum, Clone, Default, Debug, EnumString, PartialEq, Display)]
167pub enum SearchMethod {
168    /// Pre-scan each file by reading fully into memory and using a Regex
169    #[default]
170    PrescanRegex,
171
172    /// Pre-scan each file by reading bytes until the query is found using memmem
173    PrescanMemmem,
174
175    /// Don't pre-scan files.
176    NoPrescan,
177}
178
179/// The configuration used by a [Searcher]
180///
181/// Created by passing [Args] to [Config::new].
182#[derive(Clone, Debug)]
183struct Config {
184    /// The symbol name (function, class, etc.) being searched for
185    query: String,
186
187    /// The list of file paths to search, ignoring invisible or gitignored files
188    file_paths: Vec<String>,
189
190    /// The type of files to scan (JS or PHP or RS)
191    file_type: FileType,
192
193    /// Include line numbers in results if true
194    line_number: bool,
195
196    /// Output debugging info during search if true
197    debug: bool,
198
199    /// Limit the number of results
200    limit: Option<usize>,
201
202    /// Explicitly disable color output if true
203    no_color: bool,
204
205    /// Explicitly control color output ("never", "always", "auto")
206    color: ColorOption,
207
208    /// The [SearchMethod] to use
209    search_method: SearchMethod,
210
211    /// The number of threads to use for searching files
212    num_threads: NonZero<usize>,
213
214    /// The output format
215    format: SearchResultFormat,
216}
217
218impl Config {
219    /// Create a new Config using an [Args]
220    pub fn new(args: Args) -> Result<Config, String> {
221        if args.debug {
222            let args_formatted = format!("Creating config with args {:?}", args);
223            println!("{}", args_formatted.yellow());
224        }
225        let file_paths = match args.file_path {
226            Some(file_path) => file_path,
227            None => vec![".".into()],
228        };
229        let file_type = match args.file_type {
230            Some(file_type_string) => FileType::from_string(file_type_string.as_str())?,
231            None => FileType::from_file_paths(&file_paths)?,
232        };
233        let color = match args.color {
234            Some(color_option_string) => ColorOption::from_string(color_option_string.as_str())?,
235            None => ColorOption::AUTO,
236        };
237
238        let num_threads = match args.threads {
239            Some(threads) => threads,
240            None => NonZero::new(5).expect("Default number of threads was invalid"),
241        };
242
243        let config = Config {
244            query: args.query,
245            file_paths,
246            file_type,
247            line_number: args.line_number,
248            debug: args.debug,
249            no_color: args.no_color,
250            color,
251            search_method: args.search_method.unwrap_or_default(),
252            limit: args.limit,
253            num_threads,
254            format: args.format.unwrap_or_default(),
255        };
256        debug(&config, format!("Created config {:?}", config).as_str());
257        Ok(config)
258    }
259}
260
/// The supported file types to search
///
/// You can turn a string into a [FileType] using [FileType::from_string] which also supports
/// type aliases like `javascript`, `javascriptreact`, or `typescript.tsx`.
///
/// Values can be compared with `==`, like the other public enums of this crate.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum FileType {
    /// The JS (or TS) file type
    JS,

    /// The PHP file type
    PHP,

    /// The Rust file type
    RS,
}
276
277impl FileType {
278    /// Turn a string into a [FileType]
279    ///
280    /// You can turn a string into a [FileType] using [FileType::from_string] which also supports
281    /// type aliases like `javascript`, `javascriptreact`, or `typescript.tsx`.
282    pub fn from_string(file_type_string: &str) -> Result<FileType, String> {
283        match file_type_string {
284            "js" => Ok(FileType::JS),
285            "ts" => Ok(FileType::JS),
286            "jsx" => Ok(FileType::JS),
287            "tsx" => Ok(FileType::JS),
288            "javascript" => Ok(FileType::JS),
289            "javascript.jsx" => Ok(FileType::JS),
290            "javascriptreact" => Ok(FileType::JS),
291            "typescript" => Ok(FileType::JS),
292            "typescript.tsx" => Ok(FileType::JS),
293            "typescriptreact" => Ok(FileType::JS),
294            "php" => Ok(FileType::PHP),
295            "rs" => Ok(FileType::RS),
296            "rust" => Ok(FileType::RS),
297            _ => Err(format!("Invalid file type '{}'", file_type_string)),
298        }
299    }
300
301    /// Get the textual representation of a [FileType]
302    pub fn to_string(&self) -> String {
303        match self {
304            Self::JS => String::from("js"),
305            Self::PHP => String::from("php"),
306            Self::RS => String::from("rs"),
307        }
308    }
309
310    /// Try to guess a [FileType] based on a list of file paths
311    ///
312    /// This can examine files or recursive directories and try to determine the [FileType] to
313    /// search for. It will return the file type of the first file it finds that matches one of the
314    /// file type patterns the crate supports.
315    ///
316    /// If a directory includes multiple supported file types, this could be incorrect, so it's
317    /// more reliable (and faster) to specify a file type explicitly.
318    pub fn from_file_paths(file_paths: &Vec<String>) -> Result<FileType, &'static str> {
319        for file_path in file_paths {
320            let guess = file_type::guess_file_type_from_file_path(file_path);
321            if let Some(value) = guess {
322                return Ok(value);
323            }
324        }
325        Err("Unable to guess file type from file paths")
326    }
327}
328
/// The supported arguments to the color option
///
/// You can turn a string into a [ColorOption] using [ColorOption::from_string].
///
/// Values can be compared with `==`, like the other public enums of this crate.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ColorOption {
    /// Always colorize
    ALWAYS,

    /// Never colorize
    NEVER,

    /// Auto-detect colorize
    AUTO,
}

impl ColorOption {
    /// Convert a string to a [ColorOption]
    ///
    /// Accepts exactly `always`, `never`, or `auto` (case-sensitive).
    ///
    /// # Errors
    ///
    /// Returns a message naming the rejected string for any other input.
    pub fn from_string(color_option_string: &str) -> Result<ColorOption, String> {
        match color_option_string {
            "always" => Ok(Self::ALWAYS),
            "never" => Ok(Self::NEVER),
            "auto" => Ok(Self::AUTO),
            _ => Err(format!("Invalid color option '{}'", color_option_string)),
        }
    }
}
355
356/// The output format of [SearchResult::to_string]
357#[derive(clap::ValueEnum, Clone, Default, Debug, EnumString, PartialEq, Display, Copy)]
358pub enum SearchResultFormat {
359    /// grep-like output; colon-separated path, line number, and text
360    #[default]
361    Grep,
362
363    /// JSON output; one document per match
364    JsonPerMatch,
365}
366
367/// A result from calling [Searcher::search] or [Searcher::search_and_format]
368///
369/// Note that `line_number` will be set only if [Args::line_number] is true when searching.
370#[derive(Debug, PartialEq, Clone, Serialize)]
371pub struct SearchResult {
372    /// The path to the file containing the symbol definition
373    pub file_path: String,
374
375    /// The line number of the symbol definition in the file
376    pub line_number: Option<usize>,
377
378    /// The symbol definition line
379    pub text: String,
380}
381
382impl SearchResult {
383    /// Return a formatted string for output in the "grep" format
384    ///
385    /// That is, either `file path:text on line` or, if [Args::line_number] is true,
386    /// `file path:line number:text on line`.
387    ///
388    /// # Example
389    ///
390    /// If [Args::line_number] is true,
391    ///
392    /// ```text
393    /// ./src/queries.js:17:function parseQuery {
394    /// ```
395    pub fn to_grep(&self) -> String {
396        match self.line_number {
397            Some(line_number) => format!(
398                "{}:{}:{}",
399                self.file_path.magenta(),
400                line_number.to_string().green(),
401                self.text
402            ),
403            None => format!("{}:{}", self.file_path.magenta(), self.text),
404        }
405    }
406
407    /// Return a formatted string for output in the "JSON_PER_MATCH" format
408    pub fn to_json_per_match(&self) -> String {
409        serde_json::to_string(self).unwrap_or_default()
410    }
411}
412
413/// A struct that can perform a search
414///
415/// This is the main API of this crate.
416///
417/// # Example
418///
419/// ```
420/// use grepdef::{Args, Searcher};
421/// let searcher = Searcher::new(Args::new(
422///     String::from("parseQuery"),
423///     None,
424///     None,
425///     true
426/// ))
427/// .unwrap();
428///
429/// for result in searcher.search_and_format().unwrap() {
430///     println!("{}", result);
431/// }
432///
433/// searcher.search_and_format_callback(|line| println!("{}", line));
434/// ```
435pub struct Searcher {
436    config: Config,
437}
438
439impl Searcher {
440    /// Create a new Config using an [Args]
441    pub fn new(args: Args) -> Result<Searcher, String> {
442        let config = Config::new(args)?;
443        Ok(Searcher { config })
444    }
445
446    /// Perform the search and return formatted strings
447    pub fn search_and_format(&self) -> Result<Vec<String>, Box<dyn Error>> {
448        let results = self.search()?;
449        Ok(results
450            .iter()
451            .map(|result| match self.config.format {
452                SearchResultFormat::Grep => result.to_grep(),
453                SearchResultFormat::JsonPerMatch => result.to_json_per_match(),
454            })
455            .collect())
456    }
457
458    /// Perform the search and run a callback for each formatted string
459    pub fn search_and_format_callback<F>(&self, mut callback: F) -> Result<(), Box<dyn Error>>
460    where
461        F: FnMut(String),
462    {
463        self.search_callback(|result| match self.config.format {
464            SearchResultFormat::Grep => callback(result.to_grep()),
465            SearchResultFormat::JsonPerMatch => callback(result.to_json_per_match()),
466        })
467    }
468
469    /// Perform the search and call the callback for each result
470    pub fn search_callback<F>(&self, mut callback: F) -> Result<(), Box<dyn Error>>
471    where
472        F: FnMut(SearchResult),
473    {
474        // Don't try to even calculate elapsed time if we are not going to print it
475        let start: Option<time::Instant> = if self.config.debug {
476            Some(time::Instant::now())
477        } else {
478            None
479        };
480        let re = query_regex::get_regex_for_query(&self.config.query, &self.config.file_type);
481        let file_type_re = file_type::get_regexp_for_file_type(&self.config.file_type);
482        let mut pool = threads::ThreadPool::new(self.config.num_threads, self.config.debug);
483
484        match self.config.color {
485            ColorOption::ALWAYS => colored::control::set_override(true),
486            ColorOption::NEVER => colored::control::set_override(false),
487            ColorOption::AUTO => (),
488        }
489        if self.config.no_color {
490            colored::control::set_override(false);
491        }
492
493        self.debug("Starting searchers");
494        let mut searched_file_count = 0;
495
496        // Create a scope for tx to live in because it is cloned by all files and we need all
497        // senders to go out of scope for the iterator to end.
498        let rx = {
499            let (tx, rx) = mpsc::channel();
500            for file_path in &self.config.file_paths {
501                for entry in Walk::new(file_path) {
502                    let path = match entry {
503                        Ok(path) => path.into_path(),
504                        Err(err) => {
505                            return Err(Box::new(err));
506                        }
507                    };
508                    if path.is_dir() {
509                        continue;
510                    }
511                    let path = match path.to_str() {
512                        Some(p) => p.to_string(),
513                        None => {
514                            return Err(Box::from("Error getting string from path"));
515                        }
516                    };
517                    if !file_type_re.is_match(&path) {
518                        continue;
519                    }
520                    searched_file_count += 1;
521
522                    let re1 = re.clone();
523                    let path1 = path.clone();
524                    let config1 = self.config.clone();
525                    let tx1 = tx.clone();
526                    pool.execute(move || {
527                        search_file(
528                            &re1,
529                            &path1,
530                            &config1,
531                            move |file_results: Vec<SearchResult>| {
532                                // NOTE: it would be nice to have better error handling for if this
533                                // message send fails, but since normal error handling would happen through
534                                // message sending, I don't know what else to do.
535                                let _ = tx1.send(file_results);
536                            },
537                        );
538                    })
539                }
540            }
541            rx
542        };
543
544        self.debug("Listening to searcher results");
545        let mut result_counter: usize = 0;
546        'all_results: for received_results in rx {
547            for received_result in received_results {
548                result_counter += 1;
549                callback(received_result);
550                // Don't try to even calculate elapsed time if we are not going to print it
551                if let (true, Some(start)) = (self.config.debug, start) {
552                    self.debug(
553                        format!("Found a result in {} ms", start.elapsed().as_millis()).as_str(),
554                    );
555                }
556                if let Some(i) = self.config.limit {
557                    self.debug(format!("This is result {}; limit {}", result_counter, i).as_str());
558                    if result_counter >= i {
559                        self.debug("Limit reached");
560                        pool.stop();
561                        break 'all_results;
562                    }
563                }
564            }
565        }
566
567        self.debug("Waiting for searchers to complete");
568        pool.wait_for_all_jobs_and_stop();
569        self.debug("Searchers complete");
570
571        // Don't try to even calculate elapsed time if we are not going to print it
572        if let (true, Some(start)) = (self.config.debug, start) {
573            self.debug(
574                format!(
575                    "Scanned {} files in {} ms",
576                    searched_file_count,
577                    start.elapsed().as_millis()
578                )
579                .as_str(),
580            );
581        }
582        Ok(())
583    }
584
585    /// Perform the search and return [SearchResult] structs
586    pub fn search(&self) -> Result<Vec<SearchResult>, Box<dyn Error>> {
587        let mut results: Vec<SearchResult> = vec![];
588        let search_result = self.search_callback(|result| results.push(result));
589        match search_result {
590            Ok(_) => Ok(results),
591            Err(err) => Err(err),
592        }
593    }
594
595    fn debug(&self, output: &str) {
596        if self.config.debug {
597            println!("{}", output.yellow());
598        }
599    }
600}
601
602fn debug(config: &Config, output: &str) {
603    if config.debug {
604        println!("{}", output.yellow());
605    }
606}
607
608fn search_file<F>(re: &Regex, file_path: &str, config: &Config, callback: F)
609where
610    F: FnOnce(Vec<SearchResult>) + Send + 'static,
611{
612    debug(config, format!("Scanning file {}", file_path).as_str());
613    let file = fs::File::open(file_path);
614
615    match file {
616        Ok(mut file) => {
617            // Scan the file in big chunks to see if it has what we are looking for. This is more efficient
618            // than going line-by-line on every file since matches should be quite rare.
619            if match config.search_method {
620                SearchMethod::PrescanRegex => !file_type::does_file_match_regexp(&file, re),
621                SearchMethod::PrescanMemmem => {
622                    !file_type::does_file_match_query(&file, &config.query)
623                }
624                SearchMethod::NoPrescan => false,
625            } {
626                debug(
627                    config,
628                    format!("Presearch of {} found no match; skipping", &file_path).as_str(),
629                );
630                callback(vec![]);
631                return;
632            }
633
634            let rewind_result = file.rewind();
635            if rewind_result.is_err() {
636                callback(vec![]);
637                return;
638            }
639            debug(
640                config,
641                format!(
642                    "Presearch of {} was successful; searching for line",
643                    &file_path
644                )
645                .as_str(),
646            );
647            callback(search_file_line_by_line(re, file_path, &file, config));
648        }
649        Err(_) => {
650            callback(vec![]);
651        }
652    }
653}
654
655fn search_file_line_by_line(
656    re: &Regex,
657    file_path: &str,
658    file: &fs::File,
659    config: &Config,
660) -> Vec<SearchResult> {
661    let lines = io::BufReader::new(file).lines();
662    let mut line_counter = 0;
663
664    lines
665        .filter_map(|line| {
666            line_counter += 1;
667            if !match &line {
668                Ok(line) => re.is_match(line),
669                Err(_) => false,
670            } {
671                return None;
672            }
673
674            let text = match line {
675                Ok(line) => line,
676                // If reading the line causes an error (eg: invalid UTF), then skip it by treating
677                // it as empty.
678                Err(_err) => String::from(""),
679            };
680
681            Some(SearchResult {
682                file_path: String::from(file_path),
683                line_number: if config.line_number {
684                    Some(line_counter)
685                } else {
686                    None
687                },
688                text: text.trim().into(),
689            })
690        })
691        .collect()
692}