grepdef/lib.rs
1#![warn(missing_docs)]
2//! Quick search for symbol definitions in various programming languages
3//!
4//! Currently this supports Rust, JS (or TypeScript), and PHP.
5//!
6//! This can be used like "Go to definition" in an IDE, except that instead of using a language
7//! server, it just searches for the definition using text parsing. This is less accurate but often
8//! faster in projects with lots of files or where a language server won't work or hasn't yet
9//! started.
10//!
11//! grepdef since 3.0 is written in Rust and is designed to be extremely fast.
12//!
13//! This can also be used as a library crate for other Rust programs.
14//!
15//! # Example
16//!
17//! The syntax of the CLI is similar to that of `grep` or `ripgrep`: first put the symbol you want
18//! to search for (eg: a function name, class name, etc.) and then list the file(s) or directories
19//! over which you want to search.
20//!
21//! ```text
22//! $ grepdef parseQuery ./src
23//! ./src/queries.js:function parseQuery {
24//! ```
25//!
26//! Just like `grep`, you can add the `-n` option to include line numbers.
27//!
28//! ```text
29//! $ grepdef -n parseQuery ./src
30//! ./src/queries.js:17:function parseQuery {
31//! ```
32//!
33//! The search will be faster if you specify what type of file you are searching for using the
34//! `--type` option.
35//!
36//! ```text
37//! $ grepdef --type js -n parseQuery ./src
38//! ./src/queries.js:17:function parseQuery {
39//! ```
40//!
41//! To use the crate from other Rust code, use [Searcher].
42//!
43//! ```
44//! use grepdef::{Args, Searcher};
45//!
46//! for result in Searcher::new(Args::from_query("parseQuery")).unwrap().search().unwrap() {
47//! println!("{}", result.to_grep());
48//! }
49//! ```
50
51use clap::Parser;
52use colored::Colorize;
53use ignore::Walk;
54use regex::Regex;
55use serde::Serialize;
56use std::error::Error;
57use std::fs;
58use std::io::{self, BufRead, Seek};
59use std::num::NonZero;
60use std::sync::mpsc;
61use std::time;
62use strum_macros::Display;
63use strum_macros::EnumString;
64
65mod file_type;
66mod query_regex;
67mod threads;
68
/// The command-line arguments to be used by [Searcher]
///
/// Can be passed to [Searcher::new].
///
/// The only required property is [Args::query]. Every other property may be left unset, in which
/// case a default is chosen when the [Searcher] is created (for example, the current directory
/// is searched when no file path is given).
///
/// # Example
///
/// ```
/// use grepdef::Args;
/// let config = Args::from_query("parseQuery");
/// assert_eq!(config.query, String::from("parseQuery"));
/// assert_eq!(config.file_path, None); // The current directory
/// assert_eq!(config.file_type, None); // Auto-detect the file type
/// assert_eq!(config.line_number, false); // Do not print line numbers
/// ```
// NOTE(review): this struct derives `clap::Parser`, which presumably reuses the `///` comments on
// the fields below as the `--help` text, so rewording them changes CLI output — confirm before
// editing. Implementation notes are therefore kept in `//` comments.
#[derive(Parser, Debug, Default)]
#[command(
    version,
    arg_required_else_help = true,
    about = "Quick search for symbol definitions in various programming languages",
    long_about = "Quick search for symbol definitions in various programming languages"
)]
pub struct Args {
    /// (Required) The symbol name (function, class, etc.) to search for
    pub query: String,

    // When `None`, the search falls back to the current directory (`.`).
    /// The file path(s) to search; recursively searches directories and respects .gitignore
    pub file_path: Option<Vec<String>>,

    // Parsed with `FileType::from_string`; when `None`, the type is guessed from the file paths.
    /// The file type to search (js, php, rs); will guess if not set but this is slower
    #[arg(short = 't', long = "type")]
    pub file_type: Option<String>,

    /// Show line numbers of matches if set
    #[arg(short = 'n', long = "line-number")]
    pub line_number: bool,

    // Parsed with `ColorOption::from_string`; when `None`, `ColorOption::AUTO` is used.
    /// Control color output ("never", "always", "auto"); default "auto"
    #[arg(long = "color")]
    pub color: Option<String>,

    // Applied after `color` when searching, so it wins if both are given.
    /// Disable color (also supports NO_COLOR env)
    #[arg(long = "no-color")]
    pub no_color: bool,

    // When `None`, every result is reported.
    /// Limit the number of results
    #[arg(short = 'l', long = "limit")]
    pub limit: Option<usize>,

    /// (Advanced) Print debugging information
    #[arg(long = "debug")]
    pub debug: bool,

    // When `None`, the `#[default]` variant of `SearchMethod` is used.
    /// (Advanced) The searching method
    #[arg(long = "search-method")]
    pub search_method: Option<SearchMethod>,

    // When `None`, 5 threads are used.
    /// (Advanced) The number of threads to use
    #[arg(short = 'j', long = "threads")]
    pub threads: Option<NonZero<usize>>,

    // When `None`, the `#[default]` variant of `SearchResultFormat` is used.
    /// The output format; defaults to 'grep'
    #[arg(long = "format")]
    pub format: Option<SearchResultFormat>,
}
135
136impl Args {
137 /// Create a new set of arguments for [Searcher] with the minimal configuration
138 pub fn from_query(query: &str) -> Args {
139 Args {
140 query: query.into(),
141 ..Args::default()
142 }
143 }
144
145 /// Create a new set of arguments for [Searcher]
146 pub fn new(
147 query: String,
148 file_type: Option<String>,
149 file_path: Option<Vec<String>>,
150 line_number: bool,
151 ) -> Args {
152 Args {
153 query,
154 file_type,
155 file_path,
156 line_number,
157 ..Args::default()
158 }
159 }
160}
161
/// (Advanced) The type of underlying search algorithm to use
///
/// Before a file is searched line by line, it can be "pre-scanned" to decide whether it is worth
/// searching at all; files whose pre-scan finds nothing are skipped.
///
/// In general, a pre-scan is a good idea to quickly skip files that don't have a match, which
/// should be most files. You shouldn't need to change this from the default.
// NOTE(review): this enum derives `clap::ValueEnum`, which presumably reuses the `///` comments
// on the variants as help text for the possible values — confirm before rewording them.
#[derive(clap::ValueEnum, Clone, Default, Debug, EnumString, PartialEq, Display)]
pub enum SearchMethod {
    /// Pre-scan each file by reading fully into memory and using a Regex
    #[default]
    PrescanRegex,

    /// Pre-scan each file by reading bytes until the query is found using memmem
    PrescanMemmem,

    /// Don't pre-scan files.
    NoPrescan,
}
178
/// The configuration used by a [Searcher]
///
/// Created by passing [Args] to [Config::new], which replaces every unset option with a concrete
/// default so that the search code never has to deal with missing values.
// The field order is visible in the `{:?}` output printed in debug mode.
#[derive(Clone, Debug)]
struct Config {
    /// The symbol name (function, class, etc.) being searched for
    query: String,

    /// The list of file paths to search, ignoring invisible or gitignored files
    ///
    /// Defaults to the current directory (`.`).
    file_paths: Vec<String>,

    /// The type of files to scan (JS or PHP or RS)
    ///
    /// Either parsed from the requested type or guessed from `file_paths`.
    file_type: FileType,

    /// Include line numbers in results if true
    line_number: bool,

    /// Output debugging info during search if true
    debug: bool,

    /// Limit the number of results; `None` means no limit
    limit: Option<usize>,

    /// Explicitly disable color output if true
    ///
    /// This is applied after `color`, so it wins when both are set.
    no_color: bool,

    /// Explicitly control color output ("never", "always", "auto")
    color: ColorOption,

    /// The [SearchMethod] to use
    search_method: SearchMethod,

    /// The number of threads to use for searching files
    num_threads: NonZero<usize>,

    /// The output format
    format: SearchResultFormat,
}
217
218impl Config {
219 /// Create a new Config using an [Args]
220 pub fn new(args: Args) -> Result<Config, String> {
221 if args.debug {
222 let args_formatted = format!("Creating config with args {:?}", args);
223 println!("{}", args_formatted.yellow());
224 }
225 let file_paths = match args.file_path {
226 Some(file_path) => file_path,
227 None => vec![".".into()],
228 };
229 let file_type = match args.file_type {
230 Some(file_type_string) => FileType::from_string(file_type_string.as_str())?,
231 None => FileType::from_file_paths(&file_paths)?,
232 };
233 let color = match args.color {
234 Some(color_option_string) => ColorOption::from_string(color_option_string.as_str())?,
235 None => ColorOption::AUTO,
236 };
237
238 let num_threads = match args.threads {
239 Some(threads) => threads,
240 None => NonZero::new(5).expect("Default number of threads was invalid"),
241 };
242
243 let config = Config {
244 query: args.query,
245 file_paths,
246 file_type,
247 line_number: args.line_number,
248 debug: args.debug,
249 no_color: args.no_color,
250 color,
251 search_method: args.search_method.unwrap_or_default(),
252 limit: args.limit,
253 num_threads,
254 format: args.format.unwrap_or_default(),
255 };
256 debug(&config, format!("Created config {:?}", config).as_str());
257 Ok(config)
258 }
259}
260
/// The supported file types to search
///
/// Each variant covers a whole language family rather than a single file extension; for
/// example, TypeScript and JSX names all map to [FileType::JS].
///
/// You can turn a string into a [FileType] using [FileType::from_string] which also supports
/// type aliases like `javascript`, `javascriptreact`, or `typescript.tsx`.
#[derive(Clone, Debug)]
pub enum FileType {
    /// The JS (or TS) file type
    JS,

    /// The PHP file type
    PHP,

    /// The Rust file type
    RS,
}
276
277impl FileType {
278 /// Turn a string into a [FileType]
279 ///
280 /// You can turn a string into a [FileType] using [FileType::from_string] which also supports
281 /// type aliases like `javascript`, `javascriptreact`, or `typescript.tsx`.
282 pub fn from_string(file_type_string: &str) -> Result<FileType, String> {
283 match file_type_string {
284 "js" => Ok(FileType::JS),
285 "ts" => Ok(FileType::JS),
286 "jsx" => Ok(FileType::JS),
287 "tsx" => Ok(FileType::JS),
288 "javascript" => Ok(FileType::JS),
289 "javascript.jsx" => Ok(FileType::JS),
290 "javascriptreact" => Ok(FileType::JS),
291 "typescript" => Ok(FileType::JS),
292 "typescript.tsx" => Ok(FileType::JS),
293 "typescriptreact" => Ok(FileType::JS),
294 "php" => Ok(FileType::PHP),
295 "rs" => Ok(FileType::RS),
296 "rust" => Ok(FileType::RS),
297 _ => Err(format!("Invalid file type '{}'", file_type_string)),
298 }
299 }
300
301 /// Get the textual representation of a [FileType]
302 pub fn to_string(&self) -> String {
303 match self {
304 Self::JS => String::from("js"),
305 Self::PHP => String::from("php"),
306 Self::RS => String::from("rs"),
307 }
308 }
309
310 /// Try to guess a [FileType] based on a list of file paths
311 ///
312 /// This can examine files or recursive directories and try to determine the [FileType] to
313 /// search for. It will return the file type of the first file it finds that matches one of the
314 /// file type patterns the crate supports.
315 ///
316 /// If a directory includes multiple supported file types, this could be incorrect, so it's
317 /// more reliable (and faster) to specify a file type explicitly.
318 pub fn from_file_paths(file_paths: &Vec<String>) -> Result<FileType, &'static str> {
319 for file_path in file_paths {
320 let guess = file_type::guess_file_type_from_file_path(file_path);
321 if let Some(value) = guess {
322 return Ok(value);
323 }
324 }
325 Err("Unable to guess file type from file paths")
326 }
327}
328
/// How color output should be decided
///
/// Parse one from its textual name with [ColorOption::from_string].
#[derive(Clone, Debug)]
pub enum ColorOption {
    /// Colorize output unconditionally
    ALWAYS,

    /// Never colorize output
    NEVER,

    /// Decide automatically whether to colorize output
    AUTO,
}

impl ColorOption {
    /// Parse `"always"`, `"never"`, or `"auto"` into the matching [ColorOption]
    ///
    /// The comparison is exact and case-sensitive.
    ///
    /// # Errors
    ///
    /// Returns an error message naming the string if it is none of the three supported values.
    pub fn from_string(color_option_string: &str) -> Result<ColorOption, String> {
        let option = match color_option_string {
            "always" => Self::ALWAYS,
            "never" => Self::NEVER,
            "auto" => Self::AUTO,
            unrecognized => return Err(format!("Invalid color option '{}'", unrecognized)),
        };
        Ok(option)
    }
}
355
/// The output format used by [Searcher::search_and_format] and
/// [Searcher::search_and_format_callback]
///
/// Each variant corresponds to one formatting method on [SearchResult]:
/// [SearchResult::to_grep] or [SearchResult::to_json_per_match].
// NOTE(review): this enum derives `clap::ValueEnum`, which presumably reuses the `///` comments
// on the variants as help text for the possible values — confirm before rewording them.
#[derive(clap::ValueEnum, Clone, Default, Debug, EnumString, PartialEq, Display, Copy)]
pub enum SearchResultFormat {
    /// grep-like output; colon-separated path, line number, and text
    #[default]
    Grep,

    /// JSON output; one document per match
    JsonPerMatch,
}
366
/// A result from calling [Searcher::search] or [Searcher::search_callback]
///
/// Each result describes one line that matched the query. It can be turned into output text
/// with [SearchResult::to_grep] or [SearchResult::to_json_per_match].
///
/// Note that `line_number` will be set only if [Args::line_number] is true when searching.
#[derive(Debug, PartialEq, Clone, Serialize)]
pub struct SearchResult {
    /// The path to the file containing the symbol definition
    pub file_path: String,

    /// The line number of the symbol definition in the file, counting from 1
    ///
    /// This is `None` unless line numbers were requested for the search.
    pub line_number: Option<usize>,

    /// The symbol definition line, with surrounding whitespace trimmed
    pub text: String,
}
381
382impl SearchResult {
383 /// Return a formatted string for output in the "grep" format
384 ///
385 /// That is, either `file path:text on line` or, if [Args::line_number] is true,
386 /// `file path:line number:text on line`.
387 ///
388 /// # Example
389 ///
390 /// If [Args::line_number] is true,
391 ///
392 /// ```text
393 /// ./src/queries.js:17:function parseQuery {
394 /// ```
395 pub fn to_grep(&self) -> String {
396 match self.line_number {
397 Some(line_number) => format!(
398 "{}:{}:{}",
399 self.file_path.magenta(),
400 line_number.to_string().green(),
401 self.text
402 ),
403 None => format!("{}:{}", self.file_path.magenta(), self.text),
404 }
405 }
406
407 /// Return a formatted string for output in the "JSON_PER_MATCH" format
408 pub fn to_json_per_match(&self) -> String {
409 serde_json::to_string(self).unwrap_or_default()
410 }
411}
412
/// A struct that can perform a search
///
/// This is the main API of this crate. Build one from an [Args] with [Searcher::new], then run
/// the search with whichever method fits: [Searcher::search] for a list of [SearchResult],
/// [Searcher::search_and_format] for a list of formatted strings, or the `_callback` variants to
/// handle each result as it is received.
///
/// # Example
///
/// ```
/// use grepdef::{Args, Searcher};
/// let searcher = Searcher::new(Args::new(
///     String::from("parseQuery"),
///     None,
///     None,
///     true
/// ))
/// .unwrap();
///
/// for result in searcher.search_and_format().unwrap() {
///     println!("{}", result);
/// }
///
/// searcher.search_and_format_callback(|line| println!("{}", line)).unwrap();
/// ```
pub struct Searcher {
    // The fully-resolved settings for this searcher, built once by `Searcher::new`.
    config: Config,
}
438
439impl Searcher {
440 /// Create a new Config using an [Args]
441 pub fn new(args: Args) -> Result<Searcher, String> {
442 let config = Config::new(args)?;
443 Ok(Searcher { config })
444 }
445
446 /// Perform the search and return formatted strings
447 pub fn search_and_format(&self) -> Result<Vec<String>, Box<dyn Error>> {
448 let results = self.search()?;
449 Ok(results
450 .iter()
451 .map(|result| match self.config.format {
452 SearchResultFormat::Grep => result.to_grep(),
453 SearchResultFormat::JsonPerMatch => result.to_json_per_match(),
454 })
455 .collect())
456 }
457
458 /// Perform the search and run a callback for each formatted string
459 pub fn search_and_format_callback<F>(&self, mut callback: F) -> Result<(), Box<dyn Error>>
460 where
461 F: FnMut(String),
462 {
463 self.search_callback(|result| match self.config.format {
464 SearchResultFormat::Grep => callback(result.to_grep()),
465 SearchResultFormat::JsonPerMatch => callback(result.to_json_per_match()),
466 })
467 }
468
469 /// Perform the search and call the callback for each result
470 pub fn search_callback<F>(&self, mut callback: F) -> Result<(), Box<dyn Error>>
471 where
472 F: FnMut(SearchResult),
473 {
474 // Don't try to even calculate elapsed time if we are not going to print it
475 let start: Option<time::Instant> = if self.config.debug {
476 Some(time::Instant::now())
477 } else {
478 None
479 };
480 let re = query_regex::get_regex_for_query(&self.config.query, &self.config.file_type);
481 let file_type_re = file_type::get_regexp_for_file_type(&self.config.file_type);
482 let mut pool = threads::ThreadPool::new(self.config.num_threads, self.config.debug);
483
484 match self.config.color {
485 ColorOption::ALWAYS => colored::control::set_override(true),
486 ColorOption::NEVER => colored::control::set_override(false),
487 ColorOption::AUTO => (),
488 }
489 if self.config.no_color {
490 colored::control::set_override(false);
491 }
492
493 self.debug("Starting searchers");
494 let mut searched_file_count = 0;
495
496 // Create a scope for tx to live in because it is cloned by all files and we need all
497 // senders to go out of scope for the iterator to end.
498 let rx = {
499 let (tx, rx) = mpsc::channel();
500 for file_path in &self.config.file_paths {
501 for entry in Walk::new(file_path) {
502 let path = match entry {
503 Ok(path) => path.into_path(),
504 Err(err) => {
505 return Err(Box::new(err));
506 }
507 };
508 if path.is_dir() {
509 continue;
510 }
511 let path = match path.to_str() {
512 Some(p) => p.to_string(),
513 None => {
514 return Err(Box::from("Error getting string from path"));
515 }
516 };
517 if !file_type_re.is_match(&path) {
518 continue;
519 }
520 searched_file_count += 1;
521
522 let re1 = re.clone();
523 let path1 = path.clone();
524 let config1 = self.config.clone();
525 let tx1 = tx.clone();
526 pool.execute(move || {
527 search_file(
528 &re1,
529 &path1,
530 &config1,
531 move |file_results: Vec<SearchResult>| {
532 // NOTE: it would be nice to have better error handling for if this
533 // message send fails, but since normal error handling would happen through
534 // message sending, I don't know what else to do.
535 let _ = tx1.send(file_results);
536 },
537 );
538 })
539 }
540 }
541 rx
542 };
543
544 self.debug("Listening to searcher results");
545 let mut result_counter: usize = 0;
546 'all_results: for received_results in rx {
547 for received_result in received_results {
548 result_counter += 1;
549 callback(received_result);
550 // Don't try to even calculate elapsed time if we are not going to print it
551 if let (true, Some(start)) = (self.config.debug, start) {
552 self.debug(
553 format!("Found a result in {} ms", start.elapsed().as_millis()).as_str(),
554 );
555 }
556 if let Some(i) = self.config.limit {
557 self.debug(format!("This is result {}; limit {}", result_counter, i).as_str());
558 if result_counter >= i {
559 self.debug("Limit reached");
560 pool.stop();
561 break 'all_results;
562 }
563 }
564 }
565 }
566
567 self.debug("Waiting for searchers to complete");
568 pool.wait_for_all_jobs_and_stop();
569 self.debug("Searchers complete");
570
571 // Don't try to even calculate elapsed time if we are not going to print it
572 if let (true, Some(start)) = (self.config.debug, start) {
573 self.debug(
574 format!(
575 "Scanned {} files in {} ms",
576 searched_file_count,
577 start.elapsed().as_millis()
578 )
579 .as_str(),
580 );
581 }
582 Ok(())
583 }
584
585 /// Perform the search and return [SearchResult] structs
586 pub fn search(&self) -> Result<Vec<SearchResult>, Box<dyn Error>> {
587 let mut results: Vec<SearchResult> = vec![];
588 let search_result = self.search_callback(|result| results.push(result));
589 match search_result {
590 Ok(_) => Ok(results),
591 Err(err) => Err(err),
592 }
593 }
594
595 fn debug(&self, output: &str) {
596 if self.config.debug {
597 println!("{}", output.yellow());
598 }
599 }
600}
601
602fn debug(config: &Config, output: &str) {
603 if config.debug {
604 println!("{}", output.yellow());
605 }
606}
607
608fn search_file<F>(re: &Regex, file_path: &str, config: &Config, callback: F)
609where
610 F: FnOnce(Vec<SearchResult>) + Send + 'static,
611{
612 debug(config, format!("Scanning file {}", file_path).as_str());
613 let file = fs::File::open(file_path);
614
615 match file {
616 Ok(mut file) => {
617 // Scan the file in big chunks to see if it has what we are looking for. This is more efficient
618 // than going line-by-line on every file since matches should be quite rare.
619 if match config.search_method {
620 SearchMethod::PrescanRegex => !file_type::does_file_match_regexp(&file, re),
621 SearchMethod::PrescanMemmem => {
622 !file_type::does_file_match_query(&file, &config.query)
623 }
624 SearchMethod::NoPrescan => false,
625 } {
626 debug(
627 config,
628 format!("Presearch of {} found no match; skipping", &file_path).as_str(),
629 );
630 callback(vec![]);
631 return;
632 }
633
634 let rewind_result = file.rewind();
635 if rewind_result.is_err() {
636 callback(vec![]);
637 return;
638 }
639 debug(
640 config,
641 format!(
642 "Presearch of {} was successful; searching for line",
643 &file_path
644 )
645 .as_str(),
646 );
647 callback(search_file_line_by_line(re, file_path, &file, config));
648 }
649 Err(_) => {
650 callback(vec![]);
651 }
652 }
653}
654
655fn search_file_line_by_line(
656 re: &Regex,
657 file_path: &str,
658 file: &fs::File,
659 config: &Config,
660) -> Vec<SearchResult> {
661 let lines = io::BufReader::new(file).lines();
662 let mut line_counter = 0;
663
664 lines
665 .filter_map(|line| {
666 line_counter += 1;
667 if !match &line {
668 Ok(line) => re.is_match(line),
669 Err(_) => false,
670 } {
671 return None;
672 }
673
674 let text = match line {
675 Ok(line) => line,
676 // If reading the line causes an error (eg: invalid UTF), then skip it by treating
677 // it as empty.
678 Err(_err) => String::from(""),
679 };
680
681 Some(SearchResult {
682 file_path: String::from(file_path),
683 line_number: if config.line_number {
684 Some(line_counter)
685 } else {
686 None
687 },
688 text: text.trim().into(),
689 })
690 })
691 .collect()
692}