1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
use crate::query::Extractor;
use crate::query::ExtractorChooser;
use crate::query::Language;
use anyhow::{bail, Context, Error, Result};
use clap::{crate_authors, crate_version, Arg, ArgMatches, Command};
use itertools::Itertools;
use std::collections::HashMap;
use std::path::PathBuf;
use std::str::FromStr;
/// What the command line asked the program to do.
///
/// Produced by `Invocation::from_args`.
#[derive(Debug)]
pub enum Invocation {
    /// Run a query using the carried options.
    DoQuery(QueryOpts),
    /// The `-l`/`--language` flag was given: the caller should list the
    /// known languages rather than run a query.
    ShowLanguages,
}
/// Options for one query run, as assembled by `Invocation::from_args`.
#[derive(Debug)]
pub struct QueryOpts {
/// Extractors built from the `-q`/`--query` LANGUAGE/QUERY arguments
pub extractors: Vec<Extractor>,
/// Paths from the positional `PATHS` argument (the places to search for matches)
pub paths: Vec<PathBuf>,
/// `true` unless `--no-gitignore` was passed (that flag stops git's ignore and exclude files from filtering files)
pub git_ignore: bool,
/// Output format selected with `-f`/`--format`
pub format: QueryFormat,
/// `true` when `--sort` was passed (sort matches stably)
pub sort: bool,
}
impl QueryOpts {
    /// Construct an [`ExtractorChooser`] from this configuration's extractors.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ExtractorChooser::from_extractors`.
    pub fn extractor_chooser(&self) -> Result<ExtractorChooser> {
        let extractors = &self.extractors;
        ExtractorChooser::from_extractors(extractors)
    }
}
impl Invocation {
/// Build Invocation from input arguments
/// # Example
///
/// ```no_run
/// # fn main() -> anyhow::Result<()> {
/// use rust_hero::query::Invocation;
///
/// let args=[
/// "rust_hero",
/// "-q",
/// "rust",
/// "(function_item (identifier) @id) @function",
/// "--format=classes",
/// "--sort",
/// "--no-gitignore",
/// "data/error.rs",
/// ]
/// .iter()
/// .map(|s| s.to_string())
/// .collect();
/// let invocation = Invocation::from_args(args)?;
/// # Ok(())
/// # }
/// ```
pub fn from_args(args: Vec<String>) -> Result<Self> {
// I'm not super happy with this! I would love for LANGUAGE and QUERY to
// be taken positionally when there is just one so we don't always have
// to specify `-q`. However, I also want to get working on the rest of
// the program so I'm dropping the requirement for now by making `-q`
// required. I think that's an OK tradeoff until I can figure something
// else better because it'll be backwards compatible with the scheme
// I outlined above.
//
// Check
// https://users.rust-lang.org/t/grep-like-argument-parsing-with-clap/63392
// for where I asked about this in public.
let matches = Command::new("rust_hero")
.version(crate_version!())
.author(crate_authors!())
.arg(
Arg::new("additional-query")
.short('q')
.long("query")
.help("a language and query to perform")
.long_help(
"a language and query to perform (at least one is required.) See https://tree-sitter.github.io for information on writing queries. Run tree-grepper --languages for a list of languages.",
)
.number_of_values(2)
.value_names(&["LANGUAGE", "QUERY"])
// .required_unless_present("languages")
// .multiple_values(true)
.default_values(&["rust", "(function_item (identifier) @id) @function"])
)
.arg(
Arg::new("no-gitignore")
.long("no-gitignore")
.help("don't use git's ignore and exclude files to filter files")
)
.arg(
Arg::new("PATHS")
.required_unless_present("PATHS")
.default_value(".")
.help("places to search for matches")
)
.arg(
Arg::new("FORMAT")
.long("format")
.short('f')
.possible_values(&["classes", "lines", "json", "json-lines", "pretty-json"])
.default_value("classes")
.help("what format should we output lines in?")
)
.arg(
Arg::new("SORT")
.long("sort")
.help("sort matches stably")
.long_help("sort matches stably. If this is not specified, output ordering will vary because due to parallelism. Caution: this adds a worst-case `O(n * log(n))` overhead, where `n` is the number of files matched. Avoid it if possible if you care about performance.")
)
.arg(
Arg::new("LANGUAGE")
.long("language")
.short('l')
.help("print the language names tree-grepper knows about")
)
.try_get_matches_from(args)
.context("could not parse args")?;
if matches.is_present("LANGUAGE") {
Ok(Self::ShowLanguages)
} else {
Ok(Self::DoQuery(QueryOpts {
extractors: Self::extractors(&matches)?,
paths: Self::paths(&matches)?,
git_ignore: !matches.is_present("no-gitignore"),
format: QueryFormat::from_str(
matches.value_of("FORMAT").context("format not provided")?,
)
.context("could not set format")?,
sort: matches.is_present("SORT"),
}))
}
}
fn extractors(matches: &ArgMatches) -> Result<Vec<Extractor>> {
let values = match matches.values_of("additional-query") {
Some(values) => values,
None => bail!("queries were required but not provided. This indicates an internal error and you should report it!"),
};
// the most common case is going to be one query, so let's allocate
// that immediately...
let mut query_strings: HashMap<Language, String> = HashMap::with_capacity(1);
// If you have two tree-sitter queries `(one)` and `(two)`, you can
// join them together in a single string like `(one)(two)`. In that
// case, the resulting query will act like an OR and match any of the
// queries inside. Doing this automatically gives us an advantage:
// for however many queries we get on the command line, we will only
// ever have to run one per file, since we can combine them and you
// can't specify queries across multiple languages! Nobody should ever
// notice, except that they won't see as much of a slowdown for adding
// new queries to an invocation as they might expect. (Well, hopefully!)
for (raw_lang, raw_query) in values.tuples() {
let lang = Language::from_str(raw_lang).context("could not parse language")?;
let mut query_out = String::from(raw_query);
let temp_query = lang
.parse_query(raw_query)
.context("could not parse query")?;
if temp_query.capture_names().is_empty() {
query_out.push_str("@query");
}
if let Some(existing) = query_strings.get_mut(&lang) {
existing.push_str(&query_out);
} else {
query_strings.insert(lang, query_out);
}
}
let mut out = Vec::with_capacity(query_strings.len());
for (lang, raw_query) in query_strings {
let query = lang
.parse_query(&raw_query)
.context("could not parse combined query")?;
out.push(Extractor::new(lang, query))
}
Ok(out)
}
fn paths(matches: &ArgMatches) -> Result<Vec<PathBuf>> {
match matches.values_of("PATHS") {
Some(values) =>
values
.map(|raw_path| PathBuf::from_str(raw_path).with_context(|| format!("could not parse a path from {}", raw_path)))
.collect(),
None => bail!("at least one path was required but not provided. This indicates an internal errors and you should report it!"),
}
}
}
/// Output format for extracted matches, as chosen with `-f`/`--format`.
///
/// Each variant corresponds to one of the strings accepted on the command
/// line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryFormat {
    /// Selected by `classes`.
    Classes,
    /// Selected by `lines`.
    Lines,
    /// Selected by `json`.
    Json,
    /// Selected by `json-lines`.
    JsonLines,
    /// Selected by `pretty-json`.
    PrettyJson,
}
impl FromStr for QueryFormat {
    type Err = Error;

    /// Map a command-line format name onto its [`QueryFormat`] variant.
    ///
    /// Recognised names are `classes`, `lines`, `json`, `json-lines` and
    /// `pretty-json`; matching is exact.
    ///
    /// # Errors
    ///
    /// Any other string yields an "unknown format" error.
    fn from_str(s: &str) -> Result<Self> {
        let format = match s {
            "classes" => Self::Classes,
            "lines" => Self::Lines,
            "json" => Self::Json,
            "json-lines" => Self::JsonLines,
            "pretty-json" => Self::PrettyJson,
            _ => bail!("unknown format. See --help for valid formats."),
        };
        Ok(format)
    }
}