// asrch 0.1.1
//
// Agent-safe bounded code search CLI

use std::env;
use std::path::PathBuf;

use clap::{Args as ClapArgs, CommandFactory, Parser, Subcommand};

/// Byte budget for one printed message; `print_error` in this file clips messages to this
/// length (backing off to a UTF-8 character boundary).
pub(crate) const MAX_LINE_BYTES: usize = 800;

/// Largest value accepted by `parse_context` (used for `show --context`).
const MAX_CONTEXT: usize = 5;
/// Default for `sample --clusters`; also stored in `Args::clusters` for the other commands.
const DEFAULT_SAMPLE_CLUSTERS: usize = 3;
/// Largest value accepted by `parse_clusters` (used for `sample --clusters`).
const MAX_SAMPLE_CLUSTERS: usize = 5;
/// Maximum number of `survey --term` values, enforced in `TryFrom<Cli> for Args`.
const MAX_SURVEY_TERMS: usize = 12;
/// Maximum number of `survey` paths, enforced in `TryFrom<Cli> for Args`.
const MAX_SURVEY_PATHS: usize = 8;

/// The subcommand that was selected on the command line.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Kind {
    /// `survey`: compare literal terms across one or more paths.
    Survey,
    /// `scout`: summarize the distribution of one query.
    Scout,
    /// `sample`: page through nearby match clusters in one file.
    Sample,
    /// `show`: show matching snippets or context around one line.
    Show,
}

/// How a query string is interpreted when searching.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SearchMode {
    /// Fixed-string matching; used when no mode flag is given.
    Fixed,
    /// Selected by `--identifier`.
    Identifier,
    /// Selected by `--word`.
    Word,
    /// Selected by `--regex`.
    Regex,
}

impl SearchMode {
    /// Returns the lowercase name of this mode as a static string.
    pub(crate) fn label(self) -> &'static str {
        let name = match self {
            SearchMode::Regex => "regex",
            SearchMode::Word => "word",
            SearchMode::Identifier => "identifier",
            SearchMode::Fixed => "fixed",
        };
        name
    }
}

/// Validated, command-independent view of the command line, produced by
/// `TryFrom<Cli> for Args`.
///
/// Fields that a given subcommand has no option for are filled with fixed values
/// (see the per-field notes).
#[derive(Debug)]
pub(crate) struct Args {
    /// The selected subcommand.
    pub(crate) kind: Kind,
    /// The `survey` terms, or a single-element vector holding the query of the other commands.
    pub(crate) terms: Vec<String>,
    /// The `survey` paths, or a single-element vector holding the path/file of the other commands.
    pub(crate) paths: Vec<PathBuf>,
    /// `show --context` value; `2` for every other command.
    pub(crate) context: usize,
    /// `sample --clusters` value; `DEFAULT_SAMPLE_CLUSTERS` for every other command.
    pub(crate) clusters: usize,
    /// `sample --page` value; `1` for every other command.
    pub(crate) page: usize,
    /// `show --line` value when given; `None` otherwise and for every other command.
    pub(crate) line: Option<usize>,
    /// Query interpretation derived from the mode flags.
    pub(crate) mode: SearchMode,
}

// Top-level clap definition for the `asrch` binary.
//
// Maintainer notes on the clap-derived types in this file are written as plain `//` comments:
// clap's derive macros read `///` doc comments as help text, so adding one would change the
// program's output.
#[derive(Debug, Parser)]
#[command(
    name = "asrch",
    version,
    about = "Search source trees and print structured summaries or snippets.",
    after_help = "Requires ripgrep (`rg`) on PATH. Match counts are counts of matching lines.\nSearches respect ripgrep ignore rules and exclude common generated, dependency, log, JSONL, XML, and scratch paths. These exclusions cannot be disabled by a CLI option.\nEach output line is clipped to 800 bytes.",
    disable_help_subcommand = true
)]
struct Cli {
    // The selected subcommand together with its parsed arguments.
    #[command(subcommand)]
    command: CliCommand,
}

// The four subcommands. Each variant wraps the argument struct for that subcommand and is
// converted into the shared `Args` type by `TryFrom<Cli> for Args`.
//
// Plain `//` comments are used here because `///` comments would be picked up by clap's
// derive as help text.
#[derive(Debug, Subcommand)]
enum CliCommand {
    // `survey`: multiple literal terms over multiple paths (maps to `Kind::Survey`).
    #[command(
        about = "Compare literal terms across one or more paths.",
        after_help = "Reports overall counts and per-path counts. Per-path rows with zero matches are omitted.\nSearch mode options are mutually exclusive. Regular expressions are not accepted."
    )]
    Survey(SurveyCli),
    // `scout`: one query over one path (maps to `Kind::Scout`).
    #[command(
        about = "Summarize the distribution of one query.",
        after_help = "Prints the top 5 directories and top 5 files by matching-line count.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
    )]
    Scout(ScoutCli),
    // `sample`: one query in one file, paged by cluster (maps to `Kind::Sample`).
    #[command(
        about = "Page through nearby match clusters in one file.",
        after_help = "Matches within 2 lines are grouped into one cluster. Each cluster reports its line range, hit count, first match, and last match.\nOne line of context is shown around the first and last match. Use show --line to inspect the middle of a long cluster.\nIf the output reports that more matches exist, narrow the query before relying on later pages.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
    )]
    Sample(SampleCli),
    // `show`: one query in one file, optionally focused on a single line (maps to `Kind::Show`).
    #[command(
        about = "Show matching snippets or context around one line.",
        after_help = "Without --line, more than 20 matching lines or an internal scan limit cause the command to fail without printing snippets.\nWith --line, only the specified line and its context are printed; that line does not need to contain the query.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
    )]
    Show(ShowCli),
}

// Arguments of `asrch survey`.
//
// The `///` comments on the fields are user-visible help text (clap derive); maintainer notes
// are therefore written as `//` comments.
#[derive(Debug, ClapArgs)]
struct SurveyCli {
    /// Add a literal term. Required. Repeatable up to 12 times.
    // Every `-t`/`--term` occurrence appends to this list. The 12-term cap is not enforced by
    // clap; it is checked against `MAX_SURVEY_TERMS` in `TryFrom<Cli> for Args`.
    #[arg(
        short = 't',
        long = "term",
        value_name = "text",
        required = true,
        action = clap::ArgAction::Append
    )]
    terms: Vec<String>,

    /// Search paths. Maximum: 8.
    // Positional; defaults to ".". The cap of 8 is checked against `MAX_SURVEY_PATHS` in
    // `TryFrom<Cli> for Args`.
    #[arg(value_name = "path", default_value = ".")]
    paths: Vec<PathBuf>,

    // Adds `--identifier` / `--word` only; `survey` has no `--regex` flag.
    #[command(flatten)]
    mode: LiteralModeCli,
}

// Arguments of `asrch scout`.
//
// The `///` comments on the fields are user-visible help text (clap derive); maintainer notes
// are therefore written as `//` comments.
#[derive(Debug, ClapArgs)]
struct ScoutCli {
    /// Query text. Fixed-string matching is the default.
    // Required positional; becomes the single entry of `Args::terms`.
    #[arg(value_name = "query")]
    query: String,

    /// Search path.
    // Optional positional, defaults to "."; becomes the single entry of `Args::paths`.
    #[arg(value_name = "path", default_value = ".")]
    path: PathBuf,

    // Adds `--identifier` / `--word` / `--regex`.
    #[command(flatten)]
    mode: SearchModeCli,
}

// Arguments of `asrch sample`.
//
// The `///` comments on the fields are user-visible help text (clap derive); maintainer notes
// are therefore written as `//` comments.
#[derive(Debug, ClapArgs)]
struct SampleCli {
    /// Query text. Fixed-string matching is the default.
    // Required positional; becomes the single entry of `Args::terms`.
    #[arg(value_name = "query")]
    query: String,

    /// One explicit file. Directories are not accepted.
    // Required positional; becomes the single entry of `Args::paths`. Nothing in this file
    // checks that it is a regular file — presumably validated by the caller; TODO confirm.
    #[arg(value_name = "file")]
    file: PathBuf,

    /// Clusters per page. Range: 1..=5.
    // Range is enforced by `parse_clusters`. NOTE(review): `allow_hyphen_values` presumably lets
    // input such as `-1` reach the value parser instead of being read as an option — confirm.
    #[arg(
        long,
        value_name = "N",
        default_value_t = DEFAULT_SAMPLE_CLUSTERS,
        value_parser = parse_clusters,
        allow_hyphen_values = true
    )]
    clusters: usize,

    /// Page number. Minimum: 1.
    // Minimum is enforced by `parse_positive`.
    #[arg(
        long,
        value_name = "N",
        default_value_t = 1,
        value_parser = parse_positive,
        allow_hyphen_values = true
    )]
    page: usize,

    // Hidden from help. Accepted only so that `TryFrom<Cli> for Args` can reject it with a
    // message that points the user at `show --line N --context M`.
    #[arg(long, hide = true)]
    context: Option<usize>,

    // Adds `--identifier` / `--word` / `--regex`.
    #[command(flatten)]
    mode: SearchModeCli,
}

// Arguments of `asrch show`.
//
// The `///` comments on the fields are user-visible help text (clap derive); maintainer notes
// are therefore written as `//` comments.
#[derive(Debug, ClapArgs)]
struct ShowCli {
    /// Query text. Fixed-string matching is the default.
    // Required positional; becomes the single entry of `Args::terms`.
    #[arg(value_name = "query")]
    query: String,

    /// One explicit file. Directories are not accepted.
    // Required positional; becomes the single entry of `Args::paths`. Nothing in this file
    // checks that it is a regular file — presumably validated by the caller; TODO confirm.
    #[arg(value_name = "file")]
    file: PathBuf,

    /// Show only this line and its context. Minimum: 1.
    // Optional; minimum is enforced by `parse_positive`. Stored in `Args::line`.
    #[arg(
        long,
        value_name = "N",
        value_parser = parse_positive,
        allow_hyphen_values = true
    )]
    line: Option<usize>,

    /// Lines before and after each selected line. Range: 0..=5.
    // Range is enforced by `parse_context`; defaults to 2. Stored in `Args::context`.
    #[arg(
        long,
        value_name = "N",
        default_value_t = 2,
        value_parser = parse_context,
        allow_hyphen_values = true
    )]
    context: usize,

    // Adds `--identifier` / `--word` / `--regex`.
    #[command(flatten)]
    mode: SearchModeCli,
}

// Mode flags for commands that only accept literal terms (`survey`).
//
// The group is optional and `multiple = false`, matching the help text's statement that search
// mode options are mutually exclusive. With no flag set, `From<LiteralModeCli> for SearchMode`
// yields `SearchMode::Fixed`.
//
// The `///` comments on the fields are user-visible help text (clap derive).
#[derive(Clone, Copy, Debug, Default, ClapArgs)]
#[group(required = false, multiple = false)]
struct LiteralModeCli {
    /// Match ASCII identifier boundaries.
    #[arg(long)]
    identifier: bool,

    /// Match word boundaries.
    #[arg(long)]
    word: bool,
}

// Mode flags for the single-query commands (`scout`, `sample`, `show`).
//
// The group is optional and `multiple = false`, matching the help text's statement that search
// mode options are mutually exclusive. With no flag set, `From<SearchModeCli> for SearchMode`
// yields `SearchMode::Fixed`.
//
// The `///` comments on the fields are user-visible help text (clap derive).
#[derive(Clone, Copy, Debug, Default, ClapArgs)]
#[group(required = false, multiple = false)]
struct SearchModeCli {
    /// Match ASCII identifier boundaries.
    #[arg(long)]
    identifier: bool,

    /// Match word boundaries.
    #[arg(long)]
    word: bool,

    /// Interpret the query as a regular expression.
    // In this mode `TryFrom<Cli> for Args` additionally rejects queries with an unescaped `|`.
    #[arg(long)]
    regex: bool,
}

/// Parses the process command line into validated [`Args`].
///
/// The arguments are first scanned by `reject_unknown_command_or_option`, which exits with
/// status 2 and prints help when it finds an unknown command or option. The actual parsing is
/// then done by clap (`Cli::parse()`), and the result is converted with `Args::try_from`; a
/// conversion error is printed via `exit_error` and the process exits with status 2.
///
/// This function does not return on any failure path.
pub(crate) fn parse() -> Args {
    // `env::args()` panics while iterating if any argument is not valid Unicode (for example a
    // non-UTF-8 file path). The pre-scan only needs to recognise option and command names, so a
    // lossy conversion is sufficient here; clap reads the original OS strings itself below.
    let values: Vec<String> = env::args_os()
        .skip(1)
        .map(|value| value.to_string_lossy().into_owned())
        .collect();
    reject_unknown_command_or_option(&values);

    match Args::try_from(Cli::parse()) {
        Ok(args) => args,
        Err(message) => exit_error(&message, 2),
    }
}

impl TryFrom<Cli> for Args {
    type Error = String;

    fn try_from(cli: Cli) -> Result<Self, Self::Error> {
        let args = match cli.command {
            CliCommand::Survey(command) => {
                let mut violations = Vec::new();
                if command.terms.len() > MAX_SURVEY_TERMS {
                    violations.push(format!(
                        "survey accepts at most {MAX_SURVEY_TERMS} terms; split the comparison"
                    ));
                }
                if command.paths.len() > MAX_SURVEY_PATHS {
                    violations.push(format!(
                        "survey accepts at most {MAX_SURVEY_PATHS} paths; split the comparison"
                    ));
                }
                if !violations.is_empty() {
                    return Err(violations.join("\n"));
                }
                Self {
                    kind: Kind::Survey,
                    terms: command.terms,
                    paths: command.paths,
                    context: 2,
                    clusters: DEFAULT_SAMPLE_CLUSTERS,
                    page: 1,
                    line: None,
                    mode: command.mode.into(),
                }
            }
            CliCommand::Scout(command) => Self {
                kind: Kind::Scout,
                terms: vec![command.query],
                paths: vec![command.path],
                context: 2,
                clusters: DEFAULT_SAMPLE_CLUSTERS,
                page: 1,
                line: None,
                mode: command.mode.into(),
            },
            CliCommand::Sample(command) => {
                if command.context.is_some() {
                    return Err("sample does not accept --context; it already shows fixed one-line context. Use `asrch show <query> <file> --line N --context M` after choosing a cluster line.".to_string());
                }
                Self {
                    kind: Kind::Sample,
                    terms: vec![command.query],
                    paths: vec![command.file],
                    context: 2,
                    clusters: command.clusters,
                    page: command.page,
                    line: None,
                    mode: command.mode.into(),
                }
            }
            CliCommand::Show(command) => Self {
                kind: Kind::Show,
                terms: vec![command.query],
                paths: vec![command.file],
                context: command.context,
                clusters: DEFAULT_SAMPLE_CLUSTERS,
                page: 1,
                line: command.line,
                mode: command.mode.into(),
            },
        };

        if args.terms.iter().any(|term| term.is_empty()) {
            return Err("queries and survey terms must not be empty".to_string());
        }
        if args.mode == SearchMode::Regex && has_unescaped_pipe(&args.terms[0]) {
            return Err(
                "OR regexes are not accepted by single-query commands; use `asrch survey --term ...`"
                    .to_string(),
            );
        }
        Ok(args)
    }
}

impl From<LiteralModeCli> for SearchMode {
    /// Maps the literal-only mode flags to a [`SearchMode`].
    ///
    /// `identifier` takes precedence over `word`; with neither flag set the result is
    /// `SearchMode::Fixed`.
    fn from(mode: LiteralModeCli) -> Self {
        match (mode.identifier, mode.word) {
            (true, _) => Self::Identifier,
            (false, true) => Self::Word,
            (false, false) => Self::Fixed,
        }
    }
}

impl From<SearchModeCli> for SearchMode {
    fn from(mode: SearchModeCli) -> Self {
        if mode.identifier {
            Self::Identifier
        } else if mode.word {
            Self::Word
        } else if mode.regex {
            Self::Regex
        } else {
            Self::Fixed
        }
    }
}

/// Returns `true` when `value` contains a `|` that is not preceded by an escaping backslash.
///
/// Backslashes are paired from left to right, so in `\\|` the pipe counts as unescaped while
/// in `\|` it does not. Any character other than a backslash ends a pending escape.
fn has_unescaped_pipe(value: &str) -> bool {
    let mut after_backslash = false;
    for ch in value.chars() {
        match ch {
            '|' if !after_backslash => return true,
            // A backslash either opens an escape or is itself the escaped character.
            '\\' => after_backslash = !after_backslash,
            _ => after_backslash = false,
        }
    }
    false
}

/// Prints `message` through `print_error` and terminates the process with exit status `code`.
///
/// Never returns.
pub(crate) fn exit_error(message: &str, code: i32) -> ! {
    use std::process::exit;

    print_error(message);
    exit(code)
}

/// Writes `message` to standard error followed by a newline, clipped to at most
/// `MAX_LINE_BYTES` bytes.
///
/// The cut position backs off to the nearest UTF-8 character boundary at or below the limit so
/// the slice is always valid.
fn print_error(message: &str) {
    let limit = MAX_LINE_BYTES.min(message.len());
    // Index 0 is always a character boundary, so the search cannot come up empty.
    let cut = (0..=limit)
        .rev()
        .find(|&index| message.is_char_boundary(index))
        .unwrap_or(0);
    eprintln!("{}", &message[..cut]);
}

/// Value parser for options that need an integer of at least 1.
///
/// # Errors
///
/// Returns `"requires a non-negative integer"` when `value` does not parse as `usize`, and
/// `"must be at least 1"` when it parses to zero.
fn parse_positive(value: &str) -> Result<usize, String> {
    match value.parse::<usize>() {
        Err(_) => Err("requires a non-negative integer".to_string()),
        Ok(0) => Err("must be at least 1".to_string()),
        Ok(number) => Ok(number),
    }
}

/// Value parser for `--clusters`: an integer in `1..=MAX_SAMPLE_CLUSTERS`.
///
/// # Errors
///
/// Propagates the error from `parse_positive`, or returns a range message when the value
/// exceeds `MAX_SAMPLE_CLUSTERS`.
fn parse_clusters(value: &str) -> Result<usize, String> {
    parse_positive(value).and_then(|count| {
        if count <= MAX_SAMPLE_CLUSTERS {
            Ok(count)
        } else {
            Err(format!("must be in the range 1..={MAX_SAMPLE_CLUSTERS}"))
        }
    })
}

/// Value parser for `--context`: an integer in `0..=MAX_CONTEXT`.
///
/// # Errors
///
/// Returns `"requires a non-negative integer"` when `value` does not parse as `usize`, or a
/// range message when it exceeds `MAX_CONTEXT`.
fn parse_context(value: &str) -> Result<usize, String> {
    let Ok(lines) = value.parse::<usize>() else {
        return Err("requires a non-negative integer".to_string());
    };
    if lines <= MAX_CONTEXT {
        Ok(lines)
    } else {
        Err(format!("must be in the range 0..={MAX_CONTEXT}"))
    }
}

/// Renders the long help of the top-level `asrch` command into a `String`.
fn render_top_help() -> String {
    Cli::command().render_long_help().to_string()
}

/// Renders the long help of the subcommand called `command_name` into a `String`.
///
/// The subcommand is cloned and given the binary name `asrch <command_name>` before rendering.
///
/// # Panics
///
/// Panics if `command_name` is not a subcommand of `Cli`.
fn render_command_help(command_name: &str) -> String {
    let root = Cli::command();
    let found = root
        .find_subcommand(command_name)
        .expect("known subcommand");
    let mut renamed = found.clone().bin_name(format!("asrch {command_name}"));
    let help = renamed.render_long_help();
    help.to_string()
}

/// Pre-scans the raw arguments (program name already removed) and exits with status 2 plus the
/// relevant help text when the first argument is an unknown command or option, or when a known
/// subcommand is followed by an unknown option.
///
/// Returns normally when `values` is empty, when the first argument is one of `-h`, `--help`,
/// `-V`, `--version`, or when nothing unknown is found.
fn reject_unknown_command_or_option(values: &[String]) {
    let Some(first) = values.first() else {
        return;
    };
    match first.as_str() {
        // Top-level help/version requests are handled by clap.
        "-h" | "--help" | "-V" | "--version" => {}
        "survey" | "scout" | "sample" | "show" => {
            if let Some(option) = find_unknown_option(first, &values[1..]) {
                exit_error_with_command_help(first, &format!("unknown option: {option}"));
            }
        }
        other if other.starts_with('-') => {
            exit_error_with_top_help(&format!("unknown option: {other}"))
        }
        other => exit_error_with_top_help(&format!("unknown command: {other}")),
    }
}

fn find_unknown_option<'a>(command: &str, values: &'a [String]) -> Option<&'a str> {
    let root = Cli::command();
    let command = root.find_subcommand(command).expect("known subcommand");
    let mut expect_value = false;
    for value in values {
        if expect_value {
            expect_value = false;
            continue;
        }
        if matches!(value.as_str(), "-h" | "--help") {
            continue;
        }
        if value == "--" {
            break;
        }
        if let Some(long) = value.strip_prefix("--") {
            let (name, inline_value) = long
                .split_once('=')
                .map_or((long, false), |(name, _)| (name, true));
            let Some(argument) = command
                .get_arguments()
                .find(|argument| argument.get_long() == Some(name))
            else {
                return Some(value);
            };
            expect_value = argument.get_action().takes_values() && !inline_value;
        } else if let Some(short) = value.strip_prefix('-')
            && !short.is_empty()
        {
            let name = short.chars().next().expect("non-empty short option");
            let Some(argument) = command
                .get_arguments()
                .find(|argument| argument.get_short() == Some(name))
            else {
                return Some(value);
            };
            expect_value = argument.get_action().takes_values() && short.chars().count() == 1;
        }
    }
    None
}

/// Prints `message`, a blank line, and the top-level long help to standard error, then exits
/// with status 2.
///
/// Never returns.
fn exit_error_with_top_help(message: &str) -> ! {
    print_error(message);
    let help = render_top_help();
    eprint!("\n{help}");
    std::process::exit(2)
}

/// Prints `message`, a blank line, and the long help of subcommand `command_name` to standard
/// error, then exits with status 2.
///
/// Never returns. Panics (inside `render_command_help`) if `command_name` is not a known
/// subcommand.
fn exit_error_with_command_help(command_name: &str, message: &str) -> ! {
    print_error(message);
    let help = render_command_help(command_name);
    eprint!("\n{help}");
    std::process::exit(2)
}