use std::env;
use std::path::PathBuf;
use clap::{Args as ClapArgs, CommandFactory, Parser, Subcommand};
/// Byte limit applied by `print_error` when it truncates a message before writing it.
/// Exposed crate-wide via `pub(crate)`.
pub(crate) const MAX_LINE_BYTES: usize = 800;
/// Largest value `parse_context` accepts (used for `show --context`).
const MAX_CONTEXT: usize = 5;
/// Default for `sample --clusters`; also the filler value stored in `Args::clusters`
/// for subcommands that have no `--clusters` option.
const DEFAULT_SAMPLE_CLUSTERS: usize = 3;
/// Largest value `parse_clusters` accepts (used for `sample --clusters`).
const MAX_SAMPLE_CLUSTERS: usize = 5;
/// Maximum number of `--term` values `survey` accepts; enforced in `TryFrom<Cli> for Args`.
const MAX_SURVEY_TERMS: usize = 12;
/// Maximum number of paths `survey` accepts; enforced in `TryFrom<Cli> for Args`.
const MAX_SURVEY_PATHS: usize = 8;
/// Identifies which subcommand produced an [`Args`] value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Kind {
/// Set for the `survey` subcommand (`CliCommand::Survey`).
Survey,
/// Set for the `scout` subcommand (`CliCommand::Scout`).
Scout,
/// Set for the `sample` subcommand (`CliCommand::Sample`).
Sample,
/// Set for the `show` subcommand (`CliCommand::Show`).
Show,
}
/// How a query or survey term is matched.
///
/// `Fixed` is the fallback when no mode flag is given; the other variants
/// correspond to the `--identifier`, `--word`, and `--regex` flags.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SearchMode {
    /// No mode flag was supplied.
    Fixed,
    /// `--identifier` was supplied.
    Identifier,
    /// `--word` was supplied.
    Word,
    /// `--regex` was supplied.
    Regex,
}

impl SearchMode {
    /// Returns the lowercase name of this mode as a static string.
    pub(crate) fn label(self) -> &'static str {
        match self {
            SearchMode::Regex => "regex",
            SearchMode::Word => "word",
            SearchMode::Identifier => "identifier",
            SearchMode::Fixed => "fixed",
        }
    }
}
/// Validated, subcommand-independent view of the command line.
///
/// Built from the clap-level `Cli` by `TryFrom<Cli> for Args`; fields that a
/// subcommand does not expose are filled with fixed values there.
#[derive(Debug)]
pub(crate) struct Args {
/// Subcommand that was invoked.
pub(crate) kind: Kind,
/// The `--term` values for `survey`; a single-element vector holding the query otherwise.
pub(crate) terms: Vec<String>,
/// The paths for `survey`; a single-element vector holding the path or file otherwise.
pub(crate) paths: Vec<PathBuf>,
/// The `show --context` value; set to 2 for every other subcommand.
pub(crate) context: usize,
/// The `sample --clusters` value; `DEFAULT_SAMPLE_CLUSTERS` for every other subcommand.
pub(crate) clusters: usize,
/// The `sample --page` value (validated to be at least 1); 1 for every other subcommand.
pub(crate) page: usize,
/// The `show --line` value when given; `None` in all other cases.
pub(crate) line: Option<usize>,
/// Search mode derived from the mode flags of the invoked subcommand.
pub(crate) mode: SearchMode,
}
// Top-level clap parser for the `asrch` binary.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn `///`
// doc comments into help text, so a doc comment here would change the CLI output.
#[derive(Debug, Parser)]
#[command(
name = "asrch",
version,
about = "Search source trees and print structured summaries or snippets.",
after_help = "Requires ripgrep (`rg`) on PATH. Match counts are counts of matching lines.\nSearches respect ripgrep ignore rules and exclude common generated, dependency, log, JSONL, XML, and scratch paths. These exclusions cannot be disabled by a CLI option.\nEach output line is clipped to 800 bytes.",
disable_help_subcommand = true
)]
struct Cli {
// The selected subcommand. Because this field is not optional, clap requires one.
#[command(subcommand)]
command: CliCommand,
}
// The four subcommands of `asrch`, each carrying its own argument struct.
//
// NOTE: plain `//` comments are used on purpose. clap's derive macros turn `///`
// doc comments into help text, so doc comments here would change the CLI output.
#[derive(Debug, Subcommand)]
enum CliCommand {
// `asrch survey`: literal terms only (its mode flags come from `LiteralModeCli`).
#[command(
about = "Compare literal terms across one or more paths.",
after_help = "Reports overall counts and per-path counts. Per-path rows with zero matches are omitted.\nSearch mode options are mutually exclusive. Regular expressions are not accepted."
)]
Survey(SurveyCli),
// `asrch scout`: one query over one path.
#[command(
about = "Summarize the distribution of one query.",
after_help = "Prints the top 5 directories and top 5 files by matching-line count.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
)]
Scout(ScoutCli),
// `asrch sample`: one query over one file, paged by `--clusters` / `--page`.
#[command(
about = "Page through nearby match clusters in one file.",
after_help = "Matches within 2 lines are grouped into one cluster. Each cluster reports its line range, hit count, first match, and last match.\nOne line of context is shown around the first and last match. Use show --line to inspect the middle of a long cluster.\nIf the output reports that more matches exist, narrow the query before relying on later pages.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
)]
Sample(SampleCli),
// `asrch show`: one query over one file, optionally centered on `--line`.
#[command(
about = "Show matching snippets or context around one line.",
after_help = "Without --line, more than 20 matching lines or an internal scan limit cause the command to fail without printing snippets.\nWith --line, only the specified line and its context are printed; that line does not need to contain the query.\nSearch mode options are mutually exclusive. An unescaped OR (`|`) is rejected with --regex."
)]
Show(ShowCli),
}
// Arguments of `asrch survey`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Debug, ClapArgs)]
struct SurveyCli {
// Terms to compare, given as repeated `-t` / `--term <text>`; at least one is required.
// `TryFrom<Cli> for Args` additionally rejects more than `MAX_SURVEY_TERMS` of them.
#[arg(
short = 't',
long = "term",
value_name = "text",
required = true,
action = clap::ArgAction::Append
)]
terms: Vec<String>,
// Positional paths to search; defaults to `.` when none are given.
// `TryFrom<Cli> for Args` additionally rejects more than `MAX_SURVEY_PATHS` of them.
#[arg(value_name = "path", default_value = ".")]
paths: Vec<PathBuf>,
// `--identifier` / `--word` only; this group has no `--regex` flag.
#[command(flatten)]
mode: LiteralModeCli,
}
// Arguments of `asrch scout`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Debug, ClapArgs)]
struct ScoutCli {
// Required positional query.
#[arg(value_name = "query")]
query: String,
// Optional positional path; defaults to `.`.
#[arg(value_name = "path", default_value = ".")]
path: PathBuf,
// `--identifier` / `--word` / `--regex`, at most one of them.
#[command(flatten)]
mode: SearchModeCli,
}
// Arguments of `asrch sample`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Debug, ClapArgs)]
struct SampleCli {
// Required positional query.
#[arg(value_name = "query")]
query: String,
// Required positional file.
#[arg(value_name = "file")]
file: PathBuf,
// `--clusters N`, validated by `parse_clusters` (1..=MAX_SAMPLE_CLUSTERS).
// `allow_hyphen_values` lets an input such as `-1` reach the value parser,
// which then rejects it with its own message.
#[arg(
long,
value_name = "N",
default_value_t = DEFAULT_SAMPLE_CLUSTERS,
value_parser = parse_clusters,
allow_hyphen_values = true
)]
clusters: usize,
// `--page N`, validated by `parse_positive` (at least 1); defaults to 1.
#[arg(
long,
value_name = "N",
default_value_t = 1,
value_parser = parse_positive,
allow_hyphen_values = true
)]
page: usize,
// Hidden `--context`: accepted by clap only so that `TryFrom<Cli> for Args`
// can reject it with a message pointing the user at `asrch show`.
#[arg(long, hide = true)]
context: Option<usize>,
// `--identifier` / `--word` / `--regex`, at most one of them.
#[command(flatten)]
mode: SearchModeCli,
}
// Arguments of `asrch show`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Debug, ClapArgs)]
struct ShowCli {
// Required positional query.
#[arg(value_name = "query")]
query: String,
// Required positional file.
#[arg(value_name = "file")]
file: PathBuf,
// Optional `--line N`, validated by `parse_positive` (at least 1).
// `allow_hyphen_values` lets an input such as `-1` reach the value parser,
// which then rejects it with its own message.
#[arg(
long,
value_name = "N",
value_parser = parse_positive,
allow_hyphen_values = true
)]
line: Option<usize>,
// `--context N`, validated by `parse_context` (0..=MAX_CONTEXT); defaults to 2.
#[arg(
long,
value_name = "N",
default_value_t = 2,
value_parser = parse_context,
allow_hyphen_values = true
)]
context: usize,
// `--identifier` / `--word` / `--regex`, at most one of them.
#[command(flatten)]
mode: SearchModeCli,
}
// Mode flags for commands that accept literal terms only (used by `SurveyCli`).
// The group is optional and `multiple = false` makes the flags mutually exclusive.
// With neither flag set, the `From` conversion yields `SearchMode::Fixed`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Clone, Copy, Debug, Default, ClapArgs)]
#[group(required = false, multiple = false)]
struct LiteralModeCli {
// `--identifier`: maps to `SearchMode::Identifier`.
#[arg(long)]
identifier: bool,
// `--word`: maps to `SearchMode::Word`.
#[arg(long)]
word: bool,
}
// Mode flags for single-query commands (used by `ScoutCli`, `SampleCli`, `ShowCli`).
// The group is optional and `multiple = false` makes the flags mutually exclusive.
// With no flag set, the `From` conversion yields `SearchMode::Fixed`.
// (`//` rather than `///`: clap's derive would turn doc comments into help text.)
#[derive(Clone, Copy, Debug, Default, ClapArgs)]
#[group(required = false, multiple = false)]
struct SearchModeCli {
// `--identifier`: maps to `SearchMode::Identifier`.
#[arg(long)]
identifier: bool,
// `--word`: maps to `SearchMode::Word`.
#[arg(long)]
word: bool,
// `--regex`: maps to `SearchMode::Regex`.
#[arg(long)]
regex: bool,
}
/// Parses the process command line into validated [`Args`].
///
/// The raw arguments are first screened by `reject_unknown_command_or_option`,
/// which exits with usage help on an unknown command or option. clap then
/// parses the command line, and the result is converted with
/// `TryFrom<Cli> for Args`; a conversion error is printed and the process
/// exits with status 2.
pub(crate) fn parse() -> Args {
    let raw_args = env::args().skip(1).collect::<Vec<String>>();
    reject_unknown_command_or_option(&raw_args);

    Args::try_from(Cli::parse()).unwrap_or_else(|message| exit_error(&message, 2))
}
impl TryFrom<Cli> for Args {
type Error = String;
fn try_from(cli: Cli) -> Result<Self, Self::Error> {
let args = match cli.command {
CliCommand::Survey(command) => {
let mut violations = Vec::new();
if command.terms.len() > MAX_SURVEY_TERMS {
violations.push(format!(
"survey accepts at most {MAX_SURVEY_TERMS} terms; split the comparison"
));
}
if command.paths.len() > MAX_SURVEY_PATHS {
violations.push(format!(
"survey accepts at most {MAX_SURVEY_PATHS} paths; split the comparison"
));
}
if !violations.is_empty() {
return Err(violations.join("\n"));
}
Self {
kind: Kind::Survey,
terms: command.terms,
paths: command.paths,
context: 2,
clusters: DEFAULT_SAMPLE_CLUSTERS,
page: 1,
line: None,
mode: command.mode.into(),
}
}
CliCommand::Scout(command) => Self {
kind: Kind::Scout,
terms: vec![command.query],
paths: vec![command.path],
context: 2,
clusters: DEFAULT_SAMPLE_CLUSTERS,
page: 1,
line: None,
mode: command.mode.into(),
},
CliCommand::Sample(command) => {
if command.context.is_some() {
return Err("sample does not accept --context; it already shows fixed one-line context. Use `asrch show <query> <file> --line N --context M` after choosing a cluster line.".to_string());
}
Self {
kind: Kind::Sample,
terms: vec![command.query],
paths: vec![command.file],
context: 2,
clusters: command.clusters,
page: command.page,
line: None,
mode: command.mode.into(),
}
}
CliCommand::Show(command) => Self {
kind: Kind::Show,
terms: vec![command.query],
paths: vec![command.file],
context: command.context,
clusters: DEFAULT_SAMPLE_CLUSTERS,
page: 1,
line: command.line,
mode: command.mode.into(),
},
};
if args.terms.iter().any(|term| term.is_empty()) {
return Err("queries and survey terms must not be empty".to_string());
}
if args.mode == SearchMode::Regex && has_unescaped_pipe(&args.terms[0]) {
return Err(
"OR regexes are not accepted by single-query commands; use `asrch survey --term ...`"
.to_string(),
);
}
Ok(args)
}
}
impl From<LiteralModeCli> for SearchMode {
    /// Maps the literal-mode flags to a [`SearchMode`].
    ///
    /// `identifier` is checked before `word`; with neither flag set the result
    /// is `SearchMode::Fixed`.
    fn from(mode: LiteralModeCli) -> Self {
        match (mode.identifier, mode.word) {
            (true, _) => Self::Identifier,
            (false, true) => Self::Word,
            (false, false) => Self::Fixed,
        }
    }
}
impl From<SearchModeCli> for SearchMode {
    /// Maps the search-mode flags to a [`SearchMode`].
    ///
    /// The flags are checked in the order `identifier`, `word`, `regex`; with
    /// none set the result is `SearchMode::Fixed`.
    fn from(mode: SearchModeCli) -> Self {
        match (mode.identifier, mode.word, mode.regex) {
            (true, _, _) => Self::Identifier,
            (false, true, _) => Self::Word,
            (false, false, true) => Self::Regex,
            (false, false, false) => Self::Fixed,
        }
    }
}
/// Reports whether `value` contains a `|` that is not preceded by an escaping backslash.
///
/// Backslashes are paired from left to right: `\|` is an escaped pipe, while
/// in `\\|` the backslashes escape each other and the pipe counts as unescaped.
/// Brackets are not tracked, so a `|` inside a character class such as `[|]`
/// is reported as well.
fn has_unescaped_pipe(value: &str) -> bool {
    // True while the previous character was a backslash that itself was not escaped.
    let mut after_backslash = false;
    for ch in value.chars() {
        match ch {
            '|' if !after_backslash => return true,
            '\\' => after_backslash = !after_backslash,
            _ => after_backslash = false,
        }
    }
    false
}
/// Writes `message` to stderr via `print_error`, then terminates the process
/// with exit status `code`. Never returns.
pub(crate) fn exit_error(message: &str, code: i32) -> ! {
    print_error(message);
    std::process::exit(code)
}
/// Writes `message` to stderr followed by a newline, truncated to at most
/// `MAX_LINE_BYTES` bytes.
///
/// The cut is moved back to the nearest UTF-8 character boundary so that the
/// slice never splits a multi-byte character.
fn print_error(message: &str) {
    let limit = message.len().min(MAX_LINE_BYTES);
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=limit)
        .rev()
        .find(|&index| message.is_char_boundary(index))
        .unwrap_or(0);
    eprintln!("{}", &message[..cut]);
}
/// clap value parser for options that require an integer of at least 1.
///
/// # Errors
///
/// - `"requires a positive integer"` when `value` does not parse as `usize`
///   (for example `-1` or `abc`). The wording matches the accepted range, so
///   the message does not suggest that `0` would be accepted.
/// - `"must be at least 1"` when `value` parses to `0`.
fn parse_positive(value: &str) -> Result<usize, String> {
    match value.parse::<usize>() {
        Ok(0) => Err("must be at least 1".to_string()),
        Ok(number) => Ok(number),
        Err(_) => Err("requires a positive integer".to_string()),
    }
}
/// clap value parser for `sample --clusters`.
///
/// Delegates to `parse_positive`, then additionally rejects values above
/// `MAX_SAMPLE_CLUSTERS`.
///
/// # Errors
///
/// Propagates the `parse_positive` error, or returns a range message when the
/// value exceeds `MAX_SAMPLE_CLUSTERS`.
fn parse_clusters(value: &str) -> Result<usize, String> {
    match parse_positive(value)? {
        count if count <= MAX_SAMPLE_CLUSTERS => Ok(count),
        _ => Err(format!("must be in the range 1..={MAX_SAMPLE_CLUSTERS}")),
    }
}
/// clap value parser for `show --context`.
///
/// Accepts integers from 0 up to and including `MAX_CONTEXT`.
///
/// # Errors
///
/// Returns a message when `value` does not parse as `usize`, or when it
/// exceeds `MAX_CONTEXT`.
fn parse_context(value: &str) -> Result<usize, String> {
    let Ok(lines) = value.parse::<usize>() else {
        return Err("requires a non-negative integer".to_string());
    };

    if lines <= MAX_CONTEXT {
        Ok(lines)
    } else {
        Err(format!("must be in the range 0..={MAX_CONTEXT}"))
    }
}
/// Renders the long help text of the top-level `asrch` command as a `String`.
fn render_top_help() -> String {
    Cli::command().render_long_help().to_string()
}
/// Renders the long help text of the subcommand `command_name` as a `String`.
///
/// The subcommand is cloned and given the binary name `asrch <command_name>`
/// before rendering.
///
/// # Panics
///
/// Panics if `command_name` is not a subcommand of `Cli`.
fn render_command_help(command_name: &str) -> String {
    let root = Cli::command();
    let template = root
        .find_subcommand(command_name)
        .expect("known subcommand");
    let mut scoped = template
        .clone()
        .bin_name(format!("asrch {command_name}"));
    scoped.render_long_help().to_string()
}
/// Screens the raw arguments (program name already removed) before clap parses them.
///
/// - No arguments, or a leading `-h` / `--help` / `-V` / `--version`: returns.
/// - A leading token that starts with `-`: exits with "unknown option" and the top-level help.
/// - A leading token that is not one of the four subcommands: exits with
///   "unknown command" and the top-level help.
/// - A known subcommand followed by an option it does not define: exits with
///   "unknown option" and that subcommand's help.
///
/// Every exit path uses status 2.
fn reject_unknown_command_or_option(values: &[String]) {
    let Some((first, rest)) = values.split_first() else {
        return;
    };

    match first.as_str() {
        "-h" | "--help" | "-V" | "--version" => {}
        "survey" | "scout" | "sample" | "show" => {
            if let Some(option) = find_unknown_option(first, rest) {
                exit_error_with_command_help(first, &format!("unknown option: {option}"));
            }
        }
        other if other.starts_with('-') => {
            exit_error_with_top_help(&format!("unknown option: {other}"));
        }
        other => {
            exit_error_with_top_help(&format!("unknown command: {other}"));
        }
    }
}
fn find_unknown_option<'a>(command: &str, values: &'a [String]) -> Option<&'a str> {
let root = Cli::command();
let command = root.find_subcommand(command).expect("known subcommand");
let mut expect_value = false;
for value in values {
if expect_value {
expect_value = false;
continue;
}
if matches!(value.as_str(), "-h" | "--help") {
continue;
}
if value == "--" {
break;
}
if let Some(long) = value.strip_prefix("--") {
let (name, inline_value) = long
.split_once('=')
.map_or((long, false), |(name, _)| (name, true));
let Some(argument) = command
.get_arguments()
.find(|argument| argument.get_long() == Some(name))
else {
return Some(value);
};
expect_value = argument.get_action().takes_values() && !inline_value;
} else if let Some(short) = value.strip_prefix('-')
&& !short.is_empty()
{
let name = short.chars().next().expect("non-empty short option");
let Some(argument) = command
.get_arguments()
.find(|argument| argument.get_short() == Some(name))
else {
return Some(value);
};
expect_value = argument.get_action().takes_values() && short.chars().count() == 1;
}
}
None
}
/// Prints `message`, a blank line, and the top-level help to stderr, then
/// exits with status 2. Never returns.
fn exit_error_with_top_help(message: &str) -> ! {
    print_error(message);
    eprintln!();
    let help = render_top_help();
    eprint!("{help}");
    std::process::exit(2)
}
/// Prints `message`, a blank line, and the help of subcommand `command_name`
/// to stderr, then exits with status 2. Never returns.
fn exit_error_with_command_help(command_name: &str, message: &str) -> ! {
    print_error(message);
    eprintln!();
    let help = render_command_help(command_name);
    eprint!("{help}");
    std::process::exit(2)
}