use std::collections::{BTreeMap, HashSet};
use std::env;
use std::fs;
use std::path::Path;
use crate::cli::{Args, Kind, MAX_LINE_BYTES, SearchMode, exit_error};
use crate::search::{Match, SearchResult, search};
/// Largest number of scanned matches that `show` renders when no explicit
/// line was requested; above this, `run` exits with an error asking the user
/// to narrow the query.
const SHOW_MAX_MATCHES: usize = 20;
/// A run of nearby matches within a single file, as built by `cluster_matches`.
#[derive(Clone, Debug)]
struct MatchCluster {
/// Match that opened the cluster.
first: Match,
/// Most recent match merged into the cluster.
last: Match,
/// Number of matches merged into the cluster, including `first`.
hits: usize,
}
/// One page of clusters chosen by `select_clusters`, plus paging totals.
#[derive(Debug)]
struct SampleSelection {
/// Clusters that fall on the requested page.
clusters: Vec<MatchCluster>,
/// Number of clusters across all pages.
total_clusters: usize,
/// Number of pages needed to list every cluster at the requested page size.
total_pages: usize,
}
/// Per-term totals across every surveyed path (one row of the `overall` table).
#[derive(Debug)]
struct SurveyOverallRow {
/// Search term this row describes.
term: String,
/// Sum of scanned matches over all paths.
matches: usize,
/// Sum of per-path file counts (each path's count is added as-is).
files: usize,
/// Label of the path with the most scanned matches, or "-" when nothing matched.
dominant_path: String,
/// True when any per-path search reported `scan_limited`.
scan_limited: bool,
}
/// Result of one term searched under one path (one row of a `by_path` table).
#[derive(Debug)]
struct SurveyPathRow {
/// Search term this row describes.
term: String,
/// Scanned matches reported for this term under this path.
matches: usize,
/// Number of files with at least one counted match under this path.
files: usize,
/// Highest-ranked directory, shown relative to the searched path, or "-" if none.
top_directory: String,
/// True when the search for this term and path reported `scan_limited`.
scan_limited: bool,
}
/// Stdout writer that caps the byte length of every value it prints.
struct Budget {
    /// Maximum number of bytes printed per written value; `None` means no cap.
    max_line_bytes: Option<usize>,
}

impl Budget {
    /// Creates a writer with the given per-value byte cap.
    fn new(max_line_bytes: Option<usize>) -> Self {
        Budget { max_line_bytes }
    }

    /// Prints `value` followed by a newline, truncated to the byte cap.
    ///
    /// The cut point is moved back to the nearest UTF-8 character boundary so
    /// the slice never splits a multi-byte character.
    fn write(&mut self, value: impl AsRef<str>) {
        let text = value.as_ref();
        let limit = self
            .max_line_bytes
            .map_or(text.len(), |cap| cap.min(text.len()));
        // Index 0 is always a character boundary, so the search cannot fail.
        let cut = (0..=limit)
            .rev()
            .find(|&index| text.is_char_boundary(index))
            .unwrap_or(0);
        println!("{}", &text[..cut]);
    }

    /// Prints a line belonging to an already started block; currently the
    /// same as [`Budget::write`].
    fn write_started_block(&mut self, value: impl AsRef<str>) {
        self.write(value);
    }
}
/// Validates the parsed arguments, runs the requested command and prints its report.
///
/// Usage problems (missing path, non-file target for `sample`/`show`, line
/// outside the file, too many matches for `show`) are reported through
/// `exit_error` with code 2; search failures use code 1.
///
/// NOTE(review): indexes `args.paths[0]` and `args.terms[0]`, so it assumes
/// argument parsing guarantees at least one path and one term — confirm in `cli`.
pub(crate) fn run(args: Args) {
    for path in &args.paths {
        if !path.exists() {
            exit_error(&format!("path does not exist: {}", path.display()), 2);
        }
    }
    // `sample` and `show` read snippets back from a single file, so anything
    // that is not a regular file is rejected here.
    if matches!(args.kind, Kind::Sample | Kind::Show) && !args.paths[0].is_file() {
        exit_error(
            match args.kind {
                Kind::Sample => "sample requires an explicit file path",
                Kind::Show => "show requires an explicit file path",
                _ => unreachable!(),
            },
            2,
        );
    }
    if args.kind == Kind::Show
        && let Some(line) = args.line
    {
        let content = fs::read_to_string(&args.paths[0]).unwrap_or_else(|_| {
            exit_error(
                &format!("could not read file: {}", args.paths[0].display()),
                2,
            )
        });
        let line_count = content.lines().count();
        // Lines are 1-based: 0 is as invalid as a line past the end and must
        // not reach the snippet printer, which indexes `line - 1`.
        if line == 0 || line > line_count {
            exit_error(
                &format!("line is outside file: {line} (1..={line_count})"),
                2,
            );
        }
    }
    let mut out = Budget::new(Some(MAX_LINE_BYTES));
    if args.kind == Kind::Survey {
        // Survey runs its own searches, one per term and path.
        print_survey(&args, &mut out);
    } else {
        let result = match search(&args, &args.terms[0], &args.paths[0]) {
            Ok(result) => result,
            Err(message) => exit_error(&format!("asrch: {message}"), 1),
        };
        // Without an explicit line, `show` prints a snippet per match, so it
        // refuses result sets that are too large or were cut short.
        if args.kind == Kind::Show
            && args.line.is_none()
            && (result.scan_limited || result.scanned_matches > SHOW_MAX_MATCHES)
        {
            exit_error(
                &format!(
                    "show has too many matches ({}); narrow the query before showing snippets",
                    total_label(&result)
                ),
                2,
            );
        }
        match args.kind {
            Kind::Survey => unreachable!(),
            Kind::Scout => print_scout(&args, &result, &mut out),
            Kind::Sample => print_sample(&args, &result, &mut out),
            Kind::Show => print_show(&args, &result, &mut out),
        }
    }
}
/// Formats the scanned match count, prefixed with "at least " when the result
/// is flagged `scan_limited` and the count is therefore only a lower bound.
fn total_label(result: &SearchResult) -> String {
    let prefix = if result.scan_limited { "at least " } else { "" };
    format!("{prefix}{}", result.scanned_matches)
}
/// Renders `value` as a single table cell.
///
/// Non-empty values made only of ASCII alphanumerics and `_ - . / =` are
/// returned unchanged. Everything else is wrapped in double quotes, with `"`
/// and `\` backslash-escaped and newline, carriage return and tab written as
/// `\n`, `\r` and `\t`, so a value can never spill onto another output line.
fn toon_value(value: &str) -> String {
    let is_bare = !value.is_empty()
        && value
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-' | '.' | '/' | '='));
    if is_bare {
        return value.to_string();
    }
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        match ch {
            '"' | '\\' => {
                quoted.push('\\');
                quoted.push(ch);
            }
            // A raw line break would split the row across two output lines.
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            _ => quoted.push(ch),
        }
    }
    quoted.push('"');
    quoted
}
/// Prints the `survey` report: every term searched under every path.
///
/// Output consists of a header, an `overall` table with per-term totals, a
/// `by_path` section with one table per path that produced rows, an optional
/// `warnings:` list and a closing `next:` hint. A search error terminates the
/// process through `exit_error` with code 1.
fn print_survey(args: &Args, out: &mut Budget) {
    /// Renders a match count, prefixed with `>=` when the row is scan-limited.
    fn match_label(matches: usize, scan_limited: bool) -> String {
        if scan_limited {
            format!(">={matches}")
        } else {
            matches.to_string()
        }
    }

    out.write("survey:");
    out.write(format!(" terms: {}", args.terms.len()));
    out.write(format!(" paths: {}", args.paths.len()));
    out.write(format!(" mode: {}", args.mode.label()));

    let mut overall_rows = Vec::new();
    let mut by_path: BTreeMap<String, Vec<SurveyPathRow>> = BTreeMap::new();
    for term in &args.terms {
        let mut total_matches = 0;
        let mut total_files = 0;
        let mut scan_limited = false;
        let mut dominant_path = "-".to_string();
        let mut dominant_matches = 0;
        for path in &args.paths {
            let result = match search(args, term, path) {
                Ok(result) => result,
                Err(message) => exit_error(&format!("asrch: {message}"), 1),
            };
            let path_label = path.display().to_string();
            total_matches += result.scanned_matches;
            total_files += result.counts.len();
            scan_limited |= result.scan_limited;
            // Strictly greater: on a tie the earlier path stays dominant.
            if result.scanned_matches > dominant_matches {
                dominant_matches = result.scanned_matches;
                dominant_path = path_label.clone();
            }
            // Paths without matches get no row unless the scan was cut short.
            if result.scanned_matches == 0 && !result.scan_limited {
                continue;
            }
            let top_directory = ranked_directories(&result.counts)
                .into_iter()
                .next()
                .map(|(directory, _)| display_relative_path(&directory, path))
                .unwrap_or_else(|| "-".to_string());
            by_path.entry(path_label).or_default().push(SurveyPathRow {
                term: term.clone(),
                matches: result.scanned_matches,
                files: result.counts.len(),
                top_directory,
                scan_limited: result.scan_limited,
            });
        }
        overall_rows.push(SurveyOverallRow {
            term: term.clone(),
            matches: total_matches,
            files: total_files,
            dominant_path,
            scan_limited,
        });
    }

    out.write("overall[term,matches,files,dominant_path]:");
    for row in &overall_rows {
        out.write(format!(
            " {},{},{},{}",
            toon_value(&row.term),
            match_label(row.matches, row.scan_limited),
            row.files,
            toon_value(&row.dominant_path)
        ));
    }

    out.write("by_path:");
    for (path, rows) in by_path {
        out.write(format!(
            " {}[term,matches,files,top_directory]:",
            toon_value(&path)
        ));
        for row in rows {
            out.write(format!(
                " {},{},{},{}",
                toon_value(&row.term),
                match_label(row.matches, row.scan_limited),
                row.files,
                toon_value(&row.top_directory)
            ));
        }
    }

    // Tracks whether the `warnings:` header is already out, so every warning
    // item is preceded by exactly one header regardless of which ones fire.
    let mut warnings_started = false;

    // A term dominates when it holds at least 80% of all matches; with a
    // single term that is trivially true, so the check needs two or more.
    let sum: usize = overall_rows.iter().map(|row| row.matches).sum();
    let dominant = overall_rows
        .iter()
        .max_by_key(|row| row.matches)
        .filter(|row| {
            overall_rows.len() > 1
                && sum > 0
                && row.matches.saturating_mul(100) >= sum.saturating_mul(80)
        });
    if let Some(row) = dominant {
        out.write("warnings:");
        warnings_started = true;
        out.write(format!(
            " - {} dominates survey: {}/{sum} matching lines",
            toon_value(&row.term),
            row.matches
        ));
    }

    if args.mode == SearchMode::Fixed {
        let short_terms: Vec<_> = args
            .terms
            .iter()
            .filter(|term| term.chars().count() <= 3)
            .collect();
        if !short_terms.is_empty() {
            if !warnings_started {
                out.write("warnings:");
            }
            let terms = short_terms
                .iter()
                .map(|term| toon_value(term))
                .collect::<Vec<_>>()
                .join(",");
            out.write(format!(
                " - short partial-match terms: {terms}; consider --identifier or --word"
            ));
        }
    }

    out.write("next: choose one useful term and path, then run asrch scout");
}
/// Prints the `scout` report for one term and one path: a summary header,
/// any breadth warning, then the five highest-ranked directories and files.
///
/// When nothing was scanned as a match, both tables are printed empty and the
/// closing hint suggests a different term or path.
fn print_scout(args: &Args, result: &SearchResult, out: &mut Budget) {
    let base = &args.paths[0];
    let has_matches = result.scanned_matches != 0;

    out.write("scout:");
    out.write(format!(" query: {}", toon_value(&args.terms[0])));
    out.write(format!(
        " path: {}",
        toon_value(&base.display().to_string())
    ));
    out.write(format!(" mode: {}", args.mode.label()));
    out.write(format!(" matches: {}", total_label(result)));
    out.write(format!(" files: {}", result.counts.len()));
    broad_notice_toon(result, out);

    out.write("top_directories[path,matches]:");
    if has_matches {
        for (directory, hits) in ranked_directories(&result.counts).iter().take(5) {
            out.write(format!(
                " {},{hits}",
                toon_value(&display_relative_path(directory, base))
            ));
        }
    }

    out.write("top_files[path,matches]:");
    if has_matches {
        for (file, hits) in ranked_counts(&result.counts).into_iter().take(5) {
            out.write(format!(
                " {},{hits}",
                toon_value(&display_relative_path(file, base))
            ));
        }
    }

    out.write(if has_matches {
        "next: narrow the path, then use asrch sample"
    } else {
        "next: try another term or path"
    });
}
fn display_relative_path(value: &str, base: &Path) -> String {
let path = Path::new(value);
if let (Ok(path), Ok(base)) = (path.canonicalize(), base.canonicalize())
&& let Some(relative) = strip_path_prefix(&path, &base)
{
return relative;
}
if let Some(relative) = strip_path_prefix(path, base) {
return relative;
}
if path.is_absolute()
&& let Ok(cwd) = env::current_dir()
&& let Some(relative) = strip_path_prefix(path, &cwd)
{
return relative;
}
value.to_string()
}
/// Returns `path` with the `base` prefix removed, as a display string.
///
/// Yields `"."` when the two paths are equal and `None` when `base` is not a
/// prefix of `path`.
fn strip_path_prefix(path: &Path, base: &Path) -> Option<String> {
    let remainder = path.strip_prefix(base).ok()?;
    let label = if remainder.as_os_str().is_empty() {
        String::from(".")
    } else {
        remainder.display().to_string()
    };
    Some(label)
}
/// Lists the entries of `counts` ordered by count, highest first; entries
/// with equal counts are ordered by path ascending.
fn ranked_counts(counts: &BTreeMap<String, usize>) -> Vec<(&String, &usize)> {
    let mut entries = Vec::with_capacity(counts.len());
    entries.extend(counts.iter());
    entries.sort_by(|left, right| right.1.cmp(left.1).then(left.0.cmp(right.0)));
    entries
}
/// Sums per-file counts by parent directory and ranks the directories by
/// total, highest first; equal totals are ordered by directory ascending.
///
/// Files whose parent is missing or empty are attributed to `"."`.
fn ranked_directories(counts: &BTreeMap<String, usize>) -> Vec<(String, usize)> {
    let mut totals: BTreeMap<String, usize> = BTreeMap::new();
    for (file, hits) in counts {
        let parent = Path::new(file)
            .parent()
            .map_or_else(String::new, |dir| dir.display().to_string());
        let directory = if parent.is_empty() {
            String::from(".")
        } else {
            parent
        };
        *totals.entry(directory).or_insert(0) += *hits;
    }

    let mut ranking: Vec<(String, usize)> = totals.into_iter().collect();
    // Comparing (their count, our path) with (our count, their path) sorts by
    // count descending and then by path ascending.
    ranking.sort_by(|left, right| (right.1, &left.0).cmp(&(left.1, &right.0)));
    ranking
}
/// Prints the `sample` report: one summary line, then the clusters on the
/// requested page with a one-line-context snippet for each cluster's first
/// match and, when it sits on a different line, its last match.
///
/// Ends with paging and "more matches" hints where they apply.
fn print_sample(args: &Args, result: &SearchResult, out: &mut Budget) {
    let page = select_clusters(&result.matches, args.clusters, args.page);
    out.write(format!(
        "sample: query={:?} path={} mode={} matches={} files={} clusters={} page={}/{} selected={}",
        args.terms[0],
        args.paths[0].display(),
        args.mode.label(),
        total_label(result),
        result.counts.len(),
        page.total_clusters,
        args.page,
        page.total_pages,
        page.clusters.len()
    ));
    broad_notice(result, out);

    if result.matches.is_empty() {
        out.write("No matches.");
        return;
    }
    if page.clusters.is_empty() {
        out.write("No clusters on this page.");
        if page.total_pages > 0 {
            out.write(format!("next: use --page 1..{}", page.total_pages));
        }
        return;
    }

    out.write("clusters[index,range,hits,first,last]:");
    // Cluster numbers continue across pages, so the first one on this page
    // follows the last one on the previous page.
    let first_number = (args.page - 1) * args.clusters + 1;
    for (offset, cluster) in page.clusters.iter().enumerate() {
        let number = first_number + offset;
        let (first, last) = (&cluster.first, &cluster.last);
        out.write(format!(
            " {number},{}..{},{},{}:{}:{},{}:{}:{}",
            first.line,
            last.line,
            cluster.hits,
            first.path,
            first.line,
            first.column,
            last.path,
            last.line,
            last.column
        ));
        out.write(format!("cluster {number} first:"));
        print_snippet(first, 1, out);
        if last.line != first.line {
            out.write(format!("cluster {number} last:"));
            print_snippet(last, 1, out);
        }
    }

    if args.page < page.total_pages {
        out.write(format!("next: use --page {}", args.page + 1));
    }
    let truncated = result.scan_limited || result.scanned_matches > result.matches.len();
    if truncated {
        out.write("More matches exist; narrow the query or path before `show`.");
    }
}
/// Clusters `matches` and returns the clusters on the 1-based `page`, with
/// `per_page` clusters per page, together with the paging totals.
///
/// Degenerate paging never panics: `per_page == 0` reports zero pages and an
/// empty selection, and `page == 0` (or a page whose offset overflows) yields
/// an empty selection while the totals are still reported.
fn select_clusters(matches: &[Match], per_page: usize, page: usize) -> SampleSelection {
    let clusters = cluster_matches(matches);
    let total_clusters = clusters.len();
    // `div_ceil` panics on a zero divisor, so a zero page size is handled first.
    let total_pages = if per_page == 0 {
        0
    } else {
        total_clusters.div_ceil(per_page)
    };
    // `None` means the requested page cannot exist (page 0 or offset overflow).
    let start = page
        .checked_sub(1)
        .and_then(|pages_before| pages_before.checked_mul(per_page));
    let selected = match start {
        Some(start) => clusters.into_iter().skip(start).take(per_page).collect(),
        None => Vec::new(),
    };
    SampleSelection {
        clusters: selected,
        total_clusters,
        total_pages,
    }
}
/// Groups matches into clusters of nearby lines, file by file.
///
/// Files are visited in path order. Within a file, matches are taken in
/// their input order, and a match joins the current cluster when its line is
/// at most two past the cluster's last line; otherwise it opens a new cluster.
fn cluster_matches(matches: &[Match]) -> Vec<MatchCluster> {
    let mut per_file: BTreeMap<&str, Vec<&Match>> = BTreeMap::new();
    for found in matches {
        per_file.entry(found.path.as_str()).or_default().push(found);
    }

    let mut clusters = Vec::new();
    for file_matches in per_file.values() {
        // Built per file so a cluster never continues into the next file.
        let mut file_clusters: Vec<MatchCluster> = Vec::new();
        for &found in file_matches {
            match file_clusters.last_mut() {
                Some(open) if found.line <= open.last.line + 2 => {
                    open.last = found.clone();
                    open.hits += 1;
                }
                _ => file_clusters.push(MatchCluster {
                    first: found.clone(),
                    last: found.clone(),
                    hits: 1,
                }),
            }
        }
        clusters.append(&mut file_clusters);
    }
    clusters
}
/// Prints the `show` report: a summary line followed by context snippets.
///
/// With an explicit line only that line's snippet is printed. Otherwise one
/// snippet is printed per distinct matching line number, in match order.
fn print_show(args: &Args, result: &SearchResult, out: &mut Budget) {
    let target = &args.paths[0];
    out.write(format!(
        "show: query={:?} file={} mode={} matches={} context={}",
        args.terms[0],
        target.display(),
        args.mode.label(),
        total_label(result),
        args.context
    ));

    match args.line {
        Some(line) => print_line_snippet(target, line, args.context, out),
        None if result.matches.is_empty() => out.write("No matches."),
        None => {
            // Several matches on one line would otherwise repeat the snippet.
            let mut printed_lines = HashSet::new();
            for item in &result.matches {
                if printed_lines.insert(item.line) {
                    print_snippet_started_block(item, args.context, out);
                }
            }
        }
    }
}
/// Prints the snippet around the 1-based `line` of `path` with `context`
/// lines on each side.
///
/// Writes a message instead of a snippet when the file cannot be read or
/// when `line` is 0 or past the last line.
fn print_line_snippet(path: &Path, line: usize, context: usize, out: &mut Budget) {
    let Ok(content) = fs::read_to_string(path) else {
        out.write(format!("Could not read file: {}", path.display()));
        return;
    };
    // `checked_sub` rejects line 0 and `nth` rejects lines past the end, so
    // no index is ever computed for a line outside the file.
    let Some(text) = line
        .checked_sub(1)
        .and_then(|index| content.lines().nth(index))
    else {
        out.write(format!(
            "line is outside file: {} (1..={})",
            line,
            content.lines().count()
        ));
        return;
    };
    let item = Match {
        path: path.display().to_string(),
        line,
        column: 1,
        text: text.to_string(),
    };
    print_snippet_started_block(&item, context, out);
}
/// Prints `item` with up to `context` lines before and after it, re-reading
/// the file named by `item.path`.
///
/// Each line is printed with its 1-based number and the matching line is
/// marked with `>`. If the file cannot be read, the stored match text is
/// printed on one line instead. If the file no longer reaches the match line,
/// only the `--` header is printed rather than panicking.
fn print_snippet(item: &Match, context: usize, out: &mut Budget) {
    let path = Path::new(&item.path);
    let Ok(content) = fs::read_to_string(path) else {
        out.write(format_match(item));
        return;
    };
    // Zero-based index of the first line shown and the number of lines in
    // the window; saturating arithmetic keeps huge `context` values safe.
    let first = item.line.saturating_sub(context.saturating_add(1));
    let window = item.line.saturating_add(context).saturating_sub(first);
    out.write(format!("-- {}:{}:{}", item.path, item.line, item.column));
    // `skip`/`take` stop at the end of the file, so no slice bounds can be
    // violated even if the file shrank since the search ran.
    for (index, text) in content.lines().enumerate().skip(first).take(window) {
        let number = index + 1;
        let marker = if number == item.line { '>' } else { ' ' };
        out.write(format!("{marker}{number:>6} | {text}"));
    }
}
/// Prints `item` with up to `context` lines before and after it through
/// `Budget::write_started_block`, re-reading the file named by `item.path`.
///
/// Each line is printed with its 1-based number and the matching line is
/// marked with `>`. If the file cannot be read, the stored match text is
/// printed on one line instead. If the file no longer reaches the match line,
/// only the `--` header is printed rather than panicking.
fn print_snippet_started_block(item: &Match, context: usize, out: &mut Budget) {
    let path = Path::new(&item.path);
    let Ok(content) = fs::read_to_string(path) else {
        out.write_started_block(format_match(item));
        return;
    };
    // Zero-based index of the first line shown and the number of lines in
    // the window; saturating arithmetic keeps huge `context` values safe.
    let first = item.line.saturating_sub(context.saturating_add(1));
    let window = item.line.saturating_add(context).saturating_sub(first);
    out.write_started_block(format!("-- {}:{}:{}", item.path, item.line, item.column));
    // `skip`/`take` stop at the end of the file, so no slice bounds can be
    // violated even if the file shrank since the search ran.
    for (index, text) in content.lines().enumerate().skip(first).take(window) {
        let number = index + 1;
        let marker = if number == item.line { '>' } else { ' ' };
        out.write_started_block(format!("{marker}{number:>6} | {text}"));
    }
}
/// Formats a match as `path:line:column:text` on a single line.
fn format_match(item: &Match) -> String {
    format!(
        "{path}:{line}:{column}:{text}",
        path = item.path,
        line = item.line,
        column = item.column,
        text = item.text
    )
}
/// Writes a one-line plain-text notice when the query is broad: either the
/// result is flagged `scan_limited`, or it has more than 1000 scanned matches
/// or more than 100 files. Writes nothing otherwise.
fn broad_notice(result: &SearchResult, out: &mut Budget) {
    let notice = if result.scan_limited {
        Some("Query is too broad; scan limit reached. Narrow the query or path.")
    } else if result.scanned_matches > 1_000 || result.counts.len() > 100 {
        Some("Query is broad; narrow the query or path before reading matches.")
    } else {
        None
    };
    if let Some(text) = notice {
        out.write(text);
    }
}
/// Writes a `warnings:` header with one item when the query is broad: either
/// the result is flagged `scan_limited`, or it has more than 1000 scanned
/// matches or more than 100 files. Writes nothing otherwise.
fn broad_notice_toon(result: &SearchResult, out: &mut Budget) {
    let detail = if result.scan_limited {
        " - query too broad: scan limit reached"
    } else if result.scanned_matches > 1_000 || result.counts.len() > 100 {
        " - query broad: narrow query or path before reading matches"
    } else {
        return;
    };
    out.write("warnings:");
    out.write(detail);
}