use std::collections::HashMap;
use grep::matcher::Matcher;
use crate::confirm::{SearchOptions, build_matcher};
use crate::cursor::{self, Cursor, Mode};
use crate::effective_pattern;
/// Page size used by `CompactOpts::default()`.
pub const DEFAULT_PAGE_SIZE: usize = 50;
/// Column limit used by `CompactOpts::default()`; line text longer than this
/// many characters is truncated when a match page is rendered.
pub const DEFAULT_MAX_COLS: usize = 200;
/// Options that control how `format` renders and paginates search output.
pub struct CompactOpts {
/// Which view to render: individual matches, a file list, or per-file counts.
pub mode: Mode,
/// Keyset resume point as `(path, line number)`. Entries whose key is less
/// than or equal to it are skipped; the file-oriented modes compare the path
/// only. `None` starts from the first entry.
pub start_after: Option<(String, u64)>,
/// Maximum number of entries (matches or files, depending on `mode`) per
/// page. `format` treats a value of 0 as 1.
pub page_size: usize,
/// Maximum number of characters of line text rendered before truncation.
pub max_cols: usize,
}
impl Default for CompactOpts {
fn default() -> Self {
Self {
mode: Mode::Matches,
start_after: None,
page_size: DEFAULT_PAGE_SIZE,
max_cols: DEFAULT_MAX_COLS,
}
}
}
/// One rendered page of search output plus the bookkeeping needed to resume.
pub struct Page {
/// Bracketed one-line summary, e.g. the index range shown and the totals;
/// `[no matches]` when there is nothing to report.
pub header: String,
/// Rendered entries for this page; empty when the page has no entries.
pub body: String,
/// Number of match rows in the whole input, not just this page.
pub total_matches: usize,
/// Number of distinct paths that contain at least one match.
pub total_files: usize,
/// 1-based position of the first entry on this page, or 0 if the page is empty.
pub first_index: usize,
/// 1-based position of the last entry on this page, or 0 if the page is empty.
pub last_index: usize,
/// Key of the last entry rendered, usable as `CompactOpts::start_after` for
/// the next page. The file-oriented modes store a line number of 0.
pub last_key: Option<(String, u64)>,
/// Whether entries remain after this page.
pub has_more: bool,
/// Value returned by `cursor::fingerprint` over the sorted `(path, line)`
/// keys of every match; identical for every page of the same input.
pub fingerprint: u64,
}
impl Page {
    /// Builds the cursor that resumes immediately after this page.
    ///
    /// Returns `None` when `has_more` is false. Otherwise the cursor carries
    /// the caller-supplied search parameters, this page's `last_key` (a line
    /// number of 0 when there is no key), the current match total, and the
    /// fingerprint truncated to its low 32 bits.
    pub fn next_cursor(
        &self,
        mode: Mode,
        pattern: String,
        opts: SearchOptions,
        page_size: usize,
        root_hint: Option<String>,
    ) -> Option<Cursor> {
        if !self.has_more {
            return None;
        }
        let (last_path, last_lineno) = match &self.last_key {
            Some((path, lineno)) => (Some(path.clone()), *lineno),
            None => (None, 0),
        };
        Some(Cursor {
            mode,
            pattern,
            opts,
            page_size,
            last_path,
            last_lineno,
            prev_total: self.total_matches,
            // The cursor stores a 32-bit fingerprint; keep the low bits.
            fingerprint: self.fingerprint as u32,
            root_hint,
        })
    }

    /// Describes a change in the result set since the previous page.
    ///
    /// `prev` is the `(total matches, 32-bit fingerprint)` pair recorded for
    /// the previous page. Returns a note only when `prev` is present and its
    /// fingerprint differs from this page's truncated fingerprint.
    pub fn staleness_note(&self, prev: Option<(usize, u32)>) -> Option<String> {
        let (prev_total, prev_fp) = prev?;
        // Compare at the same 32-bit width the cursor stores.
        let current_fp = self.fingerprint as u32;
        if prev_fp == current_fp {
            return None;
        }
        Some(format!(
            "result set changed since the previous page ({prev_total} -> {} matches)",
            self.total_matches
        ))
    }
}
/// Returns the English plural suffix for a count: empty for exactly one,
/// `"s"` for everything else (including zero).
fn plural(n: usize) -> &'static str {
    match n {
        1 => "",
        _ => "s",
    }
}
/// One parsed output line, borrowing its path and text from the input.
struct Row<'a> {
/// File path the line belongs to.
path: &'a str,
/// Line number within that file.
lineno: u64,
/// True for a match line (`path:N:text`), false for a context line (`path-N-text`).
is_match: bool,
/// Line content after the second separator.
text: &'a str,
/// Index of the contiguous group this row belongs to; `parse_rows` advances
/// it when the path changes or a `--` separator precedes the row.
block: usize,
}
/// Parses raw grep-style output and renders one page of it.
///
/// `raw` is decoded as UTF-8 with lossy replacement and split into rows.
/// Match rows are ordered by `(path, line number)`; that ordering drives the
/// totals, the fingerprint and the pagination. `c.page_size` is raised to 1
/// if it is 0. `pattern` and `opts` are only used by the match view, which
/// locates the match inside each line so truncation can keep it visible.
pub fn format(raw: &[u8], pattern: &str, opts: SearchOptions, c: CompactOpts) -> Page {
    let text = String::from_utf8_lossy(raw);
    let rows = parse_rows(&text);

    // Indices of match rows, in keyset order.
    let mut match_idx: Vec<usize> = (0..rows.len()).filter(|&i| rows[i].is_match).collect();
    match_idx.sort_by_key(|&i| (rows[i].path, rows[i].lineno));
    let total_matches = match_idx.len();

    // `match_idx` is already ordered by path, so removing consecutive
    // duplicates leaves the sorted list of distinct files with a match.
    let mut files: Vec<&str> = match_idx.iter().map(|&i| rows[i].path).collect();
    files.dedup();
    let total_files = files.len();

    let fingerprint =
        cursor::fingerprint(match_idx.iter().map(|&i| (rows[i].path, rows[i].lineno)));
    let page_size = c.page_size.max(1);

    match c.mode {
        Mode::Files | Mode::Count => render_by_file(
            &rows,
            &match_idx,
            &files,
            total_matches,
            total_files,
            &c,
            page_size,
            fingerprint,
        ),
        Mode::Matches => render_matches(
            &rows,
            &match_idx,
            total_matches,
            total_files,
            pattern,
            opts,
            &c,
            page_size,
            fingerprint,
        ),
    }
}
/// Renders one page of the match view.
///
/// `match_idx` holds the indices of match rows in `(path, line number)` order.
/// Matches whose key is at or before `c.start_after` are skipped, then up to
/// `page_size` matches are selected. Context rows are included when their
/// nearest match (within the same block) is on the page. Output is grouped
/// under one path heading per file, with `:` after the line number for
/// matches and `-` for context lines.
#[allow(clippy::too_many_arguments)]
fn render_matches(
    rows: &[Row],
    match_idx: &[usize],
    total_matches: usize,
    total_files: usize,
    pattern: &str,
    opts: SearchOptions,
    c: &CompactOpts,
    page_size: usize,
    fingerprint: u64,
) -> Page {
    let key_of = |i: usize| (rows[i].path, rows[i].lineno);

    // Number of matches already delivered on earlier pages.
    let skip = c.start_after.as_ref().map_or(0, |(path, lineno)| {
        let resume = (path.as_str(), *lineno);
        match_idx.iter().filter(|&&i| key_of(i) <= resume).count()
    });

    // `skip` counts elements of `match_idx`, so it never exceeds its length.
    let page_end = skip.saturating_add(page_size).min(match_idx.len());
    let page = &match_idx[skip..page_end];
    let rendered = page.len();

    let (first_index, last_index) = if rendered == 0 {
        (0, 0)
    } else {
        (skip + 1, skip + rendered)
    };
    let has_more = skip + rendered < total_matches;
    let last_key = page
        .last()
        .map(|&i| (rows[i].path.to_string(), rows[i].lineno));

    let header = if total_matches == 0 {
        "[no matches]".to_string()
    } else {
        format!(
            "[matches {first_index}-{last_index} of {total_matches} in {total_files} file{}]",
            plural(total_files)
        )
    };

    // A row is shown when the match that owns it is on this page: a match
    // owns itself, a context row is owned by its nearest match.
    let on_page: std::collections::HashSet<usize> = page.iter().copied().collect();
    let nearest = nearest_match_per_row(rows);
    let mut to_render: Vec<usize> = Vec::new();
    for (i, row) in rows.iter().enumerate() {
        let owner = if row.is_match { Some(i) } else { nearest[i] };
        if owner.is_some_and(|m| on_page.contains(&m)) {
            to_render.push(i);
        }
    }
    to_render.sort_by_key(|&i| key_of(i));

    // If the matcher cannot be built, lines are still rendered, just without
    // a centre hint for truncation.
    let matcher = build_matcher(&effective_pattern(pattern, opts), opts).ok();

    let mut body = String::new();
    let mut cur_path: Option<&str> = None;
    for row in to_render.iter().map(|&i| &rows[i]) {
        if cur_path != Some(row.path) {
            cur_path = Some(row.path);
            body.push_str(row.path);
            body.push('\n');
        }
        let (sep, center) = if row.is_match {
            let hit = matcher
                .as_ref()
                .and_then(|m| m.find(row.text.as_bytes()).ok().flatten())
                .map(|mat| mat.start());
            (':', hit)
        } else {
            ('-', None)
        };
        let shown = truncate_centered(row.text, c.max_cols, center);
        body.push_str(" ");
        body.push_str(&row.lineno.to_string());
        body.push(sep);
        body.push(' ');
        body.push_str(&shown);
        body.push('\n');
    }

    Page {
        header,
        body,
        total_matches,
        total_files,
        first_index,
        last_index,
        has_more,
        last_key,
        fingerprint,
    }
}
/// Renders one page of the file-oriented views (file list or per-file counts).
///
/// `files` is the sorted list of distinct paths that contain a match. Files
/// whose path is at or before the path in `c.start_after` are skipped, then up
/// to `page_size` files are listed. In count mode each path is followed by
/// `:` and its number of matches; otherwise only the path is printed.
#[allow(clippy::too_many_arguments)]
fn render_by_file(
    rows: &[Row],
    match_idx: &[usize],
    files: &[&str],
    total_matches: usize,
    total_files: usize,
    c: &CompactOpts,
    page_size: usize,
    fingerprint: u64,
) -> Page {
    // Only the path half of the resume key matters here.
    let skip = c.start_after.as_ref().map_or(0, |(path, _)| {
        files.iter().filter(|&&f| f <= path.as_str()).count()
    });

    // `skip` counts elements of `files`, so it never exceeds its length.
    let page_end = skip.saturating_add(page_size).min(files.len());
    let window = &files[skip..page_end];
    let rendered = window.len();

    let (first_index, last_index) = if rendered == 0 {
        (0, 0)
    } else {
        (skip + 1, skip + rendered)
    };
    let has_more = skip + rendered < total_files;
    let last_key = window.last().map(|&p| (p.to_string(), 0));

    let counting = matches!(c.mode, Mode::Count);

    let mut body = String::new();
    if counting {
        // Tally matches per path; only needed for the count view.
        let mut counts: HashMap<&str, usize> = HashMap::new();
        for &i in match_idx {
            *counts.entry(rows[i].path).or_insert(0) += 1;
        }
        for &p in window {
            body.push_str(&format!("{p}:{}\n", counts.get(p).copied().unwrap_or(0)));
        }
    } else {
        for &p in window {
            body.push_str(&format!("{p}\n"));
        }
    }

    let header = if total_files == 0 {
        "[no matches]".to_string()
    } else if counting {
        format!(
            "[count {first_index}-{last_index} of {total_files} file{} \u{b7} {total_matches} match{}]",
            plural(total_files),
            if total_matches == 1 { "" } else { "es" }
        )
    } else {
        format!("[files {first_index}-{last_index} of {total_files}]")
    };

    Page {
        header,
        body,
        total_matches,
        total_files,
        first_index,
        last_index,
        has_more,
        last_key,
        fingerprint,
    }
}
/// One candidate interpretation of an output line:
/// `(path, line number, is_match, text)`.
type Cand<'a> = (&'a str, u64, bool, &'a str);
/// Intermediate result of the first parsing pass over the raw output.
enum Entry<'a> {
/// A `--` line separating groups of output.
Break,
/// A data line. The first slot is its interpretation as a match line (split
/// on `:`), the second as a context line (split on `-`); either may be absent.
Row(Option<Cand<'a>>, Option<Cand<'a>>),
}
/// Parses grep-style output into rows, resolving ambiguous lines.
///
/// A line may parse both as `path:N:text` (match) and `path-N-text`
/// (context), for example when the text itself contains `:12:`. Such a line
/// is read as a context line only when the nearest unambiguous line has the
/// path of the context interpretation; otherwise the match interpretation
/// wins. Lines that parse neither way are dropped. The block index advances
/// whenever the path changes or a `--` separator was seen since the previous row.
fn parse_rows(text: &str) -> Vec<Row<'_>> {
    // Pass 1: classify every line and keep both readings where they exist.
    let mut entries: Vec<Entry> = Vec::new();
    for line in text.lines() {
        if line == "--" {
            entries.push(Entry::Break);
            continue;
        }
        let as_match = split_on(line, b':').map(|(p, n, t)| (p, n, true, t));
        let as_context = split_on(line, b'-').map(|(p, n, t)| (p, n, false, t));
        if as_match.is_some() || as_context.is_some() {
            entries.push(Entry::Row(as_match, as_context));
        }
    }

    // Pass 2: lines with exactly one reading give a trustworthy path.
    let anchors: Vec<Option<&str>> = entries
        .iter()
        .map(|entry| match entry {
            Entry::Row(Some(only), None) | Entry::Row(None, Some(only)) => Some(only.0),
            _ => None,
        })
        .collect();

    // Pass 3: pick one reading per line and assign block numbers.
    let mut rows = Vec::new();
    let mut block = 0usize;
    let mut prev_path: Option<&str> = None;
    let mut pending_break = false;
    for (i, entry) in entries.iter().enumerate() {
        let (path, lineno, is_match, body) = match entry {
            Entry::Break => {
                pending_break = true;
                continue;
            }
            Entry::Row(None, None) => continue,
            Entry::Row(Some(only), None) | Entry::Row(None, Some(only)) => *only,
            Entry::Row(Some(m), Some(c)) => {
                let near = nearest_anchor(&anchors, i);
                if near == Some(c.0) && near != Some(m.0) {
                    *c
                } else {
                    *m
                }
            }
        };

        // The very first row never opens a new block.
        let starts_new_block = prev_path.is_some_and(|prev| pending_break || prev != path);
        if starts_new_block {
            block += 1;
        }
        pending_break = false;
        prev_path = Some(path);

        rows.push(Row {
            path,
            lineno,
            is_match,
            text: body,
            block,
        });
    }
    rows
}
/// Finds the anchor closest to position `i`, excluding `i` itself.
///
/// Distances are tried in increasing order; at each distance the earlier
/// position is checked before the later one, so a tie goes to the anchor
/// that precedes `i`. Returns `None` when no other position holds an anchor.
fn nearest_anchor<'a>(anchors: &[Option<&'a str>], i: usize) -> Option<&'a str> {
    for dist in 1..anchors.len() {
        let before = i.checked_sub(dist).and_then(|j| anchors[j]);
        if before.is_some() {
            return before;
        }
        let after = anchors.get(i + dist).copied().flatten();
        if after.is_some() {
            return after;
        }
    }
    None
}
/// Splits `line` at the first `sep<digits>sep` triple whose digits form a
/// valid `u64`.
///
/// Returns `(path, line number, text)`, where `path` is everything before the
/// first separator of the triple and `text` is everything after the second.
/// Returns `None` when the line contains no such triple.
///
/// A digit run that does not fit in `u64` cannot be a line number, so it is
/// skipped and scanning continues with later separators instead of giving up
/// on the whole line.
fn split_on(line: &str, sep: u8) -> Option<(&str, u64, &str)> {
    let bytes = line.as_bytes();
    for (i, &byte) in bytes.iter().enumerate() {
        if byte != sep {
            continue;
        }
        let rest = &bytes[i + 1..];
        let digits = rest.iter().take_while(|b| b.is_ascii_digit()).count();
        if digits == 0 || rest.get(digits) != Some(&sep) {
            continue;
        }
        let num_end = i + 1 + digits;
        // Parsing only fails on overflow here; treat that as "not a line
        // number" and keep looking further along the line.
        if let Ok(lineno) = line[i + 1..num_end].parse::<u64>() {
            return Some((&line[..i], lineno, &line[num_end + 1..]));
        }
    }
    None
}
/// For every row, finds the index of the closest match row in the same block.
///
/// Distance is measured in row positions. A match row maps to itself. When a
/// preceding and a following match are equally close, the preceding one is
/// chosen. Rows whose block contains no match map to `None`.
fn nearest_match_per_row(rows: &[Row]) -> Vec<Option<usize>> {
    let same_block = |a: usize, b: usize| rows[a].block == rows[b].block;

    // Forward sweep: closest match at or before each row.
    let mut prev_match: Option<usize> = None;
    let mut nearest: Vec<Option<usize>> = rows
        .iter()
        .enumerate()
        .map(|(i, row)| {
            if row.is_match {
                prev_match = Some(i);
            }
            prev_match.filter(|&m| same_block(m, i))
        })
        .collect();

    // Backward sweep: closest match at or after each row, merged with the
    // forward result by picking whichever is nearer.
    let mut next_match: Option<usize> = None;
    for (i, row) in rows.iter().enumerate().rev() {
        if row.is_match {
            next_match = Some(i);
        }
        let ahead = next_match.filter(|&m| same_block(m, i));
        nearest[i] = match (nearest[i], ahead) {
            (Some(behind), Some(front)) if front - i < i - behind => Some(front),
            (Some(behind), _) => Some(behind),
            (None, front) => front,
        };
    }
    nearest
}
/// Shortens `text` to a window of `max_cols` characters around a byte offset.
///
/// Text of at most `max_cols` characters is returned unchanged. Otherwise the
/// window starts `max_cols / 3` characters before `center` (a byte offset
/// into `text`, moved back to a character boundary; the start of the text if
/// `None`), clamped so the window stays inside the text. An ellipsis
/// (U+2026) marks each side where text was cut, so the result can be up to
/// two characters longer than `max_cols`.
fn truncate_centered(text: &str, max_cols: usize, center: Option<usize>) -> String {
    let total = text.chars().count();
    if total <= max_cols {
        return text.to_owned();
    }

    // Translate the byte offset into a character index.
    let anchor = center.map_or(0, |byte| {
        let mut cut = byte.min(text.len());
        while cut > 0 && !text.is_char_boundary(cut) {
            cut -= 1;
        }
        text[..cut].chars().count()
    });

    // Keep a third of the window in front of the anchor, without running
    // past the end of the text.
    let lead = max_cols / 3;
    let first = anchor.saturating_sub(lead).min(total - max_cols);
    let last = first + max_cols;

    let mut out = String::new();
    if first > 0 {
        out.push('\u{2026}');
    }
    out.extend(text.chars().skip(first).take(max_cols));
    if last < total {
        out.push('\u{2026}');
    }
    out
}
// Unit tests for the compact formatter: parsing, pagination, the three
// rendering modes, truncation and separator disambiguation.
#[cfg(test)]
mod tests {
use super::*;
// Three matches across two files, already in (path, line) order.
const RAW: &[u8] = b"src/a.rs:1:fn one() {}\n\
src/a.rs:2:fn two() {}\n\
src/b.rs:10:fn three() {}\n";
// Shorthand for `format` with default search options.
fn page(raw: &[u8], pattern: &str, c: CompactOpts) -> Page {
format(raw, pattern, SearchOptions::default(), c)
}
// Default options: everything fits on one page, one heading per file.
#[test]
fn groups_by_file_with_counts() {
let p = page(RAW, "fn", CompactOpts::default());
assert_eq!(p.total_matches, 3);
assert_eq!(p.total_files, 2);
assert!(!p.has_more);
assert_eq!(p.body.matches("src/a.rs\n").count(), 1);
assert!(
p.body
.contains("src/a.rs\n 1: fn one() {}\n 2: fn two() {}\n")
);
assert!(p.body.contains("src/b.rs\n 10: fn three() {}\n"));
assert!(p.header.contains("matches 1-3 of 3 in 2 files"));
}
// Two pages of size 2 together cover all three matches exactly once.
#[test]
fn paginates_without_dropping_matches() {
let p1 = page(
RAW,
"fn",
CompactOpts {
page_size: 2,
..Default::default()
},
);
assert!(p1.has_more);
assert_eq!((p1.first_index, p1.last_index), (1, 2));
assert!(p1.body.contains(" 1: fn one"));
assert!(p1.body.contains(" 2: fn two"));
assert!(!p1.body.contains("three"));
let p2 = page(
RAW,
"fn",
CompactOpts {
page_size: 2,
start_after: p1.last_key.clone(),
..Default::default()
},
);
assert!(!p2.has_more);
assert_eq!((p2.first_index, p2.last_index), (3, 3));
assert!(p2.body.contains("src/b.rs\n 10: fn three"));
assert!(!p2.body.contains("fn one"));
}
// Input that is not in path order is still paged in sorted key order,
// one match per page, with nothing skipped or repeated.
#[test]
fn keyset_survives_unsorted_input_without_dropping_matches() {
const UNSORTED: &[u8] = b"src/a/b.rs:1:fn x\n\
src/a.rs:1:fn y\n\
src/ab.rs:1:fn z\n";
let mut seen = Vec::new();
let mut start_after = None;
for _ in 0..5 {
let p = page(
UNSORTED,
"fn",
CompactOpts {
page_size: 1,
start_after: start_after.clone(),
..Default::default()
},
);
assert_eq!(p.total_matches, 3);
for line in p.body.lines().filter(|l| l.starts_with(" ")) {
seen.push(line.to_string());
}
if !p.has_more {
break;
}
start_after = p.last_key.clone();
}
assert_eq!(seen, vec![" 1: fn y", " 1: fn x", " 1: fn z"]);
}
// Resuming from a page's last key starts at the very next match.
#[test]
fn keyset_resume_after_last_key() {
let p1 = page(
RAW,
"fn",
CompactOpts {
page_size: 1,
..Default::default()
},
);
assert_eq!(p1.last_key, Some(("src/a.rs".to_string(), 1)));
let p2 = page(
RAW,
"fn",
CompactOpts {
page_size: 1,
start_after: p1.last_key.clone(),
..Default::default()
},
);
assert!(p2.body.contains(" 2: fn two"));
assert!(!p2.body.contains("fn one"));
assert_eq!((p2.first_index, p2.last_index), (2, 2));
}
// Files mode prints paths only, never line text.
#[test]
fn files_mode_lists_paths() {
let p = page(
RAW,
"fn",
CompactOpts {
mode: Mode::Files,
..Default::default()
},
);
assert_eq!(p.total_files, 2);
assert!(p.header.contains("files 1-2 of 2"));
assert!(p.body.contains("src/a.rs\n"));
assert!(p.body.contains("src/b.rs\n"));
assert!(!p.body.contains("fn one")); }
// Count mode prints `path:count` and reports both totals in the header.
#[test]
fn count_mode_tallies_per_file() {
let p = page(
RAW,
"fn",
CompactOpts {
mode: Mode::Count,
..Default::default()
},
);
assert!(p.body.contains("src/a.rs:2\n"));
assert!(p.body.contains("src/b.rs:1\n"));
assert!(p.header.contains("count 1-2 of 2 files"));
assert!(p.header.contains("3 matches"));
}
// The fingerprint depends on the whole result set, not on the page size.
#[test]
fn fingerprint_stable_across_calls_and_pages() {
let full = page(RAW, "fn", CompactOpts::default());
let paged = page(
RAW,
"fn",
CompactOpts {
page_size: 1,
..Default::default()
},
);
assert_eq!(full.fingerprint, paged.fingerprint);
assert_ne!(full.fingerprint, 0);
}
// A long line is cut down but the matched text stays visible.
#[test]
fn truncates_long_line_centered_on_match() {
let long = format!("src/x.rs:1:{}NEEDLE{}\n", "a".repeat(400), "b".repeat(400));
let p = page(
long.as_bytes(),
"NEEDLE",
CompactOpts {
max_cols: 60,
..Default::default()
},
);
let line = p.body.lines().find(|l| l.contains("NEEDLE")).unwrap();
assert!(line.contains('\u{2026}'), "expected ellipsis: {line}");
assert!(line.chars().count() < 100);
}
// Truncation works in characters, so multi-byte text must not panic.
#[test]
fn truncates_long_multibyte_line_without_panicking() {
let long = format!(
"src/x.rs:1:{}café NEEDLE {}\n",
"é".repeat(300),
"ü".repeat(300)
);
let p = page(
long.as_bytes(),
"NEEDLE",
CompactOpts {
max_cols: 50,
..Default::default()
},
);
let line = p.body.lines().find(|l| l.contains("NEEDLE")).unwrap();
assert!(line.contains('\u{2026}'));
assert!(line.chars().count() < 90);
}
// Empty input yields the "no matches" header and an empty body.
#[test]
fn empty_input_has_no_body() {
let p = page(b"", "fn", CompactOpts::default());
assert_eq!(p.total_matches, 0);
assert!(!p.has_more);
assert!(p.body.is_empty());
assert_eq!(p.header, "[no matches]");
}
// Context lines are not counted as matches and follow their own match
// onto its page; a `--` separator keeps the two groups apart.
#[test]
fn context_lines_attach_to_their_match_and_dont_count() {
let raw = b"f.rs-4-before a\n\
f.rs:5:MATCH a\n\
f.rs-6-after a\n\
--\n\
f.rs-9-before b\n\
f.rs:10:MATCH b\n\
f.rs-11-after b\n";
let p = page(
raw,
"MATCH",
CompactOpts {
page_size: 1,
..Default::default()
},
);
assert_eq!(p.total_matches, 2);
assert_eq!(p.total_files, 1);
assert!(p.has_more);
assert!(p.body.contains(" 5: MATCH a"));
assert!(p.body.contains(" 4- before a"));
assert!(p.body.contains(" 6- after a"));
assert!(!p.body.contains("MATCH b"));
assert!(!p.body.contains("before b"));
}
// A context line whose text contains `:NN:` must still be read as a
// context line of the neighbouring file, not as a match in a bogus path.
#[test]
fn context_line_with_colon_digits_in_text_is_not_misparsed() {
let raw = b"f.txt-2-log at 12:34:56 here\n\
f.txt:3:TARGET match\n\
f.txt-4-after line\n";
let p = page(raw, "TARGET", CompactOpts::default());
assert_eq!(p.total_matches, 1, "{}", p.body);
assert_eq!(p.total_files, 1, "{}", p.body);
assert!(p.body.contains("f.txt\n 2- log at 12:34:56 here"));
assert!(p.body.contains(" 3: TARGET match"));
assert!(!p.body.contains("12\n"));
}
// Hyphens and digits inside the path or text do not turn a match line
// into a context line.
#[test]
fn colon_separator_wins_over_hyphen_in_path() {
let p = page(
b"src/a-b-2.rs:42:let x-1 = y-2;\n",
"let",
CompactOpts::default(),
);
assert!(
p.body.contains("src/a-b-2.rs\n 42: let x-1 = y-2;"),
"{}",
p.body
);
assert_eq!(p.total_matches, 1);
}
// A resume key beyond every match produces an empty final page.
#[test]
fn start_after_past_end_is_empty() {
let p = page(
RAW,
"fn",
CompactOpts {
start_after: Some(("zzz".to_string(), 0)),
..Default::default()
},
);
assert!(!p.has_more);
assert_eq!(p.first_index, 0);
assert_eq!(p.last_index, 0);
assert!(p.body.is_empty());
assert_eq!(p.last_key, None);
}
}