use super::TextBuffer;
use crate::buffer::Pos;
impl TextBuffer {
pub fn find_char_after_on_line(&self, pos: Pos, needle: char) -> Option<Pos> {
let pos = self.clamp_pos(pos);
let line = self.clamp_line(pos.line);
let line_text = self.line_slice(line);
for (col, ch) in line_text
.chars()
.enumerate()
.skip(pos.col.saturating_add(1))
{
if ch == needle {
return Some(Pos::new(line, col));
}
}
None
}
pub fn find_matches(&self, needle: &str) -> Vec<(Pos, Pos)> {
if needle.is_empty() {
return Vec::new();
}
let needle_chars = needle.chars().count();
let overlap_char_limit = needle_chars.saturating_sub(1);
let mut matches = Vec::new();
let mut overlap = String::new();
let mut processed_chars = 0usize;
let mut last_emitted_end = 0usize;
for chunk in self.rope().chunks() {
let chunk_chars = chunk.chars().count();
if overlap.is_empty() {
collect_segment_matches(
self,
chunk,
processed_chars,
processed_chars,
needle,
needle_chars,
&mut matches,
&mut last_emitted_end,
);
if overlap_char_limit > 0 {
overlap = trailing_chars(chunk, overlap_char_limit);
}
} else {
let overlap_chars = overlap.chars().count();
let segment_start_char = processed_chars.saturating_sub(overlap_chars);
let mut segment = String::with_capacity(overlap.len().saturating_add(chunk.len()));
segment.push_str(&overlap);
segment.push_str(chunk);
collect_segment_matches(
self,
&segment,
segment_start_char,
processed_chars,
needle,
needle_chars,
&mut matches,
&mut last_emitted_end,
);
overlap = trailing_chars(&segment, overlap_char_limit);
}
processed_chars = processed_chars.saturating_add(chunk_chars);
}
matches
}
}
fn collect_segment_matches(
buffer: &TextBuffer,
segment: &str,
segment_start_char: usize,
emit_after_char: usize,
needle: &str,
needle_chars: usize,
out: &mut Vec<(Pos, Pos)>,
last_emitted_end: &mut usize,
) {
let segment_scan_start_char = last_emitted_end.saturating_sub(segment_start_char);
let segment_scan_start_byte = byte_idx_for_char(segment, segment_scan_start_char);
let mut scan_start_byte = segment_scan_start_byte;
let mut scan_start_chars = segment_scan_start_char;
for (match_start_byte_rel, _) in segment[segment_scan_start_byte..].match_indices(needle) {
let match_start_byte = segment_scan_start_byte.saturating_add(match_start_byte_rel);
scan_start_chars = scan_start_chars
.saturating_add(segment[scan_start_byte..match_start_byte].chars().count());
let start_char = segment_start_char.saturating_add(scan_start_chars);
let end_char = start_char.saturating_add(needle_chars);
if start_char >= *last_emitted_end && end_char > emit_after_char {
out.push((buffer.char_to_pos(start_char), buffer.char_to_pos(end_char)));
*last_emitted_end = end_char;
}
scan_start_byte = match_start_byte.saturating_add(needle.len());
scan_start_chars = scan_start_chars.saturating_add(needle_chars);
}
}
fn byte_idx_for_char(text: &str, char_idx: usize) -> usize {
if char_idx == 0 {
return 0;
}
text.char_indices()
.nth(char_idx)
.map(|(byte_idx, _)| byte_idx)
.unwrap_or(text.len())
}
fn trailing_chars(text: &str, char_limit: usize) -> String {
if char_limit == 0 || text.is_empty() {
return String::new();
}
let total_chars = text.chars().count();
if total_chars <= char_limit {
return text.to_string();
}
let skip_chars = total_chars.saturating_sub(char_limit);
let start_byte = text
.char_indices()
.nth(skip_chars)
.map(|(byte_idx, _)| byte_idx)
.unwrap_or(text.len());
text[start_byte..].to_string()
}
#[cfg(test)]
mod tests {
use super::*;
fn naive_find_matches(buffer: &TextBuffer, needle: &str) -> Vec<(Pos, Pos)> {
let source = buffer.to_string();
source
.match_indices(needle)
.map(|(start_byte, matched)| {
let start_char = buffer.rope().byte_to_char(start_byte);
let end_char = buffer
.rope()
.byte_to_char(start_byte.saturating_add(matched.len()));
(buffer.char_to_pos(start_char), buffer.char_to_pos(end_char))
})
.collect()
}
#[test]
fn find_matches_matches_naive_search_across_rope_chunks() {
let mut text = String::new();
for i in 0..40_000usize {
text.push_str(&format!("{i:05x}|"));
}
let buffer = TextBuffer::from_str(&text);
let mut chunks = buffer.rope().chunks();
let first_chunk = chunks.next().expect("expected at least one chunk");
let second_chunk = chunks.next().expect("expected multiple rope chunks");
let boundary_needle = format!(
"{}{}",
trailing_chars(first_chunk, 6),
second_chunk.chars().take(6).collect::<String>()
);
assert_eq!(
buffer.find_matches(&boundary_needle),
naive_find_matches(&buffer, &boundary_needle)
);
}
#[test]
fn find_matches_matches_naive_search_for_unicode_across_rope_chunks() {
let mut text = String::new();
for i in 0..20_000usize {
text.push_str(&format!("α{i:05x}🙂β"));
}
let buffer = TextBuffer::from_str(&text);
let mut chunks = buffer.rope().chunks();
let first_chunk = chunks.next().expect("expected at least one chunk");
let second_chunk = chunks.next().expect("expected multiple rope chunks");
let boundary_needle = format!(
"{}{}",
trailing_chars(first_chunk, 4),
second_chunk.chars().take(4).collect::<String>()
);
assert_eq!(
buffer.find_matches(&boundary_needle),
naive_find_matches(&buffer, &boundary_needle)
);
}
#[test]
fn find_matches_preserves_non_overlapping_semantics_across_chunk_boundaries() {
let text = "a".repeat(120_000);
let buffer = TextBuffer::from_str(&text);
assert!(
buffer.rope().chunks().count() > 1,
"expected multiple rope chunks"
);
assert_eq!(
buffer.find_matches("aaa"),
naive_find_matches(&buffer, "aaa")
);
}
}