use crate::types::*;
use keyhog_core::Chunk;
use std::borrow::Cow;
/// Returns a slice of `text` covering up to `2 * radius + 1` consecutive lines.
///
/// The window begins `radius` lines before `line` (with `line` treated as
/// 1-based and the start clamped to the first line) and excludes the newline
/// that terminates its final line. If `text` has fewer lines than are needed to
/// reach the window's first line, the empty string is returned.
pub fn local_context_window(text: &str, line: usize, radius: usize) -> &str {
    let raw = text.as_bytes();

    // Skip whole lines until we reach the first line of the window.
    let lines_to_skip = line.saturating_sub(radius).saturating_sub(1);
    let mut window_start = 0usize;
    for _ in 0..lines_to_skip {
        let Some(newline) = memchr::memchr(b'\n', &raw[window_start..]) else {
            return "";
        };
        window_start += newline + 1;
    }

    // Extend the end one line at a time. Intermediate lines keep their
    // newline; the last line of the window does not.
    let mut lines_left = radius.saturating_mul(2).saturating_add(1);
    let mut window_end = window_start;
    while lines_left > 0 {
        match memchr::memchr(b'\n', &raw[window_end..]) {
            // Ran out of text: the window simply ends at end-of-input.
            None => {
                window_end = raw.len();
                break;
            }
            Some(newline) if lines_left == 1 => window_end += newline,
            Some(newline) => window_end += newline + 1,
        }
        lines_left -= 1;
    }

    &text[window_start..window_end]
}
/// Returns the byte offset at which each line of `text` begins.
///
/// The first entry is always `0`; every `\n` contributes one more entry
/// pointing at the byte immediately after it (so a trailing newline yields a
/// final offset equal to `text.len()`).
pub fn compute_line_offsets(text: &str) -> Vec<usize> {
    let raw = text.as_bytes();

    // Capacity guess of roughly one line per 40 bytes to limit regrowth.
    let mut line_starts = Vec::with_capacity(raw.len() / 40 + 1);
    line_starts.push(0);

    let mut cursor = 0usize;
    loop {
        let Some(newline) = memchr::memchr(b'\n', &raw[cursor..]) else {
            break;
        };
        cursor += newline + 1;
        line_starts.push(cursor);
    }

    line_starts
}
/// Resolves the line number for a byte `offset`.
///
/// The answer from `preprocessed.line_for_offset` is used when it has one.
/// Otherwise the result is the count of entries in `line_offsets` that are
/// `<= offset` (presumably `line_offsets` comes from `compute_line_offsets`,
/// which would make this a 1-based line number — confirm against callers).
pub fn match_line_number(
    preprocessed: &ScannerPreprocessedText<'_>,
    line_offsets: &[usize],
    offset: usize,
) -> usize {
    match preprocessed.line_for_offset(offset) {
        Some(line) => line,
        None => line_offsets.partition_point(|&line_start| line_start <= offset),
    }
}
/// Returns a chunk whose data has been run through `crate::normalize_chunk_data`.
///
/// When normalization hands back borrowed data, `chunk` itself is returned and
/// `owned` is left untouched. When it produces an owned buffer, a new `Chunk`
/// carrying that buffer and a clone of the original metadata is stored in
/// `owned`, and a reference to the stored chunk is returned.
pub fn normalize_scannable_chunk<'a>(chunk: &'a Chunk, owned: &'a mut Option<Chunk>) -> &'a Chunk {
    match crate::normalize_chunk_data(&chunk.data) {
        Cow::Borrowed(_) => chunk,
        Cow::Owned(data) => {
            let replacement = Chunk {
                data: data.into(),
                metadata: chunk.metadata.clone(),
            };
            // `insert` overwrites the slot and yields a reference to the new value.
            &*owned.insert(replacement)
        }
    }
}
/// Looks for a companion match near `primary_line`.
///
/// The search covers the text of the lines from
/// `primary_line - companion.within_lines` (never below `FIRST_LINE_NUMBER`)
/// through `primary_line + companion.within_lines`. The first hit that is at
/// most 4096 bytes long and whose starting byte maps to a line inside that
/// range is returned as an owned string.
///
/// Without a configured capture group the whole regex match is the hit;
/// with one, the hit is that group's span. Returns `None` when the window
/// cannot be resolved or no hit qualifies.
pub fn find_companion(
    preprocessed: &ScannerPreprocessedText<'_>,
    primary_line: usize,
    companion: &CompiledCompanion,
) -> Option<String> {
    /// Upper bound, in bytes, on the text accepted as a companion value.
    const MAX_HIT_BYTES: usize = 4096;

    let first_line = primary_line
        .saturating_sub(companion.within_lines)
        .max(FIRST_LINE_NUMBER);
    let last_line = primary_line.saturating_add(companion.within_lines);
    let (window_start, window_end) = line_window_offsets(preprocessed, first_line, last_line)?;
    let haystack = preprocessed.text.get(window_start..window_end)?;
    let group = companion.capture_group.unwrap_or(FIRST_CAPTURE_GROUP_INDEX);
    let allowed_lines = first_line..=last_line;

    // A hit only counts when the line of its first byte lies inside the window.
    let starts_in_window = |relative_start: usize| {
        matches!(
            preprocessed.line_for_offset(window_start + relative_start),
            Some(line) if allowed_lines.contains(&line)
        )
    };

    // No capture group configured: the whole match is the companion value.
    if companion.capture_group.is_none() {
        return companion
            .regex
            .find_iter(haystack)
            .filter(|m| m.len() <= MAX_HIT_BYTES)
            .find(|m| starts_in_window(m.start()))
            .map(|m| m.as_str().to_string());
    }

    let mut locs = companion.regex.capture_locations();
    let haystack_len = haystack.len();
    let mut cursor = 0usize;
    while cursor <= haystack_len {
        let Some(whole) = companion
            .regex
            .captures_read_at(&mut locs, haystack, cursor)
        else {
            break;
        };

        // Move the cursor past this match before inspecting it. An empty match
        // sitting at the cursor must still advance by one byte, and the new
        // position is bumped forward to the next char boundary.
        cursor = if whole.end() == cursor {
            cursor + 1
        } else {
            whole.end()
        };
        while cursor < haystack_len && !haystack.is_char_boundary(cursor) {
            cursor += 1;
        }

        // The requested group may not have participated in this match.
        let Some((group_start, group_end)) = locs.get(group) else {
            continue;
        };
        if group_end.saturating_sub(group_start) > MAX_HIT_BYTES {
            continue;
        }
        if starts_in_window(group_start) {
            return Some(haystack[group_start..group_end].to_string());
        }
    }

    None
}
/// Computes the byte range `(start, end)` spanning `start_line..=end_line`.
///
/// The first `monotonic_prefix_len(preprocessed)` mappings are binary-searched
/// by `line_number` (this relies on that prefix being ordered by line number);
/// any remaining mappings are scanned linearly. The start is the
/// `start_offset` of the first mapping found at or after `start_line`; the end
/// is the `end_offset` of the last mapping found at or before `end_line`, with
/// later tail mappings overriding the prefix result. Returns `None` if either
/// bound cannot be determined.
pub fn line_window_offsets(
    preprocessed: &ScannerPreprocessedText<'_>,
    start_line: usize,
    end_line: usize,
) -> Option<(usize, usize)> {
    let (ordered, tail) = preprocessed
        .mappings
        .split_at(monotonic_prefix_len(preprocessed));

    // Binary search inside the ordered prefix.
    let first_idx = ordered.partition_point(|m| m.line_number < start_line);
    let past_last_idx = ordered.partition_point(|m| m.line_number <= end_line);
    let mut window_start = ordered.get(first_idx).map(|m| m.start_offset);
    let mut window_end = past_last_idx
        .checked_sub(1)
        .map(|last_idx| ordered[last_idx].end_offset);

    // Linear pass over whatever follows the ordered prefix.
    for mapping in tail {
        if mapping.line_number <= end_line {
            window_end = Some(mapping.end_offset);
        }
        if window_start.is_none() && mapping.line_number >= start_line {
            window_start = Some(mapping.start_offset);
        }
    }

    match (window_start, window_end) {
        (Some(start), Some(end)) => Some((start, end)),
        _ => None,
    }
}
/// Length of the leading run of mappings whose `start_offset` is below
/// `preprocessed.original_end` (variant built with the `multiline` feature).
///
/// NOTE(review): presumably mappings at or past `original_end` are ones added
/// by multiline preprocessing and are not ordered by line — confirm.
#[cfg(feature = "multiline")]
fn monotonic_prefix_len(preprocessed: &ScannerPreprocessedText<'_>) -> usize {
    let boundary = preprocessed.original_end;
    let mappings = &preprocessed.mappings;
    mappings.partition_point(|mapping| mapping.start_offset < boundary)
}
/// Variant built without the `multiline` feature: every mapping counts as
/// part of the prefix, so the result is simply the number of mappings.
#[cfg(not(feature = "multiline"))]
fn monotonic_prefix_len(preprocessed: &ScannerPreprocessedText<'_>) -> usize {
    let mappings = &preprocessed.mappings;
    mappings.len()
}