#![allow(clippy::cast_possible_truncation)]
#![allow(clippy::module_name_repetitions)]
use std::ops::Range;
use memchr::memchr_iter;
/// Strategy for measuring rendered text and walking it grapheme by grapheme.
///
/// Every method is an associated function (there is no `self`), so implementors act as
/// type-level markers selected through a generic parameter. The `private::Sealed` supertrait
/// lives in a private module, so the trait cannot be implemented outside of this module.
pub trait Processor: private::Sealed {
/// Returns the display width of `input`, measured in columns.
fn width(input: &str) -> usize;
/// Yields one `(byte offset, width)` pair per grapheme of `input`, in order of appearance.
fn grapheme_index_widths(input: &str) -> impl Iterator<Item = (usize, usize)>;
/// Returns the width, in columns, of the final grapheme of `input`.
fn last_grapheme_width(input: &str) -> usize;
}
// Sealing support for `Processor`: `Sealed` is public but lives in a private module, so only
// the types listed here can satisfy the `Processor: Sealed` supertrait bound.
mod private {
/// Marker supertrait that restricts who may implement `Processor`.
pub trait Sealed {}
impl Sealed for super::UnicodeProcessor {}
impl Sealed for super::AsciiProcessor {}
}
/// Reports whether `input` satisfies the precondition checked by [`UnicodeProcessor`].
///
/// The only rejected inputs are strings that are entirely ASCII *and* contain a carriage
/// return (`'\r'`); every other string, including the empty string, is accepted.
// NOTE(review): presumably ASCII text with carriage returns is excluded because `"\r\n"` is a
// single grapheme for Unicode segmentation but two bytes for ASCII processing - confirm intent.
#[inline]
pub(crate) fn is_unicode_safe(input: &str) -> bool {
    let ascii_with_carriage_return = input.is_ascii() && input.contains('\r');
    !ascii_with_carriage_return
}
/// Reports whether `input` satisfies the precondition checked by [`AsciiProcessor`]: every
/// byte must be an ASCII byte. The empty string is accepted.
#[inline]
pub(crate) fn is_ascii_safe(input: &str) -> bool {
    input.bytes().all(|byte| byte.is_ascii())
}
/// [`Processor`] that measures text with `unicode_width` and splits it into extended grapheme
/// clusters with `unicode_segmentation`.
///
/// In debug builds every method asserts that the input passes [`is_unicode_safe`].
pub struct UnicodeProcessor;

impl Processor for UnicodeProcessor {
    /// Display width of the whole string as computed by `unicode_width`.
    #[inline]
    fn width(input: &str) -> usize {
        debug_assert!(is_unicode_safe(input));
        unicode_width::UnicodeWidthStr::width(input)
    }

    /// Pairs the byte offset of each extended grapheme cluster with that cluster's width.
    #[inline]
    fn grapheme_index_widths(input: &str) -> impl Iterator<Item = (usize, usize)> {
        debug_assert!(is_unicode_safe(input));
        let clusters = unicode_segmentation::UnicodeSegmentation::grapheme_indices(input, true);
        clusters.map(|(byte_offset, cluster)| {
            let columns = unicode_width::UnicodeWidthStr::width(cluster);
            (byte_offset, columns)
        })
    }

    /// Width of the final extended grapheme cluster, or `0` when `input` has no graphemes.
    #[inline]
    fn last_grapheme_width(input: &str) -> usize {
        debug_assert!(is_unicode_safe(input));
        let mut clusters = unicode_segmentation::UnicodeSegmentation::graphemes(input, true);
        match clusters.next_back() {
            Some(cluster) => unicode_width::UnicodeWidthStr::width(cluster),
            None => 0,
        }
    }
}
/// [`Processor`] for ASCII-only text, where every byte is one grapheme occupying one column.
///
/// In debug builds every method asserts that the input passes [`is_ascii_safe`].
pub struct AsciiProcessor;

impl Processor for AsciiProcessor {
    /// Display width of the string, which for ASCII equals its length in bytes.
    #[inline]
    fn width(input: &str) -> usize {
        debug_assert!(is_ascii_safe(input));
        input.len()
    }

    /// Yields `(byte offset, 1)` for every byte of `input`.
    #[inline]
    fn grapheme_index_widths(input: &str) -> impl Iterator<Item = (usize, usize)> {
        debug_assert!(is_ascii_safe(input));
        (0..input.len()).map(|offset| (offset, 1))
    }

    /// Width of the final grapheme: `1` for any non-empty input and `0` for the empty string.
    #[inline]
    fn last_grapheme_width(input: &str) -> usize {
        debug_assert!(is_ascii_safe(input));
        // An empty string has no last grapheme; report zero columns so that this processor
        // agrees with `UnicodeProcessor` on empty input.
        usize::from(!input.is_empty())
    }
}
/// A contiguous piece of rendered text, tagged by whether it is part of a match.
#[derive(Debug, PartialEq)]
pub struct Span {
/// Byte range of this span within the rendered string.
pub range: Range<usize>,
/// `true` when the span covers matched graphemes, `false` for the text between matches.
pub is_match: bool,
}
/// Fits `input` into `capacity` columns, cutting it at a grapheme boundary if necessary.
///
/// # Returns
///
/// * `Ok(spare)` when the whole of `input` fits; `spare` is the number of unused columns.
/// * `Err((prefix, spare))` when it does not fit; `prefix` is the longest prefix of `input`
///   made of whole graphemes that fits, and `spare` is the number of columns left unused
///   after `prefix` (non-zero when the next grapheme is wider than the remaining space).
#[inline]
pub fn truncate<P: Processor>(input: &str, capacity: u16) -> Result<u16, (&str, usize)> {
    let limit = capacity as usize;

    // Fast path: the full string fits, so no grapheme walk is needed.
    if let Some(spare) = limit.checked_sub(P::width(input)) {
        return Ok(spare as u16);
    }

    let mut used = 0;
    for (offset, grapheme_width) in P::grapheme_index_widths(input) {
        let grown = used + grapheme_width;
        if grown > limit {
            return Err((&input[..offset], limit - used));
        }
        used = grown;
    }

    // Only reachable when the summed grapheme widths fit even though `P::width` reported
    // that the string as a whole does not.
    Ok(capacity - used as u16)
}
/// Skips whole graphemes from the front of `input` until at least `offset` columns have been
/// covered.
///
/// Returns `(byte_index, overshoot)`: `byte_index` is where the unskipped remainder of `input`
/// begins (`input.len()` when everything was skipped), and `overshoot` is the number of columns
/// by which the skipped graphemes exceed `offset`. `overshoot` is non-zero when a wide grapheme
/// straddles the requested offset, and `0` when `input` is narrower than `offset`.
#[inline]
pub fn consume<P: Processor>(input: &str, offset: usize) -> (usize, usize) {
    let mut skipped_width: usize = 0;

    for (byte_index, grapheme_width) in P::grapheme_index_widths(input) {
        // Stop in front of the first grapheme that begins at or beyond the requested offset.
        if skipped_width >= offset {
            return (byte_index, skipped_width - offset);
        }
        skipped_width += grapheme_width;
    }

    // Every grapheme was skipped; the input may still be narrower than `offset`.
    (input.len(), skipped_width.saturating_sub(offset))
}
/// Splits `rendered` into matched and unmatched [`Span`]s and groups those spans by line.
///
/// * `indices` - grapheme indices (as counted by `P`) of the matched graphemes. They must be
///   strictly increasing for the iterator step arithmetic below to hold.
/// * `rendered` - the text the indices refer to.
/// * `spans` - cleared, then filled with the spans in order of appearance. Line terminators
///   (`"\n"` and `"\r\n"`) found between matches are not covered by any span.
/// * `lines` - cleared, then filled with one range per line; each range indexes into `spans`.
///
/// # Panics
///
/// Panics if an index in `indices` does not correspond to a grapheme of `rendered`.
#[inline]
pub fn spans_from_indices<P: Processor>(
    indices: &[u32],
    rendered: &str,
    spans: &mut Vec<Span>,
    lines: &mut Vec<Range<usize>>,
) {
    spans.clear();
    lines.clear();

    let mut graphemes = P::grapheme_index_widths(rendered);
    // Number of graphemes already pulled out of `graphemes`.
    let mut consumed = 0;
    // Byte offset up to which `rendered` has been converted into spans.
    let mut cursor = 0;
    let mut line_start = 0;
    let mut line_end = 0;

    for (left, right) in IndexSpans::new(indices) {
        // Advance to the first grapheme of the run; its byte offset opens the matched span.
        let match_start = graphemes
            .nth(left - consumed)
            .expect("Match index does not correspond to grapheme!")
            .0;

        // The matched span closes where the grapheme following the run begins, or at the end
        // of the text when the run reaches the final grapheme.
        let match_end = if let Some((offset, _)) = graphemes.nth(right - left) {
            // Graphemes `0..=right + 1` have now been taken from the iterator.
            consumed = right + 2;
            offset
        } else {
            rendered.len()
        };

        insert_unmatched_spans(
            spans,
            rendered,
            cursor,
            match_start,
            lines,
            &mut line_start,
            &mut line_end,
        );

        if match_start != match_end {
            line_end += 1;
            spans.push(Span {
                range: match_start..match_end,
                is_match: true,
            });
        }

        cursor = match_end;
    }

    // Whatever follows the final match is unmatched text.
    insert_unmatched_spans(
        spans,
        rendered,
        cursor,
        rendered.len(),
        lines,
        &mut line_start,
        &mut line_end,
    );

    // Close the last line, which has no terminating line break.
    lines.push(line_start..line_end);
}
/// Appends unmatched spans for `rendered[start..middle]`, breaking the text at every line feed.
///
/// For each `'\n'` in the block, the text since the previous break (excluding a directly
/// preceding `'\r'`) becomes an unmatched [`Span`] unless it is empty, the current line is
/// closed by pushing `*line_start..*line_end` onto `lines`, and a new line is opened. Text
/// after the last line feed becomes a final unmatched span that stays on the open line.
///
/// `line_start` and `line_end` are indices into `spans` delimiting the line that is currently
/// being built; both are updated in place so that consecutive calls continue the same line.
#[inline]
fn insert_unmatched_spans(
    spans: &mut Vec<Span>,
    rendered: &str,
    start: usize,
    middle: usize,
    lines: &mut Vec<Range<usize>>,
    line_start: &mut usize,
    line_end: &mut usize,
) {
    let block = &rendered[start..middle];
    let mut segment_start = start;

    for relative in memchr_iter(b'\n', block.as_bytes()) {
        let newline_at = start + relative;

        // Treat `\r\n` as a single terminator by leaving the carriage return out of the span.
        let follows_carriage_return = block.as_bytes()[..relative].last() == Some(&b'\r');
        let segment_end = if follows_carriage_return {
            newline_at - 1
        } else {
            newline_at
        };

        if segment_start < segment_end {
            *line_end += 1;
            spans.push(Span {
                range: segment_start..segment_end,
                is_match: false,
            });
        }

        // The line feed closes the current line, even when that line holds no spans.
        lines.push(*line_start..*line_end);
        *line_start = *line_end;
        segment_start = newline_at + 1;
    }

    // Remaining text after the final line feed belongs to the line that is still open.
    if segment_start != middle {
        *line_end += 1;
        spans.push(Span {
            range: segment_start..middle,
            is_match: false,
        });
    }
}
/// Iterator that groups a slice of indices into maximal runs of consecutive values.
///
/// Each item is an inclusive `(first, last)` pair; an isolated index `i` yields `(i, i)`.
struct IndexSpans<'a> {
    /// The indices being grouped.
    indices: &'a [u32],
    /// Position in `indices` of the first element that has not been emitted yet.
    cursor: usize,
}

impl<'a> IndexSpans<'a> {
    /// Creates an iterator positioned at the start of `indices`.
    fn new(indices: &'a [u32]) -> Self {
        Self { indices, cursor: 0 }
    }
}

impl Iterator for IndexSpans<'_> {
    type Item = (usize, usize);

    fn next(&mut self) -> Option<Self::Item> {
        // Nothing left to emit once the cursor has moved past the final index.
        let first = *self.indices.get(self.cursor)?;
        let mut last = first;
        self.cursor += 1;

        // Extend the run for as long as each following index is exactly one past the previous.
        while let Some(&candidate) = self.indices.get(self.cursor) {
            if candidate != last + 1 {
                break;
            }
            last = candidate;
            self.cursor += 1;
        }

        Some((first as usize, last as usize))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Wide-character fixtures use U+FF28 (fullwidth `H`) and U+FF45 (fullwidth `e`). Each is
    // three bytes in UTF-8 and two columns wide, which is what the expected byte offsets and
    // widths below rely on. They are written as escapes so that the fixtures cannot be
    // silently replaced by their one-byte, one-column ASCII look-alikes.

    /// `consume` must agree for every processor that accepts the input.
    #[test]
    fn test_consume_offset() {
        fn assert_consume(input: &str, w: usize, expected: (usize, usize)) {
            if is_unicode_safe(input) {
                assert_eq!(consume::<UnicodeProcessor>(input, w), expected);
            }
            if is_ascii_safe(input) {
                assert_eq!(consume::<AsciiProcessor>(input, w), expected);
            }
        }

        assert_consume("ab", 3, (2, 0));
        assert_consume("ab", 2, (2, 0));
        assert_consume("ab", 1, (1, 0));
        assert_consume("ab", 0, (0, 0));
        assert_consume("", 0, (0, 0));
        assert_consume("", 1, (0, 0));
        assert_consume("\u{FF28}", 0, (0, 0));
        assert_consume("\u{FF28}", 1, (3, 1));
        assert_consume("\u{FF28}", 2, (3, 0));
        assert_consume("a\u{FF28}", 0, (0, 0));
        assert_consume("a\u{FF28}", 1, (1, 0));
        assert_consume("a\u{FF28}", 2, (4, 1));
        assert_consume("a\u{FF28}", 3, (4, 0));
    }

    /// `spans_from_indices` must produce exactly the expected spans and lines.
    #[test]
    fn test_spanned() {
        fn assert_matching(
            indices: &[u32],
            input: &str,
            expected_spans: &[Span],
            expected_lines: &[Range<usize>],
        ) {
            let mut spans = Vec::new();
            let mut lines = Vec::new();

            // Whole-slice equality also catches missing or surplus elements, which an
            // element-wise comparison over zipped iterators would let through.
            if is_unicode_safe(input) {
                spans_from_indices::<UnicodeProcessor>(indices, input, &mut spans, &mut lines);
                assert_eq!(spans.as_slice(), expected_spans);
                assert_eq!(lines.as_slice(), expected_lines);
            }
            if is_ascii_safe(input) {
                spans_from_indices::<AsciiProcessor>(indices, input, &mut spans, &mut lines);
                assert_eq!(spans.as_slice(), expected_spans);
                assert_eq!(lines.as_slice(), expected_lines);
            }
        }

        assert_matching(
            &[],
            "a",
            &[Span {
                range: 0..1,
                is_match: false,
            }],
            &[0..1],
        );
        assert_matching(
            &[],
            "\na",
            &[Span {
                range: 1..2,
                is_match: false,
            }],
            &[0..0, 0..1],
        );
        assert_matching(
            &[],
            "\r\na",
            &[Span {
                range: 2..3,
                is_match: false,
            }],
            &[0..0, 0..1],
        );
        assert_matching(
            &[],
            "a\n\r\nbc",
            &[
                Span {
                    range: 0..1,
                    is_match: false,
                },
                Span {
                    range: 4..6,
                    is_match: false,
                },
            ],
            &[0..1, 1..1, 1..2],
        );
        assert_matching(&[], "", &[], &[0..0]);
        assert_matching(&[], "\n", &[], &[0..0, 0..0]);
        assert_matching(&[], "\r\n", &[], &[0..0, 0..0]);
        assert_matching(
            &[0, 2],
            "a\nb",
            &[
                Span {
                    range: 0..1,
                    is_match: true,
                },
                Span {
                    range: 2..3,
                    is_match: true,
                },
            ],
            &[0..1, 1..2],
        );
        assert_matching(
            &[0, 2],
            "abc",
            &[
                Span {
                    range: 0..1,
                    is_match: true,
                },
                Span {
                    range: 1..2,
                    is_match: false,
                },
                Span {
                    range: 2..3,
                    is_match: true,
                },
            ],
            &[0..3],
        );
        // `\r\n` is a single grapheme here, so index 2 is the fullwidth character at 3..6.
        assert_matching(
            &[0, 2],
            "a\r\n\u{FF28}",
            &[
                Span {
                    range: 0..1,
                    is_match: true,
                },
                Span {
                    range: 3..6,
                    is_match: true,
                },
            ],
            &[0..1, 1..2],
        );
        assert_matching(
            &[0, 2, 3],
            "abcd\nb",
            &[
                Span {
                    range: 0..1,
                    is_match: true,
                },
                Span {
                    range: 1..2,
                    is_match: false,
                },
                Span {
                    range: 2..4,
                    is_match: true,
                },
                Span {
                    range: 5..6,
                    is_match: false,
                },
            ],
            &[0..3, 3..4],
        );
    }

    /// `IndexSpans` groups consecutive indices and leaves its cursor after each emitted run.
    #[test]
    fn test_next_span() {
        let indices: Vec<u32> = vec![1, 2, 4, 5, 6];
        let mut is = IndexSpans::new(&indices);
        assert_eq!(is.next(), Some((1, 2)));
        assert_eq!(is.cursor, 2);
        assert_eq!(is.next(), Some((4, 6)));
        assert_eq!(is.cursor, 5);
        assert_eq!(is.next(), None);
        assert_eq!(is.cursor, 5);

        let indices: Vec<u32> = vec![];
        let mut is = IndexSpans::new(&indices);
        assert_eq!(is.next(), None);
        assert_eq!(is.cursor, 0);

        let indices: Vec<u32> = vec![2];
        let mut is = IndexSpans::new(&indices);
        assert_eq!(is.next(), Some((2, 2)));
        assert_eq!(is.cursor, 1);
        assert_eq!(is.next(), None);
        assert_eq!(is.cursor, 1);

        let indices: Vec<u32> = vec![10, 11, 12, 13];
        let mut is = IndexSpans::new(&indices);
        assert_eq!(is.next(), Some((10, 13)));
        assert_eq!(is.cursor, 4);
        assert_eq!(is.next(), None);
        assert_eq!(is.cursor, 4);
    }

    /// `truncate` must agree for every processor that accepts the input.
    #[test]
    fn test_truncate_width() {
        fn assert_truncate(input: &str, w: u16, expected: Result<u16, (&str, usize)>) {
            if is_unicode_safe(input) {
                assert_eq!(truncate::<UnicodeProcessor>(input, w), expected);
            }
            if is_ascii_safe(input) {
                assert_eq!(truncate::<AsciiProcessor>(input, w), expected);
            }
        }

        assert_truncate("", 0, Ok(0));
        assert_truncate("ab", 0, Err(("", 0)));
        assert_truncate("ab", 1, Err(("a", 0)));
        assert_truncate("ab", 2, Ok(0));
        assert_truncate("\u{FF28}\u{FF45}", 0, Err(("", 0)));
        assert_truncate("\u{FF28}\u{FF45}", 1, Err(("", 1)));
        assert_truncate("\u{FF28}\u{FF45}", 2, Err(("\u{FF28}", 0)));
        assert_truncate("\u{FF28}\u{FF45}", 3, Err(("\u{FF28}", 1)));
        assert_truncate("\u{FF28}\u{FF45}", 4, Ok(0));
        assert_truncate("\u{FF28}\u{FF45}", 5, Ok(1));
        assert_truncate("a\u{FF28}", 1, Err(("a", 0)));
        assert_truncate("a\u{FF28}", 2, Err(("a", 1)));
        assert_truncate("a\u{FF28}", 3, Ok(0));
        assert_truncate("a\u{FF28}", 4, Ok(1));
    }
}