use super::Filter;
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt::{Debug, Formatter};
use core::mem::ManuallyDrop;
use core::ops::Range;
use core::ops::{Deref, DerefMut};
/// Byte buffer that backs the types in this file.
///
/// It is built from a `String` and turned back into one with
/// `into_string`, which validates that the bytes are UTF-8.
pub(crate) struct TransliteratorBuffer(Vec<u8>);

impl TransliteratorBuffer {
    /// Takes ownership of `s` and stores its bytes.
    pub(crate) fn from_string(s: String) -> Self {
        let bytes = s.into_bytes();
        TransliteratorBuffer(bytes)
    }

    /// Consumes the buffer and returns its bytes as a `String`.
    ///
    /// # Panics
    /// Panics if the stored bytes are not valid UTF-8.
    // A buffer that is not UTF-8 is treated as a broken invariant, hence `expect`.
    #[allow(clippy::expect_used)]
    pub(crate) fn into_string(self) -> String {
        let Self(bytes) = self;
        let decoded = String::from_utf8(bytes);
        decoded.expect("TransliteratorBuffer must contain valid UTF-8 after transliteration")
    }
}
/// A window onto `raw` that conceals a prefix and a suffix.
///
/// Through `Deref`/`DerefMut` only the bytes between the first `hide_pre_len`
/// and the last `hide_post_len` bytes of `raw` are visible.
struct Hide<'a> {
    /// The full underlying byte vector.
    raw: &'a mut Vec<u8>,
    /// Number of leading bytes of `raw` that are hidden.
    hide_pre_len: usize,
    /// Number of trailing bytes of `raw` that are hidden.
    hide_post_len: usize,
}

impl<'a> Hide<'a> {
    /// Creates a window that shows all of `raw`.
    fn new(raw: &'a mut Vec<u8>) -> Self {
        Hide {
            hide_pre_len: 0,
            hide_post_len: 0,
            raw,
        }
    }

    /// Replaces `range` with the bytes of `replace_with`.
    ///
    /// `range` is given in visible coordinates; it is shifted by the hidden
    /// prefix length before being applied to `raw`.
    fn splice(&mut self, range: Range<usize>, replace_with: impl IntoIterator<Item = u8>) {
        let shift = self.hide_pre_len;
        let Range { start, end } = range;
        self.raw.splice(start + shift..end + shift, replace_with);
    }

    /// Reborrows this window with the same hidden prefix and suffix.
    fn child(&mut self) -> Hide<'_> {
        let (hide_pre_len, hide_post_len) = (self.hide_pre_len, self.hide_post_len);
        Hide {
            raw: &mut *self.raw,
            hide_pre_len,
            hide_post_len,
        }
    }

    /// Reborrows this window narrowed to `visible_range`.
    ///
    /// `visible_range` is given in this window's visible coordinates; the
    /// bytes outside it are added to the hidden prefix and suffix.
    fn tighten(&mut self, visible_range: Range<usize>) -> Hide<'_> {
        debug_assert!(visible_range.start <= self.len());
        debug_assert!(visible_range.end <= self.len());
        let newly_hidden_tail = self.len() - visible_range.end;
        let hide_pre_len = self.hide_pre_len + visible_range.start;
        let hide_post_len = self.hide_post_len + newly_hidden_tail;
        Hide {
            raw: &mut *self.raw,
            hide_pre_len,
            hide_post_len,
        }
    }

    /// Returns the hidden leading bytes of `raw`.
    fn hidden_prefix(&self) -> &[u8] {
        let prefix_len = self.hide_pre_len;
        &self.raw[..prefix_len]
    }

    /// Returns the hidden trailing bytes of `raw`.
    fn hidden_suffix(&self) -> &[u8] {
        let suffix_start = self.raw.len() - self.hide_post_len;
        &self.raw[suffix_start..]
    }
}

impl Deref for Hide<'_> {
    type Target = [u8];

    /// Returns the visible bytes.
    fn deref(&self) -> &[u8] {
        let visible_end = self.raw.len() - self.hide_post_len;
        &self.raw[self.hide_pre_len..visible_end]
    }
}

impl DerefMut for Hide<'_> {
    /// Returns the visible bytes mutably.
    fn deref_mut(&mut self) -> &mut [u8] {
        let visible_end = self.raw.len() - self.hide_post_len;
        let visible = self.hide_pre_len..visible_end;
        &mut self.raw[visible]
    }
}
/// A view of the transliteration buffer with a modifiable range and a cursor.
///
/// All offsets are byte offsets into the visible part of `content`.
pub(crate) struct Replaceable<'a> {
    /// The visible bytes this value works on.
    content: Hide<'a>,
    /// Number of leading visible bytes that lie before the allowed range.
    freeze_pre_len: usize,
    /// Number of trailing visible bytes that lie after the allowed range.
    freeze_post_len: usize,
    /// Current position within the visible content.
    cursor: usize,
}

impl<'a> Replaceable<'a> {
    /// Creates a `Replaceable` over all of `buf`, with nothing frozen and the
    /// cursor at 0.
    pub(crate) fn new(buf: &'a mut TransliteratorBuffer) -> Self {
        let whole = Hide::new(&mut buf.0);
        // SAFETY: `from_hide` expects UTF-8 content (it debug-asserts this).
        // NOTE(review): this relies on `buf` holding UTF-8, which is the case
        // for buffers built with `TransliteratorBuffer::from_string`.
        unsafe { Self::from_hide(whole) }
    }

    /// Wraps `content` with nothing frozen and the cursor at 0.
    ///
    /// # Safety
    /// The visible bytes of `content` are expected to be valid UTF-8; this is
    /// only checked with a `debug_assert!`.
    unsafe fn from_hide(content: Hide<'a>) -> Self {
        debug_assert!(core::str::from_utf8(&content).is_ok());
        Replaceable {
            cursor: 0,
            freeze_pre_len: 0,
            freeze_post_len: 0,
            content,
        }
    }

    /// Replaces the bytes of the allowed range with `s`.
    ///
    /// The cursor and the freeze lengths are left untouched.
    pub(crate) fn replace_modifiable_with_str(&mut self, s: &str) {
        let modifiable = self.allowed_range();
        self.content.splice(modifiable, s.bytes());
    }

    /// Returns the whole visible content as a string slice.
    pub(crate) fn as_str(&self) -> &str {
        debug_assert!(core::str::from_utf8(&self.content).is_ok());
        let bytes: &[u8] = &self.content;
        // SAFETY: the content is expected to be UTF-8 (debug-asserted above).
        // NOTE(review): relies on every writer keeping the content valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(bytes) }
    }

    /// Returns the part of the visible content inside the allowed range.
    pub(crate) fn as_str_modifiable(&self) -> &str {
        let whole = self.as_str();
        &whole[self.allowed_range()]
    }

    /// Returns the range of the visible content that is not frozen.
    pub(crate) fn allowed_range(&self) -> Range<usize> {
        Range {
            start: self.freeze_pre_len,
            end: self.allowed_upper_bound(),
        }
    }

    /// Returns the current cursor position.
    pub(crate) fn cursor(&self) -> usize {
        self.cursor
    }

    /// Advances the cursor past the `char` that starts at it, if there is one.
    pub(crate) fn step_cursor(&mut self) {
        let next_char = self.as_str()[self.cursor..].chars().next();
        let step_len = match next_char {
            Some(c) => c.len_utf8(),
            None => 0,
        };
        self.cursor += step_len;
    }

    /// Moves the cursor to `cursor`.
    ///
    /// # Safety
    /// `cursor` must lie within the allowed range (debug-asserted).
    /// NOTE(review): presumably it must also be a char boundary, because
    /// `step_cursor` slices `as_str()` at the cursor; confirm with callers.
    unsafe fn set_cursor(&mut self, cursor: usize) {
        debug_assert!(cursor <= self.allowed_upper_bound());
        debug_assert!(cursor >= self.freeze_pre_len);
        self.cursor = cursor;
    }

    /// Returns `true` once the cursor has reached the end of the allowed range.
    pub(crate) fn is_finished(&self) -> bool {
        debug_assert!(self.cursor <= self.allowed_upper_bound());
        let upper_bound = self.allowed_upper_bound();
        self.cursor >= upper_bound
    }

    /// Reborrows this value with the same frozen lengths and cursor.
    pub(crate) fn child(&mut self) -> Replaceable<'_> {
        let (freeze_pre_len, freeze_post_len, cursor) =
            (self.freeze_pre_len, self.freeze_post_len, self.cursor);
        Replaceable {
            content: self.content.child(),
            freeze_pre_len,
            freeze_post_len,
            cursor,
        }
    }

    /// Calls `f` once for every filtered run inside the allowed range.
    ///
    /// Scanning starts at the beginning of the allowed range; after each call
    /// it resumes at the end of the run that `f` just received, as reported by
    /// the run itself.
    pub(crate) fn for_each_run<F>(&mut self, filter: &Filter, mut f: F)
    where
        F: FnMut(&mut Replaceable),
    {
        let mut start = self.freeze_pre_len;
        loop {
            // SAFETY: NOTE(review): `start` is either the start of the allowed
            // range or the end of the previous run; `next_filtered_run`
            // debug-asserts that it is a char boundary within bounds.
            let mut run = match unsafe { self.next_filtered_run(start, filter) } {
                Some(run) => run,
                None => break,
            };
            f(&mut run);
            start = run.allowed_upper_bound();
        }
    }

    /// Starts a match at the current cursor with all match lengths at zero.
    pub(super) fn start_match(&mut self) -> RepMatcher<'a, '_, false> {
        // Read the cursor before `self` is moved into the matcher.
        let forward_cursor = self.cursor;
        RepMatcher {
            rep: self,
            key_match_len: 0,
            forward_cursor,
            ante_match_len: 0,
            post_match_len: 0,
        }
    }

    /// Returns the next run at or after `start`, or `None` if there is none.
    ///
    /// If `filter` equals `Filter::all()`, the run spans from `start` to the
    /// allowed upper bound. Otherwise it begins at the first `char` the filter
    /// contains and ends before the next `char` it does not contain, or at
    /// the allowed upper bound. The returned value has the run as its allowed
    /// range and its cursor at the start of the run.
    ///
    /// # Safety
    /// `start` must not exceed the allowed upper bound and must be a char
    /// boundary of `as_str()`; both are only debug-asserted.
    unsafe fn next_filtered_run(
        &mut self,
        start: usize,
        filter: &Filter,
    ) -> Option<Replaceable<'_>> {
        let upper_bound = self.allowed_upper_bound();
        if start == upper_bound {
            return None;
        }
        debug_assert!(
            start < self.allowed_upper_bound(),
            "start `{start}` must be within the content length `{}`",
            self.allowed_upper_bound()
        );
        debug_assert!(self.as_str().is_char_boundary(start));

        let (run_start, run_end) = if filter == &Filter::all() {
            (start, self.allowed_upper_bound())
        } else {
            let first_in =
                self.find_first_char_in_modifiable_range(start, |c| filter.contains(c))?;
            let first_out = self
                .find_first_char_in_modifiable_range(first_in, |c| !filter.contains(c))
                .unwrap_or_else(|| self.allowed_upper_bound());
            (first_in, first_out)
        };

        let freeze_post_len = self.content.len() - run_end;
        Some(Replaceable {
            content: self.content.child(),
            freeze_pre_len: run_start,
            freeze_post_len,
            cursor: run_start,
        })
    }

    /// Returns the offset of the first `char` at or after `start`, and before
    /// the allowed upper bound, for which `f` returns `true`.
    fn find_first_char_in_modifiable_range<F>(&self, start: usize, f: F) -> Option<usize>
    where
        F: Fn(char) -> bool,
    {
        let tail = &self.as_str()[start..self.allowed_upper_bound()];
        tail.find(f).map(|offset| start + offset)
    }

    /// Returns the exclusive end of the allowed range.
    pub(crate) fn allowed_upper_bound(&self) -> usize {
        let visible_len = self.content.len();
        visible_len - self.freeze_post_len
    }
}
impl Debug for Replaceable<'_> {
    /// Writes the hidden prefix bytes, then `[[[`, the frozen prefix, `{{{`,
    /// the allowed range with `|||` at the cursor, `}}}`, the frozen suffix,
    /// `]]]`, and finally the hidden suffix bytes.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        write!(f, "{:?}", self.content.hidden_prefix())?;
        f.write_str("[[[")?;
        let text = self.as_str();
        write!(f, "{}", &text[..self.freeze_pre_len])?;
        f.write_str("{{{")?;
        write!(f, "{}", &text[self.freeze_pre_len..self.cursor()])?;
        f.write_str("|||")?;
        write!(f, "{}", &text[self.cursor()..self.allowed_upper_bound()])?;
        f.write_str("}}}")?;
        write!(f, "{}", &text[self.allowed_upper_bound()..])?;
        f.write_str("]]]")?;
        write!(f, "{:?}", self.content.hidden_suffix())
    }
}
/// State of an in-progress match against a `Replaceable`.
///
/// While `KEY_FINISHED` is `false`, forward consumption is counted in
/// `key_match_len` and limited to the allowed range of `rep`. Once it is
/// `true`, forward consumption is counted in `post_match_len` and may run to
/// the end of the visible content.
///
/// Fields:
/// - `rep`: the replaceable being matched against.
/// - `key_match_len`: bytes consumed forwards before the key was finished.
/// - `ante_match_len`: bytes consumed backwards from `rep`'s cursor.
/// - `post_match_len`: bytes consumed forwards after the key was finished.
/// - `forward_cursor`: position of the next byte to match in the forward direction.
#[derive(Debug)]
pub(super) struct RepMatcher<'a, 'b, const KEY_FINISHED: bool> {
rep: &'b mut Replaceable<'a>,
key_match_len: usize, ante_match_len: usize, post_match_len: usize, forward_cursor: usize, }
impl<'a, 'b> RepMatcher<'a, 'b, true> {
    /// Ends matching and returns an `Insertable` built from this matcher.
    pub(super) fn finish_match(self) -> Insertable<'a, 'b> {
        let finished = self;
        Insertable::from_matcher(finished)
    }

    /// Returns the recorded key, ante and post match lengths.
    fn match_lens(&self) -> MatchLengths {
        let (ante, key, post) = (
            self.ante_match_len,
            self.key_match_len,
            self.post_match_len,
        );
        MatchLengths { ante, key, post }
    }
}
impl<'a, 'b> RepMatcher<'a, 'b, false> {
    /// Finishes the key and then ends matching, returning an `Insertable`.
    pub(super) fn finish_match(self) -> Insertable<'a, 'b> {
        let key_finished = self.finish_key();
        Insertable::from_matcher(key_finished)
    }

    /// Converts this matcher into its key-finished form, carrying over all
    /// match lengths and the forward cursor unchanged.
    pub(super) fn finish_key(self) -> RepMatcher<'a, 'b, true> {
        let Self {
            rep,
            key_match_len,
            ante_match_len,
            post_match_len,
            forward_cursor,
        } = self;
        RepMatcher {
            rep,
            key_match_len,
            ante_match_len,
            post_match_len,
            forward_cursor,
        }
    }
}
impl<const KEY_FINISHED: bool> RepMatcher<'_, '_, KEY_FINISHED> {
    /// Returns how many bytes can still be consumed forwards.
    ///
    /// The limit is the end of the visible content once the key is finished,
    /// and the allowed upper bound before that.
    fn remaining(&self) -> usize {
        let limit = match KEY_FINISHED {
            true => self.rep.content.len(),
            false => self.rep.allowed_upper_bound(),
        };
        limit - self.forward_cursor
    }

    /// Returns the text that can still be consumed forwards.
    fn remaining_forward_slice(&self) -> &str {
        let text = self.rep.as_str();
        match KEY_FINISHED {
            true => &text[self.forward_cursor..],
            false => &text[self.forward_cursor..self.rep.allowed_upper_bound()],
        }
    }

    /// Returns the position up to which the text has not yet been consumed
    /// backwards.
    #[inline]
    fn ante_cursor(&self) -> usize {
        let already_matched = self.ante_match_len;
        self.rep.cursor - already_matched
    }

    /// Returns the text that can still be consumed backwards.
    fn remaining_ante_slice(&self) -> &str {
        let text = self.rep.as_str();
        &text[..self.ante_cursor()]
    }
}
impl<const KEY_FINISHED: bool> Utf8Matcher<Forward> for RepMatcher<'_, '_, KEY_FINISHED> {
    /// Returns the forward cursor.
    fn cursor(&self) -> usize {
        self.forward_cursor
    }

    /// Returns `range` of the visible content, or `None` if it is not a
    /// valid string range.
    fn str_range(&self, range: Range<usize>) -> Option<&str> {
        let text = self.rep.as_str();
        text.get(range)
    }

    /// Returns `true` if nothing is left to consume forwards.
    fn is_empty(&self) -> bool {
        matches!(self.remaining(), 0)
    }

    /// Returns `true` if the remaining forward text starts with `s`.
    fn match_str(&self, s: &str) -> bool {
        self.remaining_forward_slice().strip_prefix(s).is_some()
    }

    /// Returns `true` if the forward cursor is at the start of the visible content.
    fn match_start_anchor(&self) -> bool {
        self.forward_cursor == 0
    }

    /// Returns `true` if the forward cursor is at the end of the visible content.
    fn match_end_anchor(&self) -> bool {
        let visible_len = self.rep.content.len();
        self.forward_cursor == visible_len
    }

    /// Consumes `len` bytes forwards and returns `true`, or returns `false`
    /// without consuming if fewer than `len` bytes remain.
    ///
    /// # Panics
    /// Panics if `len` is not a char boundary of the remaining forward text.
    fn consume(&mut self, len: usize) -> bool {
        if len > self.remaining() {
            return false;
        }
        assert!(self.remaining_forward_slice().is_char_boundary(len));
        // Consumed bytes count towards the post match once the key is finished.
        let matched_len = if KEY_FINISHED {
            &mut self.post_match_len
        } else {
            &mut self.key_match_len
        };
        *matched_len += len;
        self.forward_cursor += len;
        true
    }

    /// Returns the next `char` in the forward direction, if any.
    fn next_char(&self) -> Option<char> {
        let mut upcoming = self.remaining_forward_slice().chars();
        upcoming.next()
    }
}
impl<const KEY_FINISHED: bool> Utf8Matcher<Reverse> for RepMatcher<'_, '_, KEY_FINISHED> {
    /// Returns the ante cursor.
    fn cursor(&self) -> usize {
        self.ante_cursor()
    }

    /// Returns `range` of the visible content, or `None` if it is not a
    /// valid string range.
    fn str_range(&self, range: Range<usize>) -> Option<&str> {
        let text = self.rep.as_str();
        text.get(range)
    }

    /// Returns `true` if nothing is left to consume backwards.
    fn is_empty(&self) -> bool {
        matches!(self.ante_cursor(), 0)
    }

    /// Returns `true` if the remaining ante text ends with `s`.
    fn match_str(&self, s: &str) -> bool {
        self.remaining_ante_slice().strip_suffix(s).is_some()
    }

    /// Returns `true` if the ante cursor is at the start of the visible content.
    fn match_start_anchor(&self) -> bool {
        self.ante_cursor() == 0
    }

    /// Returns `true` if the ante cursor is at the end of the visible content.
    fn match_end_anchor(&self) -> bool {
        let visible_len = self.rep.content.len();
        self.ante_cursor() == visible_len
    }

    /// Consumes `len` bytes backwards and returns `true`, or returns `false`
    /// without consuming if fewer than `len` bytes remain.
    ///
    /// # Panics
    /// Panics if the new ante cursor would not be on a char boundary.
    fn consume(&mut self, len: usize) -> bool {
        if len > self.ante_cursor() {
            return false;
        }
        assert!(self
            .remaining_ante_slice()
            .is_char_boundary(self.ante_cursor() - len));
        self.ante_match_len += len;
        true
    }

    /// Returns the next `char` in the backward direction, if any.
    fn next_char(&self) -> Option<char> {
        let mut preceding = self.remaining_ante_slice().chars();
        preceding.next_back()
    }
}
/// Private module holding the `Sealed` supertrait of `MatchDirection`.
/// `Sealed` is implemented here for `Forward` and `Reverse` only.
mod sealed {
pub(crate) trait Sealed {}
impl Sealed for super::Forward {}
impl Sealed for super::Reverse {}
}
/// Marker type selecting the `Utf8Matcher<Forward>` implementation.
pub(super) struct Forward;
/// Marker type selecting the `Utf8Matcher<Reverse>` implementation.
pub(super) struct Reverse;
/// Direction parameter of `Utf8Matcher`; requires `sealed::Sealed`.
pub(super) trait MatchDirection: sealed::Sealed {}
impl MatchDirection for Forward {}
impl MatchDirection for Reverse {}
/// Matching interface over UTF-8 text, parameterised by the direction `D`.
pub(super) trait Utf8Matcher<D: MatchDirection>: Debug {
    /// Returns the matcher's current position.
    fn cursor(&self) -> usize;

    /// Returns the text in `range`, or `None` if the range is not available.
    fn str_range(&self, range: Range<usize>) -> Option<&str>;

    /// Returns `true` if there is nothing left to consume.
    fn is_empty(&self) -> bool;

    /// Returns `true` if the upcoming input matches `s`.
    fn match_str(&self, s: &str) -> bool;

    /// Consumes `s` if the upcoming input matches it.
    ///
    /// Returns `false` without consuming when `match_str` fails, otherwise
    /// the result of `consume(s.len())`.
    fn match_and_consume_str(&mut self, s: &str) -> bool {
        self.match_str(s) && self.consume(s.len())
    }

    /// Consumes `c` if the upcoming input matches its UTF-8 encoding.
    fn match_and_consume_char(&mut self, c: char) -> bool {
        let mut utf8 = [0u8; 4];
        let encoded: &str = c.encode_utf8(&mut utf8);
        self.match_and_consume_str(encoded)
    }

    /// Returns `true` if the matcher is at the start anchor.
    fn match_start_anchor(&self) -> bool;

    /// Returns `true` if the matcher is at the end anchor.
    fn match_end_anchor(&self) -> bool;

    /// Consumes `len` bytes; returns `false` if that many are not available.
    fn consume(&mut self, len: usize) -> bool;

    /// Returns the next `char` in direction `D`, if any.
    fn next_char(&self) -> Option<char>;
}
/// Byte lengths recorded by a `RepMatcher` for the three parts of a match.
#[derive(Debug, Clone, Copy)]
struct MatchLengths {
/// Bytes matched backwards from the cursor.
ante: usize,
/// Bytes matched forwards before the key was finished.
key: usize,
/// Bytes matched forwards after the key was finished.
post: usize,
}
/// Writer that replaces the matched key of a `Replaceable` with new text.
///
/// The region being written spans from `start` to `end()`; bytes between
/// `curr` and `end()` are free space that is removed again on drop.
pub(crate) struct Insertable<'a, 'b> {
// The replaceable whose content is being rewritten.
_rep: &'b mut Replaceable<'a>,
// Offset at which the replacement begins (the cursor of `_rep` at creation).
start: usize,
// Number of visible bytes that follow the writable region.
end_len: usize,
// Offset at which the next pushed byte is written.
curr: usize,
// Match lengths taken from the matcher this value was created from.
match_lens: MatchLengths,
// Where the cursor of `_rep` is placed when this value is dropped.
cursor_offset: CursorOffset,
}
impl<'a, 'b> Insertable<'a, 'b> {
    /// Builds an `Insertable` whose writable region is the matched key.
    ///
    /// The region starts at the replaceable's cursor and is `key` bytes long.
    fn from_matcher(matcher: RepMatcher<'a, 'b, true>) -> Insertable<'a, 'b> {
        let match_lens = matcher.match_lens();
        let start = matcher.rep.cursor;
        let key_end = start + match_lens.key;
        let end_len = matcher.rep.content.len() - key_end;
        Insertable {
            _rep: matcher.rep,
            start,
            end_len,
            curr: start,
            match_lens,
            cursor_offset: CursorOffset::Default,
        }
    }

    /// Grows the free space to at least `size` bytes by inserting zero bytes
    /// at the end of the writable region.
    pub(crate) fn apply_size_hint(&mut self, size: usize) {
        let free_bytes = self.free_range().len();
        if free_bytes >= size {
            return;
        }
        let insert_at = self.end();
        self._rep.content.splice(
            insert_at..insert_at,
            core::iter::repeat_n(0, size - free_bytes),
        );
    }

    /// Appends `c` to the replacement.
    pub(crate) fn push(&mut self, c: char) {
        let mut utf8 = [0u8; 4];
        self.push_str(c.encode_utf8(&mut utf8));
        debug_assert!(self.curr <= self.end());
    }

    /// Appends `s` to the replacement.
    pub(crate) fn push_str(&mut self, s: &str) {
        let bytes = s.as_bytes();
        // SAFETY: `bytes` comes from a `&str`, so it is valid UTF-8.
        unsafe { self.push_utf8(bytes) };
        debug_assert!(self.curr <= self.end());
    }

    /// Appends `code_units` to the replacement.
    ///
    /// If the free space is large enough the bytes are copied into it;
    /// otherwise the whole free space is replaced by `code_units`.
    ///
    /// # Safety
    /// NOTE(review): `code_units` is presumably required to be valid UTF-8,
    /// since `curr_replacement` reads the written bytes unchecked.
    unsafe fn push_utf8(&mut self, code_units: &[u8]) {
        let needed = code_units.len();
        if self.free_range().len() >= needed {
            let dest = self.curr..self.curr + needed;
            self._rep.content[dest].copy_from_slice(code_units);
            self.curr += needed;
        } else {
            let free = self.free_range();
            self._rep.content.splice(free, code_units.iter().copied());
            // The free space is used up, so the write position is the end.
            self.curr = self.end();
        }
    }

    /// Returns the number of bytes written so far.
    pub(crate) fn curr_replacement_len(&self) -> usize {
        self.curr - self.start
    }

    /// Returns the text written so far.
    pub(crate) fn curr_replacement(&self) -> &str {
        let written = &self._rep.content[self.start..self.curr];
        // SAFETY: NOTE(review): assumes everything in `start..curr` was
        // written through `push_utf8` with valid UTF-8; this is not checked.
        unsafe { core::str::from_utf8_unchecked(written) }
    }

    /// Requests that the cursor end up at the current write position.
    pub(super) fn set_offset_to_here(&mut self) {
        let here = self.curr_replacement_len();
        self.cursor_offset = CursorOffset::Byte(here);
    }

    /// Requests that the cursor end up `count` chars past the replacement.
    pub(super) fn set_offset_to_chars_off_end(&mut self, count: u16) {
        self.cursor_offset = CursorOffset::CharsOffEnd(count);
    }

    /// Requests that the cursor end up `count` chars before the replacement.
    pub(super) fn set_offset_to_chars_off_start(&mut self, count: u16) {
        self.cursor_offset = CursorOffset::CharsOffStart(count);
    }

    /// Removes the free space and sets the replaceable's cursor according to
    /// `cursor_offset`.
    fn cleanup(&mut self) {
        self.make_contiguous();
        let rep = &self._rep;
        let start = self.start;
        let replacement_len = self.curr_replacement_len();
        let cursor = match self.cursor_offset {
            CursorOffset::Default => start + replacement_len,
            CursorOffset::Byte(offset) => start + offset,
            CursorOffset::CharsOffEnd(count) => {
                // Walk `count` chars into the matched post context, but never
                // past the end of the allowed range.
                let post_start = start + replacement_len;
                let post_end = post_start + self.match_lens.post;
                let matched_post = &rep.as_str()[post_start..post_end];
                let skipped: usize = matched_post
                    .chars()
                    .take(usize::from(count))
                    .map(char::len_utf8)
                    .sum();
                let wanted = start + replacement_len + skipped;
                wanted.min(rep.allowed_range().end)
            }
            CursorOffset::CharsOffStart(count) => {
                // Walk `count` chars back into the matched ante context, but
                // never before the start of the allowed range.
                let ante = &rep.as_str()[..start];
                let matched_ante = &ante[(ante.len() - self.match_lens.ante)..];
                let backed_up: usize = matched_ante
                    .chars()
                    .rev()
                    .take(usize::from(count))
                    .map(char::len_utf8)
                    .sum();
                let wanted = start - backed_up;
                wanted.max(rep.allowed_range().start)
            }
        };
        // SAFETY: NOTE(review): the two clamped arms stay within the allowed
        // range; the `Default` and `Byte` arms are not clamped here, so
        // confirm they always yield an in-range char boundary.
        unsafe { self._rep.set_cursor(cursor) };
    }

    /// Removes the free space between the write position and the end.
    fn make_contiguous(&mut self) {
        let free = self.free_range();
        self._rep.content.splice(free, core::iter::empty());
    }

    /// Returns the range of bytes that can be overwritten without growing.
    fn free_range(&self) -> Range<usize> {
        debug_assert!(self.curr <= self.end());
        Range {
            start: self.curr,
            end: self.end(),
        }
    }

    /// Returns the exclusive end of the writable region.
    fn end(&self) -> usize {
        let visible_len = self._rep.content.len();
        visible_len - self.end_len
    }

    /// Starts an adapter that writes at the current position.
    ///
    /// The adapter owns a child `Insertable` that begins at `curr`; when the
    /// adapter is dropped, this value's `curr` is set to the child's `curr`.
    pub(super) fn start_replaceable_adapter(
        &mut self,
    ) -> InsertableToReplaceableAdapter<'a, '_, impl FnMut(usize) + '_> {
        let range_start = self.curr;
        let child = ManuallyDrop::new(Insertable {
            _rep: &mut *self._rep,
            start: range_start,
            end_len: self.end_len,
            curr: range_start,
            match_lens: self.match_lens,
            cursor_offset: CursorOffset::Default,
        });
        // Borrows only `self.curr`, which is disjoint from the `_rep` reborrow.
        let on_drop = |new_curr: usize| {
            self.curr = new_curr;
        };
        InsertableToReplaceableAdapter {
            child,
            range_start,
            on_drop,
        }
    }
}
impl Drop for Insertable<'_, '_> {
fn drop(&mut self) {
self.cleanup();
}
}
impl Debug for Insertable<'_, '_> {
    /// Writes the current replacement, `|`, and the number of free bytes.
    fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
        let replacement = self.curr_replacement();
        let free_bytes = self.free_range().len();
        write!(f, "{replacement}|{free_bytes}")
    }
}
/// Adapter exposing the text written by a child `Insertable` as a `Replaceable`.
///
/// On drop, `on_drop` is called with the child's final `curr`.
pub(super) struct InsertableToReplaceableAdapter<'a, 'b, F>
where
F: FnMut(usize),
{
// Child insertable; wrapped in `ManuallyDrop`, so its `Drop` impl does not
// run automatically.
child: ManuallyDrop<Insertable<'a, 'b>>,
// Offset at which the child's output begins.
range_start: usize,
// Callback invoked with the child's `curr` when the adapter is dropped.
on_drop: F,
}
impl<F> InsertableToReplaceableAdapter<'_, '_, F>
where
F: FnMut(usize),
{
pub(super) fn as_replaceable(&mut self) -> InsertableGuard<'_, impl FnMut(&[u8]) + '_> {
self.child.make_contiguous();
let range_end = self.child.curr;
let visible_range = self.range_start..range_end;
let child = self.child.deref_mut();
let hidden_len = child._rep.content.len() - visible_range.len();
let content = &mut child._rep.content;
let modifiable_content = content.tighten(visible_range);
let rep = unsafe { Replaceable::from_hide(modifiable_content) };
let child_curr = &mut child.curr;
let child_end_len = &child.end_len;
let on_drop = move |new_content: &[u8]| {
*child_curr = new_content.len() + hidden_len - child_end_len;
};
InsertableGuard::new(rep, on_drop)
}
}
impl<F> Drop for InsertableToReplaceableAdapter<'_, '_, F>
where
    F: FnMut(usize),
{
    /// Reports the child's final write position through `on_drop`.
    fn drop(&mut self) {
        let final_curr = self.child.curr;
        (self.on_drop)(final_curr);
    }
}
impl<'a, 'b, F> Deref for InsertableToReplaceableAdapter<'a, 'b, F>
where
    F: FnMut(usize),
{
    type Target = Insertable<'a, 'b>;

    /// Gives shared access to the child `Insertable`.
    fn deref(&self) -> &Insertable<'a, 'b> {
        &self.child
    }
}
impl<F> DerefMut for InsertableToReplaceableAdapter<'_, '_, F>
where
    F: FnMut(usize),
{
    /// Gives mutable access to the child `Insertable`.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.child
    }
}
/// Guard owning a `Replaceable`; on drop, `on_drop` is called with the
/// replaceable's visible content.
pub(super) struct InsertableGuard<'a, F>
where
F: FnMut(&[u8]),
{
// The replaceable handed out through `child`.
rep: Replaceable<'a>,
// Callback invoked with the visible content when the guard is dropped.
on_drop: F,
}
impl<'a, F> InsertableGuard<'a, F>
where
F: FnMut(&[u8]),
{
fn new(rep: Replaceable<'a>, on_drop: F) -> Self {
Self { rep, on_drop }
}
pub(crate) fn child(&mut self) -> Replaceable<'_> {
self.rep.child()
}
}
impl<F> Drop for InsertableGuard<'_, F>
where
    F: FnMut(&[u8]),
{
    /// Passes the visible content of the guarded replaceable to `on_drop`.
    fn drop(&mut self) {
        let visible: &[u8] = &self.rep.content;
        (self.on_drop)(visible);
    }
}
/// Where `Insertable::cleanup` places the cursor of the replaceable.
#[derive(Debug, Clone, Copy, Default)]
enum CursorOffset {
/// Directly after the replacement.
#[default]
Default,
/// The given number of bytes after the start of the replacement.
Byte(usize),
/// Up to the given number of chars into the matched post context after the
/// replacement, clamped to the end of the allowed range.
CharsOffEnd(u16),
/// Up to the given number of chars back into the matched ante context
/// before the replacement, clamped to the start of the allowed range.
CharsOffStart(u16),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `into_string` must panic when the buffer holds bytes that are not UTF-8.
    #[test]
    #[should_panic(expected = "valid UTF-8")]
    fn test_into_string_rejects_invalid_utf8() {
        let invalid_bytes = vec![0xFF, 0xFE, 0xFD];
        let _ = TransliteratorBuffer(invalid_bytes).into_string();
    }
}