#![allow(dead_code)]
use std::{borrow::Cow, ops::Range};
use unicode_segmentation::{GraphemeCursor, UnicodeSegmentation};
/// Snaps `offset` down to the nearest UTF-8 char boundary of `s`.
///
/// The offset is first capped at `s.len()`; from there the search walks
/// backwards until it lands on a position accepted by `str::is_char_boundary`.
/// Offset `0` is always a boundary, so the search cannot fail.
fn clamp_to_char_boundary(s: &str, offset: usize) -> usize {
    let upper = offset.min(s.len());
    (0..=upper)
        .rev()
        .find(|&candidate| s.is_char_boundary(candidate))
        .unwrap_or(0)
}
/// Turns an arbitrary byte range into one that is safe to slice `s` with.
///
/// Both endpoints are individually snapped to char boundaries via
/// `clamp_to_char_boundary`; if the resulting endpoints are reversed they are
/// swapped so that the returned range always satisfies `start <= end`.
fn normalize_range(s: &str, range: Range<usize>) -> Range<usize> {
    let first = clamp_to_char_boundary(s, range.start);
    let second = clamp_to_char_boundary(s, range.end);
    first.min(second)..first.max(second)
}
/// Text that can be edited in place and navigated by offset.
///
/// The notes on individual methods describe what the `String` implementation
/// in this file does; other implementors are not visible here and may differ.
/// For `String`, offsets are byte offsets, and every incoming offset is first
/// snapped down to a char boundary (and capped at the text length).
pub trait EditableText: Sized {
    // --- construction and size -------------------------------------------

    /// Builds a new value holding a copy of `s`.
    fn from_str(s: &str) -> Self;

    /// Length of the text. For `String` this is the length in bytes.
    fn len(&self) -> usize;

    /// Whether the text has zero length.
    fn is_empty(&self) -> bool;

    // --- mutation and slicing --------------------------------------------

    /// Replaces the text covered by `range` with `new`.
    ///
    /// The `String` implementation normalizes `range` first (endpoints are
    /// clamped to char boundaries and swapped if reversed).
    fn edit(&mut self, range: Range<usize>, new: impl Into<Self>);

    /// Returns the text covered by `range`, or `None` if it cannot be sliced.
    fn slice(&self, range: Range<usize>) -> Option<Cow<str>>;

    // --- code points ------------------------------------------------------

    /// Offset at which the code point preceding `offset` starts, or `None`
    /// when `offset` is at the start of the text.
    fn prev_codepoint_offset(&self, offset: usize) -> Option<usize>;

    /// Offset just past the code point that starts at `offset`, or `None`
    /// when `offset` is at the end of the text.
    fn next_codepoint_offset(&self, offset: usize) -> Option<usize>;

    /// The code point immediately preceding `offset`, if there is one.
    fn prev_codepoint(&self, offset: usize) -> Option<char>;

    // --- grapheme clusters ------------------------------------------------

    /// The grapheme cluster boundary before `offset`, if any.
    fn prev_grapheme_offset(&self, offset: usize) -> Option<usize>;

    /// The grapheme cluster boundary after `offset`, if any.
    fn next_grapheme_offset(&self, offset: usize) -> Option<usize>;

    /// Position of the grapheme cluster at `offset`.
    ///
    /// NOTE: despite the name, the `String` implementation returns the
    /// zero-based *index* of the cluster containing `offset` (i.e. a count of
    /// clusters, not a byte offset), and the total number of clusters when
    /// `offset` is at or beyond the end of the text.
    fn current_grapheme_offset(&self, offset: usize) -> usize;

    // --- words --------------------------------------------------------------

    /// Start of the word preceding `offset`.
    ///
    /// The `String` implementation walks backwards over grapheme clusters and
    /// stops at the first non-alphanumeric cluster met after at least one
    /// alphanumeric one. It returns `None` if the start of the text is reached
    /// without stopping.
    fn prev_word_offset(&self, offset: usize) -> Option<usize>;

    /// End of the word following `offset`.
    ///
    /// The `String` implementation walks forwards with the same rule as
    /// `prev_word_offset`, but yields the text length (not `None`) when the
    /// end of the text is reached.
    fn next_word_offset(&self, offset: usize) -> Option<usize>;

    // --- lines --------------------------------------------------------------

    /// For `String`: the offset just after the closest `\n` before `offset`,
    /// or `0` when no line break precedes it.
    fn preceding_line_break(&self, offset: usize) -> usize;

    /// For `String`: the offset of the first `\n` at or after `offset`, or the
    /// text length when there is none.
    fn next_line_break(&self, offset: usize) -> usize;
}
/// Byte-offset based editing and navigation over an owned UTF-8 `String`.
///
/// Every offset handed to these methods is first passed through
/// `clamp_to_char_boundary` (or `normalize_range` for ranges), so an offset
/// that is out of range or falls inside a multi-byte code point is snapped
/// down instead of causing a slicing panic.
impl EditableText for String {
    /// Copies `s` into a fresh `String`.
    fn from_str(s: &str) -> Self {
        String::from(s)
    }

    /// Length of the string in bytes.
    fn len(&self) -> usize {
        // Go through `str` explicitly so it is obvious this is not a recursive
        // call to the trait method of the same name.
        self.as_str().len()
    }

    /// `true` when the string holds no bytes.
    fn is_empty(&self) -> bool {
        self.as_str().is_empty()
    }

    /// Replaces the (normalized) `range` with `new`.
    fn edit(&mut self, range: Range<usize>, new: impl Into<Self>) {
        let target = normalize_range(self, range);
        let replacement: String = new.into();
        self.replace_range(target, &replacement);
    }

    /// Borrows the text inside the (normalized) `range`.
    fn slice(&self, range: Range<usize>) -> Option<Cow<str>> {
        let bounds = normalize_range(self, range);
        let text = self.get(bounds)?;
        Some(Cow::Borrowed(text))
    }

    /// Start offset of the code point that ends at `current_pos`; `None` at
    /// the start of the string.
    fn prev_codepoint_offset(&self, current_pos: usize) -> Option<usize> {
        let pos = clamp_to_char_boundary(self, current_pos);
        // An empty prefix (pos == 0) has no last char, which yields `None`.
        let (start, _) = self.get(..pos)?.char_indices().next_back()?;
        Some(start)
    }

    /// Offset just past the code point starting at `current_pos`; `None` at
    /// the end of the string.
    fn next_codepoint_offset(&self, current_pos: usize) -> Option<usize> {
        let pos = clamp_to_char_boundary(self, current_pos);
        // An empty suffix (pos == len) has no first char, which yields `None`.
        let ch = self.get(pos..)?.chars().next()?;
        Some(pos + ch.len_utf8())
    }

    /// The code point that ends at `offset`, if any.
    fn prev_codepoint(&self, offset: usize) -> Option<char> {
        let start = self.prev_codepoint_offset(offset)?;
        self[start..].chars().next()
    }

    /// Grapheme cluster boundary preceding `from`, as reported by
    /// `GraphemeCursor::prev_boundary`; cursor errors are mapped to `None`.
    fn prev_grapheme_offset(&self, from: usize) -> Option<usize> {
        let start = clamp_to_char_boundary(self, from);
        let mut cursor = GraphemeCursor::new(start, self.len(), true);
        match cursor.prev_boundary(self, 0) {
            Ok(boundary) => boundary,
            Err(_) => None,
        }
    }

    /// Grapheme cluster boundary following `from`, as reported by
    /// `GraphemeCursor::next_boundary`; cursor errors are mapped to `None`.
    fn next_grapheme_offset(&self, from: usize) -> Option<usize> {
        let start = clamp_to_char_boundary(self, from);
        let mut cursor = GraphemeCursor::new(start, self.len(), true);
        match cursor.next_boundary(self, 0) {
            Ok(boundary) => boundary,
            Err(_) => None,
        }
    }

    /// Zero-based index of the grapheme cluster whose byte span contains
    /// `from`. When no cluster contains it (i.e. `from` is at the end of the
    /// string) the total number of clusters is returned instead.
    fn current_grapheme_offset(&self, from: usize) -> usize {
        let pos = clamp_to_char_boundary(self, from);
        self.grapheme_indices(true)
            .position(|(start, cluster)| start <= pos && pos < start + cluster.len())
            .unwrap_or_else(|| self.graphemes(true).count())
    }

    /// Scans backwards from `from` over grapheme clusters. Once at least one
    /// alphanumeric cluster has been passed, the end offset of the first
    /// non-alphanumeric cluster encountered is returned. Reaching the start of
    /// the string yields `None`.
    fn prev_word_offset(&self, from: usize) -> Option<usize> {
        let end = clamp_to_char_boundary(self, from);
        let head = self.get(..end)?;
        let mut seen_word = false;
        for (start, cluster) in head.grapheme_indices(true).rev() {
            // Only the first scalar of a cluster decides its class.
            let leading = cluster.chars().next()?;
            match (leading.is_alphanumeric(), seen_word) {
                (true, _) => seen_word = true,
                // The boundary sits between this cluster and the word after it.
                (false, true) => return Some(start + cluster.len()),
                (false, false) => {}
            }
        }
        None
    }

    /// Scans forwards from `from` over grapheme clusters. Once at least one
    /// alphanumeric cluster has been passed, the start offset of the first
    /// non-alphanumeric cluster encountered is returned. Reaching the end of
    /// the string yields `Some(self.len())`.
    fn next_word_offset(&self, from: usize) -> Option<usize> {
        let begin = clamp_to_char_boundary(self, from);
        let tail = self.get(begin..)?;
        let mut seen_word = false;
        for (rel, cluster) in tail.grapheme_indices(true) {
            // Only the first scalar of a cluster decides its class.
            let leading = cluster.chars().next()?;
            match (leading.is_alphanumeric(), seen_word) {
                (true, _) => seen_word = true,
                (false, true) => return Some(begin + rel),
                (false, false) => {}
            }
        }
        Some(self.len())
    }

    /// Offset just after the last `\n` that precedes `from`, or `0` if the
    /// text before `from` contains no line feed.
    fn preceding_line_break(&self, from: usize) -> usize {
        let pos = clamp_to_char_boundary(self, from);
        self.get(..pos)
            .unwrap_or("")
            .rfind('\n')
            .map_or(0, |newline| newline + 1)
    }

    /// Offset of the first `\n` at or after `from`, or the string length if
    /// the rest of the text contains no line feed.
    fn next_line_break(&self, from: usize) -> usize {
        let pos = clamp_to_char_boundary(self, from);
        self.get(pos..)
            .unwrap_or("")
            .find('\n')
            .map_or(self.len(), |rel| pos + rel)
    }
}
/// Maps the first byte of a UTF-8 encoded code point to the length, in bytes,
/// of the whole encoded sequence.
///
/// No validation is performed: the byte is only bucketed by value, so bytes
/// that cannot start a sequence still get an answer (`0x80..=0xbf` map to `2`,
/// `0xf8..=0xff` map to `4`).
pub fn len_utf8_from_first_byte(b: u8) -> usize {
    match b {
        0x00..=0x7f => 1,
        0x80..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}