use std::fmt;
/// Errors reported by the text-view primitives in this module.
///
/// Every variant carries the offending value together with the limit it was
/// checked against, so the [`fmt::Display`] output is self-explanatory.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TextViewError {
    /// A range was requested whose `start` is greater than its `end`.
    InvalidRange { start: usize, end: usize },
    /// A byte `offset` was rejected as out of bounds; `len` is the length
    /// reported alongside it.
    OffsetOutOfBounds { offset: usize, len: usize },
    /// A byte `offset` does not fall on a UTF-8 character boundary.
    InvalidUtf8Boundary { offset: usize },
    /// A `line` index was rejected; `line_count` is the number of lines
    /// reported alongside it.
    LineOutOfBounds { line: u32, line_count: u32 },
    /// A UTF-16 `offset` was rejected; `total` is the UTF-16 length reported
    /// alongside it.
    Utf16OffsetOutOfBounds { offset: usize, total: usize },
}

impl fmt::Display for TextViewError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // All payload fields are `Copy` integers, so matching on `*self`
        // binds them by value.
        match *self {
            Self::InvalidRange { start: lo, end: hi } => {
                write!(f, "invalid range: start {lo} > end {hi}")
            }
            Self::OffsetOutOfBounds { offset: at, len: limit } => {
                write!(f, "offset {at} out of bounds (len {limit})")
            }
            Self::InvalidUtf8Boundary { offset: at } => {
                write!(f, "offset {at} is not on a UTF-8 char boundary")
            }
            Self::LineOutOfBounds { line: index, line_count: count } => {
                write!(f, "line {index} out of bounds (line_count {count})")
            }
            Self::Utf16OffsetOutOfBounds { offset: at, total: limit } => {
                write!(f, "UTF-16 offset {at} out of bounds (total {limit})")
            }
        }
    }
}

impl std::error::Error for TextViewError {}
/// A line/column pair.
///
/// The derived ordering compares `line` first and `column` second, so the
/// field declaration order below is significant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Position {
    line: u32,
    column: u32,
}

impl Position {
    /// Creates a position from its `line` and `column` components.
    pub const fn new(line: u32, column: u32) -> Self {
        Position { line, column }
    }

    /// Returns the line component.
    pub const fn line(&self) -> u32 {
        self.line
    }

    /// Returns the column component.
    pub const fn column(&self) -> u32 {
        self.column
    }
}
/// A half-open span `[start, end)` of offsets, with `start <= end`.
///
/// All methods are `const fn`, matching the sibling `Position` and
/// `Selection` value types, so ranges can be built and queried in constant
/// contexts.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextRange {
    start: usize,
    end: usize,
}

impl TextRange {
    /// Creates the range `[start, end)`.
    ///
    /// # Errors
    ///
    /// Returns [`TextViewError::InvalidRange`] when `start > end`.
    pub const fn new(start: usize, end: usize) -> Result<Self, TextViewError> {
        if start <= end {
            Ok(Self { start, end })
        } else {
            Err(TextViewError::InvalidRange { start, end })
        }
    }

    /// Creates the zero-length range `[offset, offset)`.
    pub const fn empty(offset: usize) -> Self {
        Self {
            start: offset,
            end: offset,
        }
    }

    /// Returns the inclusive lower bound.
    pub const fn start(&self) -> usize {
        self.start
    }

    /// Returns the exclusive upper bound.
    pub const fn end(&self) -> usize {
        self.end
    }

    /// Returns the number of offsets covered, `end - start`.
    pub const fn len(&self) -> usize {
        // Cannot underflow: every construction path keeps `start <= end`.
        self.end - self.start
    }

    /// Returns `true` when the range covers no offsets.
    pub const fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Returns `true` when `start <= offset < end`.
    ///
    /// The upper bound is exclusive, so an empty range contains nothing.
    pub const fn contains(&self, offset: usize) -> bool {
        self.start <= offset && offset < self.end
    }

    /// Returns `true` when the two ranges share at least one offset.
    ///
    /// Ranges that merely touch (`a.end == b.start`) do not intersect, and an
    /// empty range never intersects anything.
    pub const fn intersects(&self, other: &TextRange) -> bool {
        self.start < other.end && other.start < self.end
    }
}
/// A pair of offsets, `anchor` and `head`.
///
/// `head` may lie before, at, or after `anchor`; [`Selection::range`]
/// normalizes the pair into an ordered [`TextRange`]. Every method is a
/// `const fn`, so selections can be built and queried in constant contexts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Selection {
    anchor: usize,
    head: usize,
}

impl Selection {
    /// Creates a selection from `anchor` to `head`.
    pub const fn new(anchor: usize, head: usize) -> Self {
        Self { anchor, head }
    }

    /// Creates a zero-width selection with both ends at `offset`.
    pub const fn cursor(offset: usize) -> Self {
        Self {
            anchor: offset,
            head: offset,
        }
    }

    /// Returns the anchor offset.
    pub const fn anchor(&self) -> usize {
        self.anchor
    }

    /// Returns the head offset.
    pub const fn head(&self) -> usize {
        self.head
    }

    /// Returns the covered span with the smaller offset as `start` and the
    /// larger as `end`, regardless of the selection's direction.
    pub const fn range(&self) -> TextRange {
        // Plain comparison instead of `Ord::min`/`max`, which are not
        // callable from a `const fn`.
        let (start, end) = if self.anchor <= self.head {
            (self.anchor, self.head)
        } else {
            (self.head, self.anchor)
        };
        TextRange { start, end }
    }

    /// Returns `true` when anchor and head coincide.
    pub const fn is_cursor(&self) -> bool {
        self.anchor == self.head
    }

    /// Returns `true` when the head is at or after the anchor.
    ///
    /// A cursor (zero-width selection) counts as forward.
    pub const fn is_forward(&self) -> bool {
        self.anchor <= self.head
    }
}
/// Table of line start offsets for a text, used to convert between byte
/// offsets and line/column [`Position`]s.
///
/// Lines are split on `\n` bytes only. Offsets and columns are measured in
/// bytes; a column is the byte distance from the start of its line.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of the first byte of every line; the first entry is 0.
    line_starts: Vec<u32>,
    /// Byte length of the indexed text.
    len: u32,
}

impl LineIndex {
    /// Builds the index by scanning `text` for `\n` bytes.
    ///
    /// # Panics
    ///
    /// Panics if `text` is longer than `u32::MAX` bytes.
    pub fn new(text: &str) -> Self {
        let len = u32::try_from(text.len()).expect("text length exceeds u32::MAX");
        let mut line_starts = vec![0u32];
        // A new line begins on the byte right after each `\n`.
        line_starts.extend(
            text.bytes()
                .enumerate()
                .filter(|&(_, byte)| byte == b'\n')
                .map(|(i, _)| u32::try_from(i + 1).expect("offset exceeds u32::MAX")),
        );
        Self { line_starts, len }
    }

    /// Returns the number of lines; an empty text has one (empty) line.
    ///
    /// # Panics
    ///
    /// Panics if the number of lines does not fit in a `u32`.
    pub fn line_count(&self) -> u32 {
        u32::try_from(self.line_starts.len()).expect("line count exceeds u32::MAX")
    }

    /// Returns the byte length of the indexed text.
    pub fn text_len(&self) -> u32 {
        self.len
    }

    /// Converts a byte `offset` into a line/column position.
    ///
    /// `text` is consulted only for its character boundaries; it is expected
    /// to be the string this index was built from. An offset that points at a
    /// `\n` belongs to the line that the newline terminates.
    ///
    /// # Errors
    ///
    /// * [`TextViewError::OffsetOutOfBounds`] when `offset` exceeds the
    ///   indexed length.
    /// * [`TextViewError::InvalidUtf8Boundary`] when `offset` is inside a
    ///   multi-byte character of `text`.
    pub fn offset_to_position(
        &self,
        text: &str,
        offset: usize,
    ) -> Result<Position, TextViewError> {
        let len = self.len as usize;
        if offset > len {
            return Err(TextViewError::OffsetOutOfBounds { offset, len });
        }
        if offset < len && !text.is_char_boundary(offset) {
            return Err(TextViewError::InvalidUtf8Boundary { offset });
        }
        let line = self.line_of_offset(offset)?;
        let line_start = self.line_starts[line as usize] as usize;
        let column = u32::try_from(offset - line_start).expect("column exceeds u32::MAX");
        Ok(Position::new(line, column))
    }

    /// Converts a line/column `position` into a byte offset.
    ///
    /// The largest valid column on a line is the length of its content, i.e.
    /// the position of its terminating `\n` (or the end of the text for the
    /// last line). A column past the newline is rejected rather than being
    /// mapped onto the following line, so every accepted position converts
    /// back to itself through [`LineIndex::offset_to_position`].
    ///
    /// # Errors
    ///
    /// * [`TextViewError::LineOutOfBounds`] when the line does not exist.
    /// * [`TextViewError::OffsetOutOfBounds`] when the column lies beyond the
    ///   end of the line's content.
    /// * [`TextViewError::InvalidUtf8Boundary`] when the resulting offset is
    ///   inside a multi-byte character of `text`.
    pub fn position_to_offset(
        &self,
        text: &str,
        position: Position,
    ) -> Result<usize, TextViewError> {
        let (line_start, line_end) = self.content_span(position.line())?;
        // Saturate instead of wrapping on targets where `usize` is 32 bits;
        // a saturated value is always past `line_end` and gets rejected.
        let offset = line_start.saturating_add(position.column() as usize);
        if offset > line_end {
            return Err(TextViewError::OffsetOutOfBounds {
                offset,
                len: self.len as usize,
            });
        }
        if offset < text.len() && !text.is_char_boundary(offset) {
            return Err(TextViewError::InvalidUtf8Boundary { offset });
        }
        Ok(offset)
    }

    /// Returns the byte range of `line` without its terminating `\n`.
    ///
    /// # Errors
    ///
    /// Returns [`TextViewError::LineOutOfBounds`] when the line does not exist.
    pub fn line_range(&self, line: u32) -> Result<TextRange, TextViewError> {
        let (start, end) = self.content_span(line)?;
        Ok(TextRange { start, end })
    }

    /// Returns the byte range of `line` including its terminating `\n`, if
    /// it has one.
    ///
    /// # Errors
    ///
    /// Returns [`TextViewError::LineOutOfBounds`] when the line does not exist.
    pub fn line_range_with_newline(&self, line: u32) -> Result<TextRange, TextViewError> {
        let (start, end) = self.span_with_newline(line)?;
        Ok(TextRange { start, end })
    }

    /// Returns the index of the line containing byte `offset`.
    ///
    /// # Errors
    ///
    /// Returns [`TextViewError::OffsetOutOfBounds`] when `offset` exceeds the
    /// indexed length.
    pub fn line_of_offset(&self, offset: usize) -> Result<u32, TextViewError> {
        let len = self.len as usize;
        if offset > len {
            return Err(TextViewError::OffsetOutOfBounds { offset, len });
        }
        // Number of lines that start at or before `offset`; at least one,
        // because the first line starts at 0.
        let started = self
            .line_starts
            .partition_point(|&start| (start as usize) <= offset);
        let line = started.saturating_sub(1);
        Ok(u32::try_from(line).expect("line index exceeds u32::MAX"))
    }

    /// Returns `(start, end)` of `line`, where `end` is the start of the next
    /// line or the end of the text for the last line.
    fn span_with_newline(&self, line: u32) -> Result<(usize, usize), TextViewError> {
        let idx = line as usize;
        match self.line_starts.get(idx) {
            Some(&start) => {
                let end = self.line_starts.get(idx + 1).copied().unwrap_or(self.len);
                Ok((start as usize, end as usize))
            }
            None => Err(TextViewError::LineOutOfBounds {
                line,
                line_count: self.line_count(),
            }),
        }
    }

    /// Returns `(start, end)` of `line` with the terminating `\n` excluded.
    fn content_span(&self, line: u32) -> Result<(usize, usize), TextViewError> {
        let (start, end_with_nl) = self.span_with_newline(line)?;
        // Every line except the last was closed by exactly one `\n` byte, so
        // `end_with_nl > start` holds and the subtraction cannot underflow.
        let has_newline = (line as usize) + 1 < self.line_starts.len();
        let end = if has_newline {
            end_with_nl - 1
        } else {
            end_with_nl
        };
        Ok((start, end))
    }
}
/// Conversion checkpoint recorded for one character whose UTF-8 encoding is
/// not exactly one byte long.
#[derive(Debug, Clone)]
struct Utf16Anchor {
    /// Byte offset at which the character starts.
    byte_offset: u32,
    /// UTF-16 code-unit offset at which the character starts.
    utf16_offset: u32,
    /// Length of the character in UTF-8 bytes.
    byte_len: u8,
    /// Length of the character in UTF-16 code units.
    utf16_len: u8,
}

/// Bidirectional mapping between UTF-8 byte offsets and UTF-16 code-unit
/// offsets of one text.
///
/// Only multi-byte characters are stored. Between two stored characters every
/// character is a single byte and a single UTF-16 unit, so offsets there
/// advance in lockstep.
#[derive(Debug, Clone)]
pub struct Utf16Mapping {
    /// Multi-byte characters in text order (ascending in both offsets).
    anchors: Vec<Utf16Anchor>,
    /// Length of the text in UTF-8 bytes.
    total_bytes: u32,
    /// Length of the text in UTF-16 code units.
    total_utf16: u32,
}

impl Utf16Mapping {
    /// Builds the mapping for `text`.
    ///
    /// # Panics
    ///
    /// Panics if `text` is longer than `u32::MAX` bytes.
    pub fn new(text: &str) -> Self {
        // Validate the length once. No character needs more UTF-16 units than
        // UTF-8 bytes, so both running offsets stay <= `total_bytes` and the
        // additions below cannot overflow (unchecked `+=` would wrap silently
        // in release builds and corrupt the mapping).
        let total_bytes = u32::try_from(text.len()).expect("text length exceeds u32::MAX");
        let mut anchors = Vec::new();
        let mut byte_off: u32 = 0;
        let mut utf16_off: u32 = 0;
        for ch in text.chars() {
            let byte_len = u8::try_from(ch.len_utf8()).expect("char byte len exceeds u8");
            let utf16_len = u8::try_from(ch.len_utf16()).expect("char utf16 len exceeds u8");
            if byte_len != 1 {
                anchors.push(Utf16Anchor {
                    byte_offset: byte_off,
                    utf16_offset: utf16_off,
                    byte_len,
                    utf16_len,
                });
            }
            byte_off += u32::from(byte_len);
            utf16_off += u32::from(utf16_len);
        }
        Self {
            anchors,
            total_bytes,
            total_utf16: utf16_off,
        }
    }

    /// Converts a UTF-8 byte offset into the matching UTF-16 offset.
    ///
    /// The end-of-text offset is valid and maps to the total UTF-16 length.
    ///
    /// # Errors
    ///
    /// * [`TextViewError::OffsetOutOfBounds`] when `byte_offset` exceeds the
    ///   text's byte length.
    /// * [`TextViewError::InvalidUtf8Boundary`] when `byte_offset` is inside a
    ///   multi-byte character.
    pub fn byte_to_utf16(&self, byte_offset: usize) -> Result<usize, TextViewError> {
        let total = self.total_bytes as usize;
        if byte_offset > total {
            return Err(TextViewError::OffsetOutOfBounds {
                offset: byte_offset,
                len: total,
            });
        }
        // Last multi-byte character starting at or before the offset.
        let preceding = self
            .anchors
            .partition_point(|a| (a.byte_offset as usize) <= byte_offset);
        let anchor = match preceding.checked_sub(1) {
            Some(i) => &self.anchors[i],
            // Only single-byte characters precede the offset: 1 byte == 1 unit.
            None => return Ok(byte_offset),
        };
        let char_start = anchor.byte_offset as usize;
        let char_end = char_start + usize::from(anchor.byte_len);
        let unit_start = anchor.utf16_offset as usize;
        if byte_offset == char_start {
            return Ok(unit_start);
        }
        if byte_offset < char_end {
            return Err(TextViewError::InvalidUtf8Boundary {
                offset: byte_offset,
            });
        }
        // Everything after the anchor up to the offset is single-byte.
        let unit_end = unit_start + usize::from(anchor.utf16_len);
        Ok(unit_end + (byte_offset - char_end))
    }

    /// Converts a UTF-16 code-unit offset into the matching UTF-8 byte offset.
    ///
    /// The end-of-text offset is valid and maps to the total byte length.
    ///
    /// # Errors
    ///
    /// Returns [`TextViewError::Utf16OffsetOutOfBounds`] when `utf16_offset`
    /// exceeds the text's UTF-16 length, and also when it falls between the
    /// two code units of a surrogate pair.
    pub fn utf16_to_byte(&self, utf16_offset: usize) -> Result<usize, TextViewError> {
        let total = self.total_utf16 as usize;
        if utf16_offset > total {
            return Err(TextViewError::Utf16OffsetOutOfBounds {
                offset: utf16_offset,
                total,
            });
        }
        // Last multi-byte character starting at or before the offset.
        let preceding = self
            .anchors
            .partition_point(|a| (a.utf16_offset as usize) <= utf16_offset);
        let anchor = match preceding.checked_sub(1) {
            Some(i) => &self.anchors[i],
            // Only single-byte characters precede the offset: 1 unit == 1 byte.
            None => return Ok(utf16_offset),
        };
        let unit_start = anchor.utf16_offset as usize;
        let unit_end = unit_start + usize::from(anchor.utf16_len);
        let char_start = anchor.byte_offset as usize;
        if utf16_offset == unit_start {
            return Ok(char_start);
        }
        if utf16_offset < unit_end {
            return Err(TextViewError::Utf16OffsetOutOfBounds {
                offset: utf16_offset,
                total,
            });
        }
        // Everything after the anchor up to the offset is single-byte.
        let char_end = char_start + usize::from(anchor.byte_len);
        Ok(char_end + (utf16_offset - unit_end))
    }
}
// Unit tests for the value types, `LineIndex`, and `Utf16Mapping` defined in
// the parent module.
#[cfg(test)]
mod tests {
use super::*;
// `Position::new` stores both components and the getters return them.
#[test]
fn position_new() {
let p = Position::new(3, 7);
assert_eq!(p.line(), 3);
assert_eq!(p.column(), 7);
}
// A well-ordered range reports its bounds and a length of `end - start`.
#[test]
fn text_range_new_ok() {
let r = TextRange::new(2, 5).unwrap();
assert_eq!(r.start(), 2);
assert_eq!(r.end(), 5);
assert_eq!(r.len(), 3);
assert!(!r.is_empty());
}
// `start > end` is rejected with `InvalidRange` carrying both arguments.
#[test]
fn text_range_new_reversed() {
let err = TextRange::new(5, 2).unwrap_err();
assert_eq!(err, TextViewError::InvalidRange { start: 5, end: 2 });
}
// `TextRange::empty` yields a zero-length range at the given offset.
#[test]
fn text_range_empty() {
let r = TextRange::empty(10);
assert_eq!(r.start(), 10);
assert_eq!(r.end(), 10);
assert!(r.is_empty());
assert_eq!(r.len(), 0);
}
// `contains` is half-open: `start` is included, `end` is not.
#[test]
fn text_range_contains() {
let r = TextRange::new(2, 5).unwrap();
assert!(r.contains(2));
assert!(r.contains(4));
assert!(!r.contains(5));
assert!(!r.contains(1));
}
// Overlapping ranges intersect; ranges that only touch, and empty ranges,
// do not.
#[test]
fn text_range_intersects() {
let a = TextRange::new(2, 5).unwrap();
let b = TextRange::new(4, 8).unwrap();
let c = TextRange::new(5, 8).unwrap();
assert!(a.intersects(&b));
assert!(!a.intersects(&c));
let e1 = TextRange::empty(3);
let e2 = TextRange::empty(3);
assert!(!e1.intersects(&e2));
}
// A cursor has equal anchor and head, counts as forward, and has an empty
// range.
#[test]
fn selection_cursor() {
let s = Selection::cursor(5);
assert!(s.is_cursor());
assert_eq!(s.anchor(), 5);
assert_eq!(s.head(), 5);
assert!(s.is_forward());
let r = s.range();
assert!(r.is_empty());
}
// Anchor before head: forward, and the range runs anchor..head.
#[test]
fn selection_forward() {
let s = Selection::new(2, 8);
assert!(!s.is_cursor());
assert!(s.is_forward());
let r = s.range();
assert_eq!(r.start(), 2);
assert_eq!(r.end(), 8);
}
// Anchor after head: not forward, but the range is still ordered low..high.
#[test]
fn selection_backward() {
let s = Selection::new(8, 2);
assert!(!s.is_cursor());
assert!(!s.is_forward());
let r = s.range();
assert_eq!(r.start(), 2);
assert_eq!(r.end(), 8);
}
// An empty text still has one line, and offset 0 maps to position (0, 0).
#[test]
fn line_index_empty_text() {
let text = "";
let idx = LineIndex::new(text);
assert_eq!(idx.line_count(), 1);
assert_eq!(idx.text_len(), 0);
let pos = idx.offset_to_position(text, 0).unwrap();
assert_eq!(pos, Position::new(0, 0));
let off = idx.position_to_offset(text, Position::new(0, 0)).unwrap();
assert_eq!(off, 0);
}
// Text without a newline is a single line; the end-of-text offset is valid.
#[test]
fn line_index_single_line() {
let text = "hello";
let idx = LineIndex::new(text);
assert_eq!(idx.line_count(), 1);
assert_eq!(idx.text_len(), 5);
let pos = idx.offset_to_position(text, 3).unwrap();
assert_eq!(pos, Position::new(0, 3));
let off = idx.position_to_offset(text, pos).unwrap();
assert_eq!(off, 3);
let pos_end = idx.offset_to_position(text, 5).unwrap();
assert_eq!(pos_end, Position::new(0, 5));
}
// Offsets after a newline land on the next line with a column counted from
// that line's start.
#[test]
fn line_index_multi_line() {
let text = "abc\ndef\nghi";
let idx = LineIndex::new(text);
assert_eq!(idx.line_count(), 3);
assert_eq!(idx.text_len(), 11);
let pos = idx.offset_to_position(text, 4).unwrap();
assert_eq!(pos, Position::new(1, 0));
let off = idx.position_to_offset(text, pos).unwrap();
assert_eq!(off, 4);
let pos2 = idx.offset_to_position(text, 9).unwrap();
assert_eq!(pos2, Position::new(2, 1));
let off2 = idx.position_to_offset(text, pos2).unwrap();
assert_eq!(off2, 9);
}
// Columns are byte offsets: each kana is 3 bytes in UTF-8, so the second
// line starts at byte 10 and byte 3 is column 3 of the first line.
#[test]
fn line_index_multibyte() {
let text = "あいう\nえお";
let idx = LineIndex::new(text);
assert_eq!(idx.line_count(), 2);
let pos = idx.offset_to_position(text, 10).unwrap();
assert_eq!(pos, Position::new(1, 0));
let off = idx.position_to_offset(text, pos).unwrap();
assert_eq!(off, 10);
let pos2 = idx.offset_to_position(text, 3).unwrap();
assert_eq!(pos2, Position::new(0, 3));
let off2 = idx.position_to_offset(text, pos2).unwrap();
assert_eq!(off2, 3);
}
// offset -> position -> offset is the identity for every char boundary,
// including the end of a text that ends with a newline.
#[test]
fn line_index_roundtrip() {
let text = "hello\nworld\n";
let idx = LineIndex::new(text);
for offset in 0..=text.len() {
if text.is_char_boundary(offset) {
let pos = idx.offset_to_position(text, offset).unwrap();
let back = idx.position_to_offset(text, pos).unwrap();
assert_eq!(back, offset, "roundtrip failed at offset {offset}");
}
}
}
// An offset past the end of the text is reported together with the length.
#[test]
fn line_index_offset_out_of_bounds() {
let text = "abc";
let idx = LineIndex::new(text);
let err = idx.offset_to_position(text, 10).unwrap_err();
assert_eq!(err, TextViewError::OffsetOutOfBounds { offset: 10, len: 3 });
}
// A non-existent line is reported together with the actual line count.
#[test]
fn line_index_line_out_of_bounds() {
let text = "abc\ndef";
let idx = LineIndex::new(text);
let err = idx.line_range(5).unwrap_err();
assert_eq!(
err,
TextViewError::LineOutOfBounds {
line: 5,
line_count: 2
}
);
}
// Byte 1 is inside the 3-byte encoding of the kana, so it is rejected.
#[test]
fn line_index_invalid_utf8_boundary() {
let text = "あ";
let idx = LineIndex::new(text);
let err = idx.offset_to_position(text, 1).unwrap_err();
assert_eq!(err, TextViewError::InvalidUtf8Boundary { offset: 1 });
}
// `line_range` excludes the trailing newline, `line_range_with_newline`
// includes it; a trailing newline produces a final empty line.
#[test]
fn line_index_line_range() {
let text = "abc\ndef\n";
let idx = LineIndex::new(text);
let r0 = idx.line_range(0).unwrap();
assert_eq!(r0.start(), 0);
assert_eq!(r0.end(), 3);
let r1 = idx.line_range(1).unwrap();
assert_eq!(r1.start(), 4);
assert_eq!(r1.end(), 7);
let r2 = idx.line_range(2).unwrap();
assert_eq!(r2.start(), 8);
assert_eq!(r2.end(), 8);
let rn0 = idx.line_range_with_newline(0).unwrap();
assert_eq!(rn0.start(), 0);
assert_eq!(rn0.end(), 4);
let rn1 = idx.line_range_with_newline(1).unwrap();
assert_eq!(rn1.start(), 4);
assert_eq!(rn1.end(), 8);
}
// The offset of a newline byte (3) belongs to the line it terminates; the
// end-of-text offset (11) belongs to the last line.
#[test]
fn line_index_line_of_offset() {
let text = "abc\ndef\nghi";
let idx = LineIndex::new(text);
assert_eq!(idx.line_of_offset(0).unwrap(), 0);
assert_eq!(idx.line_of_offset(3).unwrap(), 0);
assert_eq!(idx.line_of_offset(4).unwrap(), 1);
assert_eq!(idx.line_of_offset(8).unwrap(), 2);
assert_eq!(idx.line_of_offset(11).unwrap(), 2);
}
// For pure ASCII both conversions are the identity, end offset included.
#[test]
fn utf16_mapping_ascii() {
let text = "hello world";
let m = Utf16Mapping::new(text);
for i in 0..=text.len() {
assert_eq!(m.byte_to_utf16(i).unwrap(), i);
assert_eq!(m.utf16_to_byte(i).unwrap(), i);
}
}
// The kana is 3 UTF-8 bytes (bytes 1..4) but one UTF-16 unit; bytes 2 and 3
// are inside it and therefore invalid.
#[test]
fn utf16_mapping_japanese() {
let text = "aあb";
let m = Utf16Mapping::new(text);
assert_eq!(m.byte_to_utf16(0).unwrap(), 0);
assert_eq!(m.byte_to_utf16(1).unwrap(), 1);
assert!(m.byte_to_utf16(2).is_err());
assert!(m.byte_to_utf16(3).is_err());
assert_eq!(m.byte_to_utf16(4).unwrap(), 2);
assert_eq!(m.byte_to_utf16(5).unwrap(), 3);
assert_eq!(m.utf16_to_byte(0).unwrap(), 0);
assert_eq!(m.utf16_to_byte(1).unwrap(), 1);
assert_eq!(m.utf16_to_byte(2).unwrap(), 4);
assert_eq!(m.utf16_to_byte(3).unwrap(), 5);
}
// The emoji is 4 UTF-8 bytes (bytes 1..5) and a 2-unit surrogate pair
// (units 1..3); offsets strictly inside either encoding are rejected.
#[test]
fn utf16_mapping_surrogate_pair() {
let text = "a😀b";
let m = Utf16Mapping::new(text);
assert_eq!(m.byte_to_utf16(0).unwrap(), 0);
assert_eq!(m.byte_to_utf16(1).unwrap(), 1);
assert!(m.byte_to_utf16(2).is_err());
assert!(m.byte_to_utf16(3).is_err());
assert!(m.byte_to_utf16(4).is_err());
assert_eq!(m.byte_to_utf16(5).unwrap(), 3);
assert_eq!(m.byte_to_utf16(6).unwrap(), 4);
assert_eq!(m.utf16_to_byte(0).unwrap(), 0);
assert_eq!(m.utf16_to_byte(1).unwrap(), 1);
assert!(m.utf16_to_byte(2).is_err());
assert_eq!(m.utf16_to_byte(3).unwrap(), 5);
assert_eq!(m.utf16_to_byte(4).unwrap(), 6);
}
// byte -> UTF-16 -> byte is the identity at the start of every character
// and at the end of the text.
#[test]
fn utf16_mapping_roundtrip() {
let text = "Hello あいう 😀🎉 world";
let m = Utf16Mapping::new(text);
let mut byte_off = 0;
for ch in text.chars() {
let u16_off = m.byte_to_utf16(byte_off).unwrap();
let back = m.utf16_to_byte(u16_off).unwrap();
assert_eq!(back, byte_off, "roundtrip failed at byte {byte_off}");
byte_off += ch.len_utf8();
}
let u16_end = m.byte_to_utf16(byte_off).unwrap();
let back_end = m.utf16_to_byte(u16_end).unwrap();
assert_eq!(back_end, byte_off);
}
// Offsets past the end are rejected in both directions.
#[test]
fn utf16_mapping_out_of_bounds() {
let text = "abc";
let m = Utf16Mapping::new(text);
assert!(m.byte_to_utf16(10).is_err());
assert!(m.utf16_to_byte(10).is_err());
}
// The `Display` output of `InvalidRange` mentions both bounds.
#[test]
fn error_display() {
let e = TextViewError::InvalidRange { start: 5, end: 2 };
let s = e.to_string();
assert!(s.contains("5"));
assert!(s.contains("2"));
}
}