use std::{ops::Range, sync::LazyLock};
pub(crate) use crate::text::strs::buf::StrsBuf;
use crate::{
opts::PrintOpts,
text::{Point, TextIndex, TextRange, strs::buf::assert_utf8_boundary},
};
mod buf;
mod line_ranges;
/// A view over a byte range of a [`StrsBuf`], only ever handled behind a
/// reference.
///
/// The type is unsized: it wraps `StrsDST`, which is itself a `[StrsBuf]`
/// slice type.
#[repr(transparent)]
pub struct Strs(StrsDST);
impl Strs {
/// Creates a `&Strs` describing `len` bytes of `bytes`, beginning at byte
/// `start`.
///
/// Nothing is copied: `start` and `len` are packed into one `usize`, which is
/// stored in the position where a slice pointer keeps its length.
pub(super) fn new(bytes: &StrsBuf, start: u32, len: u32) -> &Self {
// Pack the two `u32`s into a single `usize`. `transmute` requires both types
// to have the same size, so this only compiles where `usize` is 64 bits wide.
let start_and_len = unsafe { std::mem::transmute::<[u32; 2], usize>([start, len]) };
// Fat pointer: the address is the buffer, the "length" slot carries the
// packed `(start, len)` pair rather than an element count.
let ptr = std::ptr::slice_from_raw_parts(bytes as *const StrsBuf, start_and_len);
// NOTE(review): soundness presumably depends on the metadata never being
// interpreted as a real slice length — confirm.
unsafe { &*(ptr as *const Self) }
}
pub fn start_point(&self) -> Point {
let formed = FormedStrs::new(self);
if formed.start == 0 {
Point::default()
} else {
let slices = unsafe {
let (s0, s1) = formed.buf.gapbuf.as_slices();
[str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
};
formed
.buf
.line_ranges
.point_by_key(formed.start as usize, |[b, _]| b, slices)
.unwrap_or_else(|| formed.buf.line_ranges.max(slices))
}
}
pub fn end_point(&self) -> Point {
let formed = FormedStrs::new(self);
let slices = unsafe {
let (s0, s1) = formed.buf.gapbuf.as_slices();
[str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
};
let byte = formed.start as usize + formed.len as usize;
if byte == formed.buf.gapbuf.len() {
formed.buf.line_ranges.max(slices)
} else {
formed
.buf
.line_ranges
.point_by_key(byte, |[b, _]| b, slices)
.unwrap_or_else(|| formed.buf.line_ranges.max(slices))
}
}
#[track_caller]
pub fn char_at(&self, p: impl TextIndex) -> Option<char> {
let formed = FormedStrs::new(self);
let range = formed
.buf
.gapbuf
.range(formed.start as usize..formed.start as usize + formed.len as usize);
if range
.get(p.to_byte_index())
.is_none_or(|b| utf8_char_width(*b) == 0)
{
return None;
}
let [s0, s1] = self.to_array();
Some(if p.to_byte_index() < s0.len() {
s0[p.to_byte_index()..].chars().next().unwrap()
} else {
s1[p.to_byte_index() - s0.len()..]
.chars()
.next()
.unwrap_or_else(|| panic!("{self:#?}"))
})
}
#[inline(always)]
#[track_caller]
pub fn point_at_byte(&self, byte: usize) -> Point {
assert!(
byte <= self.len(),
"byte out of bounds: the len is {}, but the byte is {byte}",
self.len()
);
let formed = FormedStrs::new(self);
if byte == self.len() {
self.end_point()
} else if byte == 0 {
Point::default()
} else {
let slices = unsafe {
let (s0, s1) = formed.buf.gapbuf.as_slices();
[str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
};
formed
.buf
.line_ranges
.point_by_key(byte + formed.start as usize, |[b, _]| b, slices)
.unwrap()
}
}
#[inline(always)]
#[track_caller]
pub fn point_at_char(&self, char: usize) -> Point {
let end_point = self.end_point();
assert!(
char <= end_point.char(),
"char out of bounds: the len is {}, but the char is {char}",
end_point.char()
);
let formed = FormedStrs::new(self);
if char == end_point.char() {
end_point
} else {
let slices = unsafe {
let (s0, s1) = formed.buf.gapbuf.as_slices();
[str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
};
let start = formed
.buf
.line_ranges
.point_by_key(formed.start as usize, |[b, _]| b, slices)
.unwrap();
formed
.buf
.line_ranges
.point_by_key(start.char() + char, |[_, c]| c, slices)
.unwrap()
}
}
#[inline(always)]
#[track_caller]
pub fn point_at_coords(&self, line: usize, column: usize) -> Point {
let end_point = self.end_point();
assert!(
line <= end_point.line(),
"line out of bounds: the len is {}, but the line is {line}",
end_point.line()
);
let formed = FormedStrs::new(self);
if line == end_point.line() {
end_point
} else {
let slices = unsafe {
let (s0, s1) = formed.buf.gapbuf.as_slices();
[str::from_utf8_unchecked(s0), str::from_utf8_unchecked(s1)]
};
let line = {
let start = self.point_at_byte(formed.start as usize);
start.line() + line
};
let point = formed.buf.line_ranges.point_at_coords(line, column, slices);
if let Some(point) = point {
point
} else {
let next_line_start = if line + 1 == end_point.line() {
end_point
} else {
formed
.buf
.line_ranges
.point_at_coords(line + 1, 0, slices)
.unwrap()
};
next_line_start.rev('\n')
}
}
}
/// The `n`-th line of this view, as a sub-view.
///
/// The line spans from `point_at_coords(n, 0)` to `point_at_coords(n + 1, 0)`,
/// or to the end point when `n` is the last line.
///
/// NOTE(review): the points are then used to index `self`; whether that is
/// correct for views that do not begin at byte 0 should be confirmed.
///
/// # Panics
///
/// Panics if `n >= self.end_point().line()`.
#[track_caller]
pub fn line(&self, n: usize) -> &Strs {
    let end_point = self.end_point();
    let line_count = end_point.line();
    assert!(
        n < line_count,
        "line out of bounds: the len is {}, but the line is {n}",
        line_count
    );

    let start = self.point_at_coords(n, 0);
    // `n < line_count` holds here, so "not the last line" means `n + 1 < line_count`.
    let end = if n + 1 < line_count {
        self.point_at_coords(n + 1, 0)
    } else {
        end_point
    };
    &self[start..end]
}
pub fn full(&self) -> &Strs {
FormedStrs::new(self).buf
}
pub fn last_point(&self) -> Point {
let formed = FormedStrs::new(self);
formed.buf.end_point().rev('\n')
}
pub fn get(&self, range: impl TextRange) -> Option<&Strs> {
let formed = FormedStrs::new(self);
let range = {
let range = range.try_to_range(formed.len as usize)?;
range.start + formed.start as usize..range.end + formed.start as usize
};
let (s0, s1) = formed.buf.gapbuf.range(range.clone()).as_slices();
if s0.first().is_some_and(|b| utf8_char_width(*b) == 0)
|| s1.first().is_some_and(|b| utf8_char_width(*b) == 0)
|| formed
.buf
.gapbuf
.get(range.end)
.is_some_and(|b| utf8_char_width(*b) == 0)
{
return None;
}
Some(Strs::new(
formed.buf,
range.start as u32,
range.len() as u32,
))
}
pub fn empty() -> &'static Self {
Strs::new(&EMPTY_BYTES, 0, 0)
}
#[track_caller]
#[inline]
pub fn slices(&self, range: impl TextRange) -> [&[u8]; 2] {
let formed = FormedStrs::new(self);
let range = {
let range = range.to_range(formed.len as usize);
range.start + formed.start as usize..range.end + formed.start as usize
};
let (s0, s1) = formed.buf.gapbuf.range(range).as_slices();
[s0, s1]
}
pub fn to_array(&self) -> [&str; 2] {
let formed = FormedStrs::new(self);
let range = formed.start as usize..(formed.start + formed.len) as usize;
let (s0, s1) = formed.buf.gapbuf.range(range).as_slices();
[unsafe { std::str::from_utf8_unchecked(s0) }, unsafe {
std::str::from_utf8_unchecked(s1)
}]
}
pub fn lines(&self) -> Lines<'_> {
let formed = FormedStrs::new(self);
Lines::new(
formed.buf,
formed.start as usize,
(formed.start + formed.len) as usize,
)
}
/// An iterator over every byte of this view, front to back.
#[inline]
pub fn bytes(&self) -> impl DoubleEndedIterator<Item = u8> {
    let [head, tail] = self.slices(..);
    head.iter().chain(tail.iter()).copied()
}
/// An iterator over every `char` of this view, front to back.
pub fn chars(&self) -> impl DoubleEndedIterator<Item = char> {
    let [head, tail] = self.to_array();
    head.chars().chain(tail.chars())
}
/// An iterator over `(byte index, char)` pairs of this view.
///
/// Byte indices are relative to the start of the view: indices from the
/// second half are shifted by the length of the first.
pub fn char_indices(&self) -> impl DoubleEndedIterator<Item = (usize, char)> {
    let [head, tail] = self.to_array();
    [(0, head), (head.len(), tail)]
        .into_iter()
        .flat_map(|(shift, part)| part.char_indices().map(move |(b, c)| (b + shift, c)))
}
pub fn byte_range(&self) -> Range<usize> {
let formed = FormedStrs::new(self);
formed.start as usize..(formed.start + formed.len) as usize
}
pub fn range(&self) -> Range<Point> {
let formed = FormedStrs::new(self);
let range = self.byte_range();
formed.buf.point_at_byte(range.start)..formed.buf.point_at_byte(range.end)
}
/// The width of the leading run of spaces and tabs in this view.
///
/// A space adds one column; a tab advances to the next multiple of
/// `opts.tabstop`. Counting stops at the first byte that is neither.
#[track_caller]
#[inline]
pub fn indent(&self, opts: PrintOpts) -> usize {
    let mut width = 0;
    for byte in self.bytes() {
        match byte {
            b' ' => width += 1,
            b'\t' => {
                let tabstop = opts.tabstop as usize;
                width = width + tabstop - (width % tabstop);
            }
            _ => break,
        }
    }
    width
}
pub fn len(&self) -> usize {
FormedStrs::new(self).len as usize
}
/// Whether this view covers zero bytes.
#[must_use]
pub fn is_empty(&self) -> bool {
    matches!(self.len(), 0)
}
/// Whether this view consists of nothing but a line terminator, i.e. it is
/// exactly `"\n"` or `"\r\n"`.
pub fn is_empty_line(&self) -> bool {
    ["\n", "\r\n"]
        .into_iter()
        .any(|terminator| self == terminator)
}
pub fn version(&self) -> u64 {
FormedStrs::new(self).buf.version()
}
}
impl<Idx: TextRange> std::ops::Index<Idx> for Strs {
type Output = Self;
fn index(&self, index: Idx) -> &Self::Output {
let formed = FormedStrs::new(self);
let range = index.to_range(formed.len as usize);
assert_utf8_boundary(formed.buf, range.start);
assert_utf8_boundary(formed.buf, range.end);
Self::new(
formed.buf,
range.start as u32 + formed.start,
range.len() as u32,
)
}
}
impl std::fmt::Display for Strs {
    /// Writes both halves of the view, one after the other.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for part in self.to_array() {
            f.write_str(part)?;
        }
        Ok(())
    }
}
impl std::fmt::Debug for Strs {
    /// Formats the view the way a `str` with the same contents would be
    /// debug-formatted (quoted and escaped).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Join the halves first so escaping sees one contiguous string.
        let joined: String = self.to_array().concat();
        write!(f, "{:?}", joined.as_str())
    }
}
/// Lazily created default [`StrsBuf`], used as the backing buffer of
/// `Strs::empty`.
static EMPTY_BYTES: LazyLock<StrsBuf> = LazyLock::new(StrsBuf::default);
/// The length in bytes of the UTF-8 sequence introduced by the byte `b`.
///
/// Returns `0` for a byte that cannot begin a sequence: continuation bytes
/// (`0x80..=0xBF`) and the bytes `0xC0`, `0xC1` and `0xF5..=0xFF`.
#[must_use]
#[inline]
pub const fn utf8_char_width(b: u8) -> usize {
    match b {
        // ASCII
        0x00..=0x7F => 1,
        // Lead bytes of two-, three- and four-byte sequences
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // Continuation bytes and bytes that never start a sequence
        _ => 0,
    }
}
/// A double-ended iterator over the lines of a byte range of a [`StrsBuf`],
/// yielding each line as a `&Strs`.
pub struct Lines<'b> {
// Buffer that the yielded lines point into.
buf: &'b StrsBuf,
// Byte index where the not-yet-yielded range begins; advanced by `next`.
start: usize,
// Byte index where the not-yet-yielded range ends; lowered by `next_back`.
end: usize,
// Upper bound of the range scanned for `\n`; initialised to `end` and moved
// backwards by `next_match_back`.
finger_back: usize,
}
impl<'b> Lines<'b> {
fn new(bytes: &'b StrsBuf, start: usize, end: usize) -> Self {
Self { buf: bytes, start, end, finger_back: end }
}
fn next_match_back(&mut self) -> Option<usize> {
let range = self.start..self.finger_back;
let (s0, s1) = self.buf.gapbuf.range(range).as_slices();
let pos = s0.iter().chain(s1.iter()).rev().position(|b| *b == b'\n');
match pos {
Some(pos) => {
self.finger_back -= pos + 1;
Some(self.finger_back)
}
None => {
self.finger_back = self.start;
None
}
}
}
}
impl<'b> Iterator for Lines<'b> {
    type Item = &'b Strs;

    /// Yields the next line from the front.
    ///
    /// A line includes its terminating `\n` when one is found; otherwise the
    /// line is everything left up to `self.end`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.start == self.end {
            return None;
        }

        let (head, tail) = self
            .buf
            .gapbuf
            .range(self.start..self.finger_back)
            .as_slices();
        let newline = head
            .iter()
            .chain(tail.iter())
            .position(|&byte| byte == b'\n');

        // Length of the yielded line, and where the next one will begin.
        let (len, next_start) = match newline {
            Some(offset) => (offset as u32 + 1, self.start + offset + 1),
            None => ((self.end - self.start) as u32, self.end),
        };

        let line = Strs::new(self.buf, self.start as u32, len);
        self.start = next_start;
        Some(line)
    }

    /// The last line is simply the first one from the back.
    fn last(mut self) -> Option<Self::Item> {
        self.next_back()
    }
}
impl DoubleEndedIterator for Lines<'_> {
    /// Yields the next line from the back.
    ///
    /// A `\n` sitting right at `self.end` terminates the line being yielded,
    /// so the line's start is searched for one newline further back.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.start == self.end {
            return None;
        }

        let line_start = match self.next_match_back() {
            // The newline found is the very last byte: it belongs to this line.
            Some(newline) if newline + 1 == self.end => match self.next_match_back() {
                Some(previous) => previous + 1,
                None => self.start,
            },
            // The newline found ends the previous line.
            Some(newline) => newline + 1,
            // No newline left: the rest of the range is a single line.
            None => self.start,
        };

        let len = self.end - line_start;
        let line = Strs::new(self.buf, line_start as u32, len as u32);
        self.end = line_start;
        Some(line)
    }
}
// Once `start == end`, both `next` and `next_back` return `None` without
// changing any state, so the iterator keeps returning `None`.
impl std::iter::FusedIterator for Lines<'_> {}
/// The decoded parts of a `&Strs`: the buffer it points into, plus the start
/// offset and length that are packed into the reference's metadata.
struct FormedStrs<'b> {
// Buffer that the view points into.
buf: &'b StrsBuf,
// Byte offset of the view inside `buf`.
start: u32,
// Length of the view, in bytes.
len: u32,
}
impl<'b> FormedStrs<'b> {
/// Decodes a `&Strs` back into its buffer, start offset and length.
///
/// This is the inverse of the packing done in `Strs::new`.
fn new(strs: &'b Strs) -> Self {
// View the reference as a raw slice pointer so that its metadata (the
// "length" slot) can be read.
let ptr = strs as *const Strs as *const [StrsBuf];
// The "length" is not an element count: it holds `[start, len]` packed
// into one `usize`. `transmute` requires equal sizes, so this only
// compiles where `usize` is 64 bits wide.
let [start, len] = unsafe { std::mem::transmute::<usize, [u32; 2]>(ptr.len()) };
Self {
// The address part of the pointer is the buffer itself.
// NOTE(review): presumably sound only because every `&Strs` is created
// by `Strs::new` from a live `&StrsBuf` — confirm.
buf: unsafe { &*(ptr as *const StrsBuf) },
start,
len,
}
}
}
/// Unsized slice type wrapped by `Strs`, which makes references to `Strs`
/// carry pointer metadata alongside the address.
#[repr(transparent)]
struct StrsDST([StrsBuf]);