/// Conversion into a `StrSlicer` over string data that lives for `'str`.
pub trait AsSlicer<'str> {
/// Builds a slicer that carries the default `()` tracker.
fn as_slicer(&self) -> StrSlicer<'str>;
/// Builds a slicer that carries the given `tracker`.
///
/// NOTE(review): this declaration borrows `self` for the whole `'str`
/// lifetime, whereas `as_slicer` takes a plain `&self` — confirm that the
/// asymmetry is intended.
fn as_slicer_with_tracker<T: Tracker>(&'str self, tracker: T) -> StrSlicer<'str, T>;
}
/// String slices can be turned into slicers directly.
impl<'str> AsSlicer<'str> for &'str str {
    /// Wraps the referenced string in a slicer that uses the `()` tracker.
    fn as_slicer(&self) -> StrSlicer<'str> {
        // `self` is a reference to the `&'str str`; copy the inner slice out
        // so the slicer borrows the string data itself.
        let source: &'str str = *self;
        StrSlicer::new(source)
    }

    /// Wraps the referenced string in a slicer that carries `tracker`.
    fn as_slicer_with_tracker<T: Tracker>(&self, tracker: T) -> StrSlicer<'str, T> {
        let source: &'str str = *self;
        StrSlicer::with_tracker(source, tracker)
    }
}
/// Observer that a `StrSlicer` notifies whenever its cursor is moved.
pub trait Tracker {
/// Type of the value reported by `pos`.
type Pos;
/// Returns the tracker's current position value.
fn pos(&self) -> Self::Pos;
/// Informs the tracker about a cursor move.
///
/// `string` is the complete input of the slicer, `old_byte_pos` is the
/// byte offset of the cursor before the move and `new_byte_pos` the byte
/// offset it is moved to.
fn update(&mut self, string: &str, old_byte_pos: usize, new_byte_pos: usize);
}
/// The unit type is the "no tracking" tracker: it stores nothing and
/// ignores every cursor move.
impl Tracker for () {
    type Pos = ();

    /// There is no position to report; yields `()`.
    fn pos(&self) -> Self::Pos {}

    /// Deliberately does nothing.
    fn update(&mut self, _: &str, _: usize, _: usize) {}
}
/// Something a `StrSlicer` can test for, search for and skip over.
pub trait Pattern {
/// Reports whether the pattern matches at the slicer's cursor.
fn is_next<'str, T: Tracker>(&mut self, slicer: &StrSlicer<'str, T>) -> bool;
/// Moves the slicer's cursor forward to the start of the next match.
///
/// The implementations in this file move the cursor to the end of the
/// input when there is no match.
fn skip_until<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>);
/// Moves the slicer's cursor past one occurrence of the pattern without
/// checking that the pattern is actually there.
///
/// # Safety
///
/// The implementations in this file advance the cursor blindly (e.g. by
/// the pattern's byte length), so the caller must make sure the pattern
/// really matches at the cursor — for instance because `is_next` just
/// returned `true` or `skip_until` just stopped before the end.
unsafe fn skip_over_unchecked<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>);
}
/// A string slice matches wherever the unconsumed input starts with it.
impl<'a> Pattern for &'a str {
    /// Returns `true` when the unconsumed input starts with this string.
    ///
    /// Always `false` once the slicer is at the end of its input.
    fn is_next<'str, T: Tracker>(&mut self, slicer: &StrSlicer<'str, T>) -> bool {
        let needle: &str = *self;
        slicer
            .cut_off()
            .map_or(false, |rest| rest.starts_with(needle))
    }

    /// Moves the cursor to the first occurrence of this string in the
    /// unconsumed input, or to the end of the input when it does not occur.
    fn skip_until<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        if let Some(rest) = slicer.cut_off() {
            match rest.find(*self) {
                Some(offset) => {
                    let target = slicer.byte_pos + offset;
                    // SAFETY: `offset` is the start of a match inside the
                    // remainder, so `target` is in bounds and on a char
                    // boundary.
                    unsafe {
                        slicer.jump_to_unchecked(target);
                    }
                }
                None => slicer.skip_to_end(),
            }
        }
    }

    /// Moves the cursor forward by this string's length in bytes.
    ///
    /// # Safety
    ///
    /// The string must be present at the cursor; nothing is checked here.
    unsafe fn skip_over_unchecked<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        let target = slicer.byte_pos + self.len();
        slicer.jump_to_unchecked(target);
    }
}
/// A `char` matches wherever the next unconsumed character equals it.
impl Pattern for char {
    /// Returns `true` when the next unconsumed character equals this char.
    ///
    /// The check is made against the remainder returned by `cut_off`, i.e.
    /// the text at the cursor, not against the start of the whole input.
    /// It is always `false` once the slicer is at the end of its input.
    fn is_next<'str, T: Tracker>(&mut self, slicer: &StrSlicer<'str, T>) -> bool {
        let wanted = *self;
        slicer
            .cut_off()
            .map_or(false, |rest| rest.starts_with(wanted))
    }

    /// Moves the cursor to the first occurrence of this char in the
    /// unconsumed input, or to the end of the input when it does not occur.
    fn skip_until<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        let rest = match slicer.cut_off() {
            Some(rest) => rest,
            None => return,
        };
        match rest.find(*self) {
            Some(offset) => {
                let target = slicer.byte_pos + offset;
                // SAFETY: `offset` is where a char of the remainder starts,
                // so `target` is in bounds and on a char boundary.
                unsafe {
                    slicer.jump_to_unchecked(target);
                }
            }
            None => slicer.skip_to_end(),
        }
    }

    /// Moves the cursor forward by this char's UTF-8 length.
    ///
    /// # Safety
    ///
    /// The char must be present at the cursor; nothing is checked here.
    unsafe fn skip_over_unchecked<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        let target = slicer.byte_pos + self.len_utf8();
        slicer.jump_to_unchecked(target);
    }
}
/// A predicate over `char` matches wherever it accepts the next unconsumed
/// character.
impl<F: FnMut(char) -> bool> Pattern for F {
    /// Returns the predicate's verdict on the next unconsumed character.
    ///
    /// The character is taken from the remainder returned by `cut_off`,
    /// i.e. from the text at the cursor, not from the start of the whole
    /// input. At the end of the input the predicate is not called and the
    /// result is `false`.
    fn is_next<'str, T: Tracker>(&mut self, slicer: &StrSlicer<'str, T>) -> bool {
        match slicer.cut_off().and_then(|rest| rest.chars().next()) {
            Some(next) => self(next),
            None => false,
        }
    }

    /// Moves the cursor to the first unconsumed character accepted by the
    /// predicate, or to the end of the input when none is accepted.
    fn skip_until<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        let rest = match slicer.cut_off() {
            Some(rest) => rest,
            None => return,
        };
        match rest.find(self) {
            Some(offset) => {
                let target = slicer.byte_pos + offset;
                // SAFETY: `offset` is where a char of the remainder starts,
                // so `target` is in bounds and on a char boundary.
                unsafe {
                    slicer.jump_to_unchecked(target);
                }
            }
            None => slicer.skip_to_end(),
        }
    }

    /// Moves the cursor forward by exactly one character.
    ///
    /// # Safety
    ///
    /// Nothing is checked here; the caller is expected to have established
    /// that the predicate accepts the character at the cursor.
    unsafe fn skip_over_unchecked<'str, T: Tracker>(&mut self, slicer: &mut StrSlicer<'str, T>) {
        slicer.advance_char();
    }
}
/// Cursor over a borrowed string that can skip ahead and hand out
/// sub-slices of the input, optionally reporting its movement to a
/// `Tracker` (the default tracker `()` does nothing).
#[derive(Debug, Clone, Copy)]
pub struct StrSlicer<'str, T: Tracker = ()> {
// The complete input; the cursor moves over it, the field itself never changes here.
string: &'str str,
// Byte offset of the cursor into `string`; starts at 0.
byte_pos: usize,
// Receives an `update` call on every cursor move.
tracker: T
}
impl<'str> StrSlicer<'str, ()> {
pub fn new(string: &'str str) -> Self {
Self {
string,
byte_pos: 0,
tracker: ()
}
}
}
impl<'str, T: Tracker> StrSlicer<'str, T> {
pub fn with_tracker(string: &'str str, tracker: T) -> Self {
Self {
string: string,
byte_pos: 0,
tracker
}
}
fn next_char_boundary(&self) -> Option<usize> {
let mut next_byte_pos = self.byte_pos + 1;
loop {
if next_byte_pos >= self.end_byte_pos() {
return None;
}
if self.string.is_char_boundary(next_byte_pos) {
return Some(next_byte_pos);
} else {
next_byte_pos += 1;
continue;
}
}
}
fn advance_char(&mut self) {
let byte_pos = self.next_char_boundary().unwrap_or(self.end_byte_pos());
unsafe {
self.jump_to_unchecked(byte_pos);
}
}
#[inline]
fn end_byte_pos(&self) -> usize {
self.string.len()
}
#[inline]
pub fn as_str(&self) -> &'str str {
self.string
}
pub fn cut_off(&self) -> Option<&'str str> {
if self.is_at_end() {
None
} else {
let start_pos = self.byte_pos;
let end_pos = self.end_byte_pos();
Some(&self.string[start_pos..end_pos])
}
}
#[inline]
pub fn byte_pos(&self) -> usize {
self.byte_pos
}
pub fn jump_to(&mut self, byte_pos: usize) {
if byte_pos > self.end_byte_pos() {
jump_oob_fail(self.string, byte_pos);
}
if self.string.is_char_boundary(byte_pos) {
unsafe {
self.jump_to_unchecked(byte_pos);
}
} else {
jump_char_boundary_fail(self.string, byte_pos)
}
}
pub unsafe fn jump_to_unchecked(&mut self, byte_pos: usize) {
let string = self.as_str();
self.tracker.update(string, self.byte_pos, byte_pos);
self.byte_pos = byte_pos;
}
pub fn tracker(&self) -> &T {
&self.tracker
}
pub fn tracker_mut(&mut self) -> &mut T {
&mut self.tracker
}
#[inline]
pub fn tracker_pos(&self) -> T::Pos {
self.tracker.pos()
}
pub fn skip_num_chars(&mut self, num: usize) {
for _ in 0..num {
self.advance_char();
if self.is_at_end() {
break;
}
}
}
pub fn slice_num_chars(&mut self, num: usize) -> Option<&'str str> {
let start_pos = self.byte_pos;
if start_pos >= self.end_byte_pos() {
None
} else {
self.skip_num_chars(num);
let end_pos = self.byte_pos;
Some(&self.string[start_pos..end_pos])
}
}
pub fn is_next<P: Pattern>(&self, mut pattern: P) -> bool {
pattern.is_next(self)
}
pub fn skip_over<P: Pattern>(&mut self, mut pattern: P) -> bool {
if pattern.is_next(self) {
unsafe {
pattern.skip_over_unchecked(self);
}
true
} else {
false
}
}
pub unsafe fn skip_over_unchecked<P: Pattern>(&mut self, mut pattern: P) {
pattern.skip_over_unchecked(self)
}
pub fn skip_until<P: Pattern>(&mut self, mut pattern: P) {
pattern.skip_until(self);
}
pub fn slice_until<P: Pattern>(&mut self, pattern: P) -> Option<&'str str> {
let start_pos = self.byte_pos;
if start_pos >= self.end_byte_pos() {
None
} else {
self.skip_until(pattern);
let end_pos = self.byte_pos;
Some(&self.string[start_pos..end_pos])
}
}
pub fn skip_until_after<P: Pattern>(&mut self, mut pattern: P) {
pattern.skip_until(self);
if !self.is_at_end() {
unsafe {
pattern.skip_over_unchecked(self);
}
}
}
pub fn slice_until_after<P: Pattern>(&mut self, pattern: P) -> Option<&'str str> {
let start_pos = self.byte_pos;
if start_pos >= self.end_byte_pos() {
None
} else {
self.skip_until_after(pattern);
let end_pos = self.byte_pos;
Some(&self.string[start_pos..end_pos])
}
}
pub fn skip_whitespace(&mut self) {
self.skip_until(|char: char| !char.is_whitespace());
}
pub fn slice_whitespace(&mut self) -> Option<&'str str> {
self.slice_until(|char: char| !char.is_whitespace())
}
pub fn skip_non_whitespace(&mut self) {
self.skip_until(|char: char| char.is_whitespace());
}
pub fn slice_non_whitespace(&mut self) -> Option<&'str str> {
self.slice_until(|char: char| char.is_whitespace())
}
pub fn skip_line(&mut self) {
self.skip_until_after('\n');
}
pub fn slice_line(&mut self) -> Option<&'str str> {
let line = self.slice_until_after('\n');
line.map(|line| {
line.trim_right_matches(|char: char| char == '\n' || char == '\r')
})
}
pub fn skip_to_end(&mut self) {
unsafe {
let byte_pos = self.end_byte_pos();
self.jump_to_unchecked(byte_pos);
}
}
pub fn slice_to_end(&mut self) -> Option<&'str str> {
let start_pos = self.byte_pos;
if start_pos >= self.end_byte_pos() {
None
} else {
self.skip_to_end();
let end_pos = self.byte_pos;
Some(&self.string[start_pos..end_pos])
}
}
pub fn is_at_end(&self) -> bool {
self.byte_pos >= self.end_byte_pos()
}
}
/// Borrowing a slicer as `str` yields its complete input, not just the
/// unconsumed remainder.
impl<'str, T: Tracker> AsRef<str> for StrSlicer<'str, T> {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
/// Shortens `s` to at most `max` bytes without splitting a character.
///
/// Returns `(false, s)` when `s` already fits, otherwise `(true, prefix)`
/// where `prefix` ends at the last char boundary at or before `max`.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    if max >= s.len() {
        return (false, s);
    }
    // Search downwards from `max`; index 0 is always a boundary, so the
    // fallback is never actually needed.
    max = (0..max + 1)
        .rev()
        .find(|&index| s.is_char_boundary(index))
        .unwrap_or(0);
    (true, &s[..max])
}
/// Panics with a message saying that `byte_pos` lies outside `string`.
///
/// At most the first 256 bytes of `string` are shown, followed by `[...]`
/// when the string had to be shortened.
#[inline(never)]
#[cold]
fn jump_oob_fail(string: &str, byte_pos: usize) -> ! {
    const MAX_DISPLAY_LENGTH: usize = 256;
    let (was_cut, shown) = truncate_to_char_boundary(string, MAX_DISPLAY_LENGTH);
    let suffix = if !was_cut { "" } else { "[...]" };
    panic!("byte index {} is out of bounds of `{}`{}", byte_pos, shown, suffix);
}
/// Panics with a message saying that `byte_pos` falls inside a character of
/// `string`, naming that character and the byte range it occupies.
///
/// At most the first 256 bytes of `string` are shown, followed by `[...]`
/// when the string had to be shortened.
#[inline(never)]
#[cold]
fn jump_char_boundary_fail(string: &str, byte_pos: usize) -> ! {
    const MAX_DISPLAY_LENGTH: usize = 256;
    let (was_cut, shown) = truncate_to_char_boundary(string, MAX_DISPLAY_LENGTH);
    let suffix = if !was_cut { "" } else { "[...]" };

    // Walk back to the first byte of the character that contains `byte_pos`.
    let mut first_byte = byte_pos;
    loop {
        if string.is_char_boundary(first_byte) {
            break;
        }
        first_byte -= 1;
    }

    let offending = string[first_byte..].chars().next().unwrap();
    let occupied = first_byte..(first_byte + offending.len_utf8());
    panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
           byte_pos, offending, occupied, shown, suffix);
}
pub mod trackers {
use ::Tracker;
const NEWLINE: char = '\n';
#[derive(Debug, Clone)]
pub struct LineTracker {
lines: usize,
line_byte_pos: usize
}
impl LineTracker {
pub fn new() -> Self {
Self {
lines: 0,
line_byte_pos: 0
}
}
#[inline]
pub fn lines(&self) -> usize {
self.lines
}
#[inline]
pub fn line_byte_pos(&self) -> usize {
self.line_byte_pos
}
}
impl Default for LineTracker {
fn default() -> Self {
Self::new()
}
}
impl Tracker for LineTracker {
type Pos = usize;
fn pos(&self) -> Self::Pos {
self.lines
}
fn update(&mut self, string: &str, old_byte_pos: usize, new_byte_pos: usize) {
if new_byte_pos > old_byte_pos {
let mut newline_count = 0;
for (index, _) in string[old_byte_pos..new_byte_pos].match_indices(NEWLINE) {
newline_count += 1;
self.line_byte_pos = index;
}
self.lines += newline_count;
} else if new_byte_pos < old_byte_pos {
let diff = old_byte_pos - new_byte_pos;
let half_len_to_root = old_byte_pos / 2;
if diff > half_len_to_root {
let mut newline_count = 0;
for (index, _) in string[0..new_byte_pos].match_indices(NEWLINE) {
newline_count += 1;
self.line_byte_pos = index;
}
self.lines = newline_count;
} else {
let mut newline_count = 0;
for (index, _) in string[new_byte_pos..old_byte_pos].match_indices(NEWLINE) {
newline_count += 1;
self.line_byte_pos = index;
}
self.lines -= newline_count;
}
}
}
}
}