extern crate alloc;
use alloc::{borrow::Cow, string::String, vec::Vec};
use core::ops::Range;
use memchr::memchr;
#[allow(unused_imports)]
#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
use crate::println;
use crate::util::{self, is_blank, is_space, trim_left_space, utf8_len, TinyVec};
/// A single ASCII space; `SPACE[0]` is the byte that virtual padding expands to.
const SPACE: &[u8] = b" ";
/// A piece of text that is either a byte range into a source string or an
/// owned string of its own.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum Value {
    /// A `start..stop` byte range that is resolved against the source text.
    Index(Index),
    /// Text stored inline; the source is ignored when reading it.
    String(String),
}

impl Value {
    /// Returns the value as bytes.
    ///
    /// An [`Value::Index`] is sliced out of `source`; a [`Value::String`]
    /// returns its own bytes and does not look at `source`.
    pub fn bytes<'a>(&'a self, source: &'a str) -> &'a [u8] {
        match self {
            Self::String(owned) => owned.as_bytes(),
            Self::Index(range) => range.bytes(source),
        }
    }

    /// Returns the value as a string slice.
    ///
    /// For [`Value::Index`] this delegates to [`Index::str`], which slices
    /// `source` without bounds or char-boundary checks.
    pub fn str<'a>(&'a self, source: &'a str) -> &'a str {
        match self {
            Self::String(owned) => owned.as_str(),
            Self::Index(range) => range.str(source),
        }
    }

    /// Returns `true` when the value holds no text.
    pub const fn is_empty(&self) -> bool {
        match self {
            Self::String(owned) => owned.is_empty(),
            Self::Index(range) => range.is_empty(),
        }
    }

    /// Returns the length of the value in bytes.
    pub const fn len(&self) -> usize {
        match self {
            Self::String(owned) => owned.len(),
            Self::Index(range) => range.len(),
        }
    }
}
/// Copies the string slice into an owned [`Value::String`].
impl From<&str> for Value {
    fn from(s: &str) -> Self {
        Value::String(String::from(s))
    }
}

/// Takes ownership of the string as a [`Value::String`].
impl From<String> for Value {
    fn from(s: String) -> Self {
        Value::String(s)
    }
}

/// Converts bytes to a [`Value::String`], replacing invalid UTF-8 sequences
/// with U+FFFD.
impl From<&[u8]> for Value {
    fn from(s: &[u8]) -> Self {
        Value::String(String::from_utf8_lossy(s).into_owned())
    }
}

/// Converts an owned byte buffer to a [`Value::String`], replacing invalid
/// UTF-8 sequences with U+FFFD.
impl From<Vec<u8>> for Value {
    fn from(s: Vec<u8>) -> Self {
        // Reuse the allocation when the buffer is already valid UTF-8; only
        // fall back to the copying, lossy conversion when it is not.
        let text = String::from_utf8(s)
            .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned());
        Value::String(text)
    }
}

/// Collects the characters into a [`Value::String`].
impl From<&[char]> for Value {
    fn from(s: &[char]) -> Self {
        Value::String(s.iter().collect())
    }
}

/// Converts possibly-owned bytes to a [`Value::String`] (lossy on invalid
/// UTF-8).
impl From<Cow<'_, [u8]>> for Value {
    fn from(s: Cow<'_, [u8]>) -> Self {
        // Dispatch to the owned/borrowed conversions so an owned, valid
        // buffer is not copied a second time.
        match s {
            Cow::Owned(bytes) => Value::from(bytes),
            Cow::Borrowed(bytes) => Value::from(bytes),
        }
    }
}

/// Converts a possibly-owned string to a [`Value::String`].
impl From<Cow<'_, str>> for Value {
    fn from(s: Cow<'_, str>) -> Self {
        Value::String(s.into_owned())
    }
}

/// Clones the referenced value.
impl From<&Value> for Value {
    fn from(v: &Value) -> Self {
        v.clone()
    }
}

/// Interprets the pair as `(start, stop)` and builds a [`Value::Index`].
impl From<(usize, usize)> for Value {
    fn from((start, stop): (usize, usize)) -> Self {
        Value::Index(Index::new(start, stop))
    }
}

/// Builds a [`Value::Index`] from the segment's `start..stop`; padding and
/// the force-newline flag are not carried over.
impl From<Segment> for Value {
    fn from(segment: Segment) -> Self {
        Value::Index(Index::new(segment.start(), segment.stop()))
    }
}
/// A `start..stop` byte range into a source string.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Index {
    start: usize,
    stop: usize,
}

impl Index {
    /// Creates a range covering `start..stop`. The bounds are not validated.
    pub fn new(start: usize, stop: usize) -> Self {
        Index { start, stop }
    }

    /// Returns the inclusive start offset.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the exclusive stop offset.
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the bytes of `source` covered by this range.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds for `source` or if
    /// `start > stop`.
    #[inline(always)]
    pub fn bytes<'a>(&self, source: &'a str) -> &'a [u8] {
        &source.as_bytes()[self.start..self.stop]
    }

    /// Returns the part of `source` covered by this range.
    ///
    /// No bounds or char-boundary checks are performed: the range must lie
    /// inside `source` and both ends must fall on UTF-8 character
    /// boundaries, otherwise the behavior is undefined.
    #[inline(always)]
    pub fn str<'a>(&self, source: &'a str) -> &'a str {
        // SAFETY: relies on the invariant documented above; it is not
        // verified here.
        unsafe { source.get_unchecked(self.start..self.stop) }
    }

    /// Returns `true` when the range covers no bytes (`start >= stop`).
    #[inline(always)]
    pub const fn is_empty(&self) -> bool {
        self.start >= self.stop
    }

    /// Returns a copy of this range with `start` replaced by `v`.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Index {
        Index::new(v, self.stop)
    }

    /// Returns a copy of this range with `stop` replaced by `v`.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Index {
        Index::new(self.start, v)
    }

    /// Returns the number of bytes covered by the range.
    ///
    /// An inverted range (`start > stop`) reports `0`, matching
    /// [`Index::is_empty`], instead of underflowing.
    #[inline(always)]
    pub const fn len(&self) -> usize {
        self.stop.saturating_sub(self.start)
    }
}
impl From<Index> for Value {
fn from(index: Index) -> Self {
Value::Index(index)
}
}
impl From<(usize, usize)> for Index {
fn from((start, stop): (usize, usize)) -> Self {
Index::new(start, stop)
}
}
impl From<Segment> for Index {
fn from(segment: Segment) -> Self {
Index::new(segment.start(), segment.stop())
}
}
/// Text that may span several source ranges, or an owned string.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub enum MultilineValue {
    /// No text at all.
    #[default]
    Empty,
    /// One or more byte ranges into the source, in order.
    Indices(TinyVec<Index>),
    /// Text stored inline; the source is ignored when reading it.
    String(String),
}

impl MultilineValue {
    /// Creates a value made of a single source range.
    pub fn from_index(index: Index) -> Self {
        Self::Indices(TinyVec::from_single(index))
    }

    /// Creates a value made of the given source ranges.
    pub fn from_indices(indices: Vec<Index>) -> Self {
        Self::Indices(TinyVec::from_vec(indices))
    }

    /// Creates a value that owns its text.
    pub fn from_string(s: String) -> Self {
        Self::String(s)
    }

    /// Returns the text as a string.
    ///
    /// A single range (or an owned string) is borrowed. With several ranges
    /// the result is an owned concatenation in which every range after the
    /// first is passed through `trim_left_space` before being appended.
    pub fn str<'a>(&'a self, source: &'a str) -> Cow<'a, str> {
        let indices = match self {
            Self::Empty => return Cow::Borrowed(""),
            Self::String(text) => return Cow::Borrowed(text.as_str()),
            Self::Indices(indices) => indices,
        };
        let Some(head) = indices.get(0) else {
            return Cow::Borrowed("");
        };
        if indices.get(1).is_none() {
            return Cow::Borrowed(head.str(source));
        }

        let mut joined = String::new();
        joined.push_str(head.str(source));
        for continuation in indices.iter().skip(1) {
            let trimmed = trim_left_space(continuation.bytes(source));
            // SAFETY: assumes `trim_left_space` returns a sub-slice that
            // still starts on a character boundary (presumably it only
            // strips ASCII whitespace — confirm in `util`) and that the
            // range itself lies on character boundaries of `source`.
            joined.push_str(unsafe { core::str::from_utf8_unchecked(trimmed) });
        }
        Cow::Owned(joined)
    }

    /// Returns the text as bytes; same joining rules as [`MultilineValue::str`].
    pub fn bytes<'a>(&'a self, source: &'a str) -> Cow<'a, [u8]> {
        let indices = match self {
            Self::Empty => return Cow::Borrowed(&[]),
            Self::String(text) => return Cow::Borrowed(text.as_bytes()),
            Self::Indices(indices) => indices,
        };
        let Some(head) = indices.get(0) else {
            return Cow::Borrowed(&[]);
        };
        if indices.get(1).is_none() {
            return Cow::Borrowed(head.bytes(source));
        }

        let mut joined = Vec::new();
        joined.extend_from_slice(head.bytes(source));
        for continuation in indices.iter().skip(1) {
            joined.extend_from_slice(trim_left_space(continuation.bytes(source)));
        }
        Cow::Owned(joined)
    }
}
/// Takes ownership of the string as [`MultilineValue::String`].
impl From<String> for MultilineValue {
    fn from(s: String) -> Self {
        MultilineValue::String(s)
    }
}

/// Clones the string into [`MultilineValue::String`].
impl From<&String> for MultilineValue {
    fn from(s: &String) -> Self {
        MultilineValue::String(s.clone())
    }
}

/// Copies the string slice into [`MultilineValue::String`].
impl From<&str> for MultilineValue {
    fn from(s: &str) -> Self {
        MultilineValue::String(String::from(s))
    }
}

/// Converts bytes to [`MultilineValue::String`], replacing invalid UTF-8
/// sequences with U+FFFD.
impl From<&[u8]> for MultilineValue {
    fn from(s: &[u8]) -> Self {
        MultilineValue::String(String::from_utf8_lossy(s).into_owned())
    }
}

/// Converts an owned byte buffer to [`MultilineValue::String`], replacing
/// invalid UTF-8 sequences with U+FFFD.
impl From<Vec<u8>> for MultilineValue {
    fn from(s: Vec<u8>) -> Self {
        // Reuse the allocation when the buffer is already valid UTF-8; only
        // fall back to the copying, lossy conversion when it is not.
        let text = String::from_utf8(s)
            .unwrap_or_else(|err| String::from_utf8_lossy(err.as_bytes()).into_owned());
        MultilineValue::String(text)
    }
}

/// Converts a possibly-owned string to [`MultilineValue::String`].
impl From<Cow<'_, str>> for MultilineValue {
    fn from(s: Cow<'_, str>) -> Self {
        MultilineValue::String(s.into_owned())
    }
}

/// Converts possibly-owned bytes to [`MultilineValue::String`] (lossy on
/// invalid UTF-8).
impl From<Cow<'_, [u8]>> for MultilineValue {
    fn from(s: Cow<'_, [u8]>) -> Self {
        // Dispatch to the owned/borrowed conversions so an owned, valid
        // buffer is not copied a second time.
        match s {
            Cow::Owned(bytes) => MultilineValue::from(bytes),
            Cow::Borrowed(bytes) => MultilineValue::from(bytes),
        }
    }
}

/// Maps `Value::Index` to a single-range value and `Value::String` to an
/// owned string.
impl From<Value> for MultilineValue {
    fn from(v: Value) -> Self {
        match v {
            Value::Index(index) => MultilineValue::Indices(TinyVec::from_single(index)),
            Value::String(s) => MultilineValue::String(s),
        }
    }
}

/// Builds a single-range value from the segment's `start..stop`.
impl From<Segment> for MultilineValue {
    fn from(segment: Segment) -> Self {
        MultilineValue::Indices(TinyVec::from_single(segment.into()))
    }
}

/// Wraps the ranges in [`MultilineValue::Indices`].
impl From<TinyVec<Index>> for MultilineValue {
    fn from(indices: TinyVec<Index>) -> Self {
        MultilineValue::Indices(indices)
    }
}
/// A sequence of lines, stored either as source segments or as one owned
/// string.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub enum Lines {
    /// No lines.
    #[default]
    Empty,
    /// One segment per line, resolved against the source when iterated.
    Segments(Vec<Segment>),
    /// Owned text; iteration splits it after each `'\n'`.
    String(String),
}

impl Lines {
    /// Creates lines backed by source segments.
    pub fn from_segments(segments: Vec<Segment>) -> Self {
        Self::Segments(segments)
    }

    /// Creates lines backed by an owned string.
    pub fn from_string(s: String) -> Self {
        Self::String(s)
    }

    /// Iterates over the lines.
    ///
    /// Segments are rendered with `Segment::str` against `source`; an owned
    /// string yields its `'\n'`-terminated pieces (terminator included) and
    /// ignores `source`.
    pub fn iter<'a>(&'a self, source: &'a str) -> impl Iterator<Item = Cow<'a, str>> {
        let state = match self {
            Self::Empty => LinesIterState::Empty,
            Self::String(text) => LinesIterState::String(text.split_inclusive('\n')),
            Self::Segments(segments) => LinesIterState::Segments(segments.iter()),
        };
        LinesIter::new(state, source)
    }
}
impl From<String> for Lines {
fn from(s: String) -> Self {
Lines::String(s)
}
}
impl From<&String> for Lines {
fn from(s: &String) -> Self {
Lines::String(s.clone())
}
}
impl From<&str> for Lines {
fn from(s: &str) -> Self {
Lines::String(String::from(s))
}
}
impl From<&[u8]> for Lines {
fn from(s: &[u8]) -> Self {
Lines::String(String::from_utf8_lossy(s).into_owned())
}
}
impl From<Vec<Segment>> for Lines {
fn from(segments: Vec<Segment>) -> Self {
Lines::Segments(segments)
}
}
impl From<&[Segment]> for Lines {
fn from(segments: &[Segment]) -> Self {
Lines::Segments(segments.to_vec())
}
}
/// Underlying cursor of a [`LinesIter`], one variant per [`Lines`] variant.
enum LinesIterState<'a> {
    Empty,
    Segments(core::slice::Iter<'a, Segment>),
    String(core::str::SplitInclusive<'a, char>),
}

/// Iterator returned by [`Lines::iter`].
struct LinesIter<'a> {
    state: LinesIterState<'a>,
    /// Source text that segments are rendered against.
    source: &'a str,
}

impl<'a> LinesIter<'a> {
    /// Bundles a cursor with the source it refers to.
    pub fn new(state: LinesIterState<'a>, source: &'a str) -> Self {
        Self { state, source }
    }
}

impl<'a> Iterator for LinesIter<'a> {
    type Item = Cow<'a, str>;

    /// Yields the next line: a rendered segment, or a borrowed piece of the
    /// owned string.
    #[inline(always)]
    fn next(&mut self) -> Option<Self::Item> {
        let source = self.source;
        match &mut self.state {
            LinesIterState::Empty => None,
            LinesIterState::String(pieces) => pieces.next().map(Cow::Borrowed),
            LinesIterState::Segments(segments) => {
                let segment = segments.next()?;
                Some(segment.str(source))
            }
        }
    }
}
/// A block of text: a slice of line segments.
pub type Block = [Segment];
/// Binary-searches `block` for the segment whose `start..stop` contains the
/// byte offset `pos` and returns its index.
///
/// The search assumes the segments are sorted by position and do not
/// overlap. Returns `None` when no segment contains `pos`.
const fn binary_search_block_pos(block: &Block, pos: usize) -> Option<usize> {
    let mut lo = 0;
    let mut hi = block.len();
    while lo < hi {
        let mid = lo + (hi - lo) / 2;
        let candidate = &block[mid];
        if pos < candidate.start {
            // `pos` lies before this segment: continue in the lower half.
            hi = mid;
        } else if pos < candidate.stop {
            return Some(mid);
        } else {
            // `pos` lies at or after this segment's stop: upper half.
            lo = mid + 1;
        }
    }
    None
}
pub trait BlockExt {
fn to_values(&self) -> MultilineValue;
}
impl BlockExt for Block {
fn to_values(&self) -> MultilineValue {
let first = self.first();
let second = self.get(1);
if let Some(f) = first {
if second.is_none() {
return MultilineValue::from_index((f.start(), f.stop()).into());
}
} else {
return MultilineValue::default();
}
let mut result = Vec::with_capacity(self.len());
for v in self.iter() {
result.push((v.start(), v.stop()).into());
}
MultilineValue::from_indices(result)
}
}
pub(crate) fn block_to_values(i: impl IntoIterator<Item = Segment>) -> MultilineValue {
let mut b = i.into_iter();
let first = b.next();
let second = b.next();
if let Some(f) = first {
if second.is_none() {
return MultilineValue::from_index(f.into());
}
} else {
return MultilineValue::default();
}
let mut result = Vec::with_capacity(2 + b.size_hint().0);
result.push(first.unwrap().into());
result.push(second.unwrap().into());
for segment in b {
result.push(segment.into());
}
MultilineValue::from_indices(result)
}
/// A `start..stop` byte range of the source, optionally preceded by virtual
/// padding spaces and optionally forced to end with a newline when rendered.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct Segment {
    start: usize,
    stop: usize,
    /// Number of virtual spaces rendered in front of the range.
    padding: u8,
    /// When set, rendered output gets a trailing `'\n'` if it lacks one.
    force_newline: bool,
}

impl Segment {
    /// Creates a segment covering `start..stop` with no padding.
    pub fn new(start: usize, stop: usize) -> Self {
        Segment {
            start,
            stop,
            padding: 0,
            force_newline: false,
        }
    }

    /// Creates a segment covering `start..stop` with `padding` virtual
    /// spaces in front.
    ///
    /// The padding is stored as a `u8`; larger values are truncated by the
    /// cast.
    pub fn new_with_padding(start: usize, stop: usize, padding: usize) -> Self {
        Segment {
            start,
            stop,
            padding: padding as u8,
            force_newline: false,
        }
    }

    /// Returns the inclusive start offset.
    #[inline(always)]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the exclusive stop offset.
    #[inline(always)]
    pub fn stop(&self) -> usize {
        self.stop
    }

    /// Returns the number of virtual padding spaces.
    #[inline(always)]
    pub fn padding(&self) -> usize {
        self.padding as usize
    }

    /// Returns whether rendering appends a missing trailing newline.
    #[inline(always)]
    pub fn force_newline(&self) -> bool {
        self.force_newline
    }

    /// Renders the segment as bytes.
    ///
    /// The source bytes are borrowed when there is no padding and no newline
    /// has to be added; otherwise an owned buffer is built from the padding
    /// spaces, the source bytes and, if forced and missing, a `'\n'`.
    pub fn bytes<'a>(&self, source: &'a str) -> Cow<'a, [u8]> {
        if self.padding == 0
            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
        {
            Cow::Borrowed(&source.as_bytes()[self.start..self.stop])
        } else {
            let mut result = Vec::with_capacity(self.padding() + self.stop - self.start + 1);
            result.extend(core::iter::repeat_n(SPACE[0], self.padding()));
            result.extend_from_slice(&source.as_bytes()[self.start..self.stop]);
            if self.force_newline && !result.is_empty() && *result.last().unwrap() != b'\n' {
                result.push(b'\n');
            }
            Cow::Owned(result)
        }
    }

    /// Renders the segment as a string; same rules as [`Segment::bytes`].
    ///
    /// The range is sliced without bounds or char-boundary checks, so it must
    /// lie inside `source` on UTF-8 character boundaries.
    pub fn str<'a>(&self, source: &'a str) -> Cow<'a, str> {
        if self.padding == 0
            && (!self.force_newline || source.as_bytes().get(self.stop - 1) == Some(&b'\n'))
        {
            // SAFETY: relies on the invariant documented above; it is not
            // verified here.
            unsafe { Cow::Borrowed(source.get_unchecked(self.start..self.stop)) }
        } else {
            let mut result = String::with_capacity(self.padding() + self.stop - self.start + 1);
            result.extend(core::iter::repeat_n(' ', self.padding()));
            // SAFETY: same invariant as above.
            unsafe { result.push_str(source.get_unchecked(self.start..self.stop)) };
            if self.force_newline && !result.is_empty() && result.as_bytes().last() != Some(&b'\n')
            {
                result.push('\n');
            }
            Cow::Owned(result)
        }
    }

    /// Returns the rendered length without a forced newline: the byte range
    /// plus the padding.
    #[inline(always)]
    pub const fn len(&self) -> usize {
        self.stop - self.start + self.padding as usize
    }

    /// Returns the segment from this segment's start up to `other`'s start,
    /// with the padding difference `self.padding - other.padding`.
    ///
    /// # Panics
    ///
    /// Panics with `"invalid state"` when the two segments do not share the
    /// same stop.
    pub fn between(&self, other: Segment) -> Segment {
        if self.stop != other.stop {
            panic!("invalid state");
        }
        Segment::new_with_padding(
            self.start,
            other.start,
            (self.padding - other.padding) as usize,
        )
    }

    /// Returns `true` when the segment has neither bytes nor padding.
    #[inline(always)]
    pub const fn is_empty(&self) -> bool {
        self.start >= self.stop && self.padding == 0
    }

    /// Returns whether the covered source bytes are blank according to
    /// `util::is_blank`. Padding is not considered.
    pub fn is_blank(&self, source: &str) -> bool {
        let v = &source.as_bytes()[self.start..self.stop];
        is_blank(v)
    }

    /// Returns a segment with the trailing run reported by
    /// `util::trim_right_space_length` cut off.
    ///
    /// When that run spans the whole range, the result is the empty segment
    /// at `start` (padding dropped); otherwise the padding is kept.
    pub fn trim_right_space(&self, source: &str) -> Segment {
        let v = &source.as_bytes()[self.start..self.stop];
        let l = util::trim_right_space_length(v);
        if l == v.len() {
            Segment::new(self.start, self.start)
        } else {
            Segment::new_with_padding(self.start, self.stop - l, self.padding as usize)
        }
    }

    /// Returns a segment with the leading run reported by
    /// `util::trim_left_space_length` cut off. The padding is dropped.
    pub fn trim_left_space(&self, source: &str) -> Segment {
        let v = &source.as_bytes()[self.start..self.stop];
        let l = util::trim_left_space_length(v);
        Segment::new(self.start + l, self.stop)
    }

    /// Removes up to `width` columns of leading indentation.
    ///
    /// Padding is consumed first, then leading spaces (1 column each) and
    /// tabs (counted as 4 columns each). When a tab overshoots the requested
    /// width, the surplus columns are returned as padding on the result.
    /// The last byte of the segment is never consumed.
    pub fn trim_left_space_width(&self, mut width: isize, source: &str) -> Segment {
        let mut padding = self.padding as isize;
        while width > 0 && padding > 0 {
            width -= 1;
            padding -= 1;
        }
        if width == 0 {
            return Segment::new_with_padding(self.start, self.stop, padding as usize);
        }
        let v = &source.as_bytes()[self.start..self.stop];
        let mut start = self.start;
        for &c in v {
            // Stop on `width <= 0`, not only `== 0`: a tab can take `width`
            // below zero, and continuing from there would swallow further
            // whitespace and inflate the padding.
            if start >= self.stop - 1 || width <= 0 {
                break;
            }
            if c == b' ' {
                width -= 1;
            } else if c == b'\t' {
                width -= 4;
            } else {
                break;
            }
            start += 1;
        }
        if width < 0 {
            // Columns of the last tab that were not asked to be removed.
            padding = -width;
        }
        Segment::new_with_padding(start, self.stop, padding as usize)
    }

    /// Returns a copy with `start` replaced by `v`; the padding is kept and
    /// the force-newline flag is cleared.
    #[inline(always)]
    pub fn with_start(&self, v: usize) -> Segment {
        Segment::new_with_padding(v, self.stop, self.padding as usize)
    }

    /// Returns a copy with `stop` replaced by `v`; the padding is kept and
    /// the force-newline flag is cleared.
    #[inline(always)]
    pub fn with_stop(&self, v: usize) -> Segment {
        Segment::new_with_padding(self.start, v, self.padding as usize)
    }

    /// Returns a copy with the padding replaced by `v`; the force-newline
    /// flag is cleared.
    #[inline(always)]
    pub fn with_padding(&self, v: usize) -> Segment {
        Segment::new_with_padding(self.start, self.stop, v)
    }

    /// Returns a copy with the force-newline flag set to `v`.
    #[inline(always)]
    pub fn with_force_newline(&self, v: bool) -> Segment {
        Segment {
            start: self.start,
            stop: self.stop,
            padding: self.padding,
            force_newline: v,
        }
    }

    /// Returns the plain `start..stop` range as an [`Index`].
    #[inline(always)]
    pub fn to_index(&self) -> Index {
        Index::new(self.start, self.stop)
    }
}
impl From<(usize, usize)> for Segment {
fn from((start, stop): (usize, usize)) -> Self {
Segment::new(start, stop)
}
}
impl From<(usize, usize, usize)> for Segment {
fn from((start, stop, padding): (usize, usize, usize)) -> Self {
Segment::new_with_padding(start, stop, padding)
}
}
impl From<Index> for Segment {
fn from(index: Index) -> Self {
Segment::new(index.start(), index.stop())
}
}
impl From<Segment> for Range<usize> {
fn from(segment: Segment) -> Self {
segment.start()..segment.stop()
}
}
/// Sentinel byte returned by `Reader::peek_byte` when there is nothing left to read.
pub const EOS: u8 = 0xff;
/// A line-oriented cursor over source text.
pub trait Reader<'a> {
    /// Returns the whole source text.
    fn source(&self) -> &'a str;

    /// Returns the current line number and the current position segment.
    fn position(&self) -> (usize, Segment);

    /// Moves the reader back to its initial position.
    fn reset_position(&mut self);

    /// Moves the reader to the given line number and position segment.
    fn set_position(&mut self, line: usize, pos: Segment);

    /// Sets the number of virtual padding spaces at the current position.
    fn set_padding(&mut self, padding: usize);

    /// Returns the next byte without consuming it, or [`EOS`] at the end.
    fn peek_byte(&self) -> u8;

    /// Returns the segment for the rest of the current line, if any.
    fn peek_line_segment(&self) -> Option<Segment>;

    /// Returns the rest of the current line as bytes together with its
    /// segment, if any.
    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)>;

    /// Returns the rest of the current line as a string together with its
    /// segment, if any.
    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)>;

    /// Advances the cursor by `n` positions.
    fn advance(&mut self, n: usize);

    /// Advances by `n`, then raises the padding to `padding` if that is
    /// larger than the current padding.
    fn advance_and_set_padding(&mut self, n: usize, padding: usize);

    /// Moves to the beginning of the next line.
    fn advance_line(&mut self);

    /// Moves to the end of the current line.
    fn advance_to_eol(&mut self);

    /// Returns the column offset of the cursor within the current line.
    fn line_offset(&mut self) -> usize;

    /// Returns the character in front of the cursor.
    fn precending_charater(&self) -> char;

    /// Skips blank lines and returns the first non-blank line (bytes and
    /// segment) without consuming it, or `None` when the input ends first.
    fn skip_blank_lines(&mut self) -> Option<(Cow<'a, [u8]>, Segment)> {
        loop {
            let (line, seg) = self.peek_line_bytes()?;
            if !is_blank(&line) {
                return Some((line, seg));
            }
            self.advance_line();
        }
    }

    /// Consumes bytes while `f` returns `true` and returns how many were
    /// consumed. Stops at [`EOS`] without calling `f` on it.
    fn skip_while<F>(&mut self, mut f: F) -> usize
    where
        F: FnMut(u8) -> bool,
    {
        let mut skipped = 0usize;
        loop {
            let b = self.peek_byte();
            if b == EOS || !f(b) {
                return skipped;
            }
            skipped += 1;
            self.advance(1);
        }
    }

    /// Consumes bytes accepted by `util::is_space` and returns how many were
    /// consumed.
    fn skip_spaces(&mut self) -> usize {
        self.skip_while(is_space)
    }
}
/// A [`Reader`] over a whole source string, split into lines at `'\n'`.
pub struct BasicReader<'a> {
    source: &'a str,
    /// `source` as bytes.
    bsource: &'a [u8],
    /// Length of `source` in bytes.
    source_length: usize,
    /// Current line number; `None` until the first line has been entered.
    line: Option<usize>,
    /// Remaining part of the current line.
    pos: Segment,
    /// Offset the line-offset computation starts from.
    head: usize,
    /// Cached result of `line_offset`, cleared whenever the cursor moves.
    line_offset: Option<usize>,
}

impl<'a> BasicReader<'a> {
    /// Creates a reader positioned at the first line of `source`.
    pub fn new(source: &'a str) -> Self {
        let bsource = source.as_bytes();
        let mut reader = Self {
            source,
            bsource,
            source_length: bsource.len(),
            line: None,
            pos: Segment::new(0, 0),
            head: 0,
            line_offset: None,
        };
        reader.reset_position();
        reader
    }

    /// Creates a reader over raw bytes without validating them.
    ///
    /// # Safety
    ///
    /// `source` must be valid UTF-8.
    pub unsafe fn new_unchecked(source: &'a [u8]) -> Self {
        // SAFETY: the caller guarantees that `source` is valid UTF-8.
        let text = unsafe { core::str::from_utf8_unchecked(source) };
        Self::new(text)
    }
}
impl<'a> Reader<'a> for BasicReader<'a> {
fn source(&self) -> &'a str {
self.source
}
fn position(&self) -> (usize, Segment) {
(self.line.unwrap_or(0), self.pos)
}
fn reset_position(&mut self) {
self.line = None;
self.head = 0;
self.line_offset = None;
self.advance_line();
}
fn set_position(&mut self, line: usize, pos: Segment) {
self.line = Some(line);
self.pos = pos;
self.head = pos.start;
self.line_offset = None;
}
fn set_padding(&mut self, padding: usize) {
self.pos.padding = padding as u8;
}
fn peek_byte(&self) -> u8 {
if self.source_length == 0 {
return EOS;
}
if self.pos.padding() != 0 {
return SPACE[0];
}
if self.pos.start() < self.source_length {
return self.bsource[self.pos.start()];
}
EOS
}
fn peek_line_segment(&self) -> Option<Segment> {
if self.source_length == 0 {
return None;
}
if self.pos.start() < self.source_length {
return Some(self.pos);
}
None
}
fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
if self.source_length == 0 {
return None;
}
if self.pos.start() < self.source_length {
return Some((self.pos.bytes(self.source), self.pos));
}
None
}
fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
if self.source_length == 0 {
return None;
}
if self.pos.start() < self.source_length {
return Some((self.pos.str(self.source), self.pos));
}
None
}
fn advance(&mut self, n: usize) {
if self.source_length == 0 {
return;
}
self.line_offset = None;
if n < self.pos.len() && self.pos.padding() == 0 {
self.pos.start += n;
return;
}
let mut n = n;
while n > 0 && self.pos.start < self.source_length {
if self.pos.padding != 0 {
self.pos.padding -= 1;
n -= 1;
continue;
}
if self.bsource[self.pos.start] == b'\n' {
self.advance_line();
n -= 1;
continue;
}
self.pos.start += 1;
n -= 1;
}
}
fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
self.advance(n);
if padding > self.pos.padding() {
self.set_padding(padding);
}
}
fn advance_line(&mut self) {
self.line_offset = None;
if self.source_length == 0 || self.pos.start >= self.source_length {
return;
}
if self.line.is_some() {
self.pos.start = self.pos.stop;
if self.pos.start >= self.source_length {
return;
}
self.pos.stop = self.source_length;
if self.bsource[self.pos.start] != b'\n' {
if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
self.pos.stop = self.pos.start + i + 1;
}
} else {
self.pos.stop = self.pos.start + 1;
}
self.line = Some(self.line.unwrap() + 1);
} else {
if let Some(i) = memchr(b'\n', self.bsource) {
self.pos = (0, i + 1).into();
} else {
self.pos = (0, self.source_length).into();
}
self.line = Some(0);
}
self.head = self.pos.start;
self.pos.padding = 0;
}
fn advance_to_eol(&mut self) {
if self.source_length == 0 || self.pos.start >= self.source_length {
return;
}
self.line_offset = None;
if let Some(i) = memchr(b'\n', &self.bsource[self.pos.start..]) {
self.pos.start += i;
} else {
self.pos.start = self.source_length;
}
self.pos.padding = 0;
}
fn line_offset(&mut self) -> usize {
if self.line_offset.is_none() {
let mut v = 0;
for i in self.head..self.pos.start {
if self.bsource[i] == b'\t' {
v += util::tab_width(v);
} else {
v += 1;
}
}
v -= self.pos.padding();
self.line_offset = Some(v);
}
self.line_offset.unwrap_or(0)
}
fn precending_charater(&self) -> char {
if self.pos.padding() != 0 {
return ' ';
}
if self.pos.start() == 0 {
return '\n';
}
let mut i = self.pos.start() - 1;
loop {
if let Some(l) = utf8_len(self.bsource[i]) {
if l == 1 {
return self.bsource[i] as char;
}
return str::from_utf8(&self.bsource[i..i + l])
.ok()
.and_then(|s| s.chars().next())
.unwrap_or('\u{FFFD}');
}
i -= 1;
if i == 0 {
break;
}
}
'\u{FFFD}'
}
}
pub struct BlockReader<'a> {
source: &'a str,
bsource: &'a [u8],
block: &'a Block,
line: Option<usize>,
pos: Segment,
head: usize,
last: usize,
line_offset: Option<usize>,
}
impl<'a> BlockReader<'a> {
pub fn new(source: &'a str, block: &'a Block) -> Self {
let mut b = BlockReader {
source,
bsource: source.as_bytes(),
block,
line: None,
pos: Segment::new(0, 0),
head: 0,
last: 0,
line_offset: None,
};
b.reset(block);
b
}
pub unsafe fn new_unchecked(source: &'a [u8], block: &'a Block) -> Self {
Self::new(core::str::from_utf8_unchecked(source), block)
}
pub fn reset(&mut self, lines: &'a Block) {
self.block = lines;
self.reset_position();
}
pub fn between_current(&mut self, line: usize, pos: Segment) -> MultilineValue {
if line == self.line.unwrap_or(0) {
let seg = self.block[line];
if pos.start() >= seg.start() && self.pos.start() <= seg.stop() {
return block_to_values(BetweenBlockIterator::single(
pos.start()..self.pos.start(),
));
}
}
block_to_values(BetweenBlockIterator::multi(
BlockReader {
source: self.source,
bsource: self.bsource,
block: self.block,
line: self.line,
pos: self.pos,
head: self.head,
last: self.last,
line_offset: self.line_offset,
},
line,
pos,
))
}
pub fn between(&self, range: Range<usize>) -> MultilineValue {
let from_line = binary_search_block_pos(self.block, range.start).unwrap_or(0);
let mut from_pos = self.block[from_line];
if range.start >= from_pos.start() && range.end <= from_pos.stop() {
return block_to_values(BetweenBlockIterator::single(range));
}
let to_line =
binary_search_block_pos(self.block, range.end).unwrap_or(self.block.len() - 1);
let mut to_pos = self.block[to_line];
to_pos.start = range.end;
from_pos.start = range.start;
block_to_values(BetweenBlockIterator::multi(
BlockReader {
source: self.source,
bsource: self.bsource,
block: self.block,
line: Some(to_line),
pos: to_pos,
head: 0,
last: 0,
line_offset: None,
},
from_line,
from_pos,
))
}
}
/// State for walking from a start position up to the position a reader was
/// at when the iterator was created.
struct MultilineBetweenBlock<'a> {
    /// Reader that is moved from the start position towards the end.
    reader: BlockReader<'a>,
    start_line: usize,
    start_pos: Segment,
    /// Line and position the reader had on creation; the walk ends here.
    current_line: usize,
    current_pos: Segment,
}

/// Iterator over the segments between two positions: either one precomputed
/// range (`single`) or a per-line walk (`multi`).
struct BetweenBlockIterator<'a> {
    multi: Option<MultilineBetweenBlock<'a>>,
    single: Option<Range<usize>>,
    done: bool,
}

impl<'a> BetweenBlockIterator<'a> {
    /// Creates a per-line iterator from (`line`, `pos`) up to the position
    /// `reader` currently has. The reader is moved to the start position.
    fn multi(mut reader: BlockReader<'a>, line: usize, pos: Segment) -> BetweenBlockIterator<'a> {
        let (current_line, current_pos) = reader.position();
        reader.set_position(line, pos);
        let walk = MultilineBetweenBlock {
            reader,
            start_line: line,
            start_pos: pos,
            current_line,
            current_pos,
        };
        BetweenBlockIterator {
            multi: Some(walk),
            single: None,
            done: false,
        }
    }

    /// Creates an iterator that yields exactly one segment covering `range`.
    fn single(range: Range<usize>) -> BetweenBlockIterator<'a> {
        BetweenBlockIterator {
            multi: None,
            single: Some(range),
            done: false,
        }
    }
}

impl<'a> Iterator for BetweenBlockIterator<'a> {
    type Item = Segment;

    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }
        if let Some(range) = &self.single {
            self.done = true;
            return Some(Segment::new(range.start, range.end));
        }

        let walk = self.multi.as_mut()?;
        let (ln, _) = walk.reader.position();
        let (_, segment) = walk.reader.peek_line_bytes()?;
        let on_final_line = ln == walk.current_line;

        // The first line starts at the requested start position and the
        // final line stops at the recorded end position; every other bound
        // comes from the line's own segment.
        let start = if ln == walk.start_line {
            walk.start_pos.start()
        } else {
            segment.start()
        };
        let stop = if on_final_line {
            walk.current_pos.start()
        } else {
            segment.stop()
        };

        if on_final_line {
            walk.reader.advance(stop - start);
            self.done = true;
        }
        walk.reader.advance_line();
        Some(Segment::new(start, stop))
    }
}
impl<'a> Reader<'a> for BlockReader<'a> {
    /// Returns the whole source text.
    fn source(&self) -> &'a str {
        self.source
    }

    /// Returns the current line index (0 before any line was entered) and
    /// the remaining part of the current line.
    fn position(&self) -> (usize, Segment) {
        (self.line.unwrap_or(0), self.pos)
    }

    /// Rewinds the reader to the first segment of the block and records the
    /// stop offset of the last segment.
    fn reset_position(&mut self) {
        self.line = None;
        self.head = 0;
        self.last = 0;
        self.line_offset = None;
        self.pos.start = 0;
        self.pos.stop = 0;
        self.pos.padding = 0;
        self.pos.force_newline = false;
        if let Some(l) = self.block.last() {
            self.last = l.stop;
        }
        self.advance_line();
    }

    /// Jumps to `line` / `pos`. When `line` is inside the block, the
    /// line-offset origin becomes the start of that line's segment.
    fn set_position(&mut self, line: usize, pos: Segment) {
        self.line_offset = None;
        self.line = Some(line);
        self.pos = pos;
        if line < self.block.len() {
            self.head = self.block[line].start;
        }
    }

    /// Sets the virtual padding of the current position (stored as `u8`).
    fn set_padding(&mut self, padding: usize) {
        self.line_offset = None;
        self.pos.padding = padding as u8;
    }

    /// Returns the next byte: a space while padding is pending, otherwise
    /// the source byte at the cursor. When the current position is empty the
    /// first byte of the next line is reported instead. Returns [`EOS`] at
    /// the end.
    fn peek_byte(&self) -> u8 {
        if self.bsource.is_empty() || self.block.is_empty() {
            return EOS;
        }
        if self.pos.padding() != 0 {
            return SPACE[0];
        }
        let l = self.line.unwrap();
        if self.pos.is_empty() {
            // Current line exhausted: look at the following line, if any.
            if l < self.block.len() - 1 {
                let next = &self.block[l + 1];
                if next.padding() != 0 {
                    return SPACE[0];
                }
                if next.start < self.bsource.len() {
                    return self.bsource[next.start];
                }
            }
            return EOS;
        } else if self.pos.start < self.bsource.len() {
            return self.bsource[self.pos.start];
        }
        EOS
    }

    /// Returns the remaining part of the current line, or the next line's
    /// segment when the current position is empty; `None` at the end.
    fn peek_line_segment(&self) -> Option<Segment> {
        if self.bsource.is_empty() || self.block.is_empty() {
            return None;
        }
        let l = self.line.unwrap();
        if self.pos.is_empty() {
            if l < self.block.len() - 1 {
                let s = self.block[l + 1].start;
                if s < self.bsource.len() {
                    return Some(self.block[l + 1]);
                }
            }
            return None;
        } else if self.pos.start < self.bsource.len() {
            return Some(self.pos);
        }
        None
    }

    /// Like `peek_line_segment`, but also returns the rendered bytes of the
    /// segment.
    fn peek_line_bytes(&self) -> Option<(Cow<'a, [u8]>, Segment)> {
        if self.bsource.is_empty() || self.block.is_empty() {
            return None;
        }
        let l = self.line.unwrap();
        if self.pos.is_empty() {
            if l < self.block.len() - 1 {
                let s = self.block[l + 1].start;
                if s < self.bsource.len() {
                    return Some((self.block[l + 1].bytes(self.source), self.block[l + 1]));
                }
            }
            return None;
        } else if self.pos.start < self.bsource.len() {
            return Some((self.pos.bytes(self.source), self.pos));
        }
        None
    }

    /// Like `peek_line_segment`, but also returns the rendered string of the
    /// segment.
    fn peek_line(&self) -> Option<(Cow<'a, str>, Segment)> {
        if self.bsource.is_empty() || self.block.is_empty() {
            return None;
        }
        let l = self.line.unwrap();
        if self.pos.is_empty() {
            if l < self.block.len() - 1 {
                let s = self.block[l + 1].start;
                if s < self.bsource.len() {
                    return Some((self.block[l + 1].str(self.source), self.block[l + 1]));
                }
            }
            return None;
        } else if self.pos.start < self.bsource.len() {
            return Some((self.pos.str(self.source), self.pos));
        }
        None
    }

    /// Advances by `n` positions; pending padding spaces count as positions
    /// and consuming the last byte of a line moves to the next line.
    fn advance(&mut self, n: usize) {
        if self.bsource.is_empty() || self.block.is_empty() {
            return;
        }
        self.line_offset = None;
        // Fast path: stay inside the current line and there is no padding.
        if n < self.pos.len() && self.pos.padding() == 0 {
            self.pos.start += n;
            return;
        }
        let mut n = n;
        while n > 0 && self.pos.start < self.last {
            if self.pos.padding != 0 {
                self.pos.padding -= 1;
                n -= 1;
                continue;
            }
            // On the last byte of a line that is not the final line.
            if self.pos.start >= self.pos.stop - 1 && self.pos.stop < self.last {
                self.advance_line();
                n -= 1;
                continue;
            }
            self.pos.start += 1;
            n -= 1;
        }
    }

    /// Advances by `n`, then raises the padding to `padding` if that is
    /// larger than the current padding.
    fn advance_and_set_padding(&mut self, n: usize, padding: usize) {
        self.advance(n);
        if padding > self.pos.padding() {
            self.set_padding(padding);
        }
    }

    /// Moves to the next segment of the block, or past the end of the source
    /// when there is none.
    fn advance_line(&mut self) {
        if self.bsource.is_empty() || self.block.is_empty() {
            return;
        }
        let l = match self.line {
            Some(l) => l + 1,
            None => 0,
        };
        if l < self.block.len() {
            self.set_position(l, self.block[l]);
        } else {
            self.pos.start = self.source().len();
            self.pos.stop = self.pos.start;
            self.pos.padding = 0;
        }
    }

    /// Moves the cursor onto the trailing `'\n'` of the current line, or to
    /// the line's stop when it has none, and clears the padding.
    fn advance_to_eol(&mut self) {
        if self.bsource.is_empty() || self.block.is_empty() {
            return;
        }
        self.line_offset = None;
        // Pending padding belongs to the part of the line being skipped; if
        // it were kept, `peek_byte` would still report a space afterwards.
        self.pos.padding = 0;
        let c = self.bsource[self.pos.stop - 1];
        if c == b'\n' {
            self.pos.start = self.pos.stop - 1;
        } else {
            self.pos.start = self.pos.stop;
        }
    }

    /// Returns the column of the cursor measured from `head`, advancing by
    /// `util::tab_width` for tabs and by 1 for other bytes, minus the pending
    /// padding. The result is cached until the cursor moves.
    fn line_offset(&mut self) -> usize {
        if self.bsource.is_empty() || self.block.is_empty() {
            return 0;
        }
        if self.line_offset.is_none() {
            let mut v = 0usize;
            for i in self.head..self.pos.start {
                if self.bsource[i] == b'\t' {
                    v += util::tab_width(v);
                } else {
                    v += 1;
                }
            }
            // Saturate instead of underflowing when the padding exceeds the
            // measured width.
            v = v.saturating_sub(self.pos.padding());
            self.line_offset = Some(v);
        }
        self.line_offset.unwrap_or(0)
    }

    /// Returns the character in front of the cursor: `' '` while padding is
    /// pending, `'\n'` at the start of the source or of the block, otherwise
    /// the character that ends right before the cursor (U+FFFD if it cannot
    /// be decoded).
    fn precending_charater(&self) -> char {
        if self.pos.padding() != 0 {
            return ' ';
        }
        if self.pos.start() == 0 {
            return '\n';
        }
        if self.block.is_empty() {
            return '\n';
        }
        let first_line = &self.block[0];
        if self.line.unwrap_or(0) == 0 && self.pos.start() <= first_line.start() {
            return '\n';
        }
        // Walk back to the byte for which `utf8_len` reports a length —
        // index 0 included, so a multi-byte character at the very start of
        // the source is decoded as well.
        let mut i = self.pos.start() - 1;
        loop {
            if let Some(l) = utf8_len(self.bsource[i]) {
                if l == 1 {
                    return self.bsource[i] as char;
                }
                return str::from_utf8(&self.bsource[i..i + l])
                    .ok()
                    .and_then(|s| s.chars().next())
                    .unwrap_or('\u{FFFD}');
            }
            if i == 0 {
                break;
            }
            i -= 1;
        }
        // No decodable byte all the way back to the start of the source.
        '\n'
    }
}
// Unit tests for segment rendering, `Value` resolution and both readers.
#[cfg(test)]
mod tests {
use super::*;
#[allow(unused_imports)]
#[cfg(all(not(feature = "std"), feature = "no-std-unix-debug"))]
use crate::println;
// A plain segment borrows the source bytes; a padded segment is rendered
// with leading spaces.
#[test]
fn test_segment() {
let buffer = "Hello, world!";
let segment: Segment = (0, 5).into();
let s: &[u8] = &segment.bytes(buffer);
assert_eq!(s, b"Hello");
let segment_with_padding = Segment::new_with_padding(0, 5, 3);
let s: &[u8] = &segment_with_padding.bytes(buffer);
assert_eq!(s, b"   Hello");
}
// `Value` resolves an index against the source and returns owned text as-is.
#[test]
fn test_raw() {
let buffer = "Hello, world!";
let index = Value::from((0, 5));
let s: &[u8] = index.bytes(buffer);
assert_eq!(s, b"Hello");
let raw_string = Value::from("Hello");
let s: &[u8] = raw_string.bytes(buffer);
assert_eq!(s, b"Hello");
let str: &str = index.str(buffer);
assert_eq!(str, "Hello");
let string = String::from("Hello");
let v = Value::from(string.as_str());
assert_eq!(v.str(buffer), "Hello");
}
// `BasicReader` walks a two-line source: peeking, advancing within a line,
// advancing a line, and running past the end.
#[test]
fn test_bytes_reader() {
let buffer = "Hello, world!\nThis is a test.\n";
let mut reader = BasicReader::new(buffer);
assert_eq!(reader.peek_byte(), b'H');
if let Some((line, segment)) = reader.peek_line_bytes() {
assert_eq!(line.as_ref(), b"Hello, world!\n");
assert_eq!(segment.start(), 0);
assert_eq!(segment.stop(), 14);
} else {
panic!("Expected a line");
}
reader.advance(7);
assert_eq!(reader.peek_byte(), b'w');
reader.advance_line();
assert_eq!(reader.peek_byte(), b'T');
if let Some((line, segment)) = reader.peek_line_bytes() {
assert_eq!(line.as_ref(), b"This is a test.\n");
assert_eq!(segment.start(), 14);
assert_eq!(segment.stop(), 30);
} else {
panic!("Expected a line");
}
// Advancing far past the end must leave the reader at EOS.
reader.advance(100); assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
}
// An empty source reports EOS / no line, and advancing is a no-op.
#[test]
fn test_bytes_reader_empty() {
let buffer = "";
let mut reader = BasicReader::new(buffer);
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
reader.advance(10);
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
reader.advance_line();
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
}
// `BlockReader` walks two segments, the second of which has 2 columns of
// padding that are read as spaces before the segment's bytes.
#[test]
fn test_block_reader() {
let buffer = "Hello, world!\nThis is a test.\n";
let lines = [Segment::new(0, 14), Segment::new_with_padding(14, 30, 2)];
let mut reader = BlockReader::new(buffer, &lines);
assert_eq!(reader.peek_byte(), b'H');
if let Some((line, segment)) = reader.peek_line_bytes() {
assert_eq!(line.as_ref(), b"Hello, world!\n");
assert_eq!(segment.start(), 0);
assert_eq!(segment.stop(), 14);
} else {
panic!("Expected a line");
}
reader.advance(13);
assert_eq!(reader.peek_byte(), b'\n');
reader.advance(1);
assert_eq!(reader.peek_byte(), SPACE[0]);
if let Some((line, segment)) = reader.peek_line_bytes() {
assert_eq!(line.as_ref(), b"  This is a test.\n");
assert_eq!(segment.start(), 14);
assert_eq!(segment.stop(), 30);
assert_eq!(segment.padding(), 2);
} else {
panic!("Expected a line");
}
// Two padding spaces plus one source byte are consumed here.
reader.advance(3);
assert_eq!(reader.peek_byte(), b'h');
// Advancing far past the end must leave the reader at EOS.
reader.advance(100); assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
}
// An empty source with an empty block reports EOS / no line, and advancing
// is a no-op.
#[test]
fn test_block_reader_empty() {
let buffer = "";
let lines: [Segment; 0] = [];
let mut reader = BlockReader::new(buffer, &lines);
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
reader.advance(10);
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
reader.advance_line();
assert_eq!(reader.peek_byte(), EOS);
assert!(reader.peek_line_bytes().is_none());
}
}