use std::borrow::Cow;
use std::fmt;
use std::iter::{Iterator, Peekable};
use std::str::Utf8Error;
use std::string::FromUtf8Error;
/// Errors that can occur while decoding the bytes of a line as UTF-8.
///
/// Both variants are `transparent`: their `Display` output and error source
/// are forwarded to the wrapped standard-library error, and `#[from]`
/// provides the `From` conversions used when the reader decodes a line.
#[derive(thiserror::Error, Debug, PartialEq, Eq)]
pub enum LineError {
/// Invalid UTF-8 found while validating a borrowed byte slice.
#[error(transparent)]
Utf8Error(#[from] Utf8Error),
/// Invalid UTF-8 found while converting an owned byte buffer into a `String`.
#[error(transparent)]
FromUtf8Error(#[from] FromUtf8Error),
}
/// A line of text paired with a line number.
///
/// Equality (derived) takes both the text and the line number into account.
/// Ordering sorts by text first and uses the line number as a tie-breaker so
/// that `a.cmp(&b) == Ordering::Equal` holds exactly when `a == b`, as the
/// `Ord`/`PartialOrd` contracts require.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Line<'a> {
    /// Text of the line, either borrowed from the input or owned.
    pub inner: Cow<'a, str>,
    /// Line number that was passed to [`Line::new`].
    number: usize,
}

impl<'a> PartialOrd for Line<'a> {
    /// Delegates to [`Ord::cmp`]; the ordering is total.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<'a> Ord for Line<'a> {
    /// Compares by text, then by line number.
    ///
    /// The line number participates only as a tie-breaker; without it two
    /// lines with identical text but different numbers would compare `Equal`
    /// while being unequal under `PartialEq`.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.inner
            .cmp(&other.inner)
            .then_with(|| self.number.cmp(&other.number))
    }
}

impl<'a> Line<'a> {
    /// Creates a line from its text and its line number.
    pub fn new(line: Cow<'a, str>, line_number: usize) -> Line<'a> {
        Self {
            inner: line,
            number: line_number,
        }
    }

    /// Returns the text of the line as a string slice.
    pub fn as_str(&self) -> &str {
        &self.inner
    }

    /// Returns the line number given at construction.
    pub fn number(&self) -> usize {
        self.number
    }
}

impl<'a> fmt::Display for Line<'a> {
    /// Formats the line as `Line <number>: <text>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Line {}: {}", self.number, self.inner)
    }
}
/// Iterator over the lines of a byte slice.
///
/// Lines are terminated by `\n`; a single `\r` directly before the `\n` is
/// dropped as well. A final segment without a terminating `\n` is yielded
/// unchanged. Every item borrows from the original slice.
#[derive(Debug)]
pub struct BytesLines<'a>(&'a [u8]);

impl<'a> Iterator for BytesLines<'a> {
    type Item = Cow<'a, [u8]>;

    fn next(&mut self) -> Option<Self::Item> {
        let remaining = self.0;
        if remaining.is_empty() {
            return None;
        }

        // No newline left: the rest of the input is the last line.
        let Some(newline_at) = remaining.iter().position(|&byte| byte == b'\n') else {
            self.0 = &[];
            return Some(Cow::Borrowed(remaining));
        };

        let (head, tail) = remaining.split_at(newline_at);
        // `tail` starts with the `\n` itself; resume right after it.
        self.0 = &tail[1..];

        // Treat `\r\n` like `\n` by dropping one trailing carriage return.
        let line = head.strip_suffix(b"\r").unwrap_or(head);
        Some(Cow::Borrowed(line))
    }
}
/// Reader that turns an iterator of raw byte lines into decoded lines.
///
/// The wrapped iterator is kept peekable so the next raw line can be
/// inspected before deciding whether to consume it.
pub struct LineReader<'a, I>
where
    I: Iterator<Item = Cow<'a, [u8]>>,
{
    /// Source of raw lines.
    lines: Peekable<I>,
    /// Number of raw lines consumed so far.
    number: usize,
}
impl<'a> LineReader<'a, BytesLines<'a>> {
    /// Builds a reader over an in-memory byte slice.
    ///
    /// The slice is split into raw lines by [`BytesLines`]; the line counter
    /// starts at zero.
    pub fn from_slice(reader: &'a [u8]) -> LineReader<'a, BytesLines<'a>> {
        let lines = BytesLines(reader).peekable();
        Self { lines, number: 0 }
    }
}
impl<'a, T: Iterator<Item = Cow<'a, [u8]>>> Iterator for LineReader<'a, T> {
type Item = Result<Line<'a>, LineError>;
fn next(&mut self) -> Option<Self::Item> {
let (mut new_line, line_number) = loop {
let line = self.lines.next()?;
self.number += 1;
if !line.is_empty() {
break (line, self.number);
}
};
loop {
let Some(next) = self.lines.next_if(|line| {
line.starts_with(b" ") || line.starts_with(b"\t") || line.is_empty()
}) else {
break;
};
self.number += 1;
if !next.is_empty() {
new_line.to_mut().extend_from_slice(next.split_at(1).1);
}
}
let new_line = match new_line {
Cow::Owned(bytes) => Cow::Owned(match String::from_utf8(bytes) {
Ok(val) => val,
Err(err) => return Some(Err(err.into())),
}),
Cow::Borrowed(slice) => Cow::Borrowed(match str::from_utf8(slice) {
Ok(val) => val,
Err(err) => return Some(Err(err.into())),
}),
};
if new_line.is_empty() {
None
} else {
Some(Ok(Line::new(new_line, line_number)))
}
}
}
#[cfg(test)]
mod tests {
    use super::{Line, LineReader};
    use rstest::rstest;

    // Each case maps raw input to the lines the reader is expected to yield.
    //
    // Unfolding removes the line break AND the single leading whitespace byte
    // of a continuation line, so an input that should unfold to "asd ok" needs
    // the fold marker followed by a literal space ("\r\n  ok").
    #[rstest]
    #[case("", vec![])]
    #[case("\n", vec![])]
    #[case("asd", vec![Line{inner: "asd".into(), number: 1}])]
    #[case("asd\r\n  ok", vec![Line{inner: "asd ok".into(), number: 1}])]
    #[case("asd\r\n\tok", vec![Line{inner: "asdok".into(), number: 1}])]
    #[case("asd with linebreak\r\n \r\n  ok", vec![Line{inner: "asd with linebreak ok".into(), number: 1}])]
    #[case("weird with linebreak\r\n\r\n  ok", vec![Line{inner: "weird with linebreak ok".into(), number: 1}])]
    #[case("line1\r\n\r\nline2", vec![Line{inner: "line1".into(), number: 1}, Line{inner: "line2".into(), number: 3}])]
    fn test_line_reader(#[case] input: &str, #[case] lines: Vec<Line>) {
        let parsed_lines = LineReader::from_slice(input.as_bytes())
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        assert_eq!(parsed_lines, lines);
    }
}