use std::iter::FusedIterator;
/// Line feed, U+000A (`\n`).
pub const LF: char = '\n';
/// Carriage return, U+000D (`\r`).
pub const CR: char = '\r';
/// Line separator, U+2028.
pub const LS: char = '\u{2028}';
/// Paragraph separator, U+2029.
pub const PS: char = '\u{2029}';
pub fn is_regular_line_terminator(c: char) -> bool {
matches!(c, LF | CR)
}
pub fn is_irregular_line_terminator(c: char) -> bool {
matches!(c, LS | PS)
}
pub fn is_line_terminator(c: char) -> bool {
is_regular_line_terminator(c) || is_irregular_line_terminator(c)
}
/// Encodes `ch` as UTF-8 into a fixed-size array of exactly `N` bytes.
///
/// Usable in `const` context, so the encoded bytes can be stored in constants.
///
/// # Panics
///
/// Panics if the UTF-8 encoding of `ch` is not exactly `N` bytes long.
/// When evaluated in a `const` context, this surfaces as a compile error.
const fn to_bytes<const N: usize>(ch: char) -> [u8; N] {
    // Reject a wrong `N` up front, before touching the output buffer.
    assert!(ch.len_utf8() == N);
    let mut encoded = [0u8; N];
    // `encode_utf8` writes in place; the `&mut str` it returns is not needed.
    let _ = ch.encode_utf8(&mut encoded);
    encoded
}
/// UTF-8 encoding of [`LS`] (U+2028): `E2 80 A8`.
const LS_BYTES: [u8; 3] = to_bytes(LS);
/// UTF-8 encoding of [`PS`] (U+2029): `E2 80 A9`.
const PS_BYTES: [u8; 3] = to_bytes(PS);

/// First UTF-8 byte (`0xE2`) shared by the encodings of [`LS`] and [`PS`].
pub const LS_OR_PS_FIRST_BYTE: u8 = LS_BYTES[0];

// Compile-time guard: both encodings must start with the same byte.
const _: () = assert!(PS_BYTES[0] == LS_OR_PS_FIRST_BYTE);

/// The two UTF-8 bytes that follow [`LS_OR_PS_FIRST_BYTE`] in the encoding of [`LS`].
pub const LS_LAST_2_BYTES: [u8; 2] = {
    let [_, second, third] = LS_BYTES;
    [second, third]
};

/// The two UTF-8 bytes that follow [`LS_OR_PS_FIRST_BYTE`] in the encoding of [`PS`].
pub const PS_LAST_2_BYTES: [u8; 2] = {
    let [_, second, third] = PS_BYTES;
    [second, third]
};
pub struct LineTerminatorSplitter<'a> {
text: &'a str,
}
impl<'a> LineTerminatorSplitter<'a> {
pub fn new(text: &'a str) -> Self {
Self { text }
}
}
impl<'a> Iterator for LineTerminatorSplitter<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<Self::Item> {
if self.text.is_empty() {
return None;
}
for (index, &byte) in self.text.as_bytes().iter().enumerate() {
match byte {
b'\n' => {
unsafe {
let line = self.text.get_unchecked(..index);
self.text = self.text.get_unchecked(index + 1..);
return Some(line);
}
}
b'\r' => {
let line = unsafe { self.text.get_unchecked(..index) };
let skip_bytes =
if self.text.as_bytes().get(index + 1) == Some(&b'\n') { 2 } else { 1 };
self.text = unsafe { self.text.get_unchecked(index + skip_bytes..) };
return Some(line);
}
LS_OR_PS_FIRST_BYTE => {
let next2: [u8; 2] = {
let next2 =
unsafe { self.text.as_bytes().get_unchecked(index + 1..index + 3) };
next2.try_into().unwrap()
};
if matches!(next2, LS_LAST_2_BYTES | PS_LAST_2_BYTES) {
unsafe {
let line = self.text.get_unchecked(..index);
self.text = self.text.get_unchecked(index + 3..);
return Some(line);
}
}
}
_ => {}
}
}
let line = self.text;
self.text = "";
Some(line)
}
}
impl FusedIterator for LineTerminatorSplitter<'_> {}