use std::borrow::Cow;
/// Rewrites every line terminator in `content` to a single `\n`.
///
/// Both `\r\n` pairs and bare `\r` characters become `\n`. When `content`
/// holds no `\r` at all there is nothing to rewrite, so the input is handed
/// back as [`Cow::Borrowed`] without allocating; otherwise a new string is
/// returned as [`Cow::Owned`].
pub(crate) fn normalize_line_endings(content: &str) -> Cow<'_, str> {
    if content.find('\r').is_none() {
        return Cow::Borrowed(content);
    }

    let mut normalized = String::with_capacity(content.len());
    let mut remaining = content;
    while let Some(cr_at) = remaining.find('\r') {
        // Copy the text before the carriage return, then emit one LF for it.
        normalized.push_str(&remaining[..cr_at]);
        normalized.push('\n');
        // Step past the CR; if it was the first half of a CRLF pair, swallow
        // the LF as well so the pair collapses to a single newline.
        let after_cr = &remaining[cr_at + 1..];
        remaining = after_cr.strip_prefix('\n').unwrap_or(after_cr);
    }
    normalized.push_str(remaining);
    Cow::Owned(normalized)
}
/// Iterator state for walking a string block by block.
///
/// Items are sub-slices of `src`, so they live as long as the borrowed input
/// (`'a`) rather than as long as the iterator itself.
#[derive(Debug, Clone)]
pub(crate) struct RawBlocks<'a> {
    /// The complete input text that blocks are sliced from.
    src: &'a str,
    /// Byte offset into `src` at which the next block begins.
    pos: usize,
    /// Set once the final block has been yielded; afterwards iteration ends.
    done: bool,
}
/// Creates a [`RawBlocks`] iterator positioned at the start of `content`.
///
/// The returned iterator borrows `content`; nothing is scanned until it is
/// advanced.
pub(crate) fn raw_blocks(content: &str) -> RawBlocks<'_> {
    let src = content;
    let pos = 0;
    let done = false;
    RawBlocks { src, pos, done }
}
impl<'a> Iterator for RawBlocks<'a> {
    type Item = &'a str;

    /// Yields the next block of the input.
    ///
    /// A block ends where two line terminators appear back to back; each
    /// terminator may independently be `\r\n`, a bare `\r`, or `\n`. Exactly
    /// those two terminators are consumed as the separator, and the following
    /// block starts right after them. Terminators inside a block are left
    /// untouched. The text after the last separator (possibly empty) is
    /// always yielded as the final block, so even an empty input produces one
    /// item; after that the iterator returns `None`.
    fn next(&mut self) -> Option<&'a str> {
        if self.done {
            return None;
        }

        let text = self.src;
        let raw = text.as_bytes();
        let block_start = self.pos;
        let mut cursor = block_start;

        // Hop from one terminator to the next instead of visiting each byte.
        while let Some(offset) = raw[cursor..]
            .iter()
            .position(|&byte| byte == b'\r' || byte == b'\n')
        {
            let term_start = cursor + offset;
            let first_end = term_start + term_len_at(raw, term_start);
            // Only probe for a second terminator when bytes remain, because
            // `term_len_at` indexes the slice directly.
            let second_len = raw
                .get(first_end)
                .map_or(0, |_| term_len_at(raw, first_end));

            if second_len != 0 {
                // Two terminators in a row: the block ends before the first
                // one and the next block starts after the second one.
                self.pos = first_end + second_len;
                return Some(&text[block_start..term_start]);
            }

            // A lone terminator belongs to the current block; keep scanning.
            cursor = first_end;
        }

        // No separator left: everything that remains is the last block.
        self.done = true;
        Some(&text[block_start..])
    }
}
/// Returns the byte length of the line terminator that starts at `bytes[i]`.
///
/// The result is `2` for a `\r\n` pair, `1` for a bare `\r` or a `\n`, and
/// `0` when the byte at `i` does not start a terminator.
///
/// # Panics
///
/// Panics if `i` is not a valid index into `bytes`.
fn term_len_at(bytes: &[u8], i: usize) -> usize {
    // Evaluated lazily so the look-ahead only happens after `bytes[i]` has
    // been read successfully.
    let followed_by_lf = || bytes.get(i + 1) == Some(&b'\n');
    match bytes[i] {
        b'\n' => 1,
        b'\r' if followed_by_lf() => 2,
        b'\r' => 1,
        _ => 0,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Gathers every block that `raw_blocks` yields for `input`.
    fn blocks_of(input: &str) -> Vec<&str> {
        raw_blocks(input).collect()
    }

    /// Input without any `\r` must come back borrowed and unchanged.
    #[test]
    fn normalize_lf_only_is_borrowed() {
        let input = "abc\ndef\n\nghi";
        let normalized = normalize_line_endings(input);
        assert!(matches!(normalized, Cow::Borrowed(_)));
        assert_eq!(normalized, input);
    }

    /// Every `\r\n` pair becomes a single `\n`.
    #[test]
    fn normalize_crlf_collapses_to_lf() {
        assert_eq!(
            normalize_line_endings("abc\r\ndef\r\n\r\nghi"),
            "abc\ndef\n\nghi"
        );
    }

    /// Every bare `\r` becomes a single `\n`.
    #[test]
    fn normalize_bare_cr_collapses_to_lf() {
        assert_eq!(
            normalize_line_endings("abc\rdef\r\rghi"),
            "abc\ndef\n\nghi"
        );
    }

    /// All three terminator styles may be mixed within one input.
    #[test]
    fn normalize_mixed_line_endings() {
        assert_eq!(
            normalize_line_endings("abc\r\ndef\nghi\r\rjkl\r\n\nmno"),
            "abc\ndef\nghi\n\njkl\n\nmno"
        );
    }

    /// A blank line written with `\n` separates blocks.
    #[test]
    fn raw_blocks_lf_only() {
        assert_eq!(blocks_of("a\nb\n\nc\nd"), ["a\nb", "c\nd"]);
    }

    /// A blank line written with `\r\n` separates blocks; inner CRLFs stay.
    #[test]
    fn raw_blocks_crlf() {
        assert_eq!(blocks_of("a\r\nb\r\n\r\nc\r\nd"), ["a\r\nb", "c\r\nd"]);
    }

    /// A blank line written with bare `\r` separates blocks.
    #[test]
    fn raw_blocks_bare_cr() {
        assert_eq!(blocks_of("a\rb\r\rc\rd"), ["a\rb", "c\rd"]);
    }

    /// The two terminators of a separator may use different styles.
    #[test]
    fn raw_blocks_mixed_separators() {
        assert_eq!(
            blocks_of("a\r\nb\r\n\nc\r\nd\n\r\ne"),
            ["a\r\nb", "c\r\nd", "e"]
        );
    }

    /// The raw splitter must produce as many blocks as splitting the
    /// normalized text on `"\n\n"` does.
    #[test]
    fn raw_blocks_count_matches_normalized_split() {
        for s in [
            "a\nb\n\nc\nd",
            "a\r\nb\r\n\r\nc\r\nd",
            "a\rb\r\rc\rd",
            "a\r\nb\r\n\nc\r\nd\n\r\ne",
            "single block no separator",
            "",
        ] {
            let norm_count = normalize_line_endings(s).split("\n\n").count();
            assert_eq!(raw_blocks(s).count(), norm_count, "input={s:?}");
        }
    }
}