use crate::metadata::Quote;
/// One candidate dialect: a single combination of delimiter byte, quote
/// setting, and line terminator.
///
/// The generator functions in this file build these from [`DELIMITERS`],
/// [`QUOTES`], and a [`LineTerminator`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct PotentialDialect {
/// Candidate delimiter byte.
pub delimiter: u8,
/// Candidate quote setting (`Quote::Some(byte)` or `Quote::None`).
pub quote: Quote,
/// Line terminator paired with this candidate.
pub line_terminator: LineTerminator,
}
impl PotentialDialect {
    /// Assembles a candidate dialect from its three components.
    ///
    /// This is a plain field-for-field constructor; it performs no validation.
    /// Being `const`, it can also be used to build dialects in constant context.
    pub const fn new(delimiter: u8, quote: Quote, line_terminator: LineTerminator) -> Self {
        Self { delimiter, quote, line_terminator }
    }
}
/// Line-ending convention of a byte buffer.
// The variants are spelled as all-caps acronyms, which is what the lint
// allowance below is for.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineTerminator {
/// Line feed: the single byte `\n`.
LF,
/// Carriage return followed by line feed: the two bytes `\r\n`.
CRLF,
/// Carriage return: the single byte `\r`.
CR,
}
impl LineTerminator {
#[allow(dead_code)]
pub const fn as_bytes(&self) -> &'static [u8] {
match self {
LineTerminator::LF => b"\n",
LineTerminator::CRLF => b"\r\n",
LineTerminator::CR => b"\r",
}
}
#[allow(dead_code)]
pub const fn as_str(&self) -> &'static str {
match self {
LineTerminator::LF => "\\n",
LineTerminator::CRLF => "\\r\\n",
LineTerminator::CR => "\\r",
}
}
}
/// Candidate delimiter bytes, in the order the generator functions iterate
/// them: comma, semicolon, tab, pipe, space, caret, tilde, hash, ampersand,
/// the raw byte `0xA7` (the section sign in Latin-1), and slash.
pub const DELIMITERS: &[u8] = b",;\t| ^~#&\xa7/";
/// Candidate quote settings, in iteration order: double quote, single quote,
/// and `Quote::None` (presumably "no quote character"; see `Quote` in
/// `crate::metadata` for the authoritative meaning).
pub const QUOTES: &[Quote] = &[
Quote::Some(b'"'), Quote::Some(b'\''), Quote::None, ];
/// Every [`LineTerminator`] variant, in the order `generate_potential_dialects`
/// iterates them: CRLF first, then LF, then CR.
#[allow(dead_code)]
pub const LINE_TERMINATORS: &[LineTerminator] = &[
LineTerminator::CRLF, LineTerminator::LF, LineTerminator::CR, ];
#[allow(dead_code)]
pub fn generate_potential_dialects() -> Vec<PotentialDialect> {
let mut dialects = Vec::with_capacity(DELIMITERS.len() * QUOTES.len() * LINE_TERMINATORS.len());
for &delimiter in DELIMITERS {
for "e in QUOTES {
for &line_terminator in LINE_TERMINATORS {
dialects.push(PotentialDialect::new(delimiter, quote, line_terminator));
}
}
}
dialects
}
/// Guesses the line terminator used in `data` by counting occurrences.
///
/// A `\r` immediately followed by `\n` counts once as CRLF (and as neither a
/// lone CR nor a lone LF). Any other `\r` counts as CR and any other `\n` as LF.
///
/// Selection rules:
/// * CRLF wins when it occurs at least once and at least as often as both LF
///   and CR (ties go to CRLF).
/// * Otherwise LF wins when it is at least as frequent as CR (ties go to LF).
/// * Otherwise CR.
///
/// Input without any terminator (including empty input) yields
/// [`LineTerminator::LF`].
pub fn detect_line_terminator(data: &[u8]) -> LineTerminator {
    // `usize` counters cannot overflow: each count is bounded by `data.len()`.
    let mut crlf_count: usize = 0;
    let mut lf_count: usize = 0;
    let mut cr_count: usize = 0;

    let mut bytes = data.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                if bytes.peek() == Some(&b'\n') {
                    // Consume the LF as well so it is not counted a second time.
                    bytes.next();
                    crlf_count += 1;
                } else {
                    cr_count += 1;
                }
            }
            b'\n' => lf_count += 1,
            _ => {}
        }
    }

    if crlf_count > 0 && crlf_count >= lf_count && crlf_count >= cr_count {
        LineTerminator::CRLF
    } else if lf_count >= cr_count {
        LineTerminator::LF
    } else {
        LineTerminator::CR
    }
}
pub fn generate_dialects_with_terminator(line_terminator: LineTerminator) -> Vec<PotentialDialect> {
let mut dialects = Vec::with_capacity(DELIMITERS.len() * QUOTES.len());
for &delimiter in DELIMITERS {
for "e in QUOTES {
dialects.push(PotentialDialect::new(delimiter, quote, line_terminator));
}
}
dialects
}
/// Rewrites `data` so that the given `line_terminator` becomes `\n`.
///
/// * `LF`: the input is returned borrowed and untouched (any `\r` bytes it
///   contains are left as they are).
/// * `CRLF`: every `\r\n` pair collapses to a single `\n`; a `\r` that is not
///   directly followed by `\n` is kept.
/// * `CR`: every `\r` byte is replaced by `\n`, so a `\r\n` pair turns into
///   two `\n` bytes.
///
/// The `CRLF` and `CR` cases always return an owned buffer, even when no byte
/// had to change.
pub fn normalize_line_endings(
    data: &[u8],
    line_terminator: LineTerminator,
) -> std::borrow::Cow<'_, [u8]> {
    use std::borrow::Cow;

    match line_terminator {
        LineTerminator::LF => Cow::Borrowed(data),
        LineTerminator::CRLF => {
            let mut normalized = Vec::with_capacity(data.len());
            let mut bytes = data.iter().copied().peekable();
            while let Some(byte) = bytes.next() {
                // Drop only the CR of a CRLF pair; the LF that follows is
                // pushed on the next iteration.
                let starts_crlf = byte == b'\r' && bytes.peek() == Some(&b'\n');
                if !starts_crlf {
                    normalized.push(byte);
                }
            }
            Cow::Owned(normalized)
        }
        LineTerminator::CR => {
            let mut normalized = data.to_vec();
            for byte in &mut normalized {
                if *byte == b'\r' {
                    *byte = b'\n';
                }
            }
            Cow::Owned(normalized)
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The generator must emit one dialect per delimiter/quote/terminator triple.
    #[test]
    fn test_generate_potential_dialects() {
        // 11 delimiters x 3 quote settings x 3 line terminators.
        let count = generate_potential_dialects().len();
        assert_eq!(count, 99);
    }

    /// Each pure-terminator sample must be recognised as its own terminator.
    #[test]
    fn test_detect_line_terminator() {
        let cases: [(&[u8], LineTerminator); 3] = [
            (b"a,b\nc,d\n", LineTerminator::LF),
            (b"a,b\r\nc,d\r\n", LineTerminator::CRLF),
            (b"a,b\rc,d\r", LineTerminator::CR),
        ];
        for &(input, expected) in &cases {
            assert_eq!(detect_line_terminator(input), expected);
        }
    }
}