use reovim_driver_codec::{ContentClassifier, ContentType};
/// Content type identifier for comma-separated values.
pub const CSV: &str = "text/csv";
/// Content type identifier for tab-separated values.
pub const TSV: &str = "text/tsv";
/// Content type identifier for pipe-separated (`|`) values.
pub const PSV: &str = "text/psv";
/// Content type identifier for semicolon-separated values.
pub const SCSV: &str = "text/scsv";
/// Minimum number of non-empty rows a sample must contain before it can be
/// classified as delimited text.
const MIN_ROWS: usize = 2;
/// Minimum number of columns every non-empty row must split into.
const MIN_COLUMNS: usize = 2;
/// Maximum number of leading input bytes inspected when sniffing content.
const SAMPLE_SIZE: usize = 8192;
/// Lowercase file extensions (without the leading dot) paired with the content
/// type each one selects. `SCSV` has no entry here, so semicolon-separated data
/// is never selected by extension.
const CSV_EXTENSIONS: &[(&str, &str)] = &[("csv", CSV), ("tsv", TSV), ("tab", TSV), ("psv", PSV)];
/// Candidate delimiter bytes paired with the content type they indicate.
///
/// Order matters: candidates are tried from first to last and the first one
/// that splits the sampled rows consistently decides the content type.
const DELIMITERS: &[(u8, &str)] = &[
(b',', CSV),
(b'\t', TSV),
(b'|', PSV),
(b';', SCSV), ];
/// Stateless classifier for delimiter-separated text (comma, tab, pipe and
/// semicolon separated values).
pub struct CsvClassifier;

impl CsvClassifier {
    /// Creates a classifier. The type carries no data, so this is usable in
    /// `const` contexts and is free at runtime.
    #[must_use]
    pub const fn new() -> Self {
        CsvClassifier
    }
}

#[cfg_attr(coverage_nightly, coverage(off))]
impl Default for CsvClassifier {
    /// Equivalent to [`CsvClassifier::new`].
    fn default() -> Self {
        CsvClassifier::new()
    }
}
impl ContentClassifier for CsvClassifier {
    /// Classifies the input as delimiter-separated text, if it looks like it.
    ///
    /// A recognized file extension in `path` decides the content type on its
    /// own, without inspecting `raw`. Otherwise at most `SAMPLE_SIZE` leading
    /// bytes of `raw` are decoded as UTF-8 and every entry of `DELIMITERS` is
    /// tried in order; the first delimiter that splits all non-empty sampled
    /// rows into the same number of columns wins.
    ///
    /// Returns `None` when the extension is unknown and the sample is not
    /// valid UTF-8, contains fewer than `MIN_ROWS` lines, or no candidate
    /// delimiter is consistent across the rows.
    #[cfg_attr(coverage_nightly, coverage(off))]
    fn classify(&self, raw: &[u8], path: &str) -> Option<ContentType> {
        if let Some(ct) = extension_content_type(path) {
            return Some(ContentType::new(ct));
        }

        let lines = sample_lines(raw)?;
        DELIMITERS
            .iter()
            .find(|&&(delim, _)| is_consistent_delimiter(&lines, delim))
            .map(|&(_, content_type)| ContentType::new(content_type))
    }

    /// Fixed priority of this classifier. How priorities are ordered is decided
    /// by whoever consumes `ContentClassifier` and is not visible here.
    fn priority(&self) -> u8 {
        15
    }

    /// Short identifier of this classifier.
    fn name(&self) -> &'static str {
        "csv"
    }
}

/// Decodes at most `SAMPLE_SIZE` leading bytes of `raw` as UTF-8 and splits
/// them into lines, repairing the damage done by cutting the input at a fixed
/// byte offset.
///
/// Returns `None` if the sample is not valid UTF-8 or yields fewer than
/// `MIN_ROWS` lines.
fn sample_lines(raw: &[u8]) -> Option<Vec<&str>> {
    let truncated = raw.len() > SAMPLE_SIZE;
    let sample = raw.get(..SAMPLE_SIZE).unwrap_or(raw);

    let text = match std::str::from_utf8(sample) {
        Ok(text) => text,
        // `error_len() == None` means the bytes simply ended in the middle of a
        // character. On a truncated sample that is an artifact of the cut, not
        // bad data, so keep the valid prefix instead of rejecting the input.
        Err(err) if truncated && err.error_len().is_none() => {
            std::str::from_utf8(&sample[..err.valid_up_to()]).ok()?
        }
        Err(_) => return None,
    };

    let mut lines: Vec<&str> = text.lines().collect();
    // A cut that did not land right after a newline leaves a partial final row
    // whose column count is meaningless and would make every delimiter look
    // inconsistent. Drop it, but only while enough rows remain to classify.
    if truncated && !text.ends_with('\n') && lines.len() > MIN_ROWS {
        lines.pop();
    }

    (lines.len() >= MIN_ROWS).then_some(lines)
}
/// Reports whether `delim` splits every non-empty line into the same number of
/// columns.
///
/// Empty lines are skipped. The check fails as soon as a line has fewer than
/// `MIN_COLUMNS` columns or a different column count than the first non-empty
/// line, and it also fails when fewer than `MIN_ROWS` non-empty lines exist.
/// Columns are counted by plain delimiter occurrences; quoting is not
/// interpreted.
fn is_consistent_delimiter(lines: &[&str], delim: u8) -> bool {
    let separator = char::from(delim);

    // Accumulator: (non-empty rows accepted so far, column count of those rows).
    // `try_fold` stops with `None` at the first row that breaks the pattern.
    let outcome = lines
        .iter()
        .filter(|row| !row.is_empty())
        .map(|row| row.matches(separator).count() + 1)
        .try_fold((0_usize, 0_usize), |(rows, width), columns| {
            let acceptable = columns >= MIN_COLUMNS && (rows == 0 || columns == width);
            acceptable.then_some((rows + 1, columns))
        });

    matches!(outcome, Some((rows, _)) if rows >= MIN_ROWS)
}
/// Looks up the content type implied by the file extension of `path`.
///
/// The extension is compared ASCII-case-insensitively against
/// `CSV_EXTENSIONS`. Returns `None` when `path` has no extension or the
/// extension is not in the table.
fn extension_content_type(path: &str) -> Option<&'static str> {
    let extension = std::path::Path::new(path)
        .extension()
        .and_then(std::ffi::OsStr::to_str)?;

    // Table keys are lowercase ASCII, so a case-insensitive comparison matches
    // exactly what lowercasing the extension first would match.
    CSV_EXTENSIONS.iter().find_map(|&(known, content_type)| {
        known
            .eq_ignore_ascii_case(extension)
            .then_some(content_type)
    })
}
// Unit tests are kept in `classifier_tests.rs` and compiled only for test builds.
#[cfg(test)]
#[path = "classifier_tests.rs"]
mod tests;