use std::fmt::Write;
use {
reovim_driver_annotation::{Annotation, AnnotationKind, AnnotationPayload, AnnotationTarget},
reovim_driver_codec::{CodecError, CodecMetadata, ContentType, DecodeResult},
};
/// Annotation kind name that `format_table` attaches to the first table line
/// when a header row was detected.
pub const CSV_HEADER_KIND: &str = "content.csv.header";
/// Annotation kind name that `format_table` attaches to every table line; the
/// accompanying payload carries the number of fields in that row.
pub const CSV_COLUMN_KIND: &str = "content.csv.column";
/// Codec that decodes delimiter-separated text into a space-aligned table and
/// encodes such a table back into delimiter-separated bytes.
pub struct CsvCodec {
/// Field delimiter byte handed to the CSV reader when decoding, and the
/// fallback delimiter when encoding without a `delimiter` metadata entry.
delimiter: u8,
/// Content type name recorded in the metadata of every decode result.
content_type: &'static str,
}
impl CsvCodec {
    /// Creates a codec that splits fields on `delimiter` and labels decoded
    /// content with `content_type`.
    #[must_use]
    pub const fn new(delimiter: u8, content_type: &'static str) -> Self {
        Self { delimiter, content_type }
    }

    /// Returns the delimiter byte this codec was constructed with.
    #[must_use]
    pub const fn delimiter(&self) -> u8 {
        self.delimiter
    }
}
impl reovim_driver_codec::ContentCodec for CsvCodec {
    /// Decodes raw delimiter-separated bytes into an aligned text table.
    ///
    /// The input must be valid UTF-8. The result carries the rendered table,
    /// the annotations produced by `format_table`, and metadata recording the
    /// delimiter, whether a header row was detected, and the line-ending
    /// style (`"crlf"` if the text contains any `\r\n`, otherwise `"lf"`).
    ///
    /// # Errors
    ///
    /// Returns `CodecError::Other` when `raw` is not valid UTF-8.
    #[cfg_attr(coverage_nightly, coverage(off))]
    fn decode(&self, raw: &[u8]) -> Result<DecodeResult, CodecError> {
        let text = match std::str::from_utf8(raw) {
            Ok(valid) => valid,
            Err(e) => {
                return Err(CodecError::Other(format!(
                    "CSV decode: invalid UTF-8: {e}"
                )));
            }
        };

        let line_ending = if text.contains("\r\n") { "crlf" } else { "lf" };
        let (rows, has_header) = parse_csv(text, self.delimiter);
        let (content, annotations) = format_table(&rows, has_header);

        // Everything `encode` needs to reproduce the original dialect.
        let mut metadata = CodecMetadata::new(ContentType::new(self.content_type));
        metadata.set("delimiter", char::from(self.delimiter).to_string());
        metadata.set("has_header", has_header.to_string());
        metadata.set("line_ending", line_ending);

        Ok(DecodeResult {
            content,
            annotations,
            metadata,
            lossy: false,
            readonly: false,
        })
    }

    /// Encodes an aligned text table back into delimiter-separated bytes.
    ///
    /// The delimiter is the first character of the `delimiter` metadata
    /// entry, falling back to this codec's own delimiter when the entry is
    /// missing or empty. Lines end with `\r\n` when the `line_ending` entry
    /// is `"crlf"` and with `\n` in every other case. This implementation
    /// always returns `Some(Ok(..))`.
    #[cfg_attr(coverage_nightly, coverage(off))]
    fn encode(
        &self,
        content: &str,
        metadata: &CodecMetadata,
    ) -> Option<Result<Vec<u8>, CodecError>> {
        let delimiter = match metadata.get("delimiter").and_then(|s| s.chars().next()) {
            Some(recorded) => recorded,
            None => self.delimiter as char,
        };
        let line_ending = if matches!(metadata.get("line_ending"), Some("crlf")) {
            "\r\n"
        } else {
            "\n"
        };
        Some(Ok(encode_csv(content, delimiter, line_ending)))
    }
}
/// Parses `text` into rows of owned field strings and reports whether the
/// first row looks like a header.
///
/// The reader is configured with the given `delimiter`, with header handling
/// switched off (so the first row is returned like any other) and with
/// flexible record lengths. Records the reader reports as errors are skipped.
/// The header flag comes from `detect_header`.
fn parse_csv(text: &str, delimiter: u8) -> (Vec<Vec<String>>, bool) {
    let mut builder = csv::ReaderBuilder::new();
    builder
        .delimiter(delimiter)
        .has_headers(false)
        .flexible(true);
    let mut reader = builder.from_reader(text.as_bytes());

    let mut rows: Vec<Vec<String>> = Vec::new();
    // `flatten` keeps the `Ok` records and drops the `Err` ones.
    for record in reader.records().flatten() {
        rows.push(record.iter().map(str::to_owned).collect());
    }

    let has_header = detect_header(&rows);
    (rows, has_header)
}
/// Guesses whether the first row of `rows` is a header row.
///
/// Heuristic: the first row is a header when it contains strictly fewer
/// numeric fields than the second row. A field counts as numeric when it
/// parses as `f64` after surrounding whitespace is trimmed, so padded cells
/// such as `" 30"` (typical for `name, age` style files) are recognised.
///
/// Returns `false` when there are fewer than two rows, because there is
/// nothing to compare the first row against.
#[cfg_attr(coverage_nightly, coverage(off))]
fn detect_header(rows: &[Vec<String>]) -> bool {
    let [first_row, second_row, ..] = rows else {
        return false;
    };

    let numeric_fields = |row: &[String]| {
        row.iter()
            .filter(|field| field.trim().parse::<f64>().is_ok())
            .count()
    };

    numeric_fields(first_row) < numeric_fields(second_row)
}
/// Renders parsed rows as a space-aligned text table and builds the
/// annotations that describe each table line.
///
/// Layout rules:
/// * every column is at least three characters wide and otherwise as wide as
///   its longest field, measured in characters because that is how
///   `{:<width$}` pads;
/// * adjacent columns are separated by two spaces, the shortest run that
///   `split_aligned_fields` recognises as a column boundary;
/// * the last field of each row is written without padding, because
///   `split_aligned_fields` turns a trailing run of spaces into an extra
///   empty field when the table is encoded again;
/// * every row ends with `\n`.
///
/// Each line gets a `CSV_COLUMN_KIND` annotation whose payload is the row's
/// field count; line 0 additionally gets a `CSV_HEADER_KIND` annotation
/// (pushed first) when `has_header` is true. Empty input yields an empty
/// string and no annotations.
///
/// NOTE(review): an empty field that is not the last one in its row is
/// rendered as spaces only, so `split_aligned_fields` cannot tell it apart
/// from padding; fixing that needs a change to the table format itself.
#[cfg_attr(coverage_nightly, coverage(off))]
fn format_table(rows: &[Vec<String>], has_header: bool) -> (String, Vec<Annotation>) {
    // Narrowest width any column is given.
    const MIN_COLUMN_WIDTH: usize = 3;
    // Number of spaces written between adjacent columns.
    const COLUMN_GAP: usize = 2;

    if rows.is_empty() {
        return (String::new(), Vec::new());
    }

    let col_count = rows.iter().map(Vec::len).max().unwrap_or(0);
    let mut widths = vec![MIN_COLUMN_WIDTH; col_count];
    for row in rows {
        for (width, field) in widths.iter_mut().zip(row) {
            *width = (*width).max(field.chars().count());
        }
    }

    let mut output = String::with_capacity(rows.len() * col_count * 10);
    let mut annotations = Vec::with_capacity(rows.len() + usize::from(has_header));
    let header_kind = AnnotationKind::new(CSV_HEADER_KIND);
    let column_kind = AnnotationKind::new(CSV_COLUMN_KIND);

    for (line_idx, row) in rows.iter().enumerate() {
        for (col_idx, field) in row.iter().enumerate() {
            if col_idx > 0 {
                for _ in 0..COLUMN_GAP {
                    output.push(' ');
                }
            }
            if col_idx + 1 == row.len() {
                // No trailing padding: it would read back as an empty field.
                output.push_str(field);
            } else {
                let width = widths[col_idx];
                // Writing into a `String` cannot fail.
                let _ = write!(output, "{field:<width$}");
            }
        }
        output.push('\n');

        if line_idx == 0 && has_header {
            annotations.push(Annotation {
                kind: header_kind.clone(),
                target: AnnotationTarget::Line(line_idx),
                priority: 0,
                payload: AnnotationPayload::None,
            });
        }
        annotations.push(Annotation {
            kind: column_kind.clone(),
            target: AnnotationTarget::Line(line_idx),
            priority: 0,
            payload: AnnotationPayload::Number(row.len()),
        });
    }

    (output, annotations)
}
/// Encodes an aligned text table as delimiter-separated bytes.
///
/// Each line of `content` is split into cells by `split_aligned_fields`;
/// every cell is trimmed and the cells are joined with `delimiter`. A cell is
/// wrapped in double quotes, with embedded quotes doubled, when it contains
/// the delimiter, a double quote, `\n` or `\r`. Every line, including the
/// last, is terminated with `line_ending`.
///
/// Delimiters in `U+0000..=U+00FF` are written as the single byte of the same
/// value, which is the inverse of the `u8 as char` mapping `decode` uses when
/// it records the delimiter. Any other delimiter is written as its UTF-8
/// encoding rather than being truncated to its low byte.
#[cfg_attr(coverage_nightly, coverage(off))]
fn encode_csv(content: &str, delimiter: char, line_ending: &str) -> Vec<u8> {
    let mut utf8_buf = [0_u8; 4];
    let delimiter_bytes: &[u8] = match delimiter {
        '\u{0}'..='\u{FF}' => {
            // Lossless: the range check guarantees the value fits in a byte.
            utf8_buf[0] = delimiter as u8;
            &utf8_buf[..1]
        }
        _ => delimiter.encode_utf8(&mut utf8_buf).as_bytes(),
    };

    let mut result = Vec::with_capacity(content.len());
    for line in content.lines() {
        for (i, field) in split_aligned_fields(line).into_iter().enumerate() {
            if i > 0 {
                result.extend_from_slice(delimiter_bytes);
            }
            let field = field.trim();
            let needs_quoting = field
                .contains(|ch: char| ch == delimiter || matches!(ch, '"' | '\n' | '\r'));
            if needs_quoting {
                result.push(b'"');
                for byte in field.bytes() {
                    if byte == b'"' {
                        // A quote inside a quoted cell is escaped by doubling.
                        result.push(b'"');
                    }
                    result.push(byte);
                }
                result.push(b'"');
            } else {
                result.extend_from_slice(field.as_bytes());
            }
        }
        result.extend_from_slice(line_ending.as_bytes());
    }
    result
}
/// Splits one line of an aligned table into its cells.
///
/// A run of two or more consecutive spaces is a column boundary; a single
/// space stays part of the cell. Trailing whitespace is stripped from each
/// cell. An empty line yields no cells. When the line ends in a boundary run,
/// an empty final cell is appended.
#[cfg_attr(coverage_nightly, coverage(off))]
fn split_aligned_fields(line: &str) -> Vec<&str> {
    let raw = line.as_bytes();
    let len = raw.len();
    let mut cells = Vec::new();
    if len == 0 {
        return cells;
    }

    let mut cell_start = 0;
    let mut cursor = 0;
    while cursor < len {
        let at_boundary = raw[cursor] == b' ' && raw.get(cursor + 1) == Some(&b' ');
        if !at_boundary {
            cursor += 1;
            continue;
        }
        cells.push(line[cell_start..cursor].trim_end());
        // Jump over the whole run of spaces to the start of the next cell.
        cursor += raw[cursor..].iter().take_while(|&&b| b == b' ').count();
        cell_start = cursor;
    }

    if cell_start < len {
        cells.push(line[cell_start..].trim_end());
    } else if !cells.is_empty() {
        // The line ended inside a boundary run: record an empty last cell.
        cells.push("");
    }
    cells
}
#[cfg(test)]
#[path = "codec_tests.rs"]
mod tests;