use crate::converter::{ConversionOptions, ConversionResult, Converter};
use crate::error::ConvertError;
use crate::markdown::{build_table, build_table_plain};
/// Converter for CSV documents.
///
/// Its [`Converter`] implementation handles the `csv` extension and renders
/// the parsed records as a table, using the first record as the header row.
pub struct CsvConverter;
impl Converter for CsvConverter {
    /// Returns the file extensions this converter handles: only `csv`.
    fn supported_extensions(&self) -> &[&str] {
        &["csv"]
    }

    /// Converts CSV bytes into a table.
    ///
    /// The input is decoded to text with `super::decode_text`, then parsed
    /// with a reader configured without implicit headers and in flexible mode
    /// (records may have differing field counts). The first record becomes
    /// the table header and every following record becomes a body row. The
    /// Markdown output comes from `build_table` and the plain-text output
    /// from `build_table_plain`.
    ///
    /// If the input contains no records at all, the result has empty
    /// markdown. Any warning reported by the decoding step is included in
    /// `warnings` on every successful return path, including that one.
    ///
    /// `_options` is currently unused.
    ///
    /// # Errors
    ///
    /// Returns [`ConvertError::MalformedDocument`] as soon as the CSV reader
    /// fails on a record; the message distinguishes the header record from
    /// later rows.
    fn convert(
        &self,
        data: &[u8],
        _options: &ConversionOptions,
    ) -> Result<ConversionResult, ConvertError> {
        let (text, encoding_warning) = super::decode_text(data);
        // Collected up front so that the "no records" early return below
        // reports the decoding warning just like the normal path does.
        let warnings: Vec<_> = encoding_warning.into_iter().collect();

        let mut reader = csv::ReaderBuilder::new()
            .has_headers(false)
            .flexible(true)
            .from_reader(text.as_bytes());

        // Keep the parsed records themselves: they own the cell data, so the
        // table builders can borrow `&str` cells straight from them without a
        // per-cell `String` copy.
        let mut records = Vec::new();
        for (index, parsed) in reader.records().enumerate() {
            match parsed {
                Ok(record) => records.push(record),
                Err(e) => {
                    // The first record is the header; everything after is a row.
                    let what = if index == 0 { "header" } else { "row" };
                    return Err(ConvertError::MalformedDocument {
                        reason: format!("failed to parse CSV {what}: {e}"),
                    });
                }
            }
        }

        let (header_record, body_records) = match records.split_first() {
            Some(parts) => parts,
            None => {
                // No records at all: nothing to render.
                return Ok(ConversionResult {
                    markdown: String::new(),
                    warnings,
                    ..Default::default()
                });
            }
        };

        let header_refs: Vec<&str> = header_record.iter().collect();
        let row_refs: Vec<Vec<&str>> = body_records
            .iter()
            .map(|record| record.iter().collect())
            .collect();

        Ok(ConversionResult {
            markdown: build_table(&header_refs, &row_refs),
            plain_text: build_table_plain(&header_refs, &row_refs),
            warnings,
            ..Default::default()
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts `input` with default options and returns the Markdown output,
    /// panicking if the conversion fails.
    fn markdown_for(input: &[u8]) -> String {
        CsvConverter
            .convert(input, &ConversionOptions::default())
            .unwrap()
            .markdown
    }

    /// A header plus two rows renders as a header line, separator and rows.
    #[test]
    fn test_csv_simple_table() {
        let markdown = markdown_for(b"A,B,C\n1,2,3\n4,5,6\n");
        assert!(markdown.contains("| A | B | C |"));
        assert!(markdown.contains("|---|---|---|"));
        assert!(markdown.contains("| 1 | 2 | 3 |"));
        assert!(markdown.contains("| 4 | 5 | 6 |"));
    }

    /// A header-only file yields exactly the header and separator lines.
    #[test]
    fn test_csv_single_row_header_only() {
        let markdown = markdown_for(b"X,Y,Z\n");
        assert!(markdown.contains("| X | Y | Z |"));
        assert!(markdown.contains("|---|---|---|"));
        assert_eq!(markdown.lines().count(), 2);
    }

    /// Single-column input still produces table rows.
    #[test]
    fn test_csv_single_column() {
        let markdown = markdown_for(b"Name\nAlice\nBob\n");
        assert!(markdown.contains("| Name |"));
        assert!(markdown.contains("| Alice |"));
        assert!(markdown.contains("| Bob |"));
    }

    /// Empty input converts successfully to empty markdown.
    #[test]
    fn test_csv_empty_input() {
        assert_eq!(markdown_for(b""), "");
    }

    /// CJK text passes through to the output.
    #[test]
    fn test_csv_unicode_cjk() {
        let markdown = markdown_for("이름,나이\n홍길동,30\n田中,25\n".as_bytes());
        assert!(markdown.contains("홍길동"));
        assert!(markdown.contains("田中"));
        assert!(markdown.contains("이름"));
    }

    /// Emoji pass through to the output.
    #[test]
    fn test_csv_emoji() {
        let markdown = markdown_for("Symbol,Meaning\n🚀,Rocket\n✨,Sparkle\n".as_bytes());
        assert!(markdown.contains("🚀"));
        assert!(markdown.contains("✨"));
    }

    /// Quoted fields appear in the output with their content intact.
    #[test]
    fn test_csv_quoted_fields() {
        let markdown = markdown_for(b"Name,City\nAlice,\"New York\"\nBob,\"San Francisco\"\n");
        assert!(markdown.contains("New York"));
        assert!(markdown.contains("San Francisco"));
    }

    /// Rows shorter than the header are padded with empty cells.
    #[test]
    fn test_csv_short_rows_padded() {
        let markdown = markdown_for(b"A,B,C\n1\n");
        assert!(markdown.contains("| 1 | | |"));
    }

    /// Leading and trailing whitespace inside cells is kept.
    #[test]
    fn test_csv_whitespace_in_cells() {
        let markdown = markdown_for(b"Key,Value\n hello , world \n");
        assert!(markdown.contains(" hello "));
        assert!(markdown.contains(" world "));
    }

    /// Only the `csv` extension is advertised.
    #[test]
    fn test_csv_supported_extensions() {
        let extensions = CsvConverter.supported_extensions();
        assert!(extensions.contains(&"csv"));
        assert!(!extensions.contains(&"txt"));
    }

    /// `can_convert` accepts `csv` and rejects `json`.
    #[test]
    fn test_csv_can_convert() {
        assert!(CsvConverter.can_convert("csv", &[]));
        assert!(!CsvConverter.can_convert("json", &[]));
    }

    /// Plain UTF-8 input produces no title, no images and no warnings.
    #[test]
    fn test_csv_no_title_images_warnings() {
        let result = CsvConverter
            .convert(b"A\n1\n", &ConversionOptions::default())
            .unwrap();
        assert!(result.title.is_none());
        assert!(result.images.is_empty());
        assert!(result.warnings.is_empty());
    }

    /// A pipe inside a cell must not show up as a raw column separator.
    #[test]
    fn test_csv_pipe_in_cell_escaped() {
        let markdown = markdown_for(b"Name,Command\nAlice,echo \"hello\" | grep h\n");
        assert!(
            !markdown.contains("| echo \"hello\" | grep h |"),
            "raw pipe in cell should be escaped, got: {}",
            markdown
        );
        assert!(markdown.contains("grep h"));
    }

    /// Plain text output is tab-separated and free of table markup.
    #[test]
    fn test_csv_plain_text_tab_separated() {
        let result = CsvConverter
            .convert(
                b"Name,Age,City\nAlice,30,Seoul\nBob,25,Tokyo\n",
                &ConversionOptions::default(),
            )
            .unwrap();
        assert!(result.plain_text.contains("Name\tAge\tCity"));
        assert!(result.plain_text.contains("Alice\t30\tSeoul"));
        assert!(result.plain_text.contains("Bob\t25\tTokyo"));
        assert!(!result.plain_text.contains("|"));
        assert!(!result.plain_text.contains("---"));
    }

    /// Plain text output keeps a pipe inside a cell unescaped.
    #[test]
    fn test_csv_plain_text_pipe_in_cell_preserved() {
        let result = CsvConverter
            .convert(
                b"Name,Command\nAlice,\"echo | grep\"\n",
                &ConversionOptions::default(),
            )
            .unwrap();
        assert!(result.plain_text.contains("echo | grep"));
    }

    /// Non-UTF-8 bytes are decoded and a warning is reported.
    #[test]
    fn test_csv_non_utf8_decoded_with_warning() {
        let result = CsvConverter
            .convert(
                b"Name,City\nAlice,Montr\xe9al\n",
                &ConversionOptions::default(),
            )
            .unwrap();
        assert!(result.markdown.contains("Montr\u{00e9}al"));
        assert!(!result.warnings.is_empty());
    }

    /// A newline inside a quoted field becomes `<br>` in the table cell.
    #[test]
    fn test_csv_multiline_quoted_field() {
        let markdown = markdown_for(b"Name,Bio\nAlice,\"Line one\nLine two\"\n");
        assert!(
            markdown.contains("Line one<br>Line two"),
            "multiline cell should use <br>, got: {}",
            markdown
        );
    }

    /// Doubled quotes inside a quoted field collapse to single quotes.
    #[test]
    fn test_csv_escaped_quotes_in_field() {
        let markdown = markdown_for(b"Name,Quote\nAlice,\"She said \"\"hello\"\"\"\n");
        assert!(
            markdown.contains("She said \"hello\""),
            "escaped quotes should be unescaped, got: {}",
            markdown
        );
    }

    /// Multiline cells work together with CJK text and emoji.
    #[test]
    fn test_csv_multiline_with_unicode() {
        let markdown = markdown_for("Name,Note\n홍길동,\"첫째 줄\n둘째 줄 🎉\"\n".as_bytes());
        assert!(markdown.contains("홍길동"));
        assert!(
            markdown.contains("첫째 줄<br>둘째 줄 🎉"),
            "CJK + emoji multiline should work, got: {}",
            markdown
        );
    }

    /// A CRLF inside a quoted field becomes a single `<br>`.
    #[test]
    fn test_csv_crlf_in_quoted_field() {
        let markdown = markdown_for(b"A,B\nX,\"line1\r\nline2\"\n");
        assert!(
            markdown.contains("line1<br>line2"),
            "CRLF in quoted field should become <br>, got: {}",
            markdown
        );
    }

    /// A cell containing both a pipe and a newline gets both treatments.
    #[test]
    fn test_csv_pipe_and_newline_combined() {
        let markdown = markdown_for(b"Cmd,Output\ntest,\"echo | grep\nhello\"\n");
        assert!(
            markdown.contains("\\|"),
            "pipe should be escaped, got: {}",
            markdown
        );
        assert!(
            markdown.contains("<br>"),
            "newline should become <br>, got: {}",
            markdown
        );
    }

    /// Backslashes in a cell are escaped in the Markdown output.
    #[test]
    fn test_csv_backslash_in_quoted_field() {
        let markdown = markdown_for(b"Path,Value\nroot,\"C:\\Users\\test\"\n");
        assert!(
            markdown.contains("C:\\\\Users\\\\test"),
            "backslashes should be escaped, got: {}",
            markdown
        );
    }

    /// An empty quoted field renders as an empty cell.
    #[test]
    fn test_csv_empty_quoted_field() {
        let markdown = markdown_for(b"A,B,C\n1,\"\",3\n");
        assert!(
            markdown.contains("| 1 | | 3 |"),
            "empty quoted field should be empty cell, got: {}",
            markdown
        );
    }

    /// Plain text output keeps both lines of a multiline cell.
    #[test]
    fn test_csv_plain_text_multiline_preserved() {
        let result = CsvConverter
            .convert(
                b"Name,Bio\nAlice,\"Line one\nLine two\"\n",
                &ConversionOptions::default(),
            )
            .unwrap();
        let plain_text = &result.plain_text;
        assert!(plain_text.contains("Name\tBio"));
        assert!(
            plain_text.contains("Alice\t"),
            "plain text should have tab-separated cells, got: {}",
            plain_text
        );
        assert!(
            plain_text.contains("Line one") && plain_text.contains("Line two"),
            "multiline content should be present in plain text, got: {}",
            plain_text
        );
    }
}