#![allow(clippy::uninlined_format_args)]
use std::path::Path;
use async_trait::async_trait;
use tokio::fs;
use super::traits::{ConverterMetadata, DocumentConverter};
use crate::Result;
use crate::types::{
ConversionOptions, ConversionOutput, ConversionResult, FileFormat, OutputFormat, OutputMetadata,
};
#[derive(Debug)]
pub struct CsvConverter {
delimiter: char,
}
impl CsvConverter {
pub fn new() -> Self {
Self { delimiter: ',' }
}
pub fn new_tsv() -> Self {
Self { delimiter: '\t' }
}
fn csv_to_markdown(&self, csv: &str) -> String {
let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();
if lines.is_empty() {
return "# Empty File\n".to_string();
}
let mut markdown = String::new();
markdown.push_str("# Data Table\n\n");
for (idx, line) in lines.iter().enumerate() {
let cells: Vec<&str> = line.split(self.delimiter).collect();
if idx == 0 {
markdown.push('|');
for cell in &cells {
markdown.push_str(&format!(" {} |", cell.trim()));
}
markdown.push('\n');
markdown.push('|');
for _ in &cells {
markdown.push_str("---|");
}
markdown.push('\n');
} else {
markdown.push('|');
for cell in &cells {
markdown.push_str(&format!(" {} |", cell.trim()));
}
markdown.push('\n');
}
}
markdown.push('\n');
markdown
}
fn csv_to_json(&self, csv: &str) -> Result<String> {
let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();
if lines.is_empty() {
return Ok(serde_json::json!({"data": []}).to_string());
}
let headers: Vec<String> = lines[0]
.split(self.delimiter)
.map(|h| h.trim().to_string())
.collect();
let mut rows = Vec::new();
for line in &lines[1..] {
let cells: Vec<&str> = line.split(self.delimiter).collect();
let mut row = serde_json::Map::new();
for (idx, cell) in cells.iter().enumerate() {
if idx < headers.len() {
row.insert(
headers[idx].clone(),
serde_json::Value::String(cell.trim().to_string()),
);
}
}
rows.push(row);
}
let result = serde_json::json!({
"data": rows,
"row_count": rows.len(),
"column_count": headers.len(),
"headers": headers,
});
Ok(serde_json::to_string_pretty(&result)?)
}
}
impl Default for CsvConverter {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl DocumentConverter for CsvConverter {
fn supported_formats(&self) -> Vec<FileFormat> {
vec![FileFormat::Csv, FileFormat::Tsv]
}
fn output_formats(&self) -> Vec<OutputFormat> {
vec![
OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
},
OutputFormat::Json {
structured: true,
include_metadata: true,
},
]
}
async fn convert(
&self,
input: &Path,
output_format: OutputFormat,
_options: ConversionOptions,
) -> Result<ConversionResult> {
eprintln!("🔄 CSV/TSV Conversion (Pure Rust)");
eprintln!(" CSV → Parsing → {:?}", output_format);
eprintln!();
let csv_content = fs::read_to_string(input).await?;
let output_data = match output_format {
OutputFormat::Markdown { .. } => {
eprintln!("📝 Converting to Markdown table...");
let markdown = self.csv_to_markdown(&csv_content);
markdown.into_bytes()
}
OutputFormat::Json { .. } => {
eprintln!("📝 Converting to JSON...");
let json = self.csv_to_json(&csv_content)?;
json.into_bytes()
}
_ => {
return Err(crate::TransmutationError::UnsupportedFormat(format!(
"Output format {:?} not supported for CSV",
output_format
)));
}
};
let output_size = output_data.len() as u64;
let input_size = fs::metadata(input).await?.len();
eprintln!("✅ CSV conversion complete!");
Ok(ConversionResult {
input_path: input.to_path_buf(),
input_format: if self.delimiter == ',' {
FileFormat::Csv
} else {
FileFormat::Tsv
},
output_format,
content: vec![ConversionOutput {
page_number: 1,
data: output_data,
metadata: OutputMetadata {
size_bytes: output_size,
chunk_count: 1,
token_count: None,
},
}],
metadata: crate::types::DocumentMetadata {
title: None,
author: None,
created: None,
modified: None,
page_count: 1,
language: None,
custom: std::collections::HashMap::new(),
},
statistics: crate::types::ConversionStatistics {
input_size_bytes: input_size,
output_size_bytes: output_size,
duration: std::time::Duration::from_secs(0),
pages_processed: 1,
tables_extracted: 1,
images_extracted: 0,
cache_hit: false,
},
})
}
fn metadata(&self) -> ConverterMetadata {
ConverterMetadata {
name: "CSV/TSV Converter".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "CSV/TSV to Markdown tables and JSON (pure Rust)".to_string(),
external_deps: vec![],
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_csv_converter_creation() {
let converter = CsvConverter::new();
let formats = converter.supported_formats();
assert!(formats.contains(&FileFormat::Csv));
assert!(formats.contains(&FileFormat::Tsv));
}
#[test]
fn test_csv_to_markdown_basic() {
let converter = CsvConverter::new();
let csv = "Name,Age\nAlice,30\nBob,25";
let result = converter.csv_to_markdown(csv);
assert!(result.contains("Name"));
assert!(result.contains("Alice"));
}
#[test]
fn test_csv_converter_metadata() {
let converter = CsvConverter::new();
let meta = converter.metadata();
assert_eq!(meta.name, "CSV/TSV Converter");
}
}