use std::collections::HashMap;
use std::io;
/// A single problem recorded while importing a CSV file.
///
/// The error always carries a line number and a message; `column` is set only
/// when the problem can be attributed to one specific column.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// printed with `{}` and used with `?` / `Box<dyn Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CsvRowError {
    /// Line number the error refers to, exactly as supplied by whoever built it.
    pub line: u64,
    /// Name of the offending column, or `None` for an error about the whole row.
    pub column: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
}

impl CsvRowError {
    /// Builds an error that concerns a whole row (`column` is `None`).
    #[must_use]
    pub fn row(line: u64, message: impl Into<String>) -> Self {
        Self {
            line,
            column: None,
            message: message.into(),
        }
    }

    /// Builds an error that concerns a single named column of a row.
    #[must_use]
    pub fn field(line: u64, column: impl Into<String>, message: impl Into<String>) -> Self {
        Self {
            line,
            column: Some(column.into()),
            message: message.into(),
        }
    }
}

impl std::fmt::Display for CsvRowError {
    /// Formats as `line N: message`, or `line N, column `name`: message` when a
    /// column is attached.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match &self.column {
            Some(column) => write!(
                f,
                "line {}, column `{column}`: {}",
                self.line, self.message
            ),
            None => write!(f, "line {}: {}", self.line, self.message),
        }
    }
}

impl std::error::Error for CsvRowError {}
/// Tally of what happened to each row during one `import_csv` call.
///
/// The three counters are bumped once per handler result of the matching
/// kind; every header, parse or handler-reported failure is appended to
/// `errors` instead.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct ImportReport {
    /// Rows the handler reported as inserted.
    pub inserted: u64,
    /// Rows the handler reported as updated.
    pub updated: u64,
    /// Rows the handler reported as skipped.
    pub skipped: u64,
    /// Every error collected during the run, in the order encountered.
    pub errors: Vec<CsvRowError>,
}

impl ImportReport {
    /// Sum of all four buckets: inserted, updated, skipped and errored.
    ///
    /// Each entry in `errors` counts as one row here.
    #[must_use]
    pub const fn total_rows(&self) -> u64 {
        let succeeded = self.inserted + self.updated + self.skipped;
        let failed = self.errors.len() as u64;
        succeeded + failed
    }

    /// `true` when no error of any kind was recorded.
    #[must_use]
    pub const fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
/// Strategy requested for an import.
///
/// Nothing in this file interprets the mode: `import_csv` only passes a
/// reference to it into the row handler, which decides what each variant
/// means in practice.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum ImportMode {
    /// Plain insert; this is the mode used by `ImportOptions::default()`.
    Insert,
    /// Insert-or-update.
    Upsert {
        /// Column names supplied by the caller.
        /// NOTE(review): presumably the key columns used to match existing
        /// rows — confirm against the handlers that consume this.
        by: Vec<String>,
    },
    /// Dry run. NOTE(review): handlers are presumably expected to validate
    /// and count rows without persisting anything — confirm with callers.
    DryRun,
}
/// Settings for one import run.
#[derive(Clone, Debug)]
pub struct ImportOptions {
    /// Import strategy; `import_csv` hands it to the row handler unchanged.
    pub mode: ImportMode,
    /// Batch size.
    /// NOTE(review): nothing in this file reads this field — presumably it is
    /// consumed by callers or handlers that batch their writes; confirm.
    pub batch_size: usize,
}

impl Default for ImportOptions {
    /// Defaults to [`ImportMode::Insert`] with a batch size of 500.
    fn default() -> Self {
        let mode = ImportMode::Insert;
        let batch_size = 500;
        Self { mode, batch_size }
    }
}
/// Outcome a row handler reports back to `import_csv` for a single row.
///
/// The first three variants increment the matching counter of the import
/// report; the two error variants are turned into a `CsvRowError` tagged with
/// the row's line number.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so outcomes can be logged
/// and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImportRowResult {
    /// The row was inserted.
    Inserted,
    /// The row updated an existing entry.
    Updated,
    /// The row was deliberately ignored.
    Skipped,
    /// The row as a whole was rejected; the payload is the error message.
    RowError(String),
    /// One column of the row was rejected.
    FieldError {
        /// Name of the offending column.
        column: String,
        /// Description of what is wrong with the value.
        message: String,
    },
}
/// Describes how a type is laid out as CSV for `export_csv`.
pub trait CsvSchema {
    /// Column names, written by `export_csv` as the first record.
    ///
    /// The `Self: Sized` bound keeps this receiver-less function from
    /// preventing use of the trait as `dyn CsvSchema`.
    fn csv_columns() -> &'static [&'static str]
    where
        Self: Sized;

    /// Field values of this instance, written by `export_csv` as one record.
    fn to_csv_record(&self) -> Vec<String>;
}
/// Writes `records` to `writer` as CSV.
///
/// The first record written is `T::csv_columns()`; it is followed by one
/// record per item, produced by `CsvSchema::to_csv_record`. An empty iterator
/// therefore yields a header-only document.
///
/// # Errors
///
/// Returns an `io::Error` if writing any record or the final flush fails;
/// errors raised by the CSV writer are converted into `io::Error` by `?`.
pub fn export_csv<T, W>(records: impl IntoIterator<Item = T>, mut writer: W) -> io::Result<()>
where
    T: CsvSchema,
    W: io::Write,
{
    let mut csv_out = csv::WriterBuilder::new()
        .has_headers(true)
        .from_writer(&mut writer);

    // The header is written explicitly from the schema's column list.
    csv_out.write_record(T::csv_columns())?;

    // Stop at the first record that fails to write.
    records
        .into_iter()
        .try_for_each(|item| csv_out.write_record(item.to_csv_record()))?;

    csv_out.flush()
}
pub fn import_csv<R, F>(reader: R, opts: &ImportOptions, mut handler: F) -> ImportReport
where
R: io::Read,
F: FnMut(u64, HashMap<String, String>, &ImportMode) -> ImportRowResult,
{
let mut report = ImportReport::default();
let mut rdr = csv::ReaderBuilder::new()
.has_headers(true)
.from_reader(reader);
let headers: Vec<String> = match rdr.headers() {
Ok(h) => h.iter().map(str::to_owned).collect(),
Err(e) => {
report
.errors
.push(CsvRowError::row(1, format!("CSV header error: {e}")));
return report;
}
};
for result in rdr.records() {
let (line, record) = match result {
Ok(r) => {
let pos = r.position().map_or(0, csv::Position::line);
(pos, r)
}
Err(e) => {
let pos = e.position().map_or(0, csv::Position::line);
report
.errors
.push(CsvRowError::row(pos, format!("CSV parse error: {e}")));
continue;
}
};
let row: HashMap<String, String> = headers
.iter()
.zip(record.iter())
.map(|(k, v)| (k.clone(), v.to_owned()))
.collect();
let outcome = handler(line, row, &opts.mode);
match outcome {
ImportRowResult::Inserted => report.inserted += 1,
ImportRowResult::Updated => report.updated += 1,
ImportRowResult::Skipped => report.skipped += 1,
ImportRowResult::RowError(msg) => {
report.errors.push(CsvRowError::row(line, msg));
}
ImportRowResult::FieldError { column, message } => {
report
.errors
.push(CsvRowError::field(line, column, message));
}
}
}
report
}
/// Unit tests for the CSV import/export helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal record type used to exercise `export_csv`.
    struct Post {
        id: i64,
        title: String,
        published: bool,
    }

    impl CsvSchema for Post {
        fn csv_columns() -> &'static [&'static str] {
            &["id", "title", "published"]
        }

        fn to_csv_record(&self) -> Vec<String> {
            vec![
                self.id.to_string(),
                self.title.clone(),
                self.published.to_string(),
            ]
        }
    }

    /// Two posts whose titles contain a comma and embedded double quotes, so
    /// the export tests exercise CSV quoting.
    fn sample_posts() -> Vec<Post> {
        vec![
            Post {
                id: 1,
                title: "Hello, World".to_string(),
                published: true,
            },
            Post {
                id: 2,
                title: "Goodbye cruel \"world\"".to_string(),
                published: false,
            },
        ]
    }

    #[test]
    fn csv_row_error_row_constructor() {
        let e = CsvRowError::row(42, "bad value");
        assert_eq!(e.line, 42);
        assert!(e.column.is_none());
        assert_eq!(e.message, "bad value");
    }

    #[test]
    fn csv_row_error_field_constructor() {
        let e = CsvRowError::field(5, "email", "invalid email");
        assert_eq!(e.line, 5);
        assert_eq!(e.column.as_deref(), Some("email"));
        assert_eq!(e.message, "invalid email");
    }

    #[test]
    fn import_report_default_is_zero() {
        let r = ImportReport::default();
        assert_eq!(r.inserted, 0);
        assert_eq!(r.updated, 0);
        assert_eq!(r.skipped, 0);
        assert!(r.errors.is_empty());
        assert_eq!(r.total_rows(), 0);
        assert!(r.is_ok());
    }

    #[test]
    fn import_report_total_rows_sums_all_buckets() {
        let r = ImportReport {
            inserted: 3,
            updated: 2,
            skipped: 1,
            errors: vec![CsvRowError::row(10, "oops")],
        };
        assert_eq!(r.total_rows(), 7);
        assert!(!r.is_ok());
    }

    #[test]
    fn export_csv_writes_header_and_rows() {
        let mut out = Vec::new();
        export_csv(sample_posts(), &mut out).unwrap();
        let s = String::from_utf8(out).unwrap();
        let mut lines = s.lines();
        assert_eq!(lines.next().unwrap(), "id,title,published");
        assert_eq!(lines.next().unwrap(), "1,\"Hello, World\",true");
    }

    #[test]
    fn export_csv_applies_rfc4180_quoting_for_commas() {
        let mut out = Vec::new();
        export_csv(sample_posts(), &mut out).unwrap();
        let s = String::from_utf8(out).unwrap();
        assert!(
            s.contains("\"Hello, World\""),
            "comma in title should be quoted: {s}"
        );
    }

    #[test]
    fn export_csv_applies_rfc4180_quoting_for_double_quotes() {
        let mut out = Vec::new();
        export_csv(sample_posts(), &mut out).unwrap();
        let s = String::from_utf8(out).unwrap();
        assert!(
            s.contains("\"Goodbye cruel \"\"world\"\"\""),
            "embedded quotes should be doubled: {s}"
        );
    }

    #[test]
    fn export_csv_empty_iterator_writes_header_only() {
        let mut out = Vec::new();
        export_csv(Vec::<Post>::new(), &mut out).unwrap();
        let s = String::from_utf8(out).unwrap();
        let lines: Vec<&str> = s.lines().collect();
        assert_eq!(lines, vec!["id,title,published"]);
    }

    #[test]
    fn export_csv_stable_column_ordering() {
        let mut out = Vec::new();
        export_csv(sample_posts(), &mut out).unwrap();
        let s = String::from_utf8(out).unwrap();
        let header = s.lines().next().unwrap();
        assert_eq!(header, "id,title,published");
    }

    #[test]
    fn import_csv_insert_mode_counts_inserted() {
        let csv = b"id,title,published\n1,Hello,true\n2,World,false\n";
        let report = import_csv(
            csv.as_ref(),
            &ImportOptions::default(),
            |_line, _row, _mode| ImportRowResult::Inserted,
        );
        assert_eq!(report.inserted, 2);
        assert_eq!(report.updated, 0);
        assert_eq!(report.skipped, 0);
        assert!(report.errors.is_empty());
    }

    #[test]
    fn import_csv_handler_receives_column_values_as_map() {
        let csv = b"title,published\nHello,true\n";
        let mut seen: Option<HashMap<String, String>> = None;
        import_csv(
            csv.as_ref(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                seen = Some(row);
                ImportRowResult::Inserted
            },
        );
        let row = seen.unwrap();
        assert_eq!(row.get("title").map(String::as_str), Some("Hello"));
        assert_eq!(row.get("published").map(String::as_str), Some("true"));
    }

    #[test]
    fn import_csv_row_error_is_captured_with_line_number() {
        let csv = b"title\nGood row\nBad row\nAnother good\n";
        let report = import_csv(
            csv.as_ref(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                if row.get("title").map(String::as_str) == Some("Bad row") {
                    ImportRowResult::RowError("title must not be 'Bad row'".into())
                } else {
                    ImportRowResult::Inserted
                }
            },
        );
        assert_eq!(report.inserted, 2);
        assert_eq!(report.errors.len(), 1);
        assert_eq!(report.errors[0].message, "title must not be 'Bad row'");
        // Header is line 1, "Good row" line 2, so "Bad row" sits on line 3.
        assert_eq!(report.errors[0].line, 3);
    }

    #[test]
    fn import_csv_field_error_records_column_name() {
        let csv = b"email\nbad-email\n";
        let report = import_csv(
            csv.as_ref(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                if row.get("email").map_or("", String::as_str).contains('@') {
                    ImportRowResult::Inserted
                } else {
                    ImportRowResult::FieldError {
                        column: "email".into(),
                        message: "must be a valid email".into(),
                    }
                }
            },
        );
        assert_eq!(report.errors.len(), 1);
        assert_eq!(report.errors[0].column.as_deref(), Some("email"));
        assert_eq!(report.errors[0].message, "must be a valid email");
    }

    #[test]
    fn import_csv_dry_run_counts_but_does_not_write() {
        let csv = b"id,title\n1,Hello\n2,World\n";
        let mut write_called = false;
        let opts = ImportOptions {
            mode: ImportMode::DryRun,
            batch_size: 100,
        };
        let report = import_csv(csv.as_ref(), &opts, |_line, _row, mode| {
            if !matches!(mode, ImportMode::DryRun) {
                write_called = true;
            }
            ImportRowResult::Inserted
        });
        assert!(!write_called, "handler must not write in dry-run mode");
        assert_eq!(report.inserted, 2, "dry-run should still count rows");
    }

    #[test]
    fn import_csv_upsert_mode_counts_updated() {
        let csv = b"id,title\n1,Hello\n2,World\n";
        let opts = ImportOptions {
            mode: ImportMode::Upsert {
                by: vec!["id".into()],
            },
            batch_size: 100,
        };
        let report = import_csv(csv.as_ref(), &opts, |_line, _row, _mode| {
            ImportRowResult::Updated
        });
        assert_eq!(report.updated, 2);
        assert_eq!(report.inserted, 0);
    }

    #[test]
    fn import_csv_reports_error_at_exact_row_for_large_file() {
        let target_line: u64 = 27143;
        let mut csv = String::from("value\n");
        // Data row `i` lands on file line `i + 1` because the header occupies
        // line 1, so the BAD row (i == target_line - 1) is on `target_line`.
        for i in 1..=target_line {
            if i == target_line - 1 {
                csv.push_str("BAD\n");
            } else {
                csv.push_str("good\n");
            }
        }
        let report = import_csv(
            csv.as_bytes(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                if row.get("value").map(String::as_str) == Some("BAD") {
                    ImportRowResult::RowError("value is BAD".into())
                } else {
                    ImportRowResult::Inserted
                }
            },
        );
        assert_eq!(report.errors.len(), 1, "exactly one error expected");
        assert_eq!(report.errors[0].message, "value is BAD");
        assert_eq!(
            report.errors[0].line, target_line,
            "error must point at the exact file line of the bad row"
        );
        assert_eq!(report.inserted, target_line - 1);
    }

    #[test]
    fn import_csv_skipped_rows_counted() {
        let csv = b"status\nactive\narchived\nactive\n";
        let report = import_csv(
            csv.as_ref(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                if row.get("status").map(String::as_str) == Some("archived") {
                    ImportRowResult::Skipped
                } else {
                    ImportRowResult::Inserted
                }
            },
        );
        assert_eq!(report.inserted, 2);
        assert_eq!(report.skipped, 1);
        assert_eq!(report.total_rows(), 3);
    }

    #[test]
    fn import_options_default_is_insert_batch_500() {
        let opts = ImportOptions::default();
        assert!(matches!(opts.mode, ImportMode::Insert));
        assert_eq!(opts.batch_size, 500);
    }

    #[test]
    fn export_then_import_round_trips_data() {
        let posts = sample_posts();
        let mut exported = Vec::new();
        export_csv(posts, &mut exported).unwrap();
        let mut titles_imported = Vec::new();
        import_csv(
            exported.as_slice(),
            &ImportOptions::default(),
            |_line, row, _mode| {
                titles_imported.push(row.get("title").cloned().unwrap_or_default());
                ImportRowResult::Inserted
            },
        );
        assert_eq!(
            titles_imported,
            vec!["Hello, World", "Goodbye cruel \"world\""]
        );
    }
}