use std::collections::HashMap;
use crate::http::FormData;
/// Row cap for a single CSV import (10,000).
/// NOTE(review): presumably the `cap` reported by `ParseError::TooManyRows` — confirm where it is enforced.
pub(crate) const CSV_IMPORT_MAX_ROWS: usize = 10_000;
/// Largest CSV body, in bytes, that `parse_csv` accepts (8 MiB); larger bodies are
/// rejected with `ParseError::TooLarge`.
pub(crate) const CSV_IMPORT_MAX_BYTES: usize = 8 * 1024 * 1024;
/// Result of attempting to import one CSV data row, as recorded by `import_csv_rows`.
#[derive(Debug, Clone)]
pub(crate) enum RowOutcome {
/// The create call for this row succeeded.
Inserted {
/// Position of the row: the zero-based data-row index plus 2, so the first
/// data row is numbered 2 (leaving 1 for the header row).
row_number: usize,
/// Value returned by the successful create call
/// (presumably the new record's primary key — confirm against `ops.create`).
id: i64,
},
/// The row was not inserted.
Failed {
/// Position of the row, numbered the same way as in `Inserted`.
row_number: usize,
/// Either the error messages returned by the create call, or a single
/// `"internal error: …"` message when the call itself returned an error.
errors: Vec<String>,
},
}
/// Summary returned by `import_csv_rows`: counters plus one outcome per processed row.
#[derive(Debug, Clone, Default)]
pub(crate) struct ImportReport {
/// Number of data rows handed to the import.
pub total: usize,
/// Number of rows whose create call succeeded.
pub inserted: usize,
/// Number of rows that were rejected or hit an internal error.
pub failed: usize,
/// Per-row results, pushed in the order the rows were processed.
pub outcomes: Vec<RowOutcome>,
}
/// Reasons a CSV upload can be rejected before any row is imported.
#[derive(Debug, Clone)]
pub(crate) enum ParseError {
    /// The body contained nothing usable.
    Empty,
    /// The body is `size` bytes long, which is over the `cap`-byte limit.
    TooLarge { size: usize, cap: usize },
    /// The file holds `rows` rows, which is over the `cap`-row limit.
    TooManyRows { rows: usize, cap: usize },
    /// No header row could be found.
    HeaderMissing,
    /// At least one header cell is blank.
    HeaderEmptyColumn,
    /// The header names columns that the target model does not declare.
    UnknownColumns { columns: Vec<String> },
}

impl ParseError {
    /// Renders the error as a human-readable sentence.
    ///
    /// Variants that carry data interpolate it into the text; the offending
    /// column names of `UnknownColumns` are listed comma-separated.
    pub(crate) fn message(&self) -> String {
        match self {
            Self::Empty => String::from("CSV body is empty."),
            Self::TooLarge { size, cap } => {
                format!("CSV body ({size} bytes) exceeds the {cap}-byte cap.")
            }
            Self::TooManyRows { rows, cap } => {
                format!("CSV has {rows} rows; cap is {cap}. Split the file and retry.")
            }
            Self::HeaderMissing => String::from("First row must be a header."),
            Self::HeaderEmptyColumn => String::from("Header has an empty column name."),
            Self::UnknownColumns { columns } => {
                let listed = columns.join(", ");
                format!(
                    "Header includes columns the model doesn't declare: {}.",
                    listed
                )
            }
        }
    }
}
pub(crate) fn parse_csv(body: &[u8]) -> Result<(Vec<String>, Vec<Vec<String>>), ParseError> {
if body.is_empty() {
return Err(ParseError::Empty);
}
if body.len() > CSV_IMPORT_MAX_BYTES {
return Err(ParseError::TooLarge {
size: body.len(),
cap: CSV_IMPORT_MAX_BYTES,
});
}
let text = std::str::from_utf8(body).map_err(|_| ParseError::Empty)?;
let mut rows = parse_csv_text(text);
if rows.is_empty() {
return Err(ParseError::HeaderMissing);
}
let header = rows.remove(0);
if header.iter().any(|c| c.trim().is_empty()) {
return Err(ParseError::HeaderEmptyColumn);
}
Ok((header, rows))
}
/// Attempts to create one record per CSV data row and reports the result of each.
///
/// `header` gives the column name for each position within a row. A value is
/// forwarded only when its column name equals the `name` of one of
/// `entry.fields`; values in other columns, and values beyond the end of the
/// header, are ignored. The forwarded values are collected into a `FormData`
/// that is passed to `entry.ops.create`.
///
/// Rows are handled one after another in input order, and a failing row does
/// not stop the import. Each outcome carries a `row_number` equal to the
/// zero-based row index plus 2.
///
/// Returns an [`ImportReport`] whose `total` is the number of rows supplied.
pub(crate) async fn import_csv_rows(
    db: &crate::orm::Db,
    entry: &super::types::AdminEntry,
    header: &[String],
    rows: Vec<Vec<String>>,
) -> ImportReport {
    let mut known_fields: HashMap<&str, ()> = HashMap::new();
    for field in entry.fields.iter() {
        known_fields.insert(field.name, ());
    }

    // One slot per header position: the column name when the model declares
    // it, `None` when values in that column must be dropped.
    let targets: Vec<Option<&String>> = header
        .iter()
        .map(|column| Some(column).filter(|name| known_fields.contains_key(name.as_str())))
        .collect();

    let mut report = ImportReport {
        total: rows.len(),
        ..Default::default()
    };

    // Numbering starts at 2: the first data row follows the header row.
    for (row, row_number) in rows.into_iter().zip(2usize..) {
        let mut form = FormData::default();
        // `zip` stops at the shorter side, so surplus cells are ignored.
        for (target, value) in targets.iter().copied().zip(row) {
            if let Some(name) = target {
                form.set(name.clone(), value);
            }
        }

        let outcome = match entry.ops.create(db, &form).await {
            Ok(Ok(id)) => RowOutcome::Inserted { row_number, id },
            Ok(Err(errors)) => RowOutcome::Failed { row_number, errors },
            Err(e) => RowOutcome::Failed {
                row_number,
                errors: vec![format!("internal error: {e}")],
            },
        };
        match outcome {
            RowOutcome::Inserted { .. } => report.inserted += 1,
            RowOutcome::Failed { .. } => report.failed += 1,
        }
        report.outcomes.push(outcome);
    }

    report
}
/// Splits CSV text into rows of fields.
///
/// Rules implemented:
/// - `,` ends a field; `\n`, `\r\n` and a lone `\r` end a row.
/// - A `"` at the very start of a field opens a quoted section in which
///   commas and line breaks are literal and `""` stands for one `"`.
/// - Characters that follow a closing quote are appended to the same field.
/// - A final row without a trailing line break is still emitted.
/// - Rows at the end of the input whose fields are all empty are dropped.
///
/// The input is walked by `char`, not by byte, so multi-byte UTF-8 characters
/// are preserved. (Pushing each byte as its own `char` would turn `é` into
/// `Ã©`.) All delimiters are ASCII, so the structure of the parse is the same
/// either way.
fn parse_csv_text(text: &str) -> Vec<Vec<String>> {
    let mut rows: Vec<Vec<String>> = Vec::new();
    let mut row: Vec<String> = Vec::new();
    let mut field = String::new();
    let mut in_quotes = false;
    // Set after a closing quote so a following `"` is not taken as the start
    // of a new quoted section while the field is still empty.
    let mut just_closed_quote = false;

    let mut chars = text.chars().peekable();
    while let Some(c) = chars.next() {
        if in_quotes {
            if c != '"' {
                field.push(c);
            } else if chars.peek() == Some(&'"') {
                // Doubled quote inside a quoted section: one literal quote.
                chars.next();
                field.push('"');
            } else {
                in_quotes = false;
                just_closed_quote = true;
            }
            continue;
        }

        match c {
            ',' => {
                row.push(std::mem::take(&mut field));
                just_closed_quote = false;
            }
            '\r' if chars.peek() == Some(&'\n') => {
                // First half of CRLF: the `\n` that follows ends the row.
            }
            '\n' | '\r' => {
                row.push(std::mem::take(&mut field));
                rows.push(std::mem::take(&mut row));
                just_closed_quote = false;
            }
            '"' if field.is_empty() && !just_closed_quote => {
                in_quotes = true;
            }
            _ => field.push(c),
        }
    }

    // Flush a last row that was not terminated by a line break.
    if !field.is_empty() || !row.is_empty() {
        row.push(field);
        rows.push(row);
    }

    // Drop trailing rows that consist only of empty fields.
    while rows
        .last()
        .map(|r| r.iter().all(|f| f.is_empty()))
        .unwrap_or(false)
    {
        rows.pop();
    }
    rows
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `parse_csv` on a string literal, as if it were an uploaded body.
    fn parse(input: &str) -> Result<(Vec<String>, Vec<Vec<String>>), ParseError> {
        parse_csv(input.as_bytes())
    }

    /// A plain two-column file yields its header and both data rows.
    #[test]
    fn parses_simple_header_and_two_rows() {
        let (columns, data) = parse("name,published\nFoo,true\nBar,false\n").unwrap();
        assert_eq!(columns, vec!["name", "published"]);
        assert_eq!(data.len(), 2);
        assert_eq!(data[0], vec!["Foo", "true"]);
        assert_eq!(data[1], vec!["Bar", "false"]);
    }

    /// Quoted fields may contain commas, and `""` decodes to a single quote.
    #[test]
    fn handles_quoted_fields_with_commas_and_doubled_quotes() {
        let input = "title,body\n\"Hello, world\",\"She said \"\"hi\"\".\"\n";
        let (_, data) = parse(input).unwrap();
        let first = &data[0];
        assert_eq!(first[0], "Hello, world");
        assert_eq!(first[1], "She said \"hi\".");
    }

    /// Windows line endings separate rows just like `\n`.
    #[test]
    fn handles_crlf_line_endings() {
        let (_, data) = parse("a,b\r\n1,2\r\n3,4\r\n").unwrap();
        assert_eq!(data.len(), 2);
        assert_eq!(data[0], vec!["1", "2"]);
    }

    /// A zero-length body is rejected as empty.
    #[test]
    fn empty_body_errors() {
        let outcome = parse_csv(b"");
        assert!(matches!(outcome, Err(ParseError::Empty)));
    }

    /// One byte over the size cap is rejected.
    #[test]
    fn oversized_body_errors() {
        let body = vec![b'a'; CSV_IMPORT_MAX_BYTES + 1];
        let outcome = parse_csv(&body);
        assert!(matches!(outcome, Err(ParseError::TooLarge { .. })));
    }

    /// A blank header cell is rejected.
    #[test]
    fn empty_header_column_errors() {
        let outcome = parse("name,\nFoo,Bar\n");
        assert!(matches!(outcome, Err(ParseError::HeaderEmptyColumn)));
    }

    /// A blank line at the end of the file does not produce a row.
    #[test]
    fn trailing_blank_line_is_dropped() {
        let (_, data) = parse("a\n1\n\n").unwrap();
        let expected = vec![vec![String::from("1")]];
        assert_eq!(data, expected);
    }
}