use std::collections::HashMap;
/// Built-in header aliases, as `(canonical field name, accepted header spellings)` pairs.
///
/// `CsvConfig::new` copies this table into the configuration's header map.
/// Header lookup lowercases both the stored alias and the incoming header,
/// so matching is case-insensitive; the aliases here are written in lowercase.
pub(crate) const DEFAULT_HEADERS: &[(&str, &[&str])] = &[
("title", &["title", "article title", "publication title"]),
("authors", &["author", "authors", "creator", "creators"]),
(
"journal",
&["journal", "journal title", "source title", "publication"],
),
("year", &["year", "publication year", "pub year"]),
("volume", &["volume", "vol"]),
("issue", &["issue", "number", "no"]),
("pages", &["pages", "page numbers", "page range"]),
("doi", &["doi", "digital object identifier"]),
("abstract", &["abstract", "summary"]),
("keywords", &["keywords", "tags"]),
("issn", &["issn"]),
("language", &["language", "lang"]),
("publisher", &["publisher"]),
("url", &["url", "link", "web link"]),
("label", &["label"]),
// Both the underscore and the run-together spelling are accepted.
("duplicate_id", &["duplicateid", "duplicate_id"]),
];
/// Configuration for mapping CSV column headers onto canonical field names,
/// plus a handful of parsing options.
///
/// NOTE(review): the code that consumes the parsing options (delimiter, quote,
/// flags) is not part of this file; their exact effect should be confirmed there.
#[derive(Debug, Clone)]
pub struct CsvConfig {
/// Canonical field name -> header aliases accepted for that field.
pub(crate) header_map: HashMap<String, Vec<String>>,
/// Lowercased alias -> canonical field name; derived from `header_map`
/// and rebuilt whenever the mappings change.
pub(crate) reverse_map: HashMap<String, String>,
/// Field delimiter byte (`b','` in a fresh configuration).
pub(crate) delimiter: u8,
/// Header-row flag (`true` in a fresh configuration); presumably tells the
/// reader that the first row holds column names — confirm at the call site.
pub(crate) has_header: bool,
/// Quote byte (`b'"'` in a fresh configuration).
pub(crate) quote: u8,
/// Trim flag (`true` in a fresh configuration); presumably enables whitespace
/// trimming in the reader — confirm at the call site.
pub(crate) trim: bool,
/// Flexible flag (`false` in a fresh configuration); presumably allows rows
/// with differing column counts — confirm at the call site.
pub(crate) flexible: bool,
/// Flag (`false` in a fresh configuration) that presumably asks the reader to
/// keep the raw record alongside the parsed one — confirm at the call site.
pub(crate) store_original_record: bool,
}
impl Default for CsvConfig {
fn default() -> Self {
Self::new()
}
}
impl CsvConfig {
    /// Creates a configuration pre-populated with [`DEFAULT_HEADERS`].
    ///
    /// Initial option values: delimiter `b','`, quote `b'"'`, `has_header` and
    /// `trim` set to `true`, `flexible` and `store_original_record` set to `false`.
    #[must_use]
    pub fn new() -> Self {
        let mut config = Self {
            header_map: HashMap::with_capacity(DEFAULT_HEADERS.len()),
            reverse_map: HashMap::new(),
            delimiter: b',',
            has_header: true,
            quote: b'"',
            trim: true,
            flexible: false,
            store_original_record: false,
        };
        config.set_default_headers();
        config
    }

    /// Copies every entry of [`DEFAULT_HEADERS`] into `header_map` (replacing
    /// any existing entry for the same field) and refreshes the reverse lookup.
    fn set_default_headers(&mut self) {
        for (field, aliases) in DEFAULT_HEADERS {
            self.header_map.insert(
                (*field).to_string(),
                aliases.iter().map(|alias| (*alias).to_string()).collect(),
            );
        }
        self.rebuild_reverse_map();
    }

    /// Rebuilds the lowercased-alias -> field lookup from `header_map`.
    ///
    /// Fields are visited in sorted order and the first field to claim an
    /// alias keeps it. Iterating the `HashMap` directly would make the winner
    /// of a conflicting alias depend on hash iteration order, i.e. differ
    /// between runs; [`validate`](Self::validate) reports such conflicts.
    fn rebuild_reverse_map(&mut self) {
        let mut entries: Vec<(&String, &Vec<String>)> = self.header_map.iter().collect();
        entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

        self.reverse_map.clear();
        for (field, aliases) in entries {
            for alias in aliases {
                self.reverse_map
                    .entry(alias.to_lowercase())
                    .or_insert_with(|| field.clone());
            }
        }
    }

    /// Replaces the alias list of `field` with `aliases`.
    ///
    /// Any aliases previously registered for `field` (including the built-in
    /// ones) are discarded. Returns `self` so calls can be chained.
    pub fn set_header_mapping(&mut self, field: &str, aliases: Vec<String>) -> &mut Self {
        self.header_map.insert(field.to_string(), aliases);
        self.rebuild_reverse_map();
        self
    }

    /// Appends `aliases` to the alias list of `field`, creating the field if
    /// it does not exist yet. Returns `self` so calls can be chained.
    pub fn add_header_aliases(&mut self, field: &str, aliases: Vec<String>) -> &mut Self {
        self.header_map
            .entry(field.to_string())
            .or_default()
            .extend(aliases);
        self.rebuild_reverse_map();
        self
    }

    /// Sets the field delimiter byte. Returns `self` for chaining.
    pub fn set_delimiter(&mut self, delimiter: u8) -> &mut Self {
        self.delimiter = delimiter;
        self
    }

    /// Sets the `has_header` flag. Returns `self` for chaining.
    pub fn set_has_header(&mut self, has_header: bool) -> &mut Self {
        self.has_header = has_header;
        self
    }

    /// Sets the quote byte. Returns `self` for chaining.
    pub fn set_quote(&mut self, quote: u8) -> &mut Self {
        self.quote = quote;
        self
    }

    /// Sets the `trim` flag. Returns `self` for chaining.
    pub fn set_trim(&mut self, trim: bool) -> &mut Self {
        self.trim = trim;
        self
    }

    /// Sets the `flexible` flag. Returns `self` for chaining.
    pub fn set_flexible(&mut self, flexible: bool) -> &mut Self {
        self.flexible = flexible;
        self
    }

    /// Sets the `store_original_record` flag. Returns `self` for chaining.
    pub fn set_store_original_record(&mut self, store: bool) -> &mut Self {
        self.store_original_record = store;
        self
    }

    /// Returns the canonical field name registered for `header`, if any.
    ///
    /// The comparison is case-insensitive: `header` is lowercased before the
    /// lookup, matching how aliases are stored.
    pub(crate) fn get_field_for_header(&self, header: &str) -> Option<&str> {
        self.reverse_map
            .get(&header.to_lowercase())
            .map(String::as_str)
    }

    /// Returns the field -> aliases mappings currently in effect.
    pub fn get_field_mappings(&self) -> &HashMap<String, Vec<String>> {
        &self.header_map
    }

    /// Checks the configuration for internal consistency.
    ///
    /// Fields are inspected in sorted order so that the reported error is the
    /// same on every run for a given configuration.
    ///
    /// # Errors
    ///
    /// Returns a description of the first problem found, checked in this order:
    /// - there are no header mappings at all;
    /// - a field name is empty, a field has no aliases, or an alias is empty;
    /// - the delimiter or the quote byte is `\n` or `\r`;
    /// - the delimiter and the quote byte are identical;
    /// - the same alias (compared case-insensitively) belongs to two fields.
    pub fn validate(&self) -> Result<(), String> {
        if self.header_map.is_empty() {
            return Err("No header mappings defined".to_string());
        }

        let mut entries: Vec<(&String, &Vec<String>)> = self.header_map.iter().collect();
        entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

        for &(field, aliases) in &entries {
            if field.is_empty() {
                return Err("Empty field name found in mappings".to_string());
            }
            if aliases.is_empty() {
                return Err(format!("Field '{}' has no aliases defined", field));
            }
            if aliases.iter().any(|alias| alias.is_empty()) {
                return Err(format!("Empty alias found for field '{}'", field));
            }
        }

        if matches!(self.delimiter, b'\n' | b'\r') {
            return Err("Delimiter cannot be a newline character".to_string());
        }
        // A record terminator cannot double as the quote byte either.
        if matches!(self.quote, b'\n' | b'\r') {
            return Err("Quote character cannot be a newline character".to_string());
        }
        // With identical bytes a quote could not be told apart from a separator.
        if self.delimiter == self.quote {
            return Err("Delimiter and quote character must be different".to_string());
        }

        // Lowercased alias -> field that first claimed it.
        let mut owners: HashMap<String, &str> = HashMap::new();
        for &(field, aliases) in &entries {
            for alias in aliases {
                let key = alias.to_lowercase();
                if let Some(&owner) = owners.get(&key) {
                    if owner != field.as_str() {
                        return Err(format!(
                            "Alias '{}' is mapped to both '{}' and '{}'",
                            alias, owner, field
                        ));
                    }
                }
                owners.insert(key, field.as_str());
            }
        }

        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned alias list the setters expect.
    fn owned(aliases: &[&str]) -> Vec<String> {
        aliases.iter().map(|alias| String::from(*alias)).collect()
    }

    /// A fresh configuration uses a comma delimiter, expects a header row and
    /// already carries header mappings.
    #[test]
    fn test_config_new() {
        let cfg = CsvConfig::new();
        assert_eq!(cfg.delimiter, b',');
        assert!(cfg.has_header);
        assert!(!cfg.header_map.is_empty());
    }

    /// A replaced alias list is visible through the header lookup.
    #[test]
    fn test_set_header_mapping() {
        let mut cfg = CsvConfig::new();
        cfg.set_header_mapping("title", owned(&["my_title"]));
        assert_eq!(cfg.get_field_for_header("my_title"), Some("title"));
    }

    /// Added aliases resolve alongside the ones that were already present.
    #[test]
    fn test_add_header_aliases() {
        let mut cfg = CsvConfig::new();
        cfg.add_header_aliases("title", owned(&["article_name"]));
        assert_eq!(cfg.get_field_for_header("title"), Some("title"));
        assert_eq!(cfg.get_field_for_header("article_name"), Some("title"));
    }

    /// Header lookup ignores letter case.
    #[test]
    fn test_get_field_for_header_case_insensitive() {
        let cfg = CsvConfig::new();
        assert_eq!(cfg.get_field_for_header("TITLE"), Some("title"));
        assert_eq!(cfg.get_field_for_header("Title"), Some("title"));
    }

    /// The out-of-the-box configuration passes validation.
    #[test]
    fn test_validate_success() {
        let outcome = CsvConfig::new().validate();
        assert!(outcome.is_ok());
    }

    /// Validation fails once every mapping has been removed.
    #[test]
    fn test_validate_empty_mappings() {
        let mut cfg = CsvConfig::new();
        cfg.header_map.clear();
        let outcome = cfg.validate();
        assert!(outcome.is_err());
    }

    /// The same alias registered for two fields is rejected.
    #[test]
    fn test_validate_duplicate_aliases() {
        let mut cfg = CsvConfig::new();
        cfg.set_header_mapping("field1", owned(&["alias"]))
            .set_header_mapping("field2", owned(&["alias"]));
        let outcome = cfg.validate();
        assert!(outcome.is_err());
    }

    /// An empty field name is rejected.
    #[test]
    fn test_validate_empty_field_name() {
        let outcome = CsvConfig::new()
            .set_header_mapping("", owned(&["alias"]))
            .validate();
        assert!(outcome.is_err());
    }

    /// An empty alias is rejected.
    #[test]
    fn test_validate_empty_alias() {
        let outcome = CsvConfig::new()
            .set_header_mapping("field", owned(&[""]))
            .validate();
        assert!(outcome.is_err());
    }

    /// A newline delimiter is rejected.
    #[test]
    fn test_validate_invalid_delimiter() {
        let outcome = CsvConfig::new().set_delimiter(b'\n').validate();
        assert!(outcome.is_err());
    }

    /// Every option setter can be chained and stores the value it was given.
    #[test]
    fn test_configuration_chaining() {
        let mut cfg = CsvConfig::new();
        cfg.set_delimiter(b';')
            .set_has_header(false)
            .set_quote(b'\'')
            .set_trim(false)
            .set_flexible(true)
            .set_store_original_record(true);

        assert_eq!(cfg.delimiter, b';');
        assert_eq!(cfg.quote, b'\'');
        assert!(!cfg.has_header);
        assert!(!cfg.trim);
        assert!(cfg.flexible);
        assert!(cfg.store_original_record);
    }
}