use std::collections::HashSet;
use super::ColumnNameCleaner;
use crate::{format_err, Result};
/// Hands out column identifiers that are unique within a single `Uniquifier`.
///
/// Each identifier that has been returned is remembered, so a later request
/// that cleans to the same identifier can be given a different one.
#[derive(Debug, Default)]
pub(crate) struct Uniquifier {
/// Identifiers that have already been handed out by this instance.
used: HashSet<String>,
}
impl ColumnNameCleaner for Uniquifier {
    /// Returns a cleaned, lowercase identifier for `name` that this
    /// `Uniquifier` has not returned before.
    ///
    /// The cleaned form of `name` is used as-is when it is still free.
    /// Otherwise the numeric suffixes `_2` through `_50` are tried in order
    /// and the first unused candidate is returned.
    ///
    /// # Errors
    ///
    /// Fails when the cleaned name and every suffixed candidate up to `_50`
    /// are already taken.
    fn unique_id_for(&mut self, name: &str) -> Result<String> {
        let base = name_to_lowercase_id(name);

        // `HashSet::insert` returns `true` only when the value was not yet
        // present, so it both checks for and records the identifier.
        if self.used.insert(base.clone()) {
            return Ok(base);
        }

        // Suffix numbering starts at 2: the unsuffixed name counts as the
        // first occurrence.
        for suffix in 2..=50 {
            let candidate = format!("{}_{}", base, suffix);
            if self.used.insert(candidate.clone()) {
                return Ok(candidate);
            }
        }

        Err(format_err!("too many column name collisions"))
    }
}
#[test]
fn uniquifier_generates_unique_ids() {
    // The cases are order-dependent: every call records its result, so a
    // later input may collide with an id produced by an earlier one.
    let cases = [
        // First use of a name is returned unchanged.
        ("a", "a"),
        // Differs only by case, so it collides with "a" and gets a suffix.
        ("A", "a_2"),
        // Collides with the suffixed id generated just above.
        ("a_2", "a_2_2"),
        // An unrelated name is only lowercased.
        ("B", "b"),
    ];

    let mut uniquifier = Uniquifier::default();
    for &(input, expected) in cases.iter() {
        let actual = uniquifier.unique_id_for(input).unwrap();
        assert_eq!(actual, expected);
    }
}
/// Converts `name` into a lowercase identifier made only of ASCII lowercase
/// letters, ASCII digits and underscores.
///
/// * An empty `name` becomes `"_"`.
/// * ASCII uppercase letters are lowercased.
/// * ASCII digits are kept, except as the very first character.
/// * Every other character (including a leading digit and any non-ASCII
///   character) is replaced by a single `_`.
fn name_to_lowercase_id(name: &str) -> String {
    if name.is_empty() {
        return "_".to_owned();
    }

    let mut id = String::with_capacity(name.len());
    for (pos, ch) in name.chars().enumerate() {
        let cleaned = match ch {
            '_' | 'a'..='z' => ch,
            'A'..='Z' => ch.to_ascii_lowercase(),
            // A digit is only acceptable after the first character.
            '0'..='9' if pos > 0 => ch,
            _ => '_',
        };
        id.push(cleaned);
    }
    id
}
#[test]
fn name_to_lowercase_id_cleans_non_id_characters() {
    let cases = [
        // Empty input falls back to a lone underscore.
        ("", "_"),
        // Underscores, letters and non-leading digits survive (letters are
        // lowercased); punctuation is replaced.
        ("_aA1?", "_aa1_"),
        // A leading digit is replaced by an underscore.
        ("1", "_"),
    ];

    for &(input, expected) in cases.iter() {
        let actual = name_to_lowercase_id(input);
        assert_eq!(actual, expected);
    }
}