crate::ix!();
/// Table associating each [`StemmedToken`] with the set of transaction
/// categories recorded for it.
///
/// The wrapper dereferences to the inner `HashMap`, so the usual map methods
/// are available directly on a `CategoryMap`.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct CategoryMap<TxCat>
where
    TxCat: TransactionCategory,
{
    /// Token -> categories lookup; one entry per distinct stemmed token.
    map: HashMap<StemmedToken, HashSet<TxCat>>,
}
/// Read-only access to the wrapped token -> categories map.
impl<TxCat> std::ops::Deref for CategoryMap<TxCat>
where
    TxCat: TransactionCategory,
{
    type Target = HashMap<StemmedToken, HashSet<TxCat>>;

    /// Borrows the inner `HashMap`.
    fn deref(&self) -> &HashMap<StemmedToken, HashSet<TxCat>> {
        &self.map
    }
}
impl<TxCat:TransactionCategory> std::ops::DerefMut for CategoryMap<TxCat> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.map
}
}
impl<TxCat:TransactionCategory> CategoryMap<TxCat> {
pub fn empty() -> Self {
Self { map: HashMap::new() }
}
pub fn new() -> CategoryMap<TxCat> {
let golden_csv = TxCat::category_golden_csv();
Self::from_csv(&golden_csv)
}
pub fn from_csv(csv: &str) -> CategoryMap<TxCat> {
let mut map = HashMap::new();
for line in csv.lines() {
let parts: Vec<&str> = line.splitn(2, ',').collect();
if parts.len() != 2 {
continue;
}
let category_str = parts[0].trim();
let desc_str = parts[1].trim();
let categories: HashSet<TxCat> = category_str
.split(';')
.map(|c| {
match TxCat::try_from(c.trim()) {
Ok(val) => val,
Err(_) => panic!("unexpected category string: '{}'", c.trim()),
}
})
.collect();
let desc = desc_str.to_lowercase();
let tokens = preprocess_vendor_description(&desc);
for token in tokens {
let stemmed = StemmedToken::from_str(&token).unwrap();
let cat_entry = map.entry(stemmed).or_insert(HashSet::new());
cat_entry.extend(&categories);
}
}
CategoryMap { map }
}
}