tktax-transaction-category 0.2.2

A Rust library for categorizing financial transactions using Porter stemming, CSV-driven classification, and advanced trait-based extensibility.
Documentation
// ---------------- [ File: tktax-transaction-category/src/create_category_map.rs ]
crate::ix!();

/// Lookup table from a stemmed token to the set of transaction categories
/// recorded for that token.
///
/// The wrapped map is private; it is reached through the `Deref` /
/// `DerefMut` implementations on this type.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct CategoryMap<TxCat>
where
    TxCat: TransactionCategory,
{
    /// Stemmed token -> every category seen alongside that token.
    map: HashMap<StemmedToken, HashSet<TxCat>>,
}

/// Shared access to the wrapped token-to-categories `HashMap`, so its
/// read-only methods can be called directly on a `CategoryMap`.
impl<TxCat> std::ops::Deref for CategoryMap<TxCat>
where
    TxCat: TransactionCategory,
{
    type Target = HashMap<StemmedToken, HashSet<TxCat>>;

    fn deref(&self) -> &Self::Target {
        let Self { map } = self;
        map
    }
}

/// Exclusive access to the wrapped token-to-categories `HashMap`, so its
/// mutating methods can be called directly on a `CategoryMap`.
impl<TxCat> std::ops::DerefMut for CategoryMap<TxCat>
where
    TxCat: TransactionCategory,
{
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map } = self;
        map
    }
}

impl<TxCat:TransactionCategory> CategoryMap<TxCat> {

    // this function is used in some tests
    pub fn empty() -> Self {
        Self { map: HashMap::new() }
    }

    pub fn new() -> CategoryMap<TxCat> {
        let golden_csv = TxCat::category_golden_csv();
        Self::from_csv(&golden_csv)
    }

    pub fn from_csv(csv: &str) -> CategoryMap<TxCat> {
        let mut map = HashMap::new();

        for line in csv.lines() {

            let parts: Vec<&str> = line.splitn(2, ',').collect();

            if parts.len() != 2 {
                continue;
            }

            let category_str = parts[0].trim();
            let desc_str = parts[1].trim();

            // category_str might have multiple categories separated by ';'
            let categories: HashSet<TxCat> = category_str
                .split(';')
                .map(|c| {
                    // Replace .expect(...) with a custom panic or fallback:
                    match TxCat::try_from(c.trim()) {
                        Ok(val) => val,
                        Err(_) => panic!("unexpected category string: '{}'", c.trim()),
                    }
                })
            .collect();

            let desc = desc_str.to_lowercase();
            let tokens = preprocess_vendor_description(&desc);

            for token in tokens {
                let stemmed = StemmedToken::from_str(&token).unwrap();
                let cat_entry = map.entry(stemmed).or_insert(HashSet::new());
                cat_entry.extend(&categories);
            }
        }

        CategoryMap { map }
    }
}