1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
// ---------------- [ File: tktax-transaction-category/src/format.rs ]
#![allow(non_camel_case_types)]
crate::ix!();
/// Selects the output format used when printing categorized transactions
/// (per the type name).
///
/// NOTE(review): the code that consumes this enum is not visible in this
/// file, so the variant descriptions below are inferred from their names —
/// confirm against the printing code.
///
/// `Debug` is derived so values can be used in `assert_eq!` and `{:?}`
/// formatting.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CategorizedTransactionsPrintFormat {
    /// Presumably the verbose layout.
    Full,
    /// Presumably the condensed layout.
    Short,
}
#[cfg(test)]
mod test_category_map_and_prediction {
use super::*;
use std::str::FromStr;
// -------------------------------------------------------------
// 1. StemmedToken Tests
// -------------------------------------------------------------
#[test]
fn test_stemmed_token_empty() {
    // Parsing the empty string must succeed, and the resulting stem must
    // itself be the empty string.
    let input = "";
    let stemmed = StemmedToken::from_str(input).unwrap();
    assert_eq!(stemmed.as_str(), "");
}
#[test]
fn test_stemmed_token_multiple_words() {
    // A hyphenated compound rather than a single plain word. The exact stem
    // is deliberately left unpinned: the test only requires that parsing
    // succeeds (no panic) and yields a non-empty stem.
    let compound = "businesslike-purchase";
    let stemmed = StemmedToken::from_str(compound).unwrap();
    assert!(!stemmed.as_str().is_empty());
}
// -------------------------------------------------------------
// 2. create_category_map
// -------------------------------------------------------------
#[test]
fn test_create_category_map_empty_csv() {
    // A map obtained from `empty()` must report that it holds no entries.
    // NOTE(review): despite the test name, no CSV text is parsed here.
    assert!(CategoryMap::<MockTransactionCategory>::empty().is_empty());
}
#[test]
fn test_create_category_map_single_line() {
    // One CSV record: the category name before the comma, the vendor text
    // after it.
    let parsed =
        CategoryMap::<MockTransactionCategory>::from_csv("FoodServices_Groceries,safeway");
    assert_eq!(parsed.len(), 1, "Expected exactly one distinct stem");

    // Inspect the only entry: its category set must contain the category
    // named in the record, and its key is expected to be "safeway".
    let (key, cats) = parsed.iter().next().unwrap();
    assert!(cats.contains(&MockTransactionCategory::FoodServices_Groceries));
    assert_eq!(key.as_str(), "safeway");
}
#[test]
#[should_panic(expected = "unexpected")]
fn test_create_category_map_invalid_category() {
    // "NotARealCategory" is assumed not to name any `MockTransactionCategory`
    // variant, so building the map must panic with a message containing
    // "unexpected".
    let bad_record = "NotARealCategory,something";
    let _ = CategoryMap::<MockTransactionCategory>::from_csv(bad_record);
}
#[test]
fn test_create_category_map_repeated_lines() {
    // The same vendor token appears under two different categories on
    // separate lines; the map should merge them into one entry that holds
    // both categories.
    let csv = "FoodServices_Groceries, safeway\nFoodServices_Deli, safeway\n";
    let merged = CategoryMap::<MockTransactionCategory>::from_csv(csv);
    assert_eq!(merged.len(), 1, "Only the token 'safeway'");

    let (_, cats) = merged.iter().next().unwrap();
    assert_eq!(cats.len(), 2);
    assert!(cats.contains(&MockTransactionCategory::FoodServices_Groceries));
    assert!(cats.contains(&MockTransactionCategory::FoodServices_Deli));
}
// -------------------------------------------------------------
// 3. predict_category
// -------------------------------------------------------------
#[test]
fn test_predict_category_no_match() {
    // With an empty map, no token of the description can be looked up, so
    // the prediction list must be empty.
    //
    // The map is only read here, so the binding is immutable (the previous
    // `mut` triggered an unused-mut warning).
    let cat_map = CategoryMap::<MockTransactionCategory>::empty();
    let result = predict_category("unmatched text", &cat_map);
    assert!(result.is_empty());
}
#[test]
fn test_predict_category_single_match() {
    // Hand-built map: the stem of "safeway" points at exactly one category.
    let mut lookup = CategoryMap::<MockTransactionCategory>::empty();
    lookup.insert(
        StemmedToken::from_str("safeway").unwrap(),
        HashSet::from([MockTransactionCategory::FoodServices_Groceries]),
    );

    let predictions = predict_category("Went to safeway for groceries", &lookup);
    assert_eq!(predictions.len(), 1);

    let only = &predictions[0];
    assert_eq!(*only.category(), MockTransactionCategory::FoodServices_Groceries);
    // A matched token whose set holds a single category is expected to give
    // that category a score of exactly one.
    assert_eq!(*only.score(), Decimal::ONE);
}
#[test]
fn test_predict_category_multiple_categories_distribution() {
    // The stem "appl" is registered under two categories at once.
    let mut lookup = CategoryMap::<MockTransactionCategory>::empty();
    let shared = HashSet::from([
        MockTransactionCategory::OfficeSupplies,
        MockTransactionCategory::Electronics,
    ]);
    lookup.insert(StemmedToken::from_str("appl").unwrap(), shared);

    // Only "apple" is expected to hit the map (presumably stemming to
    // "appl"); its contribution is split evenly across the two categories.
    let predictions = predict_category("Got an apple device", &lookup);
    assert_eq!(predictions.len(), 2);

    // Both entries tie at one half, so their relative order is not checked.
    let half = Decimal::ONE / Decimal::from(2);
    assert_eq!(*predictions[0].score(), half);
    assert_eq!(*predictions[1].score(), half);

    // Exactly the two registered categories must be present.
    let seen: HashSet<_> = predictions.iter().map(|p| p.category()).collect();
    assert!(seen.contains(&MockTransactionCategory::OfficeSupplies));
    assert!(seen.contains(&MockTransactionCategory::Electronics));
}
#[test]
fn test_predict_category_score_sorting() {
    let mut lookup = CategoryMap::<MockTransactionCategory>::empty();

    // "bank" is ambiguous between two financial categories.
    lookup.insert(
        StemmedToken::from_str("bank").unwrap(),
        HashSet::from([
            MockTransactionCategory::Financial__Deposit,
            MockTransactionCategory::Financial__Check,
        ]),
    );

    // "food" maps to a single category.
    lookup.insert(
        StemmedToken::from_str("food").unwrap(),
        HashSet::from([MockTransactionCategory::FoodServices_Groceries]),
    );

    // Expected tally for the description "bank food":
    //   FoodServices_Groceries => 1.0  (sole category of "food")
    //   Financial__Deposit     => 0.5  (half of "bank")
    //   Financial__Check       => 0.5  (half of "bank")
    // Predictions are expected in descending score order.
    let ranked = predict_category("bank food", &lookup);
    assert_eq!(ranked.len(), 3);

    // The highest-scoring category comes first.
    let best = &ranked[0];
    assert_eq!(*best.category(), MockTransactionCategory::FoodServices_Groceries);
    assert_eq!(*best.score(), Decimal::ONE);

    // The two tied financial categories may follow in either order.
    let half = Decimal::from_f64(0.5).unwrap();
    let runners_up: Vec<_> = ranked[1..]
        .iter()
        .map(|p| (p.category(), p.score()))
        .collect();
    assert!(runners_up.contains(&(&MockTransactionCategory::Financial__Deposit, &half)));
    assert!(runners_up.contains(&(&MockTransactionCategory::Financial__Check, &half)));
}
// Minimal "vendor description" tokenizer.
//
// Splits `input` on Unicode whitespace and returns each piece as an owned
// `String`. It does NOT lowercase or strip punctuation: tokens are returned
// exactly as they appear in `input`. Empty or whitespace-only input yields an
// empty vector.
//
// `split_whitespace` never produces empty or whitespace-padded pieces, so no
// extra trimming or empty-string filtering is required.
//
// No test in this module currently calls this helper; the `allow` silences
// the resulting dead-code warning.
#[allow(dead_code)]
fn preprocess_vendor_description(input: &str) -> Vec<String> {
    input.split_whitespace().map(String::from).collect()
}
// --------------- Tests for StemmedToken ---------------
#[test]
fn test_stemmed_token_punctuation() {
    // Input made up solely of punctuation is expected to reduce to an empty
    // stem.
    let punctuation_only = "!!!";
    let stemmed = StemmedToken::from_str(punctuation_only).unwrap();
    assert_eq!(stemmed.as_str(), "", "punctuation-only should be empty");
}
#[test]
fn test_stemmed_token_simple() {
    // The exact stem is not pinned; the test only requires that stemming
    // "running" yields something that still contains "run".
    let word = "running";
    let stemmed = StemmedToken::from_str(word).unwrap();
    assert!(stemmed.as_str().contains("run"));
}
// --------------- Tests for create_category_map ---------------
#[test]
fn test_create_category_map_multiple_categories() {
    // A single record with exactly one comma: two ';'-separated categories
    // (FoodServices_Groceries and FoodServices_Deli) before it, and a
    // two-word description after it.
    let record = "FoodServices_Groceries;FoodServices_Deli, starbuck shop";
    let parsed = CategoryMap::<MockTransactionCategory>::from_csv(record);

    // One entry is expected per description word ("starbuck" and "shop").
    assert_eq!(parsed.len(), 2, "We expect 2 distinct stem tokens");

    // Every entry must carry both categories from the left-hand side.
    for cats in parsed.iter().map(|(_, set)| set) {
        assert_eq!(cats.len(), 2);
        assert!(cats.contains(&MockTransactionCategory::FoodServices_Groceries));
        assert!(cats.contains(&MockTransactionCategory::FoodServices_Deli));
    }
}
// --------------- Tests for predict_category ---------------
#[test]
fn test_predict_category_multiple_tokens_same_category() {
    // Two different tokens ("starbucks" and "coffee") are registered with the
    // same one-category set, so a description containing both is expected to
    // credit that category once per token, for a total score of 2.
    let mut cat_map = CategoryMap::<MockTransactionCategory>::empty();
    let mut coffee_set = HashSet::new();
    coffee_set.insert(MockTransactionCategory::FoodServices_Coffee);

    // The set is reused below, so this first insertion needs its own copy.
    cat_map.insert(
        StemmedToken::from_str("starbucks").unwrap(),
        coffee_set.clone(),
    );
    // Last use of the set: move it into the map instead of cloning again.
    cat_map.insert(StemmedToken::from_str("coffee").unwrap(), coffee_set);

    let result = predict_category("Starbucks coffee", &cat_map);

    // Both tokens point at the same category, so only one prediction remains.
    assert_eq!(result.len(), 1, "Only 1 category in final predictions");
    let top_pred = &result[0];
    assert_eq!(*top_pred.category(), MockTransactionCategory::FoodServices_Coffee);
    assert_eq!(*top_pred.score(), Decimal::from(2));
}
}