Skip to main content

rustledger_ops/
categorize.rs

1//! Rules-based transaction categorization.
2//!
3//! The [`RulesEngine`] matches transaction payee/narration against a set of
4//! rules to determine the contra-account. Rules can use substring matching,
5//! regex patterns, or exact matching. They are evaluated in priority order
6//! (highest first), with first match winning.
7//!
8//! The engine can also load the built-in merchant dictionary from
9//! [`crate::merchants`] as low-priority fallback rules.
10
11use crate::enrichment::CategorizationMethod;
12use regex::Regex;
13
14/// A categorization rule.
15#[derive(Debug)]
16pub struct Rule {
17    /// Optional name for this rule (for debugging/display).
18    pub name: Option<String>,
19    /// The pattern to match against payee/narration.
20    pub pattern: RulePattern,
21    /// The account to assign when this rule matches.
22    pub account: String,
23    /// Priority for ordering (higher = checked first). Default: 0.
24    /// User rules should use positive priorities, merchant dict uses -1000.
25    pub priority: i32,
26}
27
28/// Pattern types for matching.
29#[derive(Debug)]
30pub enum RulePattern {
31    /// Case-insensitive substring match (fast, no regex overhead).
32    ///
33    /// The stored value **must be lowercase** because [`RulesEngine::categorize`]
34    /// lowercases input text before comparison using a case-sensitive `contains()`.
35    Substring(String),
36    /// Compiled regex pattern.
37    Regex(Regex),
38    /// Exact case-insensitive match.
39    Exact(String),
40}
41
42impl RulePattern {
43    /// Test if this pattern matches the given text.
44    fn matches(&self, text: &str) -> bool {
45        match self {
46            Self::Substring(s) => text.contains(s.as_str()),
47            Self::Regex(r) => r.is_match(text),
48            Self::Exact(s) => text.eq_ignore_ascii_case(s.as_str()),
49        }
50    }
51}
52
53/// Result of a successful categorization match.
54#[derive(Debug, Clone)]
55pub struct RuleMatch {
56    /// The matched account.
57    pub account: String,
58    /// Name of the rule that matched (if any).
59    pub rule_name: Option<String>,
60    /// How this match was determined.
61    pub method: CategorizationMethod,
62    /// Confidence score (1.0 for rules, lower for weaker matches).
63    pub confidence: f64,
64}
65
66/// Rules engine for transaction categorization.
67///
68/// Evaluates rules in priority order (highest first). First match wins.
69/// Supports loading rules from user config, merchant dictionary, or both.
70#[derive(Debug)]
71pub struct RulesEngine {
72    rules: Vec<Rule>,
73}
74
75impl RulesEngine {
76    /// Create an empty rules engine.
77    #[must_use]
78    pub const fn new() -> Self {
79        Self { rules: Vec::new() }
80    }
81
82    /// Add a single rule.
83    ///
84    /// Rules are kept sorted by priority (descending) after each insertion.
85    pub fn add_rule(&mut self, rule: Rule) {
86        self.rules.push(rule);
87        self.rules.sort_by_key(|r| std::cmp::Reverse(r.priority));
88    }
89
90    /// Load rules from substring-based mappings (existing `importers.toml` format).
91    ///
92    /// All patterns are lowercased. Priority is set to 0 (user rules).
93    pub fn load_from_mappings(&mut self, mappings: &[(String, String)]) {
94        for (pattern, account) in mappings {
95            self.rules.push(Rule {
96                name: None,
97                pattern: RulePattern::Substring(pattern.to_lowercase()),
98                account: account.clone(),
99                priority: 0,
100            });
101        }
102        self.rules.sort_by_key(|r| std::cmp::Reverse(r.priority));
103    }
104
105    /// Load rules from regex-based mappings.
106    ///
107    /// Patterns that fail to compile are silently skipped.
108    /// Priority is set to 0 (user rules).
109    pub fn load_from_regex_mappings(&mut self, mappings: &[(String, String)]) {
110        for (pattern, account) in mappings {
111            if let Ok(regex) = regex::RegexBuilder::new(pattern)
112                .case_insensitive(true)
113                .build()
114            {
115                self.rules.push(Rule {
116                    name: Some(pattern.clone()),
117                    pattern: RulePattern::Regex(regex),
118                    account: account.clone(),
119                    priority: 0,
120                });
121            }
122        }
123        self.rules.sort_by_key(|r| std::cmp::Reverse(r.priority));
124    }
125
126    /// Load the built-in merchant dictionary as low-priority rules.
127    pub fn load_merchant_dict(&mut self) {
128        for entry in crate::merchants::MERCHANT_PATTERNS {
129            if let Ok(regex) = regex::RegexBuilder::new(entry.pattern)
130                .case_insensitive(true)
131                .build()
132            {
133                self.rules.push(Rule {
134                    name: Some(entry.category.to_string()),
135                    pattern: RulePattern::Regex(regex),
136                    account: entry.account.to_string(),
137                    priority: -1000, // Below all user rules
138                });
139            }
140        }
141        self.rules.sort_by_key(|r| std::cmp::Reverse(r.priority));
142    }
143
144    /// Categorize a transaction by matching payee and narration against rules.
145    ///
146    /// Returns the first matching rule's account and metadata, or `None` if
147    /// no rule matches.
148    pub fn categorize(&self, payee: Option<&str>, narration: &str) -> Option<RuleMatch> {
149        let payee_lower = payee.map(str::to_lowercase);
150        let narration_lower = narration.to_lowercase();
151
152        for rule in &self.rules {
153            // Try payee first (more specific)
154            if let Some(ref p) = payee_lower
155                && rule.pattern.matches(p)
156            {
157                return Some(RuleMatch {
158                    account: rule.account.clone(),
159                    rule_name: rule.name.clone(),
160                    method: if rule.priority <= -1000 {
161                        CategorizationMethod::MerchantDict
162                    } else {
163                        CategorizationMethod::Rule
164                    },
165                    confidence: 1.0,
166                });
167            }
168            // Then narration
169            if rule.pattern.matches(&narration_lower) {
170                return Some(RuleMatch {
171                    account: rule.account.clone(),
172                    rule_name: rule.name.clone(),
173                    method: if rule.priority <= -1000 {
174                        CategorizationMethod::MerchantDict
175                    } else {
176                        CategorizationMethod::Rule
177                    },
178                    confidence: 1.0,
179                });
180            }
181        }
182
183        None
184    }
185
186    /// Number of loaded rules.
187    #[must_use]
188    pub const fn len(&self) -> usize {
189        self.rules.len()
190    }
191
192    /// Whether the engine has no rules.
193    #[must_use]
194    pub const fn is_empty(&self) -> bool {
195        self.rules.is_empty()
196    }
197}
198
199impl Default for RulesEngine {
200    fn default() -> Self {
201        Self::new()
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Build one owned `(pattern, account)` mapping entry.
    fn mapping(pattern: &str, account: &str) -> (String, String) {
        (pattern.to_string(), account.to_string())
    }

    /// Build a substring rule with the given name, needle, account and priority.
    fn substring_rule(name: &str, needle: &str, account: &str, priority: i32) -> Rule {
        Rule {
            name: Some(name.to_string()),
            pattern: RulePattern::Substring(needle.to_string()),
            account: account.to_string(),
            priority,
        }
    }

    #[test]
    fn substring_match() {
        let mut engine = RulesEngine::new();
        engine.load_from_mappings(&[mapping("amazon", "Expenses:Shopping")]);

        let m = engine
            .categorize(Some("AMAZON MARKETPLACE"), "Order #123")
            .expect("payee contains the substring");
        assert_eq!(m.account, "Expenses:Shopping");
    }

    #[test]
    fn substring_match_narration() {
        let mut engine = RulesEngine::new();
        engine.load_from_mappings(&[mapping("coffee", "Expenses:Dining:Coffee")]);

        let m = engine
            .categorize(None, "Morning coffee at the cafe")
            .expect("narration contains the substring");
        assert_eq!(m.account, "Expenses:Dining:Coffee");
    }

    #[test]
    fn regex_match() {
        let mut engine = RulesEngine::new();
        engine.load_from_regex_mappings(&[mapping(r"UBER(EATS)?", "Expenses:Transport")]);

        let m = engine
            .categorize(Some("UBEREATS"), "food delivery")
            .expect("regex matches with the optional group");
        assert_eq!(m.account, "Expenses:Transport");

        let m = engine
            .categorize(Some("UBER TRIP"), "ride")
            .expect("regex matches without the optional group");
        assert_eq!(m.account, "Expenses:Transport");
    }

    #[test]
    fn invalid_regex_is_skipped() {
        let mut engine = RulesEngine::new();
        // "(" is an unclosed group and does not compile; the valid entry
        // next to it must still be loaded.
        engine.load_from_regex_mappings(&[
            mapping("(", "Expenses:Broken"),
            mapping("uber", "Expenses:Transport"),
        ]);

        assert_eq!(engine.len(), 1);
        let m = engine
            .categorize(Some("UBER"), "")
            .expect("valid regex was loaded");
        assert_eq!(m.account, "Expenses:Transport");
    }

    #[test]
    fn no_match_returns_none() {
        let mut engine = RulesEngine::new();
        engine.load_from_mappings(&[mapping("amazon", "Expenses:Shopping")]);

        assert!(engine.categorize(Some("STARBUCKS"), "Latte").is_none());
    }

    #[test]
    fn priority_ordering() {
        let mut engine = RulesEngine::new();
        // Low priority rule is added first but must lose.
        engine.add_rule(substring_rule("general", "food", "Expenses:Food", -100));
        // High priority rule.
        engine.add_rule(substring_rule("specific", "food", "Expenses:Groceries", 100));

        let m = engine
            .categorize(None, "whole food market")
            .expect("both rules match the narration");
        assert_eq!(m.account, "Expenses:Groceries");
    }

    #[test]
    fn higher_priority_narration_match_beats_payee_match() {
        let mut engine = RulesEngine::new();
        // Priority 0 rule that matches the payee.
        engine.load_from_mappings(&[mapping("amazon", "Expenses:Shopping")]);
        // Priority 10 rule that matches only the narration.
        engine.add_rule(substring_rule("refunds", "refund", "Income:Refunds", 10));

        // Rule priority outranks the payee-before-narration order.
        let m = engine
            .categorize(Some("Amazon"), "refund for order")
            .expect("both rules match");
        assert_eq!(m.account, "Income:Refunds");
    }

    #[test]
    fn user_rules_beat_merchant_dict() {
        let mut engine = RulesEngine::new();
        // User rule (priority 0)
        engine.load_from_mappings(&[mapping("starbucks", "Expenses:Coffee")]);
        // Merchant dict (priority -1000)
        engine.load_merchant_dict();

        let m = engine
            .categorize(Some("STARBUCKS"), "")
            .expect("user rule matches the payee");
        assert_eq!(m.account, "Expenses:Coffee");
        assert_eq!(m.method, CategorizationMethod::Rule);
    }

    #[test]
    fn merchant_dict_as_fallback() {
        let mut engine = RulesEngine::new();
        engine.load_merchant_dict();

        // Netflix should be in the dictionary
        let m = engine
            .categorize(Some("NETFLIX.COM"), "")
            .expect("merchant dictionary covers Netflix");
        assert_eq!(m.method, CategorizationMethod::MerchantDict);
    }

    #[test]
    fn exact_match() {
        let mut engine = RulesEngine::new();
        engine.add_rule(Rule {
            name: None,
            pattern: RulePattern::Exact("rent".to_string()),
            account: "Expenses:Rent".to_string(),
            priority: 0,
        });

        // Exact match works
        assert!(engine.categorize(None, "rent").is_some());

        // Case differences are ignored
        assert!(engine.categorize(None, "RENT").is_some());

        // Substring doesn't match exact
        assert!(engine.categorize(None, "rent payment").is_none());
    }

    #[test]
    fn first_added_rule_wins_on_equal_priority() {
        let mut engine = RulesEngine::new();
        engine.load_from_mappings(&[mapping("whole foods", "Expenses:Groceries")]);
        engine.load_from_mappings(&[mapping("whole foods", "Expenses:Organic")]);

        // Same priority, same pattern: the rule added first wins.
        let m = engine
            .categorize(Some("Whole Foods Market"), "weekly shopping")
            .expect("both rules match the payee");
        assert_eq!(m.account, "Expenses:Groceries");
    }

    #[test]
    fn empty_engine() {
        let engine = RulesEngine::new();
        assert!(engine.is_empty());
        assert_eq!(engine.len(), 0);
        assert!(engine.categorize(Some("anything"), "anything").is_none());
    }
}