rust_rule_miner/mining/
mod.rs

1pub mod apriori;
2pub mod fpgrowth;
3pub mod stats;
4
5use crate::config::MiningConfig;
6use crate::errors::{MiningError, Result};
7use crate::transaction::Transaction;
8use crate::types::{AssociationRule, FrequentItemset, ItemSet, PatternMetrics};
9
10/// Main rule mining engine
11pub struct RuleMiner {
12    config: MiningConfig,
13    transactions: Vec<Transaction>,
14    stats: stats::MiningStats,
15}
16
17impl RuleMiner {
18    /// Create new rule miner with config
19    pub fn new(config: MiningConfig) -> Self {
20        Self {
21            config,
22            transactions: Vec::new(),
23            stats: stats::MiningStats::default(),
24        }
25    }
26
27    /// Add transactions to mine
28    pub fn add_transactions(&mut self, transactions: Vec<Transaction>) -> Result<()> {
29        if transactions.is_empty() {
30            return Err(MiningError::InsufficientData(
31                "No transactions provided".to_string(),
32            ));
33        }
34        self.transactions.extend(transactions);
35        Ok(())
36    }
37
38    /// Add transactions from an iterator (streaming support)
39    ///
40    /// This method allows adding transactions one-by-one from a stream,
41    /// maintaining constant memory usage for the loading phase.
42    ///
43    /// # Example
44    /// ```no_run
45    /// use rust_rule_miner::{RuleMiner, MiningConfig, data_loader::DataLoader, Transaction};
46    /// use chrono::Utc;
47    ///
48    /// let mut miner = RuleMiner::new(MiningConfig::default());
49    ///
50    /// // Add transactions one by one
51    /// let transaction = Transaction::new("tx1".to_string(), vec!["A".to_string()], Utc::now());
52    /// miner.add_transaction(transaction)?;
53    ///
54    /// let rules = miner.mine_association_rules()?;
55    /// # Ok::<(), Box<dyn std::error::Error>>(())
56    /// ```
57    pub fn add_transaction(&mut self, transaction: Transaction) -> Result<()> {
58        self.transactions.push(transaction);
59        Ok(())
60    }
61
62    /// Add transactions from an iterator (batch streaming)
63    ///
64    /// More efficient than add_transaction() when you have an iterator.
65    ///
66    /// # Example
67    /// ```no_run
68    /// use rust_rule_miner::{RuleMiner, MiningConfig, data_loader::{DataLoader, ColumnMapping}};
69    ///
70    /// let mut miner = RuleMiner::new(MiningConfig::default());
71    ///
72    /// // Load from CSV and add to miner
73    /// let mapping = ColumnMapping::simple(0, 1, 2);
74    /// let transactions = DataLoader::from_csv("file.csv", mapping)?;
75    /// miner.add_transactions_from_iter(transactions.into_iter().map(Ok))?;
76    ///
77    /// let rules = miner.mine_association_rules()?;
78    /// # Ok::<(), Box<dyn std::error::Error>>(())
79    /// ```
80    pub fn add_transactions_from_iter<I>(&mut self, iter: I) -> Result<()>
81    where
82        I: Iterator<Item = Result<Transaction>>,
83    {
84        let mut count = 0;
85        for transaction_result in iter {
86            let transaction = transaction_result?;
87            self.transactions.push(transaction);
88            count += 1;
89        }
90
91        if count == 0 {
92            return Err(MiningError::InsufficientData(
93                "No transactions provided from iterator".to_string(),
94            ));
95        }
96
97        Ok(())
98    }
99
100    /// Get transaction count
101    pub fn transaction_count(&self) -> usize {
102        self.transactions.len()
103    }
104
105    /// Mine association rules using configured algorithm
106    pub fn mine_association_rules(&mut self) -> Result<Vec<AssociationRule>> {
107        if self.transactions.is_empty() {
108            return Err(MiningError::InsufficientData(
109                "No transactions to mine".to_string(),
110            ));
111        }
112
113        // Step 1: Find frequent itemsets
114        let frequent_itemsets = match self.config.algorithm {
115            crate::config::MiningAlgorithm::Apriori => {
116                apriori::find_frequent_itemsets(&self.transactions, self.config.min_support)?
117            }
118            crate::config::MiningAlgorithm::FPGrowth => {
119                fpgrowth::find_frequent_itemsets(&self.transactions, self.config.min_support)?
120            }
121            _ => {
122                return Err(MiningError::MiningFailed(
123                    "Algorithm not yet implemented".to_string(),
124                ))
125            }
126        };
127
128        self.stats.frequent_itemsets_count = frequent_itemsets.len();
129
130        // Step 2: Generate association rules
131        let mut rules = self.generate_association_rules(&frequent_itemsets)?;
132
133        // Step 3: Filter bidirectional rules to prevent infinite loops
134        rules = self.filter_bidirectional_rules(rules);
135
136        self.stats.rules_generated = rules.len();
137
138        Ok(rules)
139    }
140
141    /// Filter out bidirectional rules that could cause infinite loops
142    /// For rules like A=>B and B=>A, keep only the one with higher confidence
143    fn filter_bidirectional_rules(&self, rules: Vec<AssociationRule>) -> Vec<AssociationRule> {
144        let mut filtered = Vec::new();
145        let mut seen_pairs = std::collections::HashSet::new();
146
147        // Already sorted by quality score from generate_association_rules
148
149        for rule in rules {
150            // Create canonical pair representation (sorted to be order-independent)
151            let mut pair = vec![rule.antecedent.clone(), rule.consequent.clone()];
152            pair.sort();
153            let pair_key = format!("{:?}", pair);
154
155            if !seen_pairs.contains(&pair_key) {
156                seen_pairs.insert(pair_key);
157                filtered.push(rule);
158            }
159        }
160
161        filtered
162    }
163
164    /// Generate association rules from frequent itemsets
165    fn generate_association_rules(
166        &self,
167        frequent_itemsets: &[FrequentItemset],
168    ) -> Result<Vec<AssociationRule>> {
169        let mut rules = Vec::new();
170
171        for itemset in frequent_itemsets {
172            if itemset.items.len() < 2 {
173                continue; // Need at least 2 items for a rule
174            }
175
176            // Generate all possible splits: A → B where A ∪ B = itemset
177            for antecedent in self.generate_non_empty_subsets(&itemset.items) {
178                let consequent: ItemSet = itemset
179                    .items
180                    .iter()
181                    .filter(|item| !antecedent.contains(item))
182                    .cloned()
183                    .collect();
184
185                if consequent.is_empty() {
186                    continue;
187                }
188
189                // Calculate metrics
190                let metrics = self.calculate_metrics(&antecedent, &consequent, itemset.support);
191
192                // Filter by thresholds
193                if metrics.confidence >= self.config.min_confidence
194                    && metrics.lift >= self.config.min_lift
195                {
196                    rules.push(AssociationRule {
197                        antecedent: antecedent.clone(),
198                        consequent: consequent.clone(),
199                        metrics,
200                    });
201                }
202            }
203        }
204
205        // Sort by quality score
206        rules.sort_by(|a, b| {
207            b.quality_score()
208                .partial_cmp(&a.quality_score())
209                .unwrap_or(std::cmp::Ordering::Equal)
210        });
211
212        Ok(rules)
213    }
214
215    /// Generate all non-empty subsets of an itemset
216    fn generate_non_empty_subsets(&self, items: &[String]) -> Vec<ItemSet> {
217        let mut subsets = Vec::new();
218        let n = items.len();
219
220        // Generate all possible combinations (2^n - 1, excluding empty set and full set)
221        for i in 1..(1 << n) - 1 {
222            let mut subset = Vec::new();
223            for (j, item) in items.iter().enumerate() {
224                if (i & (1 << j)) != 0 {
225                    subset.push(item.clone());
226                }
227            }
228            subsets.push(subset);
229        }
230
231        subsets
232    }
233
234    /// Calculate metrics for a rule
235    fn calculate_metrics(
236        &self,
237        antecedent: &ItemSet,
238        consequent: &ItemSet,
239        both_support: f64,
240    ) -> PatternMetrics {
241        let total = self.transactions.len() as f64;
242
243        // Count occurrences
244        let antecedent_count = self
245            .transactions
246            .iter()
247            .filter(|tx| tx.contains_all(antecedent))
248            .count() as f64;
249
250        let consequent_count = self
251            .transactions
252            .iter()
253            .filter(|tx| tx.contains_all(consequent))
254            .count() as f64;
255
256        let both_count = self
257            .transactions
258            .iter()
259            .filter(|tx| tx.contains_all(antecedent) && tx.contains_all(consequent))
260            .count() as f64;
261
262        // Calculate metrics
263        let confidence = if antecedent_count > 0.0 {
264            both_count / antecedent_count
265        } else {
266            0.0
267        };
268
269        let support = both_support;
270
271        let p_consequent = consequent_count / total;
272        let lift = if p_consequent > 0.0 {
273            confidence / p_consequent
274        } else {
275            0.0
276        };
277
278        let conviction = if confidence < 1.0 && p_consequent < 1.0 {
279            (1.0 - p_consequent) / (1.0 - confidence)
280        } else {
281            f64::INFINITY
282        };
283
284        PatternMetrics {
285            confidence,
286            support,
287            lift,
288            conviction,
289            avg_time_gap: None,
290            time_variance: None,
291        }
292    }
293
294    /// Get mining statistics
295    pub fn stats(&self) -> &stats::MiningStats {
296        &self.stats
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// Smoke test: three small baskets in which "A" and "B" co-occur twice
    /// must yield at least one rule under the given thresholds.
    #[test]
    fn test_basic_mining() {
        let baskets = [
            ("tx1", ["A", "B"]),
            ("tx2", ["A", "B"]),
            ("tx3", ["A", "C"]),
        ];
        let transactions: Vec<Transaction> = baskets
            .iter()
            .map(|(id, items)| {
                let items: Vec<String> = items.iter().map(|item| (*item).to_string()).collect();
                Transaction::new(*id, items, Utc::now())
            })
            .collect();

        let mut miner = RuleMiner::new(MiningConfig {
            min_support: 0.5,
            min_confidence: 0.6,
            min_lift: 1.0,
            ..Default::default()
        });
        miner
            .add_transactions(transactions)
            .expect("a non-empty batch must be accepted");

        let rules = miner
            .mine_association_rules()
            .expect("mining three valid transactions must succeed");
        assert!(!rules.is_empty());
    }
}