fp_growth/
algorithm.rs

1//! `algorithm` is the core module of FP-Growth algorithm.
2//! It implements the algorithm based on the internal data structs [`crate::tree::Node<T>`] and [`crate::tree::Tree<T>`].
3
4use std::{
5    cmp::Ordering,
6    collections::{HashMap, HashSet},
7    usize,
8};
9
10use crate::tree::Tree;
11use crate::ItemType;
12
13#[allow(clippy::upper_case_acronyms)]
14#[derive(Clone, Debug)]
15pub struct FPResult<T> {
16    frequent_patterns: Vec<(Vec<T>, usize)>,
17    elimination_sets: HashSet<Vec<T>>,
18}
19
20impl<T: ItemType> FPResult<T> {
21    pub fn new(
22        frequent_patterns: Vec<(Vec<T>, usize)>,
23        elimination_sets: HashSet<Vec<T>>,
24    ) -> FPResult<T> {
25        FPResult {
26            frequent_patterns,
27            elimination_sets,
28        }
29    }
30
31    pub fn frequent_patterns_num(&self) -> usize {
32        self.frequent_patterns.len()
33    }
34
35    pub fn frequent_patterns(&self) -> Vec<(Vec<T>, usize)> {
36        self.frequent_patterns.clone()
37    }
38
39    pub fn elimination_sets_num(&self) -> usize {
40        self.elimination_sets.len()
41    }
42
43    pub fn elimination_sets(&self) -> Vec<Vec<T>> {
44        self.elimination_sets.clone().into_iter().collect()
45    }
46}
47
48/// `FPGrowth<T>` represents an algorithm instance, it should include the `transactions` input
49/// and minimum support value as the initial config. Once it is created, you could run
50/// [`FPGrowth::find_frequent_patterns()`] to start the frequent pattern mining.
51// `transactions` will be sorted and deduplicated before starting the algorithm.
52#[allow(clippy::upper_case_acronyms)]
53pub struct FPGrowth<T> {
54    transactions: Vec<Vec<T>>,
55    minimum_support: usize,
56}
57
58impl<T: ItemType> FPGrowth<T> {
59    /// Create a FP-Growth algorithm instance with the given `transactions` and `minimum_support`.
60    pub fn new(transactions: Vec<Vec<T>>, minimum_support: usize) -> FPGrowth<T> {
61        FPGrowth {
62            transactions,
63            minimum_support,
64        }
65    }
66
67    /// Find frequent patterns in the given transactions using FP-Growth.
68    pub fn find_frequent_patterns(&self) -> FPResult<T> {
69        // Collect and preprocess the transactions.
70        let mut items = HashMap::new();
71        for transaction in self.transactions.clone().into_iter() {
72            let mut item_set: HashSet<T> = HashSet::new();
73            for &item in transaction.iter() {
74                // Check whether we have inserted the same item in a transaction before,
75                // make sure we won't calculate the wrong support.
76                match item_set.contains(&item) {
77                    true => continue,
78                    false => {
79                        item_set.insert(item);
80                        let count = items.entry(item).or_insert(0);
81                        *count += 1;
82                    }
83                };
84            }
85        }
86
87        // Clean up the items whose support is lower than the minimum_support.
88        let cleaned_items: HashMap<&T, &usize> = items
89            .iter()
90            .filter(|(_, &count)| count >= self.minimum_support)
91            .collect();
92        let mut elimination_sets = HashSet::new();
93
94        let mut tree = Tree::<T>::new();
95        for transaction in self.transactions.clone().into_iter() {
96            let mut cleaned_transaction: Vec<T> = transaction
97                .clone()
98                .into_iter()
99                .filter(|item| cleaned_items.contains_key(item))
100                .collect();
101            if cleaned_transaction.len() != transaction.len() {
102                elimination_sets.insert(transaction);
103            }
104            cleaned_transaction.sort_by(|a, b| {
105                let &a_counter = cleaned_items.get(a).unwrap();
106                let &b_counter = cleaned_items.get(b).unwrap();
107                match b_counter.cmp(a_counter) {
108                    Ordering::Equal => {
109                        // When counter is the same, we will sort by T itself.
110                        // e.g. ["c", "b", "a"] -> ["a", "b", "c"]
111                        match b.cmp(a) {
112                            Ordering::Greater => Ordering::Less,
113                            Ordering::Less => Ordering::Greater,
114                            Ordering::Equal => Ordering::Equal,
115                        }
116                    }
117                    Ordering::Less => Ordering::Less,
118                    Ordering::Greater => Ordering::Greater,
119                }
120            });
121            // After sort cleaned_transaction, remove consecutive items from it then.
122            cleaned_transaction.dedup();
123            tree.add_transaction(cleaned_transaction);
124        }
125
126        let mut fp_result = self.find_with_suffix(&tree, &[]);
127        fp_result.elimination_sets.extend(elimination_sets);
128        fp_result
129    }
130
131    fn find_with_suffix(&self, tree: &Tree<T>, suffix: &[T]) -> FPResult<T> {
132        let mut fp_result = FPResult::new(vec![], HashSet::new());
133        for (item, nodes) in tree.get_all_items_nodes().iter() {
134            let mut support = 0;
135            for node in nodes.iter() {
136                support += node.count();
137            }
138            let mut frequent_pattern = vec![*item];
139            frequent_pattern.append(&mut Vec::from(suffix));
140            if support >= self.minimum_support && !suffix.contains(item) {
141                fp_result
142                    .frequent_patterns
143                    .push((frequent_pattern.clone(), support));
144
145                let partial_tree = Tree::generate_partial_tree(&tree.generate_prefix_path(*item));
146                let mut mid_fp_result = self.find_with_suffix(&partial_tree, &frequent_pattern);
147                fp_result
148                    .frequent_patterns
149                    .append(&mut mid_fp_result.frequent_patterns);
150                fp_result
151                    .elimination_sets
152                    .extend(mid_fp_result.elimination_sets);
153            } else {
154                fp_result.elimination_sets.insert(frequent_pattern);
155            }
156        }
157        fp_result
158    }
159}