reductionml_core/
inverse_hash_table.rs

1use serde::{Deserialize, Serialize};
2
3use crate::{
4    hash::{hash_bytes, FNV_PRIME},
5    parsers::ParsedFeature,
6    FeatureHash, FeatureIndex,
7};
8
/// A structured description of a feature, kept in its pre-hash form.
///
/// Values of this type are what `InverseHashTable` stores per feature index,
/// and `Feature::hash` turns one back into its hash for a given seed.
#[derive(Serialize, Deserialize, Debug, Hash, Eq, PartialEq, PartialOrd, Ord, Clone)]
pub enum Feature {
    /// A feature identified by its namespace and its name (key).
    Simple {
        namespace: String,
        name: String,
    },
    /// A feature identified by namespace, name (key) and a string value.
    /// The value is chain-hashed: it is hashed using the name's hash as seed.
    SimpleWithStringValue {
        namespace: String,
        name: String,
        value: String,
    },
    /// A nameless feature identified by its namespace and a numeric offset
    /// that is added to the namespace hash.
    Anonymous {
        namespace: String,
        offset: u32,
    },
    /// An interaction of several features.
    ///
    /// An interacted feature should not contain interacted features itself,
    /// although the type does not enforce that. Maybe this is the wrong way
    /// to represent this?
    Interacted {
        terms: Vec<Feature>,
    },
}
32
33impl Feature {
34    pub fn hash(&self, hash_seed: u32) -> FeatureHash {
35        match &self {
36            Feature::Simple { namespace, name } => {
37                let namespace_hash = hash_bytes(namespace.as_bytes(), hash_seed);
38                hash_bytes(name.as_bytes(), namespace_hash).into()
39            }
40            Feature::SimpleWithStringValue {
41                namespace,
42                name,
43                value,
44            } => {
45                let namespace_hash = hash_bytes(namespace.as_bytes(), hash_seed);
46                let name_key_hash = hash_bytes(name.as_bytes(), namespace_hash);
47                hash_bytes(value.as_bytes(), name_key_hash).into()
48            }
49            Feature::Anonymous { namespace, offset } => {
50                let namespace_hash = hash_bytes(namespace.as_bytes(), hash_seed);
51                (namespace_hash + offset).into()
52            }
53            // In a very cool property hashing of the interacted feature does not need to take into account bit masking until the very end
54            // In fact, the produced result is idenitical if interim values are masked or just the final value.
55            Feature::Interacted { terms } => {
56                let val0 = Feature::hash(terms.first().unwrap(), hash_seed);
57                let mut hash_so_far = (FNV_PRIME).wrapping_mul(*val0);
58                for term in terms[1..terms.len() - 1].iter() {
59                    hash_so_far =
60                        (FNV_PRIME).wrapping_mul(hash_so_far ^ *Feature::hash(term, hash_seed));
61                }
62                hash_so_far ^= *Feature::hash(terms.last().unwrap(), hash_seed);
63                hash_so_far.into()
64            }
65        }
66    }
67
68    pub fn from_parsed_feature(parsed_feature: &ParsedFeature, namespace: &str) -> Self {
69        match parsed_feature {
70            ParsedFeature::Simple { name, .. } => Self::Simple {
71                namespace: namespace.to_string(),
72                name: name.to_string(),
73            },
74            ParsedFeature::SimpleWithStringValue { name, value } => Self::SimpleWithStringValue {
75                namespace: namespace.to_string(),
76                name: name.to_string(),
77                value: value.to_string(),
78            },
79            ParsedFeature::Anonymous { offset, .. } => Self::Anonymous {
80                namespace: namespace.to_string(),
81                offset: *offset,
82            },
83        }
84    }
85}
86
/// Maps a feature index to the set of features that were inserted under it.
///
/// Several distinct features can share one index, which is why each entry is
/// a set rather than a single feature.
// NOTE(review): presumably used to translate hashed indices back into
// readable feature descriptions — confirm against callers.
#[derive(Serialize, Deserialize)]
pub struct InverseHashTable {
    // Index -> every distinct feature recorded for that index.
    hash_table: std::collections::HashMap<FeatureIndex, std::collections::HashSet<Feature>>,
}
91
92impl Default for InverseHashTable {
93    fn default() -> Self {
94        Self::new()
95    }
96}
97
98impl InverseHashTable {
99    pub fn new() -> Self {
100        Self {
101            hash_table: std::collections::HashMap::new(),
102        }
103    }
104
105    pub fn insert(&mut self, idx: FeatureIndex, feature: Feature) {
106        if let Some(features) = self.hash_table.get_mut(&idx) {
107            features.insert(feature);
108        } else {
109            let mut features = std::collections::HashSet::new();
110            features.insert(feature);
111            self.hash_table.insert(idx, features);
112        }
113    }
114
115    pub fn get(&self, idx: FeatureIndex) -> Option<&std::collections::HashSet<Feature>> {
116        self.hash_table.get(&idx)
117    }
118}