kanata_parser/
subset.rs

1//! Collection where keys are slices of a type, and supports a get operation to check if the key
2//! exists, is a subset of any existing key, or is neither of the aforementioned cases.
3//!
4//! In the underlying structure the value is cloned for each participating member of slice key, so
5//! you should ensure values are cheaply clonable. If the value is not, consider putting it inside
6//! `Rc` or `Arc`.
7
8use rustc_hash::FxHashMap;
9use std::hash::Hash;
10
11// # Design considerations:
12//
13// It was considered whether `key` should be in an `Arc` instead of a `Box` or whether
14// `SsmKeyValue` should be wrapped in `Arc`.
15//
16// ## No usage of `Arc`
17//
18// With no reference counting, the key slice `&[K]` will be allocated in a separate box for every
19// instance of `SsmKeyValue`. The number of instances of the key is equal to the length of the key.
20// This is obviously the best choice if key lengths are mostly expected to be 1. For key lengths
21// larger than 1, the point at which an `Arc` would be better would need to be measured.
22//
23// ## `key: Arc<[K]>`
24//
25// The benefit of using an `Arc` for the key instead of `Box` is that clones don't create a new
26// allocation. The downside is that the allocations use more space, namely there is an extra
27// `2 * usize` in the allocation for the strong and weak pointers, so 16 extra bytes.
28//
29// Kanata uses `K=u16` only today (August 2025). This means perfectly sized allocations, it would
30// take a 3 length key for `Box` to begin to reach `Arc`'s size:
31//   - Arc: 16 + (3*2) = 22 bytes
32//   - Box:  3 x (3*2) = 18 bytes
33//
34// A 4-length key is much worse:
35//   - Arc: 16 + (4*2) = 22 bytes
36//   - Box:  4 x (4*2) = 32 bytes
37//
38// In practice, allocators have allocation space overhead and/or minimum allocation sizes. With the
39// effects of these overheads and CPU caching, the estimate of when `Arc` outperforms `Box` for
40// read-only usage is likely a key length of 3 or even 2. Read-only is notable because Kanata
41// doesn't care about write performance; write only happens at parse time and only reads are done
42// for standard runtime.
43//
44// ## Vec<Arc<SsmKeyValue<...>>
45//
46// This has the downside of needing to follow two pointers to dereference `key`. For `Box`-only,
47// or `key: Arc<[K]>`, this is not the case. Having two indirections is not desirable.
48
49#[derive(Debug, Clone)]
50pub struct SubsetMap<K, V> {
51    map: FxHashMap<K, Vec<SsmKeyValue<K, V>>>,
52}
53
54#[derive(Debug, Clone)]
55struct SsmKeyValue<K, V> {
56    key: Box<[K]>,
57    value: V,
58}
59
60impl<K, V> SsmKeyValue<K, V>
61where
62    K: Clone,
63{
64    fn ssmkv_new(key: impl AsRef<[K]>, value: V) -> Self {
65        Self {
66            key: key.as_ref().to_vec().into_boxed_slice(),
67            value,
68        }
69    }
70}
71
72#[derive(Debug, Copy, Clone, PartialEq, Eq)]
73pub enum GetOrIsSubsetOfKnownKey<T> {
74    HasValue(T),
75    IsSubset,
76    Neither,
77}
78
79#[derive(Debug, Copy, Clone, PartialEq, Eq)]
80pub enum SsmKeyExistedBeforeInsert {
81    Existed,
82    NotThere,
83}
84
85use GetOrIsSubsetOfKnownKey::*;
86
87impl<K, V> Default for SubsetMap<K, V>
88where
89    K: Clone + PartialEq + Ord + Hash,
90    V: Clone,
91{
92    fn default() -> Self {
93        Self::ssm_new()
94    }
95}
96
97impl<K, V> SubsetMap<K, V>
98where
99    K: Clone + PartialEq + Ord + Hash,
100    V: Clone,
101{
102    pub fn ssm_new() -> Self {
103        Self {
104            map: FxHashMap::default(),
105        }
106    }
107
108    /// Inserts a potentially unsorted key. Sorts the key and then calls ssm_insert_ksorted.
109    pub fn ssm_insert(&mut self, mut key: impl AsMut<[K]>, val: V) -> SsmKeyExistedBeforeInsert {
110        key.as_mut().sort();
111        self.ssm_insert_ksorted(key.as_mut(), val)
112    }
113
114    /// Inserts a sorted key. Failure to enforce that the key is sorted results in defined but
115    /// unspecified behaviour.
116    pub fn ssm_insert_ksorted(
117        &mut self,
118        key: impl AsRef<[K]>,
119        val: V,
120    ) -> SsmKeyExistedBeforeInsert {
121        let mut key_existed = SsmKeyExistedBeforeInsert::NotThere;
122        for k in key.as_ref().iter().cloned() {
123            let keyvals_for_key_item = self.map.entry(k).or_default();
124            match keyvals_for_key_item
125                .binary_search_by(|probe| probe.key.as_ref().cmp(key.as_ref()))
126            {
127                Ok(pos) => {
128                    key_existed = SsmKeyExistedBeforeInsert::Existed;
129                    keyvals_for_key_item[pos] = SsmKeyValue::ssmkv_new(key.as_ref(), val.clone());
130                }
131                Err(pos) => {
132                    keyvals_for_key_item
133                        .insert(pos, SsmKeyValue::ssmkv_new(key.as_ref(), val.clone()));
134                }
135            }
136        }
137        key_existed
138    }
139
140    /// Gets using a potentially unsorted key. Sorts the key then calls
141    /// ssm_get_or_is_subset_ksorted.
142    pub fn ssm_get_or_is_subset(&self, mut key: impl AsMut<[K]>) -> GetOrIsSubsetOfKnownKey<V> {
143        key.as_mut().sort();
144        self.ssm_get_or_is_subset_ksorted(key.as_mut())
145    }
146
147    /// Gets using a sorted key. Failure to enforce a sorted key results in defined but unspecified
148    /// behaviour.
149    pub fn ssm_get_or_is_subset_ksorted(
150        &self,
151        get_key: impl AsRef<[K]>,
152    ) -> GetOrIsSubsetOfKnownKey<V> {
153        let get_key = get_key.as_ref();
154        if get_key.is_empty() {
155            return match self.is_empty() {
156                true => Neither,
157                false => IsSubset,
158            };
159        }
160        match self.map.get(&get_key[0]) {
161            None => Neither,
162            Some(keyvals_for_key_item) => {
163                match keyvals_for_key_item
164                    .binary_search_by(|probe| probe.key.as_ref().cmp(get_key.as_ref()))
165                {
166                    Ok(pos) => HasValue(keyvals_for_key_item[pos].value.clone()),
167                    Err(_) => {
168                        for kv in keyvals_for_key_item.iter() {
169                            if get_key.iter().all(|kitem| kv.key.contains(kitem)) {
170                                return IsSubset;
171                            }
172                        }
173                        Neither
174                    }
175                }
176            }
177        }
178    }
179
180    pub fn is_empty(&self) -> bool {
181        self.map.is_empty()
182    }
183}