kanata_parser/subset.rs
1//! Collection where keys are slices of a type, and supports a get operation to check if the key
2//! exists, is a subset of any existing key, or is neither of the aforementioned cases.
3//!
4//! In the underlying structure the value is cloned for each participating member of slice key, so
5//! you should ensure values are cheaply clonable. If the value is not, consider putting it inside
6//! `Rc` or `Arc`.
7
8use rustc_hash::FxHashMap;
9use std::hash::Hash;
10
11// # Design considerations:
12//
13// It was considered whether `key` should be in an `Arc` instead of a `Box` or whether
14// `SsmKeyValue` should be wrapped in `Arc`.
15//
16// ## No usage of `Arc`
17//
18// With no reference counting, the key slice `&[K]` will be allocated in a separate box for every
19// instance of `SsmKeyValue`. The number of instances of the key is equal to the length of the key.
20// This is obviously the best choice if key lengths are mostly expected to be 1. For key lengths
21// larger than 1, the point at which an `Arc` would be better would need to be measured.
22//
23// ## `key: Arc<[K]>`
24//
25// The benefit of using an `Arc` for the key instead of `Box` is that clones don't create a new
26// allocation. The downside is that the allocations use more space, namely there is an extra
27// `2 * usize` in the allocation for the strong and weak pointers, so 16 extra bytes.
28//
29// Kanata uses `K=u16` only today (August 2025). This means perfectly sized allocations, it would
30// take a 3 length key for `Box` to begin to reach `Arc`'s size:
31// - Arc: 16 + (3*2) = 22 bytes
32// - Box: 3 x (3*2) = 18 bytes
33//
34// A 4-length key is much worse:
35// - Arc: 16 + (4*2) = 22 bytes
36// - Box: 4 x (4*2) = 32 bytes
37//
38// In practice, allocators have allocation space overhead and/or minimum allocation sizes. With the
39// effects of these overheads and CPU caching, the estimate of when `Arc` outperforms `Box` for
40// read-only usage is likely a key length of 3 or even 2. Read-only is notable because Kanata
41// doesn't care about write performance; write only happens at parse time and only reads are done
42// for standard runtime.
43//
44// ## Vec<Arc<SsmKeyValue<...>>
45//
46// This has the downside of needing to follow two pointers to dereference `key`. For `Box`-only,
47// or `key: Arc<[K]>`, this is not the case. Having two indirections is not desirable.
48
49#[derive(Debug, Clone)]
50pub struct SubsetMap<K, V> {
51 map: FxHashMap<K, Vec<SsmKeyValue<K, V>>>,
52}
53
54#[derive(Debug, Clone)]
55struct SsmKeyValue<K, V> {
56 key: Box<[K]>,
57 value: V,
58}
59
60impl<K, V> SsmKeyValue<K, V>
61where
62 K: Clone,
63{
64 fn ssmkv_new(key: impl AsRef<[K]>, value: V) -> Self {
65 Self {
66 key: key.as_ref().to_vec().into_boxed_slice(),
67 value,
68 }
69 }
70}
71
72#[derive(Debug, Copy, Clone, PartialEq, Eq)]
73pub enum GetOrIsSubsetOfKnownKey<T> {
74 HasValue(T),
75 IsSubset,
76 Neither,
77}
78
79#[derive(Debug, Copy, Clone, PartialEq, Eq)]
80pub enum SsmKeyExistedBeforeInsert {
81 Existed,
82 NotThere,
83}
84
85use GetOrIsSubsetOfKnownKey::*;
86
87impl<K, V> Default for SubsetMap<K, V>
88where
89 K: Clone + PartialEq + Ord + Hash,
90 V: Clone,
91{
92 fn default() -> Self {
93 Self::ssm_new()
94 }
95}
96
97impl<K, V> SubsetMap<K, V>
98where
99 K: Clone + PartialEq + Ord + Hash,
100 V: Clone,
101{
102 pub fn ssm_new() -> Self {
103 Self {
104 map: FxHashMap::default(),
105 }
106 }
107
108 /// Inserts a potentially unsorted key. Sorts the key and then calls ssm_insert_ksorted.
109 pub fn ssm_insert(&mut self, mut key: impl AsMut<[K]>, val: V) -> SsmKeyExistedBeforeInsert {
110 key.as_mut().sort();
111 self.ssm_insert_ksorted(key.as_mut(), val)
112 }
113
114 /// Inserts a sorted key. Failure to enforce that the key is sorted results in defined but
115 /// unspecified behaviour.
116 pub fn ssm_insert_ksorted(
117 &mut self,
118 key: impl AsRef<[K]>,
119 val: V,
120 ) -> SsmKeyExistedBeforeInsert {
121 let mut key_existed = SsmKeyExistedBeforeInsert::NotThere;
122 for k in key.as_ref().iter().cloned() {
123 let keyvals_for_key_item = self.map.entry(k).or_default();
124 match keyvals_for_key_item
125 .binary_search_by(|probe| probe.key.as_ref().cmp(key.as_ref()))
126 {
127 Ok(pos) => {
128 key_existed = SsmKeyExistedBeforeInsert::Existed;
129 keyvals_for_key_item[pos] = SsmKeyValue::ssmkv_new(key.as_ref(), val.clone());
130 }
131 Err(pos) => {
132 keyvals_for_key_item
133 .insert(pos, SsmKeyValue::ssmkv_new(key.as_ref(), val.clone()));
134 }
135 }
136 }
137 key_existed
138 }
139
140 /// Gets using a potentially unsorted key. Sorts the key then calls
141 /// ssm_get_or_is_subset_ksorted.
142 pub fn ssm_get_or_is_subset(&self, mut key: impl AsMut<[K]>) -> GetOrIsSubsetOfKnownKey<V> {
143 key.as_mut().sort();
144 self.ssm_get_or_is_subset_ksorted(key.as_mut())
145 }
146
147 /// Gets using a sorted key. Failure to enforce a sorted key results in defined but unspecified
148 /// behaviour.
149 pub fn ssm_get_or_is_subset_ksorted(
150 &self,
151 get_key: impl AsRef<[K]>,
152 ) -> GetOrIsSubsetOfKnownKey<V> {
153 let get_key = get_key.as_ref();
154 if get_key.is_empty() {
155 return match self.is_empty() {
156 true => Neither,
157 false => IsSubset,
158 };
159 }
160 match self.map.get(&get_key[0]) {
161 None => Neither,
162 Some(keyvals_for_key_item) => {
163 match keyvals_for_key_item
164 .binary_search_by(|probe| probe.key.as_ref().cmp(get_key.as_ref()))
165 {
166 Ok(pos) => HasValue(keyvals_for_key_item[pos].value.clone()),
167 Err(_) => {
168 for kv in keyvals_for_key_item.iter() {
169 if get_key.iter().all(|kitem| kv.key.contains(kitem)) {
170 return IsSubset;
171 }
172 }
173 Neither
174 }
175 }
176 }
177 }
178 }
179
180 pub fn is_empty(&self) -> bool {
181 self.map.is_empty()
182 }
183}