1use std::hash::{Hash, Hasher};
2use std::sync::atomic::{AtomicUsize, Ordering};
3use std::sync::{Arc, RwLock};
4
5use formualizer_common::{ExcelError, LiteralValue, SheetId};
6use rustc_hash::FxHashMap;
7use smallvec::SmallVec;
8
9use crate::builtins::lookup::lookup_utils::cmp_for_lookup;
10use crate::engine::range_view::RangeView;
11
12#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
13pub struct LookupIndexKey {
14 pub(crate) sheet_id: SheetId,
15 pub(crate) start_row: u32,
16 pub(crate) start_col: u32,
17 pub(crate) end_row: u32,
18 pub(crate) end_col: u32,
19 pub(crate) axis: LookupAxis,
20 pub(crate) snapshot_id: u64,
21}
22
23#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
24pub enum LookupAxis {
25 ColumnInView(usize),
26 RowInView(usize),
27}
28
29#[derive(Debug, Eq, PartialEq)]
30pub enum LookupHashKey {
31 Number(u64),
32 Text(Box<str>),
33 Boolean(bool),
34 Empty,
35}
36
37impl Hash for LookupHashKey {
38 fn hash<H: Hasher>(&self, state: &mut H) {
39 match self {
40 Self::Number(bits) => {
41 0u8.hash(state);
42 bits.hash(state);
43 }
44 Self::Text(text) => {
45 1u8.hash(state);
46 text.hash(state);
47 }
48 Self::Boolean(value) => {
49 2u8.hash(state);
50 value.hash(state);
51 }
52 Self::Empty => {
53 3u8.hash(state);
54 }
55 }
56 }
57}
58
59impl LookupHashKey {
60 pub(crate) fn from_literal(value: &LiteralValue) -> Option<Self> {
61 match value {
62 LiteralValue::Number(n) => Some(Self::Number(normalize_f64_bits(*n))),
63 LiteralValue::Int(i) => Some(Self::Number(normalize_f64_bits(*i as f64))),
64 LiteralValue::Text(s) => Some(Self::Text(s.to_lowercase().into_boxed_str())),
65 LiteralValue::Boolean(b) => Some(Self::Boolean(*b)),
66 LiteralValue::Empty => Some(Self::Empty),
67 LiteralValue::Error(_)
68 | LiteralValue::Array(_)
69 | LiteralValue::Date(_)
70 | LiteralValue::DateTime(_)
71 | LiteralValue::Time(_)
72 | LiteralValue::Duration(_)
73 | LiteralValue::Pending => None,
74 }
75 }
76}
77
78fn normalize_f64_bits(n: f64) -> u64 {
79 if n.is_nan() {
80 return f64::NAN.to_bits();
81 }
82 let rounded = n.round();
83 if (n - rounded).abs() < 1e-12 {
84 rounded.to_bits()
85 } else {
86 n.to_bits()
87 }
88}
89
90#[derive(Debug, Clone, Default)]
91pub struct DuplicateIndices {
92 pub(crate) first: usize,
93 pub(crate) last: usize,
94 pub(crate) all: SmallVec<[usize; 1]>,
95}
96
97pub struct LookupIndex {
98 pub(crate) len: usize,
99 pub(crate) bytes: usize,
100 pub(crate) entries: FxHashMap<LookupHashKey, DuplicateIndices>,
101 pub(crate) cell_values: Box<[LiteralValue]>,
102 pub(crate) first_empty: Option<usize>,
103}
104
105impl LookupIndex {
106 pub(crate) fn build(
107 view: &RangeView<'_>,
108 axis: LookupAxis,
109 ) -> Result<BuildOutcome, ExcelError> {
110 let (rows, cols) = view.dims();
111 let len = match axis {
112 LookupAxis::ColumnInView(col) => {
113 if col >= cols {
114 return Ok(BuildOutcome::Degenerate);
115 }
116 rows
117 }
118 LookupAxis::RowInView(row) => {
119 if row >= rows {
120 return Ok(BuildOutcome::Degenerate);
121 }
122 cols
123 }
124 };
125 if len == 0 {
126 return Ok(BuildOutcome::Degenerate);
127 }
128
129 let mut entries: FxHashMap<LookupHashKey, DuplicateIndices> = FxHashMap::default();
130 let mut cell_values = Vec::with_capacity(len);
131 let mut first_empty = None;
132 let mut error_count = 0usize;
133
134 for idx in 0..len {
135 let value = match axis {
136 LookupAxis::ColumnInView(col) => view.get_cell(idx, col),
137 LookupAxis::RowInView(row) => view.get_cell(row, idx),
138 };
139 if matches!(value, LiteralValue::Error(_)) {
140 error_count += 1;
141 }
142 if matches!(value, LiteralValue::Empty) && first_empty.is_none() {
143 first_empty = Some(idx);
144 }
145 if let Some(key) = LookupHashKey::from_literal(&value) {
146 let dups = entries.entry(key).or_insert_with(|| DuplicateIndices {
147 first: idx,
148 last: idx,
149 all: SmallVec::new(),
150 });
151 if dups.all.is_empty() {
152 dups.first = idx;
153 }
154 dups.last = idx;
155 dups.all.push(idx);
156 }
157 cell_values.push(value);
158 }
159
160 if error_count > 0 {
161 return Ok(BuildOutcome::ErrorInLookupAxis);
162 }
163
164 let bytes = estimate_bytes(len, entries.len());
165 Ok(BuildOutcome::Built(Self {
166 len,
167 bytes,
168 entries,
169 cell_values: cell_values.into_boxed_slice(),
170 first_empty,
171 }))
172 }
173
174 pub(crate) fn find_first_exact(&self, needle: &LiteralValue) -> Option<usize> {
175 let hash_key = LookupHashKey::from_literal(needle)?;
176 if let Some(dups) = self.entries.get(&hash_key) {
177 for &idx in &dups.all {
178 if cmp_for_lookup(needle, &self.cell_values[idx]) == Some(0) {
179 return Some(idx);
180 }
181 }
182 }
183 if let Some(n) = numeric_zero_candidate(needle)
184 && n.abs() < 1e-12
185 {
186 return self.first_empty;
187 }
188 None
189 }
190
191 pub(crate) fn find_last_exact(&self, needle: &LiteralValue) -> Option<usize> {
192 let hash_key = LookupHashKey::from_literal(needle)?;
193 if let Some(dups) = self.entries.get(&hash_key) {
194 for &idx in dups.all.iter().rev() {
195 if cmp_for_lookup(needle, &self.cell_values[idx]) == Some(0) {
196 return Some(idx);
197 }
198 }
199 }
200 if let Some(n) = numeric_zero_candidate(needle)
201 && n.abs() < 1e-12
202 {
203 return self.first_empty;
204 }
205 None
206 }
207}
208
209fn numeric_zero_candidate(needle: &LiteralValue) -> Option<f64> {
210 match needle {
211 LiteralValue::Number(n) => Some(*n),
212 LiteralValue::Int(i) => Some(*i as f64),
213 _ => None,
214 }
215}
216
217pub(crate) fn estimate_bytes(len: usize, entries: usize) -> usize {
218 len.saturating_mul(std::mem::size_of::<LiteralValue>().saturating_add(8))
219 .saturating_add(entries.saturating_mul(96))
220 .saturating_add(256)
221}
222
223pub(crate) enum BuildOutcome {
224 Built(LookupIndex),
225 ErrorInLookupAxis,
226 Degenerate,
227}
228
229const LOOKUP_INDEX_BUILD_THRESHOLD: u32 = 3;
230const CALL_COUNT_PRUNE_LIMIT: usize = 4096;
231
232#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
233pub struct LookupIndexCacheReport {
234 pub(crate) builds: usize,
235 pub(crate) hits: usize,
236 pub(crate) misses: usize,
237 pub(crate) skipped_volatile: usize,
238 pub(crate) skipped_error: usize,
239 pub(crate) skipped_tiny: usize,
240 pub(crate) skipped_cap: usize,
241 pub(crate) skipped_below_threshold: usize,
242 pub(crate) bytes_in_cache: usize,
243 pub(crate) entries_count: usize,
244}
245
246pub struct LookupIndexCache {
247 inner: RwLock<FxHashMap<LookupIndexKey, Arc<LookupIndex>>>,
248 call_counts: RwLock<FxHashMap<LookupIndexKey, u32>>,
249 volatile_keys: RwLock<FxHashMap<LookupIndexKey, ()>>,
250 build_threshold: u32,
251 bytes_in_use: AtomicUsize,
252 max_bytes: usize,
253 builds: AtomicUsize,
254 hits: AtomicUsize,
255 misses: AtomicUsize,
256 skipped_volatile: AtomicUsize,
257 skipped_error: AtomicUsize,
258 skipped_tiny: AtomicUsize,
259 skipped_cap: AtomicUsize,
260 skipped_below_threshold: AtomicUsize,
261}
262
263fn volatile_key(mut key: LookupIndexKey) -> LookupIndexKey {
264 key.snapshot_id = 0;
265 key
266}
267
268impl LookupIndexCache {
269 pub(crate) fn new(max_bytes: usize) -> Self {
270 Self {
271 inner: RwLock::new(FxHashMap::default()),
272 call_counts: RwLock::new(FxHashMap::default()),
273 volatile_keys: RwLock::new(FxHashMap::default()),
274 build_threshold: LOOKUP_INDEX_BUILD_THRESHOLD,
275 bytes_in_use: AtomicUsize::new(0),
276 max_bytes,
277 builds: AtomicUsize::new(0),
278 hits: AtomicUsize::new(0),
279 misses: AtomicUsize::new(0),
280 skipped_volatile: AtomicUsize::new(0),
281 skipped_error: AtomicUsize::new(0),
282 skipped_tiny: AtomicUsize::new(0),
283 skipped_cap: AtomicUsize::new(0),
284 skipped_below_threshold: AtomicUsize::new(0),
285 }
286 }
287
288 pub(crate) fn get(&self, key: &LookupIndexKey) -> Option<Arc<LookupIndex>> {
289 let found = self
290 .inner
291 .read()
292 .ok()
293 .and_then(|guard| guard.get(key).cloned());
294 if found.is_some() {
295 self.hits.fetch_add(1, Ordering::Relaxed);
296 } else {
297 self.misses.fetch_add(1, Ordering::Relaxed);
298 }
299 found
300 }
301
302 pub(crate) fn should_build(&self, key: LookupIndexKey) -> bool {
303 let Ok(mut guard) = self.call_counts.write() else {
304 self.skipped_below_threshold.fetch_add(1, Ordering::Relaxed);
305 return false;
306 };
307 if guard.len() > CALL_COUNT_PRUNE_LIMIT {
308 guard.retain(|existing_key, _| existing_key.snapshot_id == key.snapshot_id);
309 }
310 let count = guard.entry(key).or_insert(0);
311 *count = count.saturating_add(1);
312 if *count <= self.build_threshold {
313 self.skipped_below_threshold.fetch_add(1, Ordering::Relaxed);
314 return false;
315 }
316 true
317 }
318
319 pub(crate) fn would_exceed_cap(&self, bytes: usize) -> bool {
320 self.bytes_in_use
321 .load(Ordering::Relaxed)
322 .saturating_add(bytes)
323 > self.max_bytes
324 }
325
326 pub(crate) fn is_known_volatile(&self, key: &LookupIndexKey) -> bool {
327 let volatile_key = volatile_key(*key);
328 self.volatile_keys
329 .read()
330 .map(|guard| guard.contains_key(&volatile_key))
331 .unwrap_or(false)
332 }
333
334 pub(crate) fn note_volatile_key(&self, key: LookupIndexKey) {
335 if let Ok(mut guard) = self.volatile_keys.write() {
336 if guard.len() > CALL_COUNT_PRUNE_LIMIT {
337 guard.clear();
338 }
339 guard.insert(volatile_key(key), ());
340 }
341 }
342
343 pub(crate) fn insert_if_room(
344 &self,
345 key: LookupIndexKey,
346 index: LookupIndex,
347 ) -> Option<Arc<LookupIndex>> {
348 let bytes = index.bytes;
349 let current = self.bytes_in_use.load(Ordering::Relaxed);
350 if current.saturating_add(bytes) > self.max_bytes {
351 self.skipped_cap.fetch_add(1, Ordering::Relaxed);
352 return None;
353 }
354 let index = Arc::new(index);
355 if let Ok(mut guard) = self.inner.write() {
356 if let Some(existing) = guard.get(&key) {
357 self.hits.fetch_add(1, Ordering::Relaxed);
358 return Some(existing.clone());
359 }
360 guard.insert(key, index.clone());
361 self.bytes_in_use.fetch_add(bytes, Ordering::Relaxed);
362 self.builds.fetch_add(1, Ordering::Relaxed);
363 Some(index)
364 } else {
365 None
366 }
367 }
368
369 pub(crate) fn note_skipped_volatile(&self) {
370 self.skipped_volatile.fetch_add(1, Ordering::Relaxed);
371 }
372
373 pub(crate) fn note_skipped_error(&self) {
374 self.skipped_error.fetch_add(1, Ordering::Relaxed);
375 }
376
377 pub(crate) fn note_skipped_tiny(&self) {
378 self.skipped_tiny.fetch_add(1, Ordering::Relaxed);
379 }
380
381 pub(crate) fn note_skipped_cap(&self) {
382 self.skipped_cap.fetch_add(1, Ordering::Relaxed);
383 }
384
385 pub(crate) fn reset_counters(&self) {
386 self.builds.store(0, Ordering::Relaxed);
387 self.hits.store(0, Ordering::Relaxed);
388 self.misses.store(0, Ordering::Relaxed);
389 self.skipped_volatile.store(0, Ordering::Relaxed);
390 self.skipped_error.store(0, Ordering::Relaxed);
391 self.skipped_tiny.store(0, Ordering::Relaxed);
392 self.skipped_cap.store(0, Ordering::Relaxed);
393 self.skipped_below_threshold.store(0, Ordering::Relaxed);
394 }
395
396 pub(crate) fn report(&self) -> LookupIndexCacheReport {
397 let entries_count = self
398 .inner
399 .read()
400 .map(|guard| guard.len())
401 .unwrap_or_default();
402 LookupIndexCacheReport {
403 builds: self.builds.load(Ordering::Relaxed),
404 hits: self.hits.load(Ordering::Relaxed),
405 misses: self.misses.load(Ordering::Relaxed),
406 skipped_volatile: self.skipped_volatile.load(Ordering::Relaxed),
407 skipped_error: self.skipped_error.load(Ordering::Relaxed),
408 skipped_tiny: self.skipped_tiny.load(Ordering::Relaxed),
409 skipped_cap: self.skipped_cap.load(Ordering::Relaxed),
410 skipped_below_threshold: self.skipped_below_threshold.load(Ordering::Relaxed),
411 bytes_in_cache: self.bytes_in_use.load(Ordering::Relaxed),
412 entries_count,
413 }
414 }
415}