harper_pos_utils/chunker/
cached_chunker.rs

1use lru::LruCache;
2use std::hash::Hash;
3use std::num::NonZeroUsize;
4use std::sync::Mutex;
5
6use super::Chunker;
7use crate::UPOS;
8
9/// Wraps any chunker implementation to add an LRU Cache.
10/// Useful for incremental lints.
11pub struct CachedChunker<C: Chunker> {
12    inner: C,
13    cache: Mutex<LruCache<CacheKey, Vec<bool>>>,
14}
15
16impl<C: Chunker> CachedChunker<C> {
17    pub fn new(inner: C, capacity: NonZeroUsize) -> Self {
18        Self {
19            inner,
20            cache: Mutex::new(LruCache::new(capacity)),
21        }
22    }
23}
24
25impl<C: Chunker> Chunker for CachedChunker<C> {
26    fn chunk_sentence(&self, sentence: &[String], tags: &[Option<UPOS>]) -> Vec<bool> {
27        let key = CacheKey::new(sentence, tags);
28
29        // Attempt a cache hit.
30        // We put this in the block so `read` gets dropped as early as possible.
31        if let Ok(mut read) = self.cache.try_lock()
32            && let Some(result) = read.get(&key)
33        {
34            return result.clone();
35        };
36
37        // We don't want to hold the lock since it may take a while to run the chunker.
38        let result = self.inner.chunk_sentence(sentence, tags);
39
40        if let Ok(mut cache) = self.cache.try_lock() {
41            cache.put(key, result.clone());
42        }
43
44        result
45    }
46}
47
48#[derive(Hash, PartialEq, Eq)]
49struct CacheKey {
50    sentence: Vec<String>,
51    tags: Vec<Option<UPOS>>,
52}
53
54impl CacheKey {
55    fn new(sentence: &[String], tags: &[Option<UPOS>]) -> Self {
56        Self {
57            sentence: sentence.to_vec(),
58            tags: tags.to_vec(),
59        }
60    }
61}