halldyll_core/storage/
normalized.rs1use std::collections::HashMap;
4use std::sync::RwLock;
5use url::Url;
6
7use crate::types::Document;
8
9pub struct NormalizedStore {
11 documents: RwLock<HashMap<String, Document>>,
13 max_documents: usize,
15}
16
17impl Default for NormalizedStore {
18 fn default() -> Self {
19 Self::new(10000)
20 }
21}
22
23impl NormalizedStore {
24 pub fn new(max_documents: usize) -> Self {
26 Self {
27 documents: RwLock::new(HashMap::new()),
28 max_documents,
29 }
30 }
31
32 pub fn store(&self, document: Document) {
34 let url_key = document.source_url.to_string();
35
36 {
38 let documents = self.documents.read().unwrap();
39 if documents.len() >= self.max_documents {
40 drop(documents);
41 let mut documents = self.documents.write().unwrap();
42
43 let to_remove: Vec<_> = documents
45 .iter()
46 .take(self.max_documents / 10)
47 .map(|(k, _)| k.clone())
48 .collect();
49 for k in to_remove {
50 documents.remove(&k);
51 }
52 }
53 }
54
55 self.documents.write().unwrap().insert(url_key, document);
56 }
57
58 pub fn get(&self, url: &Url) -> Option<Document> {
60 let url_key = url.to_string();
61 self.documents.read().unwrap().get(&url_key).cloned()
62 }
63
64 pub fn has(&self, url: &Url) -> bool {
66 let url_key = url.to_string();
67 self.documents.read().unwrap().contains_key(&url_key)
68 }
69
70 pub fn len(&self) -> usize {
72 self.documents.read().unwrap().len()
73 }
74
75 pub fn is_empty(&self) -> bool {
77 self.documents.read().unwrap().is_empty()
78 }
79
80 pub fn clear(&self) {
82 self.documents.write().unwrap().clear();
83 }
84
85 pub fn iter<F>(&self, mut f: F)
87 where
88 F: FnMut(&Document),
89 {
90 let documents = self.documents.read().unwrap();
91 for doc in documents.values() {
92 f(doc);
93 }
94 }
95
96 pub fn export_all(&self) -> Vec<Document> {
98 self.documents.read().unwrap().values().cloned().collect()
99 }
100}