use std::collections::HashMap;
use std::sync::RwLock;
use url::Url;
use crate::types::Document;
/// In-memory collection of [`Document`]s keyed by the string form of their
/// source URL.
///
/// The map sits behind an [`RwLock`], so every method takes `&self` and
/// acquires the lock internally.
pub struct NormalizedStore {
/// Stored documents, indexed by `source_url.to_string()`.
documents: RwLock<HashMap<String, Document>>,
/// Entry count at which `store` starts evicting existing entries to make room.
max_documents: usize,
}
impl Default for NormalizedStore {
fn default() -> Self {
Self::new(10000)
}
}
impl NormalizedStore {
    /// Creates an empty store that starts evicting entries once it already
    /// holds `max_documents` documents.
    pub fn new(max_documents: usize) -> Self {
        Self {
            documents: RwLock::new(HashMap::new()),
            max_documents,
        }
    }

    /// Acquires the shared (read) guard over the document map.
    ///
    /// # Panics
    ///
    /// Panics if the lock has been poisoned.
    fn read_documents(&self) -> std::sync::RwLockReadGuard<'_, HashMap<String, Document>> {
        self.documents
            .read()
            .expect("NormalizedStore lock poisoned")
    }

    /// Acquires the exclusive (write) guard over the document map.
    ///
    /// # Panics
    ///
    /// Panics if the lock has been poisoned.
    fn write_documents(&self) -> std::sync::RwLockWriteGuard<'_, HashMap<String, Document>> {
        self.documents
            .write()
            .expect("NormalizedStore lock poisoned")
    }

    /// Inserts `document`, keyed by the string form of its `source_url`,
    /// replacing any document already stored under the same URL.
    ///
    /// When a *new* URL is inserted and the store already holds
    /// `max_documents` entries, roughly 10% of the capacity (and always at
    /// least one entry) is evicted first. Eviction follows the map's iteration
    /// order, which is arbitrary; it is not LRU or insertion ordered.
    /// Overwriting an existing URL never evicts anything because it does not
    /// grow the map.
    ///
    /// The capacity check, the eviction and the insert all happen under one
    /// write guard, so concurrent callers cannot interleave between them. The
    /// store therefore never holds more than `max(max_documents, 1)` entries.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn store(&self, document: Document) {
        let url_key = document.source_url.to_string();
        let mut documents = self.write_documents();

        let is_new_key = !documents.contains_key(&url_key);
        if is_new_key && documents.len() >= self.max_documents {
            // `max_documents / 10` is zero for capacities below ten; always
            // free at least one slot so the bound is actually enforced.
            let evict_count = (self.max_documents / 10).max(1);
            // Keys are copied out first: the map cannot be mutated while it
            // is being iterated.
            let victims: Vec<String> = documents.keys().take(evict_count).cloned().collect();
            for key in &victims {
                documents.remove(key);
            }
        }

        documents.insert(url_key, document);
    }

    /// Returns a clone of the document stored under `url`, or `None` if there
    /// is no such entry.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn get(&self, url: &Url) -> Option<Document> {
        let url_key = url.to_string();
        self.read_documents().get(&url_key).cloned()
    }

    /// Returns `true` if a document is stored under `url`.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn has(&self, url: &Url) -> bool {
        let url_key = url.to_string();
        self.read_documents().contains_key(&url_key)
    }

    /// Returns the number of stored documents.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn len(&self) -> usize {
        self.read_documents().len()
    }

    /// Returns `true` if the store holds no documents.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn is_empty(&self) -> bool {
        self.read_documents().is_empty()
    }

    /// Removes every stored document.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn clear(&self) {
        self.write_documents().clear();
    }

    /// Calls `f` once for every stored document, in arbitrary order.
    ///
    /// The read guard is held for the whole traversal, so `f` must not call
    /// a method that takes the write lock (`store`, `clear`) on this same
    /// store: that would deadlock or panic.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn iter<F>(&self, mut f: F)
    where
        F: FnMut(&Document),
    {
        let documents = self.read_documents();
        documents.values().for_each(|doc| f(doc));
    }

    /// Returns clones of all stored documents, in arbitrary order.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn export_all(&self) -> Vec<Document> {
        self.read_documents().values().cloned().collect()
    }
}