halldyll_core/storage/
snapshot.rs1use sha2::{Sha256, Digest};
4use std::collections::HashMap;
5use std::sync::RwLock;
6use url::Url;
7
8
9#[derive(Debug, Clone)]
11pub struct RawSnapshot {
12 pub url: Url,
14 pub html: String,
16 pub headers: HashMap<String, String>,
18 pub encoding: String,
20 pub compressed_size: Option<u64>,
22 pub decompressed_size: u64,
24 pub timestamp: chrono::DateTime<chrono::Utc>,
26 pub hash: String,
28}
29
30impl RawSnapshot {
31 pub fn new(url: Url, html: String, headers: HashMap<String, String>) -> Self {
33 let hash = Self::compute_hash(&html);
34 let decompressed_size = html.len() as u64;
35
36 Self {
37 url,
38 html,
39 headers,
40 encoding: "utf-8".to_string(),
41 compressed_size: None,
42 decompressed_size,
43 timestamp: chrono::Utc::now(),
44 hash,
45 }
46 }
47
48 pub fn from_response(
50 url: Url,
51 status_code: u16,
52 headers: HashMap<String, String>,
53 body: Vec<u8>,
54 ) -> Self {
55 let html = String::from_utf8_lossy(&body).to_string();
56 let hash = Self::compute_hash(&html);
57 let decompressed_size = body.len() as u64;
58
59 let mut snapshot_headers = headers;
60 snapshot_headers.insert("x-status-code".to_string(), status_code.to_string());
61
62 Self {
63 url,
64 html,
65 headers: snapshot_headers,
66 encoding: "utf-8".to_string(),
67 compressed_size: None,
68 decompressed_size,
69 timestamp: chrono::Utc::now(),
70 hash,
71 }
72 }
73
74 fn compute_hash(content: &str) -> String {
76 let mut hasher = Sha256::new();
77 hasher.update(content.as_bytes());
78 format!("{:x}", hasher.finalize())
79 }
80
81 pub fn serialize(&self) -> Vec<u8> {
83 let mut output = String::new();
85
86 output.push_str(&format!("URL: {}\n", self.url));
88 output.push_str(&format!("Timestamp: {}\n", self.timestamp.to_rfc3339()));
89 output.push_str(&format!("Hash: {}\n", self.hash));
90 output.push_str(&format!("Encoding: {}\n", self.encoding));
91
92 for (key, value) in &self.headers {
94 output.push_str(&format!("{}: {}\n", key, value));
95 }
96
97 output.push_str("\n\n\n");
99
100 output.push_str(&self.html);
102
103 output.into_bytes()
104 }
105}
106
107pub struct SnapshotStore {
109 snapshots: RwLock<HashMap<String, RawSnapshot>>,
111 url_index: RwLock<HashMap<String, String>>,
113 max_snapshots: usize,
115}
116
117impl Default for SnapshotStore {
118 fn default() -> Self {
119 Self::new(10000)
120 }
121}
122
123impl SnapshotStore {
124 pub fn new(max_snapshots: usize) -> Self {
126 Self {
127 snapshots: RwLock::new(HashMap::new()),
128 url_index: RwLock::new(HashMap::new()),
129 max_snapshots,
130 }
131 }
132
133 pub fn store(&self, snapshot: RawSnapshot) -> String {
135 let hash = snapshot.hash.clone();
136 let url_key = snapshot.url.to_string();
137
138 {
140 let snapshots = self.snapshots.read().unwrap();
141 if snapshots.len() >= self.max_snapshots {
142 drop(snapshots);
144 let mut snapshots = self.snapshots.write().unwrap();
145 let mut url_index = self.url_index.write().unwrap();
146
147 let to_remove: Vec<_> = snapshots
149 .keys()
150 .take(self.max_snapshots / 10)
151 .cloned()
152 .collect();
153 for h in to_remove {
154 snapshots.remove(&h);
155 url_index.retain(|_, v| v != &h);
157 }
158 }
159 }
160
161 self.snapshots.write().unwrap().insert(hash.clone(), snapshot);
163 self.url_index.write().unwrap().insert(url_key, hash.clone());
164
165 hash
166 }
167
168 pub fn get_by_hash(&self, hash: &str) -> Option<RawSnapshot> {
170 self.snapshots.read().unwrap().get(hash).cloned()
171 }
172
173 pub fn get_by_url(&self, url: &Url) -> Option<RawSnapshot> {
175 let url_key = url.to_string();
176 let hash = self.url_index.read().unwrap().get(&url_key)?.clone();
177 self.get_by_hash(&hash)
178 }
179
180 pub fn has_hash(&self, hash: &str) -> bool {
182 self.snapshots.read().unwrap().contains_key(hash)
183 }
184
185 pub fn len(&self) -> usize {
187 self.snapshots.read().unwrap().len()
188 }
189
190 pub fn is_empty(&self) -> bool {
192 self.snapshots.read().unwrap().is_empty()
193 }
194
195 pub fn clear(&self) {
197 self.snapshots.write().unwrap().clear();
198 self.url_index.write().unwrap().clear();
199 }
200}