iroh_blobs/store/fs/validate.rs
//! Validation of the store's contents.
use std::collections::BTreeSet;

use redb::ReadableTable;

use super::{
    raw_outboard_size, tables::Tables, ActorResult, ActorState, DataLocation, EntryState, Hash,
    OutboardLocation,
};
use crate::{
    store::{fs::tables::BaoFilePart, ConsistencyCheckProgress, ReportLevel},
    util::progress::BoxedProgressSender,
};
impl ActorState {
    /// This performs a full consistency check. Eventually it will also
    /// re-validate the file content, but that part is not yet implemented.
    ///
    /// Currently the following checks are performed for complete entries:
    ///
    /// Check that the data in the entries table is consistent with the data in
    /// the inline_data and inline_outboard tables.
    ///
    /// For every entry where data_location is inline, the inline_data table
    /// must contain the data. For every entry where data_location is not
    /// inline, the inline_data table must not contain data. Instead, the data
    /// must exist as a file in the data directory or be referenced as one or
    /// more external files.
    ///
    /// For every entry where outboard_location is inline, the inline_outboard
    /// table must contain the outboard. For every entry where outboard_location
    /// is not inline, the inline_outboard table must not contain data, and the
    /// outboard must exist as a file in the data directory. Outboards are never
    /// external.
    ///
    /// In addition to these consistency checks, it is checked that the size of
    /// the outboard is consistent with the size of the data.
    ///
    /// For partial entries, it is checked that the data and outboard files
    /// exist.
    ///
    /// In addition to the consistency checks, it is checked that there are no
    /// orphaned or unexpected files in the data directory. Also, all entries of
    /// all tables are dumped at trace level. This is helpful for debugging and
    /// also ensures that the data can be read.
    ///
    /// Note that during validation, a set of all hashes is kept in memory, so
    /// validating an exceedingly large store will consume a lot of memory.
    ///
    /// In addition, validation is a blocking operation that will make the store
    /// unresponsive for the duration of the validation.
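    ///
    /// # Example
    ///
    /// A minimal usage sketch (not a runnable doc-test, since `ActorState` and
    /// the progress sender are internal; `state`, `db`, and `progress` are
    /// assumed to be provided by the caller):
    ///
    /// ```ignore
    /// // dry run: report problems without modifying the store
    /// state.consistency_check(&db, false, progress)?;
    /// ```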
    pub(super) fn consistency_check(
        &mut self,
        db: &redb::Database,
        repair: bool,
        progress: BoxedProgressSender<ConsistencyCheckProgress>,
    ) -> ActorResult<()> {
        use crate::util::progress::ProgressSender;
        let mut invalid_entries = BTreeSet::new();
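        // Progress-reporting macros. Everything goes through `send!`, which
        // returns `Ok(())` from the whole check if the receiver has been
        // dropped, so a disconnected client simply cancels validation.
        // `entry_error!` additionally records the hash in `invalid_entries`
        // so that repair mode can remove the entry later.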
        macro_rules! send {
            ($level:expr, $entry:expr, $($arg:tt)*) => {
                if progress.blocking_send(ConsistencyCheckProgress::Update { message: format!($($arg)*), level: $level, entry: $entry }).is_err() {
                    return Ok(());
                }
            };
        }
        macro_rules! trace {
            ($($arg:tt)*) => {
                send!(ReportLevel::Trace, None, $($arg)*)
            };
        }
        macro_rules! info {
            ($($arg:tt)*) => {
                send!(ReportLevel::Info, None, $($arg)*)
            };
        }
        macro_rules! warn {
            ($($arg:tt)*) => {
                send!(ReportLevel::Warn, None, $($arg)*)
            };
        }
        macro_rules! entry_warn {
            ($hash:expr, $($arg:tt)*) => {
                send!(ReportLevel::Warn, Some($hash), $($arg)*)
            };
        }
        macro_rules! entry_info {
            ($hash:expr, $($arg:tt)*) => {
                send!(ReportLevel::Info, Some($hash), $($arg)*)
            };
        }
        macro_rules! error {
            ($($arg:tt)*) => {
                send!(ReportLevel::Error, None, $($arg)*)
            };
        }
        macro_rules! entry_error {
            ($hash:expr, $($arg:tt)*) => {
                invalid_entries.insert($hash);
                send!(ReportLevel::Error, Some($hash), $($arg)*)
            };
        }
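        // Files queued in `delete_after_commit` are only removed from disk
        // after the transaction has committed, so an aborted run never
        // deletes files that the tables still reference.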
        let mut delete_after_commit = Default::default();
        let txn = db.begin_write()?;
        {
            let mut tables = Tables::new(&txn, &mut delete_after_commit)?;
            let blobs = &mut tables.blobs;
            let inline_data = &mut tables.inline_data;
            let inline_outboard = &mut tables.inline_outboard;
            let tags = &mut tables.tags;
            let mut orphaned_inline_data = BTreeSet::new();
            let mut orphaned_inline_outboard = BTreeSet::new();
            let mut orphaned_data = BTreeSet::new();
            let mut orphaned_outboard = BTreeSet::new();
            let mut orphaned_sizes = BTreeSet::new();
            // first, dump the entire data content at trace level
            trace!("dumping blobs");
            match blobs.iter() {
                Ok(iter) => {
                    for item in iter {
                        match item {
                            Ok((k, v)) => {
                                let hash = k.value();
                                let entry = v.value();
                                trace!("blob {} -> {:?}", hash.to_hex(), entry);
                            }
                            Err(cause) => {
                                error!("failed to access blob item: {}", cause);
                            }
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate blobs: {}", cause);
                }
            }
            trace!("dumping inline_data");
            match inline_data.iter() {
                Ok(iter) => {
                    for item in iter {
                        match item {
                            Ok((k, v)) => {
                                let hash = k.value();
                                let data = v.value();
                                trace!("inline_data {} -> {:?}", hash.to_hex(), data.len());
                            }
                            Err(cause) => {
                                error!("failed to access inline data item: {}", cause);
                            }
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate inline_data: {}", cause);
                }
            }
            trace!("dumping inline_outboard");
            match inline_outboard.iter() {
                Ok(iter) => {
                    for item in iter {
                        match item {
                            Ok((k, v)) => {
                                let hash = k.value();
                                let data = v.value();
                                trace!("inline_outboard {} -> {:?}", hash.to_hex(), data.len());
                            }
                            Err(cause) => {
                                error!("failed to access inline outboard item: {}", cause);
                            }
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate inline_outboard: {}", cause);
                }
            }
            trace!("dumping tags");
            match tags.iter() {
                Ok(iter) => {
                    for item in iter {
                        match item {
                            Ok((k, v)) => {
                                let tag = k.value();
                                let value = v.value();
                                trace!("tags {} -> {:?}", tag, value);
                            }
                            Err(cause) => {
                                error!("failed to access tag item: {}", cause);
                            }
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate tags: {}", cause);
                }
            }

            // perform consistency check for each entry
            info!("validating blobs");
            // set of all hashes that are referenced by the blobs table
            let mut entries = BTreeSet::new();
            match blobs.iter() {
                Ok(iter) => {
                    for item in iter {
                        let Ok((hash, entry)) = item else {
                            error!("failed to access blob item");
                            continue;
                        };
                        let hash = hash.value();
                        entries.insert(hash);
                        entry_info!(hash, "validating blob");
                        let entry = entry.value();
                        match entry {
                            EntryState::Complete {
                                data_location,
                                outboard_location,
                            } => {
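                                // Work out the data size from wherever the
                                // data lives; each branch also verifies that
                                // the data exists and has the recorded size.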
                                let data_size = match data_location {
                                    DataLocation::Inline(_) => {
                                        let Ok(inline_data) = inline_data.get(hash) else {
                                            entry_error!(hash, "inline data can not be accessed");
                                            continue;
                                        };
                                        let Some(inline_data) = inline_data else {
                                            entry_error!(hash, "inline data missing");
                                            continue;
                                        };
                                        inline_data.value().len() as u64
                                    }
                                    DataLocation::Owned(size) => {
                                        let path = self.options.path.owned_data_path(&hash);
                                        let Ok(metadata) = path.metadata() else {
                                            entry_error!(hash, "owned data file does not exist");
                                            continue;
                                        };
                                        if metadata.len() != size {
                                            entry_error!(
                                                hash,
                                                "owned data file size mismatch: {}",
                                                path.display()
                                            );
                                            continue;
                                        }
                                        size
                                    }
                                    DataLocation::External(paths, size) => {
                                        for path in paths {
                                            let Ok(metadata) = path.metadata() else {
                                                entry_error!(
                                                    hash,
                                                    "external data file does not exist: {}",
                                                    path.display()
                                                );
                                                continue;
                                            };
                                            if metadata.len() != size {
                                                entry_error!(
                                                    hash,
                                                    "external data file size mismatch: {}",
                                                    path.display()
                                                );
                                                continue;
                                            }
                                        }
                                        size
                                    }
                                };
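                                // The outboard must be consistent with the
                                // data: its expected size is fully determined
                                // by the data size via raw_outboard_size.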
                                match outboard_location {
                                    OutboardLocation::Inline(_) => {
                                        let Ok(inline_outboard) = inline_outboard.get(hash) else {
                                            entry_error!(
                                                hash,
                                                "inline outboard can not be accessed"
                                            );
                                            continue;
                                        };
                                        let Some(inline_outboard) = inline_outboard else {
                                            entry_error!(hash, "inline outboard missing");
                                            continue;
                                        };
                                        let outboard_size = inline_outboard.value().len() as u64;
                                        if outboard_size != raw_outboard_size(data_size) {
                                            entry_error!(hash, "inline outboard size mismatch");
                                        }
                                    }
                                    OutboardLocation::Owned => {
                                        let Ok(metadata) =
                                            self.options.path.owned_outboard_path(&hash).metadata()
                                        else {
                                            entry_error!(
                                                hash,
                                                "owned outboard file does not exist"
                                            );
                                            continue;
                                        };
                                        let outboard_size = metadata.len();
                                        if outboard_size != raw_outboard_size(data_size) {
                                            entry_error!(hash, "owned outboard size mismatch");
                                        }
                                    }
                                    OutboardLocation::NotNeeded => {
                                        if raw_outboard_size(data_size) != 0 {
                                            entry_error!(
                                                hash,
                                                "outboard marked not needed, but data size requires an outboard"
                                            );
                                        }
                                    }
                                }
                            }
                            EntryState::Partial { .. } => {
                                if !self.options.path.owned_data_path(&hash).exists() {
                                    entry_error!(hash, "persistent partial entry has no data");
                                }
                                if !self.options.path.owned_outboard_path(&hash).exists() {
                                    entry_error!(hash, "persistent partial entry has no outboard");
                                }
                            }
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate blobs: {}", cause);
                }
            };
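            // Entries flagged via entry_error! above are only removed in
            // repair mode; a plain check leaves the store untouched.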
            if repair {
                info!("repairing - removing invalid entries found so far");
                for hash in &invalid_entries {
                    blobs.remove(hash)?;
                }
            }
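            // An inline table row is orphaned if its hash is no longer
            // referenced by the blobs table.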
            info!("checking for orphaned inline data");
            match inline_data.iter() {
                Ok(iter) => {
                    for item in iter {
                        let Ok((hash, _)) = item else {
                            error!("failed to access inline data item");
                            continue;
                        };
                        let hash = hash.value();
                        if !entries.contains(&hash) {
                            orphaned_inline_data.insert(hash);
                            entry_error!(hash, "orphaned inline data");
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate inline_data: {}", cause);
                }
            };
            info!("checking for orphaned inline outboard data");
            match inline_outboard.iter() {
                Ok(iter) => {
                    for item in iter {
                        let Ok((hash, _)) = item else {
                            error!("failed to access inline outboard item");
                            continue;
                        };
                        let hash = hash.value();
                        if !entries.contains(&hash) {
                            orphaned_inline_outboard.insert(hash);
                            entry_error!(hash, "orphaned inline outboard");
                        }
                    }
                }
                Err(cause) => {
                    error!("failed to iterate inline_outboard: {}", cause);
                }
            };
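            // Files in the data directory are named <hash as hex>.<ext>,
            // where ext is "data" for data files, "obao4" for outboard files,
            // and "sizes4" for sizes files. Anything else is unexpected.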
            info!("checking for unexpected or orphaned files");
            for entry in self.options.path.data_path.read_dir()? {
                let entry = entry?;
                let path = entry.path();
                if !path.is_file() {
                    warn!("unexpected entry in data directory: {}", path.display());
                    continue;
                }
                match path.extension().and_then(|x| x.to_str()) {
                    Some("data") => match path.file_stem().and_then(|x| x.to_str()) {
                        Some(stem) => {
                            let mut hash = [0u8; 32];
                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
                                warn!("unexpected data file in data directory: {}", path.display());
                                continue;
                            };
                            let hash = Hash::from(hash);
                            if !entries.contains(&hash) {
                                orphaned_data.insert(hash);
                                entry_warn!(hash, "orphaned data file");
                            }
                        }
                        None => {
                            warn!("unexpected data file in data directory: {}", path.display());
                        }
                    },
                    Some("obao4") => match path.file_stem().and_then(|x| x.to_str()) {
                        Some(stem) => {
                            let mut hash = [0u8; 32];
                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
                                warn!(
                                    "unexpected outboard file in data directory: {}",
                                    path.display()
                                );
                                continue;
                            };
                            let hash = Hash::from(hash);
                            if !entries.contains(&hash) {
                                orphaned_outboard.insert(hash);
                                entry_warn!(hash, "orphaned outboard file");
                            }
                        }
                        None => {
                            warn!(
                                "unexpected outboard file in data directory: {}",
                                path.display()
                            );
                        }
                    },
                    Some("sizes4") => match path.file_stem().and_then(|x| x.to_str()) {
                        Some(stem) => {
                            let mut hash = [0u8; 32];
                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
                                warn!(
                                    "unexpected sizes file in data directory: {}",
                                    path.display()
                                );
                                continue;
                            };
                            let hash = Hash::from(hash);
                            if !entries.contains(&hash) {
                                orphaned_sizes.insert(hash);
                                entry_warn!(hash, "orphaned sizes file");
                            }
                        }
                        None => {
                            warn!(
                                "unexpected sizes file in data directory: {}",
                                path.display()
                            );
                        }
                    },
                    _ => {
                        warn!("unexpected file in data directory: {}", path.display());
                    }
                }
            }
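            // Orphaned inline rows can be removed within this transaction;
            // orphaned files are only queued here and deleted after commit.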
            if repair {
                info!("repairing - removing orphaned files and inline data");
                for hash in orphaned_inline_data {
                    entry_info!(hash, "deleting orphaned inline data");
                    inline_data.remove(&hash)?;
                }
                for hash in orphaned_inline_outboard {
                    entry_info!(hash, "deleting orphaned inline outboard");
                    inline_outboard.remove(&hash)?;
                }
                for hash in orphaned_data {
                    tables.delete_after_commit.insert(hash, [BaoFilePart::Data]);
                }
                for hash in orphaned_outboard {
                    tables
                        .delete_after_commit
                        .insert(hash, [BaoFilePart::Outboard]);
                }
                for hash in orphaned_sizes {
                    tables
                        .delete_after_commit
                        .insert(hash, [BaoFilePart::Sizes]);
                }
            }
        }
        txn.commit()?;
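        // Deleting files only after a successful commit ensures that a failed
        // transaction never leaves the tables pointing at removed files.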
        if repair {
            info!("repairing - deleting orphaned files");
            for (hash, part) in delete_after_commit.into_inner() {
                let path = match part {
                    BaoFilePart::Data => self.options.path.owned_data_path(&hash),
                    BaoFilePart::Outboard => self.options.path.owned_outboard_path(&hash),
                    BaoFilePart::Sizes => self.options.path.owned_sizes_path(&hash),
                };
                entry_info!(hash, "deleting orphaned file: {}", path.display());
                if let Err(cause) = std::fs::remove_file(&path) {
                    entry_error!(hash, "failed to delete orphaned file: {}", cause);
                }
            }
        }
        Ok(())
    }
}