iroh_blobs/store/fs/
validate.rs

1//! Validation of the store's contents.
2use std::collections::BTreeSet;
3
4use redb::ReadableTable;
5
6use super::{
7    raw_outboard_size, tables::Tables, ActorResult, ActorState, DataLocation, EntryState, Hash,
8    OutboardLocation,
9};
10use crate::{
11    store::{fs::tables::BaoFilePart, ConsistencyCheckProgress, ReportLevel},
12    util::progress::BoxedProgressSender,
13};
14
15impl ActorState {
16    //! This performs a full consistency check. Eventually it will also validate
17    //! file content again, but that part is not yet implemented.
18    //!
19    //! Currently the following checks are performed for complete entries:
20    //!
21    //! Check that the data in the entries table is consistent with the data in
22    //! the inline_data and inline_outboard tables.
23    //!
24    //! For every entry where data_location is inline, the inline_data table
25    //! must contain the data. For every entry where
26    //! data_location is not inline, the inline_data table must not contain data.
27    //! Instead, the data must exist as a file in the data directory or be
28    //! referenced to one or many external files.
29    //!
30    //! For every entry where outboard_location is inline, the inline_outboard
31    //! table must contain the outboard. For every entry where outboard_location
32    //! is not inline, the inline_outboard table must not contain data, and the
33    //! outboard must exist as a file in the data directory. Outboards are never
34    //! external.
35    //!
36    //! In addition to these consistency checks, it is checked that the size of
37    //! the outboard is consistent with the size of the data.
38    //!
39    //! For partial entries, it is checked that the data and outboard files
40    //! exist.
41    //!
42    //! In addition to the consistency checks, it is checked that there are no
43    //! orphaned or unexpected files in the data directory. Also, all entries of
44    //! all tables are dumped at trace level. This is helpful for debugging and
45    //! also ensures that the data can be read.
46    //!
47    //! Note that during validation, a set of all hashes will be kept in memory.
48    //! So to validate exceedingly large stores, the validation process will
49    //! consume a lot of memory.
50    //!
51    //! In addition, validation is a blocking operation that will make the store
52    //! unresponsive for the duration of the validation.
53    pub(super) fn consistency_check(
54        &mut self,
55        db: &redb::Database,
56        repair: bool,
57        progress: BoxedProgressSender<ConsistencyCheckProgress>,
58    ) -> ActorResult<()> {
59        use crate::util::progress::ProgressSender;
60        let mut invalid_entries = BTreeSet::new();
61        macro_rules! send {
62            ($level:expr, $entry:expr, $($arg:tt)*) => {
63                if let Err(_) = progress.blocking_send(ConsistencyCheckProgress::Update { message: format!($($arg)*), level: $level, entry: $entry }) {
64                    return Ok(());
65                }
66            };
67        }
68        macro_rules! trace {
69            ($($arg:tt)*) => {
70                send!(ReportLevel::Trace, None, $($arg)*)
71            };
72        }
73        macro_rules! info {
74            ($($arg:tt)*) => {
75                send!(ReportLevel::Info, None, $($arg)*)
76            };
77        }
78        macro_rules! warn {
79            ($($arg:tt)*) => {
80                send!(ReportLevel::Warn, None, $($arg)*)
81            };
82        }
83        macro_rules! entry_warn {
84            ($hash:expr, $($arg:tt)*) => {
85                send!(ReportLevel::Warn, Some($hash), $($arg)*)
86            };
87        }
88        macro_rules! entry_info {
89            ($hash:expr, $($arg:tt)*) => {
90                send!(ReportLevel::Info, Some($hash), $($arg)*)
91            };
92        }
93        macro_rules! error {
94            ($($arg:tt)*) => {
95                send!(ReportLevel::Error, None, $($arg)*)
96            };
97        }
98        macro_rules! entry_error {
99            ($hash:expr, $($arg:tt)*) => {
100                invalid_entries.insert($hash);
101                send!(ReportLevel::Error, Some($hash), $($arg)*)
102            };
103        }
104        let mut delete_after_commit = Default::default();
105        let txn = db.begin_write()?;
106        {
107            let mut tables = Tables::new(&txn, &mut delete_after_commit)?;
108            let blobs = &mut tables.blobs;
109            let inline_data = &mut tables.inline_data;
110            let inline_outboard = &mut tables.inline_outboard;
111            let tags = &mut tables.tags;
112            let mut orphaned_inline_data = BTreeSet::new();
113            let mut orphaned_inline_outboard = BTreeSet::new();
114            let mut orphaned_data = BTreeSet::new();
115            let mut orphaned_outboardard = BTreeSet::new();
116            let mut orphaned_sizes = BTreeSet::new();
117            // first, dump the entire data content at trace level
118            trace!("dumping blobs");
119            match blobs.iter() {
120                Ok(iter) => {
121                    for item in iter {
122                        match item {
123                            Ok((k, v)) => {
124                                let hash = k.value();
125                                let entry = v.value();
126                                trace!("blob {} -> {:?}", hash.to_hex(), entry);
127                            }
128                            Err(cause) => {
129                                error!("failed to access blob item: {}", cause);
130                            }
131                        }
132                    }
133                }
134                Err(cause) => {
135                    error!("failed to iterate blobs: {}", cause);
136                }
137            }
138            trace!("dumping inline_data");
139            match inline_data.iter() {
140                Ok(iter) => {
141                    for item in iter {
142                        match item {
143                            Ok((k, v)) => {
144                                let hash = k.value();
145                                let data = v.value();
146                                trace!("inline_data {} -> {:?}", hash.to_hex(), data.len());
147                            }
148                            Err(cause) => {
149                                error!("failed to access inline data item: {}", cause);
150                            }
151                        }
152                    }
153                }
154                Err(cause) => {
155                    error!("failed to iterate inline_data: {}", cause);
156                }
157            }
158            trace!("dumping inline_outboard");
159            match inline_outboard.iter() {
160                Ok(iter) => {
161                    for item in iter {
162                        match item {
163                            Ok((k, v)) => {
164                                let hash = k.value();
165                                let data = v.value();
166                                trace!("inline_outboard {} -> {:?}", hash.to_hex(), data.len());
167                            }
168                            Err(cause) => {
169                                error!("failed to access inline outboard item: {}", cause);
170                            }
171                        }
172                    }
173                }
174                Err(cause) => {
175                    error!("failed to iterate inline_outboard: {}", cause);
176                }
177            }
178            trace!("dumping tags");
179            match tags.iter() {
180                Ok(iter) => {
181                    for item in iter {
182                        match item {
183                            Ok((k, v)) => {
184                                let tag = k.value();
185                                let value = v.value();
186                                trace!("tags {} -> {:?}", tag, value);
187                            }
188                            Err(cause) => {
189                                error!("failed to access tag item: {}", cause);
190                            }
191                        }
192                    }
193                }
194                Err(cause) => {
195                    error!("failed to iterate tags: {}", cause);
196                }
197            }
198
199            // perform consistency check for each entry
200            info!("validating blobs");
201            // set of a all hashes that are referenced by the blobs table
202            let mut entries = BTreeSet::new();
203            match blobs.iter() {
204                Ok(iter) => {
205                    for item in iter {
206                        let Ok((hash, entry)) = item else {
207                            error!("failed to access blob item");
208                            continue;
209                        };
210                        let hash = hash.value();
211                        entries.insert(hash);
212                        entry_info!(hash, "validating blob");
213                        let entry = entry.value();
214                        match entry {
215                            EntryState::Complete {
216                                data_location,
217                                outboard_location,
218                            } => {
219                                let data_size = match data_location {
220                                    DataLocation::Inline(_) => {
221                                        let Ok(inline_data) = inline_data.get(hash) else {
222                                            entry_error!(hash, "inline data can not be accessed");
223                                            continue;
224                                        };
225                                        let Some(inline_data) = inline_data else {
226                                            entry_error!(hash, "inline data missing");
227                                            continue;
228                                        };
229                                        inline_data.value().len() as u64
230                                    }
231                                    DataLocation::Owned(size) => {
232                                        let path = self.options.path.owned_data_path(&hash);
233                                        let Ok(metadata) = path.metadata() else {
234                                            entry_error!(hash, "owned data file does not exist");
235                                            continue;
236                                        };
237                                        if metadata.len() != size {
238                                            entry_error!(
239                                                hash,
240                                                "owned data file size mismatch: {}",
241                                                path.display()
242                                            );
243                                            continue;
244                                        }
245                                        size
246                                    }
247                                    DataLocation::External(paths, size) => {
248                                        for path in paths {
249                                            let Ok(metadata) = path.metadata() else {
250                                                entry_error!(
251                                                    hash,
252                                                    "external data file does not exist: {}",
253                                                    path.display()
254                                                );
255                                                invalid_entries.insert(hash);
256                                                continue;
257                                            };
258                                            if metadata.len() != size {
259                                                entry_error!(
260                                                    hash,
261                                                    "external data file size mismatch: {}",
262                                                    path.display()
263                                                );
264                                                invalid_entries.insert(hash);
265                                                continue;
266                                            }
267                                        }
268                                        size
269                                    }
270                                };
271                                match outboard_location {
272                                    OutboardLocation::Inline(_) => {
273                                        let Ok(inline_outboard) = inline_outboard.get(hash) else {
274                                            entry_error!(
275                                                hash,
276                                                "inline outboard can not be accessed"
277                                            );
278                                            continue;
279                                        };
280                                        let Some(inline_outboard) = inline_outboard else {
281                                            entry_error!(hash, "inline outboard missing");
282                                            continue;
283                                        };
284                                        let outboard_size = inline_outboard.value().len() as u64;
285                                        if outboard_size != raw_outboard_size(data_size) {
286                                            entry_error!(hash, "inline outboard size mismatch");
287                                        }
288                                    }
289                                    OutboardLocation::Owned => {
290                                        let Ok(metadata) =
291                                            self.options.path.owned_outboard_path(&hash).metadata()
292                                        else {
293                                            entry_error!(
294                                                hash,
295                                                "owned outboard file does not exist"
296                                            );
297                                            continue;
298                                        };
299                                        let outboard_size = metadata.len();
300                                        if outboard_size != raw_outboard_size(data_size) {
301                                            entry_error!(hash, "owned outboard size mismatch");
302                                        }
303                                    }
304                                    OutboardLocation::NotNeeded => {
305                                        if raw_outboard_size(data_size) != 0 {
306                                            entry_error!(
307                                                hash,
308                                                "outboard not needed but data size is not zero"
309                                            );
310                                        }
311                                    }
312                                }
313                            }
314                            EntryState::Partial { .. } => {
315                                if !self.options.path.owned_data_path(&hash).exists() {
316                                    entry_error!(hash, "persistent partial entry has no data");
317                                }
318                                if !self.options.path.owned_outboard_path(&hash).exists() {
319                                    entry_error!(hash, "persistent partial entry has no outboard");
320                                }
321                            }
322                        }
323                    }
324                }
325                Err(cause) => {
326                    error!("failed to iterate blobs: {}", cause);
327                }
328            };
329            if repair {
330                info!("repairing - removing invalid entries found so far");
331                for hash in &invalid_entries {
332                    blobs.remove(hash)?;
333                }
334            }
335            info!("checking for orphaned inline data");
336            match inline_data.iter() {
337                Ok(iter) => {
338                    for item in iter {
339                        let Ok((hash, _)) = item else {
340                            error!("failed to access inline data item");
341                            continue;
342                        };
343                        let hash = hash.value();
344                        if !entries.contains(&hash) {
345                            orphaned_inline_data.insert(hash);
346                            entry_error!(hash, "orphaned inline data");
347                        }
348                    }
349                }
350                Err(cause) => {
351                    error!("failed to iterate inline_data: {}", cause);
352                }
353            };
354            info!("checking for orphaned inline outboard data");
355            match inline_outboard.iter() {
356                Ok(iter) => {
357                    for item in iter {
358                        let Ok((hash, _)) = item else {
359                            error!("failed to access inline outboard item");
360                            continue;
361                        };
362                        let hash = hash.value();
363                        if !entries.contains(&hash) {
364                            orphaned_inline_outboard.insert(hash);
365                            entry_error!(hash, "orphaned inline outboard");
366                        }
367                    }
368                }
369                Err(cause) => {
370                    error!("failed to iterate inline_outboard: {}", cause);
371                }
372            };
373            info!("checking for unexpected or orphaned files");
374            for entry in self.options.path.data_path.read_dir()? {
375                let entry = entry?;
376                let path = entry.path();
377                if !path.is_file() {
378                    warn!("unexpected entry in data directory: {}", path.display());
379                    continue;
380                }
381                match path.extension().and_then(|x| x.to_str()) {
382                    Some("data") => match path.file_stem().and_then(|x| x.to_str()) {
383                        Some(stem) => {
384                            let mut hash = [0u8; 32];
385                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
386                                warn!("unexpected data file in data directory: {}", path.display());
387                                continue;
388                            };
389                            let hash = Hash::from(hash);
390                            if !entries.contains(&hash) {
391                                orphaned_data.insert(hash);
392                                entry_warn!(hash, "orphaned data file");
393                            }
394                        }
395                        None => {
396                            warn!("unexpected data file in data directory: {}", path.display());
397                        }
398                    },
399                    Some("obao4") => match path.file_stem().and_then(|x| x.to_str()) {
400                        Some(stem) => {
401                            let mut hash = [0u8; 32];
402                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
403                                warn!(
404                                    "unexpected outboard file in data directory: {}",
405                                    path.display()
406                                );
407                                continue;
408                            };
409                            let hash = Hash::from(hash);
410                            if !entries.contains(&hash) {
411                                orphaned_outboardard.insert(hash);
412                                entry_warn!(hash, "orphaned outboard file");
413                            }
414                        }
415                        None => {
416                            warn!(
417                                "unexpected outboard file in data directory: {}",
418                                path.display()
419                            );
420                        }
421                    },
422                    Some("sizes4") => match path.file_stem().and_then(|x| x.to_str()) {
423                        Some(stem) => {
424                            let mut hash = [0u8; 32];
425                            let Ok(_) = hex::decode_to_slice(stem, &mut hash) else {
426                                warn!(
427                                    "unexpected outboard file in data directory: {}",
428                                    path.display()
429                                );
430                                continue;
431                            };
432                            let hash = Hash::from(hash);
433                            if !entries.contains(&hash) {
434                                orphaned_sizes.insert(hash);
435                                entry_warn!(hash, "orphaned outboard file");
436                            }
437                        }
438                        None => {
439                            warn!(
440                                "unexpected outboard file in data directory: {}",
441                                path.display()
442                            );
443                        }
444                    },
445                    _ => {
446                        warn!("unexpected file in data directory: {}", path.display());
447                    }
448                }
449            }
450            if repair {
451                info!("repairing - removing orphaned files and inline data");
452                for hash in orphaned_inline_data {
453                    entry_info!(hash, "deleting orphaned inline data");
454                    inline_data.remove(&hash)?;
455                }
456                for hash in orphaned_inline_outboard {
457                    entry_info!(hash, "deleting orphaned inline outboard");
458                    inline_outboard.remove(&hash)?;
459                }
460                for hash in orphaned_data {
461                    tables.delete_after_commit.insert(hash, [BaoFilePart::Data]);
462                }
463                for hash in orphaned_outboardard {
464                    tables
465                        .delete_after_commit
466                        .insert(hash, [BaoFilePart::Outboard]);
467                }
468                for hash in orphaned_sizes {
469                    tables
470                        .delete_after_commit
471                        .insert(hash, [BaoFilePart::Sizes]);
472                }
473            }
474        }
475        txn.commit()?;
476        if repair {
477            info!("repairing - deleting orphaned files");
478            for (hash, part) in delete_after_commit.into_inner() {
479                let path = match part {
480                    BaoFilePart::Data => self.options.path.owned_data_path(&hash),
481                    BaoFilePart::Outboard => self.options.path.owned_outboard_path(&hash),
482                    BaoFilePart::Sizes => self.options.path.owned_sizes_path(&hash),
483                };
484                entry_info!(hash, "deleting orphaned file: {}", path.display());
485                if let Err(cause) = std::fs::remove_file(&path) {
486                    entry_error!(hash, "failed to delete orphaned file: {}", cause);
487                }
488            }
489        }
490        Ok(())
491    }
492}