docufort/integrity.rs
1//! This module contains the integrity check function for a docufort file.
2//!
3//! This will read the file from the beginning to the end, checking the integrity of the file.
4//! It will attempt to correct any errors it finds in the data using any available ECC data.
5
6use std::io::SeekFrom;
7
8use crate::{core::{Block, BlockInputs, BlockState}, read::{read_magic_number, verify_configs}, recovery::{try_read_block, BlockReadSummary}, ComponentTag, CorruptDataSegment, FileLike, ReadWriteError};
9
10
11/// The struct returned when we were able to recover the file.
12///
13/// Includes statistics on the file and the last block state.
14#[derive(Debug)]
15pub struct IntegrityCheckOk{
16 pub last_block_state:Option<BlockState>,
17 ///Number of errors we fixed and wrote back to the file
18 ///Does not indicate number of bytes corrected
19 ///To estimate: ECC_LEN/2 is number of correctable errors per 255 bytes
20 ///So if we did not return Err::Corruption, there was always less than that many errors per 'ecc chunk'.
21 pub errors_corrected: usize,
22 ///Number of bytes of 'Content' (without ECC data counted) in the file.
23 pub data_contents: u64,
24 ///Number of bytes of 'Content' in the compressed form (no ECC counted).
25 pub data_size_on_disk: u64,
26 ///Number of Blocks in file
27 pub num_blocks:usize,
28 ///This is the index up to which we checked
29 ///It may be in the middle of a block
30 pub file_len_checked:u64,
31 ///These are all the content data segments that are not 'as written'
32 ///They can either be corrupted and have no ECC or
33 ///they can be corrupted beyond what ECC can do.
34 pub corrupted_segments: Vec<CorruptDataSegment>,
35 ///Contains the block start position and the time stamp found there
36 pub block_times: Vec<(u64,u64)>
37
38}
39#[derive(Debug)]
40pub enum IntegrityErr{
41 Other(ReadWriteError),
42 ///This only returns if a Component Header (or hash) is corrupted.
43 ///We cannot process the file any farther. We only read Front to Back so the position is all the farther we checked the file.
44 ///The file may still be able to succeed at tail recovery if this corruption is earlier than the second to last block.
45 ///If found in the last block, then a tail recovery would truncate this block.
46 ///Integrity check handles the last block, so if you have this error then somehow part of the file got corrupted, badly.
47 Corruption(u64,ComponentTag), // TODO: Make a hash recovery routine in the unlikely event the hash is corrupt and nothing else is.
48 ///This is really an implementation error, where we find the wrong 'pattern' of headers. This should only occur in testing ideally.
49 InvalidBlockStructure{start_of_bad_component:u64},
50 ///Either the MAGIC_NUMBER, the V1 tag, or the ECC_LEN don't match this compiled program.
51 ///Most likely would happen if you upgraded or have multiple docufort wrappers that use a different ECC_LEN
52 ///You should only open docufort files that were written with the current compiled software.
53 FileConfigMisMatch
54}
55impl From<std::io::Error> for IntegrityErr{
56 fn from(value: std::io::Error) -> Self {
57 Self::Other(value.into())
58 }
59}
60impl From<ReadWriteError> for IntegrityErr{
61 fn from(value: ReadWriteError) -> Self {
62 Self::Other(value)
63 }
64}
65impl std::fmt::Display for IntegrityErr {
66 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
67 match self {
68 IntegrityErr::Other(err) => write!(f, "Other error: {}", err),
69 IntegrityErr::Corruption(pos, tag) => write!(f, "Corruption detected at position {} for component {:?}", pos, tag),
70 IntegrityErr::InvalidBlockStructure { start_of_bad_component } =>
71 write!(f, "Invalid block structure detected at position {}", start_of_bad_component),
72 IntegrityErr::FileConfigMisMatch => write!(f, "File configuration mismatch"),
73 }
74 }
75}
76impl std::error::Error for IntegrityErr {
77 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
78 match self {
79 IntegrityErr::Other(err) => Some(err),
80 _ => None,
81 }
82 }
83}
84/// This function will read a docufort file and check the integrity of the file.
85/// It will attempt to correct any errors it finds in the data using any available ECC data.
86/// If it finds a corruption that it cannot correct, it will return an error.
87/// If it finds a block that is not closed, it will return Ok, and the file_len_checked will be the position of the last complete segment.
88/// # Arguments
89/// * `file_path` - The path to the docufort file.
90/// # Returns
91/// A Result containing the summary of the check.
92/// ## Ok
93/// Contains the summary of the check.
94///
95/// Note: May return Ok if content is corrupted beyond ECC repair (or no ECC enabled). Check the `corrupted_segments` for details.
96/// This is because we can still read past the corruption and find the next block, and recover other data.
97/// This is not fatal to docufort, but it is a problem for the user's data.
98/// ## Err
99/// - File is not a docufort file
100/// - File is not written with the same configuration as this compiled program (ECC_LEN or version mismatch)
101/// - A Block Component is corrupted beyond repair, preventing further reading of the file
102/// - The block structure is invalid
103/// - An IO error occurred
104pub fn integrity_check_file<RW:FileLike, B: BlockInputs>(file: &mut RW) -> Result<IntegrityCheckOk, IntegrityErr> {
105 let mut file_len = file.len()?;
106 let mut errors_corrected = 0;
107 let mut data_contents = 0;
108 let mut data_size_on_disk = 0;
109 let mut num_blocks = 0;
110 let mut corrupted_segments = Vec::new();
111 let mut block_times = Vec::new();
112
113 if !verify_configs(file)?{return Err(IntegrityErr::FileConfigMisMatch)}
114 let mut last_state= None;
115 loop {
116 let cur_pos = file.seek(SeekFrom::Current(0))?;
117 let res = read_magic_number(file, true);
118 let after_read_pos = file.seek(SeekFrom::Current(0))?;
119 if cur_pos > file_len || after_read_pos > file_len || res.is_err() {//we read too far from when the fn was originally called.
120 //We set the file_len to reflect how far we have integrity checked
121 file_len = if cur_pos>file_len{file_len}else{cur_pos};
122 break;
123 }
124 errors_corrected += res?;
125 let bs = try_read_block::<_, B>(file, true,true)?;//if we get an error now, there is some non-integrity problem
126 last_state = Some(bs);
127 match last_state.as_ref().unwrap() {
128 BlockState::Closed(BlockReadSummary { errors_corrected: e, block, corrupted_content_blocks, block_start, block_start_timestamp, .. }) => {
129 errors_corrected += e;
130 corrupted_segments.extend_from_slice(corrupted_content_blocks.as_slice());
131 match block {
132 Block::A { middle, .. } => {
133 if let Some(decomp_len) = middle.compressed {
134 data_contents += decomp_len as u64;
135 data_size_on_disk += middle.data_len as u64;
136 }else{
137 data_contents += middle.data_len as u64;
138 data_size_on_disk += middle.data_len as u64;
139 }
140 },
141 Block::B { middle, .. } => middle.iter().for_each(|(_,c)|{
142 if let Some(decomp_len) = c.compressed {
143 data_contents += decomp_len as u64;
144 data_size_on_disk += c.data_len as u64;
145 }else{
146 data_contents += c.data_len as u64;
147 data_size_on_disk += c.data_len as u64;
148 }
149 }),
150 }
151 num_blocks += 1;
152 block_times.push((*block_start,*block_start_timestamp))
153 // let BlockEnd { hash, .. } = block.clone().take_end();
154 // assert_eq!(&hash_as_read[..],hash.hash());//impl assertion since we are error correcting every block
155 },
156 BlockState::OpenABlock { truncate_at } |
157 BlockState::OpenBBlock { truncate_at, .. } => {
158 //We set the file_len to reflect how far we have integrity checked
159 file_len = *truncate_at;
160 break;
161 },
162 BlockState::IncompleteStartHeader { truncate_at } => {
163 //We set the file_len to reflect how far we have integrity checked
164 file_len = *truncate_at;
165 break;
166 },
167 BlockState::InvalidBlockStructure { end_of_last_good_component, .. } =>{
168 return Err(IntegrityErr::InvalidBlockStructure { start_of_bad_component: *end_of_last_good_component})
169 }
170 BlockState::ProbablyNotStartHeader { start_from } => {
171 return Err(IntegrityErr::Corruption(*start_from,ComponentTag::StartHeader))
172 }
173 BlockState::DataCorruption { component_start, component_tag,.. } => {
174 return Err(IntegrityErr::Corruption(*component_start,*component_tag))
175 },
176 }
177 }
178 Ok(IntegrityCheckOk {
179 last_block_state: last_state,
180 errors_corrected,
181 data_contents,
182 data_size_on_disk,
183 num_blocks,
184 file_len_checked: file_len,
185 corrupted_segments,
186 block_times
187 })
188}