1use std::marker::PhantomData;
3use std::{
4 collections::HashSet,
5 ffi::OsStr,
6 fs,
7 io::BufRead,
8 path::{Path, PathBuf},
9};
10use thiserror::Error;
11
12pub use walkdir;
14
15fn read_file_to_digest_input(path: &Path, input: &mut impl digest::Digest) -> std::io::Result<()> {
17 let file = fs::File::open(path)?;
18
19 let mut reader = std::io::BufReader::new(file);
20
21 loop {
22 let length = {
23 let buffer = reader.fill_buf()?;
24 input.update(buffer);
25 buffer.len()
26 };
27 if length == 0 {
28 break;
29 }
30 reader.consume(length);
31 }
32
33 Ok(())
34}
35
36#[derive(Debug, Error)]
37pub enum DigestError {
38 #[error("could not convert OsStr string to utf8")]
39 OsStrConversionError,
40 #[error("io Error: {}", _0)]
41 IoError(std::io::Error),
42 #[error("walkdir Error: {}", _0)]
43 WalkdirError(walkdir::Error),
44 #[error("an entry that was supposed to be a file, contains sub-entries")]
45 FileWithSubentriesError,
46 #[error("file not supported: {}", _0)]
47 FileNotSupported(String),
48}
49
50impl From<std::io::Error> for DigestError {
51 fn from(err: std::io::Error) -> Self {
52 DigestError::IoError(err)
53 }
54}
55
56impl From<walkdir::Error> for DigestError {
57 fn from(err: walkdir::Error) -> Self {
58 DigestError::WalkdirError(err)
59 }
60}
61
/// Write-only sink handed to the `additional_data` callback, letting the
/// caller mix extra per-entry bytes into a digest.
pub struct AdditionalDataWriter<'a, D> {
    // Becomes true after the first non-empty write; used to lazily emit a
    // one-byte separator exactly once (see `input`).
    used: bool,
    // Hasher the additional bytes are folded into.
    hasher: &'a mut D,
}
67
68impl<'a, D> AdditionalDataWriter<'a, D>
69where
70 D: digest::Digest,
71{
72 pub fn input(&mut self, bytes: &[u8]) {
73 if !bytes.is_empty() {
74 if !self.used {
75 self.hasher.update([0]);
76 self.used = true;
77 }
78 self.hasher.update(bytes);
79 }
80 }
81}
82
/// Builder for [`RecursiveDigest`]: configure via `filter` and
/// `additional_data`, then call `build`.
pub struct RecursiveDigestBuilder<Digest, FFilter, FAData> {
    // Predicate deciding which directory entries participate in the digest.
    filter: FFilter,
    // Callback that may mix extra bytes into an entry's digest.
    additional_data: FAData,
    // Marks the digest algorithm type without storing an instance of it.
    digest: std::marker::PhantomData<Digest>,
}
88
89impl<D, FFilter, FAData> RecursiveDigestBuilder<D, FFilter, FAData>
90where
91 FFilter: Fn(&walkdir::DirEntry) -> bool,
92 FAData: Fn(&walkdir::DirEntry, &mut AdditionalDataWriter<'_, D>) -> Result<(), DigestError>,
93{
94 pub fn filter<F: Fn(&walkdir::DirEntry) -> bool>(
96 self,
97 filter: F,
98 ) -> RecursiveDigestBuilder<D, F, FAData> {
99 RecursiveDigestBuilder {
100 filter,
101 additional_data: self.additional_data,
102 digest: self.digest,
103 }
104 }
105
106 pub fn additional_data<
107 F: Fn(&walkdir::DirEntry, &mut AdditionalDataWriter<'_, D>) -> Result<(), DigestError>,
108 >(
109 self,
110 f: F,
111 ) -> RecursiveDigestBuilder<D, FFilter, F> {
112 RecursiveDigestBuilder {
113 filter: self.filter,
114 additional_data: f,
115 digest: self.digest,
116 }
117 }
118
119 pub fn build(self) -> RecursiveDigest<D, FFilter, FAData> {
120 RecursiveDigest {
121 digest: self.digest,
122 filter: self.filter,
123 additional_data: self.additional_data,
124 }
125 }
126}
127
/// Recursive digest calculator over a filesystem tree.
///
/// Construct via [`RecursiveDigest::new`] (which returns a builder), then
/// call `get_digest_of` on a root path.
pub struct RecursiveDigest<Digest, FFilter, FAData> {
    // Marks the digest algorithm type without storing an instance of it.
    digest: PhantomData<Digest>,
    // Predicate deciding which directory entries participate in the digest.
    filter: FFilter,
    // Callback that may mix extra bytes into an entry's digest.
    additional_data: FAData,
}
136
137impl<D>
138 RecursiveDigest<
139 D,
140 Box<dyn Fn(&walkdir::DirEntry) -> bool>,
141 Box<
142 dyn Fn(&walkdir::DirEntry, &mut AdditionalDataWriter<'_, D>) -> Result<(), DigestError>,
143 >,
144 >
145where
146 D: digest::Digest + digest::FixedOutput,
147{
148 #[must_use]
150 pub fn new() -> RecursiveDigestBuilder<
151 D,
152 Box<dyn Fn(&walkdir::DirEntry) -> bool>,
153 Box<
154 dyn Fn(&walkdir::DirEntry, &mut AdditionalDataWriter<'_, D>) -> Result<(), DigestError>,
155 >,
156 > {
157 RecursiveDigestBuilder {
158 filter: Box::new(|_| true),
159 additional_data: Box::new(|_, _| Ok(())),
160 digest: PhantomData,
161 }
162 }
163}
164
/// Feed the raw bytes of `s` into `digest`.
///
/// On unix an `OsStr` is an arbitrary byte sequence, so it can be hashed
/// directly without any conversion.
#[cfg(unix)]
fn hash_osstr<D: digest::Digest>(digest: &mut D, s: &OsStr) {
    use std::os::unix::ffi::OsStrExt;
    digest.update(s.as_bytes());
}
170
/// Feed the bytes of `s` into `digest` on non-unix platforms.
///
/// NOTE(review): `to_string_lossy` maps invalid sequences to U+FFFD, so two
/// distinct non-UTF-8 names could hash identically here, and digests may
/// differ from the unix byte-exact path — confirm this is acceptable.
#[cfg(not(unix))]
fn hash_osstr<D: digest::Digest>(digest: &mut D, s: &OsStr) {
    digest.update(s.to_string_lossy().as_bytes());
}
175
176impl<D, FFilter, FAData> RecursiveDigest<D, FFilter, FAData>
177where
178 FFilter: Fn(&walkdir::DirEntry) -> bool,
179 FAData: Fn(&walkdir::DirEntry, &mut AdditionalDataWriter<'_, D>) -> Result<(), DigestError>,
180 D: digest::Digest + digest::FixedOutput,
181{
182 pub fn get_digest_of(&self, root_path: &Path) -> Result<Vec<u8>, DigestError> {
183 let mut hashers = vec![];
184
185 fn flush_up_one_level<D: digest::Digest + digest::FixedOutput>(hashers: &mut Vec<D>) {
187 let hasher = hashers.pop().expect("must not be empty yet");
188 let h2 = hashers
189 .last_mut()
190 .expect("must not happen");
191 <D as digest::Digest>::update(h2, hasher.finalize_fixed().as_slice());
192 }
193
194 let base_depth = root_path.components().count();
195
196 let mut first = true;
197 for entry in walkdir::WalkDir::new(root_path)
198 .follow_links(false)
199 .sort_by(|a, b| a.path().cmp(b.path()))
200 .into_iter()
201 .filter_entry(|entry| {
202 if first {
204 debug_assert_eq!(root_path, entry.path());
205 first = false;
206 return true;
207 }
208
209 (self.filter)(entry)
210 })
211 {
212 let entry = entry?;
213 let entry_depth = entry.path().components().count();
214
215 debug_assert!(base_depth <= entry_depth);
216 let depth = entry_depth - base_depth;
217 let hasher_size_required = depth + 1;
218
219 while hasher_size_required <= hashers.len() {
223 flush_up_one_level(&mut hashers);
224 }
225 hashers.push(D::new());
226
227 debug_assert_eq!(hashers.len(), hasher_size_required);
228
229 let file_type = entry.file_type();
230
231 if 0 < depth {
234 let hasher = hashers.get_mut(depth - 1).expect("must not happen");
235
236 let mut name_hasher = D::new();
237 hash_osstr(
239 &mut name_hasher,
240 entry.path().file_name().expect("must have a file_name"),
241 );
242 (self.additional_data)(
244 &entry,
245 &mut AdditionalDataWriter {
246 hasher,
247 used: false,
248 },
249 )?;
250 <D as digest::Digest>::update(hasher, name_hasher.finalize_fixed().as_slice());
251 }
252
253 if file_type.is_file() {
255 self.read_content_of_file(
256 entry.path(),
257 hashers.last_mut().expect("must not happen"),
258 )?;
259 } else if file_type.is_symlink() {
260 self.read_content_of_symlink(
261 entry.path(),
262 hashers.last_mut().expect("must not happen"),
263 )?;
264 } else if file_type.is_dir() {
265 let hasher = hashers.last_mut().expect("must not happen");
266 <D as digest::Digest>::update(hasher, b"D");
267 } else {
268 return Err(DigestError::FileNotSupported(
269 entry.path().display().to_string(),
270 ));
271 }
272 }
273
274 loop {
275 if hashers.len() == 1 {
276 return Ok(hashers
277 .pop()
278 .expect("must not fail")
279 .finalize_fixed()
280 .to_vec());
281 }
282 flush_up_one_level(&mut hashers);
283 }
284 }
285
286 fn read_content_of_file(
287 &self,
288 full_path: &Path,
289 parent_hasher: &mut D,
290 ) -> Result<(), DigestError> {
291 <D as digest::Digest>::update(parent_hasher, b"F");
292 read_file_to_digest_input(full_path, parent_hasher)?;
293 Ok(())
294 }
295
296 fn read_content_of_symlink(
297 &self,
298 full_path: &Path,
299 parent_hasher: &mut D,
300 ) -> Result<(), DigestError> {
301 <D as digest::Digest>::update(parent_hasher, b"L");
302 <D as digest::Digest>::update(parent_hasher,
303 full_path
304 .read_link()?
305 .to_str()
306 .ok_or(DigestError::OsStrConversionError)?
307 .as_bytes(),
308 );
309 Ok(())
310 }
311}
312
313#[deprecated]
314pub fn get_recursive_digest_for_paths<D: digest::Digest + digest::FixedOutput, H>(
315 root_path: &Path,
316 paths: HashSet<PathBuf, H>,
317) -> Result<Vec<u8>, DigestError>
318where
319 H: std::hash::BuildHasher,
320{
321 let h = RecursiveDigest::<D, _, _>::new()
322 .filter(|entry| {
323 let rel_path = entry
324 .path()
325 .strip_prefix(root_path)
326 .expect("must be prefix");
327 paths.contains(rel_path)
328 })
329 .build();
330
331 h.get_digest_of(root_path)
332}
333
334#[deprecated]
335pub fn get_recursive_digest_for_dir<
336 Digest: digest::Digest + digest::FixedOutput,
337 H: std::hash::BuildHasher,
338>(
339 root_path: &Path,
340 rel_path_ignore_list: &HashSet<PathBuf, H>,
341) -> Result<Vec<u8>, DigestError> {
342 let h = RecursiveDigest::<Digest, _, _>::new()
343 .filter(|entry| {
344 let rel_path = entry
345 .path()
346 .strip_prefix(root_path)
347 .expect("must be prefix");
348 !rel_path_ignore_list.contains(rel_path)
349 })
350 .build();
351
352 h.get_digest_of(root_path)
353}