use sha2::digest::{self, OutputSizeUser, FixedOutputReset, FixedOutput, Reset, Update, Digest, HashMarker};
use sha2::digest::{generic_array::typenum::Unsigned, crypto_common::BlockSizeUser};
use sha2::{Sha256, Sha512};
use std::{io::Write, mem};

use crate::config::*;

static ZEROES: [u8; 128] = [0u8; 128];  // a somewhat arbitrary power of two that is >= the input block size of every supported hash

/// Trait for the inner hash algorithms we support (currently implemented for [`Sha256`] and [`Sha512`]).
///
/// It adds some information we need, some useful functions, and declares all the trait bounds we need
/// so we have them in one place.
pub trait InnerHash: Digest + BlockSizeUser + Clone + Default {
    /// The value of [`InnerHashAlgorithm`] that corresponds to this hash algorithm.
    const INNER_HASH_ALGORITHM: InnerHashAlgorithm;

    /// Updates the hash state with the given data, zero-padded to `padded_size` bytes; e.g.
    /// `update_padded(b"abc", 8)` hashes the same bytes as `update(b"abc\0\0\0\0\0")`.
    /// This will panic if `data.len() > padded_size`.
    fn update_padded(&mut self, data: &[u8], padded_size: usize) {
        self.update(data);
        self.update_zeroes(padded_size.checked_sub(data.len()).unwrap());
    }

    /// Updates the hash state with the given number of zero bytes.
    fn update_zeroes(&mut self, amount: usize) {
        let (quotient, remainder) = (amount / ZEROES.len(), amount % ZEROES.len());
        if remainder != 0 { self.update(&ZEROES[..remainder]); }
        for _ in 0..quotient { self.update(&ZEROES); }
    }
}

impl InnerHash for Sha256 {
    const INNER_HASH_ALGORITHM: InnerHashAlgorithm = InnerHashAlgorithm::Sha256;
}

impl InnerHash for Sha512 {
    const INNER_HASH_ALGORITHM: InnerHashAlgorithm = InnerHashAlgorithm::Sha512;
}

/// Logically, this represents a fixed-size block of data to be hashed (zero-padded if needed).
/// As an implementation detail, it only remembers the hash state and how many more bytes are
/// needed to fill the block, not the data itself.
#[derive(Clone)]
struct FixedSizeBlock<D> where D: InnerHash {
    inner: D,
    remaining: usize,
}

impl<D> FixedSizeBlock<D> where D: InnerHash {
    fn new<S: AsRef<[u8]> + Clone + Default>(config: &FsVerityConfig<D, S>) -> Self {
        Self { inner: config.salted_digest(), remaining: config.block_size }
    }

    /// Appends data to the block; panics if it doesn't fit.
    fn append(&mut self, data: &[u8]) {
        self.inner.update(data);
        self.remaining = self.remaining.checked_sub(data.len()).unwrap();
    }

    /// Fills the remaining space in the block with zero bytes.
    fn fill_to_end(&mut self) {
        self.inner.update_zeroes(self.remaining);
        self.remaining = 0;
    }

    /// Appends as much as possible to the block, returning the data that wouldn't fit. E.g. with
    /// 3 bytes of space remaining, `overflowing_append(b"hello")` appends `b"hel"` and returns `b"lo"`.
    fn overflowing_append<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
        let (a, b) = data.split_at(self.remaining.min(data.len()));
        self.append(a);
        b
    }

    /// Returns the final hash of the block, consuming it.
    fn finalize_into(mut self, dest: &mut digest::Output<D>) {
        self.fill_to_end();
        self.inner.finalize_into(dest);
    }

    /// Writes the final hash of the block into `dest`, then resets `self` to a fresh empty block built from the given config.
    fn finalize_into_and_reset<S: AsRef<[u8]> + Clone + Default>(&mut self, dest: &mut digest::Output<D>, config: &FsVerityConfig<D, S>) {
        self.fill_to_end();

        let Self {inner, ..} = mem::replace(self, Self::new(config));

        inner.finalize_into(dest);
    }
}

/// Configuration split out of [`FsVerityDigest`] so we can pass it to `FixedSizeBlock::finalize_into_and_reset` while `self.levels` is borrowed mutably.
#[derive(Clone)]
struct FsVerityConfig<D=Sha256, S=[u8; 0]> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    block_size: usize,
    /// We have to keep the actual salt around (not just its digest) as it is needed for the final hash operation.
    salt: S,
    salted_digest: D,
}

impl<D, S> FsVerityConfig<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    fn new(block_size: usize, salt: S) -> Self {
        // TODO error instead of panic?
        assert!(salt.as_ref().len() <= MAX_SALT_SIZE);
        assert!(block_size.is_power_of_two());
        assert!(block_size >= D::OutputSize::USIZE * 2);
        assert!(D::OutputSize::USIZE <= MAX_DIGEST_SIZE);

        let mut salted_digest = <D as digest::Digest>::new();
        // in practice this runs for at most one iteration, since MAX_SALT_SIZE is smaller than the hash input block size
        for chunk in salt.as_ref().chunks(D::BlockSize::USIZE) {
            salted_digest.update_padded(chunk, D::BlockSize::USIZE);
        }

        Self {
            block_size,
            salt,
            salted_digest,
        }
    }

    /// Returns an instance of the hash algorithm which has been fed the given salt,
    /// zero-padded to a multiple of the hash algorithm's input block size.
    fn salted_digest(&self) -> D {
        self.salted_digest.clone()
    }

    fn inner_hash_algorithm(&self) -> InnerHashAlgorithm {
        D::INNER_HASH_ALGORITHM
    }
}

/// Calculates an fs-verity measurement over the input data.
#[derive(Clone)]
pub struct FsVerityDigest<D=Sha256, S=[u8; 0]> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    /// The parameters of the verity hash
    config: FsVerityConfig<D, S>,
    /// The currently relevant hierarchy of blocks in the Merkle tree.
    levels: Vec<FixedSizeBlock<D>>,
}

impl<D> FsVerityDigest<D> where D: InnerHash {

    /// Creates a new instance of `FsVerityDigest` with an empty salt.
    ///
    /// This is probably what you want.
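    ///
    /// A minimal usage sketch (assuming this crate is available as `fs_verity`):
    ///
    /// ```ignore
    /// use sha2::digest::Digest;
    /// use std::io::Write;
    ///
    /// // feed data through the `Write` impl, then finalize via the `Digest` API
    /// let mut digest = fs_verity::FsVerityDigest::<sha2::Sha256>::new();
    /// digest.write_all(b"hello world").unwrap();
    /// let measurement = digest.finalize();
    /// ```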
    pub fn new() -> Self {
        Self::new_with_salt(Default::default())
    }
}

impl<D, S> FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {

    /// Creates a new instance of `FsVerityDigest` with the given salt. The salt will be mixed
    /// into every internal hash calculation.
    ///
    /// Note that the current Linux kernel does not allow you to read back the salt used for a
    /// particular verity-protected file, which may be a problem depending on your use case.
    ///
    /// This will panic if the salt is longer than [`MAX_SALT_SIZE`] bytes.
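    ///
    /// A sketch of use with a salt (any `AsRef<[u8]> + Clone + Default` type will do):
    ///
    /// ```ignore
    /// // the salt is mixed into every internal hash operation
    /// let digest = fs_verity::FsVerityDigest::<sha2::Sha256, [u8; 8]>::new_with_salt([0x42u8; 8]);
    /// ```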
    pub fn new_with_salt(salt: S) -> Self {
        Self::new_with_salt_and_block_size(salt, DEFAULT_BLOCK_SIZE)
    }

    /// Creates a new instance of `FsVerityDigest` with the given salt and a custom block size.
    ///
    /// This will panic if the salt is longer than [`MAX_SALT_SIZE`] bytes.
    ///
    /// If you want to be compatible with the Linux kernel implementation it is *not* a good idea
    /// to change the `block_size`, as the kernel currently requires it to be equal to the system
    /// page size, which is 4096 on most architectures. Some modern 64 bit ARM systems [have a
    /// 64kB page size](https://www.kernel.org/doc/Documentation/arm64/memory.txt) though.
    ///
    /// The block size must be a power of two, and it must be at least twice the size of the
    /// digests produced by the inner hash algorithm. This code will panic otherwise.
    pub fn new_with_salt_and_block_size(salt: S, block_size: usize) -> Self {

        let config = FsVerityConfig::new(block_size, salt);

        Self {
            config,
            levels: vec![],
        }
    }

    /// Returns the [`InnerHashAlgorithm`] value corresponding to the inner hash algorithm in use.
    pub fn inner_hash_algorithm(&self) -> InnerHashAlgorithm {
        self.config.inner_hash_algorithm()
    }
}

impl<D, S> Default for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    fn default() -> Self { Self::new_with_salt(Default::default()) }
}

impl<D, S> Update for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {

    fn update(&mut self, data: &[u8]) {
        // self.levels represents the hierarchy of currently-relevant Merkle tree blocks.
        // level 0 is filled with input data. when the block at level n fills up, the hash of its
        // contents is appended to the block at level n + 1, and it is reset to an empty state.
        // this process can repeat if that causes the next level to fill up and so on.
        //
        // we do not actually keep the data written into each block, only the hash state.
        //
        // invariants:
        // - level 0 is (once it's created) never empty. it *may* be completely full.
        // - levels 1..n are never full, they always have room for one more hash. they *may* be empty.
        // - overflow is never larger than self.block_size
        //
        // the reason for the asymmetry between flushing of level[0] and the others is that it makes
        // flushing the final state (at the end of file) a lot simpler, because it guarantees that
        // each level will produce exactly one more digest for the next level during the final flush.
        // things could be a bit simpler if we always got input data in multiples of block_size and
        // if we knew the amount of data ahead of time so we could special-case the last block.
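        //
        // as a worked example (with the defaults, block_size = 4096 and SHA-256, i.e. 32-byte
        // digests): each level-1 block collects 4096 / 32 = 128 digests, so it covers
        // 128 * 4096 bytes = 512 KiB of input; each level-2 block covers 64 MiB; and so on.
        // a new top level is only added each time the input grows by another factor of 128.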
        for chunk in data.chunks(self.config.block_size) {

            // keep moving up the hierarchy as long as dealing with the current level produces data
            // that needs to be appended to the next level.
            let mut keep_space_for_one_digest = false;
            let mut last_digest: digest::Output<Self>;
            let mut overflow = chunk;  // input data is treated as overflow into level[0]
            for level in self.levels.iter_mut() {

                // the overflowing_append call will only ever split the data in `overflow` across
                // two blocks when writing input data (into level 0). splitting a digest across
                // two blocks would be incorrect, so it is important that this never happens, for
                // two reasons: first, valid block sizes and all currently known digest sizes are
                // powers of two, so the block size is always an exact multiple of the digest
                // size. second, as mentioned above, we always make sure there is room for one
                // more full digest.
                overflow = level.overflowing_append(overflow);
                if keep_space_for_one_digest {
                    // done if there is enough space left in this level for one full digest
                    if level.remaining >= D::OutputSize::USIZE {
                        assert!(overflow.is_empty());  // if remaining > 0 there can't be overflow
                        break;
                    }
                } else {
                    // done if there was no overflow, even if the block is now totally full
                    if overflow.is_empty() { break; }
                }

                // can't write directly into last_digest because overflow is (sometimes) a
                // reference to last_digest, so we have to wait until we're done with overflow.
                let mut tmp: digest::Output<Self> = Default::default();
                level.finalize_into_and_reset(&mut tmp, &self.config);
                level.append(overflow);
                last_digest = tmp;

                overflow = &last_digest;

                keep_space_for_one_digest = true;  // only false for level[0]
            }

            // if there is still overflow, add a new top level to the Merkle tree
            if !overflow.is_empty() {

                // note that this is very unlikely: with default settings you would have to write
                // more bytes than a u64 can count before hitting this.
                assert!(self.levels.len() < MAX_LEVELS);

                let mut level = FixedSizeBlock::new(&self.config);
                level.append(overflow);
                self.levels.push(level);
            }
        }
    }
}

impl<D, S> OutputSizeUser for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    type OutputSize = D::OutputSize;
}

impl<D, S> FixedOutput for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {

    fn finalize_into(mut self, out: &mut digest::Output<Self>) {

        // a block is block_size bytes; a digest is D::OutputSize::USIZE bytes.
        // dividing them gives the "compression factor", meaning how many times smaller the
        // representation of the data becomes each time it moves up a level in the tree.
        // this means that for every level we go up, each byte in that level represents
        // this many times more input bytes than the previous level.
        // we will use this to recover the total number of bytes written from the number
        // of bytes written to each level. this is needed to calculate the final digest.
        // we could also have tracked the number of bytes written directly of course, but
        // besides showing off I guess this is a good consistency check.
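        // (with the defaults, block_size = 4096 and SHA-256, this factor is 4096 / 32 = 128,
        // so scale runs 1, 128, 16384, ... as we walk up the levels.)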
        let compression_factor = self.config.block_size / D::OutputSize::USIZE;
        let mut total_size: usize = 0;
        let mut scale: usize = 1;  // at level[0], each byte represents 1 input byte

        // flush all levels, and calculate the hash of the top level. in the docs for fs_verity
        // this is called the "root hash". zero length files are defined to have a root hash of
        // all zeroes, and we can comply with this easily by initializing last_digest to zero.
        // the root hash is ambiguous by itself, since it is simply a hash of block_size bytes
        // of data, and that data could have been either file data or digests of other blocks.
        // you always need the file size as well to properly interpret the root hash.
        // this means there is no additional risk of ambiguity if a block is ever discovered
        // which happens to hash to all zeros.
        let mut last_digest: digest::Output<Self> = Default::default();
        let mut overflow: &[u8] = &[];
        for mut level in self.levels.drain(..) {
            total_size += scale * (self.config.block_size - level.remaining);
            level.append(overflow);
            level.finalize_into(&mut last_digest);
            overflow = &last_digest;
            scale *= compression_factor;
        }

        // the root hash, file size, hash algorithm, and salt are combined into a structure
        // called a 'verity descriptor'. the (salted) hash of this data is the final result,
        // and it is called a 'verity measurement'.
        // https://www.kernel.org/doc/html/latest/filesystems/fsverity.html#fs-verity-descriptor
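        //
        // for reference, the descriptor's byte layout (the kernel's struct fsverity_descriptor):
        //   u8   version         (must be 1)
        //   u8   hash_algorithm
        //   u8   log_blocksize   (log2 of the Merkle tree block size)
        //   u8   salt_size
        //   le32 reserved        (must be 0)
        //   le64 data_size
        //   u8   root_hash[64]
        //   u8   salt[32]
        //   u8   reserved[144]   (must be 0)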

        let mut descriptor: D = self.config.salted_digest();
        descriptor.update(&[1]);
        descriptor.update(&[self.config.inner_hash_algorithm() as u8]);
        descriptor.update(&[self.config.block_size.trailing_zeros() as u8]);
        descriptor.update(&[self.config.salt.as_ref().len() as u8]);
        descriptor.update(&[0; 4]);
        descriptor.update(&(total_size as u64).to_le_bytes());
        descriptor.update_padded(&last_digest, MAX_DIGEST_SIZE);
        descriptor.update_padded(self.config.salt.as_ref(), MAX_SALT_SIZE);
        descriptor.update_zeroes(144);

        descriptor.finalize_into(out);
    }
}

/// NOTE: This reports the base hash function's input block size, *not* the Merkle tree block size you
/// specify when creating the `FsVerityDigest`!
///
/// While this may be confusing, it is probably more faithful to the purpose of the `BlockSizeUser` trait.
/// We don't need to buffer an entire Merkle tree block in memory; data can be efficiently processed
/// in chunks of the base hash's input block size.
impl<D, S> BlockSizeUser for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    /// Equal to the inner hash algorithm's block size.
    type BlockSize = D::BlockSize;
}

/// Resets to a blank state, but with the same Merkle tree block size and salt.
impl<D, S> Reset for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    fn reset(&mut self) {
        // Pretty basic implementation but good enough
        *self = Self::new_with_salt_and_block_size(self.config.salt.clone(), self.config.block_size);
    }
}

impl<D, S> FixedOutputReset for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    fn finalize_into_reset(&mut self, out: &mut digest::Output<Self>) {
        // Pretty basic implementation but good enough
        let new = Self::new_with_salt_and_block_size(self.config.salt.clone(), self.config.block_size);
        let old = mem::replace(self, new);
        FixedOutput::finalize_into(old, out);
    }
}

impl<D, S> Write for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        Update::update(self, buf);
        Ok(buf.len())
    }

    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

impl<D, S> HashMarker for FsVerityDigest<D, S> where D: InnerHash, S: AsRef<[u8]> + Clone + Default {}

/// Alias for `FsVerityDigest<Sha256>`
pub type FsVeritySha256<S> = FsVerityDigest<Sha256, S>;

/// Alias for `FsVerityDigest<Sha512>`
pub type FsVeritySha512<S> = FsVerityDigest<Sha512, S>;

/// For trait objects of [`FsVerityDigest`], when the inner hash is not statically known
pub trait DynFsVerityDigest: sha2::digest::DynDigest + Write {
    fn inner_hash_algorithm(&self) -> InnerHashAlgorithm;
}
impl<D: InnerHash + 'static, S: AsRef<[u8]> + Clone + Default + 'static> DynFsVerityDigest for FsVerityDigest<D, S> {
    fn inner_hash_algorithm(&self) -> InnerHashAlgorithm {
        self.config.inner_hash_algorithm()
    }
}

/// Like [`FsVerityDigest::new`], but you can choose the hash algorithm at runtime.
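///
/// A usage sketch (assuming this crate is available as `fs_verity`; this mirrors the tests below):
///
/// ```ignore
/// // choose the algorithm at runtime, stream a file through it, and finalize
/// let mut digest = fs_verity::new_dyn(fs_verity::InnerHashAlgorithm::Sha256);
/// std::io::copy(&mut std::fs::File::open("some/file").unwrap(), &mut digest).unwrap();
/// let measurement = digest.finalize();  // `DynDigest::finalize` consumes the boxed digest
/// ```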
pub fn new_dyn(inner_hash: InnerHashAlgorithm) -> Box<dyn DynFsVerityDigest> {
    match inner_hash {
        InnerHashAlgorithm::Sha256 => { Box::new(FsVeritySha256::new()) }
        InnerHashAlgorithm::Sha512 => { Box::new(FsVeritySha512::new()) }
    }
}

/// Like [`FsVerityDigest::new_with_salt`], but you can choose the hash algorithm at runtime.
///
/// Please check the linked function for additional notes about specifying a salt.
pub fn new_dyn_with_salt<S: AsRef<[u8]> + Clone + Default + 'static>(inner_hash: InnerHashAlgorithm, salt: S) -> Box<dyn DynFsVerityDigest> {
    match inner_hash {
        InnerHashAlgorithm::Sha256 => { Box::new(FsVeritySha256::new_with_salt(salt)) }
        InnerHashAlgorithm::Sha512 => { Box::new(FsVeritySha512::new_with_salt(salt)) }
    }
}

/// Like [`FsVerityDigest::new_with_salt_and_block_size`], but you can choose the hash algorithm at runtime.
///
/// Please check the linked function for additional notes about specifying a salt and block size.
pub fn new_dyn_with_salt_and_block_size<S: AsRef<[u8]> + Clone + Default + 'static>(inner_hash: InnerHashAlgorithm, salt: S, block_size: usize) -> Box<dyn DynFsVerityDigest> {
    match inner_hash {
        InnerHashAlgorithm::Sha256 => { Box::new(FsVeritySha256::new_with_salt_and_block_size(salt, block_size)) }
        InnerHashAlgorithm::Sha512 => { Box::new(FsVeritySha512::new_with_salt_and_block_size(salt, block_size)) }
    }
}

#[cfg(test)]
mod tests {
    use std::io::BufReader;
    use std::fs::File;
    use crate::InnerHashAlgorithm;

    use super::new_dyn;

    #[test]
    fn test_testfiles() {

        // 'longfile' takes a while in debug mode, about 20 seconds for me.
        // in release mode it takes about a second.
        // sha256:e228078ebe9c4f7fe0c5d6a76fb2e317f5ea8bdfb227d7741e5c57cff739b5fa testfiles/longfile
        let testfiles = "
        sha256:3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95 testfiles/empty
        sha256:f5c2b9ded1595acfe8a996795264d488dd6140531f6a01f8f8086a83fd835935 testfiles/hashblock_0_0
        sha256:5c00a54bd1d8341d7bbad060ff1b8e88ed2646d7bb38db6e752cd1cff66c0a78 testfiles/hashblock_0_-1
        sha256:a7abb76568871169a79104d00679fae6521dfdb2a2648e380c02b10e96e217ff testfiles/hashblock_0_1
        sha256:c4b519068d8c8c68fd5e362fc3526c5b11e15f8eb72d4678017906f9e7f2d137 testfiles/hashblock_-1_0
        sha256:09510d2dbb55fa16f2768165c42d19c4da43301dfaa05705b2ecb4aaa4a5686a testfiles/hashblock_1_0
        sha256:7aa0bb537c623562f898386ac88acd319267e4ab3200f3fd1cf648cfdb4a0379 testfiles/hashblock_-1_-1
        sha256:f804e9777f91d3697ca015303c23251ad3d80205184cfa3d1066ab28cb906330 testfiles/hashblock_-1_1
        sha256:26159b4fc68c63881c25c33b23f2583ffaa64fee411af33c3b03238eea56755c testfiles/hashblock_1_-1
        sha256:57bed0934bf3ab4610d54938f03cff27bd0d9d76c9a77e283f9fb2b7e29c5ab8 testfiles/hashblock_1_1
        sha256:3fd7a78101899a79cd337b1b4e5414be8bcb376b133370156ef6e65026d930ed testfiles/oneblock
        sha256:c0b9455d545b6b1ee5e7b227bd1ed463aaa530a4840dcd93465163a2b3aff0da testfiles/oneblockplusonebyte
        sha256:9845e616f7d2f7a1cd6742f0546a36d2e74d4eb8ae7d9bdc0b0df982c27861b7 testfiles/onebyte
        ".trim().lines().map(|l| {
            let l = l.trim();
            let (digest, path) = l.split_once(" ").unwrap();
            let (digest_type, digest) = digest.split_once(":").unwrap();
            let digest_type = digest_type.parse::<super::InnerHashAlgorithm>().unwrap();
            let digest = hex::decode(digest).unwrap();
            (digest_type, digest, path)
        }).collect::<Vec<_>>();

        for (digest_type, digest, path) in testfiles {
            assert!(digest_type == InnerHashAlgorithm::Sha256);
            let mut f = BufReader::new(File::open(path).unwrap());
            let mut tmp = new_dyn(digest_type);
            std::io::copy(&mut f, &mut tmp).unwrap();
            let out = tmp.finalize();

            let expected = hex::encode(&digest);
            let found = hex::encode(&out);
            assert!(out.as_ref() == &digest, "expected: {} found: {} for file: {}", expected, found, path);
        }
    }
}