// fast_rsync/signature.rs

1use std::collections::HashMap;
2use std::error::Error;
3use std::fmt;
4
5use arrayref::array_ref;
6
7use crate::consts::{BLAKE2_MAGIC, MD4_MAGIC};
8use crate::crc::Crc;
9use crate::hasher::BuildCrcHasher;
10use crate::hashmap_variant::SecondLayerMap;
11use crate::md4::{md4, md4_many, MD4_SIZE};
12
/// An rsync signature.
///
/// A signature contains hashed information about a block of data. It is used to compute a delta
/// against that data.
///
/// Values are created by [`Signature::calculate`] or [`Signature::deserialize`]; the three scalar
/// fields mirror the header stored at the start of the serialized bytes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Signature {
    /// Hash family, as identified by the magic number at the start of `signature`.
    signature_type: SignatureType,
    /// Block length in bytes, as recorded in the serialized header.
    block_size: u32,
    /// Number of crypto-hash bytes stored per block, as recorded in the serialized header.
    crypto_hash_size: u32,
    // This contains a valid serialized signature which must contain the correct magic for `signature_type`
    // and a matching `block_size` and `crypto_hash_size`.
    signature: Vec<u8>,
}
26
/// A signature with a block index, suitable for calculating deltas.
///
/// Built by [`Signature::index`]. The crypto-hash slices used as keys in `blocks` borrow from the
/// serialized bytes of the originating [`Signature`], which is what the `'a` lifetime tracks.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IndexedSignature<'a> {
    /// Hash family, copied from the source signature.
    pub(crate) signature_type: SignatureType,
    /// Block length in bytes, copied from the source signature.
    pub(crate) block_size: u32,
    /// Number of crypto-hash bytes kept per block, copied from the source signature.
    pub(crate) crypto_hash_size: u32,
    /// crc -> crypto hash -> block index
    pub(crate) blocks: HashMap<Crc, SecondLayerMap<&'a [u8], u32>, BuildCrcHasher>,
}
36
/// The hash type used within the signature.
/// Note that this library generally only supports MD4 signatures.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum SignatureType {
    /// Signature whose header carries `MD4_MAGIC`; the only kind `Signature::calculate` produces.
    Md4,
    /// Signature whose header carries `BLAKE2_MAGIC`.
    Blake2,
}
44
45impl SignatureType {
46    const SIZE: usize = 4;
47    fn from_magic(bytes: [u8; Self::SIZE]) -> Option<Self> {
48        match u32::from_be_bytes(bytes) {
49            BLAKE2_MAGIC => Some(SignatureType::Blake2),
50            MD4_MAGIC => Some(SignatureType::Md4),
51            _ => None,
52        }
53    }
54    fn to_magic(self) -> [u8; Self::SIZE] {
55        match self {
56            SignatureType::Md4 => MD4_MAGIC,
57            SignatureType::Blake2 => BLAKE2_MAGIC,
58        }
59        .to_be_bytes()
60    }
61}
62
/// Indicates that a signature was not valid.
///
/// Returned by [`Signature::deserialize`]. The private unit field keeps the type opaque: it
/// carries no detail about the failure and cannot be constructed outside this module.
#[derive(Debug)]
pub struct SignatureParseError(());
66
67impl fmt::Display for SignatureParseError {
68    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
69        f.write_str("invalid or unsupported signature")
70    }
71}
72
// No methods are overridden: the defaults suffice because this error wraps no underlying cause.
impl Error for SignatureParseError {}
74
/// Options for [Signature::calculate].
///
/// Both values are written into the signature header as given. [Signature::calculate] panics
/// if either one is out of range.
#[derive(Copy, Clone, Debug)]
pub struct SignatureOptions {
    /// The granularity of the signature. Must be greater than zero.
    /// Smaller block sizes yield larger, but more precise, signatures.
    pub block_size: u32,
    /// The number of bytes to use from the MD4 hash. Must be at most 16.
    /// The larger this is, the less likely that a delta will be mis-applied.
    pub crypto_hash_size: u32,
}
85
86impl Signature {
    /// Length in bytes of the fixed header that precedes the per-block entries in a serialized
    /// signature: the magic number followed by two big-endian `u32` fields.
    const HEADER_SIZE: usize = SignatureType::SIZE + 2 * 4; // magic, block_size, then crypto_hash_size
88
89    /// Compute an MD4 signature for the given data.
90    ///
91    /// `options.block_size` must be greater than zero. `options.crypto_hash_size` must be at most 16, the length of an MD4 hash.
92    /// Panics if the provided options are invalid.
93    pub fn calculate(buf: &[u8], options: SignatureOptions) -> Signature {
94        assert!(options.block_size > 0);
95        assert!(options.crypto_hash_size <= MD4_SIZE as u32);
96        let num_blocks = buf.chunks(options.block_size as usize).len();
97
98        let signature_type = SignatureType::Md4;
99
100        let mut signature = Vec::with_capacity(
101            Self::HEADER_SIZE + num_blocks * (Crc::SIZE + options.crypto_hash_size as usize),
102        );
103
104        signature.extend_from_slice(&signature_type.to_magic());
105        signature.extend_from_slice(&options.block_size.to_be_bytes());
106        signature.extend_from_slice(&options.crypto_hash_size.to_be_bytes());
107
108        // Hash all the blocks (with the CRC as well as MD4)
109        let chunks = buf.chunks_exact(options.block_size as usize);
110        let remainder = chunks.remainder();
111        for (block, md4_hash) in md4_many(chunks).chain(if remainder.is_empty() {
112            None
113        } else {
114            // Manually tack on the last block if necessary, since `md4_many`
115            // requires every block to be identical in size
116            Some((remainder, md4(remainder)))
117        }) {
118            // would be nice to use `chunks_exact_mut`, but it doesn't work for zero sizes
119            let crc = Crc::new().update(block);
120            let crypto_hash = &md4_hash[..options.crypto_hash_size as usize];
121            signature.extend_from_slice(&crc.to_bytes());
122            signature.extend_from_slice(crypto_hash);
123        }
124        Signature {
125            signature_type: SignatureType::Md4,
126            block_size: options.block_size,
127            crypto_hash_size: options.crypto_hash_size,
128            signature,
129        }
130    }
131
132    /// Read a binary signature.
133    pub fn deserialize(signature: Vec<u8>) -> Result<Signature, SignatureParseError> {
134        if signature.len() < Self::HEADER_SIZE {
135            return Err(SignatureParseError(()));
136        }
137        let signature_type = SignatureType::from_magic(*array_ref![signature, 0, 4])
138            .ok_or(SignatureParseError(()))?;
139        let block_size = u32::from_be_bytes(*array_ref![signature, 4, 4]);
140        let crypto_hash_size = u32::from_be_bytes(*array_ref![signature, 8, 4]);
141        let block_signature_size = Crc::SIZE + crypto_hash_size as usize;
142        if (signature.len() - Self::HEADER_SIZE) % block_signature_size != 0 {
143            return Err(SignatureParseError(()));
144        }
145        Ok(Signature {
146            signature_type,
147            block_size,
148            crypto_hash_size,
149            signature,
150        })
151    }
152
153    /// Get the serialized form of this signature.
154    pub fn serialized(&self) -> &[u8] {
155        &self.signature
156    }
157
158    /// Get ownership of the serialized form of this signature.
159    pub fn into_serialized(self) -> Vec<u8> {
160        self.signature
161    }
162
163    fn blocks(&self) -> impl ExactSizeIterator<Item = (Crc, &[u8])> {
164        self.signature[Self::HEADER_SIZE..]
165            .chunks(Crc::SIZE + self.crypto_hash_size as usize)
166            .map(|b| {
167                (
168                    Crc::from_bytes(*array_ref!(b, 0, Crc::SIZE)),
169                    &b[Crc::SIZE..],
170                )
171            })
172    }
173
174    /// Convert a signature to a form suitable for computing deltas.
175    pub fn index(&self) -> IndexedSignature<'_> {
176        let blocks = self.blocks();
177        let mut block_index: HashMap<Crc, SecondLayerMap<&[u8], u32>, BuildCrcHasher> =
178            HashMap::with_capacity_and_hasher(blocks.len(), BuildCrcHasher::default());
179        for (idx, (crc, crypto_hash)) in blocks.enumerate() {
180            block_index
181                .entry(crc)
182                .or_default()
183                .insert(crypto_hash, idx as u32);
184        }
185
186        // Multiple blocks having the same `Crc` value means that the hashmap will reserve more
187        // capacity than needed. This is particularly noticable when `self.blocks` contains a very
188        // large number of values
189        block_index.shrink_to_fit();
190
191        IndexedSignature {
192            signature_type: self.signature_type,
193            block_size: self.block_size,
194            crypto_hash_size: self.crypto_hash_size,
195            blocks: block_index,
196        }
197    }
198}