chksum_hash_md5/lib.rs
1//! This crate provides an implementation of the MD5 hash function based on [RFC 1321: The MD5 Message-Digest Algorithm](https://tools.ietf.org/html/rfc1321).
2//!
3//! # Setup
4//!
5//! To use this crate, add the following entry to your `Cargo.toml` file in the `dependencies` section:
6//!
7//! ```toml
8//! [dependencies]
9//! chksum-hash-md5 = "0.0.1"
10//! ```
11//!
12//! Alternatively, you can use the [`cargo add`](https://doc.rust-lang.org/cargo/commands/cargo-add.html) subcommand:
13//!
14//! ```sh
15//! cargo add chksum-hash-md5
16//! ```
17//!
18//! # Batch Processing
19//!
20//! The digest of known-size data can be calculated with the [`hash`] function.
21//!
22//! ```rust
23//! use chksum_hash_md5 as md5;
24//!
25//! let digest = md5::hash("example data");
26//! assert_eq!(
27//! digest.to_hex_lowercase(),
28//! "5c71dbb287630d65ca93764c34d9aa0d"
29//! );
30//! ```
31//!
32//! # Stream Processing
33//!
34//! The digest of data streams can be calculated chunk-by-chunk with a consumer created by calling the [`default`] function.
35//!
36//! ```rust
37//! // Import all necessary items
38//! # use std::io;
39//! # use std::path::PathBuf;
40//! use std::fs::File;
41//! use std::io::Read;
42//!
43//! use chksum_hash_md5 as md5;
44//!
45//! # fn wrapper(path: PathBuf) -> io::Result<()> {
46//! // Create a hash instance
47//! let mut hash = md5::default();
48//!
49//! // Open a file and create a buffer for incoming data
50//! let mut file = File::open(path)?;
51//! let mut buffer = vec![0; 64];
52//!
53//! // Iterate chunk by chunk
54//! while let Ok(count) = file.read(&mut buffer) {
55//! // EOF reached, exit loop
56//! if count == 0 {
57//! break;
58//! }
59//!
60//! // Update the hash with data
61//! hash.update(&buffer[..count]);
62//! }
63//!
64//! // Calculate the digest
65//! let digest = hash.digest();
66//! // Cast the digest to hex and compare
67//! assert_eq!(
68//! digest.to_hex_lowercase(),
69//! "5c71dbb287630d65ca93764c34d9aa0d"
70//! );
71//! # Ok(())
72//! # }
73//! ```
74//!
75//! # Internal Buffering
76//!
77//! An internal buffer is utilized due to the unknown size of data chunks.
78//!
79//! The size of this buffer is at least as large as one hash block of data processed at a time.
80//!
81//! To mitigate buffering-related performance issues, ensure the length of processed chunks is a multiple of the block size.
82//!
83//! # Input Type
84//!
85//! Anything that implements `AsRef<[u8]>` can be passed as input.
86//!
87//! ```rust
88//! use chksum_hash_md5 as md5;
89//!
90//! let digest = md5::default()
91//! .update("str")
92//! .update(b"bytes")
93//! .update([0x75, 0x38])
94//! .digest();
95//! assert_eq!(
96//! digest.to_hex_lowercase(),
97//! "31d94eaa1dc8532e8abb3bf607143bb6"
98//! );
99//! ```
100//!
101//! Since [`Digest`] implements `AsRef<[u8]>`, digests can be chained to calculate hash of a hash digest.
102//!
103//! ```rust
104//! use chksum_hash_md5 as md5;
105//!
106//! let digest = md5::hash(b"example data");
107//! let digest = md5::hash(digest);
108//! assert_eq!(
109//! digest.to_hex_lowercase(),
110//! "ee0e86bdb46a9046da76942e807bba7c"
111//! );
112//! ```
113//!
114//! # Disclaimer
115//!
116//! The MD5 hash function should be used only for backward compatibility due to security issues.
117//!
118//! Check [RFC 6151: Updated Security Considerations for the MD5 Message-Digest and the HMAC-MD5 Algorithms](https://www.rfc-editor.org/rfc/rfc6151) for more details.
119//!
120//! # License
121//!
122//! This crate is licensed under the MIT License.
123
124#![cfg_attr(docsrs, feature(doc_auto_cfg))]
125#![forbid(unsafe_code)]
126
127pub mod block;
128pub mod digest;
129pub mod state;
130
131use chksum_hash_core as core;
132
133use crate::block::Block;
134#[doc(inline)]
135pub use crate::block::LENGTH_BYTES as BLOCK_LENGTH_BYTES;
136#[doc(inline)]
137pub use crate::digest::{Digest, LENGTH_BYTES as DIGEST_LENGTH_BYTES};
138#[doc(inline)]
139pub use crate::state::State;
140
141/// Creates a new hash.
142///
143/// # Example
144///
145/// ```rust
146/// use chksum_hash_md5 as md5;
147///
148/// let digest = md5::new().digest();
149/// assert_eq!(
150/// digest.to_hex_lowercase(),
151/// "d41d8cd98f00b204e9800998ecf8427e"
152/// );
153///
154/// let digest = md5::new().update("data").digest();
155/// assert_eq!(
156/// digest.to_hex_lowercase(),
157/// "8d777f385d3dfec8815d20f7496026dc"
158/// );
159/// ```
160#[must_use]
161pub fn new() -> Update {
162 Update::new()
163}
164
165/// Creates a default hash.
166///
167/// # Example
168///
169/// ```rust
170/// use chksum_hash_md5 as md5;
171///
172/// let digest = md5::default().digest();
173/// assert_eq!(
174/// digest.to_hex_lowercase(),
175/// "d41d8cd98f00b204e9800998ecf8427e"
176/// );
177///
178/// let digest = md5::default().update("data").digest();
179/// assert_eq!(
180/// digest.to_hex_lowercase(),
181/// "8d777f385d3dfec8815d20f7496026dc"
182/// );
183/// ```
184#[must_use]
185pub fn default() -> Update {
186 core::default()
187}
188
189/// Computes the hash of the given input.
190///
191/// # Example
192///
193/// ```rust
194/// use chksum_hash_md5 as md5;
195///
196/// let digest = md5::hash("data");
197/// assert_eq!(
198/// digest.to_hex_lowercase(),
199/// "8d777f385d3dfec8815d20f7496026dc"
200/// );
201/// ```
202pub fn hash(data: impl AsRef<[u8]>) -> Digest {
203 core::hash::<Update>(data)
204}
205
206/// A hash state containing an internal buffer that can handle an unknown amount of input data.
207///
208/// # Example
209///
210/// ```rust
211/// use chksum_hash_md5 as md5;
212///
213/// // Create a new hash instance
214/// let mut hash = md5::Update::new();
215///
216/// // Fill with data
217/// hash.update("data");
218///
219/// // Finalize and create a digest
220/// let digest = hash.finalize().digest();
221/// assert_eq!(
222/// digest.to_hex_lowercase(),
223/// "8d777f385d3dfec8815d20f7496026dc"
224/// );
225///
226/// // Reset to default values
227/// hash.reset();
228///
229/// // Produce a hash digest using internal finalization
230/// let digest = hash.digest();
231/// assert_eq!(
232/// digest.to_hex_lowercase(),
233/// "d41d8cd98f00b204e9800998ecf8427e"
234/// );
235/// ```
236#[derive(Clone, Debug, Eq, PartialEq)]
237#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
238pub struct Update {
239 state: State,
240 unprocessed: Vec<u8>,
241 processed: usize,
242}
243
244impl Update {
245 /// Creates a new hash.
246 #[must_use]
247 pub fn new() -> Self {
248 let state = state::new();
249 let unprocessed = Vec::with_capacity(BLOCK_LENGTH_BYTES);
250 let processed = 0;
251 Self {
252 state,
253 unprocessed,
254 processed,
255 }
256 }
257
258 /// Updates the internal state with an input data.
259 ///
260 /// # Performance issues
261 ///
262 /// To achieve maximum performance, the length of incoming data parts should be a multiple of the block length.
263 ///
264 /// In any other case, an internal buffer is used, which can cause a speed decrease in performance.
265 pub fn update(&mut self, data: impl AsRef<[u8]>) -> &mut Self {
266 let data = data.as_ref();
267
268 // The `chunks_exact` method doesn't drain original vector so it needs to be handled manually
269 for _ in 0..(self.unprocessed.len() / BLOCK_LENGTH_BYTES) {
270 let block = {
271 let chunk = self.unprocessed.drain(..BLOCK_LENGTH_BYTES);
272 let chunk = chunk.as_slice();
273 Block::try_from(chunk)
274 .expect("chunk length must be exact size as block")
275 .into()
276 };
277 self.state = self.state.update(block);
278 self.processed = self.processed.wrapping_add(BLOCK_LENGTH_BYTES);
279 }
280
281 if self.unprocessed.is_empty() {
282 // Internal buffer is empty, incoming data can be processed without buffering.
283 let mut chunks = data.chunks_exact(BLOCK_LENGTH_BYTES);
284 for chunk in chunks.by_ref() {
285 let block = Block::try_from(chunk)
286 .expect("chunk length must be exact size as block")
287 .into();
288 self.state = self.state.update(block);
289 self.processed = self.processed.wrapping_add(BLOCK_LENGTH_BYTES);
290 }
291 let remainder = chunks.remainder();
292 if !remainder.is_empty() {
293 self.unprocessed.extend(remainder);
294 }
295 } else if (self.unprocessed.len() + data.len()) < BLOCK_LENGTH_BYTES {
296 // Not enough data even for one block.
297 self.unprocessed.extend(data);
298 } else {
299 // Create the first block from the buffer, create the second (and every other) block from incoming data.
300 let unprocessed = self.unprocessed.len() % BLOCK_LENGTH_BYTES;
301 let missing = BLOCK_LENGTH_BYTES - unprocessed;
302 let (fillment, data) = data.split_at(missing);
303 let block = {
304 let mut block = [0u8; BLOCK_LENGTH_BYTES];
305 let (first_part, second_part) = block.split_at_mut(self.unprocessed.len());
306 first_part.copy_from_slice(self.unprocessed.drain(..self.unprocessed.len()).as_slice());
307 second_part[..missing].copy_from_slice(fillment);
308 block
309 };
310 let mut chunks = block.chunks_exact(BLOCK_LENGTH_BYTES);
311 for chunk in chunks.by_ref() {
312 let block = Block::try_from(chunk)
313 .expect("chunk length must be exact size as block")
314 .into();
315 self.state = self.state.update(block);
316 self.processed = self.processed.wrapping_add(BLOCK_LENGTH_BYTES);
317 }
318 let remainder = chunks.remainder();
319 assert!(remainder.is_empty(), "chunks remainder must be empty");
320
321 let mut chunks = data.chunks_exact(BLOCK_LENGTH_BYTES);
322 for chunk in chunks.by_ref() {
323 let block = Block::try_from(chunk)
324 .expect("chunk length must be exact size as block")
325 .into();
326 self.state = self.state.update(block);
327 self.processed = self.processed.wrapping_add(BLOCK_LENGTH_BYTES);
328 }
329 let remainder = chunks.remainder();
330 self.unprocessed.extend(remainder);
331 }
332
333 self
334 }
335
336 /// Applies padding and produces the finalized state.
337 #[must_use]
338 pub fn finalize(&self) -> Finalize {
339 let mut state = self.state;
340 let mut processed = self.processed;
341 let unprocessed = {
342 let mut chunks = self.unprocessed.chunks_exact(BLOCK_LENGTH_BYTES);
343 for chunk in chunks.by_ref() {
344 let block = Block::try_from(chunk)
345 .expect("chunk length must be exact size as block")
346 .into();
347 state = state.update(block);
348 processed = processed.wrapping_add(BLOCK_LENGTH_BYTES);
349 }
350 chunks.remainder()
351 };
352
353 let length = {
354 let length = unprocessed.len().wrapping_add(processed) as u64;
355 let length = length.wrapping_mul(8); // convert byte-length into bits-length
356 length.to_le_bytes()
357 };
358
359 if (unprocessed.len() + 1 + length.len()) <= BLOCK_LENGTH_BYTES {
360 let padding = {
361 let mut padding = [0u8; BLOCK_LENGTH_BYTES];
362 padding[..unprocessed.len()].copy_from_slice(&unprocessed[..unprocessed.len()]);
363 padding[unprocessed.len()] = 0x80;
364 padding[(BLOCK_LENGTH_BYTES - length.len())..].copy_from_slice(&length);
365 padding
366 };
367
368 let block = {
369 let block = &padding[..];
370 Block::try_from(block)
371 .expect("padding length must exact size as block")
372 .into()
373 };
374 state = state.update(block);
375 } else {
376 let padding = {
377 let mut padding = [0u8; BLOCK_LENGTH_BYTES * 2];
378 padding[..unprocessed.len()].copy_from_slice(&unprocessed[..unprocessed.len()]);
379 padding[unprocessed.len()] = 0x80;
380 padding[(BLOCK_LENGTH_BYTES * 2 - length.len())..].copy_from_slice(&length);
381 padding
382 };
383
384 let block = {
385 let block = &padding[..BLOCK_LENGTH_BYTES];
386 Block::try_from(block)
387 .expect("padding length must exact size as block")
388 .into()
389 };
390 state = state.update(block);
391
392 let block = {
393 let block = &padding[BLOCK_LENGTH_BYTES..];
394 Block::try_from(block)
395 .expect("padding length must exact size as block")
396 .into()
397 };
398 state = state.update(block);
399 }
400
401 Finalize { state }
402 }
403
404 /// Resets the internal state to default values.
405 pub fn reset(&mut self) -> &mut Self {
406 self.state = self.state.reset();
407 self.unprocessed.clear();
408 self.processed = 0;
409 self
410 }
411
412 /// Produces the hash digest using internal finalization.
413 #[must_use]
414 pub fn digest(&self) -> Digest {
415 self.finalize().digest()
416 }
417}
418
419impl core::Update for Update {
420 type Digest = Digest;
421 type Finalize = Finalize;
422
423 fn update(&mut self, data: impl AsRef<[u8]>) {
424 self.update(data);
425 }
426
427 fn finalize(&self) -> Self::Finalize {
428 self.finalize()
429 }
430
431 fn reset(&mut self) {
432 self.reset();
433 }
434}
435
436impl Default for Update {
437 fn default() -> Self {
438 Self::new()
439 }
440}
441
442/// A finalized hash state.
443#[derive(Clone, Copy, Debug, Eq, PartialEq)]
444pub struct Finalize {
445 state: State,
446}
447
448impl Finalize {
449 /// Creates and returns the hash digest.
450 #[must_use]
451 #[rustfmt::skip]
452 pub fn digest(&self) -> Digest {
453 let State { a, b, c, d } = self.state;
454 let [a, b, c, d] = [
455 a.to_le_bytes(),
456 b.to_le_bytes(),
457 c.to_le_bytes(),
458 d.to_le_bytes(),
459 ];
460 Digest::new([
461 a[0], a[1], a[2], a[3],
462 b[0], b[1], b[2], b[3],
463 c[0], c[1], c[2], c[3],
464 d[0], d[1], d[2], d[3],
465 ])
466 }
467
468 /// Resets the hash state to the in-progress state.
469 #[must_use]
470 pub fn reset(&self) -> Update {
471 Update::new()
472 }
473}
474
475impl core::Finalize for Finalize {
476 type Digest = Digest;
477 type Update = Update;
478
479 fn digest(&self) -> Self::Digest {
480 self.digest()
481 }
482
483 fn reset(&self) -> Self::Update {
484 self.reset()
485 }
486}
487
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected digest whenever no input has been hashed.
    const EMPTY_DIGEST: &str = "d41d8cd98f00b204e9800998ecf8427e";

    /// Both free constructors yield the empty-input digest when nothing is fed.
    #[test]
    fn empty() {
        for hasher in [default(), new()] {
            assert_eq!(hasher.digest().to_hex_lowercase(), EMPTY_DIGEST);
        }
    }

    /// Resetting, either in progress or after finalization, discards the data fed so far.
    #[test]
    fn reset() {
        let mut hasher = new();
        hasher.update("data").reset();
        assert_eq!(hasher.digest().to_hex_lowercase(), EMPTY_DIGEST);

        let finalized = new().update("data").finalize();
        assert_eq!(finalized.reset().digest().to_hex_lowercase(), EMPTY_DIGEST);
    }

    /// Feeding the input in one piece or in several pieces gives the same digest.
    #[test]
    fn hello_world() {
        const EXPECTED: &str = "b10a8db164e0754105b7a99be72e3fe5";

        let in_one_piece = new().update("Hello World").digest().to_hex_lowercase();
        assert_eq!(in_one_piece, EXPECTED);

        let mut hasher = new();
        for part in ["Hello", " ", "World"] {
            hasher.update(part);
        }
        assert_eq!(hasher.digest().to_hex_lowercase(), EXPECTED);
    }

    /// One-shot hashing of an input that spans several blocks.
    #[test]
    fn rust_book() {
        let phrase = "Welcome to The Rust Programming Language, an introductory book about Rust. The Rust programming \
                      language helps you write faster, more reliable software. High-level ergonomics and low-level \
                      control are often at odds in programming language design; Rust challenges that conflict. \
                      Through balancing powerful technical capacity and a great developer experience, Rust gives you \
                      the option to control low-level details (such as memory usage) without all the hassle \
                      traditionally associated with such control.";

        assert_eq!(hash(phrase).to_hex_lowercase(), "21e3b4863269295e1670e055ffb57c2e");
    }

    /// Inputs of 60 bytes and of exactly one block (fed as 60 + 4 bytes).
    #[test]
    fn zeroes() {
        let data = [0u8; 64];
        let (head, tail) = data.split_at(60);

        let digest = new().update(head).digest().to_hex_lowercase();
        assert_eq!(digest, "a302a771ee0e3127b8950f0a67d17e49");

        let mut hasher = new();
        hasher.update(head).update(tail);
        assert_eq!(hasher.digest().to_hex_lowercase(), "3b5d3c7d207e37dceeedd301e35e2e58");
    }
}
551}