ssdeep/internals/
generate_easy_std.rs

1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Easy generator functions depending on the standard I/O.
5
6#![cfg(all(feature = "std", feature = "easy-functions"))]
7
8use std::fs::File;
9use std::io::Read;
10use std::path::Path;
11
12use crate::internals::generate::{Generator, GeneratorError};
13use crate::internals::hash::RawFuzzyHash;
14use crate::internals::macros::invariant;
15
16/// The error type describing either a generator error or an I/O error.
17///
18/// This type contains either:
19/// *   A fuzzy hash generator error ([`GeneratorError`]) or
20/// *   An I/O error ([`std::io::Error`]).
21#[derive(Debug)]
22pub enum GeneratorOrIOError {
23    /// An error caused by the fuzzy hash generator.
24    GeneratorError(GeneratorError),
25
26    /// An error caused by an internal I/O operation.
27    IOError(std::io::Error),
28}
29
30impl core::fmt::Display for GeneratorOrIOError {
31    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
32        match self {
33            GeneratorOrIOError::GeneratorError(err) => err.fmt(f),
34            GeneratorOrIOError::IOError(err) => err.fmt(f),
35        }
36    }
37}
38
39impl From<GeneratorError> for GeneratorOrIOError {
40    // For wrapping with the '?' operator
41    fn from(value: GeneratorError) -> Self {
42        GeneratorOrIOError::GeneratorError(value)
43    }
44}
45
46impl From<std::io::Error> for GeneratorOrIOError {
47    // For wrapping with the '?' operator
48    fn from(value: std::io::Error) -> Self {
49        GeneratorOrIOError::IOError(value)
50    }
51}
52
53impl std::error::Error for GeneratorOrIOError {
54    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
55        match self {
56            GeneratorOrIOError::GeneratorError(err) => Some(err),
57            GeneratorOrIOError::IOError(err) => Some(err),
58        }
59    }
60}
61
/// Size in bytes (32KiB) of the temporary read buffer used by
/// the "easy" functions.
const BUFFER_SIZE: usize = 32 * 1024;
64
65/// Generates a fuzzy hash from a given reader stream.
66///
67/// This is an internal function to allow other functions to
68/// prepare a [`Generator`] object.
69///
70/// # Performance Consideration
71///
72/// It doesn't use [`BufReader`](std::io::BufReader) because the internal buffer
73/// is large enough.  Note that the default buffer size of `BufReader` is
74/// normally 8KiB (while [buffer size](BUFFER_SIZE) here has 32KiB).
75#[inline]
76fn hash_stream_common<R: Read>(
77    generator: &mut Generator,
78    reader: &mut R,
79) -> Result<RawFuzzyHash, GeneratorOrIOError> {
80    let mut buffer = [0u8; BUFFER_SIZE];
81    loop {
82        let len = reader.read(&mut buffer)?; // grcov-excl-br-line:IO
83        if len == 0 {
84            break;
85        }
86        invariant!(len <= buffer.len());
87        generator.update(&buffer[0..len]);
88    }
89    Ok(generator.finalize()?)
90}
91
92/// Generates a fuzzy hash from a given reader stream.
93///
94/// # Example
95///
96/// ```
97/// use std::error::Error;
98/// use std::fs::File;
99///
100/// fn main() -> Result<(), ssdeep::GeneratorOrIOError> {
101///     let mut stream = File::open("data/examples/hello.txt")?;
102///     let fuzzy_hash = ssdeep::hash_stream(&mut stream)?;
103///     let fuzzy_hash_str = fuzzy_hash.to_string();
104///     assert_eq!(fuzzy_hash_str, "3:aaX8v:aV");
105///     Ok(())
106/// }
107/// ```
108pub fn hash_stream<R: Read>(reader: &mut R) -> Result<RawFuzzyHash, GeneratorOrIOError> {
109    let mut generator = Generator::new();
110    hash_stream_common(&mut generator, reader)
111}
112
113/// Generates a fuzzy hash from a given file.
114///
115/// # Example
116///
117/// ```
118/// use std::error::Error;
119///
120/// fn main() -> Result<(), ssdeep::GeneratorOrIOError> {
121///     let fuzzy_hash = ssdeep::hash_file("data/examples/hello.txt")?;
122///     let fuzzy_hash_str = fuzzy_hash.to_string();
123///     assert_eq!(fuzzy_hash_str, "3:aaX8v:aV");
124///     Ok(())
125/// }
126/// ```
127///
128/// # Note
129///
130/// This function expects that the file size does not change while
131/// generating.  On normal use cases, you hash a fixed file to generate
132/// a fuzzy hash to interchange information about the file with others.
133/// So, this assumption should be safe for most users.
134///
135/// Also, failing to meet this requirement only causes this function to
136/// return an error (incorrect result will not be produced).
137/// So, this function is always safe.
138///
139/// If the file size could change while generating a fuzzy hash,
140/// use [`hash_stream()`] instead.
141pub fn hash_file<P: AsRef<Path>>(path: P) -> Result<RawFuzzyHash, GeneratorOrIOError> {
142    let mut file = File::open(path)?;
143    let mut generator = Generator::new();
144    generator.set_fixed_input_size(file.metadata()?.len())?; // grcov-excl-br-line:IO
145    hash_stream_common(&mut generator, &mut file)
146}
147
148mod tests;