ssdeep/internals/generate_easy_std.rs
1// SPDX-License-Identifier: MIT
2// SPDX-FileCopyrightText: Copyright (C) 2023–2025 Tsukasa OI <floss_ssdeep@irq.a4lg.com>.
3
4//! Easy generator functions depending on the standard I/O.
5
6#![cfg(all(feature = "std", feature = "easy-functions"))]
7
8use std::fs::File;
9use std::io::Read;
10use std::path::Path;
11
12use crate::internals::generate::{Generator, GeneratorError};
13use crate::internals::hash::RawFuzzyHash;
14use crate::internals::macros::invariant;
15
16/// The error type describing either a generator error or an I/O error.
17///
18/// This type contains either:
19/// * A fuzzy hash generator error ([`GeneratorError`]) or
20/// * An I/O error ([`std::io::Error`]).
21#[derive(Debug)]
22pub enum GeneratorOrIOError {
23 /// An error caused by the fuzzy hash generator.
24 GeneratorError(GeneratorError),
25
26 /// An error caused by an internal I/O operation.
27 IOError(std::io::Error),
28}
29
30impl core::fmt::Display for GeneratorOrIOError {
31 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
32 match self {
33 GeneratorOrIOError::GeneratorError(err) => err.fmt(f),
34 GeneratorOrIOError::IOError(err) => err.fmt(f),
35 }
36 }
37}
38
39impl From<GeneratorError> for GeneratorOrIOError {
40 // For wrapping with the '?' operator
41 fn from(value: GeneratorError) -> Self {
42 GeneratorOrIOError::GeneratorError(value)
43 }
44}
45
46impl From<std::io::Error> for GeneratorOrIOError {
47 // For wrapping with the '?' operator
48 fn from(value: std::io::Error) -> Self {
49 GeneratorOrIOError::IOError(value)
50 }
51}
52
53impl std::error::Error for GeneratorOrIOError {
54 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
55 match self {
56 GeneratorOrIOError::GeneratorError(err) => Some(err),
57 GeneratorOrIOError::IOError(err) => Some(err),
58 }
59 }
60}
61
/// Size in bytes (32 KiB) of the temporary read buffer used by the
/// "easy" functions.
const BUFFER_SIZE: usize = 32 * 1024;
64
65/// Generates a fuzzy hash from a given reader stream.
66///
67/// This is an internal function to allow other functions to
68/// prepare a [`Generator`] object.
69///
70/// # Performance Consideration
71///
72/// It doesn't use [`BufReader`](std::io::BufReader) because the internal buffer
73/// is large enough. Note that the default buffer size of `BufReader` is
74/// normally 8KiB (while [buffer size](BUFFER_SIZE) here has 32KiB).
75#[inline]
76fn hash_stream_common<R: Read>(
77 generator: &mut Generator,
78 reader: &mut R,
79) -> Result<RawFuzzyHash, GeneratorOrIOError> {
80 let mut buffer = [0u8; BUFFER_SIZE];
81 loop {
82 let len = reader.read(&mut buffer)?; // grcov-excl-br-line:IO
83 if len == 0 {
84 break;
85 }
86 invariant!(len <= buffer.len());
87 generator.update(&buffer[0..len]);
88 }
89 Ok(generator.finalize()?)
90}
91
92/// Generates a fuzzy hash from a given reader stream.
93///
94/// # Example
95///
96/// ```
97/// use std::error::Error;
98/// use std::fs::File;
99///
100/// fn main() -> Result<(), ssdeep::GeneratorOrIOError> {
101/// let mut stream = File::open("data/examples/hello.txt")?;
102/// let fuzzy_hash = ssdeep::hash_stream(&mut stream)?;
103/// let fuzzy_hash_str = fuzzy_hash.to_string();
104/// assert_eq!(fuzzy_hash_str, "3:aaX8v:aV");
105/// Ok(())
106/// }
107/// ```
108pub fn hash_stream<R: Read>(reader: &mut R) -> Result<RawFuzzyHash, GeneratorOrIOError> {
109 let mut generator = Generator::new();
110 hash_stream_common(&mut generator, reader)
111}
112
113/// Generates a fuzzy hash from a given file.
114///
115/// # Example
116///
117/// ```
118/// use std::error::Error;
119///
120/// fn main() -> Result<(), ssdeep::GeneratorOrIOError> {
121/// let fuzzy_hash = ssdeep::hash_file("data/examples/hello.txt")?;
122/// let fuzzy_hash_str = fuzzy_hash.to_string();
123/// assert_eq!(fuzzy_hash_str, "3:aaX8v:aV");
124/// Ok(())
125/// }
126/// ```
127///
128/// # Note
129///
130/// This function expects that the file size does not change while
131/// generating. On normal use cases, you hash a fixed file to generate
132/// a fuzzy hash to interchange information about the file with others.
133/// So, this assumption should be safe for most users.
134///
135/// Also, failing to meet this requirement only causes this function to
136/// return an error (incorrect result will not be produced).
137/// So, this function is always safe.
138///
139/// If the file size could change while generating a fuzzy hash,
140/// use [`hash_stream()`] instead.
141pub fn hash_file<P: AsRef<Path>>(path: P) -> Result<RawFuzzyHash, GeneratorOrIOError> {
142 let mut file = File::open(path)?;
143 let mut generator = Generator::new();
144 generator.set_fixed_input_size(file.metadata()?.len())?; // grcov-excl-br-line:IO
145 hash_stream_common(&mut generator, &mut file)
146}
147
148mod tests;