1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
//! An implementation of the fuzzyhash/ssdeep hash algorithm. The
//! original [CTPH](https://www.sciencedirect.com/science/article/pii/S1742287606000764?via%3Dihub)
//! paper describes how this fuzzy hash is computed.
//!
//! # Examples
//!
//! **Build a fuzzy hash from blocks of data, like a stream**:
//!
//! ```no_run
//! use fuzzyhash::FuzzyHash;
//! use std::io::Read;
//!
//! let mut file = std::fs::File::open("/path/to/my/file").unwrap();
//! let mut fuzzy_hash = FuzzyHash::default();
//! let mut buffer = vec![0; 1024];
//!
//! loop {
//!     let count = file.read(&mut buffer).unwrap();
//!
//!     // A read of zero bytes signals the end of the file.
//!     if count == 0 {
//!         break;
//!     }
//!
//!     // Only hash the bytes that were actually read.
//!     fuzzy_hash.update(&buffer[..count]);
//! }
//!
//! fuzzy_hash.finalize();
//!
//! println!("Fuzzy hash of data: {}", fuzzy_hash);
//! ```
//!
//! **Hash some data**:
//! ```no_run
//! use fuzzyhash::FuzzyHash;
//!
//! let mut buffer = Vec::new();
//!
//! buffer.push(0xde);
//! buffer.push(0xad);
//! buffer.push(0xbe);
//! buffer.push(0xef);
//! // ...
//!
//! println!("Fuzzy hash of data: {}", FuzzyHash::new(buffer));
//! ```
//!
#![warn(missing_docs)] mod blockhash; mod compare; mod constants; mod error; mod hasher; mod roll; pub use constants::Modes; use hasher::Hasher; use std::ffi::{CStr, CString}; use std::fmt; use std::path::Path; use std::os::raw::c_char; /// Result of fuzzy hash operations pub type Result<T> = std::result::Result<T, error::Error>; /// Hasher for fuzzy algorithm pub struct FuzzyHash { hasher: Hasher, hash: Option<String>, } impl Default for FuzzyHash { fn default() -> Self { Self { hasher: Hasher::new(), hash: None, } } } impl FuzzyHash { /// Construct a new FuzzyHash from source data /// /// # Example /// /// ```no_run /// use std::fs::read; /// use std::io::Read; /// use fuzzyhash::FuzzyHash; /// /// let mut data = read("/usr/bin/bash").unwrap(); /// let mut fuzzy_hash = FuzzyHash::new(data); /// ``` /// pub fn new<S: AsRef<[u8]>>(input: S) -> Self { let input = input.as_ref(); let mut this = Self::default(); this.hasher.update(input, input.len()); this.finalize(); this } /// Hash a file pointed to by `path`. 
/// /// # Example /// ```no_run /// use fuzzyhash::{FuzzyHash}; /// let hash = FuzzyHash::file("/home/me/a_large_file.bin").unwrap(); /// ``` /// pub fn file<P: AsRef<Path>>(path: P) -> std::result::Result<Self, std::io::Error> { let mut file = std::fs::File::open(path.as_ref())?; FuzzyHash::read(&mut file) } /// Hash target implementing `std::io::Read` /// /// # Example /// ``` /// use fuzzyhash::FuzzyHash; /// use std::io::{Cursor, Read}; /// /// let mut cursor = Cursor::new(vec![1,2,3,4,5,6,7,8,9,10]); /// let fuzzy = FuzzyHash::read(&mut cursor); /// ``` pub fn read<R: std::io::Read>(reader: &mut R) -> std::result::Result<Self, std::io::Error> { let mut hasher = Hasher::new(); loop { let mut buffer = [0; 1024]; let len = reader.read(&mut buffer)?; hasher.update(&buffer, len); if len < 1024 { break; } } let mut this = Self { hasher, hash: None }; this.finalize(); Ok(this) } /// Add chunk to the data source pub fn update<S: AsRef<[u8]>>(&mut self, input: S) { let input = input.as_ref(); self.hasher.update(input, input.len()); } /// Called to finalize the hashing and generate a string value pub fn finalize(&mut self) { if self.hash.is_none() { self.hash = self.hasher.digest(constants::Modes::None).ok(); } } /// Compare two fuzzy hashes /// /// # Arguments /// * `first` - first fuzzy hash to compare /// * `second` - second fuzzy hash to compare /// /// # Example /// ``` /// use fuzzyhash::FuzzyHash; /// assert_eq!(FuzzyHash::compare( /// "96:U57GjXnLt9co6pZwvLhJluvrszNgMFwO6MFG8SvkpjTWf:Hj3BeoEcNJ0TspgIG8SvkpjTg", /// "96:U57GjXnLt9co6pZwvLhJluvrs1eRTxYARdEallia:Hj3BeoEcNJ0TsI9xYeia3R").unwrap(), /// 63); /// ``` pub fn compare<S: AsRef<str>, T: AsRef<str>>(first: S, second: T) -> Result<u32> { compare::compare(first, second) } /// Compare this fuzzy hash against another /// /// # Arguments /// * `other` - compare this fuzzy hash to `other` /// /// # Example /// ``` /// use fuzzyhash::FuzzyHash; /// let mut fuzzy_hash = FuzzyHash::new("some data to hash for the 
purposes of running a test"); /// assert_eq!(fuzzy_hash.compare_to( /// &"3:HEREar5MFUul0U0KMP:knl8lkKMP".into()), /// Some(18)); /// ``` pub fn compare_to(&self, other: &FuzzyHash) -> Option<u32> { self.hash .as_ref() .and_then(|ref hash| FuzzyHash::compare(hash, &other.to_string()).ok()) } } impl fmt::Display for FuzzyHash { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.hash.as_ref().unwrap_or(&String::new())) } } impl From<&str> for FuzzyHash { fn from(s: &str) -> Self { Self { hasher: Hasher::new(), hash: Some(s.to_string()), } } } impl From<String> for FuzzyHash { fn from(s: String) -> Self { Self { hasher: Hasher::new(), hash: Some(s), } } } /// Returns the fuzzy hash of arbitrary data. This method provides better FFI compatibility. /// /// # Arguments /// * `buf` - a pointer to the array containing the data to hash /// * `length` - length of buf /// /// # Safety /// /// This is function is `unsafe` as it is intended to read a string from FFI /// /// # Example /// ``` /// use fuzzyhash::{fuzzyhash}; /// use std::ffi::CString; /// /// let data = "this is our test data!".to_string(); /// let hash = unsafe { CString::from_raw(fuzzyhash(data.as_bytes().as_ptr(), data.len())) }; /// let hash = hash.into_string().unwrap(); /// println!("Fuzzy Hash: {}", hash); /// assert_eq!(hash, "3:YKKGhR0tn:YRGRmn"); /// /// ``` #[no_mangle] pub unsafe extern "C" fn fuzzyhash(buf: *const u8, length: usize) -> *mut c_char { let data = std::slice::from_raw_parts(buf, length); let mut fuzzy_hash = FuzzyHash::new(data); fuzzy_hash.finalize(); let s = CString::new(fuzzy_hash.to_string()).unwrap(); s.into_raw() } /// FFI Compatible fuzzy hash comparisons. 
/// /// # Arguments /// * `first` - a C style fuzzy hash string /// * `second` - a C style fuzzy hash string /// /// # Safety /// /// This is function is `unsafe` as it is intended to read strings from FFI /// /// # Example /// ``` /// use fuzzyhash::{fuzzyhash_compare}; /// use std::ffi::CString; /// /// let first = CString::new("this is our test data for a fuzzy hash comparison!").unwrap(); /// let second = CString::new("this is my test data for a fuzzy hash comparison!").unwrap(); /// let compared = unsafe { fuzzyhash_compare(first.as_ptr(), second.as_ptr()) }; /// println!("Fuzzy Hash: {}", compared); /// assert_eq!(compared, 17); /// ``` #[no_mangle] pub unsafe extern "C" fn fuzzyhash_compare(first: *const c_char, second: *const c_char) -> u32 { let f = FuzzyHash::new(CStr::from_ptr(first).to_string_lossy().into_owned()); let s = FuzzyHash::new(CStr::from_ptr(second).to_string_lossy().into_owned()); f.compare_to(&s).unwrap_or(0) }