1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257
// SPDX-License-Identifier: GPL-2.0-or-later
// SPDX-FileCopyrightText: Copyright Andrew Tridgell <tridge@samba.org> 2002
// SPDX-FileCopyrightText: Copyright (C) 2006 ManTech International Corporation
// SPDX-FileCopyrightText: Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
// SPDX-FileCopyrightText: Copyright (C) 2017, 2023 Tsukasa OI <floss_ssdeep@irq.a4lg.com>
use core::ops::AddAssign;
use crate::generate::fnv_table::FNV_HASH_INIT;
#[cfg(not(feature = "opt-reduce-fnv-table"))]
use crate::generate::fnv_table::FNV_TABLE;
#[cfg(feature = "opt-reduce-fnv-table")]
use crate::generate::fnv_table::FNV_HASH_PRIME;
use crate::hash::{
FuzzyHashData, RawFuzzyHash, LongRawFuzzyHash,
fuzzy_raw_type
};
use crate::hash::block::{
block_size, block_hash,
BlockHashSize, ConstrainedBlockHashSize,
BlockHashSizes, ConstrainedBlockHashSizes
};
use crate::intrinsics::{likely, unlikely};
use crate::macros::{optionally_unsafe, invariant};
mod fnv_table;
#[cfg(test)]
mod tests;
/// Hasher which computes the lowest 6 bits of a 32-bit FNV-1 variant.
///
/// This variant of FNV-1 hash has the regular prime constant of `0x01000193`
/// but has a different initial state: `0x28021967` (in contrast to regular
/// FNV-1-32's `0x811c9dc5`).
///
/// Since ssdeep only uses the lowest 6 bits of the hash value, it ignores any
/// higher bits, enabling updating the state table-based (instead of multiply
/// and xor) unless the `opt-reduce-fnv-table` feature is enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PartialFNVHash(u8);
impl PartialFNVHash {
/// Creates a new [`PartialFNVHash`] with the initial value.
#[inline]
pub fn new() -> Self { PartialFNVHash(FNV_HASH_INIT) }
/// Updates the hash value by processing a byte.
#[inline]
pub fn update_by_byte(&mut self, ch: u8) -> &mut Self {
cfg_if::cfg_if! {
if #[cfg(not(feature = "opt-reduce-fnv-table"))] {
optionally_unsafe! {
invariant!((self.value() as usize) < block_hash::ALPHABET_SIZE);
self.0 = FNV_TABLE
[self.value() as usize] // grcov-excl-br-line:ARRAY
[(ch & (block_hash::ALPHABET_SIZE as u8).wrapping_sub(1)) as usize]; // grcov-excl-br-line:ARRAY
}
}
else {
self.0 = ((self.0 as u32).wrapping_mul(FNV_HASH_PRIME) ^ (ch as u32)) as u8;
}
}
self
}
/// Updates the hash value by processing an iterator of [`u8`].
pub fn update_by_iter(&mut self, iter: impl Iterator<Item = u8>) -> &mut Self{
for ch in iter {
self.update_by_byte(ch);
}
self
}
/// Updates the hash value by processing a slice of [`u8`].
pub fn update(&mut self, buf: &[u8]) -> &mut Self {
for &ch in buf.iter() {
self.update_by_byte(ch);
}
self
}
/// Returns the current hash value.
///
/// Note that there's no "finalization" on the FNV hash.
/// You can even continue updating after reading the hash value.
#[inline]
pub fn value(&self) -> u8 {
cfg_if::cfg_if! {
if #[cfg(not(feature = "opt-reduce-fnv-table"))] {
optionally_unsafe! {
invariant!(self.0 < (block_hash::ALPHABET_SIZE as u8));
}
self.0
}
else {
self.0 & (block_hash::ALPHABET_SIZE as u8).wrapping_sub(1)
}
}
}
}
impl Default for PartialFNVHash {
fn default() -> Self {
Self::new()
}
}
impl AddAssign<&[u8]> for PartialFNVHash {
/// Updates the hash value by processing a slice of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8]) {
self.update(buffer);
}
}
impl<const N: usize> AddAssign<&[u8; N]> for PartialFNVHash {
/// Updates the hash value by processing an array of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8; N]) {
self.update(&buffer[..]);
}
}
impl AddAssign<u8> for PartialFNVHash {
/// Updates the hash value by processing a byte.
#[inline(always)]
fn add_assign(&mut self, byte: u8) {
self.update_by_byte(byte);
}
}
/// See [`RollingHash::WINDOW_SIZE`].
const ROLLING_WINDOW: usize = 7;
/// Hasher which computes a variant of 32-bit rolling hash as used in ssdeep.
///
/// In ssdeep, this is the most important hash function to decide whether to
/// trigger a context update based on the last 7 bytes it met.
///
/// Specifically, [`RollingHash`] implements the rolling hash implemented in
/// ssdeep version 2.13 or later. This is the first version that officially
/// supported ≧4GiB files and implemented a true rolling hash function.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RollingHash {
/// Current rolling window index.
///
/// **Performance Analysis:**
/// Substituting this variable's type to `usize` (more semantically correct)
/// resulted in some slowdown (~10%). Keeping this state for now.
index: u32,
/// Hash component 1.
///
/// This is the sum of the last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes.
///
/// In the "ssdeep-compatible" configuration, this value is in the range
/// of `0..=1785` (`0..=(7*0xff)`).
h1: u32,
/// Hash component 2.
///
/// This is the sum of the last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes
/// but the more recent byte has a higher weight (the latest byte has a
/// weight of [`WINDOW_SIZE`](Self::WINDOW_SIZE) and the last (fading) byte
/// has a weight of 1).
///
/// In the "ssdeep-compatible" configuration, this value is in the range
/// of `0..=7140` (`0..=(sum(1..=7)*0xff)`).
h2: u32,
/// Hash component 3.
///
/// This is the only "big" component of the hash.
/// Each time it processes a byte, this value is left-shifted by
/// [`H3_LSHIFT`](Self::H3_LSHIFT) and xor-ed with the latest byte value.
///
/// If it processes [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes,
/// older bytes are shifted out (larger than its MSB).
h3: u32,
/// The last [`WINDOW_SIZE`](Self::WINDOW_SIZE) bytes of the processed data.
window: [u8; ROLLING_WINDOW],
}
impl RollingHash {
/// The window size of the rolling hash.
///
/// This is 7 bytes in ssdeep.
pub const WINDOW_SIZE: usize = ROLLING_WINDOW;
/// Left shift width of [`h3`](Self::h3) for each byte.
///
/// This is 5 in ssdeep.
const H3_LSHIFT: usize = 5;
/// Creates a new [`RollingHash`] with the initial value.
pub fn new() -> Self {
RollingHash {
index: 0,
h1: 0,
h2: 0,
h3: 0,
window: [0; ROLLING_WINDOW],
}
}
/// Updates the hash value by processing a byte.
#[inline]
pub fn update_by_byte(&mut self, ch: u8) -> &mut Self {
optionally_unsafe! {
invariant!((self.index as usize) < Self::WINDOW_SIZE);
}
self.h2 = self.h2.wrapping_sub(self.h1);
self.h2 = self.h2.wrapping_add(
u32::wrapping_mul(ROLLING_WINDOW as u32, ch as u32));
self.h1 = self.h1.wrapping_add(ch as u32);
self.h1 = self.h1.wrapping_sub(self.window[self.index as usize] as u32); // grcov-excl-br-line:ARRAY
self.window[self.index as usize] = ch; // grcov-excl-br-line:ARRAY
self.index += 1;
if self.index as usize == ROLLING_WINDOW {
self.index = 0;
}
self.h3 <<= Self::H3_LSHIFT;
self.h3 ^= ch as u32;
self
}
/// Updates the hash value by processing an iterator of [`u8`].
pub fn update_by_iter(&mut self, iter: impl Iterator<Item = u8>) -> &mut Self{
for ch in iter {
self.update_by_byte(ch);
}
self
}
/// Updates the hash value by processing a slice of [`u8`].
pub fn update(&mut self, buf: &[u8]) -> &mut Self {
for &ch in buf.iter() {
self.update_by_byte(ch);
}
self
}
/// Returns the current hash value.
///
/// Note that there's no "finalization" on this rolling hash.
/// You can even continue updating after reading the hash value.
///
/// This is the sum of its three internal states (`h1`, `h2`, and `h3`).
/// See the source code and the private documentation for
/// its mathematical details.
#[inline]
pub fn value(&self) -> u32 {
self.h1.wrapping_add(self.h2).wrapping_add(self.h3)
}
}
impl Default for RollingHash {
fn default() -> Self {
Self::new()
}
}
impl AddAssign<&[u8]> for RollingHash {
/// Updates the hash value by processing a slice of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8]) {
self.update(buffer);
}
}
impl<const N: usize> AddAssign<&[u8; N]> for RollingHash {
/// Updates the hash value by processing an array of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8; N]) {
self.update(&buffer[..]);
}
}
impl AddAssign<u8> for RollingHash {
/// Updates the hash value by processing a byte.
#[inline(always)]
fn add_assign(&mut self, byte: u8) {
self.update_by_byte(byte);
}
}
/// The invalid character for a "not filled" marker.
const BLOCKHASH_CHAR_NIL: u8 = 0xff;
/// Block hash context.
///
/// All operations are performed in [`Generator`] except initialization.
#[derive(Debug, Clone, Copy)]
struct BlockHashContext {
/// Current index to update [`blockhash`](Self::blockhash).
blockhash_index: usize,
/// Block hash contents.
blockhash: [u8; block_hash::FULL_SIZE],
/// The last block hash character used when truncating.
blockhash_ch_half: u8,
/// Block hash updater (a FNV-1 hasher) for full block hash.
h_full: PartialFNVHash,
/// Block hash updater (a FNV-1 hasher) for truncated block hash.
h_half: PartialFNVHash,
}
impl BlockHashContext {
/// Creates a new [`BlockHashContext`] with the initial value.
///
/// It performs full initialization of the all [`BlockHashContext`] fields.
pub fn new() -> Self {
BlockHashContext {
blockhash_index: 0,
blockhash: [BLOCKHASH_CHAR_NIL; block_hash::FULL_SIZE],
blockhash_ch_half: BLOCKHASH_CHAR_NIL,
h_full: PartialFNVHash::new(),
h_half: PartialFNVHash::new()
}
}
/// Performs a partial initialization.
///
/// It effectively resets the state to the initial one but does not
/// necessarily reinitialize all fields.
pub fn reset(&mut self) {
self.blockhash_index = 0;
// partial initialization of the block hash buffer
self.blockhash[block_hash::FULL_SIZE - 1] = BLOCKHASH_CHAR_NIL;
self.blockhash_ch_half = BLOCKHASH_CHAR_NIL;
self.h_full = PartialFNVHash::new();
self.h_half = PartialFNVHash::new();
}
}
/// Fuzzy hash generator.
///
/// This type generates fuzzy hashes from a given data.
///
/// # Default Output
///
/// ## Normalization
///
/// The output of the generator is not normalized. If you want to convert it
/// to a normalized form, use separate methods like
/// [`RawFuzzyHash::normalize()`].
///
/// In other words, this generator (itself) does not have the direct equivalent
/// to the `FUZZY_FLAG_ELIMSEQ` flag of libfuzzy's `fuzzy_digest` function.
///
/// ## Truncation
///
/// By default (using [`finalize()`](Self::finalize()) method), the output has a
/// short, truncated form.
///
/// By using [`finalize_without_truncation()`](Self::finalize_without_truncation()),
/// you can retrieve a non-truncated form as a result. This is equivalent to
/// the `FUZZY_FLAG_NOTRUNC` flag of libfuzzy's `fuzzy_digest` function.
///
/// # Input Types
///
/// This type has three update methods accepting three different types:
///
/// 1. [`update()`](Self::update())
/// (accepting a slice of [`u8`] - byte buffer)
/// 2. [`update_by_iter()`](Self::update_by_iter())
/// (accepting an iterator of [`u8`] - stream of bytes)
/// 3. [`update_by_byte()`](Self::update_by_byte())
/// (accepting [`u8`] - single byte)
///
/// # Input Size
///
/// The input size has a *hard* maximum limit (inclusive):
/// [`MAX_INPUT_SIZE`](Self::MAX_INPUT_SIZE) (192GiB).
/// This is due to the mathematical limit of
/// [the 32-bit rolling hash](`RollingHash`) and piece-splitting behavior.
///
/// On the other hand, if the input size is too small, the result will not be
/// meaningful enough. This *soft* lower limit (inclusive) is declared as
/// [`MIN_RECOMMENDED_INPUT_SIZE`](Self::MIN_RECOMMENDED_INPUT_SIZE) and
/// you can check the
/// [`may_warn_about_small_input_size()`](Self::may_warn_about_small_input_size())
/// method to check whether the size is too small to be meaningful enough.
///
/// Note: even if it's doubtful to be meaningful enough, a fuzzy hash generated
/// from such a small input is still valid. You don't have to reject them
/// just because they are too small. This *soft* limit is for diagnostics.
///
/// If you know the total size of the input, you can improve the performance by
/// using either the [`set_fixed_input_size()`](Self::set_fixed_input_size()) method
/// or the [`set_fixed_input_size_in_usize()`](Self::set_fixed_input_size_in_usize())
/// method.
///
/// # Examples
///
/// ```rust
/// use ssdeep::{Generator, RawFuzzyHash};
///
/// let mut generator = Generator::new();
/// let buf1: &[u8] = b"Hello, ";
/// let buf2: &[u8; 6] = b"World!";
///
/// // Optional but supplying the *total* input size first improves the performance.
/// // This is the total size of three update calls below.
/// generator.set_fixed_input_size_in_usize(buf1.len() + buf2.len() + 1).unwrap();
///
/// // Update the internal state of the generator.
/// // Of course, you can update multiple times.
/// if true {
/// // Option 1: `+=` operator overload
/// generator += buf1;
/// generator += buf2;
/// generator += b'\n';
/// }
/// else {
/// // Option 2: `update()`-family functions
/// // (unlike `+=`, iterators are supported)
/// generator.update(buf1);
/// generator.update_by_iter((*buf2).into_iter());
/// generator.update_by_byte(b'\n');
/// }
///
/// // Retrieve the fuzzy hash and convert to the string.
/// let hash: RawFuzzyHash = generator.finalize().unwrap();
/// assert_eq!(hash.to_string(), "3:aaX8v:aV");
/// ```
#[derive(Debug, Clone)]
pub struct Generator {
/// Processed input size.
///
/// This value may be inaccurate if the generator has fed more than the
/// maximum *hard* size limit (finalization should fail in that case).
input_size: u64,
/// Optional fixed size set by the
/// [`set_fixed_input_size()`](Self::set_fixed_input_size()) method.
fixed_size: Option<u64>,
/// Border size to consider advancing [`bhidx_start`](Self::bhidx_start)
/// (or, to perform a block hash elimination).
///
/// Directly corresponds to: [`bhidx_start`](Self::bhidx_start).
elim_border: u64,
/// Start of the block hash "index" to process.
///
/// The "index" is equivalent to the *base-2 logarithm* form
/// of the block size. In [`Generator`], it is used as an actual index
/// of [`bh_context`](Self::bh_context).
bhidx_start: usize,
/// End of the block hash "index" to process.
///
/// See also: [`bhidx_start`](Self::bhidx_start)
bhidx_end: usize,
/// End of the block hash "index" to process (set by a given fixed size).
///
/// See also:
///
/// * [`bhidx_start`](Self::bhidx_start)
/// * [`set_fixed_input_size()`](Self::set_fixed_input_size())
bhidx_end_limit: usize,
/// Rolling hash mask to prevent piece split matching
/// before the index [`bhidx_start`](Self::bhidx_start).
///
/// Directly corresponds to: [`bhidx_start`](Self::bhidx_start).
roll_mask: u32,
/// Global rolling hash to control piece splitting.
roll_hash: RollingHash,
/// Block hash contexts per block size.
bh_context: [BlockHashContext; block_size::NUM_VALID],
/// Effectively a [`BlockHashContext::h_full`] but for the block size
/// larger than the biggest valid block size.
h_last: PartialFNVHash,
/// Whether to update [`h_last`](Self::h_last).
is_last: bool,
}
/// The error type representing an invalid or an unsupported operation of
/// [the generator](Generator).
///
/// # Compatibility Note
///
/// Since the version 0.3, the representation of this enum will not be
/// specified as specific representation of this enum is not important.
#[repr(u8)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GeneratorError {
/// [The fixed size](Generator::set_fixed_input_size()) has a mismatch with
/// either the previously set value or the final input size.
FixedSizeMismatch,
/// [The fixed size](Generator::set_fixed_input_size()) is
/// [too large](Generator::MAX_INPUT_SIZE).
FixedSizeTooLarge,
/// The total input size on finalization is
/// [too large](Generator::MAX_INPUT_SIZE).
InputSizeTooLarge,
/// The output would cause a buffer overflow for a specific output type.
///
/// This error only occurs when:
///
/// * Truncation is disabled,
/// * The output type is a short form
/// (because of those conditions, it only occurs on a raw
/// [`Generator::finalize_raw()`] call) and
/// * The resulting block hash 2 is longer than that of the
/// short form limit ([`block_hash::HALF_SIZE`]).
OutputOverflow,
}
impl GeneratorError {
/// Checks whether this error is raised by one of the "size too large" cases.
///
/// It returns [`true`] on either [`FixedSizeTooLarge`](Self::FixedSizeTooLarge)
/// or [`InputSizeTooLarge`](Self::InputSizeTooLarge) variants.
pub fn is_size_too_large_error(&self) -> bool {
matches!(self, GeneratorError::FixedSizeTooLarge | GeneratorError::InputSizeTooLarge)
}
}
impl core::fmt::Display for GeneratorError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.write_str(match self {
GeneratorError::FixedSizeMismatch => "current state mismatches to the fixed size previously set",
GeneratorError::FixedSizeTooLarge => "fixed size is too large to generate a fuzzy hash",
GeneratorError::InputSizeTooLarge => "input size is too large to generate a fuzzy hash",
GeneratorError::OutputOverflow => "output is too large for specific fuzzy hash variant",
})
}
}
#[cfg(feature = "std")]
impl std::error::Error for GeneratorError {}
#[cfg(all(not(feature = "std"), feature = "nightly"))]
impl core::error::Error for GeneratorError {}
impl Generator {
/// Returns the preferred maximum input size at the specified block size.
///
/// If the total input size exceeds this border, the double block size
/// (*base-2 logarithm* form: `log_block_size + 1`) is preferred for
/// the final block size (if the block hash for double block size has
/// enough length: `block_hash::HALF_SIZE`).
const fn guessed_preferred_max_input_size_at(log_block_size: u8) -> u64 {
block_size::from_log_internal(log_block_size) as u64 * block_hash::FULL_SIZE as u64
}
/// The maximum input size (inclusive).
///
/// ssdeep has an upper limit of 192GiB (inclusive).
///
/// This is a *hard* limit. Feeding data larger than this constant size is
/// an invalid operation.
pub const MAX_INPUT_SIZE: u64 =
Self::guessed_preferred_max_input_size_at(block_size::NUM_VALID as u8 - 1);
/// The recommended minimum input size (inclusive).
///
/// This is a *soft* limit. Although it's doubtful that the result from the
/// input smaller than this constant size is meaningful enough,
/// it's still valid. It might be useful for diagnostics.
pub const MIN_RECOMMENDED_INPUT_SIZE: u64 = 4096 + 1;
/// Creates a new [`Generator`] object.
pub fn new() -> Self {
Generator {
input_size: 0,
fixed_size: None,
elim_border: Self::guessed_preferred_max_input_size_at(0),
bhidx_start: 0,
bhidx_end: 1,
bhidx_end_limit: block_size::NUM_VALID - 1,
roll_mask: 0,
roll_hash: RollingHash::new(),
bh_context: [BlockHashContext::new(); block_size::NUM_VALID],
h_last: PartialFNVHash::new(),
is_last: false
}
}
/// Performs a partial initialization.
///
/// It effectively resets the state to the initial one but does not
/// necessarily reinitialize all internal fields.
pub fn reset(&mut self) {
self.input_size = 0;
self.fixed_size = None;
self.elim_border = Self::guessed_preferred_max_input_size_at(0);
self.bhidx_start = 0;
self.bhidx_end = 1;
self.bhidx_end_limit = block_size::NUM_VALID - 1;
self.roll_mask = 0;
self.roll_hash = RollingHash::new();
self.bh_context[0].reset();
// skip bh_context[1..block_size::NUM_VALID] initialization
// skip h_last initialization
self.is_last = false;
}
/// Retrieves the input size fed to the generator object.
#[inline(always)]
pub fn input_size(&self) -> u64 { self.input_size }
/// Checks whether a ssdeep-compatible client may raise a warning due to
/// its small input size (less meaningful fuzzy hashes will be generated
/// on the finalization).
///
/// The result is based on either the fixed size or the current input size.
/// So, this method should be used after calling either:
///
/// * [`set_fixed_input_size()`](Self::set_fixed_input_size())
/// or similar methods
/// * [`finalize()`](Self::finalize())
/// or similar methods
///
/// and before resetting the state.
#[inline]
pub fn may_warn_about_small_input_size(&self) -> bool {
self.fixed_size.unwrap_or(self.input_size) < Self::MIN_RECOMMENDED_INPUT_SIZE
}
/// Returns the suitable initial block size (equal to or greater than
/// `start`) for the specified input size (in *base-2 logarithm* form).
///
/// `start` is also in a *base-2 logarithm* form.
///
/// This method returns a good candidate but not always suitable for the
/// final fuzzy hash. The final guess is performed in the
/// [`guess_output_log_block_size()`](Self::guess_output_log_block_size())
/// method.
fn get_log_block_size_from_input_size(size: u64, start: usize) -> usize {
let size_unit = Self::guessed_preferred_max_input_size_at(0);
if size <= size_unit {
return start;
}
let high_size = (size - 1) / size_unit; // grcov-excl-br-line:DIVZERO
optionally_unsafe! {
invariant!(high_size > 0);
}
usize::max(start, (crate::utils::u64_ilog2(high_size) + 1) as usize)
}
/// Set the fixed input size for optimal performance.
///
/// This method sets the internal upper limit of the block size to
/// update per byte. It improves the performance by preventing unnecessary
/// block hash updates (that will never be used by the final fuzzy hash).
///
/// This method returns an error if:
///
/// 1. `size` is larger than [`MAX_INPUT_SIZE`](Self::MAX_INPUT_SIZE)
/// ([`GeneratorError::FixedSizeTooLarge`]) or
/// 2. The fixed size is previously set but the new one is different
/// ([`GeneratorError::FixedSizeMismatch`]).
pub fn set_fixed_input_size(&mut self, size: u64) -> Result<(), GeneratorError> {
if size > Self::MAX_INPUT_SIZE {
return Err(GeneratorError::FixedSizeTooLarge);
}
if let Some(expected_size) = self.fixed_size {
if expected_size != size {
return Err(GeneratorError::FixedSizeMismatch);
}
}
self.fixed_size = Some(size);
self.bhidx_end_limit = usize::min(
block_size::NUM_VALID - 1,
Self::get_log_block_size_from_input_size(size, 0) + 1
);
Ok(())
}
/// Set the fixed input size for optimal performance.
///
/// This is a thin wrapper of the
/// [`set_fixed_input_size()`](Self::set_fixed_input_size()) method.
///
/// Although that this implementation handles [`u64`] as the native input
/// size type and
/// [the file size in the Rust standard library](std::fs::Metadata::len)
/// is represented as [`u64`], it's not rare that you want to give a
/// [`usize`] to hash a buffer (or your program uses [`usize`] for its
/// native size representation).
///
/// It accepts `size` in [`usize`] and if this size is larger than
/// 64-bits, an error containing [`GeneratorError::FixedSizeTooLarge`]
/// is returned. Other than that, this is the same as
/// [`set_fixed_input_size()`](Self::set_fixed_input_size()).
#[inline]
pub fn set_fixed_input_size_in_usize(&mut self, size: usize) -> Result<(), GeneratorError> {
// grcov-excl-br-start
if let Ok(size) = u64::try_from(size) {
self.set_fixed_input_size(size)
}
else {
// grcov-excl-start: Only reproduces in 128-bit usize environments.
Err(GeneratorError::FixedSizeTooLarge)
// grcov-excl-stop
}
// grcov-excl-br-stop
}
}
/// Template to generate [`Generator::update()`]-like methods.
///
/// * `$self`
/// A mutable reference to the [`Generator`] object.
/// * `$buffer`
/// An iterator-like object (each item is in [`u8`]) to consume.
/// * `$proc_per_byte`
/// Statements to run each time the generator consumes a byte
/// (e.g. on the iterator variant, advance the `input_size` variable).
macro_rules! generator_update_template {
($self: expr, $buffer: expr, $proc_per_byte: block) => {
optionally_unsafe! {
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
let mut bhrange0 = $self.bh_context.as_mut_ptr().add($self.bhidx_start);
let mut bhrange1 = $self.bh_context.as_mut_ptr().add($self.bhidx_end);
let mut bh: *mut BlockHashContext;
let mut bh_next: *mut BlockHashContext;
}
}
for ch in $buffer {
$proc_per_byte;
$self.roll_hash.update_by_byte(ch);
if $self.is_last {
$self.h_last.update_by_byte(ch);
}
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
bh = bhrange0;
loop {
(*bh).h_full.update_by_byte(ch);
(*bh).h_half.update_by_byte(ch);
bh = bh.add(1);
if bh == bhrange1 {
break;
}
}
}
else {
for bh1 in &mut $self.bh_context[$self.bhidx_start..$self.bhidx_end] {
bh1.h_full.update_by_byte(ch);
bh1.h_half.update_by_byte(ch);
}
}
}
let h_org = $self.roll_hash.value().wrapping_add(1);
let mut h = h_org / block_size::MIN;
if unlikely(h_org == 0) {
continue;
}
if likely(h & $self.roll_mask != 0) {
continue;
}
if h_org % block_size::MIN != 0 {
continue;
}
h >>= $self.bhidx_start;
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
#[doc(hidden)]
macro_rules! bh_loop_2 {
($block: block) => {
bh = bhrange0;
loop {
bh_next = bh.add(1);
$block
bh = bh_next;
if bh >= bhrange1 {
break;
}
}
};
}
}
else {
let mut i = $self.bhidx_start;
#[doc(hidden)]
macro_rules! bh_loop_2 {
($block: block) => {
loop {
$block;
i += 1;
if i >= $self.bhidx_end {
break;
}
}
};
}
}
}
bh_loop_2!({
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
#[doc(hidden)]
macro_rules! bh_curr {() => { *bh }}
#[doc(hidden)]
macro_rules! bh_next {() => { *bh_next }}
}
else {
#[doc(hidden)]
macro_rules! bh_curr {() => { $self.bh_context[i] }}
#[doc(hidden)]
macro_rules! bh_next {() => { $self.bh_context[i+1] }}
}
}
if unlikely(
bh_curr!().blockhash_index == 0 // grcov-excl-br-line:ARRAY
)
{
// New block size candidate is found.
if $self.bhidx_end > $self.bhidx_end_limit {
// If this is not constrained by bhidx_end_limit
// (set by the fixed input size) and it has reached
// to the largest index, enable "last" FNV hash updates.
// It will be used for block hash 2 if the final block size
// is the maximum valid one.
if $self.bhidx_end_limit == block_size::NUM_VALID - 1 && !$self.is_last {
$self.h_last = bh_curr!().h_full; // grcov-excl-br-line:ARRAY
$self.is_last = true;
}
}
else {
// Reset the block hash context and advance bhidx_end
// so that the generator can begin block hash context updates.
bh_next!().reset(); // grcov-excl-br-line:ARRAY
bh_next!().h_full = bh_curr!().h_full; // grcov-excl-br-line:ARRAY
bh_next!().h_half = bh_curr!().h_half; // grcov-excl-br-line:ARRAY
$self.bhidx_end += 1;
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
bhrange1 = bhrange1.add(1);
}
}
}
}
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
#[doc(hidden)]
macro_rules! bh_curr_reused {() => { *bh }}
}
else {
let bh_curr_reused = &mut $self.bh_context[i]; // grcov-excl-br-line:ARRAY
#[doc(hidden)]
macro_rules! bh_curr_reused {() => { bh_curr_reused }}
}
}
invariant!(bh_curr_reused!().blockhash_index < block_hash::FULL_SIZE);
bh_curr_reused!().blockhash[bh_curr_reused!().blockhash_index] = bh_curr_reused!().h_full.value(); // grcov-excl-br-line:ARRAY
bh_curr_reused!().blockhash_ch_half = bh_curr_reused!().h_half.value();
if bh_curr_reused!().blockhash_index < block_hash::FULL_SIZE - 1 {
bh_curr_reused!().blockhash_index += 1;
bh_curr_reused!().h_full = PartialFNVHash::new();
if bh_curr_reused!().blockhash_index < block_hash::HALF_SIZE {
bh_curr_reused!().blockhash_ch_half = BLOCKHASH_CHAR_NIL;
bh_curr_reused!().h_half = PartialFNVHash::new();
}
}
else if $self.bhidx_end - $self.bhidx_start >= 2
&& $self.elim_border < $self.fixed_size.unwrap_or($self.input_size)
&& bh_next!().blockhash_index >= block_hash::HALF_SIZE // grcov-excl-br-line:ARRAY
{
// (Block hash elimination)
// Current block hash context will be never used on the final fuzzy hash.
// Advance bhidx_start and prepare for the next block hash elimination.
$self.bhidx_start += 1;
cfg_if::cfg_if! {
if #[cfg(feature = "unsafe")] {
bhrange0 = bhrange0.add(1);
}
}
$self.roll_mask = $self.roll_mask.wrapping_mul(2).wrapping_add(1);
$self.elim_border = $self.elim_border.wrapping_mul(2);
}
// Loop between bhidx_start and the maximum matched index.
if (h & 1) != 0 {
break;
}
h >>= 1;
});
}
}
};
}
impl Generator {
/// Process data, updating the internal state.
pub fn update(&mut self, buffer: &[u8]) -> &mut Self {
self.input_size =
if let Ok(size) = u64::try_from(buffer.len()) { // grcov-excl-br-line: else branch only in 128-bit usize environments.
self.input_size.saturating_add(size)
}
else {
// grcov-excl-start: Only reproduces in 128-bit usize environments.
Self::MAX_INPUT_SIZE + 1
// grcov-excl-stop
};
// grcov-generator-start
generator_update_template!(self, buffer.iter().cloned(), {});
// grcov-generator-stop
self
}
/// Process data (an iterator), updating the internal state.
pub fn update_by_iter(&mut self, iter: impl Iterator<Item = u8>) -> &mut Self{
// grcov-generator-start
generator_update_template!(self, iter, {
self.input_size = self.input_size.saturating_add(1);
});
// grcov-generator-stop
self
}
/// Process a byte, updating the internal state.
pub fn update_by_byte(&mut self, ch: u8) -> &mut Self {
self.input_size = self.input_size.saturating_add(1);
// grcov-generator-start
generator_update_template!(self, [ch; 1], {});
// grcov-generator-stop
self
}
/// Guess the final block size based on the current internal state.
///
/// First, the generator prefers the return value of
/// [`get_log_block_size_from_input_size()`](Self::get_log_block_size_from_input_size()).
///
/// But if the resulting fuzzy hash is too short, we have to half
/// the block size until it finds a fuzzy hash of suitable length.
/// In other words, it tries to find a block hash until:
///
/// * It find a block size so that corresponding block hash is already
/// at least [`block_hash::HALF_SIZE`] chars in length
/// (one character may be appended on the finalization process) or
/// * It reaches the lower bound ([`bhidx_start`](Self::bhidx_start)).
///
/// The resulting block size and the corresponding block hash are used as:
///
/// 1. Block size part
/// 2. Block hash 1
///
/// For the block hash 2 part, the block hash for double block size is used.
fn guess_output_log_block_size(&self) -> usize {
let mut log_block_size =
Self::get_log_block_size_from_input_size(self.input_size, self.bhidx_start);
log_block_size = usize::min(log_block_size, self.bhidx_end - 1);
optionally_unsafe! {
invariant!(log_block_size < self.bh_context.len());
}
while log_block_size > self.bhidx_start
&& self.bh_context[log_block_size].blockhash_index < block_hash::HALF_SIZE // grcov-excl-br-line:ARRAY
{
log_block_size -= 1;
optionally_unsafe! {
invariant!(log_block_size < self.bh_context.len());
}
}
log_block_size
}
/// Retrieves the resulting fuzzy hash.
///
/// Usually, you should use the [`finalize()`](Self::finalize()) method (a
/// wrapper of this method) instead because it passes the `TRUNC` option
/// [`true`] to this method (as the default ssdeep option).
///
/// Although some methods including this is named *finalize*, you can
/// continue feeding more data and updating the internal state without
/// problems. Still, it's hard to find such use cases so that using
/// [`Generator`] like this is useful.
#[allow(clippy::branches_sharing_code)]
pub fn finalize_raw<const TRUNC: bool, const S1: usize, const S2: usize>(&self)
-> Result<fuzzy_raw_type!(S1, S2), GeneratorError>
where
BlockHashSize<S1>: ConstrainedBlockHashSize,
BlockHashSize<S2>: ConstrainedBlockHashSize,
BlockHashSizes<S1, S2>: ConstrainedBlockHashSizes
{
if let Some(input_size) = self.fixed_size {
if input_size != self.input_size {
return Err(GeneratorError::FixedSizeMismatch);
}
}
if Self::MAX_INPUT_SIZE < self.input_size {
return Err(GeneratorError::InputSizeTooLarge);
}
let log_block_size = self.guess_output_log_block_size();
let mut fuzzy: fuzzy_raw_type!(S1, S2) = FuzzyHashData::new();
fuzzy.log_blocksize = log_block_size as u8;
// Copy block hash 1
let roll_value = self.roll_hash.value();
optionally_unsafe! {
invariant!(log_block_size < self.bh_context.len());
}
let bh_0 = &self.bh_context[log_block_size]; // grcov-excl-br-line:ARRAY
{
let mut sz = bh_0.blockhash_index;
if bh_0.blockhash[block_hash::FULL_SIZE - 1] != BLOCKHASH_CHAR_NIL {
sz += 1;
}
optionally_unsafe! {
invariant!(sz <= fuzzy.blockhash1.len());
invariant!(sz <= bh_0.blockhash.len());
}
fuzzy.blockhash1[0..sz].clone_from_slice(&bh_0.blockhash[0..sz]); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash1 = sz as u8;
if roll_value != 0 {
if sz == block_hash::FULL_SIZE {
fuzzy.blockhash1[block_hash::FULL_SIZE - 1] = bh_0.h_full.value(); // grcov-excl-br-line:ARRAY
}
else {
optionally_unsafe! {
invariant!(sz < block_hash::FULL_SIZE);
}
fuzzy.blockhash1[sz] = bh_0.h_full.value(); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash1 += 1;
}
}
}
// Copy block hash 2 or adjust block hashes.
if log_block_size < self.bhidx_end - 1 {
// Copy block hash 2 (normal path)
optionally_unsafe! {
invariant!(log_block_size + 1 < self.bh_context.len());
}
let bh_1 = &self.bh_context[log_block_size + 1]; // grcov-excl-br-line:ARRAY
if TRUNC {
let mut sz = bh_1.blockhash_index;
if bh_1.blockhash_ch_half != BLOCKHASH_CHAR_NIL {
debug_assert!(sz >= block_hash::HALF_SIZE); // invariant
sz = block_hash::HALF_SIZE;
fuzzy.blockhash2[0..(sz - 1)].clone_from_slice(&bh_1.blockhash[0..(sz - 1)]); // grcov-excl-br-line:ARRAY
fuzzy.blockhash2[sz - 1] = if roll_value != 0 { bh_1.h_half.value() } else { bh_1.blockhash_ch_half }; // grcov-excl-br-line:ARRAY
}
else {
optionally_unsafe! {
invariant!(sz <= fuzzy.blockhash2.len());
invariant!(sz <= bh_1.blockhash.len());
}
fuzzy.blockhash2[0..sz].clone_from_slice(&bh_1.blockhash[0..sz]); // grcov-excl-br-line:ARRAY
if roll_value != 0 {
optionally_unsafe! {
invariant!(sz < fuzzy.blockhash2.len());
}
fuzzy.blockhash2[sz] = bh_1.h_half.value(); // grcov-excl-br-line:ARRAY
sz += 1;
}
}
fuzzy.len_blockhash2 = sz as u8;
}
else {
let mut sz = bh_1.blockhash_index;
if bh_1.blockhash[block_hash::FULL_SIZE - 1] != BLOCKHASH_CHAR_NIL {
sz += 1;
}
#[allow(clippy::collapsible_if)]
if !<fuzzy_raw_type!(S1, S2)>::IS_LONG_FORM {
if sz > S2 {
// Overflow will occur if:
// 1. truncation is disabled (!TRUNC),
// 2. the output is short and
// 3. the input (block hash 2) is large enough.
return Err(GeneratorError::OutputOverflow);
}
}
optionally_unsafe! {
invariant!(sz <= fuzzy.blockhash2.len());
invariant!(sz <= bh_1.blockhash.len());
}
fuzzy.blockhash2[0..sz].clone_from_slice(&bh_1.blockhash[0..sz]); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash2 = sz as u8;
if roll_value != 0 {
#[allow(clippy::collapsible_else_if)]
if !<fuzzy_raw_type!(S1, S2)>::IS_LONG_FORM {
if sz >= S2 {
// Overflow will occur if:
// 1. truncation is disabled (!TRUNC),
// 2. the output is short and
// 3. the input (block hash 2) is large enough.
return Err(GeneratorError::OutputOverflow);
}
optionally_unsafe! {
invariant!(sz < S2);
}
fuzzy.blockhash2[sz] = bh_1.h_full.value(); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash2 += 1;
}
else {
if sz == block_hash::FULL_SIZE {
fuzzy.blockhash2[block_hash::FULL_SIZE - 1] = bh_1.h_full.value(); // grcov-excl-br-line:ARRAY
}
else {
optionally_unsafe! {
invariant!(sz < block_hash::FULL_SIZE);
}
fuzzy.blockhash2[sz] = bh_1.h_full.value(); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash2 += 1;
}
}
}
}
}
else if roll_value != 0 {
debug_assert!(log_block_size == 0 || log_block_size == block_size::NUM_VALID - 1);
if log_block_size == 0 {
// No pieces are matched but at least one byte is processed.
fuzzy.blockhash2[0] = bh_0.h_full.value(); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash2 = 1;
}
else {
// We need to handle block hash 2 for the largest valid block
// size specially because effective block size of the block hash
// 2 is not valid (and no regular pieces are available).
fuzzy.blockhash2[0] = self.h_last.value(); // grcov-excl-br-line:ARRAY
fuzzy.len_blockhash2 = 1;
}
}
else {
// We are not confident enough that we have processed a byte.
// Note: there's an easy way to trigger this condition:
// feed seven zero bytes at the end.
fuzzy.len_blockhash2 = 0;
}
Ok(fuzzy)
}
/// Retrieves the resulting fuzzy hash.
///
/// The type of resulting fuzzy hash ([`RawFuzzyHash`]) is in
/// a raw form (not normalized). This is the default behavior of ssdeep.
///
/// This is equivalent to the libfuzzy's `fuzzy_digest` function
/// without non-default flags.
#[inline]
pub fn finalize(&self) -> Result<RawFuzzyHash, GeneratorError> {
self.finalize_raw::<true, {block_hash::FULL_SIZE}, {block_hash::HALF_SIZE}>()
}
/// Retrieves the resulting fuzzy hash, *not* truncating the second block hash.
///
/// Note that *not* doing the truncation is usually not what you want.
///
/// This is equivalent to the `FUZZY_FLAG_NOTRUNC` flag of libfuzzy's
/// `fuzzy_digest` function.
#[inline]
pub fn finalize_without_truncation(&self) -> Result<LongRawFuzzyHash, GeneratorError> {
self.finalize_raw::<false, {block_hash::FULL_SIZE}, {block_hash::FULL_SIZE}>()
}
}
impl Default for Generator {
fn default() -> Self {
Self::new()
}
}
impl AddAssign<&[u8]> for Generator {
/// Updates the hash value by processing a slice of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8]) {
self.update(buffer);
}
}
impl<const N: usize> AddAssign<&[u8; N]> for Generator {
/// Updates the hash value by processing an array of [`u8`].
#[inline(always)]
fn add_assign(&mut self, buffer: &[u8; N]) {
self.update(&buffer[..]);
}
}
impl AddAssign<u8> for Generator {
/// Updates the hash value by processing a byte.
#[inline(always)]
fn add_assign(&mut self, byte: u8) {
self.update_by_byte(byte);
}
}
/// Constant assertions related to this module.
#[doc(hidden)]
mod const_asserts {
use super::*;
use static_assertions::{const_assert, const_assert_eq, const_assert_ne};
// Compare with original ssdeep constants
// fuzzy.c: ROLLING_WINDOW
const_assert_eq!(ROLLING_WINDOW, 7);
// fuzzy.c: HASH_INIT
const_assert_eq!(FNV_HASH_INIT, 0x27);
// ssdeep.h: SSDEEP_MIN_FILE_SIZE
// (note that this size is exclusive, unlike inclusive MIN_RECOMMENDED_INPUT_SIZE)
const_assert_eq!(Generator::MIN_RECOMMENDED_INPUT_SIZE - 1, 4096);
// Compare with generated table
const_assert_eq!(block_hash::ALPHABET_SIZE, crate::generate::fnv_table::_ALPHABET_SIZE);
// ALPHABET_SIZE and FNV_HASH_INIT properties (for PartialFNVHash)
const_assert!(0 < block_hash::ALPHABET_SIZE && block_hash::ALPHABET_SIZE <= 256);
const_assert!(block_hash::ALPHABET_SIZE.is_power_of_two());
const_assert!((FNV_HASH_INIT as u16) < (block_hash::ALPHABET_SIZE as u16));
// Consistency between module-local and struct-global one.
const_assert_eq!(ROLLING_WINDOW, RollingHash::WINDOW_SIZE);
// RollingHash::h3 is a rolling component and an input byte
// are vanished after processing more RollingHash::WINDOW_SIZE bytes.
// grcov-excl-br-start
#[cfg(test)]
#[test]
fn rolling_hash_h3_shift_amount() {
assert!(RollingHash::WINDOW_SIZE
.checked_mul(RollingHash::H3_LSHIFT)
.and_then(|x| u32::try_from(x).ok())
.map(|x| x >= u32::BITS)
.unwrap_or(false));
}
// grcov-excl-br-end
// BLOCKHASH_CHAR_NIL must be outside any valid characters.
const_assert!(block_hash::ALPHABET_SIZE <= BLOCKHASH_CHAR_NIL as usize);
// Compare with a precomputed value.
const_assert_eq!(Generator::MAX_INPUT_SIZE, 192u64 * 1024 * 1024 * 1024);
// and not u64::MAX
const_assert_ne!(Generator::MAX_INPUT_SIZE, u64::MAX);
// Rolling hash value of u32::MAX does not make a piece.
// Because we use rolling hash value + 1 to determine piece splitting
// (unlike the original implementation) for faster processing, we have to
// (additionally) take care of an arithmetic overflow.
const_assert_ne!(u32::MAX % block_size::MIN, block_size::MIN - 1);
}