wnfs_unixfs_file/chunker/
rabin.rs

1//! Implements a rabin based chunker, following the implementation from [here](https://github.com/ipfs-shipyard/DAGger/blob/master/internal/dagger/chunker/rabin/impl.go).
2
3use bytes::{Bytes, BytesMut};
4use std::io;
5use tokio::io::{AsyncRead, AsyncReadExt};
6use wnfs_common::utils::{boxed_stream, BoxStream, CondSend};
7
8/// Rabin fingerprinting based chunker.
9#[derive(Debug, Clone, PartialEq, Eq)]
10pub struct Rabin {
11    init_state: u64,
12    mask: u64,
13    mins_sans_preheat: usize,
14    preset: Preset,
15    config: Config,
16}
17
18pub const GO_IPFS_V0_PRESET: Preset = Presets::GoIpfsV0.into_preset();
19
20impl Default for Rabin {
21    fn default() -> Self {
22        Rabin::new(Config::default(), GO_IPFS_V0_PRESET)
23    }
24}
25
26impl Rabin {
27    pub fn new(config: Config, preset: Preset) -> Self {
28        assert!(
29            config.min_size < config.max_size,
30            "Invalid config, max_size must be larger than min_size"
31        );
32
33        // Due to out_table[0] always being 0, this is simply the value 1
34        // but derive it longform nevertheless
35        let init_state = (preset.out_table[0].wrapping_shl(8) | 1)
36            ^ (preset.mod_table[(preset.out_table[0] >> preset.deg_shift) as usize]);
37        debug_assert_eq!(init_state, 1);
38
39        Rabin {
40            init_state,
41            mins_sans_preheat: config.min_size - preset.window_size,
42            mask: 1 << (config.mask_bits - 1),
43            preset,
44            config,
45        }
46    }
47
48    pub fn chunks<'a, R: AsyncRead + Unpin + CondSend + 'a>(
49        self,
50        mut source: R,
51    ) -> BoxStream<'a, io::Result<Bytes>> {
52        boxed_stream(async_stream::stream! {
53            let target_size = 3 * self.config.max_size;
54            let mut buf = BytesMut::with_capacity(target_size);
55            let mut use_entire_buffer = false;
56            let preset = &self.preset;
57
58            while !use_entire_buffer {
59                // Fill buffer
60                let mut total_read = 0;
61                while buf.len() < target_size {
62                    let read = source.read_buf(&mut buf).await?;
63                    total_read += read;
64                    if read == 0 {
65                        break;
66                    }
67                }
68
69                // abort early if we have not received enough data
70                if buf.len() < self.config.min_size {
71                    if !buf.is_empty() {
72                        yield Ok(buf.freeze());
73                    }
74                    break;
75                }
76
77                let post_buf_idx = buf.len();
78
79                // read 0 bytes => the source is exhausted
80                use_entire_buffer = total_read == 0;
81
82                let mut cur_idx = 0;
83
84                loop {
85                    let last_idx = cur_idx;
86                    let mut next_round_max = last_idx + self.config.max_size;
87
88                    // we will be running out of data, but still *could* run a round
89                    if next_round_max > post_buf_idx {
90                        // abort early if we are allowed to
91                        if !use_entire_buffer {
92                            break;
93                        }
94                        // signify where we stop hard
95                        next_round_max = post_buf_idx;
96                    }
97
98                    // in this case we will *NOT* be able to run another round at all
99                    if cur_idx + self.config.min_size >= post_buf_idx {
100                        if use_entire_buffer && post_buf_idx != cur_idx {
101                            let chunk = buf.clone().freeze().slice(cur_idx..post_buf_idx);
102                            yield Ok(chunk);
103                        }
104                        break;
105                    }
106
107                    // reset
108                    let mut state = self.init_state;
109
110                    // preheat
111                    cur_idx += self.mins_sans_preheat;
112                    for i in 1..=preset.window_size {
113                        if i == preset.window_size {
114                            state ^= preset.out_table[1];
115                        } else {
116                            state ^= preset.out_table[0];
117                        }
118                        let table_point = preset.mod_table[(state >> preset.deg_shift) as usize];
119                        state = (state.wrapping_shl(8) | buf[cur_idx] as u64) ^ table_point;
120                        cur_idx += 1;
121                    }
122
123                    // cycle
124                    while cur_idx < next_round_max
125                        && ((state & self.mask) != self.config.target_value)
126                    {
127                        state ^= preset.out_table[
128                            buf[cur_idx - preset.window_size] as usize
129                        ];
130                        let table_point = preset.mod_table[(state >> preset.deg_shift) as usize];
131                        state = (state.wrapping_shl(8) | (buf[cur_idx] as u64)) ^ table_point;
132                        cur_idx += 1;
133                    }
134
135                    // always a find at this point, we bailed on short buffers earlier
136                    let chunk = buf.clone().freeze().slice(last_idx..cur_idx);
137                    yield Ok(chunk);
138                }
139
140                // remove processed data
141                let _ = buf.split_to(cur_idx);
142            }
143        })
144    }
145}
146
147#[derive(Debug, Clone, PartialEq, Eq)]
148pub struct Preset {
149    out_table: [u64; 256],
150    mod_table: [u64; 256],
151    window_size: usize,
152    deg_shift: usize,
153    polynomial: u64,
154}
155
156#[derive(Debug, Clone, PartialEq, Eq)]
157pub struct Config {
158    /// State value denoting a chunk boundary.
159    pub target_value: u64,
160    /// Amount of bits of state to compare to target on every iteration. For a random input the average chunk size is about `2**m`.
161    pub mask_bits: usize,
162    /// Maximum data chunk size.
163    pub max_size: usize,
164    /// Minimum data chunk size.
165    pub min_size: usize,
166}
167
168impl Default for Config {
169    fn default() -> Self {
170        // The default used by IPFS.
171        Config {
172            target_value: 0,
173            mask_bits: 18,
174            max_size: 393_216, // (2**18)+(2**18)/2
175            min_size: 87_381,  // (2**18)/3
176        }
177    }
178}
179
180#[derive(Copy, Clone, Debug)]
181pub enum Presets {
182    GoIpfsV0,
183}
184
185impl Presets {
186    pub const fn into_preset(self) -> Preset {
187        Preset {
188            polynomial: 0x3df305dfb2a805,
189            window_size: 16,
190            deg_shift: 45,
191            out_table: [
192                0x0,
193                0x17fa63217c2ad7,
194                0x1207c39d4afdab,
195                0x5fda0bc36d77c,
196                0x19fc82e5275353,
197                0xe06e1c45b7984,
198                0xbfb41786daef8,
199                0x1c01225911842f,
200                0xe0a0015fc0ea3,
201                0x19f06334802474,
202                0x1c0dc388b6f308,
203                0xbf7a0a9cad9df,
204                0x17f682f0db5df0,
205                0xce1d1a77727,
206                0x5f1416d91a05b,
207                0x120b224ced8a8c,
208                0x1c14002bf81d46,
209                0xbee630a843791,
210                0xe13c3b6b2e0ed,
211                0x19e9a097ceca3a,
212                0x5e882cedf4e15,
213                0x1212e1efa364c2,
214                0x17ef415395b3be,
215                0x152272e99969,
216                0x121e003e0413e5,
217                0x5e4631f783932,
218                0x19c3a34eee4e,
219                0x17e3a08232c499,
220                0xbe282db2340b6,
221                0x1c18e1fa5f6a61,
222                0x19e5414669bd1d,
223                0xe1f22671597ca,
224                0x5db0588429289,
225                0x122166a93eb85e,
226                0x17dcc615086f22,
227                0x26a5347445f5,
228                0x1c27876d65c1da,
229                0xbdde44c19eb0d,
230                0xe2044f02f3c71,
231                0x19da27d15316a6,
232                0xbd1059dbe9c2a,
233                0x1c2b66bcc2b6fd,
234                0x19d6c600f46181,
235                0xe2ca521884b56,
236                0x122d877899cf79,
237                0x5d7e459e5e5ae,
238                0x2a44e5d332d2,
239                0x17d027c4af1805,
240                0x19cf05a3ba8fcf,
241                0xe356682c6a518,
242                0xbc8c63ef07264,
243                0x1c32a51f8c58b3,
244                0x3387469ddc9c,
245                0x17c9e467e1f64b,
246                0x123444dbd72137,
247                0x5ce27faab0be0,
248                0x17c505b646816c,
249                0x3f66973aabbb,
250                0x5c2c62b0c7cc7,
251                0x1238a50a705610,
252                0xe39875361d23f,
253                0x19c3e4721df8e8,
254                0x1c3e44ce2b2f94,
255                0xbc427ef570543,
256                0xbb60b10852512,
257                0x1c4c6831f90fc5,
258                0x19b1c88dcfd8b9,
259                0xe4babacb3f26e,
260                0x124a89f5a27641,
261                0x5b0ead4de5c96,
262                0x4d4a68e88bea,
263                0x17b7294994a13d,
264                0x5bc0b05792bb1,
265                0x12466824050166,
266                0x17bbc89833d61a,
267                0x41abb94ffccd,
268                0x1c4089e05e78e2,
269                0xbbaeac1225235,
270                0xe474a7d148549,
271                0x19bd295c68af9e,
272                0x17a20b3b7d3854,
273                0x58681a011283,
274                0x5a5c8a637c5ff,
275                0x125fab874bef28,
276                0xe5e89de5a6b07,
277                0x19a4eaff2641d0,
278                0x1c594a431096ac,
279                0xba329626cbc7b,
280                0x19a80b2e8136f7,
281                0xe52680ffd1c20,
282                0xbafc8b3cbcb5c,
283                0x1c55ab92b7e18b,
284                0x5489cba665a4,
285                0x17aeeaeada4f73,
286                0x12534a56ec980f,
287                0x5a9297790b2d8,
288                0xe6d0e98c7b79b,
289                0x19976db9bb9d4c,
290                0x1c6acd058d4a30,
291                0xb90ae24f160e7,
292                0x17918c7de0e4c8,
293                0x6bef5c9cce1f,
294                0x5964fe0aa1963,
295                0x126c2cc1d633b4,
296                0x670e8d3bb938,
297                0x179d6dac4793ef,
298                0x1260cd10714493,
299                0x59aae310d6e44,
300                0x199b8c681cea6b,
301                0xe61ef4960c0bc,
302                0xb9c4ff55617c0,
303                0x1c662cd42a3d17,
304                0x12790eb33faadd,
305                0x5836d9243800a,
306                0x7ecd2e755776,
307                0x1784ae0f097da1,
308                0xb858c5618f98e,
309                0x1c7fef7764d359,
310                0x19824fcb520425,
311                0xe782cea2e2ef2,
312                0x1c730ea6c3a47e,
313                0xb896d87bf8ea9,
314                0xe74cd3b8959d5,
315                0x198eae1af57302,
316                0x58f8c43e4f72d,
317                0x1275ef6298ddfa,
318                0x17884fdeae0a86,
319                0x722cffd22051,
320                0x176c16210a4a24,
321                0x9675007660f3,
322                0x56bd5bc40b78f,
323                0x1291b69d3c9d58,
324                0xe9094c42d1977,
325                0x196af7e55133a0,
326                0x1c97575967e4dc,
327                0xb6d34781bce0b,
328                0x19661634f64487,
329                0xe9c75158a6e50,
330                0xb61d5a9bcb92c,
331                0x1c9bb688c093fb,
332                0x9a94d1d117d4,
333                0x1760f7f0ad3d03,
334                0x129d574c9bea7f,
335                0x567346de7c0a8,
336                0xb78160af25762,
337                0x1c82752b8e7db5,
338                0x197fd597b8aac9,
339                0xe85b6b6c4801e,
340                0x128494efd50431,
341                0x57ef7cea92ee6,
342                0x8357729ff99a,
343                0x17793453e3d34d,
344                0x572161f0e59c1,
345                0x1288753e727316,
346                0x1775d58244a46a,
347                0x8fb6a3388ebd,
348                0x1c8e94fa290a92,
349                0xb74f7db552045,
350                0xe89576763f739,
351                0x197334461fddee,
352                0x12b713a948d8ad,
353                0x54d708834f27a,
354                0xb0d034022506,
355                0x174ab3157e0fd1,
356                0xb4b914c6f8bfe,
357                0x1cb1f26d13a129,
358                0x194c52d1257655,
359                0xeb631f0595c82,
360                0x1cbd13bcb4d60e,
361                0xb47709dc8fcd9,
362                0xebad021fe2ba5,
363                0x1940b300820172,
364                0x541915993855d,
365                0x12bbf278efaf8a,
366                0x174652c4d978f6,
367                0xbc31e5a55221,
368                0xea31382b0c5eb,
369                0x195970a3ccef3c,
370                0x1ca4d01ffa3840,
371                0xb5eb33e861297,
372                0x175f91679796b8,
373                0xa5f246ebbc6f,
374                0x55852fadd6b13,
375                0x12a231dba141c4,
376                0xa913974ccb48,
377                0x175370b630e19f,
378                0x12aed00a0636e3,
379                0x554b32b7a1c34,
380                0x195591726b981b,
381                0xeaff25317b2cc,
382                0xb5252ef2165b0,
383                0x1ca831ce5d4f67,
384                0x1cda1d318f6f36,
385                0xb207e10f345e1,
386                0xedddeacc5929d,
387                0x1927bd8db9b84a,
388                0x5269fd4a83c65,
389                0x12dcfcf5d416b2,
390                0x17215c49e2c1ce,
391                0xdb3f689eeb19,
392                0x12d01d24736195,
393                0x52a7e050f4b42,
394                0xd7deb9399c3e,
395                0x172dbd9845b6e9,
396                0xb2c9fc15432c6,
397                0x1cd6fce0281811,
398                0x192b5c5c1ecf6d,
399                0xed13f7d62e5ba,
400                0xce1d1a777270,
401                0x17347e3b0b58a7,
402                0x12c9de873d8fdb,
403                0x533bda641a50c,
404                0x19329fff502123,
405                0xec8fcde2c0bf4,
406                0xb355c621adc88,
407                0x1ccf3f4366f65f,
408                0xec41d0f8b7cd3,
409                0x193e7e2ef75604,
410                0x1cc3de92c18178,
411                0xb39bdb3bdabaf,
412                0x17389feaac2f80,
413                0xc2fccbd00557,
414                0x53f5c77e6d22b,
415                0x12c53f569af8fc,
416                0x190118b9cdfdbf,
417                0xefb7b98b1d768,
418                0xb06db24870014,
419                0x1cfcb805fb2ac3,
420                0xfd9a5ceaaeec,
421                0x1707f97d96843b,
422                0x12fa59c1a05347,
423                0x5003ae0dc7990,
424                0x170b18ac31f31c,
425                0xf17b8d4dd9cb,
426                0x50cdb317b0eb7,
427                0x12f6b810072460,
428                0xef79a4916a04f,
429                0x190df9686a8a98,
430                0x1cf059d45c5de4,
431                0xb0a3af5207733,
432                0x515189235e0f9,
433                0x12ef7bb349ca2e,
434                0x1712db0f7f1d52,
435                0xe8b82e033785,
436                0x1ce99a7712b3aa,
437                0xb13f9566e997d,
438                0xeee59ea584e01,
439                0x19143acb2464d6,
440                0xb1f1887c9ee5a,
441                0x1ce57ba6b5c48d,
442                0x1918db1a8313f1,
443                0xee2b83bff3926,
444                0x12e39a62eebd09,
445                0x519f9439297de,
446                0xe459ffa440a2,
447                0x171e3aded86a75,
448            ],
449            mod_table: [
450                0x0,
451                0x3df305dfb2a805,
452                0x46150e60d7f80f,
453                0x7be60bbf65500a,
454                0x8c2a1cc1aff01e,
455                0xb1d9191e1d581b,
456                0xca3f12a1780811,
457                0xf7cc177ecaa014,
458                0x1185439835fe03c,
459                0x125a73c5ced4839,
460                0x15e4137e3881833,
461                0x163b2323c3ab036,
462                0x1947e2542f01022,
463                0x1a98d209d42b827,
464                0x1d26b2b2227e82d,
465                0x1ef982efd954028,
466                0x20d5b76d90d687d,
467                0x230a87306bfc078,
468                0x24b4e78b9da9072,
469                0x276bd7d66683877,
470                0x281716a18a29863,
471                0x2bc826fc7103066,
472                0x2c764647875606c,
473                0x2fa9761a7c7c869,
474                0x3150f4f5a528841,
475                0x328fc4a85e02044,
476                0x3531a413a85704e,
477                0x36ee944e537d84b,
478                0x39925539bfd785f,
479                0x3a4d656444fd05a,
480                0x3df305dfb2a8050,
481                0x3e2c35824982855,
482                0x41ab6edb21ad0fa,
483                0x42745e86da878ff,
484                0x45ca3e3d2cd28f5,
485                0x46150e60d7f80f0,
486                0x4969cf173b520e4,
487                0x4ab6ff4ac0788e1,
488                0x4d089ff1362d8eb,
489                0x4ed7afaccd070ee,
490                0x502e2d4314530c6,
491                0x53f11d1eef798c3,
492                0x544f7da5192c8c9,
493                0x57904df8e2060cc,
494                0x58ec8c8f0eac0d8,
495                0x5b33bcd2f5868dd,
496                0x5c8ddc6903d38d7,
497                0x5f52ec34f8f90d2,
498                0x617ed9b6b17b887,
499                0x62a1e9eb4a51082,
500                0x651f8950bc04088,
501                0x66c0b90d472e88d,
502                0x69bc787aab84899,
503                0x6a63482750ae09c,
504                0x6ddd289ca6fb096,
505                0x6e0218c15dd1893,
506                0x70fb9a2e84858bb,
507                0x7324aa737faf0be,
508                0x749acac889fa0b4,
509                0x7745fa9572d08b1,
510                0x78393be29e7a8a5,
511                0x7be60bbf65500a0,
512                0x7c586b0493050aa,
513                0x7f875b59682f8af,
514                0x8089edebb8709f1,
515                0x8356ddb6435a1f4,
516                0x84e8bd0db50f1fe,
517                0x87378d504e259fb,
518                0x884b4c27a28f9ef,
519                0x8b947c7a59a51ea,
520                0x8c2a1cc1aff01e0,
521                0x8ff52c9c54da9e5,
522                0x910cae738d8e9cd,
523                0x92d39e2e76a41c8,
524                0x956dfe9580f11c2,
525                0x96b2cec87bdb9c7,
526                0x99ce0fbf97719d3,
527                0x9a113fe26c5b1d6,
528                0x9daf5f599a0e1dc,
529                0x9e706f0461249d9,
530                0xa05c5a8628a618c,
531                0xa3836adbd38c989,
532                0xa43d0a6025d9983,
533                0xa7e23a3ddef3186,
534                0xa89efb4a3259192,
535                0xab41cb17c973997,
536                0xacffabac3f2699d,
537                0xaf209bf1c40c198,
538                0xb1d9191e1d581b0,
539                0xb2062943e6729b5,
540                0xb5b849f810279bf,
541                0xb66779a5eb0d1ba,
542                0xb91bb8d207a71ae,
543                0xbac4888ffc8d9ab,
544                0xbd7ae8340ad89a1,
545                0xbea5d869f1f21a4,
546                0xc122833099dd90b,
547                0xc2fdb36d62f710e,
548                0xc543d3d694a2104,
549                0xc69ce38b6f88901,
550                0xc9e022fc8322915,
551                0xca3f12a17808110,
552                0xcd81721a8e5d11a,
553                0xce5e4247757791f,
554                0xd0a7c0a8ac23937,
555                0xd378f0f55709132,
556                0xd4c6904ea15c138,
557                0xd719a0135a7693d,
558                0xd8656164b6dc929,
559                0xdbba51394df612c,
560                0xdc043182bba3126,
561                0xdfdb01df4089923,
562                0xe1f7345d090b176,
563                0xe2280400f221973,
564                0xe59664bb0474979,
565                0xe64954e6ff5e17c,
566                0xe935959113f4168,
567                0xeaeaa5cce8de96d,
568                0xed54c5771e8b967,
569                0xee8bf52ae5a1162,
570                0xf07277c53cf514a,
571                0xf3ad4798c7df94f,
572                0xf4132723318a945,
573                0xf7cc177ecaa0140,
574                0xf8b0d609260a154,
575                0xfb6fe654dd20951,
576                0xfcd186ef2b7595b,
577                0xff0eb6b2d05f15e,
578                0x10113dbd770e13e2,
579                0x102cceb8a8bcbbe7,
580                0x105728b317d9ebed,
581                0x106adbb6c86b43e8,
582                0x109d17a1b6a1e3fc,
583                0x10a0e4a469134bf9,
584                0x10db02afd6761bf3,
585                0x10e6f1aa09c4b3f6,
586                0x11096984f451f3de,
587                0x11349a812be35bdb,
588                0x114f7c8a94860bd1,
589                0x11728f8f4b34a3d4,
590                0x1185439835fe03c0,
591                0x11b8b09dea4cabc5,
592                0x11c356965529fbcf,
593                0x11fea5938a9b53ca,
594                0x121c66cbae037b9f,
595                0x122195ce71b1d39a,
596                0x125a73c5ced48390,
597                0x126780c011662b95,
598                0x12904cd76fac8b81,
599                0x12adbfd2b01e2384,
600                0x12d659d90f7b738e,
601                0x12ebaadcd0c9db8b,
602                0x130432f22d5c9ba3,
603                0x1339c1f7f2ee33a6,
604                0x134227fc4d8b63ac,
605                0x137fd4f99239cba9,
606                0x138818eeecf36bbd,
607                0x13b5ebeb3341c3b8,
608                0x13ce0de08c2493b2,
609                0x13f3fee553963bb7,
610                0x140b8b50c514c318,
611                0x143678551aa66b1d,
612                0x144d9e5ea5c33b17,
613                0x14706d5b7a719312,
614                0x1487a14c04bb3306,
615                0x14ba5249db099b03,
616                0x14c1b442646ccb09,
617                0x14fc4747bbde630c,
618                0x1513df69464b2324,
619                0x152e2c6c99f98b21,
620                0x1555ca67269cdb2b,
621                0x15683962f92e732e,
622                0x159ff57587e4d33a,
623                0x15a2067058567b3f,
624                0x15d9e07be7332b35,
625                0x15e4137e38818330,
626                0x1606d0261c19ab65,
627                0x163b2323c3ab0360,
628                0x1640c5287cce536a,
629                0x167d362da37cfb6f,
630                0x168afa3addb65b7b,
631                0x16b7093f0204f37e,
632                0x16ccef34bd61a374,
633                0x16f11c3162d30b71,
634                0x171e841f9f464b59,
635                0x1723771a40f4e35c,
636                0x17589111ff91b356,
637                0x1765621420231b53,
638                0x1792ae035ee9bb47,
639                0x17af5d06815b1342,
640                0x17d4bb0d3e3e4348,
641                0x17e94808e18ceb4d,
642                0x1819a363cc891a13,
643                0x18245066133bb216,
644                0x185fb66dac5ee21c,
645                0x1862456873ec4a19,
646                0x1895897f0d26ea0d,
647                0x18a87a7ad2944208,
648                0x18d39c716df11202,
649                0x18ee6f74b243ba07,
650                0x1901f75a4fd6fa2f,
651                0x193c045f9064522a,
652                0x1947e2542f010220,
653                0x197a1151f0b3aa25,
654                0x198ddd468e790a31,
655                0x19b02e4351cba234,
656                0x19cbc848eeaef23e,
657                0x19f63b4d311c5a3b,
658                0x1a14f8151584726e,
659                0x1a290b10ca36da6b,
660                0x1a52ed1b75538a61,
661                0x1a6f1e1eaae12264,
662                0x1a98d209d42b8270,
663                0x1aa5210c0b992a75,
664                0x1adec707b4fc7a7f,
665                0x1ae334026b4ed27a,
666                0x1b0cac2c96db9252,
667                0x1b315f2949693a57,
668                0x1b4ab922f60c6a5d,
669                0x1b774a2729bec258,
670                0x1b8086305774624c,
671                0x1bbd753588c6ca49,
672                0x1bc6933e37a39a43,
673                0x1bfb603be8113246,
674                0x1c03158e7e93cae9,
675                0x1c3ee68ba12162ec,
676                0x1c4500801e4432e6,
677                0x1c78f385c1f69ae3,
678                0x1c8f3f92bf3c3af7,
679                0x1cb2cc97608e92f2,
680                0x1cc92a9cdfebc2f8,
681                0x1cf4d99900596afd,
682                0x1d1b41b7fdcc2ad5,
683                0x1d26b2b2227e82d0,
684                0x1d5d54b99d1bd2da,
685                0x1d60a7bc42a97adf,
686                0x1d976bab3c63dacb,
687                0x1daa98aee3d172ce,
688                0x1dd17ea55cb422c4,
689                0x1dec8da083068ac1,
690                0x1e0e4ef8a79ea294,
691                0x1e33bdfd782c0a91,
692                0x1e485bf6c7495a9b,
693                0x1e75a8f318fbf29e,
694                0x1e8264e46631528a,
695                0x1ebf97e1b983fa8f,
696                0x1ec471ea06e6aa85,
697                0x1ef982efd9540280,
698                0x1f161ac124c142a8,
699                0x1f2be9c4fb73eaad,
700                0x1f500fcf4416baa7,
701                0x1f6dfcca9ba412a2,
702                0x1f9a30dde56eb2b6,
703                0x1fa7c3d83adc1ab3,
704                0x1fdc25d385b94ab9,
705                0x1fe1d6d65a0be2bc,
706            ],
707        }
708    }
709}
710
711#[cfg(test)]
712mod bootstrap {
713    const DEG_TARGET: usize = 53; // Largest prime smaller than (64 - 8)
714
715    // const DEG_SHIFT: usize = DEG_TARGET - 8;
716
717    pub fn generate_lookup_tables(pol: u64, w_size: usize) -> ([u64; 256], [u64; 256]) {
718        // Calculate table for sliding out bytes. The byte to slide out is used as
719        // the index for the table, the value contains the following:
720        // out_table[b] = Hash(b || 0 ||        ...        || 0)
721        //                          \ windowsize-1 zero bytes /
722        // To slide out byte b_0 for window size w with known hash
723        // H := H(b_0 || ... || b_w), it is sufficient to add out_table[b_0]:
724        //    H(b_0 || ... || b_w) + H(b_0 || 0 || ... || 0)
725        //  = H(b_0 + b_0 || b_1 + 0 || ... || b_w + 0)
726        //  = H(    0     || b_1 || ...     || b_w)
727        //
728        // Afterwards a new byte can be shifted in.
729        let mut out_table = [0; 256];
730        let mut mod_table = [0; 256];
731
732        for b in 0u64..256 {
733            let mut h = modulus(b, pol);
734            for _ in 0..w_size - 1 {
735                h = modulus(h << 8, pol);
736            }
737            out_table[b as usize] = h;
738        }
739
740        // Calculate table for reduction mod Polynomial
741        // mod_table[b] = A | B, where A = (b(x) * x^k mod pol) and  B = b(x) * x^k
742        //
743        // The 8 bits above deg(Polynomial) determine what happens next and so
744        // these bits are used as a lookup to this table. The value is split in
745        // two parts: Part A contains the result of the modulus operation, part
746        // B is used to cancel out the 8 top bits so that one XOR operation is
747        // enough to reduce modulo Polynomial
748        for b in 0u64..256 {
749            mod_table[b as usize] = modulus(b << DEG_TARGET, pol) | b << DEG_TARGET;
750        }
751
752        (out_table, mod_table)
753    }
754
755    /// The degree of the polynomial.
756    const fn deg(pol: u64) -> i64 {
757        64 - (pol.leading_zeros() as i64) - 1
758    }
759
760    /// Modulus result of numerator%denominator.
761    /// see https://en.wikipedia.org/wiki/Division_algorithm
762    fn modulus(mut numerator: u64, denominator: u64) -> u64 {
763        assert_ne!(denominator, 0, "division by zero");
764        if numerator == 0 {
765            return 0;
766        }
767
768        let denom_deg = deg(denominator);
769
770        let mut deg_diff = deg(numerator) - denom_deg;
771        while deg_diff >= 0 {
772            numerator ^= denominator << usize::try_from(deg_diff).unwrap();
773            deg_diff = deg(numerator) - denom_deg;
774        }
775
776        numerator
777    }
778}
779
780#[cfg(test)]
781mod tests {
782    use super::*;
783    use futures::TryStreamExt;
784    use proptest::prelude::*;
785    use rand::{rngs::StdRng, SeedableRng};
786
787    #[tokio::test]
788    async fn test_simple_chunks() {
789        let chunker = Rabin::new(Config::default(), GO_IPFS_V0_PRESET);
790
791        println!("1 chunk");
792        let data: Vec<u8> = (0..1024u32).flat_map(|i| i.to_le_bytes()).collect();
793        let bytes = std::io::Cursor::new(&data);
794        let split = chunker.clone().chunks(bytes);
795        let result: Vec<_> = split.try_collect().await.unwrap();
796        assert_eq!(result.len(), 1);
797        assert_eq!(result[0], &data);
798
799        println!(">= 4 chunks");
800        let data: Vec<u8> = (0..393_216u32).flat_map(|i| i.to_le_bytes()).collect();
801        test_rabin_roundtrip_data(data).await;
802    }
803
804    #[tokio::test]
805    async fn test_more_chunks() {
806        let seed = 1;
807        let mut rng = StdRng::seed_from_u64(seed);
808        for size in [
809            0,
810            100,
811            1024,
812            1024 * 512,
813            1024 * 1024,
814            1024 * 1024 * 5,
815            1024 * 1024 * 100,
816        ] {
817            println!("size {size}");
818            let mut data = vec![0u8; size];
819            rng.fill_bytes(&mut data);
820            test_rabin_roundtrip_data(data).await;
821            println!("----");
822        }
823    }
824
825    async fn test_rabin_roundtrip_data(data: Vec<u8>) {
826        let config = Config::default();
827        let chunker = Rabin::new(config.clone(), GO_IPFS_V0_PRESET);
828
829        let mut rng = StdRng::seed_from_u64(0);
830        // split into randomized chunks
831        let mut chunks: Vec<Vec<u8>> = Vec::new();
832        let mut index = 0;
833        while index < data.len() {
834            let split = rng.gen_range(index..=data.len());
835            if split != index {
836                let chunk = data[index..split].to_vec();
837                index = split;
838                chunks.push(chunk);
839            }
840        }
841        println!(
842            "chunks: {:?}",
843            chunks.iter().map(|c| c.len()).collect::<Vec<_>>()
844        );
845        let stream = futures::stream::iter(chunks.iter().map(|s| io::Result::Ok(&s[..])));
846        let reader = tokio_util::io::StreamReader::new(stream);
847        let split = chunker.chunks(reader);
848        let chunks = split.try_collect::<Vec<_>>().await.unwrap();
849
850        if data.is_empty() {
851            assert!(chunks.is_empty());
852        } else if data.len() < config.min_size {
853            assert_eq!(chunks.len(), 1);
854        }
855
856        assert_eq!(chunks.iter().map(|v| v.len()).sum::<usize>(), data.len());
857        let mut together = Vec::with_capacity(data.len());
858        for (i, chunk) in chunks.iter().enumerate() {
859            assert!(chunk.len() <= config.max_size);
860            if data.len() >= config.min_size && i < chunks.len() - 1 {
861                assert!(chunk.len() >= config.min_size)
862            }
863            together.extend_from_slice(chunk);
864        }
865
866        assert_eq!(data, together);
867    }
868
869    fn arb_random_slice(max: usize) -> impl Strategy<Value = Vec<u8>> {
870        (0usize..max, any::<u64>()).prop_map(|(len, seed)| {
871            let mut rng = StdRng::seed_from_u64(seed);
872            let mut data = vec![0u8; len];
873            rng.fill_bytes(&mut data);
874            data
875        })
876    }
877
878    proptest! {
879        #![proptest_config(ProptestConfig {
880            fork: true,
881            .. ProptestConfig::default()
882        })]
883
884        #[ignore]
885        #[test]
886        fn test_rabin_roundtrip_small(data in arb_random_slice(1024 * 1024 * 2)) {
887            let rt = tokio::runtime::Builder::new_current_thread()
888                .build()
889                .unwrap();
890            rt.block_on(async move { test_rabin_roundtrip_data(data).await });
891        }
892
893        #[ignore]
894        #[test]
895        fn test_rabin_roundtrip_large(data in arb_random_slice(1024 * 1024 * 20)) {
896            let rt = tokio::runtime::Builder::new_current_thread()
897                .build()
898                .unwrap();
899            rt.block_on(async move { test_rabin_roundtrip_data(data).await });
900        }
901    }
902
903    #[test]
904    fn test_go_preset() {
905        let preset = GO_IPFS_V0_PRESET;
906
907        let (out_table, mod_table) =
908            bootstrap::generate_lookup_tables(preset.polynomial, preset.window_size);
909        assert_eq!(out_table, preset.out_table);
910        assert_eq!(mod_table, preset.mod_table);
911    }
912}