1use crate::core::dictionary::Dictionary;
2use crate::encoders::encoding::DecodeError;
3use crate::compression::CompressionAlgorithm;
4use crate::hashing::HashAlgorithm;
5use std::io::{Read, Write};
6
/// Number of raw bytes processed per streaming read/write iteration.
const CHUNK_SIZE: usize = 4096;

/// Streaming encoder: reads raw bytes from any `Read` source and writes
/// dictionary-encoded text to `writer`, optionally compressing the input
/// first and/or hashing the raw input on the fly.
pub struct StreamingEncoder<'a, W: Write> {
    dictionary: &'a Dictionary, // alphabet + encoding mode
    writer: W, // destination for the encoded text
    compress_algo: Option<CompressionAlgorithm>, // None = no compression
    compress_level: u32, // interpretation depends on the chosen algorithm
    hash_algo: Option<HashAlgorithm>, // hash of the raw (pre-compression) input
    xxhash_config: crate::hashing::XxHashConfig, // seed/secret for xxHash variants
}
22
23impl<'a, W: Write> StreamingEncoder<'a, W> {
24 pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
31 StreamingEncoder {
32 dictionary,
33 writer,
34 compress_algo: None,
35 compress_level: 6,
36 hash_algo: None,
37 xxhash_config: crate::hashing::XxHashConfig::default(),
38 }
39 }
40
41 pub fn with_compression(mut self, algo: CompressionAlgorithm, level: u32) -> Self {
43 self.compress_algo = Some(algo);
44 self.compress_level = level;
45 self
46 }
47
48 pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
50 self.hash_algo = Some(algo);
51 self
52 }
53
54 pub fn with_xxhash_config(mut self, config: crate::hashing::XxHashConfig) -> Self {
56 self.xxhash_config = config;
57 self
58 }
59
60 pub fn encode<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
68 if let Some(algo) = self.compress_algo {
70 return self.encode_with_compression(reader, algo);
71 }
72
73 let hash = match self.dictionary.mode() {
75 crate::core::config::EncodingMode::Chunked => {
76 self.encode_chunked(reader)?
77 }
78 crate::core::config::EncodingMode::ByteRange => {
79 self.encode_byte_range(reader)?
80 }
81 crate::core::config::EncodingMode::BaseConversion => {
82 let mut buffer = Vec::new();
84 reader.read_to_end(&mut buffer)?;
85
86 let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));
87
88 let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
89 self.writer.write_all(encoded.as_bytes())?;
90 hash
91 }
92 };
93
94 Ok(hash)
95 }
96
97 fn encode_with_compression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
99 use std::io::Cursor;
100
101 let mut compressed_data = Vec::new();
103 let hash = self.compress_stream(reader, &mut compressed_data, algo)?;
104
105 let mut cursor = Cursor::new(compressed_data);
107 match self.dictionary.mode() {
108 crate::core::config::EncodingMode::Chunked => {
109 self.encode_chunked_no_hash(&mut cursor)?;
110 }
111 crate::core::config::EncodingMode::ByteRange => {
112 self.encode_byte_range_no_hash(&mut cursor)?;
113 }
114 crate::core::config::EncodingMode::BaseConversion => {
115 let buffer = cursor.into_inner();
116 let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
117 self.writer.write_all(encoded.as_bytes())?;
118 }
119 }
120
121 Ok(hash)
122 }
123
124 fn compress_stream<R: Read>(&mut self, reader: &mut R, output: &mut Vec<u8>, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
126 use flate2::write::GzEncoder;
127 use xz2::write::XzEncoder;
128
129 let hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
130
131 match algo {
132 CompressionAlgorithm::Gzip => {
133 let mut encoder = GzEncoder::new(output, flate2::Compression::new(self.compress_level));
134 let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
135 encoder.finish()?;
136 Ok(hash)
137 }
138 CompressionAlgorithm::Zstd => {
139 let mut encoder = zstd::stream::write::Encoder::new(output, self.compress_level as i32)
140 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
141 let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
142 encoder.finish()?;
143 Ok(hash)
144 }
145 CompressionAlgorithm::Brotli => {
146 let mut encoder = brotli::CompressorWriter::new(output, 4096, self.compress_level, 22);
147 let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
148 Ok(hash)
149 }
150 CompressionAlgorithm::Lzma => {
151 let mut encoder = XzEncoder::new(output, self.compress_level);
152 let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
153 encoder.finish()?;
154 Ok(hash)
155 }
156 CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
157 let mut buffer = Vec::new();
160 reader.read_to_end(&mut buffer)?;
161
162 let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));
163
164 let compressed = match algo {
165 CompressionAlgorithm::Lz4 => {
166 lz4::block::compress(&buffer, None, false)
167 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
168 }
169 CompressionAlgorithm::Snappy => {
170 let mut encoder = snap::raw::Encoder::new();
171 encoder.compress_vec(&buffer)
172 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
173 }
174 _ => unreachable!()
175 };
176 output.extend_from_slice(&compressed);
177 Ok(hash)
178 }
179 }
180 }
181
182 fn copy_with_hash<R: Read>(reader: &mut R, writer: &mut impl Write, mut hasher: Option<HasherWriter>) -> std::io::Result<Option<Vec<u8>>> {
183 let mut buffer = vec![0u8; CHUNK_SIZE];
184
185 loop {
186 let bytes_read = reader.read(&mut buffer)?;
187 if bytes_read == 0 {
188 break;
189 }
190
191 let chunk = &buffer[..bytes_read];
192 if let Some(ref mut h) = hasher {
193 h.update(chunk);
194 }
195 writer.write_all(chunk)?;
196 }
197
198 Ok(hasher.map(|h| h.finalize()))
199 }
200
201 fn encode_chunked<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
202 let base = self.dictionary.base();
203 let bits_per_char = (base as f64).log2() as usize;
204 let bytes_per_group = bits_per_char;
205
206 let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
208 let mut buffer = vec![0u8; aligned_chunk_size];
209
210 let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
211
212 loop {
213 let bytes_read = reader.read(&mut buffer)?;
214 if bytes_read == 0 {
215 break;
216 }
217
218 let chunk = &buffer[..bytes_read];
219 if let Some(ref mut h) = hasher {
220 h.update(chunk);
221 }
222
223 let encoded = crate::encoders::chunked::encode_chunked(chunk, self.dictionary);
224 self.writer.write_all(encoded.as_bytes())?;
225 }
226
227 Ok(hasher.map(|h| h.finalize()))
228 }
229
230 fn encode_chunked_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
231 let base = self.dictionary.base();
232 let bits_per_char = (base as f64).log2() as usize;
233 let bytes_per_group = bits_per_char;
234
235 let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
236 let mut buffer = vec![0u8; aligned_chunk_size];
237
238 loop {
239 let bytes_read = reader.read(&mut buffer)?;
240 if bytes_read == 0 {
241 break;
242 }
243
244 let encoded = crate::encoders::chunked::encode_chunked(&buffer[..bytes_read], self.dictionary);
245 self.writer.write_all(encoded.as_bytes())?;
246 }
247
248 Ok(())
249 }
250
251 fn encode_byte_range<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
252 let mut buffer = vec![0u8; CHUNK_SIZE];
253 let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
254
255 loop {
256 let bytes_read = reader.read(&mut buffer)?;
257 if bytes_read == 0 {
258 break;
259 }
260
261 let chunk = &buffer[..bytes_read];
262 if let Some(ref mut h) = hasher {
263 h.update(chunk);
264 }
265
266 let encoded = crate::encoders::byte_range::encode_byte_range(chunk, self.dictionary);
267 self.writer.write_all(encoded.as_bytes())?;
268 }
269
270 Ok(hasher.map(|h| h.finalize()))
271 }
272
273 fn encode_byte_range_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
274 let mut buffer = vec![0u8; CHUNK_SIZE];
275
276 loop {
277 let bytes_read = reader.read(&mut buffer)?;
278 if bytes_read == 0 {
279 break;
280 }
281
282 let encoded = crate::encoders::byte_range::encode_byte_range(&buffer[..bytes_read], self.dictionary);
283 self.writer.write_all(encoded.as_bytes())?;
284 }
285
286 Ok(())
287 }
288}
289
/// Streaming decoder: reads dictionary-encoded text from any `Read` source,
/// decodes (and optionally decompresses) it, and writes the raw bytes to
/// `writer`; can also hash the decoded output on the fly.
pub struct StreamingDecoder<'a, W: Write> {
    dictionary: &'a Dictionary, // alphabet + encoding mode
    writer: W, // destination for the decoded bytes
    decompress_algo: Option<CompressionAlgorithm>, // None = no decompression step
    hash_algo: Option<HashAlgorithm>, // hash of the final decoded output
    xxhash_config: crate::hashing::XxHashConfig, // seed/secret for xxHash variants
}
302
303impl<'a, W: Write> StreamingDecoder<'a, W> {
304 pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
311 StreamingDecoder {
312 dictionary,
313 writer,
314 decompress_algo: None,
315 hash_algo: None,
316 xxhash_config: crate::hashing::XxHashConfig::default(),
317 }
318 }
319
320 pub fn with_decompression(mut self, algo: CompressionAlgorithm) -> Self {
322 self.decompress_algo = Some(algo);
323 self
324 }
325
326 pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
328 self.hash_algo = Some(algo);
329 self
330 }
331
332 pub fn with_xxhash_config(mut self, config: crate::hashing::XxHashConfig) -> Self {
334 self.xxhash_config = config;
335 self
336 }
337
338 pub fn decode<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
346 if let Some(algo) = self.decompress_algo {
348 return self.decode_with_decompression(reader, algo);
349 }
350
351 match self.dictionary.mode() {
353 crate::core::config::EncodingMode::Chunked => {
354 self.decode_chunked(reader)
355 }
356 crate::core::config::EncodingMode::ByteRange => {
357 self.decode_byte_range(reader)
358 }
359 crate::core::config::EncodingMode::BaseConversion => {
360 let mut buffer = String::new();
362 reader.read_to_string(&mut buffer)
363 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
364 let decoded = crate::encoders::encoding::decode(&buffer, self.dictionary)?;
365
366 let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decoded, algo));
367
368 self.writer.write_all(&decoded)
369 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
370 Ok(hash)
371 }
372 }
373 }
374
375 fn decode_with_decompression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> Result<Option<Vec<u8>>, DecodeError> {
377 use std::io::Cursor;
378
379 let mut compressed_data = Vec::new();
381 {
382 let mut temp_decoder = StreamingDecoder::new(self.dictionary, &mut compressed_data);
383 temp_decoder.decode(reader)?;
384 }
385
386 let mut cursor = Cursor::new(compressed_data);
388 let hash = self.decompress_stream(&mut cursor, algo)
389 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
390
391 Ok(hash)
392 }
393
394 fn decompress_stream<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
396 use flate2::read::GzDecoder;
397 use xz2::read::XzDecoder;
398
399 let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
400
401 match algo {
402 CompressionAlgorithm::Gzip => {
403 let mut decoder = GzDecoder::new(reader);
404 Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
405 }
406 CompressionAlgorithm::Zstd => {
407 let mut decoder = zstd::stream::read::Decoder::new(reader)
408 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
409 Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
410 }
411 CompressionAlgorithm::Brotli => {
412 let mut decoder = brotli::Decompressor::new(reader, 4096);
413 Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
414 }
415 CompressionAlgorithm::Lzma => {
416 let mut decoder = XzDecoder::new(reader);
417 Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
418 }
419 CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
420 let mut compressed = Vec::new();
422 reader.read_to_end(&mut compressed)?;
423
424 let decompressed = match algo {
425 CompressionAlgorithm::Lz4 => {
426 lz4::block::decompress(&compressed, Some(100 * 1024 * 1024))
427 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
428 }
429 CompressionAlgorithm::Snappy => {
430 let mut decoder = snap::raw::Decoder::new();
431 decoder.decompress_vec(&compressed)
432 .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
433 }
434 _ => unreachable!()
435 };
436
437 let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decompressed, algo));
438 self.writer.write_all(&decompressed)?;
439 return Ok(hash);
440 }
441 }
442
443 Ok(hasher.map(|h| h.finalize()))
444 }
445
446 fn copy_with_hash_to_writer<R: Read>(reader: &mut R, writer: &mut W, hasher: &mut Option<HasherWriter>) -> std::io::Result<()> {
447 let mut buffer = vec![0u8; CHUNK_SIZE];
448
449 loop {
450 let bytes_read = reader.read(&mut buffer)?;
451 if bytes_read == 0 {
452 break;
453 }
454
455 let chunk = &buffer[..bytes_read];
456 if let Some(ref mut h) = hasher {
457 h.update(chunk);
458 }
459 writer.write_all(chunk)?;
460 }
461
462 Ok(())
463 }
464
465 fn decode_chunked<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
466 let base = self.dictionary.base();
467 let bits_per_char = (base as f64).log2() as usize;
468 let chars_per_group = 8 / bits_per_char;
469
470 let mut text_buffer = String::new();
472 let mut char_buffer = vec![0u8; CHUNK_SIZE];
473 let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
474
475 loop {
476 let bytes_read = reader.read(&mut char_buffer)
477 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
478 if bytes_read == 0 {
479 break;
480 }
481
482 let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
483 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
484 text_buffer.push_str(chunk_str);
485
486 let chars: Vec<char> = text_buffer.chars().collect();
488 let complete_groups = (chars.len() / chars_per_group) * chars_per_group;
489
490 if complete_groups > 0 {
491 let to_decode: String = chars[..complete_groups].iter().collect();
492 let decoded = crate::encoders::chunked::decode_chunked(&to_decode, self.dictionary)?;
493
494 if let Some(ref mut h) = hasher {
495 h.update(&decoded);
496 }
497
498 self.writer.write_all(&decoded)
499 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
500
501 text_buffer = chars[complete_groups..].iter().collect();
503 }
504 }
505
506 if !text_buffer.is_empty() {
508 let decoded = crate::encoders::chunked::decode_chunked(&text_buffer, self.dictionary)?;
509
510 if let Some(ref mut h) = hasher {
511 h.update(&decoded);
512 }
513
514 self.writer.write_all(&decoded)
515 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
516 }
517
518 Ok(hasher.map(|h| h.finalize()))
519 }
520
521 fn decode_byte_range<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
522 let mut char_buffer = vec![0u8; CHUNK_SIZE];
523 let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
524
525 loop {
526 let bytes_read = reader.read(&mut char_buffer)
527 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
528 if bytes_read == 0 {
529 break;
530 }
531
532 let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
533 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
534
535 let decoded = crate::encoders::byte_range::decode_byte_range(chunk_str, self.dictionary)?;
536
537 if let Some(ref mut h) = hasher {
538 h.update(&decoded);
539 }
540
541 self.writer.write_all(&decoded)
542 .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
543 }
544
545 Ok(hasher.map(|h| h.finalize()))
546 }
547}
548
/// Incremental hasher for the streaming paths: one variant per supported
/// `HashAlgorithm`, giving a uniform chunk-by-chunk `update`/`finalize` API
/// over otherwise heterogeneous hasher types.
enum HasherWriter {
    Md5(md5::Md5),
    Sha224(sha2::Sha224),
    Sha256(sha2::Sha256),
    Sha384(sha2::Sha384),
    Sha512(sha2::Sha512),
    Sha3_224(sha3::Sha3_224),
    Sha3_256(sha3::Sha3_256),
    Sha3_384(sha3::Sha3_384),
    Sha3_512(sha3::Sha3_512),
    Keccak224(sha3::Keccak224),
    Keccak256(sha3::Keccak256),
    Keccak384(sha3::Keccak384),
    Keccak512(sha3::Keccak512),
    Blake2b(blake2::Blake2b512),
    Blake2s(blake2::Blake2s256),
    Blake3(blake3::Hasher),
    // CRC digests borrow their lookup table; the tables are `'static`
    // (see `create_hasher_writer`), and boxing keeps the enum small.
    Crc16(Box<crc::Digest<'static, u16>>),
    Crc32(Box<crc::Digest<'static, u32>>),
    Crc32c(Box<crc::Digest<'static, u32>>),
    Crc64(Box<crc::Digest<'static, u64>>),
    XxHash32(twox_hash::XxHash32),
    XxHash64(twox_hash::XxHash64),
    XxHash3_64(twox_hash::xxhash3_64::Hasher),
    XxHash3_128(twox_hash::xxhash3_128::Hasher),
}
576
impl HasherWriter {
    /// Feeds `data` into the underlying hasher.
    fn update(&mut self, data: &[u8]) {
        // `Digest::update` covers the cryptographic hashes; `Hasher::write`
        // covers the xxHash family. Both traits must be in scope.
        use sha2::Digest;
        use std::hash::Hasher;

        match self {
            HasherWriter::Md5(h) => { h.update(data); }
            HasherWriter::Sha224(h) => { h.update(data); }
            HasherWriter::Sha256(h) => { h.update(data); }
            HasherWriter::Sha384(h) => { h.update(data); }
            HasherWriter::Sha512(h) => { h.update(data); }
            HasherWriter::Sha3_224(h) => { h.update(data); }
            HasherWriter::Sha3_256(h) => { h.update(data); }
            HasherWriter::Sha3_384(h) => { h.update(data); }
            HasherWriter::Sha3_512(h) => { h.update(data); }
            HasherWriter::Keccak224(h) => { h.update(data); }
            HasherWriter::Keccak256(h) => { h.update(data); }
            HasherWriter::Keccak384(h) => { h.update(data); }
            HasherWriter::Keccak512(h) => { h.update(data); }
            HasherWriter::Blake2b(h) => { h.update(data); }
            HasherWriter::Blake2s(h) => { h.update(data); }
            HasherWriter::Blake3(h) => { h.update(data); }
            HasherWriter::Crc16(digest) => { digest.update(data); }
            HasherWriter::Crc32(digest) => { digest.update(data); }
            HasherWriter::Crc32c(digest) => { digest.update(data); }
            HasherWriter::Crc64(digest) => { digest.update(data); }
            HasherWriter::XxHash32(h) => { h.write(data); }
            HasherWriter::XxHash64(h) => { h.write(data); }
            HasherWriter::XxHash3_64(h) => { h.write(data); }
            HasherWriter::XxHash3_128(h) => { h.write(data); }
        }
    }

    /// Consumes the hasher and returns the digest as raw bytes.
    ///
    /// Integer-valued digests (CRC, xxHash) are serialized big-endian.
    fn finalize(self) -> Vec<u8> {
        use sha2::Digest;
        use std::hash::Hasher;

        match self {
            HasherWriter::Md5(h) => h.finalize().to_vec(),
            HasherWriter::Sha224(h) => h.finalize().to_vec(),
            HasherWriter::Sha256(h) => h.finalize().to_vec(),
            HasherWriter::Sha384(h) => h.finalize().to_vec(),
            HasherWriter::Sha512(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_224(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_256(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_384(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_512(h) => h.finalize().to_vec(),
            HasherWriter::Keccak224(h) => h.finalize().to_vec(),
            HasherWriter::Keccak256(h) => h.finalize().to_vec(),
            HasherWriter::Keccak384(h) => h.finalize().to_vec(),
            HasherWriter::Keccak512(h) => h.finalize().to_vec(),
            HasherWriter::Blake2b(h) => h.finalize().to_vec(),
            HasherWriter::Blake2s(h) => h.finalize().to_vec(),
            HasherWriter::Blake3(h) => h.finalize().as_bytes().to_vec(),
            HasherWriter::Crc16(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32c(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc64(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::XxHash32(h) => (h.finish() as u32).to_be_bytes().to_vec(),
            HasherWriter::XxHash64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_128(h) => {
                // 128-bit variant has its own finisher outside `Hasher`.
                let hash = h.finish_128();
                let mut result = Vec::with_capacity(16);
                result.extend_from_slice(&hash.to_be_bytes());
                result
            },
        }
    }
}
647
/// Builds the `HasherWriter` variant for `algo`.
///
/// `config` supplies the seed (and optional secret) used by the xxHash
/// family; it is ignored by every other algorithm.
fn create_hasher_writer(algo: HashAlgorithm, config: &crate::hashing::XxHashConfig) -> HasherWriter {
    // Brings the `Digest::new` constructors of the RustCrypto hashers into scope.
    use sha2::Digest;

    match algo {
        HashAlgorithm::Md5 => HasherWriter::Md5(md5::Md5::new()),
        HashAlgorithm::Sha224 => HasherWriter::Sha224(sha2::Sha224::new()),
        HashAlgorithm::Sha256 => HasherWriter::Sha256(sha2::Sha256::new()),
        HashAlgorithm::Sha384 => HasherWriter::Sha384(sha2::Sha384::new()),
        HashAlgorithm::Sha512 => HasherWriter::Sha512(sha2::Sha512::new()),
        HashAlgorithm::Sha3_224 => HasherWriter::Sha3_224(sha3::Sha3_224::new()),
        HashAlgorithm::Sha3_256 => HasherWriter::Sha3_256(sha3::Sha3_256::new()),
        HashAlgorithm::Sha3_384 => HasherWriter::Sha3_384(sha3::Sha3_384::new()),
        HashAlgorithm::Sha3_512 => HasherWriter::Sha3_512(sha3::Sha3_512::new()),
        HashAlgorithm::Keccak224 => HasherWriter::Keccak224(sha3::Keccak224::new()),
        HashAlgorithm::Keccak256 => HasherWriter::Keccak256(sha3::Keccak256::new()),
        HashAlgorithm::Keccak384 => HasherWriter::Keccak384(sha3::Keccak384::new()),
        HashAlgorithm::Keccak512 => HasherWriter::Keccak512(sha3::Keccak512::new()),
        HashAlgorithm::Blake2b => HasherWriter::Blake2b(blake2::Blake2b512::new()),
        HashAlgorithm::Blake2s => HasherWriter::Blake2s(blake2::Blake2s256::new()),
        HashAlgorithm::Blake3 => HasherWriter::Blake3(blake3::Hasher::new()),
        // The CRC tables are `static` so the `Digest` (which borrows its
        // table) can carry the `'static` lifetime the enum requires.
        HashAlgorithm::Crc16 => {
            static CRC: crc::Crc<u16> = crc::Crc::<u16>::new(&crc::CRC_16_IBM_SDLC);
            HasherWriter::Crc16(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32 => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
            HasherWriter::Crc32(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32c => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
            HasherWriter::Crc32c(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc64 => {
            static CRC: crc::Crc<u64> = crc::Crc::<u64>::new(&crc::CRC_64_ECMA_182);
            HasherWriter::Crc64(Box::new(CRC.digest()))
        }
        // xxHash32 takes a 32-bit seed; the configured 64-bit seed is
        // truncated here.
        HashAlgorithm::XxHash32 => HasherWriter::XxHash32(twox_hash::XxHash32::with_seed(config.seed as u32)),
        HashAlgorithm::XxHash64 => HasherWriter::XxHash64(twox_hash::XxHash64::with_seed(config.seed)),
        HashAlgorithm::XxHash3_64 => {
            if let Some(ref secret) = config.secret {
                HasherWriter::XxHash3_64(
                    twox_hash::xxhash3_64::Hasher::with_seed_and_secret(config.seed, secret.as_slice())
                        .expect("XXH3 secret validation should have been done in XxHashConfig::with_secret")
                )
            } else {
                HasherWriter::XxHash3_64(twox_hash::xxhash3_64::Hasher::with_seed(config.seed))
            }
        }
        HashAlgorithm::XxHash3_128 => {
            if let Some(ref secret) = config.secret {
                HasherWriter::XxHash3_128(
                    twox_hash::xxhash3_128::Hasher::with_seed_and_secret(config.seed, secret.as_slice())
                        .expect("XXH3 secret validation should have been done in XxHashConfig::with_secret")
                )
            } else {
                HasherWriter::XxHash3_128(twox_hash::xxhash3_128::Hasher::with_seed(config.seed))
            }
        }
    }
}
708
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DictionariesConfig, Dictionary};
    use std::io::Cursor;

    /// Builds a `Dictionary` from the named entry of the default config.
    /// ByteRange entries are constructed from their start codepoint; all
    /// other modes from their character list plus optional padding char.
    fn get_dictionary(name: &str) -> Dictionary {
        let config = DictionariesConfig::load_default().unwrap();
        let alphabet_config = config.get_dictionary(name).unwrap();

        match alphabet_config.mode {
            crate::core::config::EncodingMode::ByteRange => {
                let start = alphabet_config.start_codepoint.unwrap();
                Dictionary::new_with_mode_and_range(Vec::new(), alphabet_config.mode.clone(), None, Some(start)).unwrap()
            }
            _ => {
                let chars: Vec<char> = alphabet_config.chars.chars().collect();
                let padding = alphabet_config.padding.as_ref().and_then(|s| s.chars().next());
                Dictionary::new_with_mode(chars, alphabet_config.mode.clone(), padding).unwrap()
            }
        }
    }

    /// Round-trip through the streaming encoder/decoder with base64.
    #[test]
    fn test_streaming_encode_decode_base64() {
        let dictionary = get_dictionary("base64");
        let data = b"Hello, World! This is a streaming test with multiple chunks of data.";

        let mut encoded_output = Vec::new();
        // Scope the encoder so its borrow of `encoded_output` ends here.
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap();
        }

        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, &decoded_output[..]);
    }

    /// Round-trip with base100, which exercises the ByteRange mode.
    #[test]
    fn test_streaming_encode_decode_base100() {
        let dictionary = get_dictionary("base100");
        let data = b"Test data for byte range streaming";

        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap();
        }

        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, &decoded_output[..]);
    }

    /// Round-trip with input much larger than CHUNK_SIZE, exercising the
    /// chunk-boundary paths of both encoder and decoder.
    #[test]
    fn test_streaming_large_data() {
        let dictionary = get_dictionary("base64");
        let data: Vec<u8> = (0..100000).map(|i| (i % 256) as u8).collect();

        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(&data);
            encoder.encode(&mut reader).unwrap();
        }

        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, decoded_output);
    }
}