base_d/encoders/
streaming.rs

1use crate::core::dictionary::Dictionary;
2use crate::encoders::encoding::DecodeError;
3use crate::compression::CompressionAlgorithm;
4use crate::hashing::HashAlgorithm;
5use std::io::{Read, Write};
6
7const CHUNK_SIZE: usize = 4096; // 4KB chunks
8
9/// Streaming encoder for processing large amounts of data efficiently.
10///
11/// Processes data in chunks to avoid loading entire files into memory.
12/// Suitable for encoding large files or network streams.
13/// Supports optional compression and hashing during encoding.
pub struct StreamingEncoder<'a, W: Write> {
    /// Dictionary defining the target alphabet and encoding mode.
    dictionary: &'a Dictionary,
    /// Destination for the encoded text output.
    writer: W,
    /// When set, input is compressed with this algorithm before encoding.
    compress_algo: Option<CompressionAlgorithm>,
    /// Compression level handed to the selected algorithm (defaults to 6).
    compress_level: u32,
    /// When set, a hash of the raw (pre-compression) input is computed.
    hash_algo: Option<HashAlgorithm>,
    /// Seed/secret applied when an xxHash-family algorithm is selected.
    xxhash_config: crate::hashing::XxHashConfig,
}
22
23impl<'a, W: Write> StreamingEncoder<'a, W> {
24    /// Creates a new streaming encoder.
25    ///
26    /// # Arguments
27    ///
28    /// * `dictionary` - The dictionary to use for encoding
29    /// * `writer` - The destination for encoded output
30    pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
31        StreamingEncoder {
32            dictionary,
33            writer,
34            compress_algo: None,
35            compress_level: 6,
36            hash_algo: None,
37            xxhash_config: crate::hashing::XxHashConfig::default(),
38        }
39    }
40    
41    /// Sets compression algorithm and level.
42    pub fn with_compression(mut self, algo: CompressionAlgorithm, level: u32) -> Self {
43        self.compress_algo = Some(algo);
44        self.compress_level = level;
45        self
46    }
47    
48    /// Sets hash algorithm for computing hash during encoding.
49    pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
50        self.hash_algo = Some(algo);
51        self
52    }
53
54    /// Sets xxHash configuration (seed and secret).
55    pub fn with_xxhash_config(mut self, config: crate::hashing::XxHashConfig) -> Self {
56        self.xxhash_config = config;
57        self
58    }
59    
60    /// Encodes data from a reader in chunks.
61    ///
62    /// Note: BaseConversion mode requires reading the entire input at once
63    /// due to the mathematical nature of the algorithm. For truly streaming
64    /// behavior, use Chunked or ByteRange modes.
65    /// 
66    /// Returns the computed hash if hash_algo was set, otherwise None.
67    pub fn encode<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
68        // If compression is enabled, we need to compress then encode
69        if let Some(algo) = self.compress_algo {
70            return self.encode_with_compression(reader, algo);
71        }
72        
73        // No compression - encode directly with optional hashing
74        let hash = match self.dictionary.mode() {
75            crate::core::config::EncodingMode::Chunked => {
76                self.encode_chunked(reader)?
77            }
78            crate::core::config::EncodingMode::ByteRange => {
79                self.encode_byte_range(reader)?
80            }
81            crate::core::config::EncodingMode::BaseConversion => {
82                // Mathematical mode requires entire input - read all and encode
83                let mut buffer = Vec::new();
84                reader.read_to_end(&mut buffer)?;
85                
86                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));
87                
88                let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
89                self.writer.write_all(encoded.as_bytes())?;
90                hash
91            }
92        };
93        
94        Ok(hash)
95    }
96    
97    /// Encode with compression: compress stream then encode compressed data.
98    fn encode_with_compression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
99        use std::io::Cursor;
100        
101        // Compress the input stream
102        let mut compressed_data = Vec::new();
103        let hash = self.compress_stream(reader, &mut compressed_data, algo)?;
104        
105        // Encode the compressed data
106        let mut cursor = Cursor::new(compressed_data);
107        match self.dictionary.mode() {
108            crate::core::config::EncodingMode::Chunked => {
109                self.encode_chunked_no_hash(&mut cursor)?;
110            }
111            crate::core::config::EncodingMode::ByteRange => {
112                self.encode_byte_range_no_hash(&mut cursor)?;
113            }
114            crate::core::config::EncodingMode::BaseConversion => {
115                let buffer = cursor.into_inner();
116                let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
117                self.writer.write_all(encoded.as_bytes())?;
118            }
119        }
120        
121        Ok(hash)
122    }
123    
124    /// Compress a stream with optional hashing.
125    fn compress_stream<R: Read>(&mut self, reader: &mut R, output: &mut Vec<u8>, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
126        use flate2::write::GzEncoder;
127        use xz2::write::XzEncoder;
128
129        let hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
130        
131        match algo {
132            CompressionAlgorithm::Gzip => {
133                let mut encoder = GzEncoder::new(output, flate2::Compression::new(self.compress_level));
134                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
135                encoder.finish()?;
136                Ok(hash)
137            }
138            CompressionAlgorithm::Zstd => {
139                let mut encoder = zstd::stream::write::Encoder::new(output, self.compress_level as i32)
140                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
141                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
142                encoder.finish()?;
143                Ok(hash)
144            }
145            CompressionAlgorithm::Brotli => {
146                let mut encoder = brotli::CompressorWriter::new(output, 4096, self.compress_level, 22);
147                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
148                Ok(hash)
149            }
150            CompressionAlgorithm::Lzma => {
151                let mut encoder = XzEncoder::new(output, self.compress_level);
152                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
153                encoder.finish()?;
154                Ok(hash)
155            }
156            CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
157                // LZ4 and Snappy don't have streaming encoders in their crates
158                // Read all, compress, write
159                let mut buffer = Vec::new();
160                reader.read_to_end(&mut buffer)?;
161                
162                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));
163                
164                let compressed = match algo {
165                    CompressionAlgorithm::Lz4 => {
166                        lz4::block::compress(&buffer, None, false)
167                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
168                    }
169                    CompressionAlgorithm::Snappy => {
170                        let mut encoder = snap::raw::Encoder::new();
171                        encoder.compress_vec(&buffer)
172                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
173                    }
174                    _ => unreachable!()
175                };
176                output.extend_from_slice(&compressed);
177                Ok(hash)
178            }
179        }
180    }
181    
182    fn copy_with_hash<R: Read>(reader: &mut R, writer: &mut impl Write, mut hasher: Option<HasherWriter>) -> std::io::Result<Option<Vec<u8>>> {
183        let mut buffer = vec![0u8; CHUNK_SIZE];
184        
185        loop {
186            let bytes_read = reader.read(&mut buffer)?;
187            if bytes_read == 0 {
188                break;
189            }
190            
191            let chunk = &buffer[..bytes_read];
192            if let Some(ref mut h) = hasher {
193                h.update(chunk);
194            }
195            writer.write_all(chunk)?;
196        }
197        
198        Ok(hasher.map(|h| h.finalize()))
199    }
200    
201    fn encode_chunked<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
202        let base = self.dictionary.base();
203        let bits_per_char = (base as f64).log2() as usize;
204        let bytes_per_group = bits_per_char;
205
206        // Adjust chunk size to align with encoding groups
207        let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
208        let mut buffer = vec![0u8; aligned_chunk_size];
209
210        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
211        
212        loop {
213            let bytes_read = reader.read(&mut buffer)?;
214            if bytes_read == 0 {
215                break;
216            }
217            
218            let chunk = &buffer[..bytes_read];
219            if let Some(ref mut h) = hasher {
220                h.update(chunk);
221            }
222            
223            let encoded = crate::encoders::chunked::encode_chunked(chunk, self.dictionary);
224            self.writer.write_all(encoded.as_bytes())?;
225        }
226        
227        Ok(hasher.map(|h| h.finalize()))
228    }
229    
230    fn encode_chunked_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
231        let base = self.dictionary.base();
232        let bits_per_char = (base as f64).log2() as usize;
233        let bytes_per_group = bits_per_char;
234        
235        let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
236        let mut buffer = vec![0u8; aligned_chunk_size];
237        
238        loop {
239            let bytes_read = reader.read(&mut buffer)?;
240            if bytes_read == 0 {
241                break;
242            }
243            
244            let encoded = crate::encoders::chunked::encode_chunked(&buffer[..bytes_read], self.dictionary);
245            self.writer.write_all(encoded.as_bytes())?;
246        }
247        
248        Ok(())
249    }
250    
251    fn encode_byte_range<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
252        let mut buffer = vec![0u8; CHUNK_SIZE];
253        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
254        
255        loop {
256            let bytes_read = reader.read(&mut buffer)?;
257            if bytes_read == 0 {
258                break;
259            }
260            
261            let chunk = &buffer[..bytes_read];
262            if let Some(ref mut h) = hasher {
263                h.update(chunk);
264            }
265            
266            let encoded = crate::encoders::byte_range::encode_byte_range(chunk, self.dictionary);
267            self.writer.write_all(encoded.as_bytes())?;
268        }
269        
270        Ok(hasher.map(|h| h.finalize()))
271    }
272    
273    fn encode_byte_range_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
274        let mut buffer = vec![0u8; CHUNK_SIZE];
275        
276        loop {
277            let bytes_read = reader.read(&mut buffer)?;
278            if bytes_read == 0 {
279                break;
280            }
281            
282            let encoded = crate::encoders::byte_range::encode_byte_range(&buffer[..bytes_read], self.dictionary);
283            self.writer.write_all(encoded.as_bytes())?;
284        }
285        
286        Ok(())
287    }
288}
289
290/// Streaming decoder for processing large amounts of encoded data efficiently.
291///
292/// Processes data in chunks to avoid loading entire files into memory.
293/// Suitable for decoding large files or network streams.
294/// Supports optional decompression and hashing during decoding.
pub struct StreamingDecoder<'a, W: Write> {
    /// Dictionary that was used to produce the encoded input.
    dictionary: &'a Dictionary,
    /// Destination for the decoded byte output.
    writer: W,
    /// When set, decoded bytes are decompressed with this algorithm.
    decompress_algo: Option<CompressionAlgorithm>,
    /// When set, a hash of the final (decoded, decompressed) output is computed.
    hash_algo: Option<HashAlgorithm>,
    /// Seed/secret applied when an xxHash-family algorithm is selected.
    xxhash_config: crate::hashing::XxHashConfig,
}
302
303impl<'a, W: Write> StreamingDecoder<'a, W> {
304    /// Creates a new streaming decoder.
305    ///
306    /// # Arguments
307    ///
308    /// * `dictionary` - The dictionary used for encoding
309    /// * `writer` - The destination for decoded output
310    pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
311        StreamingDecoder {
312            dictionary,
313            writer,
314            decompress_algo: None,
315            hash_algo: None,
316            xxhash_config: crate::hashing::XxHashConfig::default(),
317        }
318    }
319    
320    /// Sets decompression algorithm.
321    pub fn with_decompression(mut self, algo: CompressionAlgorithm) -> Self {
322        self.decompress_algo = Some(algo);
323        self
324    }
325    
326    /// Sets hash algorithm for computing hash during decoding.
327    pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
328        self.hash_algo = Some(algo);
329        self
330    }
331
332    /// Sets xxHash configuration (seed and secret).
333    pub fn with_xxhash_config(mut self, config: crate::hashing::XxHashConfig) -> Self {
334        self.xxhash_config = config;
335        self
336    }
337    
338    /// Decodes data from a reader in chunks.
339    ///
340    /// Note: BaseConversion mode requires reading the entire input at once
341    /// due to the mathematical nature of the algorithm. For truly streaming
342    /// behavior, use Chunked or ByteRange modes.
343    /// 
344    /// Returns the computed hash if hash_algo was set, otherwise None.
345    pub fn decode<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
346        // If decompression is enabled, decode then decompress
347        if let Some(algo) = self.decompress_algo {
348            return self.decode_with_decompression(reader, algo);
349        }
350        
351        // No decompression - decode directly with optional hashing
352        match self.dictionary.mode() {
353            crate::core::config::EncodingMode::Chunked => {
354                self.decode_chunked(reader)
355            }
356            crate::core::config::EncodingMode::ByteRange => {
357                self.decode_byte_range(reader)
358            }
359            crate::core::config::EncodingMode::BaseConversion => {
360                // Mathematical mode requires entire input
361                let mut buffer = String::new();
362                reader.read_to_string(&mut buffer)
363                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
364                let decoded = crate::encoders::encoding::decode(&buffer, self.dictionary)?;
365                
366                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decoded, algo));
367                
368                self.writer.write_all(&decoded)
369                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
370                Ok(hash)
371            }
372        }
373    }
374    
375    /// Decode with decompression: decode stream then decompress decoded data.
376    fn decode_with_decompression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> Result<Option<Vec<u8>>, DecodeError> {
377        use std::io::Cursor;
378        
379        // Decode the input stream to get compressed data
380        let mut compressed_data = Vec::new();
381        {
382            let mut temp_decoder = StreamingDecoder::new(self.dictionary, &mut compressed_data);
383            temp_decoder.decode(reader)?;
384        }
385        
386        // Decompress and write to output with optional hashing
387        let mut cursor = Cursor::new(compressed_data);
388        let hash = self.decompress_stream(&mut cursor, algo)
389            .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
390        
391        Ok(hash)
392    }
393    
394    /// Decompress a stream with optional hashing.
395    fn decompress_stream<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
396        use flate2::read::GzDecoder;
397        use xz2::read::XzDecoder;
398
399        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
400        
401        match algo {
402            CompressionAlgorithm::Gzip => {
403                let mut decoder = GzDecoder::new(reader);
404                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
405            }
406            CompressionAlgorithm::Zstd => {
407                let mut decoder = zstd::stream::read::Decoder::new(reader)
408                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
409                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
410            }
411            CompressionAlgorithm::Brotli => {
412                let mut decoder = brotli::Decompressor::new(reader, 4096);
413                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
414            }
415            CompressionAlgorithm::Lzma => {
416                let mut decoder = XzDecoder::new(reader);
417                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
418            }
419            CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
420                // LZ4 and Snappy don't have streaming decoders
421                let mut compressed = Vec::new();
422                reader.read_to_end(&mut compressed)?;
423                
424                let decompressed = match algo {
425                    CompressionAlgorithm::Lz4 => {
426                        lz4::block::decompress(&compressed, Some(100 * 1024 * 1024))
427                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
428                    }
429                    CompressionAlgorithm::Snappy => {
430                        let mut decoder = snap::raw::Decoder::new();
431                        decoder.decompress_vec(&compressed)
432                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
433                    }
434                    _ => unreachable!()
435                };
436                
437                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decompressed, algo));
438                self.writer.write_all(&decompressed)?;
439                return Ok(hash);
440            }
441        }
442        
443        Ok(hasher.map(|h| h.finalize()))
444    }
445    
446    fn copy_with_hash_to_writer<R: Read>(reader: &mut R, writer: &mut W, hasher: &mut Option<HasherWriter>) -> std::io::Result<()> {
447        let mut buffer = vec![0u8; CHUNK_SIZE];
448        
449        loop {
450            let bytes_read = reader.read(&mut buffer)?;
451            if bytes_read == 0 {
452                break;
453            }
454            
455            let chunk = &buffer[..bytes_read];
456            if let Some(ref mut h) = hasher {
457                h.update(chunk);
458            }
459            writer.write_all(chunk)?;
460        }
461        
462        Ok(())
463    }
464    
465    fn decode_chunked<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
466        let base = self.dictionary.base();
467        let bits_per_char = (base as f64).log2() as usize;
468        let chars_per_group = 8 / bits_per_char;
469
470        // Read text in chunks
471        let mut text_buffer = String::new();
472        let mut char_buffer = vec![0u8; CHUNK_SIZE];
473        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
474        
475        loop {
476            let bytes_read = reader.read(&mut char_buffer)
477                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
478            if bytes_read == 0 {
479                break;
480            }
481            
482            let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
483                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
484            text_buffer.push_str(chunk_str);
485            
486            // Process complete character groups
487            let chars: Vec<char> = text_buffer.chars().collect();
488            let complete_groups = (chars.len() / chars_per_group) * chars_per_group;
489            
490            if complete_groups > 0 {
491                let to_decode: String = chars[..complete_groups].iter().collect();
492                let decoded = crate::encoders::chunked::decode_chunked(&to_decode, self.dictionary)?;
493                
494                if let Some(ref mut h) = hasher {
495                    h.update(&decoded);
496                }
497                
498                self.writer.write_all(&decoded)
499                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
500                
501                // Keep remaining chars for next iteration
502                text_buffer = chars[complete_groups..].iter().collect();
503            }
504        }
505        
506        // Process any remaining characters
507        if !text_buffer.is_empty() {
508            let decoded = crate::encoders::chunked::decode_chunked(&text_buffer, self.dictionary)?;
509            
510            if let Some(ref mut h) = hasher {
511                h.update(&decoded);
512            }
513            
514            self.writer.write_all(&decoded)
515                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
516        }
517        
518        Ok(hasher.map(|h| h.finalize()))
519    }
520    
521    fn decode_byte_range<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
522        let mut char_buffer = vec![0u8; CHUNK_SIZE];
523        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo, &self.xxhash_config));
524        
525        loop {
526            let bytes_read = reader.read(&mut char_buffer)
527                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
528            if bytes_read == 0 {
529                break;
530            }
531            
532            let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
533                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
534            
535            let decoded = crate::encoders::byte_range::decode_byte_range(chunk_str, self.dictionary)?;
536            
537            if let Some(ref mut h) = hasher {
538                h.update(&decoded);
539            }
540            
541            self.writer.write_all(&decoded)
542                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
543        }
544        
545        Ok(hasher.map(|h| h.finalize()))
546    }
547}
548
549// Helper for managing hash state during streaming
enum HasherWriter {
    // Cryptographic digests (RustCrypto `Digest` implementations).
    Md5(md5::Md5),
    Sha224(sha2::Sha224),
    Sha256(sha2::Sha256),
    Sha384(sha2::Sha384),
    Sha512(sha2::Sha512),
    Sha3_224(sha3::Sha3_224),
    Sha3_256(sha3::Sha3_256),
    Sha3_384(sha3::Sha3_384),
    Sha3_512(sha3::Sha3_512),
    Keccak224(sha3::Keccak224),
    Keccak256(sha3::Keccak256),
    Keccak384(sha3::Keccak384),
    Keccak512(sha3::Keccak512),
    Blake2b(blake2::Blake2b512),
    Blake2s(blake2::Blake2s256),
    Blake3(blake3::Hasher),
    // CRC digests borrow `'static` `crc::Crc` tables (see
    // `create_hasher_writer`); boxed to keep the enum small.
    Crc16(Box<crc::Digest<'static, u16>>),
    Crc32(Box<crc::Digest<'static, u32>>),
    Crc32c(Box<crc::Digest<'static, u32>>),
    Crc64(Box<crc::Digest<'static, u64>>),
    // Non-cryptographic xxHash family (`std::hash::Hasher` interface).
    XxHash32(twox_hash::XxHash32),
    XxHash64(twox_hash::XxHash64),
    XxHash3_64(twox_hash::xxhash3_64::Hasher),
    XxHash3_128(twox_hash::xxhash3_128::Hasher),
}
576
impl HasherWriter {
    /// Feeds `data` into the underlying incremental hash state.
    fn update(&mut self, data: &[u8]) {
        // `Digest::update` covers the crypto hashes; `Hasher::write`
        // covers the xxHash family; `crc::Digest` has its own `update`.
        use sha2::Digest;
        use std::hash::Hasher;
        
        match self {
            HasherWriter::Md5(h) => { h.update(data); }
            HasherWriter::Sha224(h) => { h.update(data); }
            HasherWriter::Sha256(h) => { h.update(data); }
            HasherWriter::Sha384(h) => { h.update(data); }
            HasherWriter::Sha512(h) => { h.update(data); }
            HasherWriter::Sha3_224(h) => { h.update(data); }
            HasherWriter::Sha3_256(h) => { h.update(data); }
            HasherWriter::Sha3_384(h) => { h.update(data); }
            HasherWriter::Sha3_512(h) => { h.update(data); }
            HasherWriter::Keccak224(h) => { h.update(data); }
            HasherWriter::Keccak256(h) => { h.update(data); }
            HasherWriter::Keccak384(h) => { h.update(data); }
            HasherWriter::Keccak512(h) => { h.update(data); }
            HasherWriter::Blake2b(h) => { h.update(data); }
            HasherWriter::Blake2s(h) => { h.update(data); }
            HasherWriter::Blake3(h) => { h.update(data); }
            HasherWriter::Crc16(digest) => { digest.update(data); }
            HasherWriter::Crc32(digest) => { digest.update(data); }
            HasherWriter::Crc32c(digest) => { digest.update(data); }
            HasherWriter::Crc64(digest) => { digest.update(data); }
            HasherWriter::XxHash32(h) => { h.write(data); }
            HasherWriter::XxHash64(h) => { h.write(data); }
            HasherWriter::XxHash3_64(h) => { h.write(data); }
            HasherWriter::XxHash3_128(h) => { h.write(data); }
        }
    }
    
    /// Consumes the hasher and returns the digest bytes.
    ///
    /// Integer-valued digests (CRC, xxHash) are serialized big-endian.
    fn finalize(self) -> Vec<u8> {
        use sha2::Digest;
        use std::hash::Hasher;
        
        match self {
            HasherWriter::Md5(h) => h.finalize().to_vec(),
            HasherWriter::Sha224(h) => h.finalize().to_vec(),
            HasherWriter::Sha256(h) => h.finalize().to_vec(),
            HasherWriter::Sha384(h) => h.finalize().to_vec(),
            HasherWriter::Sha512(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_224(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_256(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_384(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_512(h) => h.finalize().to_vec(),
            HasherWriter::Keccak224(h) => h.finalize().to_vec(),
            HasherWriter::Keccak256(h) => h.finalize().to_vec(),
            HasherWriter::Keccak384(h) => h.finalize().to_vec(),
            HasherWriter::Keccak512(h) => h.finalize().to_vec(),
            HasherWriter::Blake2b(h) => h.finalize().to_vec(),
            HasherWriter::Blake2s(h) => h.finalize().to_vec(),
            HasherWriter::Blake3(h) => h.finalize().as_bytes().to_vec(),
            HasherWriter::Crc16(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32c(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc64(digest) => digest.finalize().to_be_bytes().to_vec(),
            // XxHash32's `Hasher::finish` widens to u64; narrow back to 4 bytes.
            HasherWriter::XxHash32(h) => (h.finish() as u32).to_be_bytes().to_vec(),
            HasherWriter::XxHash64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_128(h) => {
                // 128-bit variant needs the dedicated `finish_128` call.
                let hash = h.finish_128();
                let mut result = Vec::with_capacity(16);
                result.extend_from_slice(&hash.to_be_bytes());
                result
            },
        }
    }
}
647
/// Builds the incremental hasher state for `algo`.
///
/// `config` supplies the seed (and optional secret) for the xxHash
/// family; it is ignored by every other algorithm.
fn create_hasher_writer(algo: HashAlgorithm, config: &crate::hashing::XxHashConfig) -> HasherWriter {
    use sha2::Digest;

    match algo {
        HashAlgorithm::Md5 => HasherWriter::Md5(md5::Md5::new()),
        HashAlgorithm::Sha224 => HasherWriter::Sha224(sha2::Sha224::new()),
        HashAlgorithm::Sha256 => HasherWriter::Sha256(sha2::Sha256::new()),
        HashAlgorithm::Sha384 => HasherWriter::Sha384(sha2::Sha384::new()),
        HashAlgorithm::Sha512 => HasherWriter::Sha512(sha2::Sha512::new()),
        HashAlgorithm::Sha3_224 => HasherWriter::Sha3_224(sha3::Sha3_224::new()),
        HashAlgorithm::Sha3_256 => HasherWriter::Sha3_256(sha3::Sha3_256::new()),
        HashAlgorithm::Sha3_384 => HasherWriter::Sha3_384(sha3::Sha3_384::new()),
        HashAlgorithm::Sha3_512 => HasherWriter::Sha3_512(sha3::Sha3_512::new()),
        HashAlgorithm::Keccak224 => HasherWriter::Keccak224(sha3::Keccak224::new()),
        HashAlgorithm::Keccak256 => HasherWriter::Keccak256(sha3::Keccak256::new()),
        HashAlgorithm::Keccak384 => HasherWriter::Keccak384(sha3::Keccak384::new()),
        HashAlgorithm::Keccak512 => HasherWriter::Keccak512(sha3::Keccak512::new()),
        HashAlgorithm::Blake2b => HasherWriter::Blake2b(blake2::Blake2b512::new()),
        HashAlgorithm::Blake2s => HasherWriter::Blake2s(blake2::Blake2s256::new()),
        HashAlgorithm::Blake3 => HasherWriter::Blake3(blake3::Hasher::new()),
        HashAlgorithm::Crc16 => {
            // `static` promotes the CRC lookup table to `'static` so the
            // returned digest can borrow it for the life of the program.
            static CRC: crc::Crc<u16> = crc::Crc::<u16>::new(&crc::CRC_16_IBM_SDLC);
            HasherWriter::Crc16(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32 => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
            HasherWriter::Crc32(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32c => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
            HasherWriter::Crc32c(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc64 => {
            static CRC: crc::Crc<u64> = crc::Crc::<u64>::new(&crc::CRC_64_ECMA_182);
            HasherWriter::Crc64(Box::new(CRC.digest()))
        }
        // XxHash32 takes a 32-bit seed; the config stores a u64, so it is narrowed here.
        HashAlgorithm::XxHash32 => HasherWriter::XxHash32(twox_hash::XxHash32::with_seed(config.seed as u32)),
        HashAlgorithm::XxHash64 => HasherWriter::XxHash64(twox_hash::XxHash64::with_seed(config.seed)),
        HashAlgorithm::XxHash3_64 => {
            // Secret length was validated when the config was built, so a
            // failure here would be a bug, not a user error.
            if let Some(ref secret) = config.secret {
                HasherWriter::XxHash3_64(
                    twox_hash::xxhash3_64::Hasher::with_seed_and_secret(config.seed, secret.as_slice())
                        .expect("XXH3 secret validation should have been done in XxHashConfig::with_secret")
                )
            } else {
                HasherWriter::XxHash3_64(twox_hash::xxhash3_64::Hasher::with_seed(config.seed))
            }
        }
        HashAlgorithm::XxHash3_128 => {
            if let Some(ref secret) = config.secret {
                HasherWriter::XxHash3_128(
                    twox_hash::xxhash3_128::Hasher::with_seed_and_secret(config.seed, secret.as_slice())
                        .expect("XXH3 secret validation should have been done in XxHashConfig::with_secret")
                )
            } else {
                HasherWriter::XxHash3_128(twox_hash::xxhash3_128::Hasher::with_seed(config.seed))
            }
        }
    }
}
708
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DictionariesConfig, Dictionary};
    use std::io::Cursor;

    /// Builds a `Dictionary` from the named entry in the default config.
    fn get_dictionary(name: &str) -> Dictionary {
        let config = DictionariesConfig::load_default().unwrap();
        let alphabet_config = config.get_dictionary(name).unwrap();

        if let crate::core::config::EncodingMode::ByteRange = alphabet_config.mode {
            let start = alphabet_config.start_codepoint.unwrap();
            Dictionary::new_with_mode_and_range(Vec::new(), alphabet_config.mode.clone(), None, Some(start)).unwrap()
        } else {
            let chars: Vec<char> = alphabet_config.chars.chars().collect();
            let padding = alphabet_config.padding.as_ref().and_then(|s| s.chars().next());
            Dictionary::new_with_mode(chars, alphabet_config.mode.clone(), padding).unwrap()
        }
    }

    /// Encodes `data` through a `StreamingEncoder`, decodes the result
    /// through a `StreamingDecoder`, and returns the decoded bytes.
    fn roundtrip(dictionary: &Dictionary, data: &[u8]) -> Vec<u8> {
        let mut encoded = Vec::new();
        StreamingEncoder::new(dictionary, &mut encoded)
            .encode(&mut Cursor::new(data))
            .unwrap();

        let mut decoded = Vec::new();
        StreamingDecoder::new(dictionary, &mut decoded)
            .decode(&mut Cursor::new(&encoded))
            .unwrap();
        decoded
    }

    #[test]
    fn test_streaming_encode_decode_base64() {
        // Chunked-mode roundtrip.
        let dictionary = get_dictionary("base64");
        let data = b"Hello, World! This is a streaming test with multiple chunks of data.";
        assert_eq!(roundtrip(&dictionary, data), data.to_vec());
    }

    #[test]
    fn test_streaming_encode_decode_base100() {
        // Byte-range-mode roundtrip.
        let dictionary = get_dictionary("base100");
        let data = b"Test data for byte range streaming";
        assert_eq!(roundtrip(&dictionary, data), data.to_vec());
    }

    #[test]
    fn test_streaming_large_data() {
        // 100KB of data, spanning many 4KB chunks.
        let dictionary = get_dictionary("base64");
        let data: Vec<u8> = (0..100000).map(|i| (i % 256) as u8).collect();
        assert_eq!(roundtrip(&dictionary, &data), data);
    }
}