base_d/encoders/streaming.rs

use crate::core::dictionary::Dictionary;
use crate::encoders::encoding::DecodeError;
use crate::compression::CompressionAlgorithm;
use crate::hashing::HashAlgorithm;
use std::io::{Read, Write};

const CHUNK_SIZE: usize = 4096; // 4KB chunks

/// Streaming encoder for processing large amounts of data efficiently.
///
/// Processes data in chunks to avoid loading entire files into memory.
/// Suitable for encoding large files or network streams.
/// Supports optional compression and hashing during encoding.
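///
/// # Example
///
/// A minimal sketch, assuming a `Dictionary` named `dictionary` has already
/// been constructed (the tests at the bottom of this file show one way to
/// build one from `DictionariesConfig`):
///
/// ```ignore
/// use std::io::Cursor;
///
/// let mut encoded = Vec::new();
/// let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded);
/// let mut reader = Cursor::new(b"some input".as_slice());
/// let hash = encoder.encode(&mut reader)?; // None unless with_hashing was set
/// ```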
pub struct StreamingEncoder<'a, W: Write> {
    dictionary: &'a Dictionary,
    writer: W,
    compress_algo: Option<CompressionAlgorithm>,
    compress_level: u32,
    hash_algo: Option<HashAlgorithm>,
}

impl<'a, W: Write> StreamingEncoder<'a, W> {
    /// Creates a new streaming encoder.
    ///
    /// # Arguments
    ///
    /// * `dictionary` - The dictionary to use for encoding
    /// * `writer` - The destination for encoded output
    pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
        StreamingEncoder {
            dictionary,
            writer,
            compress_algo: None,
            compress_level: 6,
            hash_algo: None,
        }
    }

    /// Sets the compression algorithm and level to apply before encoding.
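    ///
    /// # Example
    ///
    /// A sketch of the builder chain, assuming `dictionary` and `writer` are
    /// in scope; level 6 mirrors the default `compress_level`.
    ///
    /// ```ignore
    /// let encoder = StreamingEncoder::new(&dictionary, writer)
    ///     .with_compression(CompressionAlgorithm::Gzip, 6)
    ///     .with_hashing(HashAlgorithm::Sha256);
    /// ```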
    pub fn with_compression(mut self, algo: CompressionAlgorithm, level: u32) -> Self {
        self.compress_algo = Some(algo);
        self.compress_level = level;
        self
    }

    /// Sets the hash algorithm; the hash is computed over the raw input while it is encoded.
    pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
        self.hash_algo = Some(algo);
        self
    }

    /// Encodes data from a reader in chunks.
    ///
    /// Note: BaseConversion mode requires reading the entire input at once
    /// due to the mathematical nature of the algorithm. For truly streaming
    /// behavior, use Chunked or ByteRange modes.
    ///
    /// Returns the computed hash if hash_algo was set, otherwise None.
    pub fn encode<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
        // If compression is enabled, we need to compress then encode
        if let Some(algo) = self.compress_algo {
            return self.encode_with_compression(reader, algo);
        }

        // No compression - encode directly with optional hashing
        let hash = match self.dictionary.mode() {
            crate::core::config::EncodingMode::Chunked => {
                self.encode_chunked(reader)?
            }
            crate::core::config::EncodingMode::ByteRange => {
                self.encode_byte_range(reader)?
            }
            crate::core::config::EncodingMode::BaseConversion => {
                // Mathematical mode requires entire input - read all and encode
                let mut buffer = Vec::new();
                reader.read_to_end(&mut buffer)?;

                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));

                let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
                self.writer.write_all(encoded.as_bytes())?;
                hash
            }
        };

        Ok(hash)
    }

    /// Encode with compression: compress stream then encode compressed data.
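    ///
    /// The data flows reader -> compress -> encode -> writer; when hashing is
    /// enabled, the hash is taken over the raw input before compression.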
    fn encode_with_compression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
        use std::io::Cursor;

        // Compress the input stream
        let mut compressed_data = Vec::new();
        let hash = self.compress_stream(reader, &mut compressed_data, algo)?;

        // Encode the compressed data
        let mut cursor = Cursor::new(compressed_data);
        match self.dictionary.mode() {
            crate::core::config::EncodingMode::Chunked => {
                self.encode_chunked_no_hash(&mut cursor)?;
            }
            crate::core::config::EncodingMode::ByteRange => {
                self.encode_byte_range_no_hash(&mut cursor)?;
            }
            crate::core::config::EncodingMode::BaseConversion => {
                let buffer = cursor.into_inner();
                let encoded = crate::encoders::encoding::encode(&buffer, self.dictionary);
                self.writer.write_all(encoded.as_bytes())?;
            }
        }

        Ok(hash)
    }

    /// Compress a stream with optional hashing.
    fn compress_stream<R: Read>(&mut self, reader: &mut R, output: &mut Vec<u8>, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
        use flate2::write::GzEncoder;
        use xz2::write::XzEncoder;

        let hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        match algo {
            CompressionAlgorithm::Gzip => {
                let mut encoder = GzEncoder::new(output, flate2::Compression::new(self.compress_level));
                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
                encoder.finish()?;
                Ok(hash)
            }
            CompressionAlgorithm::Zstd => {
                let mut encoder = zstd::stream::write::Encoder::new(output, self.compress_level as i32)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
                encoder.finish()?;
                Ok(hash)
            }
            CompressionAlgorithm::Brotli => {
                // brotli::CompressorWriter flushes its remaining output when
                // it is dropped at the end of this arm, so no finish() call.
                let mut encoder = brotli::CompressorWriter::new(output, 4096, self.compress_level, 22);
                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
                Ok(hash)
            }
            CompressionAlgorithm::Lzma => {
                let mut encoder = XzEncoder::new(output, self.compress_level);
                let hash = Self::copy_with_hash(reader, &mut encoder, hasher)?;
                encoder.finish()?;
                Ok(hash)
            }
            CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
                // LZ4 and Snappy are handled through their one-shot block
                // APIs here, so the remaining input is read in full first.
                let mut buffer = Vec::new();
                reader.read_to_end(&mut buffer)?;

                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&buffer, algo));

                let compressed = match algo {
                    CompressionAlgorithm::Lz4 => {
                        lz4::block::compress(&buffer, None, false)
                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
                    }
                    CompressionAlgorithm::Snappy => {
                        let mut encoder = snap::raw::Encoder::new();
                        encoder.compress_vec(&buffer)
                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
                    }
                    _ => unreachable!()
                };
                output.extend_from_slice(&compressed);
                Ok(hash)
            }
        }
    }

    fn copy_with_hash<R: Read>(reader: &mut R, writer: &mut impl Write, mut hasher: Option<HasherWriter>) -> std::io::Result<Option<Vec<u8>>> {
        let mut buffer = vec![0u8; CHUNK_SIZE];

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let chunk = &buffer[..bytes_read];
            if let Some(ref mut h) = hasher {
                h.update(chunk);
            }
            writer.write_all(chunk)?;
        }

        Ok(hasher.map(|h| h.finalize()))
    }

    fn encode_chunked<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
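        // Why bits_per_char bytes per group: assuming a power-of-two base
        // (2^bits_per_char, which the log2 below relies on), a run of
        // bits_per_char bytes is 8 * bits_per_char bits -- a multiple of both
        // 8 and bits_per_char -- so chunks aligned to that size always encode
        // to whole characters with no partial group at a chunk boundary.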
        let base = self.dictionary.base();
        let bits_per_char = (base as f64).log2() as usize;
        let bytes_per_group = bits_per_char;

        // Adjust chunk size to align with encoding groups
        let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
        let mut buffer = vec![0u8; aligned_chunk_size];

        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let chunk = &buffer[..bytes_read];
            if let Some(ref mut h) = hasher {
                h.update(chunk);
            }

            let encoded = crate::encoders::chunked::encode_chunked(chunk, self.dictionary);
            self.writer.write_all(encoded.as_bytes())?;
        }

        Ok(hasher.map(|h| h.finalize()))
    }

    fn encode_chunked_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
        let base = self.dictionary.base();
        let bits_per_char = (base as f64).log2() as usize;
        let bytes_per_group = bits_per_char;

        let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
        let mut buffer = vec![0u8; aligned_chunk_size];

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let encoded = crate::encoders::chunked::encode_chunked(&buffer[..bytes_read], self.dictionary);
            self.writer.write_all(encoded.as_bytes())?;
        }

        Ok(())
    }

    fn encode_byte_range<R: Read>(&mut self, reader: &mut R) -> std::io::Result<Option<Vec<u8>>> {
        let mut buffer = vec![0u8; CHUNK_SIZE];
        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let chunk = &buffer[..bytes_read];
            if let Some(ref mut h) = hasher {
                h.update(chunk);
            }

            let encoded = crate::encoders::byte_range::encode_byte_range(chunk, self.dictionary);
            self.writer.write_all(encoded.as_bytes())?;
        }

        Ok(hasher.map(|h| h.finalize()))
    }

    fn encode_byte_range_no_hash<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
        let mut buffer = vec![0u8; CHUNK_SIZE];

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let encoded = crate::encoders::byte_range::encode_byte_range(&buffer[..bytes_read], self.dictionary);
            self.writer.write_all(encoded.as_bytes())?;
        }

        Ok(())
    }
}

/// Streaming decoder for processing large amounts of encoded data efficiently.
///
/// Processes data in chunks to avoid loading entire files into memory.
/// Suitable for decoding large files or network streams.
/// Supports optional decompression and hashing during decoding.
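///
/// # Example
///
/// A minimal sketch, assuming `dictionary` is the same `Dictionary` the data
/// was encoded with and `encoded` holds the encoded text:
///
/// ```ignore
/// use std::io::Cursor;
///
/// let mut decoded = Vec::new();
/// let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded);
/// let mut reader = Cursor::new(encoded.as_bytes());
/// decoder.decode(&mut reader)?;
/// ```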
pub struct StreamingDecoder<'a, W: Write> {
    dictionary: &'a Dictionary,
    writer: W,
    decompress_algo: Option<CompressionAlgorithm>,
    hash_algo: Option<HashAlgorithm>,
}

impl<'a, W: Write> StreamingDecoder<'a, W> {
    /// Creates a new streaming decoder.
    ///
    /// # Arguments
    ///
    /// * `dictionary` - The dictionary used for encoding
    /// * `writer` - The destination for decoded output
    pub fn new(dictionary: &'a Dictionary, writer: W) -> Self {
        StreamingDecoder {
            dictionary,
            writer,
            decompress_algo: None,
            hash_algo: None,
        }
    }

    /// Sets the decompression algorithm to apply after decoding.
    pub fn with_decompression(mut self, algo: CompressionAlgorithm) -> Self {
        self.decompress_algo = Some(algo);
        self
    }

    /// Sets the hash algorithm; the hash is computed over the final decoded output.
    pub fn with_hashing(mut self, algo: HashAlgorithm) -> Self {
        self.hash_algo = Some(algo);
        self
    }

    /// Decodes data from a reader in chunks.
    ///
    /// Note: BaseConversion mode requires reading the entire input at once
    /// due to the mathematical nature of the algorithm. For truly streaming
    /// behavior, use Chunked or ByteRange modes.
    ///
    /// Returns the computed hash if hash_algo was set, otherwise None.
    pub fn decode<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
        // If decompression is enabled, decode then decompress
        if let Some(algo) = self.decompress_algo {
            return self.decode_with_decompression(reader, algo);
        }

        // No decompression - decode directly with optional hashing
        match self.dictionary.mode() {
            crate::core::config::EncodingMode::Chunked => {
                self.decode_chunked(reader)
            }
            crate::core::config::EncodingMode::ByteRange => {
                self.decode_byte_range(reader)
            }
            crate::core::config::EncodingMode::BaseConversion => {
                // Mathematical mode requires entire input
                let mut buffer = String::new();
                reader.read_to_string(&mut buffer)
                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
                let decoded = crate::encoders::encoding::decode(&buffer, self.dictionary)?;

                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decoded, algo));

                self.writer.write_all(&decoded)
                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
                Ok(hash)
            }
        }
    }

    /// Decode with decompression: decode stream then decompress decoded data.
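    ///
    /// The pipeline mirrors encoding in reverse: reader -> decode ->
    /// decompress -> writer; when hashing is enabled, the hash is taken over
    /// the decompressed output.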
    fn decode_with_decompression<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> Result<Option<Vec<u8>>, DecodeError> {
        use std::io::Cursor;

        // Decode the input stream to get compressed data
        let mut compressed_data = Vec::new();
        {
            let mut temp_decoder = StreamingDecoder::new(self.dictionary, &mut compressed_data);
            temp_decoder.decode(reader)?;
        }

        // Decompress and write to output with optional hashing
        let mut cursor = Cursor::new(compressed_data);
        let hash = self.decompress_stream(&mut cursor, algo)
            .map_err(|_| DecodeError::InvalidCharacter('\0'))?;

        Ok(hash)
    }

    /// Decompress a stream with optional hashing.
    fn decompress_stream<R: Read>(&mut self, reader: &mut R, algo: CompressionAlgorithm) -> std::io::Result<Option<Vec<u8>>> {
        use flate2::read::GzDecoder;
        use xz2::read::XzDecoder;

        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        match algo {
            CompressionAlgorithm::Gzip => {
                let mut decoder = GzDecoder::new(reader);
                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
            }
            CompressionAlgorithm::Zstd => {
                let mut decoder = zstd::stream::read::Decoder::new(reader)
                    .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
            }
            CompressionAlgorithm::Brotli => {
                let mut decoder = brotli::Decompressor::new(reader, 4096);
                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
            }
            CompressionAlgorithm::Lzma => {
                let mut decoder = XzDecoder::new(reader);
                Self::copy_with_hash_to_writer(&mut decoder, &mut self.writer, &mut hasher)?;
            }
            CompressionAlgorithm::Lz4 | CompressionAlgorithm::Snappy => {
                // LZ4 and Snappy are handled through their one-shot block
                // APIs here: read everything, then decompress.
                let mut compressed = Vec::new();
                reader.read_to_end(&mut compressed)?;

                let decompressed = match algo {
                    CompressionAlgorithm::Lz4 => {
                        // The block API needs an upper bound on the output
                        // size; 100 MiB here is an arbitrary safety cap.
                        lz4::block::decompress(&compressed, Some(100 * 1024 * 1024))
                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
                    }
                    CompressionAlgorithm::Snappy => {
                        let mut decoder = snap::raw::Decoder::new();
                        decoder.decompress_vec(&compressed)
                            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
                    }
                    _ => unreachable!()
                };

                let hash = self.hash_algo.map(|algo| crate::hashing::hash(&decompressed, algo));
                self.writer.write_all(&decompressed)?;
                return Ok(hash);
            }
        }

        Ok(hasher.map(|h| h.finalize()))
    }

    fn copy_with_hash_to_writer<R: Read>(reader: &mut R, writer: &mut W, hasher: &mut Option<HasherWriter>) -> std::io::Result<()> {
        let mut buffer = vec![0u8; CHUNK_SIZE];

        loop {
            let bytes_read = reader.read(&mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            let chunk = &buffer[..bytes_read];
            if let Some(ref mut h) = hasher {
                h.update(chunk);
            }
            writer.write_all(chunk)?;
        }

        Ok(())
    }

    fn decode_chunked<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
        let base = self.dictionary.base();
        let bits_per_char = (base as f64).log2() as usize;
        // A group of N characters decodes to whole bytes when
        // N * bits_per_char is a multiple of 8, i.e. N = lcm(8, bits) / bits:
        // 4 chars for base64, 8 for base32, 2 for base16.
        let mut chars_per_group = 1;
        while (chars_per_group * bits_per_char) % 8 != 0 {
            chars_per_group += 1;
        }

        // Read text in chunks. A read may end in the middle of a multi-byte
        // UTF-8 character, so incomplete trailing bytes are carried over to
        // the next read instead of being rejected.
        let mut text_buffer = String::new();
        let mut byte_pending: Vec<u8> = Vec::new();
        let mut char_buffer = vec![0u8; CHUNK_SIZE];
        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        loop {
            let bytes_read = reader.read(&mut char_buffer)
                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
            if bytes_read == 0 {
                break;
            }

            byte_pending.extend_from_slice(&char_buffer[..bytes_read]);
            let valid_up_to = match std::str::from_utf8(&byte_pending) {
                Ok(s) => s.len(),
                // An incomplete final character; keep its bytes for later.
                Err(e) if e.error_len().is_none() => e.valid_up_to(),
                Err(_) => return Err(DecodeError::InvalidCharacter('\0')),
            };
            text_buffer.push_str(
                std::str::from_utf8(&byte_pending[..valid_up_to])
                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?,
            );
            byte_pending.drain(..valid_up_to);

            // Process complete character groups
            let chars: Vec<char> = text_buffer.chars().collect();
            let complete_groups = (chars.len() / chars_per_group) * chars_per_group;

            if complete_groups > 0 {
                let to_decode: String = chars[..complete_groups].iter().collect();
                let decoded = crate::encoders::chunked::decode_chunked(&to_decode, self.dictionary)?;

                if let Some(ref mut h) = hasher {
                    h.update(&decoded);
                }

                self.writer.write_all(&decoded)
                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;

                // Keep remaining chars for next iteration
                text_buffer = chars[complete_groups..].iter().collect();
            }
        }

        // Leftover bytes that never formed a complete character
        if !byte_pending.is_empty() {
            return Err(DecodeError::InvalidCharacter('\0'));
        }

        // Process any remaining characters (the final, possibly padded group)
        if !text_buffer.is_empty() {
            let decoded = crate::encoders::chunked::decode_chunked(&text_buffer, self.dictionary)?;

            if let Some(ref mut h) = hasher {
                h.update(&decoded);
            }

            self.writer.write_all(&decoded)
                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
        }

        Ok(hasher.map(|h| h.finalize()))
    }

    fn decode_byte_range<R: Read>(&mut self, reader: &mut R) -> Result<Option<Vec<u8>>, DecodeError> {
        let mut char_buffer = vec![0u8; CHUNK_SIZE];
        // As in decode_chunked, a read may split a multi-byte UTF-8 character
        // (ByteRange dictionaries typically use multi-byte codepoints), so
        // incomplete trailing bytes are carried over to the next read.
        let mut byte_pending: Vec<u8> = Vec::new();
        let mut hasher = self.hash_algo.map(|algo| create_hasher_writer(algo));

        loop {
            let bytes_read = reader.read(&mut char_buffer)
                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
            if bytes_read == 0 {
                break;
            }

            byte_pending.extend_from_slice(&char_buffer[..bytes_read]);
            let valid_up_to = match std::str::from_utf8(&byte_pending) {
                Ok(s) => s.len(),
                Err(e) if e.error_len().is_none() => e.valid_up_to(),
                Err(_) => return Err(DecodeError::InvalidCharacter('\0')),
            };
            let chunk_str = std::str::from_utf8(&byte_pending[..valid_up_to])
                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;

            let decoded = crate::encoders::byte_range::decode_byte_range(chunk_str, self.dictionary)?;

            if let Some(ref mut h) = hasher {
                h.update(&decoded);
            }

            self.writer.write_all(&decoded)
                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
            byte_pending.drain(..valid_up_to);
        }

        // Leftover bytes that never formed a complete character
        if !byte_pending.is_empty() {
            return Err(DecodeError::InvalidCharacter('\0'));
        }

        Ok(hasher.map(|h| h.finalize()))
    }
}

// Helper for managing hash state during streaming
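// An enum is used rather than a trait object because the digest-style hashers
// (sha2/sha3/blake2/blake3), the std::hash::Hasher-based ones (xxhash), and
// the CRC digests do not share a common trait.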
enum HasherWriter {
    Md5(md5::Md5),
    Sha224(sha2::Sha224),
    Sha256(sha2::Sha256),
    Sha384(sha2::Sha384),
    Sha512(sha2::Sha512),
    Sha3_224(sha3::Sha3_224),
    Sha3_256(sha3::Sha3_256),
    Sha3_384(sha3::Sha3_384),
    Sha3_512(sha3::Sha3_512),
    Keccak224(sha3::Keccak224),
    Keccak256(sha3::Keccak256),
    Keccak384(sha3::Keccak384),
    Keccak512(sha3::Keccak512),
    Blake2b(blake2::Blake2b512),
    Blake2s(blake2::Blake2s256),
    Blake3(blake3::Hasher),
    Crc16(Box<crc::Digest<'static, u16>>),
    Crc32(Box<crc::Digest<'static, u32>>),
    Crc32c(Box<crc::Digest<'static, u32>>),
    Crc64(Box<crc::Digest<'static, u64>>),
    XxHash32(twox_hash::XxHash32),
    XxHash64(twox_hash::XxHash64),
    XxHash3_64(twox_hash::xxhash3_64::Hasher),
    XxHash3_128(twox_hash::xxhash3_128::Hasher),
}

impl HasherWriter {
    fn update(&mut self, data: &[u8]) {
        use sha2::Digest;
        use std::hash::Hasher;

        match self {
            HasherWriter::Md5(h) => { h.update(data); }
            HasherWriter::Sha224(h) => { h.update(data); }
            HasherWriter::Sha256(h) => { h.update(data); }
            HasherWriter::Sha384(h) => { h.update(data); }
            HasherWriter::Sha512(h) => { h.update(data); }
            HasherWriter::Sha3_224(h) => { h.update(data); }
            HasherWriter::Sha3_256(h) => { h.update(data); }
            HasherWriter::Sha3_384(h) => { h.update(data); }
            HasherWriter::Sha3_512(h) => { h.update(data); }
            HasherWriter::Keccak224(h) => { h.update(data); }
            HasherWriter::Keccak256(h) => { h.update(data); }
            HasherWriter::Keccak384(h) => { h.update(data); }
            HasherWriter::Keccak512(h) => { h.update(data); }
            HasherWriter::Blake2b(h) => { h.update(data); }
            HasherWriter::Blake2s(h) => { h.update(data); }
            HasherWriter::Blake3(h) => { h.update(data); }
            HasherWriter::Crc16(digest) => { digest.update(data); }
            HasherWriter::Crc32(digest) => { digest.update(data); }
            HasherWriter::Crc32c(digest) => { digest.update(data); }
            HasherWriter::Crc64(digest) => { digest.update(data); }
            HasherWriter::XxHash32(h) => { h.write(data); }
            HasherWriter::XxHash64(h) => { h.write(data); }
            HasherWriter::XxHash3_64(h) => { h.write(data); }
            HasherWriter::XxHash3_128(h) => { h.write(data); }
        }
    }

    fn finalize(self) -> Vec<u8> {
        use sha2::Digest;
        use std::hash::Hasher;

        match self {
            HasherWriter::Md5(h) => h.finalize().to_vec(),
            HasherWriter::Sha224(h) => h.finalize().to_vec(),
            HasherWriter::Sha256(h) => h.finalize().to_vec(),
            HasherWriter::Sha384(h) => h.finalize().to_vec(),
            HasherWriter::Sha512(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_224(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_256(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_384(h) => h.finalize().to_vec(),
            HasherWriter::Sha3_512(h) => h.finalize().to_vec(),
            HasherWriter::Keccak224(h) => h.finalize().to_vec(),
            HasherWriter::Keccak256(h) => h.finalize().to_vec(),
            HasherWriter::Keccak384(h) => h.finalize().to_vec(),
            HasherWriter::Keccak512(h) => h.finalize().to_vec(),
            HasherWriter::Blake2b(h) => h.finalize().to_vec(),
            HasherWriter::Blake2s(h) => h.finalize().to_vec(),
            HasherWriter::Blake3(h) => h.finalize().as_bytes().to_vec(),
            HasherWriter::Crc16(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc32c(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::Crc64(digest) => digest.finalize().to_be_bytes().to_vec(),
            HasherWriter::XxHash32(h) => (h.finish() as u32).to_be_bytes().to_vec(),
            HasherWriter::XxHash64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_64(h) => h.finish().to_be_bytes().to_vec(),
            HasherWriter::XxHash3_128(h) => h.finish_128().to_be_bytes().to_vec(),
        }
    }
}

fn create_hasher_writer(algo: HashAlgorithm) -> HasherWriter {
    use sha2::Digest;

    match algo {
        HashAlgorithm::Md5 => HasherWriter::Md5(md5::Md5::new()),
        HashAlgorithm::Sha224 => HasherWriter::Sha224(sha2::Sha224::new()),
        HashAlgorithm::Sha256 => HasherWriter::Sha256(sha2::Sha256::new()),
        HashAlgorithm::Sha384 => HasherWriter::Sha384(sha2::Sha384::new()),
        HashAlgorithm::Sha512 => HasherWriter::Sha512(sha2::Sha512::new()),
        HashAlgorithm::Sha3_224 => HasherWriter::Sha3_224(sha3::Sha3_224::new()),
        HashAlgorithm::Sha3_256 => HasherWriter::Sha3_256(sha3::Sha3_256::new()),
        HashAlgorithm::Sha3_384 => HasherWriter::Sha3_384(sha3::Sha3_384::new()),
        HashAlgorithm::Sha3_512 => HasherWriter::Sha3_512(sha3::Sha3_512::new()),
        HashAlgorithm::Keccak224 => HasherWriter::Keccak224(sha3::Keccak224::new()),
        HashAlgorithm::Keccak256 => HasherWriter::Keccak256(sha3::Keccak256::new()),
        HashAlgorithm::Keccak384 => HasherWriter::Keccak384(sha3::Keccak384::new()),
        HashAlgorithm::Keccak512 => HasherWriter::Keccak512(sha3::Keccak512::new()),
        HashAlgorithm::Blake2b => HasherWriter::Blake2b(blake2::Blake2b512::new()),
        HashAlgorithm::Blake2s => HasherWriter::Blake2s(blake2::Blake2s256::new()),
        HashAlgorithm::Blake3 => HasherWriter::Blake3(blake3::Hasher::new()),
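        // Crc::digest() borrows lookup tables from its Crc instance; declaring
        // each Crc as `static` gives the digest the 'static lifetime required
        // by the HasherWriter variants above.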
        HashAlgorithm::Crc16 => {
            static CRC: crc::Crc<u16> = crc::Crc::<u16>::new(&crc::CRC_16_IBM_SDLC);
            HasherWriter::Crc16(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32 => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISO_HDLC);
            HasherWriter::Crc32(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc32c => {
            static CRC: crc::Crc<u32> = crc::Crc::<u32>::new(&crc::CRC_32_ISCSI);
            HasherWriter::Crc32c(Box::new(CRC.digest()))
        }
        HashAlgorithm::Crc64 => {
            static CRC: crc::Crc<u64> = crc::Crc::<u64>::new(&crc::CRC_64_ECMA_182);
            HasherWriter::Crc64(Box::new(CRC.digest()))
        }
        HashAlgorithm::XxHash32 => HasherWriter::XxHash32(twox_hash::XxHash32::with_seed(0)),
        HashAlgorithm::XxHash64 => HasherWriter::XxHash64(twox_hash::XxHash64::with_seed(0)),
        HashAlgorithm::XxHash3_64 => HasherWriter::XxHash3_64(twox_hash::xxhash3_64::Hasher::with_seed(0)),
        HashAlgorithm::XxHash3_128 => HasherWriter::XxHash3_128(twox_hash::xxhash3_128::Hasher::with_seed(0)),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{DictionariesConfig, Dictionary};
    use std::io::Cursor;

    fn get_dictionary(name: &str) -> Dictionary {
        let config = DictionariesConfig::load_default().unwrap();
        let alphabet_config = config.get_dictionary(name).unwrap();

        match alphabet_config.mode {
            crate::core::config::EncodingMode::ByteRange => {
                let start = alphabet_config.start_codepoint.unwrap();
                Dictionary::new_with_mode_and_range(Vec::new(), alphabet_config.mode.clone(), None, Some(start)).unwrap()
            }
            _ => {
                let chars: Vec<char> = alphabet_config.chars.chars().collect();
                let padding = alphabet_config.padding.as_ref().and_then(|s| s.chars().next());
                Dictionary::new_with_mode(chars, alphabet_config.mode.clone(), padding).unwrap()
            }
        }
    }

    #[test]
    fn test_streaming_encode_decode_base64() {
        let dictionary = get_dictionary("base64");
        let data = b"Hello, World! This is a streaming test with multiple chunks of data.";

        // Encode
        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap();
        }

        // Decode
        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, &decoded_output[..]);
    }

    #[test]
    fn test_streaming_encode_decode_base100() {
        let dictionary = get_dictionary("base100");
        let data = b"Test data for byte range streaming";

        // Encode
        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap();
        }

        // Decode
        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, &decoded_output[..]);
    }
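
    #[test]
    fn test_streaming_compression_roundtrip() {
        // A sketch of the compression path: gzip-compress while encoding,
        // then decode and decompress, expecting the original bytes back.
        let dictionary = get_dictionary("base64");
        let data = b"Compressible data! Compressible data! Compressible data!";

        // Encode with compression
        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output)
                .with_compression(CompressionAlgorithm::Gzip, 6);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap();
        }

        // Decode with decompression
        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output)
                .with_decompression(CompressionAlgorithm::Gzip);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, &decoded_output[..]);
    }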

    #[test]
    fn test_streaming_large_data() {
        let dictionary = get_dictionary("base64");
        // Create 100KB of data
        let data: Vec<u8> = (0..100000).map(|i| (i % 256) as u8).collect();

        // Encode
        let mut encoded_output = Vec::new();
        {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output);
            let mut reader = Cursor::new(&data);
            encoder.encode(&mut reader).unwrap();
        }

        // Decode
        let mut decoded_output = Vec::new();
        {
            let mut decoder = StreamingDecoder::new(&dictionary, &mut decoded_output);
            let mut reader = Cursor::new(&encoded_output);
            decoder.decode(&mut reader).unwrap();
        }

        assert_eq!(data, decoded_output);
    }
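
    #[test]
    fn test_streaming_encode_with_hashing() {
        // A sketch of the hashing path: with no compression enabled, the hash
        // returned by encode is computed over the raw input, so it should
        // match a one-shot hash of the same bytes.
        let dictionary = get_dictionary("base64");
        let data = b"hash me while streaming";

        let mut encoded_output = Vec::new();
        let hash = {
            let mut encoder = StreamingEncoder::new(&dictionary, &mut encoded_output)
                .with_hashing(HashAlgorithm::Sha256);
            let mut reader = Cursor::new(data);
            encoder.encode(&mut reader).unwrap()
        };

        assert_eq!(hash, Some(crate::hashing::hash(data, HashAlgorithm::Sha256)));
    }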
}