base_d/
streaming.rs

1use crate::alphabet::Alphabet;
2use crate::encoding::DecodeError;
3use std::io::{Read, Write};
4
/// Size, in bytes, of the read buffers used by the streaming encoder and decoder (4 KiB).
const CHUNK_SIZE: usize = 4 * 1024;
6
7/// Streaming encoder for processing large amounts of data efficiently.
8///
9/// Processes data in chunks to avoid loading entire files into memory.
10/// Suitable for encoding large files or network streams.
11pub struct StreamingEncoder<'a, W: Write> {
12    alphabet: &'a Alphabet,
13    writer: W,
14}
15
16impl<'a, W: Write> StreamingEncoder<'a, W> {
17    /// Creates a new streaming encoder.
18    ///
19    /// # Arguments
20    ///
21    /// * `alphabet` - The alphabet to use for encoding
22    /// * `writer` - The destination for encoded output
23    pub fn new(alphabet: &'a Alphabet, writer: W) -> Self {
24        StreamingEncoder { alphabet, writer }
25    }
26    
27    /// Encodes data from a reader in chunks.
28    ///
29    /// Note: BaseConversion mode requires reading the entire input at once
30    /// due to the mathematical nature of the algorithm. For truly streaming
31    /// behavior, use Chunked or ByteRange modes.
32    pub fn encode<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
33        match self.alphabet.mode() {
34            crate::config::EncodingMode::Chunked => {
35                self.encode_chunked(reader)
36            }
37            crate::config::EncodingMode::ByteRange => {
38                self.encode_byte_range(reader)
39            }
40            crate::config::EncodingMode::BaseConversion => {
41                // Mathematical mode requires entire input - read all and encode
42                let mut buffer = Vec::new();
43                reader.read_to_end(&mut buffer)?;
44                let encoded = crate::encoding::encode(&buffer, self.alphabet);
45                self.writer.write_all(encoded.as_bytes())?;
46                Ok(())
47            }
48        }
49    }
50    
51    fn encode_chunked<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
52        let base = self.alphabet.base();
53        let bits_per_char = (base as f64).log2() as usize;
54        let bytes_per_group = bits_per_char;
55        
56        // Adjust chunk size to align with encoding groups
57        let aligned_chunk_size = (CHUNK_SIZE / bytes_per_group) * bytes_per_group;
58        let mut buffer = vec![0u8; aligned_chunk_size];
59        
60        loop {
61            let bytes_read = reader.read(&mut buffer)?;
62            if bytes_read == 0 {
63                break;
64            }
65            
66            let encoded = crate::chunked::encode_chunked(&buffer[..bytes_read], self.alphabet);
67            self.writer.write_all(encoded.as_bytes())?;
68        }
69        
70        Ok(())
71    }
72    
73    fn encode_byte_range<R: Read>(&mut self, reader: &mut R) -> std::io::Result<()> {
74        let mut buffer = vec![0u8; CHUNK_SIZE];
75        
76        loop {
77            let bytes_read = reader.read(&mut buffer)?;
78            if bytes_read == 0 {
79                break;
80            }
81            
82            let encoded = crate::byte_range::encode_byte_range(&buffer[..bytes_read], self.alphabet);
83            self.writer.write_all(encoded.as_bytes())?;
84        }
85        
86        Ok(())
87    }
88}
89
90/// Streaming decoder for processing large amounts of encoded data efficiently.
91///
92/// Processes data in chunks to avoid loading entire files into memory.
93/// Suitable for decoding large files or network streams.
94pub struct StreamingDecoder<'a, W: Write> {
95    alphabet: &'a Alphabet,
96    writer: W,
97}
98
99impl<'a, W: Write> StreamingDecoder<'a, W> {
100    /// Creates a new streaming decoder.
101    ///
102    /// # Arguments
103    ///
104    /// * `alphabet` - The alphabet used for encoding
105    /// * `writer` - The destination for decoded output
106    pub fn new(alphabet: &'a Alphabet, writer: W) -> Self {
107        StreamingDecoder { alphabet, writer }
108    }
109    
110    /// Decodes data from a reader in chunks.
111    ///
112    /// Note: BaseConversion mode requires reading the entire input at once
113    /// due to the mathematical nature of the algorithm. For truly streaming
114    /// behavior, use Chunked or ByteRange modes.
115    pub fn decode<R: Read>(&mut self, reader: &mut R) -> Result<(), DecodeError> {
116        match self.alphabet.mode() {
117            crate::config::EncodingMode::Chunked => {
118                self.decode_chunked(reader)
119            }
120            crate::config::EncodingMode::ByteRange => {
121                self.decode_byte_range(reader)
122            }
123            crate::config::EncodingMode::BaseConversion => {
124                // Mathematical mode requires entire input
125                let mut buffer = String::new();
126                reader.read_to_string(&mut buffer)
127                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
128                let decoded = crate::encoding::decode(&buffer, self.alphabet)?;
129                self.writer.write_all(&decoded)
130                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
131                Ok(())
132            }
133        }
134    }
135    
136    fn decode_chunked<R: Read>(&mut self, reader: &mut R) -> Result<(), DecodeError> {
137        let base = self.alphabet.base();
138        let bits_per_char = (base as f64).log2() as usize;
139        let chars_per_group = 8 / bits_per_char;
140        
141        // Read text in chunks
142        let mut text_buffer = String::new();
143        let mut char_buffer = vec![0u8; CHUNK_SIZE];
144        
145        loop {
146            let bytes_read = reader.read(&mut char_buffer)
147                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
148            if bytes_read == 0 {
149                break;
150            }
151            
152            let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
153                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
154            text_buffer.push_str(chunk_str);
155            
156            // Process complete character groups
157            let chars: Vec<char> = text_buffer.chars().collect();
158            let complete_groups = (chars.len() / chars_per_group) * chars_per_group;
159            
160            if complete_groups > 0 {
161                let to_decode: String = chars[..complete_groups].iter().collect();
162                let decoded = crate::chunked::decode_chunked(&to_decode, self.alphabet)?;
163                self.writer.write_all(&decoded)
164                    .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
165                
166                // Keep remaining chars for next iteration
167                text_buffer = chars[complete_groups..].iter().collect();
168            }
169        }
170        
171        // Process any remaining characters
172        if !text_buffer.is_empty() {
173            let decoded = crate::chunked::decode_chunked(&text_buffer, self.alphabet)?;
174            self.writer.write_all(&decoded)
175                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
176        }
177        
178        Ok(())
179    }
180    
181    fn decode_byte_range<R: Read>(&mut self, reader: &mut R) -> Result<(), DecodeError> {
182        let mut char_buffer = vec![0u8; CHUNK_SIZE];
183        
184        loop {
185            let bytes_read = reader.read(&mut char_buffer)
186                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
187            if bytes_read == 0 {
188                break;
189            }
190            
191            let chunk_str = std::str::from_utf8(&char_buffer[..bytes_read])
192                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
193            
194            let decoded = crate::byte_range::decode_byte_range(chunk_str, self.alphabet)?;
195            self.writer.write_all(&decoded)
196                .map_err(|_| DecodeError::InvalidCharacter('\0'))?;
197        }
198        
199        Ok(())
200    }
201}
202
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{AlphabetsConfig, Alphabet};
    use std::io::Cursor;

    /// Builds the alphabet registered under `name` in the default configuration.
    fn get_alphabet(name: &str) -> Alphabet {
        let config = AlphabetsConfig::load_default().unwrap();
        let entry = config.get_alphabet(name).unwrap();

        // Byte-range alphabets are described by a start codepoint rather than
        // by an explicit character list.
        if let crate::config::EncodingMode::ByteRange = entry.mode {
            let start = entry.start_codepoint.unwrap();
            return Alphabet::new_with_mode_and_range(
                Vec::new(),
                entry.mode.clone(),
                None,
                Some(start),
            )
            .unwrap();
        }

        let chars: Vec<char> = entry.chars.chars().collect();
        let padding = entry.padding.as_ref().and_then(|s| s.chars().next());
        Alphabet::new_with_mode(chars, entry.mode.clone(), padding).unwrap()
    }

    /// Streams `input` through an encoder and then a decoder, returning the
    /// decoded bytes.
    fn round_trip(alphabet: &Alphabet, input: &[u8]) -> Vec<u8> {
        let mut encoded = Vec::new();
        StreamingEncoder::new(alphabet, &mut encoded)
            .encode(&mut Cursor::new(input))
            .unwrap();

        let mut decoded = Vec::new();
        StreamingDecoder::new(alphabet, &mut decoded)
            .decode(&mut Cursor::new(&encoded))
            .unwrap();

        decoded
    }

    #[test]
    fn test_streaming_encode_decode_base64() {
        let alphabet = get_alphabet("base64");
        let data = b"Hello, World! This is a streaming test with multiple chunks of data.";

        let decoded = round_trip(&alphabet, data);

        assert_eq!(&data[..], &decoded[..]);
    }

    #[test]
    fn test_streaming_encode_decode_base100() {
        let alphabet = get_alphabet("base100");
        let data = b"Test data for byte range streaming";

        let decoded = round_trip(&alphabet, data);

        assert_eq!(&data[..], &decoded[..]);
    }

    #[test]
    fn test_streaming_large_data() {
        let alphabet = get_alphabet("base64");
        // Create 100KB of data
        let data: Vec<u8> = (0..100000).map(|i| (i % 256) as u8).collect();

        let decoded = round_trip(&alphabet, &data);

        assert_eq!(data, decoded);
    }
}