// base64/write/encoder.rs
1use std::{cmp, fmt};
2use std::io::{ErrorKind, Result, Write};
3
4use ::{encode_config_slice, Config};
5use encode::encode_to_slice;
6
pub(crate) const BUF_SIZE: usize = 1024;
/// The most bytes whose encoding will fit in `BUF_SIZE`: every 3 input bytes
/// become 4 output bytes, so 1024 / 4 * 3 = 768 input bytes fill the buffer.
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3;
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
12
/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
/// let mut enc = base64::write::EncoderWriter::new(
/// &mut wrapped_writer, base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors
/// enc.finish().unwrap();
///
/// }
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
    /// Determines the alphabet and padding behavior used when encoding.
    config: Config,
    /// Where encoded data is written to
    w: &'a mut W,
    /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
    /// with the next `write()`, encode it, then proceed with the rest of the input normally.
    extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
    /// How much of `extra_input` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`.
    extra_input_occupied_len: usize,
    /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the
    /// underlying writer did not write last time.
    output: [u8; BUF_SIZE],
    /// How much of `output` is occupied with encoded data that couldn't be written last time.
    /// Leftover bytes are always kept at the start of `output` (see `write_to_delegate`).
    output_occupied_len: usize,
    /// True iff padding / partial last chunk has been written.
    finished: bool,
    /// panic safety: don't write again in destructor if writer panicked while we were writing to it
    panicked: bool,
}
77
78impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
79 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
80 write!(
81 f,
82 "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
83 self.extra_input,
84 self.extra_input_occupied_len,
85 &self.output[0..5],
86 self.output_occupied_len
87 )
88 }
89}
90
/// Rotate the items in the slice to the left by `n_first`: after the call, the
/// element previously at index `n_first` is at index 0 and the first `n_first`
/// elements have moved to the end.
///
/// # Panics
///
/// Panics if `n_first > slice.len()` (same as the previous hand-rolled
/// implementation, which indexed out of bounds in that case).
fn rotate_left(slice: &mut [u8], n_first: usize) {
    // `slice::rotate_left` (stable since Rust 1.26) is exactly this operation;
    // it replaces a hand-rolled recursive port of cppreference's std::rotate.
    slice.rotate_left(n_first);
}
133
impl<'a, W: Write> EncoderWriter<'a, W> {
    /// Create a new encoder that will write to the provided delegate writer `w`.
    ///
    /// No data is written until `write()` is called; all buffers start empty.
    pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
        EncoderWriter {
            config,
            w,
            extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
            extra_input_occupied_len: 0,
            output: [0u8; BUF_SIZE],
            output_occupied_len: 0,
            finished: false,
            panicked: false,
        }
    }

    /// Encode all remaining buffered data and write it, including any trailing incomplete input
    /// triples and associated padding.
    ///
    /// Once this succeeds, no further writes can be performed, as that would produce invalid
    /// base64.
    ///
    /// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
    /// to its `write` each invocation.
    ///
    /// Calling `finish()` again after it has succeeded is a no-op. If it errors, it may be
    /// retried; state is rolled forward carefully so nothing is encoded twice.
    ///
    /// # Errors
    ///
    /// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
    pub fn finish(&mut self) -> Result<()> {
        // Idempotent: a second successful call does nothing.
        if self.finished {
            return Ok(());
        };

        // First drain any already-encoded bytes left over from an earlier write.
        self.write_all_encoded_output()?;

        if self.extra_input_occupied_len > 0 {
            // Encode the final partial triple; `encode_config_slice` applies
            // padding per `self.config`. Output is at most 4 bytes, which
            // trivially fits in the (now empty) 1 KiB output buffer.
            let encoded_len = encode_config_slice(
                &self.extra_input[..self.extra_input_occupied_len],
                self.config,
                &mut self.output[..],
            );

            self.output_occupied_len = encoded_len;

            self.write_all_encoded_output()?;

            // write succeeded, do not write the encoding of extra again if finish() is retried
            self.extra_input_occupied_len = 0;
        }

        self.finished = true;
        Ok(())
    }

    /// Write as much of the encoded output to the delegate writer as it will accept, and store the
    /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
    ///
    /// Makes exactly one `write` call on the delegate.
    ///
    /// # Errors
    ///
    /// Errors from the delegate writer are returned. In the case of an error,
    /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
    /// that no write took place.
    fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
        // Flag set across the delegate call so Drop won't re-enter a writer
        // that panicked mid-write.
        self.panicked = true;
        let res = self.w.write(&self.output[..current_output_len]);
        self.panicked = false;

        res.map(|consumed| {
            debug_assert!(consumed <= current_output_len);

            if consumed < current_output_len {
                // Partial write: keep the unwritten tail, moved to the front of
                // `output` so the next attempt can write `output[..occupied]`.
                self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
                // If we're blocking on I/O, the minor inefficiency of copying bytes to the
                // start of the buffer is the least of our concerns...
                // Rotate moves more than we need to, but copy_within isn't stabilized yet.
                rotate_left(&mut self.output[..], consumed);
            } else {
                self.output_occupied_len = 0;
            }
        })
    }

    /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
    ///
    /// This is basically write_all for the remaining buffered data but without the undesirable
    /// abort-on-`Ok(0)` behavior.
    ///
    /// # Errors
    ///
    /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
    /// `Interrupted`, in which case the error is ignored and writes will continue.
    fn write_all_encoded_output(&mut self) -> Result<()> {
        // `write_to_delegate` shrinks `output_occupied_len` on every successful
        // call, so this loop terminates unless the delegate keeps erroring.
        while self.output_occupied_len > 0 {
            let remaining_len = self.output_occupied_len;
            match self.write_to_delegate(remaining_len) {
                // try again on interrupts ala write_all
                Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
                // other errors return
                Err(e) => return Err(e),
                // success no-ops because remaining length is already updated
                Ok(_) => {}
            };
        }

        debug_assert_eq!(0, self.output_occupied_len);
        Ok(())
    }
}
241
impl<'a, W: Write> Write for EncoderWriter<'a, W> {
    /// Encode input and then write to the delegate writer.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
    /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
    /// https://github.com/rust-lang/rust/issues/56889 for more on that.
    ///
    /// If the previous call to `write` provided more (encoded) data than the delegate writer could
    /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
    /// data is present, subsequent calls to `write` will try to write the remaining buffered data
    /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
    /// an error.
    ///
    /// # Panics
    ///
    /// Panics if called after `finish()`.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate writer are returned.
    fn write(&mut self, input: &[u8]) -> Result<usize> {
        if self.finished {
            panic!("Cannot write more after calling finish()");
        }

        if input.is_empty() {
            return Ok(0);
        }

        // The contract of `Write::write` places some constraints on this implementation:
        // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
        //   iterate over the input and encode multiple chunks.
        // - Errors mean that "no bytes were written to this writer", so we need to reset the
        //   internal state to what it was before the error occurred

        // before reading any input, write any leftover encoded output from last time
        if self.output_occupied_len > 0 {
            let current_len = self.output_occupied_len;
            return self
                .write_to_delegate(current_len)
                // did not read any input
                .map(|_| 0);
        }

        debug_assert_eq!(0, self.output_occupied_len);

        // how many bytes, if any, were read into `extra` to create a triple to encode
        let mut extra_input_read_len = 0;
        // shadowed so the prefix consumed into `extra_input` can be sliced off below
        let mut input = input;

        // saved so `extra_input_occupied_len` can be rolled back if the delegate write errors
        let orig_extra_len = self.extra_input_occupied_len;

        // bytes of `output` already filled (0, or 4 once `extra` is encoded below)
        let mut encoded_size = 0;
        // always a multiple of MIN_ENCODE_CHUNK_SIZE
        let mut max_input_len = MAX_INPUT_LEN;

        // process leftover un-encoded input from last write
        if self.extra_input_occupied_len > 0 {
            debug_assert!(self.extra_input_occupied_len < 3);
            if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
                // Fill up `extra`, encode that into `output`, and consume as much of the rest of
                // `input` as possible.
                // We could write just the encoding of `extra` by itself but then we'd have to
                // return after writing only 4 bytes, which is inefficient if the underlying writer
                // would make a syscall.
                extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
                debug_assert!(extra_input_read_len > 0);
                // overwrite only bytes that weren't already used. If we need to rollback extra_len
                // (when the subsequent write errors), the old leading bytes will still be there.
                self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
                    .copy_from_slice(&input[0..extra_input_read_len]);

                let len = encode_to_slice(
                    &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
                    &mut self.output[..],
                    self.config.char_set.encode_table(),
                );
                debug_assert_eq!(4, len);

                input = &input[extra_input_read_len..];

                // consider extra to be used up, since we encoded it
                self.extra_input_occupied_len = 0;
                // don't clobber where we just encoded to
                encoded_size = 4;
                // and don't read more than can be encoded
                max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;

                // fall through to normal encoding
            } else {
                // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be
                // 1 byte in each.
                debug_assert_eq!(1, input.len());
                debug_assert_eq!(1, self.extra_input_occupied_len);

                self.extra_input[self.extra_input_occupied_len] = input[0];
                self.extra_input_occupied_len += 1;
                return Ok(1);
            };
        } else if input.len() < MIN_ENCODE_CHUNK_SIZE {
            // `extra` is empty, and `input` fits inside it
            self.extra_input[0..input.len()].copy_from_slice(input);
            self.extra_input_occupied_len = input.len();
            return Ok(input.len());
        };

        // either 0 or 1 complete chunks encoded from extra
        debug_assert!(encoded_size == 0 || encoded_size == 4);
        debug_assert!(
            // didn't encode extra input
            MAX_INPUT_LEN == max_input_len
                // encoded one triple
                || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
        );

        // encode complete triples only
        let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
        let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
        debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
        debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);

        encoded_size += encode_to_slice(
            &input[..(input_chunks_to_encode_len)],
            &mut self.output[encoded_size..],
            self.config.char_set.encode_table(),
        );

        // not updating `self.output_occupied_len` here because if the below write fails, it should
        // "never take place" -- the buffer contents we encoded are ignored and perhaps retried
        // later, if the consumer chooses.

        self.write_to_delegate(encoded_size)
            // no matter whether we wrote the full encoded buffer or not, we consumed the same
            // input
            .map(|_| extra_input_read_len + input_chunks_to_encode_len)
            .map_err(|e| {
                // in case we filled and encoded `extra`, reset extra_len
                self.extra_input_occupied_len = orig_extra_len;

                e
            })
    }

    /// Because this is usually treated as OK to call multiple times, it will *not* flush any
    /// incomplete chunks of input or write padding.
    ///
    /// It does, however, drain any buffered *encoded* bytes before flushing the delegate.
    fn flush(&mut self) -> Result<()> {
        self.write_all_encoded_output()?;
        self.w.flush()
    }
}
389
390impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
391 fn drop(&mut self) {
392 if !self.panicked {
393 // like `BufWriter`, ignore errors during drop
394 let _ = self.finish();
395 }
396 }
397}