Skip to main content

ai_gif/reader/
mod.rs

1use alloc::borrow::Cow;
2use alloc::vec::Vec;
3use core::convert::{TryFrom, TryInto};
4use core::iter::FusedIterator;
5use core::mem;
6use core::num::NonZeroU64;
7use no_std_io::io::Read;
8
9use crate::common::{Block, Frame};
10use crate::{AnyExtension, Extension, Repeat};
11
12mod buf_reader;
13mod converter;
14mod decoder;
15
16use buf_reader::BufReader;
17
18pub use self::decoder::{
19    Decoded, DecodingError, DecodingFormatError, FrameDataType, FrameDecoder, OutputBuffer,
20    StreamingDecoder, Version, PLTE_CHANNELS,
21};
22
23pub use self::converter::ColorOutput;
24use self::converter::PixelConverter;
25
/// Upper bound on the memory the decoder is allowed to use for each frame.
#[derive(Clone, Debug)]
pub enum MemoryLimit {
    /// Enforce no memory limit.
    ///
    /// This is a potentially dangerous setting when processing images from unknown origins:
    /// the program could be vulnerable to decompression bombs, i.e. malicious images crafted
    /// specifically to require an enormous amount of memory to process while having a
    /// disproportionately small file size.
    ///
    /// The risk on modern machines is somewhat reduced because a single frame cannot exceed
    /// 16GiB, but that is still a significant amount of memory.
    Unlimited,
    /// Limit the amount of memory that can be used for a single frame to this many bytes.
    ///
    /// The limit may not be enforced precisely because of allocator overhead and small
    /// auxiliary buffers the decoder may allocate, but it precisely limits the size of the
    /// output buffer for each frame.
    //
    // The `NonZero` type is used to make FFI simpler.
    // Due to the guaranteed niche optimization, `Unlimited` will be represented as `0`,
    // and the whole enum as a simple `u64`.
    Bytes(NonZeroU64),
}
50
51impl MemoryLimit {
52    fn check_size(&self, size: usize) -> Result<(), DecodingError> {
53        match self {
54            Self::Unlimited => Ok(()),
55            Self::Bytes(limit) => {
56                if size as u64 <= limit.get() {
57                    Ok(())
58                } else {
59                    Err(DecodingError::MemoryLimit)
60                }
61            }
62        }
63    }
64
65    fn buffer_size(&self, color: ColorOutput, width: u16, height: u16) -> Option<usize> {
66        let pixels = u64::from(width) * u64::from(height);
67
68        let bytes_per_pixel = match color {
69            ColorOutput::Indexed => 1,
70            ColorOutput::RGBA => 4,
71        };
72
73        // This cannot overflow because the maximum possible value is 16GiB, well within u64 range
74        let total_bytes = pixels * bytes_per_pixel;
75
76        // On 32-bit platforms the size of the output buffer may not be representable
77        let usize_bytes = usize::try_from(total_bytes).ok()?;
78
79        match self {
80            Self::Unlimited => Some(usize_bytes),
81            Self::Bytes(limit) => {
82                if total_bytes > limit.get() {
83                    None
84                } else {
85                    Some(usize_bytes)
86                }
87            }
88        }
89    }
90
91    #[inline]
92    fn try_reserve(&self, vec: &mut Vec<u8>, additional: usize) -> Result<(), DecodingError> {
93        let len = vec
94            .len()
95            .checked_add(additional)
96            .ok_or(DecodingError::MemoryLimit)?;
97        self.check_size(len)?;
98        vec.try_reserve(additional)
99            .map_err(|_| DecodingError::OutOfMemory)?;
100        Ok(())
101    }
102}
103
104/// Options for opening a GIF decoder. [`DecodeOptions::read_info`] will start the decoder.
105#[derive(Clone, Debug)]
106pub struct DecodeOptions {
107    memory_limit: MemoryLimit,
108    color_output: ColorOutput,
109    check_frame_consistency: bool,
110    skip_frame_decoding: bool,
111    check_for_end_code: bool,
112    allow_unknown_blocks: bool,
113}
114
115impl Default for DecodeOptions {
116    fn default() -> Self {
117        Self::new()
118    }
119}
120
121impl DecodeOptions {
122    /// Creates a new decoder builder
123    #[must_use]
124    #[inline]
125    pub fn new() -> Self {
126        Self {
127            memory_limit: MemoryLimit::Bytes(50_000_000.try_into().unwrap()), // 50 MB
128            color_output: ColorOutput::Indexed,
129            check_frame_consistency: false,
130            skip_frame_decoding: false,
131            check_for_end_code: false,
132            allow_unknown_blocks: false,
133        }
134    }
135
136    /// Configure how color data is decoded.
137    #[inline]
138    pub fn set_color_output(&mut self, color: ColorOutput) {
139        self.color_output = color;
140    }
141
142    /// Configure a memory limit for decoding.
143    pub fn set_memory_limit(&mut self, limit: MemoryLimit) {
144        self.memory_limit = limit;
145    }
146
147    /// Configure if frames must be within the screen descriptor.
148    ///
149    /// The default is `false`.
150    ///
151    /// When turned on, all frame descriptors being read must fit within the screen descriptor or
152    /// otherwise an error is returned and the stream left in an unspecified state.
153    ///
154    /// When turned off, frames may be arbitrarily larger or offset in relation to the screen. Many
155    /// other decoder libraries handle this in highly divergent ways. This moves all checks to the
156    /// caller, for example to emulate a specific style.
157    pub fn check_frame_consistency(&mut self, check: bool) {
158        self.check_frame_consistency = check;
159    }
160
161    /// Configure whether to skip decoding frames.
162    ///
163    /// The default is false.
164    ///
165    /// When turned on, LZW decoding is skipped. [`Decoder::read_next_frame`] will return
166    /// compressed LZW bytes in frame's data.
167    /// [`Decoder::next_frame_info`] will return the metadata of the next frame as usual.
168    /// This is useful to count frames without incurring the overhead of decoding.
169    pub fn skip_frame_decoding(&mut self, skip: bool) {
170        self.skip_frame_decoding = skip;
171    }
172
173    /// Configure if LZW encoded blocks must end with a marker end code.
174    ///
175    /// The default is `false`.
176    ///
177    /// When turned on, all image data blocks—which are LZW encoded—must contain a special bit
178    /// sequence signalling the end of the data. LZW processing terminates when this code is
179    /// encountered. The specification states that it must be the last code output by the encoder
180    /// for an image.
181    ///
182    /// When turned off then image data blocks can simply end. Note that this might silently ignore
183    /// some bits of the last or second to last byte.
184    pub fn check_lzw_end_code(&mut self, check: bool) {
185        self.check_for_end_code = check;
186    }
187
188    /// Configure if unknown blocks are allowed to be decoded.
189    ///
190    /// The default is `false`.
191    ///
192    /// When turned on, the decoder will allow unknown blocks to be in the
193    /// `BlockStart` position.
194    ///
195    /// When turned off, decoded block starts must mark an `Image`, `Extension`,
196    /// or `Trailer` block. Otherwise, the decoded image will return an error.
197    /// If an unknown block error is returned from decoding, enabling this
198    /// setting may allow for a further state of decoding on the next attempt.
199    ///
200    /// This option also allows unknown extension blocks. The decoder assumes the follow the same
201    /// block layout, i.e. a sequence of zero-length terminated sub-blocks immediately follow the
202    /// extension introducer.
203    pub fn allow_unknown_blocks(&mut self, check: bool) {
204        self.allow_unknown_blocks = check;
205    }
206
207    /// Reads the logical screen descriptor including the global color palette
208    ///
209    /// Returns a [`Decoder`]. All decoder configuration has to be done beforehand.
210    pub fn read_info<R: Read>(self, r: R) -> Result<Decoder<R>, DecodingError> {
211        Decoder::with_no_init(r, StreamingDecoder::with_options(&self), self).init()
212    }
213}
214
215struct ReadDecoder<R: Read> {
216    reader: BufReader<R>,
217    decoder: StreamingDecoder,
218    at_eof: bool,
219}
220
221impl<R: Read> ReadDecoder<R> {
222    #[inline(never)]
223    fn decode_next(
224        &mut self,
225        write_into: &mut OutputBuffer<'_>,
226    ) -> Result<Option<Decoded>, DecodingError> {
227        while !self.at_eof {
228            let (consumed, result) = {
229                let buf = self.reader.fill_buf()?;
230                if buf.is_empty() {
231                    return Err(DecodingError::UnexpectedEof);
232                }
233
234                self.decoder.update(buf, write_into)?
235            };
236            self.reader.consume(consumed);
237            match result {
238                Decoded::Nothing => (),
239                Decoded::BlockStart(Block::Trailer) => {
240                    self.at_eof = true;
241                }
242                result => return Ok(Some(result)),
243            }
244        }
245        Ok(None)
246    }
247
248    fn into_inner(self) -> R {
249        self.reader.into_inner()
250    }
251
252    fn decode_next_bytes(&mut self, out: &mut OutputBuffer<'_>) -> Result<usize, DecodingError> {
253        match self.decode_next(out)? {
254            Some(Decoded::BytesDecoded(len)) => Ok(len.get()),
255            Some(Decoded::DataEnd) => Ok(0),
256            _ => Err(DecodingError::format("unexpected data")),
257        }
258    }
259}
// Headers for supported application extensions.

/// Application extension header whose sub-blocks carry the loop (repeat) count.
const EXT_NAME_NETSCAPE: &[u8] = b"NETSCAPE2.0";
/// Application extension header whose sub-blocks carry XMP metadata.
const EXT_NAME_XMP: &[u8] = b"XMP DataXMP";
/// Application extension header whose sub-blocks carry an ICC profile.
const EXT_NAME_ICC: &[u8] = b"ICCRGBG1012";
264
/// State when parsing an application extension.
enum AppExtensionState {
    /// Waiting for the application name (the first sub-block of the extension).
    None,
    /// Inside a `NETSCAPE2.0` extension; the next sub-block is inspected for the repeat count.
    Netscape,
    /// Inside an XMP extension; sub-blocks are collected as XMP metadata.
    Xmp,
    /// Inside an ICC extension; sub-blocks are collected as the ICC profile.
    Icc,
    /// The remaining sub-blocks of the current extension are ignored.
    Skip,
}
274
275#[allow(dead_code)]
276/// GIF decoder. Create [`DecodeOptions`] to get started, and call [`DecodeOptions::read_info`].
277pub struct Decoder<R: Read> {
278    decoder: ReadDecoder<R>,
279    pixel_converter: PixelConverter,
280    memory_limit: MemoryLimit,
281    bg_color: Option<u8>,
282    repeat: Repeat,
283    current_frame: Frame<'static>,
284    current_frame_data_type: FrameDataType,
285    app_extension_state: AppExtensionState,
286    /// XMP metadata bytes.
287    xmp_metadata: Option<Vec<u8>>,
288    /// ICC profile bytes.
289    icc_profile: Option<Vec<u8>>,
290}
291
292impl<R> Decoder<R>
293where
294    R: Read,
295{
296    /// Create a new decoder with default options.
297    #[inline]
298    pub fn new(reader: R) -> Result<Self, DecodingError> {
299        DecodeOptions::new().read_info(reader)
300    }
301
302    /// Return a builder that allows configuring limits etc.
303    #[must_use]
304    #[inline]
305    pub fn build() -> DecodeOptions {
306        DecodeOptions::new()
307    }
308
309    fn with_no_init(reader: R, decoder: StreamingDecoder, options: DecodeOptions) -> Self {
310        Self {
311            decoder: ReadDecoder {
312                reader: BufReader::new(reader),
313                decoder,
314                at_eof: false,
315            },
316            bg_color: None,
317            pixel_converter: PixelConverter::new(options.color_output),
318            memory_limit: options.memory_limit.clone(),
319            repeat: Repeat::default(),
320            current_frame: Frame::default(),
321            current_frame_data_type: FrameDataType::Pixels,
322            app_extension_state: AppExtensionState::None,
323            xmp_metadata: None,
324            icc_profile: None,
325        }
326    }
327
328    fn init(mut self) -> Result<Self, DecodingError> {
329        const APP_EXTENSION: AnyExtension = AnyExtension(Extension::Application as u8);
330        loop {
331            match self.decoder.decode_next(&mut OutputBuffer::None)? {
332                Some(Decoded::BackgroundColor(bg_color)) => {
333                    self.bg_color = Some(bg_color);
334                }
335                Some(Decoded::GlobalPalette(palette)) => {
336                    self.pixel_converter.set_global_palette(palette.into());
337                }
338                Some(Decoded::SubBlock {
339                    ext: APP_EXTENSION,
340                    is_last,
341                }) => {
342                    self.read_application_extension(is_last)?;
343                }
344                Some(Decoded::HeaderEnd) => break,
345                Some(_) => {
346                    // There will be extra events when parsing application extension
347                    continue;
348                }
349                None => {
350                    return Err(DecodingError::format(
351                        "file does not contain any image data",
352                    ))
353                }
354            }
355        }
356        // If the background color is invalid, ignore it
357        if let Some(palette) = self.pixel_converter.global_palette() {
358            if self.bg_color.unwrap_or(0) as usize >= (palette.len() / PLTE_CHANNELS) {
359                self.bg_color = None;
360            }
361        }
362        Ok(self)
363    }
364
365    fn read_application_extension(&mut self, is_last: bool) -> Result<(), DecodingError> {
366        let data = self.decoder.decoder.last_ext_sub_block();
367        match self.app_extension_state {
368            AppExtensionState::None => {
369                // GIF spec requires len == 11
370                self.app_extension_state = match data {
371                    EXT_NAME_NETSCAPE => AppExtensionState::Netscape,
372                    EXT_NAME_XMP => {
373                        self.xmp_metadata = Some(Vec::new());
374                        AppExtensionState::Xmp
375                    }
376                    EXT_NAME_ICC => {
377                        self.icc_profile = Some(Vec::new());
378                        AppExtensionState::Icc
379                    }
380                    _ => AppExtensionState::Skip,
381                }
382            }
383            AppExtensionState::Netscape => {
384                if let [1, rest @ ..] = data {
385                    if let Ok(repeat) = rest.try_into().map(u16::from_le_bytes) {
386                        self.repeat = if repeat == 0 {
387                            Repeat::Infinite
388                        } else {
389                            Repeat::Finite(repeat)
390                        };
391                    }
392                }
393                self.app_extension_state = AppExtensionState::Skip;
394            }
395            AppExtensionState::Xmp => {
396                if let Some(xmp_metadata) = &mut self.xmp_metadata {
397                    // XMP is not written as a valid "pascal-string", so we need to stitch together
398                    // the text from our collected sublock-lengths.
399                    self.memory_limit
400                        .try_reserve(xmp_metadata, 1 + data.len())?;
401                    xmp_metadata.push(data.len() as u8);
402                    xmp_metadata.extend_from_slice(data);
403                    if is_last {
404                        // XMP adds a "ramp" of 257 bytes to the end of the metadata to let the "pascal-strings"
405                        // parser converge to the null byte. The ramp looks like "0x01, 0xff, .., 0x01, 0x00".
406                        // For convenience and to allow consumers to not be bothered with this implementation detail,
407                        // we cut the ramp.
408                        const RAMP_SIZE: usize = 257;
409                        if xmp_metadata.len() >= RAMP_SIZE
410                            && xmp_metadata.ends_with(&[0x03, 0x02, 0x01, 0x00])
411                            && xmp_metadata[xmp_metadata.len() - RAMP_SIZE..]
412                                .starts_with(&[0x01, 0x0ff])
413                        {
414                            xmp_metadata.truncate(xmp_metadata.len() - RAMP_SIZE);
415                        }
416                    }
417                }
418            }
419            AppExtensionState::Icc => {
420                if let Some(icc) = &mut self.icc_profile {
421                    self.memory_limit.try_reserve(icc, data.len())?;
422                    icc.extend_from_slice(data);
423                }
424            }
425            AppExtensionState::Skip => {}
426        };
427        if is_last {
428            self.app_extension_state = AppExtensionState::None;
429        }
430        Ok(())
431    }
432
433    /// Returns the next frame info
434    pub fn next_frame_info(&mut self) -> Result<Option<&Frame<'static>>, DecodingError> {
435        loop {
436            match self.decoder.decode_next(&mut OutputBuffer::None)? {
437                Some(Decoded::FrameMetadata(frame_data_type)) => {
438                    self.current_frame = self.decoder.decoder.current_frame_mut().take();
439                    self.current_frame_data_type = frame_data_type;
440                    if self.current_frame.palette.is_none() && self.global_palette().is_none() {
441                        return Err(DecodingError::format(
442                            "no color table available for current frame",
443                        ));
444                    }
445                    break;
446                }
447                Some(_) => (),
448                None => return Ok(None),
449            }
450        }
451        Ok(Some(&self.current_frame))
452    }
453
454    /// Query information about the frame previously advanced with [`Self::next_frame_info`].
455    ///
456    /// Returns `None` past the end of file.
457    pub fn current_frame_info(&self) -> Option<&Frame<'static>> {
458        if self.decoder.at_eof {
459            None
460        } else {
461            Some(&self.current_frame)
462        }
463    }
464
465    /// Reads the next frame from the image.
466    ///
467    /// Do not call `Self::next_frame_info` beforehand.
468    /// Deinterlaces the result.
469    ///
470    /// You can also call `.into_iter()` on the decoder to use it as a regular iterator.
471    pub fn read_next_frame(&mut self) -> Result<Option<&Frame<'static>>, DecodingError> {
472        if self.next_frame_info()?.is_some() {
473            match self.current_frame_data_type {
474                FrameDataType::Pixels => {
475                    self.pixel_converter.read_frame(
476                        &mut self.current_frame,
477                        &mut |out| self.decoder.decode_next_bytes(out),
478                        &self.memory_limit,
479                    )?;
480                }
481                FrameDataType::Lzw { min_code_size } => {
482                    let mut vec = if matches!(self.current_frame.buffer, Cow::Owned(_)) {
483                        let mut vec =
484                            mem::replace(&mut self.current_frame.buffer, Cow::Borrowed(&[]))
485                                .into_owned();
486                        vec.clear();
487                        vec
488                    } else {
489                        Vec::new()
490                    };
491                    // Guesstimate 2bpp
492                    vec.try_reserve(
493                        usize::from(self.current_frame.width)
494                            * usize::from(self.current_frame.height)
495                            / 4,
496                    )
497                    .map_err(|_| DecodingError::OutOfMemory)?;
498                    self.copy_lzw_into_buffer(min_code_size, &mut vec)?;
499                    self.current_frame.buffer = Cow::Owned(vec);
500                }
501            }
502            Ok(Some(&self.current_frame))
503        } else {
504            Ok(None)
505        }
506    }
507
508    /// This is private for iterator's use
509    fn take_current_frame(&mut self) -> Option<Frame<'static>> {
510        if self.current_frame.buffer.is_empty() {
511            return None;
512        }
513        Some(self.current_frame.take())
514    }
515
516    /// Reads the data of the current frame into a pre-allocated buffer.
517    ///
518    /// `Self::next_frame_info` needs to be called beforehand.
519    /// The length of `buf` must be at least `Self::buffer_size`.
520    /// Deinterlaces the result.
521    pub fn read_into_buffer(&mut self, buf: &mut [u8]) -> Result<(), DecodingError> {
522        self.pixel_converter
523            .read_into_buffer(&self.current_frame, buf, &mut |out| {
524                self.decoder.decode_next_bytes(out)
525            })
526    }
527
528    fn copy_lzw_into_buffer(
529        &mut self,
530        min_code_size: u8,
531        buf: &mut Vec<u8>,
532    ) -> Result<(), DecodingError> {
533        // `write_lzw_pre_encoded_frame` smuggles `min_code_size` in the first byte.
534        buf.push(min_code_size);
535        loop {
536            match self.decoder.decode_next(&mut OutputBuffer::Vec(buf))? {
537                Some(Decoded::LzwDataCopied(_len)) => {}
538                Some(Decoded::DataEnd) => return Ok(()),
539                _ => return Err(DecodingError::format("unexpected data")),
540            }
541        }
542    }
543
544    /// Reads data of the current frame into a pre-allocated buffer until the buffer has been
545    /// filled completely.
546    ///
547    /// The buffer length must be an even number of pixels (multiple of 4 if decoding RGBA).
548    ///
549    /// `Self::next_frame_info` needs to be called beforehand. Returns `true` if the supplied
550    /// buffer could be filled completely. Should not be called after `false` had been returned.
551    pub fn fill_buffer(&mut self, buf: &mut [u8]) -> Result<bool, DecodingError> {
552        self.pixel_converter
553            .fill_buffer(&self.current_frame, buf, &mut |out| {
554                self.decoder.decode_next_bytes(out)
555            })
556    }
557
558    /// Output buffer size
559    pub fn buffer_size(&self) -> usize {
560        self.pixel_converter
561            .buffer_size(&self.current_frame)
562            .unwrap()
563    }
564
565    /// Line length of the current frame
566    pub fn line_length(&self) -> usize {
567        self.pixel_converter.line_length(&self.current_frame)
568    }
569
570    /// Returns the color palette relevant for the frame that has been decoded
571    #[inline]
572    pub fn palette(&self) -> Result<&[u8], DecodingError> {
573        Ok(match self.current_frame.palette {
574            Some(ref table) => table,
575            None => self.global_palette().ok_or_else(|| {
576                DecodingError::format("no color table available for current frame")
577            })?,
578        })
579    }
580
581    /// The global color palette
582    pub fn global_palette(&self) -> Option<&[u8]> {
583        self.pixel_converter.global_palette()
584    }
585
586    /// Width of the image
587    #[inline]
588    pub fn width(&self) -> u16 {
589        self.decoder.decoder.width()
590    }
591
592    /// Height of the image
593    #[inline]
594    pub fn height(&self) -> u16 {
595        self.decoder.decoder.height()
596    }
597
598    /// XMP metadata stored in the image.
599    #[inline]
600    #[must_use]
601    pub fn xmp_metadata(&self) -> Option<&[u8]> {
602        self.xmp_metadata.as_deref()
603    }
604
605    /// ICC profile stored in the image.
606    #[inline]
607    #[must_use]
608    pub fn icc_profile(&self) -> Option<&[u8]> {
609        self.icc_profile.as_deref()
610    }
611
612    /// Abort decoding and recover the `io::Read` instance
613    pub fn into_inner(self) -> R {
614        self.decoder.into_inner()
615    }
616
617    /// Index of the background color in the global palette
618    ///
619    /// In practice this is not used, and the background is
620    /// always transparent
621    pub fn bg_color(&self) -> Option<usize> {
622        self.bg_color.map(|v| v as usize)
623    }
624
625    /// Number of loop repetitions
626    #[inline]
627    pub fn repeat(&self) -> Repeat {
628        self.repeat
629    }
630}
631
632impl<R: Read> IntoIterator for Decoder<R> {
633    type Item = Result<Frame<'static>, DecodingError>;
634    type IntoIter = DecoderIter<R>;
635
636    #[inline]
637    fn into_iter(self) -> Self::IntoIter {
638        DecoderIter {
639            inner: self,
640            ended: false,
641        }
642    }
643}
644
645/// Use `decoder.into_iter()` to iterate over the frames
646pub struct DecoderIter<R: Read> {
647    inner: Decoder<R>,
648    ended: bool,
649}
650
651impl<R: Read> DecoderIter<R> {
652    /// Abort decoding and recover the `io::Read` instance
653    ///
654    /// Use `for frame in iter.by_ref()` to be able to call this afterwards.
655    pub fn into_inner(self) -> R {
656        self.inner.into_inner()
657    }
658}
659
660impl<R: Read> FusedIterator for DecoderIter<R> {}
661
662impl<R: Read> Iterator for DecoderIter<R> {
663    type Item = Result<Frame<'static>, DecodingError>;
664
665    fn next(&mut self) -> Option<Self::Item> {
666        if !self.ended {
667            match self.inner.read_next_frame() {
668                Ok(Some(_)) => self.inner.take_current_frame().map(Ok),
669                Ok(None) => {
670                    self.ended = true;
671                    None
672                }
673                Err(err) => {
674                    self.ended = true;
675                    Some(Err(err))
676                }
677            }
678        } else {
679            None
680        }
681    }
682}