anyreader/
format.rs

1use crate::AnyReader;
2use peekable::Peekable;
3use std::fmt::{Debug, Formatter};
4use std::io;
5use std::io::Read;
6use tracing::trace;
7
8/// A reader that contains a detected file format.
9///
10/// ## Read from compressed formats
11/// ```
12/// use anyreader::AnyFormat;
13/// let compressed_data = zstd::encode_all("hello compressed world".as_bytes(), 1).unwrap();
14/// let mut reader = AnyFormat::from_reader(compressed_data.as_slice()).unwrap();
15/// assert!(reader.kind.is_zstd());
16/// assert_eq!(std::io::read_to_string(reader).unwrap(), "hello compressed world");
17/// ```
18///
19/// ## Detect and read from compressed archive formats
20/// ```
21/// # fn make_tar_zst_archive(data: &str) -> Vec<u8> {
22/// #     let mut builder = tar::Builder::new(Vec::new());
23/// #     let mut header = tar::Header::new_gnu();
24/// #     header.set_size(data.len() as u64);
25/// #     builder.append_data(&mut header, "file-name", data.as_bytes()).unwrap();
26/// #     let tar_file = builder.into_inner().unwrap();
27/// #     zstd::encode_all(&tar_file[..], 1).unwrap()
28/// # }
29/// use anyreader::AnyFormat;
30/// let tar_gz = make_tar_zst_archive("hello tar world");
31/// let mut reader = AnyFormat::from_reader(tar_gz.as_slice()).unwrap();
32/// assert!(reader.kind.is_tar());
33/// let mut archive = tar::Archive::new(reader);
34/// let mut entry = archive.entries().unwrap().next().unwrap().unwrap();
35/// assert_eq!(std::io::read_to_string(entry).unwrap(), "hello tar world");
36/// ```
37pub struct AnyFormat<T: Read> {
38    pub kind: FormatKind,
39    reader: Peekable<AnyReader<T>>,
40}
41
42impl<T: Read> AnyFormat<T> {
43    pub fn from_reader(reader: T) -> io::Result<AnyFormat<T>> {
44        const MAX_PEEK_BUFFER_SIZE: usize = 262;
45
46        let compression_reader = AnyReader::from_reader(reader)?;
47        let format: FormatKind = (&compression_reader).into();
48        trace!(format=%format, "initial format kind detected, attempting refinement");
49        let mut reader = Peekable::with_capacity(compression_reader, MAX_PEEK_BUFFER_SIZE);
50        reader.fill_peek_buf().ok();
51        let buf = crate::peek_upto::<MAX_PEEK_BUFFER_SIZE>(&mut reader);
52        trace!("peeked {} bytes", buf.len());
53
54        let format: FormatKind = if infer::archive::is_tar(buf) {
55            FormatKind::Tar
56        } else if infer::archive::is_zip(buf) {
57            FormatKind::Zip
58        } else if infer::app::is_coff(buf)
59            || infer::app::is_elf(buf)
60            || infer::app::is_mach(buf)
61            || infer::app::is_dex(buf)
62            || infer::app::is_llvm(buf)
63            || infer::app::is_java(buf)
64            || infer::app::is_elf(buf)
65            || infer::app::is_dll(buf)
66            || infer::app::is_exe(buf)
67            || infer::app::is_wasm(buf)
68        {
69            FormatKind::Executable
70        } else {
71            format
72        };
73
74        trace!("format detected: {format:?}");
75
76        Ok(AnyFormat {
77            kind: format,
78            reader,
79        })
80    }
81
82    pub fn get_ref(&self) -> &T {
83        self.reader.get_ref().1.get_ref()
84    }
85}
86
87impl<T: Read> Debug for AnyFormat<T> {
88    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
89        f.debug_struct("AnyFormat")
90            .field("kind", &self.kind)
91            .finish()
92    }
93}
94
95impl<T: Read> Read for AnyFormat<T> {
96    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
97        self.reader.read(buf)
98    }
99}
100
101/// Supported file/compression formats.
102#[derive(
103    Debug,
104    Copy,
105    Clone,
106    Eq,
107    PartialEq,
108    Hash,
109    Default,
110    strum::EnumString,
111    strum::Display,
112    strum::EnumIs,
113)]
114#[strum(serialize_all = "lowercase", ascii_case_insensitive)]
115pub enum FormatKind {
116    /// Gzip compression
117    Gzip,
118    /// ZStandard compression
119    Zstd,
120    /// Bzip2 compression
121    Bzip2,
122    /// XZ compression
123    Xz,
124    /// Zip archive
125    Zip,
126    /// Tar archive. Note: this may be compressed with any of the
127    /// previous compression formats (i.e. tar.gz, tar.zst, ...)
128    Tar,
129    /// An executable format, such as ELF.
130    Executable,
131    /// Unknown format. This is the fallback when the format is not recognized, and
132    /// the associated [AnyFormat] will read the data as-is.
133    #[default]
134    Unknown,
135}
136
137impl<T: Read> From<&AnyReader<T>> for FormatKind {
138    /// Convert a `CompressionReader` into a `FormatKind`.
139    fn from(reader: &AnyReader<T>) -> Self {
140        match reader {
141            AnyReader::Gzip(_) => FormatKind::Gzip,
142            AnyReader::Zst(_) => FormatKind::Zstd,
143            AnyReader::Bzip2(_) => FormatKind::Bzip2,
144            AnyReader::Xz(_) => FormatKind::Xz,
145            AnyReader::Unknown(_) => FormatKind::Unknown,
146        }
147    }
148}
149
150impl<T: Read> From<AnyReader<T>> for FormatKind {
151    fn from(reader: AnyReader<T>) -> Self {
152        (&reader).into()
153    }
154}