cpio_reader/
lib.rs

1#![doc = include_str!("../README.md")]
2#![no_std]
3#![deny(unsafe_code)]
4
5use {
6    bitflags::bitflags,
7    core::{convert::TryInto, str},
8};
9
10/// Returns an iterator that iterates over each content of the given cpio file.
11///
12/// The iterator checks if the header of an entry is correct. If it is corrupt (e.g., wrong magic
13/// value), the iterator stops iterating.
14pub fn iter_files(cpio_binary: &[u8]) -> impl Iterator<Item = Entry<'_>> {
15    Iter::new(cpio_binary)
16}
17
/// An entry of a cpio file.
///
/// The name and the content borrow from the archive slice the entry was parsed from. Fields that
/// exist only in some of the supported header formats are stored as [`Option`]s.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Entry<'a> {
    // Combined device number; `Some` only for Old Binary and Portable ASCII entries.
    dev: Option<u32>,
    // Split device number; `Some` only for New ASCII and New CRC entries.
    devmajor: Option<u32>,
    devminor: Option<u32>,
    // Inode number.
    ino: u32,
    // Permission bits and file type.
    mode: Mode,
    // Owner user id and group id.
    uid: u32,
    gid: u32,
    // Number of links to the file.
    nlink: u32,
    // Combined associated device number; `Some` only for Old Binary and Portable ASCII entries.
    rdev: Option<u32>,
    // Split associated device number; `Some` only for New ASCII and New CRC entries.
    rdevmajor: Option<u32>,
    rdevminor: Option<u32>,
    // Modification time as stored in the header.
    mtime: u64,
    // File name without the terminating NUL byte.
    name: &'a str,
    // File content.
    file: &'a [u8],
}
36impl<'a> Entry<'a> {
37    /// Returns the device number of the device which contained the file.
38    ///
39    /// This method returns [`None`] if the cpio file format is either New ASCII Format or New CRC
40    /// Format. For these formats, use [`Entry::devmajor`] and [`Entry::devminor`].
41    #[must_use]
42    pub fn dev(&self) -> Option<u32> {
43        self.dev
44    }
45
46    /// Returns the major number of the device which contained the file.
47    ///
48    /// This method returns [`None`] if the entry format is either Old Binary Format or Portable
49    /// ASCII Format. For these formats, use [`Entry::dev`].
50    #[must_use]
51    pub fn devmajor(&self) -> Option<u32> {
52        self.devmajor
53    }
54
55    /// Returns the minor number of the device which contained the file.
56    ///
57    /// This method returns [`None`] if the entry format is either Old Binary Format or Portable
58    /// ASCII Format. For these formats, use [`Entry::dev`].
59    #[must_use]
60    pub fn devminor(&self) -> Option<u32> {
61        self.devminor
62    }
63
64    /// Returns the inode number of the file.
65    #[must_use]
66    pub fn ino(&self) -> u32 {
67        self.ino
68    }
69
70    /// Returns the [`Mode`] value of the file, which contains the file's permission information
71    /// and file type.
72    #[must_use]
73    pub fn mode(&self) -> Mode {
74        self.mode
75    }
76
77    /// Returns the user id of the owner of the file.
78    #[must_use]
79    pub fn uid(&self) -> u32 {
80        self.uid
81    }
82
83    /// Returns the group id of the owner of the file.
84    #[must_use]
85    pub fn gid(&self) -> u32 {
86        self.gid
87    }
88
89    /// Returns the number of links to this file.
90    #[must_use]
91    pub fn nlink(&self) -> u32 {
92        self.nlink
93    }
94
95    /// Returns the associated device number if the entry is block special device or character
96    /// special device. For the other types of entries, the caller should not use this method.
97    ///
98    /// This method returns [`None`] if the entry format is either New ASCII Format or New CRC
99    /// Format. For these formats, use [`Entry::rdevmajor`] and [`Entry::rdevminor`].
100    #[must_use]
101    pub fn rdev(&self) -> Option<u32> {
102        self.rdev
103    }
104
105    /// Returns the associated device major number if the entry is block special device or
106    /// character special device. For the other types of entries, the caller should not use this
107    /// method.
108    ///
109    /// This method returns [`None`] if the entry format is either Old Binary Format or Portable
110    /// ASCII Format. For these formats, use [`Entry::rdev`].
111    #[must_use]
112    pub fn rdevmajor(&self) -> Option<u32> {
113        self.rdevmajor
114    }
115
116    /// Returns the associated device minor number if the entry is block special device or
117    /// character special device. For the other types of entries, the caller should not use this
118    /// method.
119    ///
120    /// This method returns [`None`] if the entry format is either Old Binary Format or Portable
121    /// ASCII Format. For these formats, use [`Entry::rdev`].
122    #[must_use]
123    pub fn rdevminor(&self) -> Option<u32> {
124        self.rdevminor
125    }
126
127    /// Returns the modification time of this file.
128    #[must_use]
129    pub fn mtime(&self) -> u64 {
130        self.mtime
131    }
132
133    /// Returns the filename.
134    #[must_use]
135    pub fn name(&self) -> &'a str {
136        self.name
137    }
138
139    /// Returns the content of this file.
140    ///
141    /// This method returns the path to the original file if the file is a symbolic link. For the
142    /// New ASCII Format and New CRC Format, this method returns an empty slice if the file is a
143    /// hard link and is not the last entry of the multiple duplicate files.
144    #[must_use]
145    pub fn file(&self) -> &'a [u8] {
146        self.file
147    }
148
149    fn interpret_as_old_binary(binary: &'a [u8]) -> Option<(Self, &'a [u8])> {
150        const MAGIC: u16 = 0o070_707;
151
152        let mut byte_array = ByteArray::new(binary);
153
154        let magic = [byte_array.proceed_byte()?, byte_array.proceed_byte()?];
155
156        let endianness = if u16::from_be_bytes(magic) == MAGIC {
157            Endianness::Big
158        } else if u16::from_le_bytes(magic) == MAGIC {
159            Endianness::Little
160        } else {
161            return None;
162        };
163
164        let dev = byte_array.proceed_u16(endianness)?;
165        let ino = byte_array.proceed_u16(endianness)?;
166        let mode = byte_array.proceed_u16(endianness)?;
167        let u_id = byte_array.proceed_u16(endianness)?;
168        let g_id = byte_array.proceed_u16(endianness)?;
169        let nlink = byte_array.proceed_u16(endianness)?;
170        let r_dev = byte_array.proceed_u16(endianness)?;
171        let mtime_most: u64 = byte_array.proceed_u16(endianness)?.into();
172        let mtime_least: u64 = byte_array.proceed_u16(endianness)?.into();
173        let namesize = byte_array.proceed_u16(endianness)?;
174        let filesize_most_byte: u32 = byte_array.proceed_u16(endianness)?.into();
175        let filesize_least_byte: u32 = byte_array.proceed_u16(endianness)?.into();
176
177        let filesize = (filesize_most_byte << 16) | filesize_least_byte;
178
179        if namesize == 0 {
180            return None;
181        }
182
183        let name = byte_array.proceed_str((namesize - 1).into())?;
184
185        byte_array.skip_bytes((namesize % 2 + 1).into()); // +1 for the terminating null character.
186
187        let file = byte_array.proceed_bytes(filesize.try_into().unwrap())?;
188
189        let mode = Mode::from_bits(mode.into())?;
190
191        let old_binary = Self {
192            dev: Some(dev.into()),
193            devmajor: None,
194            devminor: None,
195            ino: ino.into(),
196            mode,
197            uid: u_id.into(),
198            gid: g_id.into(),
199            nlink: nlink.into(),
200            rdev: Some(r_dev.into()),
201            rdevmajor: None,
202            rdevminor: None,
203            mtime: (mtime_most << 16) | mtime_least,
204            name,
205            file,
206        };
207
208        byte_array.skip_bytes((filesize % 2).try_into().unwrap());
209
210        Some((old_binary, byte_array.into_inner()))
211    }
212
213    fn interpret_as_portable_ascii(binary: &'a [u8]) -> Option<(Self, &'a [u8])> {
214        const MAGIC: &str = "070707";
215
216        let mut byte_array = ByteArray::new(binary);
217
218        let magic = byte_array.proceed_str(6)?;
219
220        if magic != MAGIC {
221            return None;
222        }
223
224        let dev = byte_array.proceed_str_into_octal_u32(6)?;
225        let ino = byte_array.proceed_str_into_octal_u32(6)?;
226        let mode = byte_array.proceed_str_into_octal_u32(6)?;
227        let u_id = byte_array.proceed_str_into_octal_u32(6)?;
228        let g_id = byte_array.proceed_str_into_octal_u32(6)?;
229        let nlink = byte_array.proceed_str_into_octal_u32(6)?;
230        let r_dev = byte_array.proceed_str_into_octal_u32(6)?;
231        let mtime = byte_array.proceed_str_into_octal_u64(11)?;
232        let namesize = byte_array.proceed_str_into_octal_u32(6)?;
233        let filesize = byte_array.proceed_str_into_octal_u64(11)?;
234
235        if namesize == 0 {
236            return None;
237        }
238
239        let name = byte_array.proceed_str((namesize - 1).try_into().unwrap())?;
240
241        byte_array.skip_bytes(1); // For the terminating '\0'.
242
243        let file = byte_array.proceed_bytes(filesize.try_into().unwrap())?;
244
245        let mode = Mode::from_bits(mode)?;
246
247        let portable_ascii = Self {
248            dev: Some(dev),
249            devmajor: None,
250            devminor: None,
251            ino,
252            mode,
253            uid: u_id,
254            gid: g_id,
255            nlink,
256            rdev: Some(r_dev),
257            rdevmajor: None,
258            rdevminor: None,
259            mtime,
260            name,
261            file,
262        };
263
264        Some((portable_ascii, byte_array.into_inner()))
265    }
266
267    fn interpret_as_new_ascii_or_crc(binary: &'a [u8]) -> Option<(Self, &'a [u8])> {
268        const MAGIC_NEW_ASCII: &str = "070701";
269        const MAGIC_CRC: &str = "070702";
270
271        let mut byte_array = ByteArray::new(binary);
272
273        let is_crc = match byte_array.proceed_str(6)? {
274            MAGIC_CRC => true,
275            MAGIC_NEW_ASCII => false,
276            _ => return None,
277        };
278
279        let ino = byte_array.proceed_str_into_hex()?;
280        let mode = byte_array.proceed_str_into_hex()?;
281        let u_id = byte_array.proceed_str_into_hex()?;
282        let g_id = byte_array.proceed_str_into_hex()?;
283        let nlink = byte_array.proceed_str_into_hex()?;
284        let mtime: u64 = byte_array.proceed_str_into_hex()?.into();
285        let filesize = byte_array.proceed_str_into_hex()?;
286        let devmajor = byte_array.proceed_str_into_hex()?;
287        let devminor = byte_array.proceed_str_into_hex()?;
288        let r_devmajor = byte_array.proceed_str_into_hex()?;
289        let r_devminor = byte_array.proceed_str_into_hex()?;
290        let namesize = byte_array.proceed_str_into_hex()?;
291        let check = byte_array.proceed_str_into_hex()?;
292
293        if namesize == 0 {
294            return None;
295        }
296
297        let name = byte_array.proceed_str((namesize - 1).try_into().unwrap())?;
298
299        // For the terminating `\0`.
300        byte_array.skip_bytes(1);
301
302        byte_array.skip_to_next_multiple_of_four();
303
304        let file = byte_array.proceed_bytes(filesize.try_into().unwrap())?;
305
306        let mode = Mode::from_bits(mode)?;
307
308        let checksum = file
309            .iter()
310            .fold(0_u32, |acc, &x| acc.wrapping_add(x.into()));
311
312        // Refer to line 1277, copyin.c, GNU cpio 2.13. It does not check the checksum of the
313        // symbolic files.
314        if is_crc && !mode.contains(Mode::SYMBOLIK_LINK) && (checksum != check) {
315            return None;
316        }
317
318        let new_ascii = Self {
319            ino,
320            mode,
321            uid: u_id,
322            gid: g_id,
323            nlink,
324            mtime,
325            dev: None,
326            devmajor: Some(devmajor),
327            devminor: Some(devminor),
328            rdev: None,
329            rdevmajor: Some(r_devmajor),
330            rdevminor: Some(r_devminor),
331            name,
332            file,
333        };
334
335        byte_array.skip_to_next_multiple_of_four();
336
337        Some((new_ascii, byte_array.into_inner()))
338    }
339
340    fn new(binary: &'a [u8]) -> Option<(Self, &'a [u8])> {
341        Self::interpret_as_old_binary(binary)
342            .or_else(|| Self::interpret_as_portable_ascii(binary))
343            .or_else(|| Self::interpret_as_new_ascii_or_crc(binary))
344            .filter(|(entry, _)| entry.name() != "TRAILER!!!")
345    }
346}
347
bitflags! {
    /// File information.
    ///
    /// The permission constants are independent single bits. The file type constants are bit
    /// patterns that overlap with each other (for example, `BLOCK_SPECIAL_DEVICE` has both the
    /// `CHARACTER_SPECIAL_DEVICE` bit and the `DIRECTORY` bit set), so `contains` on a smaller
    /// file type pattern also matches the larger patterns that include it.
    #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Mode: u32 {
        /// User executable bit.
        const USER_EXECUTABLE = 0o000_100;
        /// User writable bit.
        const USER_WRITABLE = 0o000_200;
        /// User readable bit.
        const USER_READABLE = 0o000_400;

        /// Group executable bit.
        const GROUP_EXECUTABLE = 0o000_010;
        /// Group writable bit.
        const GROUP_WRITABLE = 0o000_020;
        /// Group readable bit.
        const GROUP_READABLE = 0o000_040;

        /// Executable bit for the other groups.
        const WORLD_EXECUTABLE = 0o000_001;
        /// Writable bit for the other groups.
        const WORLD_WRITABLE = 0o000_002;
        /// Readable bit for the other groups.
        const WORLD_READABLE = 0o000_004;

        /// Sticky bit.
        const STICKY = 0o001_000;
        /// SGID bit.
        const SGID = 0o002_000;
        /// SUID bit.
        const SUID = 0o004_000;

        /// Named pipe or FIFO.
        const NAMED_PIPE_FIFO = 0o010_000;
        /// Character special device.
        const CHARACTER_SPECIAL_DEVICE = 0o020_000;
        /// Directory.
        const DIRECTORY = 0o040_000;
        /// Block special device.
        const BLOCK_SPECIAL_DEVICE = 0o060_000;

        /// Regular file.
        const REGULAR_FILE = 0o100_000;
        /// Symbolic link.
        const SYMBOLIK_LINK = 0o120_000;
        /// Socket.
        const SOCKET = 0o140_000;
    }
}
397
/// Iterator state: the part of the archive that has not been parsed yet.
struct Iter<'a>(&'a [u8]);
impl<'a> Iter<'a> {
    /// Creates an iterator positioned at the first byte of `binary`.
    fn new(binary: &'a [u8]) -> Self {
        Iter(binary)
    }
}
404impl<'a> Iterator for Iter<'a> {
405    type Item = Entry<'a>;
406
407    fn next(&mut self) -> Option<Self::Item> {
408        if self.0.is_empty() {
409            None
410        } else {
411            let (entry, remaining) = Entry::new(self.0)?;
412
413            self.0 = remaining;
414
415            Some(entry)
416        }
417    }
418}
419
420struct ByteArray<'a> {
421    binary: &'a [u8],
422    current: usize,
423}
424impl<'a> ByteArray<'a> {
425    fn new(binary: &'a [u8]) -> Self {
426        Self { binary, current: 0 }
427    }
428
429    fn into_inner(self) -> &'a [u8] {
430        self.binary
431    }
432
433    fn proceed_byte(&mut self) -> Option<u8> {
434        let byte = self.binary.first().copied()?;
435
436        self.skip_bytes(1);
437
438        Some(byte)
439    }
440
441    fn proceed_bytes(&mut self, n: usize) -> Option<&'a [u8]> {
442        let bytes = self.binary.get(..n)?;
443
444        self.skip_bytes(n);
445
446        Some(bytes)
447    }
448
449    fn proceed_str_into_octal_u32(&mut self, n: usize) -> Option<u32> {
450        self.proceed_str(n)
451            .and_then(|s| u32::from_str_radix(s, 8).ok())
452    }
453
454    fn proceed_str_into_octal_u64(&mut self, n: usize) -> Option<u64> {
455        self.proceed_str(n)
456            .and_then(|s| u64::from_str_radix(s, 8).ok())
457    }
458
459    fn proceed_str_into_hex(&mut self) -> Option<u32> {
460        self.proceed_str(8)
461            .and_then(|s| u32::from_str_radix(s, 16).ok())
462    }
463
464    fn proceed_str(&mut self, n: usize) -> Option<&'a str> {
465        self.proceed_bytes(n)
466            .and_then(|bytes| str::from_utf8(bytes).ok())
467    }
468
469    fn proceed_u16(&mut self, endianness: Endianness) -> Option<u16> {
470        Some(endianness.u8_array_to_u16([self.proceed_byte()?, self.proceed_byte()?]))
471    }
472
473    fn skip_to_next_multiple_of_four(&mut self) {
474        self.skip_bytes((4 - self.current % 4) % 4);
475    }
476
477    fn skip_bytes(&mut self, n: usize) {
478        self.binary = self.binary.get(n..).unwrap_or_default();
479        self.current += n;
480    }
481}
482
/// Byte order used to decode two-byte values.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Endianness {
    Big,
    Little,
}
impl Endianness {
    /// Combines two bytes into a `u16`, taking the first byte as the most significant one for
    /// `Big` and as the least significant one for `Little`.
    fn u8_array_to_u16(self, bytes: [u8; 2]) -> u16 {
        let [first, second] = bytes;

        let (high, low) = match self {
            Self::Big => (first, second),
            Self::Little => (second, first),
        };

        (u16::from(high) << 8) | u16::from(low)
    }
}