rawzip/
extra_fields.rs

1use crate::{utils::le_u16, Error, ErrorKind, Header};
2use std::io::Write;
3
/// A numeric identifier for an extra field in a Zip archive.
///
/// Constants defined here correspond to the IDs defined in the Zip specification.
///
/// See sections 4.5 and 4.6 of the Zip spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExtraFieldId(u16);

impl ExtraFieldId {
    pub const ZIP64: Self = Self(0x0001);
    pub const AV_INFO: Self = Self(0x0007);
    pub const EXTENDED_LANGUAGE_ENCODING: Self = Self(0x0008);
    pub const OS2: Self = Self(0x0009);
    pub const NTFS: Self = Self(0x000a);
    pub const OPENVMS: Self = Self(0x000c);
    pub const UNIX: Self = Self(0x000d);
    pub const FILE_STREAM_AND_FORK_DESCRIPTORS: Self = Self(0x000e);
    pub const PATCH_DESCRIPTOR: Self = Self(0x000f);
    pub const PKCS7_STORE: Self = Self(0x0014);
    pub const X509_CERT_ID_AND_SIG: Self = Self(0x0015);
    pub const X509_CERT_ID_CENTRAL_DIR: Self = Self(0x0016);
    pub const STRONG_ENCRYPTION_HEADER: Self = Self(0x0017);
    pub const RECORD_MANAGEMENT_CONTROLS: Self = Self(0x0018);
    pub const PKCS7_ENCRYPTION_RECIPIENT_CERT_LIST: Self = Self(0x0019);
    pub const TIMESTAMP_RECORD: Self = Self(0x0020);
    pub const POLICY_DECRYPTION_KEY_RECORD: Self = Self(0x0021);
    pub const SMARTCRYPT_KEY_PROVIDER: Self = Self(0x0022);
    pub const SMARTCRYPT_POLICY_KEY_DATA: Self = Self(0x0023);
    pub const IBM_S390_AS400_UNCOMPRESSED: Self = Self(0x0065);
    pub const IBM_S390_AS400_COMPRESSED: Self = Self(0x0066);
    pub const POSZIP_4690: Self = Self(0x4690);
    pub const EXTENDED_TIMESTAMP: Self = Self(0x5455);
    pub const INFO_ZIP_UNIX_ORIGINAL: Self = Self(0x5855);
    pub const INFO_ZIP_UNIX: Self = Self(0x7855);
    pub const INFO_ZIP_UNIX_UID_GID: Self = Self(0x7875);
    pub const JAVA_JAR: Self = Self(0xCAFE);
    pub const ANDROID_ZIP_ALIGNMENT: Self = Self(0xD935);
    pub const MACINTOSH: Self = Self(0x07c8);
    pub const ACORN_SPARKFS: Self = Self(0x4341);
    pub const WINDOWS_NT_SECURITY_DESCRIPTOR: Self = Self(0x4653);
    pub const AOS_VS_ACL: Self = Self(0x5356);
    pub const INFO_ZIP_UNICODE_COMMENT: Self = Self(0x6375);
    pub const INFO_ZIP_UNICODE_PATH: Self = Self(0x7075);
    pub const DATA_STREAM_ALIGNMENT: Self = Self(0xa11e);
    pub const MICROSOFT_OPEN_PACKAGING_GROWTH_HINT: Self = Self(0xa220);

    /// Creates an extra field ID from its raw `u16` value.
    ///
    /// Useful for IDs that do not have a named constant.
    #[inline]
    pub const fn new(id: u16) -> Self {
        Self(id)
    }

    /// Returns the raw `u16` value of the extra field ID.
    #[inline]
    pub const fn as_u16(self) -> u16 {
        self.0
    }
}
62
/// An iterator over extra field entries in a Zip archive.
///
/// Zip spec section 4.5 defines extensible data fields as:
///
/// - Header ID - 2 bytes
/// - Data Size - 2 bytes
/// - Data - variable length
///
/// If the iterator encounters malformed or truncated data, it will stop
/// yielding entries. You can check [`ExtraFields::remaining_bytes()`] after
/// iteration to detect if any data was left unparsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExtraFields<'a> {
    // Unparsed tail of the extra-field bytes; shrinks as entries are yielded.
    data: &'a [u8],
}
78
79impl<'a> ExtraFields<'a> {
80    /// Creates a new iterator over the extra fields in the provided data slice.
81    #[inline]
82    pub fn new(data: &'a [u8]) -> Self {
83        Self { data }
84    }
85
86    /// Returns the remaining unparsed bytes in the extra field data.
87    #[inline]
88    pub fn remaining_bytes(&self) -> &'a [u8] {
89        self.data
90    }
91
92    #[inline]
93    fn next_data(&mut self) -> Option<&'a [u8]> {
94        let scratch = self.data;
95        if scratch.len() < 4 {
96            return None;
97        }
98
99        let size = le_u16(&scratch[2..4]) as usize;
100        let total_field_len = size + 4;
101        if scratch.len() < total_field_len {
102            return None;
103        }
104
105        let (body, rest) = scratch.split_at(total_field_len);
106
107        // Only advance once we have the entire entry
108        self.data = rest;
109        Some(body)
110    }
111}
112
113impl<'a> Iterator for ExtraFields<'a> {
114    type Item = (ExtraFieldId, &'a [u8]);
115
116    #[inline]
117    fn next(&mut self) -> Option<Self::Item> {
118        let next_chunk = self.next_data()?;
119        let kind = le_u16(&next_chunk[0..2]);
120        let body = &next_chunk[4..];
121        Some((ExtraFieldId(kind), body))
122    }
123}
124
/// Container for extra fields with a shared data buffer and cached sizes.
///
/// Each added field is appended to `data_buffer` in its wire format
/// (id + size + payload), while `entries` records which header(s) the
/// field targets, in the same insertion order.
#[derive(Debug, Clone)]
pub(crate) struct ExtraFieldsContainer {
    // One Header location per added field; parallels the entries packed
    // into `data_buffer`.
    entries: StackVec<Header, 5>,
    // Wire-format bytes of every added field, concatenated.
    data_buffer: StackVec<u8, 15>,
    // Running byte total of fields destined for the local file header.
    pub(crate) local_size: u16,
    // Running byte total of fields destined for the central directory.
    pub(crate) central_size: u16,
}
133
134impl ExtraFieldsContainer {
135    pub fn new() -> Self {
136        Self {
137            entries: StackVec::new(Header::new(0)),
138            data_buffer: StackVec::new(0u8),
139            local_size: 0,
140            central_size: 0,
141        }
142    }
143
144    pub fn add_field(
145        &mut self,
146        id: ExtraFieldId,
147        data: &[u8],
148        location: Header,
149    ) -> Result<(), Error> {
150        let size_delta = 4 + data.len();
151        let mut current_size = 0;
152        if location.includes_local() {
153            current_size = self.local_size;
154        }
155        if location.includes_central() {
156            current_size = std::cmp::max(self.central_size, current_size);
157        }
158
159        if size_delta + (current_size as usize) > u16::MAX as usize {
160            return Err(Error::from(ErrorKind::InvalidInput {
161                msg: "extra field data too large".to_string(),
162            }));
163        }
164
165        let mut buffer = [0u8; 4];
166        buffer[0..2].copy_from_slice(&id.as_u16().to_le_bytes());
167        buffer[2..4].copy_from_slice(&(data.len() as u16).to_le_bytes());
168        self.data_buffer.extend_from_slice(&buffer);
169        self.data_buffer.extend_from_slice(data);
170        if location.includes_local() {
171            self.local_size += size_delta as u16;
172        }
173        if location.includes_central() {
174            self.central_size += size_delta as u16;
175        }
176
177        self.entries.push(location);
178        Ok(())
179    }
180
181    fn write_extra_fields_iter(
182        &self,
183        writer: &mut impl Write,
184        filter: Header,
185    ) -> Result<(), Error> {
186        let fields = self.data_buffer.as_slice();
187        let mut extra_fields = ExtraFields::new(fields);
188        let entries = self.entries.as_slice();
189        for entry in entries {
190            let extra_field = extra_fields.next_data().expect("Entry should have data");
191            let write = entry.intersects(filter);
192            if write {
193                writer.write_all(extra_field)?;
194            }
195        }
196        Ok(())
197    }
198
199    #[inline]
200    pub fn write_extra_fields(&self, writer: &mut impl Write, filter: Header) -> Result<(), Error> {
201        if filter == Header::LOCAL && self.local_size == 0 {
202            // No local fields to write
203            Ok(())
204        } else if filter == Header::CENTRAL && self.central_size == 0 {
205            // No central fields to write
206            Ok(())
207        } else if self.local_size == 0 || self.central_size == 0 {
208            // If everything is one sided, we can dump everything
209            writer.write_all(self.data_buffer.as_slice())?;
210            Ok(())
211        } else {
212            self.write_extra_fields_iter(writer, filter)
213        }
214    }
215}
216
/// A stack-first vector that avoids heap allocation for small amounts of data.
///
/// A poor man's `smallvec` as we aren't able to store as many elements inline
/// (by one byte), but it's still an extremely effective no dependency, no
/// unsafe solution, as benchmarks showed a 33% throughput improvement when
/// writing out files with timestamps.
#[derive(Debug, Clone)]
pub(crate) enum StackVec<T, const N: usize>
where
    T: Copy + Clone,
{
    /// Inline storage for up to N elements
    Small { data: [T; N], len: u8 },
    /// Heap storage for more elements
    Large(Vec<T>),
}

impl<T, const N: usize> StackVec<T, N>
where
    T: Copy + Clone,
{
    /// Creates an empty vector; `default_val` seeds the inline array since
    /// fixed-size arrays can't be left uninitialized without `unsafe`.
    pub fn new(default_val: T) -> Self {
        Self::Small {
            data: [default_val; N],
            len: 0,
        }
    }

    /// Appends one element, spilling to the heap when inline storage is full.
    pub fn push(&mut self, item: T) {
        match self {
            Self::Large(vec) => vec.push(item),
            Self::Small { data, len } => {
                let used = *len as usize;
                if used == N {
                    // Inline storage exhausted: spill everything to a Vec.
                    let mut spilled = Vec::with_capacity(N + 1);
                    spilled.extend_from_slice(&data[..]);
                    spilled.push(item);
                    *self = Self::Large(spilled);
                } else {
                    data[used] = item;
                    *len += 1;
                }
            }
        }
    }

    /// Returns a slice of the initialized elements, regardless of storage mode.
    pub fn as_slice(&self) -> &[T] {
        match self {
            Self::Small { data, len } => &data[..usize::from(*len)],
            Self::Large(vec) => vec.as_slice(),
        }
    }
}
273
274// Specialized methods for StackVec<u8, N> (byte buffers)
275impl<const N: usize> StackVec<u8, N> {
276    pub fn extend_from_slice(&mut self, slice: &[u8]) {
277        match self {
278            Self::Small { data, len } => {
279                let current_len = *len as usize;
280                let end = current_len + slice.len();
281                if end <= N {
282                    data[current_len..current_len + slice.len()].copy_from_slice(slice);
283                    *len += slice.len() as u8;
284                } else {
285                    // Need to promote to large buffer
286                    let mut vec = Vec::with_capacity(current_len + slice.len());
287                    vec.extend_from_slice(&data[..current_len]);
288                    vec.extend_from_slice(slice);
289                    *self = Self::Large(vec);
290                }
291            }
292            Self::Large(vec) => {
293                vec.extend_from_slice(slice);
294            }
295        }
296    }
297}
298
/// Borrowing iterator over a [`StackVec`], mirroring its two storage modes.
#[derive(Debug)]
pub enum StackVecIter<'a, T, const N: usize>
where
    T: Copy + Clone,
{
    /// Walks the inline array, tracking position manually.
    Small {
        data: &'a [T; N],
        len: u8,
        index: u8,
    },
    /// Delegates to the standard slice iterator for heap storage.
    Large(std::slice::Iter<'a, T>),
}

impl<'a, T, const N: usize> Iterator for StackVecIter<'a, T, N>
where
    T: Copy + Clone,
{
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Large(inner) => inner.next(),
            Self::Small { data, len, index } => {
                // Yield only the initialized prefix of the inline array.
                if *index >= *len {
                    return None;
                }
                // Copy out the shared reference so the returned borrow
                // carries the full 'a lifetime.
                let arr: &'a [T; N] = data;
                let item = &arr[usize::from(*index)];
                *index += 1;
                Some(item)
            }
        }
    }
}
333
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use super::*;

    #[test]
    fn test_partial_parsing_with_remaining_bytes() {
        // One complete EXTENDED_TIMESTAMP entry (id 0x5455, size 1, body
        // 0xFF) followed by 3 trailing bytes too short to form a header.
        let data = [0x55, 0x54, 0x01, 0x00, 0xFF, 0x01, 0x00, 0x05];
        let mut iter = ExtraFields::new(&data);
        assert_eq!(iter.remaining_bytes(), &data);

        let (id, body) = iter.next().unwrap();
        assert_eq!(id, ExtraFieldId::EXTENDED_TIMESTAMP);
        assert_eq!(body, &[0xFF]);

        // Iteration stops at the truncated tail, but the unparsed bytes
        // stay observable via remaining_bytes().
        assert_eq!(iter.next(), None);
        assert_eq!(iter.remaining_bytes(), &[0x01, 0x00, 0x05]);
    }

    #[test]
    fn test_unknown_field_id() {
        // An id with no named constant (0xFFFF) is still yielded verbatim.
        let data = [0xFF, 0xFF, 0x02, 0x00, 0xDE, 0xAD];
        let mut iter = ExtraFields::new(&data);

        let (id, body) = iter.next().unwrap();
        assert_eq!(id, ExtraFieldId(0xFFFF));
        assert_eq!(body, &[0xDE, 0xAD]);

        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_stack_vec_u8_inline_operations() {
        // push and extend_from_slice both stay inline while len <= N.
        let mut buf = StackVec::<u8, 4>::new(0);
        assert_eq!(buf.as_slice(), &[]);

        buf.push(1);
        assert_eq!(buf.as_slice(), &[1]);

        buf.extend_from_slice(&[2, 3]);
        assert_eq!(buf.as_slice(), &[1, 2, 3]);
    }

    #[test]
    fn test_stack_vec_u8_promote_to_heap() {
        let mut buf = StackVec::<u8, 2>::new(0);

        // Fill inline capacity
        buf.extend_from_slice(&[1, 2]);
        assert_eq!(buf.as_slice(), &[1, 2]);

        // Force promotion to heap
        buf.extend_from_slice(&[3, 4, 5]);
        assert_eq!(buf.as_slice(), &[1, 2, 3, 4, 5]);

        // Further pushes go straight to the heap-backed variant.
        buf.push(6);
        assert_eq!(buf.as_slice(), &[1, 2, 3, 4, 5, 6]);
    }

    #[test]
    fn test_stack_vec_size_constraints() {
        // Test that StackVec for bytes is same size as Vec
        assert!(
            std::mem::size_of::<StackVec<u8, 15>>() <= 24,
            "StackVec should not exceed Vec size on 64 bits"
        );
    }

    #[test]
    fn test_stack_vec_clone() {
        let mut buf = StackVec::<u8, 2>::new(0);
        buf.extend_from_slice(&[1, 2, 3]); // Force heap promotion

        let cloned = buf.clone();
        assert_eq!(buf.as_slice(), cloned.as_slice());
    }

    // Adds every field to a container, serializes for both header kinds,
    // and checks the parsed output matches the fields targeting each kind.
    fn round_trip_extra_fields(fields: &[(ExtraFieldId, &[u8], Header)]) {
        let mut container = ExtraFieldsContainer::new();

        for (id, data, location) in fields {
            container.add_field(*id, data, *location).unwrap();
        }

        for location in [Header::LOCAL, Header::CENTRAL] {
            let mut cursor = Cursor::new(Vec::new());
            container.write_extra_fields(&mut cursor, location).unwrap();

            // Expected: only the fields whose location matches the filter.
            let written_fields = fields
                .iter()
                .filter(|&&(_, _, loc)| loc == location)
                .map(|&(id, data, _)| (id, data))
                .collect::<Vec<_>>();
            let read_fields = ExtraFields::new(cursor.get_ref()).collect::<Vec<_>>();

            assert_eq!(written_fields, read_fields);
        }
    }

    #[test]
    fn test_extra_fields() {
        // Only local extra fields
        round_trip_extra_fields(&[
            (ExtraFieldId::new(0), &[0u8; 16], Header::LOCAL),
            (ExtraFieldId::new(1), &[1u8; 16], Header::LOCAL),
        ]);

        // Only central extra fields
        round_trip_extra_fields(&[
            (ExtraFieldId::new(0), &[0u8; 16], Header::CENTRAL),
            (ExtraFieldId::new(1), &[1u8; 16], Header::CENTRAL),
        ]);

        // Mixed extra fields where the local and central sizes are the same
        // (exercises the per-entry filtering path, not the wholesale dump).
        round_trip_extra_fields(&[
            (ExtraFieldId::new(0), &[0u8; 16], Header::CENTRAL),
            (ExtraFieldId::new(1), &[1u8; 16], Header::LOCAL),
        ]);
    }
}