Skip to main content

tar_framing/
pax.rs

1//! PAX record parsing, active-global updates, and per-member metadata state.
2
3use std::{
4    collections::{HashMap, hash_map::Entry},
5    fmt,
6    str::FromStr,
7    sync::Arc,
8};
9
10use super::PaxKind;
11
/// The `hdrcharset` record value that [`HdrCharset`] parses as `Utf8`.
const UTF8_HDRCHARSET: &str = "ISO-IR 10646 2000 UTF-8";
/// The `hdrcharset` record value that [`HdrCharset`] parses as `Binary`.
const BINARY_HDRCHARSET: &str = "BINARY";
14
15/// An error encountered while parsing pax extended-header records.
16#[derive(Debug, thiserror::Error)]
17pub enum PaxError {
18    /// A pax payload did not consist of valid extended-header records.
19    #[error("invalid pax records: {reason}")]
20    InvalidRecords {
21        /// A concise description of the grammar violation.
22        reason: &'static str,
23    },
24    /// A pax text component that must be UTF-8 is not valid UTF-8.
25    #[error("pax records contain invalid UTF-8 text")]
26    InvalidUtf8,
27    /// A pax record keyword is neither standard nor an accepted namespaced extension.
28    #[error("invalid or unknown pax keyword {keyword:?}")]
29    InvalidKeyword {
30        /// The rejected keyword.
31        keyword: String,
32    },
33    /// A pax decimal integer field is malformed or exceeds this API's integer range.
34    #[error("invalid pax {keyword} value: {value:?}")]
35    InvalidInteger {
36        /// The affected standard keyword.
37        keyword: &'static str,
38        /// The rejected textual value.
39        value: String,
40    },
41    /// A pax file-time value is malformed or exceeds this API's integer range.
42    #[error("invalid pax {keyword} time value: {value:?}")]
43    InvalidTime {
44        /// The affected standard keyword.
45        keyword: &'static str,
46        /// The rejected textual value.
47        value: String,
48    },
49    /// A pax `hdrcharset` record requests text encoding unsupported by this API.
50    #[error("unsupported pax hdrcharset value {value:?}")]
51    UnsupportedCharset {
52        /// The unsupported character-set identifier.
53        value: String,
54    },
55    /// A pax record length or offset overflowed.
56    #[error("arithmetic overflow while computing {context}")]
57    ArithmeticOverflow {
58        /// The computation that overflowed.
59        context: &'static str,
60    },
61}
62
/// Reference-counted handle to one parsed set of pax records.
pub(crate) type SharedPaxRecords = Arc<PaxRecords>;

/// The typed records of one pax extended header, kept in the order they were parsed.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct PaxRecords(Vec<PaxRecord>);
67
/// The keyword half of a pax extended-header record, owned and usable as a map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PaxKeyword {
    /// `atime`: file access time.
    Atime,
    /// `charset`: encoding of the following member's file data.
    Charset,
    /// `comment`: an uninterpreted archive comment.
    Comment,
    /// `ctime`: file status-change time (compatibility extension).
    Ctime,
    /// `gid`: numeric group identifier.
    Gid,
    /// `gname`: group name.
    Gname,
    /// `hdrcharset`: encoding of pathname and user/group-name values.
    HdrCharset,
    /// `linkpath`: link pathname.
    LinkPath,
    /// `mtime`: file modification time.
    Mtime,
    /// `path`: member pathname.
    Path,
    /// A reserved `realtime.*` attribute; the payload is the text after `realtime.`.
    Realtime(Arc<str>),
    /// A reserved `security.*` attribute; the payload is the text after `security.`.
    Security(Arc<str>),
    /// `size`: member payload size.
    Size,
    /// `uid`: numeric user identifier.
    Uid,
    /// `uname`: user name.
    Uname,
    /// An implementation extension written as `vendor.keyword`.
    Vendor {
        /// The namespace before the first `.` (vendor or organization identifier).
        vendor: Arc<str>,
        /// Everything after the first `.`.
        name: Arc<str>,
    },
}
109
110impl PaxKeyword {
111    pub(crate) fn components(&self) -> (&str, Option<&str>) {
112        match self {
113            Self::Atime => ("atime", None),
114            Self::Charset => ("charset", None),
115            Self::Comment => ("comment", None),
116            Self::Ctime => ("ctime", None),
117            Self::Gid => ("gid", None),
118            Self::Gname => ("gname", None),
119            Self::HdrCharset => ("hdrcharset", None),
120            Self::LinkPath => ("linkpath", None),
121            Self::Mtime => ("mtime", None),
122            Self::Path => ("path", None),
123            Self::Realtime(name) => ("realtime", Some(name)),
124            Self::Security(name) => ("security", Some(name)),
125            Self::Size => ("size", None),
126            Self::Uid => ("uid", None),
127            Self::Uname => ("uname", None),
128            Self::Vendor { vendor, name } => (vendor, Some(name)),
129        }
130    }
131}
132
133impl fmt::Display for PaxKeyword {
134    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
135        let (namespace, name) = self.components();
136        formatter.write_str(namespace)?;
137        if let Some(name) = name {
138            formatter.write_str(".")?;
139            formatter.write_str(name)?;
140        }
141        Ok(())
142    }
143}
144
145/// Like [`PaxRecords`], but with an additional index of `keyword -> effective record index`
146/// to keep lookups cheap, even across pathological pax archives (e.g. multiple
147/// global extensions being merged together).
148#[derive(Debug, Default, Eq, PartialEq)]
149pub(crate) struct GlobalPaxRecords {
150    records: PaxRecords,
151    indices: HashMap<PaxKeyword, usize>,
152}
153
154impl GlobalPaxRecords {
155    fn apply(&mut self, updates: &PaxRecords) {
156        for update in updates.as_slice() {
157            match self.indices.entry(update.keyword()) {
158                Entry::Occupied(entry) => self.records.0[*entry.get()] = update.clone(),
159                Entry::Vacant(entry) => {
160                    let index = self.records.0.len();
161                    self.records.0.push(update.clone());
162                    entry.insert(index);
163                }
164            }
165        }
166    }
167
168    fn get(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
169        self.indices
170            .get(keyword)
171            .and_then(|index| self.records.as_slice().get(*index))
172    }
173
174    pub(super) fn hdrcharset(&self) -> HdrCharset {
175        self.get(&PaxKeyword::HdrCharset)
176            .and_then(|record| match record {
177                PaxRecord::HdrCharset(value) => Some(value),
178                _ => None,
179            })
180            .map_or(HdrCharset::Utf8, |value| match value {
181                PaxValue::Value(value) => *value,
182                PaxValue::Deleted => HdrCharset::Utf8,
183            })
184    }
185}
186
187/// One positioned parsed pax extended header.
188#[derive(Clone, Debug, Eq, PartialEq)]
189pub struct PaxExtension {
190    /// The absolute byte position of the pax extension header block.
191    pub position: u64,
192    /// Whether this extension has local or global scope.
193    pub kind: PaxKind,
194    records: SharedPaxRecords,
195}
196
197impl PaxExtension {
198    pub(crate) fn new(position: u64, kind: PaxKind, records: SharedPaxRecords) -> Self {
199        Self {
200            position,
201            kind,
202            records,
203        }
204    }
205
206    /// Returns the parsed pax records in archive order.
207    pub fn records(&self) -> &[PaxRecord] {
208        self.records.as_slice()
209    }
210}
211
212/// Unified pax metadata state applicable to one ordinary member.
213///
214/// Effective values apply local records over the active global state using
215/// standard last-record-wins and deletion semantics. [`Self::extensions`]
216/// retains the positioned extension headers newly encountered for this member.
217/// The effective global state is borrowed from the originating logical reader,
218/// so retaining this view also prevents that reader from advancing to another
219/// member whose global state could differ.
220#[derive(Clone, Debug, Eq, PartialEq)]
221pub struct PaxState<'global> {
222    global_records: Option<&'global GlobalPaxRecords>,
223    global_extensions: Vec<PaxExtension>,
224    local_extension: Option<PaxExtension>,
225}
226
227impl<'global> PaxState<'global> {
228    pub(crate) fn new(
229        global_records: Option<&'global GlobalPaxRecords>,
230        global_extensions: Vec<PaxExtension>,
231        local_extension: Option<PaxExtension>,
232    ) -> Self {
233        Self {
234            global_records,
235            global_extensions,
236            local_extension,
237        }
238    }
239
240    /// Returns positioned extensions newly encountered for this member.
241    ///
242    /// Global extensions are yielded in source order, followed by the optional
243    /// local extension.
244    pub fn extensions(&self) -> impl Iterator<Item = &PaxExtension> {
245        self.global_extensions
246            .iter()
247            .chain(self.local_extension.iter())
248    }
249
250    /// Returns the final applicable record for `keyword`, including deletions.
251    pub fn effective_record(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
252        let local_records = self
253            .local_extension
254            .as_ref()
255            .map(|extension| extension.records.as_ref());
256        Self::effective_record_from(local_records, self.global_records, keyword)
257    }
258
259    pub(super) fn effective_size<'records>(
260        local_records: Option<&'records PaxRecords>,
261        global_records: Option<&'records GlobalPaxRecords>,
262    ) -> Option<&'records PaxValue<u64>> {
263        Self::effective_record_from(local_records, global_records, &PaxKeyword::Size).and_then(
264            |record| match record {
265                PaxRecord::Size(value) => Some(value),
266                _ => None,
267            },
268        )
269    }
270
271    pub(super) fn effective_record_from<'records>(
272        local_records: Option<&'records PaxRecords>,
273        global_records: Option<&'records GlobalPaxRecords>,
274        keyword: &PaxKeyword,
275    ) -> Option<&'records PaxRecord> {
276        local_records
277            .and_then(|records| records.get(keyword))
278            .or_else(|| global_records.and_then(|records| records.get(keyword)))
279    }
280}
281
/// The encoding used for pax pathname and user/group-name values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HdrCharset {
    /// Extended-header text is UTF-8.
    Utf8,
    /// Values are unencoded bytes copied from the originating system.
    Binary,
}
290
291impl FromStr for HdrCharset {
292    type Err = String;
293
294    fn from_str(value: &str) -> Result<Self, Self::Err> {
295        match value {
296            UTF8_HDRCHARSET => Ok(Self::Utf8),
297            BINARY_HDRCHARSET => Ok(Self::Binary),
298            _ => Err(value.to_owned()),
299        }
300    }
301}
302
/// A character value whose representation depends on the effective pax [`HdrCharset`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PaxString {
    /// Text that was declared, or defaulted, to be UTF-8.
    Utf8(Arc<str>),
    /// Raw bytes from a value declared as unencoded binary.
    Binary(Arc<[u8]>),
}
311
/// The value of a parsed pax record: either a payload or an explicit deletion.
///
/// Pax gives empty records special meaning: an empty value deletes "any header
/// block field, previously entered extended header value, or global extended
/// header value of the same name." That is why a deletion is kept as a
/// tombstone instead of being dropped.
///
/// A tombstone is not the same as a record being missing. A missing record
/// still allows fallbacks, e.g. to global pax headers or to the equivalent
/// ustar field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PaxValue<T> {
    /// The record sets or overrides the attribute.
    Value(T),
    /// The record deletes the attribute from its applicable scope.
    Deleted,
}
328
329impl<T: FromStr> FromStr for PaxValue<T> {
330    type Err = T::Err;
331
332    fn from_str(value: &str) -> Result<Self, Self::Err> {
333        if value.is_empty() {
334            Ok(Self::Deleted)
335        } else {
336            value.parse().map(Self::Value)
337        }
338    }
339}
340
341impl<T> PaxValue<T> {
342    fn parse_utf8(value: &[u8]) -> Result<&str, PaxError> {
343        std::str::from_utf8(value).map_err(|_| PaxError::InvalidUtf8)
344    }
345}
346
347/// A parsed pax extended-header record.
348#[derive(Clone, Debug, Eq, PartialEq)]
349pub enum PaxRecord {
350    /// File access time in integral seconds; fractional seconds are discarded.
351    Atime(PaxValue<u64>),
352    /// Encoding of the following member's file data.
353    // TODO: Consider enforcing known values here, similarly to what we do for `hdrcharset`.
354    Charset(PaxValue<Arc<str>>),
355    /// An uninterpreted archive comment.
356    Comment(PaxValue<Arc<str>>),
357    /// File status-change time compatibility extension in integral seconds.
358    ///
359    /// NOTE: newer versions of the pax spec don't include this record.
360    /// We support it for backwards compatibility.
361    ///
362    /// See: <https://www.opengroup.org/austin/aardvark/finaltext/xcubug.txt>
363    /// See: <https://www.opengroup.org/austin/docs/austin_166.txt>
364    /// See: <https://www.opengroup.org/austin/docs/austin_206.txt>
365    Ctime(PaxValue<u64>),
366    /// Numeric group identifier.
367    Gid(PaxValue<u64>),
368    /// Group name encoded according to the effective [`HdrCharset`].
369    Gname(PaxValue<PaxString>),
370    /// Encoding of pathname and user/group-name extended-header values.
371    HdrCharset(PaxValue<HdrCharset>),
372    /// Link pathname encoded according to the effective [`HdrCharset`].
373    LinkPath(PaxValue<PaxString>),
374    /// File modification time in integral seconds; fractional seconds are discarded.
375    Mtime(PaxValue<u64>),
376    /// Member pathname encoded according to the effective [`HdrCharset`].
377    Path(PaxValue<PaxString>),
378    /// A reserved `realtime.*` extended attribute.
379    Realtime {
380        /// Keyword suffix after `realtime.`.
381        name: Arc<str>,
382        /// Attribute value or deletion tombstone.
383        value: PaxValue<Arc<str>>,
384    },
385    /// A reserved `security.*` extended attribute.
386    Security {
387        /// Keyword suffix after `security.`.
388        name: Arc<str>,
389        /// Attribute value or deletion tombstone.
390        value: PaxValue<Arc<str>>,
391    },
392    /// Member payload size in octets.
393    Size(PaxValue<u64>),
394    /// Numeric user identifier.
395    Uid(PaxValue<u64>),
396    /// User name encoded according to the effective [`HdrCharset`].
397    Uname(PaxValue<PaxString>),
398    /// An implementation extension in a `vendor.keyword` namespace.
399    Vendor {
400        /// Vendor or organization identifier.
401        vendor: Arc<str>,
402        /// Keyword suffix after the vendor namespace.
403        name: Arc<str>,
404        /// Attribute value or deletion tombstone.
405        value: PaxValue<Arc<str>>,
406    },
407}
408
409impl PaxRecord {
410    /// Returns this record's typed pax keyword.
411    pub fn keyword(&self) -> PaxKeyword {
412        match self {
413            Self::Atime(_) => PaxKeyword::Atime,
414            Self::Charset(_) => PaxKeyword::Charset,
415            Self::Comment(_) => PaxKeyword::Comment,
416            Self::Ctime(_) => PaxKeyword::Ctime,
417            Self::Gid(_) => PaxKeyword::Gid,
418            Self::Gname(_) => PaxKeyword::Gname,
419            Self::HdrCharset(_) => PaxKeyword::HdrCharset,
420            Self::LinkPath(_) => PaxKeyword::LinkPath,
421            Self::Mtime(_) => PaxKeyword::Mtime,
422            Self::Path(_) => PaxKeyword::Path,
423            Self::Realtime { name, .. } => PaxKeyword::Realtime(Arc::clone(name)),
424            Self::Security { name, .. } => PaxKeyword::Security(Arc::clone(name)),
425            Self::Size(_) => PaxKeyword::Size,
426            Self::Uid(_) => PaxKeyword::Uid,
427            Self::Uname(_) => PaxKeyword::Uname,
428            Self::Vendor { vendor, name, .. } => PaxKeyword::Vendor {
429                vendor: Arc::clone(vendor),
430                name: Arc::clone(name),
431            },
432        }
433    }
434
435    fn parse(keyword: &str, value: &[u8], hdrcharset: HdrCharset) -> Result<Self, PaxError> {
436        match keyword {
437            "atime" => PaxValue::parse_time("atime", value).map(Self::Atime),
438            "charset" => PaxValue::parse_text(value).map(Self::Charset),
439            "comment" => PaxValue::parse_text(value).map(Self::Comment),
440            "ctime" => PaxValue::parse_time("ctime", value).map(Self::Ctime),
441            "gid" => PaxValue::parse_integer("gid", value).map(Self::Gid),
442            "gname" => PaxValue::parse_string(value, hdrcharset).map(Self::Gname),
443            "hdrcharset" => PaxValue::parse_hdrcharset(value).map(Self::HdrCharset),
444            "linkpath" => PaxValue::parse_string(value, hdrcharset).map(Self::LinkPath),
445            "mtime" => PaxValue::parse_time("mtime", value).map(Self::Mtime),
446            "path" => PaxValue::parse_string(value, hdrcharset).map(Self::Path),
447            "size" => PaxValue::parse_integer("size", value).map(Self::Size),
448            "uid" => PaxValue::parse_integer("uid", value).map(Self::Uid),
449            "uname" => PaxValue::parse_string(value, hdrcharset).map(Self::Uname),
450            _ => Self::parse_namespaced(keyword, value),
451        }
452    }
453
454    fn parse_namespaced(keyword: &str, value: &[u8]) -> Result<Self, PaxError> {
455        let invalid = || PaxError::InvalidKeyword {
456            keyword: keyword.to_owned(),
457        };
458        let (namespace, name) = match keyword.split_once('.') {
459            Some((namespace, name)) if !name.is_empty() => (namespace, name),
460            _ => return Err(invalid()),
461        };
462        match namespace {
463            "realtime" => Ok(Self::Realtime {
464                name: Arc::from(name),
465                value: PaxValue::parse_text(value)?,
466            }),
467            "security" => Ok(Self::Security {
468                name: Arc::from(name),
469                value: PaxValue::parse_text(value)?,
470            }),
471            vendor if !vendor.is_empty() => Ok(Self::Vendor {
472                vendor: Arc::from(vendor),
473                name: Arc::from(name),
474                value: PaxValue::parse_text(value)?,
475            }),
476            _ => Err(invalid()),
477        }
478    }
479}
480
481impl PaxRecords {
482    pub(crate) fn as_slice(&self) -> &[PaxRecord] {
483        &self.0
484    }
485
486    pub(super) fn parse(
487        payload: &[u8],
488        inherited_hdrcharset: HdrCharset,
489    ) -> Result<Self, PaxError> {
490        if payload.is_empty() {
491            return Err(PaxError::InvalidRecords {
492                reason: "local extended header payload contains no records",
493            });
494        }
495
496        let mut records = Vec::new();
497        let mut cursor = 0;
498        while cursor < payload.len() {
499            let length_end = payload[cursor..]
500                .iter()
501                .position(|byte| *byte == b' ')
502                .ok_or(PaxError::InvalidRecords {
503                    reason: "record is missing its length separator",
504                })?
505                + cursor;
506            if length_end == cursor {
507                return Err(PaxError::InvalidRecords {
508                    reason: "record length is empty",
509                });
510            }
511            let record_len = std::str::from_utf8(&payload[cursor..length_end])
512                .ok()
513                .and_then(decimal_u64)
514                .ok_or(PaxError::InvalidRecords {
515                    reason: "record length is not a valid decimal integer",
516                })?;
517            let record_len =
518                usize::try_from(record_len).map_err(|_| PaxError::ArithmeticOverflow {
519                    context: "pax record length",
520                })?;
521            let record_end =
522                cursor
523                    .checked_add(record_len)
524                    .ok_or(PaxError::ArithmeticOverflow {
525                        context: "pax record end",
526                    })?;
527            if record_end > payload.len() {
528                return Err(PaxError::InvalidRecords {
529                    reason: "record length exceeds extended header payload",
530                });
531            }
532            let record = &payload[cursor..record_end];
533            if record.last() != Some(&b'\n') {
534                return Err(PaxError::InvalidRecords {
535                    reason: "record is not newline terminated",
536                });
537            }
538            let content_start = length_end - cursor + 1;
539            let equals = record[content_start..record.len() - 1]
540                .iter()
541                .position(|byte| *byte == b'=')
542                .ok_or(PaxError::InvalidRecords {
543                    reason: "record is missing its keyword/value separator",
544                })?
545                + content_start;
546            if equals == content_start {
547                return Err(PaxError::InvalidRecords {
548                    reason: "record keyword is empty",
549                });
550            }
551            let keyword = std::str::from_utf8(&record[content_start..equals])
552                .map_err(|_| PaxError::InvalidUtf8)?;
553            records.push((keyword, &record[equals + 1..record.len() - 1]));
554            cursor = record_end;
555        }
556
557        // Per pax spec: the `gname`, `linkpath`, `path`, and `uname` records
558        // are encoded according to `hdrcharset`, so we need to first parse
559        // it (or take it from a parent global pax header) before we can parse
560        // the other pax records, regardless of order.
561        //
562        // See: pax spec, "pax Extended Header"
563        let hdrcharset = Self::resolve_hdrcharset(&records, inherited_hdrcharset)?;
564        records
565            .into_iter()
566            .map(|(keyword, value)| PaxRecord::parse(keyword, value, hdrcharset))
567            .collect::<Result<Vec<_>, _>>()
568            .map(Self)
569    }
570
571    fn resolve_hdrcharset(
572        records: &[(&str, &[u8])],
573        inherited: HdrCharset,
574    ) -> Result<HdrCharset, PaxError> {
575        let mut hdrcharset = inherited;
576        // TODO: Consider finding the last `hdrcharset` with a reverse search to avoid parsing
577        // shadowed values here. All records would still be validated during typed parsing.
578        for (keyword, value) in records {
579            if *keyword == "hdrcharset" {
580                hdrcharset = match PaxValue::parse_hdrcharset(value)? {
581                    PaxValue::Value(value) => value,
582                    PaxValue::Deleted => HdrCharset::Utf8,
583                };
584            }
585        }
586        Ok(hdrcharset)
587    }
588
589    fn get(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
590        self.0
591            .iter()
592            .rev()
593            .find(|record| record.keyword() == *keyword)
594    }
595
596    pub(super) fn apply_global(&self, active: &mut Option<GlobalPaxRecords>) {
597        active.get_or_insert_default().apply(self);
598    }
599}
600
601impl PaxValue<Arc<str>> {
602    fn parse_text(value: &[u8]) -> Result<Self, PaxError> {
603        Self::parse_utf8(value).map(|value| match value {
604            "" => Self::Deleted,
605            value => Self::Value(Arc::from(value)),
606        })
607    }
608}
609
610impl PaxValue<PaxString> {
611    /// Parses a pax "string", taking the effective [`HdrCharset`] into account.
612    fn parse_string(value: &[u8], hdrcharset: HdrCharset) -> Result<Self, PaxError> {
613        if value.is_empty() {
614            return Ok(Self::Deleted);
615        }
616        match hdrcharset {
617            HdrCharset::Utf8 => Self::parse_utf8(value)
618                .map(Arc::from)
619                .map(PaxString::Utf8)
620                .map(Self::Value),
621            HdrCharset::Binary => Ok(Self::Value(PaxString::Binary(Arc::from(value)))),
622        }
623    }
624}
625
626impl PaxValue<HdrCharset> {
627    fn parse_hdrcharset(value: &[u8]) -> Result<Self, PaxError> {
628        let value = Self::parse_utf8(value)?;
629        value
630            .parse()
631            .map_err(|value| PaxError::UnsupportedCharset { value })
632    }
633}
634
635impl PaxValue<u64> {
636    fn parse_integer(keyword: &'static str, value: &[u8]) -> Result<Self, PaxError> {
637        let value = Self::parse_utf8(value)?;
638        if value.is_empty() {
639            return Ok(Self::Deleted);
640        }
641
642        decimal_u64(value)
643            .map(Self::Value)
644            .ok_or_else(|| PaxError::InvalidInteger {
645                keyword,
646                value: value.to_owned(),
647            })
648    }
649
650    fn parse_time(keyword: &'static str, value: &[u8]) -> Result<Self, PaxError> {
651        let value = Self::parse_utf8(value)?;
652        if value.is_empty() {
653            return Ok(Self::Deleted);
654        }
655
656        let invalid = || PaxError::InvalidTime {
657            keyword,
658            value: value.to_owned(),
659        };
660        let seconds = match value.split_once('.') {
661            Some((seconds, fractional_digits))
662                if !fractional_digits.is_empty()
663                    && fractional_digits.bytes().all(|byte| byte.is_ascii_digit()) =>
664            {
665                seconds
666            }
667            Some(_) => return Err(invalid()),
668            None => value,
669        };
670        decimal_u64(seconds).map(Self::Value).ok_or_else(invalid)
671    }
672}
673
/// Parses `value` as an unsigned decimal integer that fits in a `u64`.
///
/// A leading `+` is rejected explicitly because `u64`'s own parser would
/// accept it. Empty text, a sign, non-digit characters, and out-of-range
/// numbers all yield `None`.
fn decimal_u64(value: &str) -> Option<u64> {
    match value.strip_prefix('+') {
        Some(_) => None,
        None => value.parse().ok(),
    }
}
680
681#[cfg(test)]
682mod tests {
683    use std::ptr;
684
685    use super::*;
686    use crate::test_support::{raw_record, record};
687
688    fn text(value: &str) -> Arc<str> {
689        Arc::from(value)
690    }
691
692    fn comment(value: &str) -> PaxRecord {
693        PaxRecord::Comment(PaxValue::Value(text(value)))
694    }
695
696    fn utf8(value: &str) -> PaxString {
697        PaxString::Utf8(text(value))
698    }
699
700    fn binary(value: &[u8]) -> PaxString {
701        PaxString::Binary(Arc::from(value))
702    }
703
704    fn vendor(name: &str, value: &str) -> PaxRecord {
705        PaxRecord::Vendor {
706            vendor: text("Acme"),
707            name: text(name),
708            value: PaxValue::Value(text(value)),
709        }
710    }
711
712    fn security(value: &str) -> PaxRecord {
713        PaxRecord::Security {
714            name: text("label"),
715            value: PaxValue::Value(text(value)),
716        }
717    }
718
719    fn global_state(records: Vec<PaxRecord>) -> Option<GlobalPaxRecords> {
720        let mut active = None;
721        PaxRecords(records).apply_global(&mut active);
722        active
723    }
724
725    fn extension(position: u64, kind: PaxKind, records: Vec<PaxRecord>) -> PaxExtension {
726        PaxExtension::new(position, kind, Arc::new(PaxRecords(records)))
727    }
728
    /// Checks `PaxState::effective_record` precedence for `comment` across a
    /// table of global/local combinations, then checks that
    /// `PaxState::extensions` yields globals in order followed by the local.
    #[test]
    fn resolves_state_precedence_and_preserves_extension_order() {
        // One precedence scenario: the global records applied first, the
        // optional local extension records, and the record expected to win.
        struct Case {
            name: &'static str,
            global: Vec<PaxRecord>,
            local: Option<Vec<PaxRecord>>,
            expected: Option<PaxRecord>,
        }

        for case in [
            Case {
                name: "missing",
                global: Vec::new(),
                local: None,
                expected: None,
            },
            Case {
                name: "global",
                global: vec![comment("global")],
                local: None,
                expected: Some(comment("global")),
            },
            Case {
                name: "local overrides global",
                global: vec![comment("global")],
                local: Some(vec![comment("local")]),
                expected: Some(comment("local")),
            },
            Case {
                name: "last local duplicate wins",
                global: Vec::new(),
                local: Some(vec![comment("first"), comment("last")]),
                expected: Some(comment("last")),
            },
            Case {
                // The tombstone itself is the effective record; it is not
                // reported as missing.
                name: "local deletion suppresses global",
                global: vec![comment("global")],
                local: Some(vec![PaxRecord::Comment(PaxValue::Deleted)]),
                expected: Some(PaxRecord::Comment(PaxValue::Deleted)),
            },
        ] {
            let global = global_state(case.global);
            let state = PaxState::new(
                global.as_ref(),
                Vec::new(),
                case.local
                    .map(|records| extension(0, PaxKind::Local, records)),
            );
            assert_eq!(
                state.effective_record(&PaxKeyword::Comment),
                case.expected.as_ref(),
                "{}",
                case.name
            );
        }

        // Extension ordering: both globals in the order given, then the local.
        let state = PaxState::new(
            None,
            vec![
                extension(3, PaxKind::Global, vec![vendor("first", "value")]),
                extension(7, PaxKind::Global, vec![vendor("second", "value")]),
            ],
            Some(extension(
                11,
                PaxKind::Local,
                vec![vendor("local", "value")],
            )),
        );
        assert_eq!(
            state
                .extensions()
                .map(|extension| (extension.position, extension.kind))
                .collect::<Vec<_>>(),
            [
                (3, PaxKind::Global),
                (7, PaxKind::Global),
                (11, PaxKind::Local),
            ]
        );
    }
809
    /// Checks that a second global update mutates the existing global state
    /// rather than replacing it, and leaves the source records untouched.
    #[test]
    fn updates_effective_global_state_in_place() {
        let physical_records = Arc::new(PaxRecords(vec![comment("initial")]));
        let mut active = None;
        physical_records.apply_global(&mut active);
        // Remember where the state lives so the second update can be shown to reuse it.
        let initial_state = ptr::from_ref(active.as_ref().expect("global state should exist"));

        PaxRecords(vec![vendor("attribute", "value")]).apply_global(&mut active);

        assert_eq!(
            ptr::from_ref(active.as_ref().expect("global state should exist")),
            initial_state
        );
        // The records that were applied first are unchanged by the later update.
        assert_eq!(physical_records.as_slice(), [comment("initial")]);
    }
825
    /// Checks that a global deletion overwrites the earlier record in its slot
    /// and is then reported as the effective record.
    #[test]
    fn global_deletions_remain_effective_tombstones() {
        let initial = Arc::new(PaxRecords(vec![
            PaxRecord::Path(PaxValue::Value(utf8("global"))),
            vendor("kept", "value"),
        ]));
        let deletion = Arc::new(PaxRecords(vec![PaxRecord::Path(PaxValue::Deleted)]));
        let mut active = None;
        initial.apply_global(&mut active);
        deletion.apply_global(&mut active);

        let active_records = active.as_ref().expect("global state should exist");
        // Still two slots: the deletion replaced the `path` record in place.
        assert_eq!(active_records.records.as_slice().len(), 2);
        let state = PaxState::new(active.as_ref(), Vec::new(), None);
        assert_eq!(
            state.effective_record(&PaxKeyword::Path),
            Some(&PaxRecord::Path(PaxValue::Deleted))
        );
    }
845
    /// Checks the generic `FromStr` impl for `PaxValue`: empty text becomes a
    /// deletion and non-empty text is parsed by the inner type.
    #[test]
    fn parses_values_and_deletions_through_from_str() {
        assert!(matches!(
            "".parse::<PaxValue<String>>(),
            Ok(PaxValue::Deleted)
        ));
        assert!(matches!(
            "value".parse::<PaxValue<String>>(),
            Ok(PaxValue::Value(value)) if value == "value"
        ));
        assert!(matches!(
            "12".parse::<PaxValue<u64>>(),
            Ok(PaxValue::Value(12))
        ));
    }
861
862    #[test]
863    fn parses_strict_numeric_and_timestamp_values() {
864        assert!(matches!(
865            PaxValue::parse_integer("uid", b"12"),
866            Ok(PaxValue::Value(12))
867        ));
868        assert!(matches!(
869            PaxValue::parse_integer("uid", b""),
870            Ok(PaxValue::Deleted)
871        ));
872        assert!(matches!(
873            PaxValue::parse_time("mtime", b"12.034"),
874            Ok(PaxValue::Value(12))
875        ));
876        assert!(matches!(
877            PaxValue::parse_time("mtime", b""),
878            Ok(PaxValue::Deleted)
879        ));
880
881        for value in ["+1", "-1", "12x", "18446744073709551616"] {
882            assert!(matches!(
883                PaxValue::parse_integer("gid", value.as_bytes()),
884                Err(PaxError::InvalidInteger { .. })
885            ));
886        }
887        for value in ["+1", "-1", "1.", "1.nanosecond", "18446744073709551616"] {
888            assert!(matches!(
889                PaxValue::parse_time("atime", value.as_bytes()),
890                Err(PaxError::InvalidTime { .. })
891            ));
892        }
893    }
894
895    #[test]
896    fn parses_typed_standard_reserved_and_vendor_records() {
897        let fields = [
898            ("atime", "12.034"),
899            ("charset", "BINARY"),
900            ("comment", "a=b"),
901            ("ctime", "17.500"),
902            ("gid", "7"),
903            ("gname", "group"),
904            ("hdrcharset", UTF8_HDRCHARSET),
905            ("linkpath", "target"),
906            ("mtime", "42"),
907            ("path", "file"),
908            ("realtime.deadline", "soon"),
909            ("security.label", "secure"),
910            ("size", "0"),
911            ("uid", "8"),
912            ("uname", "user"),
913            ("Acme.attribute", "custom"),
914        ];
915        let mut payload = Vec::new();
916        for (keyword, value) in fields {
917            payload.extend_from_slice(&record(keyword, value));
918        }
919
920        let Ok(records) = PaxRecords::parse(&payload, HdrCharset::Utf8) else {
921            panic!("records should parse");
922        };
923        assert_eq!(
924            records.as_slice(),
925            [
926                PaxRecord::Atime(PaxValue::Value(12)),
927                PaxRecord::Charset(PaxValue::Value(text("BINARY"))),
928                comment("a=b"),
929                PaxRecord::Ctime(PaxValue::Value(17)),
930                PaxRecord::Gid(PaxValue::Value(7)),
931                PaxRecord::Gname(PaxValue::Value(utf8("group"))),
932                PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Utf8)),
933                PaxRecord::LinkPath(PaxValue::Value(utf8("target"))),
934                PaxRecord::Mtime(PaxValue::Value(42)),
935                PaxRecord::Path(PaxValue::Value(utf8("file"))),
936                PaxRecord::Realtime {
937                    name: text("deadline"),
938                    value: PaxValue::Value(text("soon")),
939                },
940                security("secure"),
941                PaxRecord::Size(PaxValue::Value(0)),
942                PaxRecord::Uid(PaxValue::Value(8)),
943                PaxRecord::Uname(PaxValue::Value(utf8("user"))),
944                vendor("attribute", "custom"),
945            ]
946        );
947        assert!(
948            records
949                .as_slice()
950                .iter()
951                .zip(fields)
952                .all(|(record, (keyword, _))| record.keyword().to_string() == keyword)
953        );
954    }
955
956    #[test]
957    fn parses_deleted_ctime_compatibility_extension() {
958        let Ok(records) = PaxRecords::parse(&record("ctime", ""), HdrCharset::Utf8) else {
959            panic!("ctime deletion should parse");
960        };
961        assert_eq!(records.as_slice(), [PaxRecord::Ctime(PaxValue::Deleted)]);
962    }
963
964    #[test]
965    fn rejects_invalid_records_and_keywords() {
966        for payload in [
967            b"11 path=name".as_slice(),
968            b"12 pathname\n".as_slice(),
969            b"99 path=name\n".as_slice(),
970            b"+12 path=name\n".as_slice(),
971        ] {
972            assert!(matches!(
973                PaxRecords::parse(payload, HdrCharset::Utf8),
974                Err(PaxError::InvalidRecords { .. })
975            ));
976        }
977
978        let invalid_utf8 = raw_record(b"path", &[0xff]);
979        assert!(matches!(
980            PaxRecords::parse(&invalid_utf8, HdrCharset::Utf8),
981            Err(PaxError::InvalidUtf8)
982        ));
983
984        for keyword in ["unknown", "VENDOR", "VENDOR.", "realtime.", "security."] {
985            assert!(matches!(
986                PaxRecord::parse(keyword, b"value", HdrCharset::Utf8),
987                Err(PaxError::InvalidKeyword { .. })
988            ));
989        }
990    }
991
992    #[test]
993    fn applies_namespaced_globals_and_accepts_supported_hdrcharset_records() {
994        let mut active = global_state(vec![
995            vendor("first", "old"),
996            vendor("second", "kept"),
997            security("old"),
998        ]);
999        let update = Arc::new(PaxRecords(vec![vendor("first", "new"), security("new")]));
1000        update.apply_global(&mut active);
1001        let active = active.as_ref().expect("global state should exist");
1002        assert_eq!(active.records.as_slice().len(), 3);
1003        assert_eq!(
1004            active.get(&PaxKeyword::Vendor {
1005                vendor: text("Acme"),
1006                name: text("first"),
1007            }),
1008            Some(&vendor("first", "new"))
1009        );
1010        assert_eq!(
1011            active.get(&PaxKeyword::Security(text("label"))),
1012            Some(&security("new"))
1013        );
1014
1015        for (case, payload) in [
1016            (
1017                "supported hdrcharset",
1018                record("hdrcharset", UTF8_HDRCHARSET),
1019            ),
1020            ("deleted hdrcharset", record("hdrcharset", "")),
1021            ("member data charset", record("charset", "BINARY")),
1022        ] {
1023            assert!(
1024                PaxRecords::parse(&payload, HdrCharset::Utf8).is_ok(),
1025                "{case}"
1026            );
1027        }
1028
1029        let mut binary_values = record("hdrcharset", BINARY_HDRCHARSET);
1030        for (keyword, value) in [
1031            (b"gname".as_slice(), [0xfc]),
1032            (b"linkpath".as_slice(), [0xfd]),
1033            (b"path".as_slice(), [0xfe]),
1034            (b"uname".as_slice(), [0xff]),
1035        ] {
1036            binary_values.extend_from_slice(&raw_record(keyword, &value));
1037        }
1038        let Ok(binary_records) = PaxRecords::parse(&binary_values, HdrCharset::Utf8) else {
1039            panic!("binary records should parse");
1040        };
1041        assert_eq!(
1042            binary_records.as_slice(),
1043            [
1044                PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Binary)),
1045                PaxRecord::Gname(PaxValue::Value(binary(&[0xfc]))),
1046                PaxRecord::LinkPath(PaxValue::Value(binary(&[0xfd]))),
1047                PaxRecord::Path(PaxValue::Value(binary(&[0xfe]))),
1048                PaxRecord::Uname(PaxValue::Value(binary(&[0xff]))),
1049            ]
1050        );
1051        let inherited_binary_path = raw_record(b"path", &[0xfe]);
1052        let Ok(inherited_records) = PaxRecords::parse(&inherited_binary_path, HdrCharset::Binary)
1053        else {
1054            panic!("inherited binary records should parse");
1055        };
1056        assert_eq!(
1057            inherited_records.as_slice(),
1058            [PaxRecord::Path(PaxValue::Value(binary(&[0xfe])))]
1059        );
1060        let mut reset_to_utf8 = record("hdrcharset", "");
1061        reset_to_utf8.extend_from_slice(&raw_record(b"path", &[0xfd]));
1062        assert!(matches!(
1063            PaxRecords::parse(&reset_to_utf8, HdrCharset::Binary),
1064            Err(PaxError::InvalidUtf8)
1065        ));
1066        let mut binary_comment = record("hdrcharset", BINARY_HDRCHARSET);
1067        binary_comment.extend_from_slice(&raw_record(b"comment", &[0xff]));
1068        assert!(matches!(
1069            PaxRecords::parse(&binary_comment, HdrCharset::Utf8),
1070            Err(PaxError::InvalidUtf8)
1071        ));
1072
1073        let unsupported_value = "ISO-IR 8859 1 1998";
1074        let mut overridden_unsupported = record("hdrcharset", unsupported_value);
1075        overridden_unsupported.extend_from_slice(&record("hdrcharset", UTF8_HDRCHARSET));
1076        for unsupported in [
1077            record("hdrcharset", unsupported_value),
1078            overridden_unsupported,
1079        ] {
1080            assert!(matches!(
1081                PaxRecords::parse(&unsupported, HdrCharset::Utf8),
1082                Err(PaxError::UnsupportedCharset { .. })
1083            ));
1084        }
1085    }
1086}