Skip to main content

tar_framing/
pax.rs

1//! PAX record parsing, active-global updates, and per-member metadata state.
2
3use std::{
4    collections::{HashMap, hash_map::Entry},
5    fmt,
6    str::FromStr,
7    sync::Arc,
8};
9
10use super::PaxKind;
11
/// The `hdrcharset` record value that [`HdrCharset`]'s `FromStr` impl maps to
/// [`HdrCharset::Utf8`]. The comparison is exact (case- and space-sensitive).
const UTF8_HDRCHARSET: &str = "ISO-IR 10646 2000 UTF-8";
/// The `hdrcharset` record value that [`HdrCharset`]'s `FromStr` impl maps to
/// [`HdrCharset::Binary`]. The comparison is exact (case-sensitive).
const BINARY_HDRCHARSET: &str = "BINARY";
14
/// An error encountered while parsing pax extended-header records.
///
/// Variants either describe a framing problem in the raw payload or identify
/// the keyword/value whose typed interpretation failed. Rejected text is
/// carried as an owned `String` so the error does not borrow from the payload.
#[derive(Debug, thiserror::Error)]
pub enum PaxError {
    /// A pax payload did not consist of valid extended-header records.
    ///
    /// Reported for framing violations: an empty payload, a missing, empty or
    /// non-decimal record length, a length that runs past the payload, a
    /// record that is not newline terminated, or a missing/empty keyword.
    #[error("invalid pax records: {reason}")]
    InvalidRecords {
        /// A concise description of the grammar violation.
        reason: &'static str,
    },
    /// A pax text component that must be UTF-8 is not valid UTF-8.
    ///
    /// Applies to record keywords and to every value that is decoded as text.
    #[error("pax records contain invalid UTF-8 text")]
    InvalidUtf8,
    /// A pax record keyword is neither standard nor an accepted namespaced extension.
    ///
    /// A namespaced keyword is accepted only in the form `namespace.name` with
    /// both parts non-empty.
    #[error("invalid or unknown pax keyword {keyword:?}")]
    InvalidKeyword {
        /// The rejected keyword.
        keyword: String,
    },
    /// A pax decimal integer field is malformed or exceeds this API's integer range.
    ///
    /// Values must be unsigned decimal text that fits in `u64`; a leading `+`
    /// or `-` is rejected.
    #[error("invalid pax {keyword} value: {value:?}")]
    InvalidInteger {
        /// The affected standard keyword.
        keyword: &'static str,
        /// The rejected textual value.
        value: String,
    },
    /// A pax file-time value is malformed or exceeds this API's integer range.
    ///
    /// The integral seconds must fit in `u64`; if a `.` is present it must be
    /// followed by at least one ASCII digit and nothing else.
    #[error("invalid pax {keyword} time value: {value:?}")]
    InvalidTime {
        /// The affected standard keyword.
        keyword: &'static str,
        /// The rejected textual value.
        value: String,
    },
    /// A pax `hdrcharset` record requests text encoding unsupported by this API.
    #[error("unsupported pax hdrcharset value {value:?}")]
    UnsupportedCharset {
        /// The unsupported character-set identifier.
        value: String,
    },
    /// A pax record length or offset overflowed.
    ///
    /// Reported when a declared record length does not fit in `usize`, or when
    /// adding it to the current payload offset overflows.
    #[error("arithmetic overflow while computing {context}")]
    ArithmeticOverflow {
        /// The computation that overflowed.
        context: &'static str,
    },
}
62
/// A reference-counted [`PaxRecords`] list, as held by [`PaxExtension`].
pub(crate) type SharedPaxRecords = Arc<PaxRecords>;

/// The typed records of one pax extended header, stored in parse order.
///
/// Duplicate keywords are retained; keyword lookups search from the back so the
/// last record for a keyword is the one that takes effect.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub(crate) struct PaxRecords(Vec<PaxRecord>);
67
/// An owned, hashable identifier for a pax extended-header keyword.
///
/// Standard keywords are unit variants. Namespaced keywords carry the text
/// around the first `.` so that distinct attributes compare and hash as
/// distinct keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PaxKeyword {
    /// `atime`: file access time.
    Atime,
    /// `charset`: encoding of the following member's file data.
    Charset,
    /// `comment`: uninterpreted archive comment.
    Comment,
    /// `ctime`: file status-change time compatibility extension.
    Ctime,
    /// `gid`: numeric group identifier.
    Gid,
    /// `gname`: group name.
    Gname,
    /// `hdrcharset`: encoding of pathname and user/group-name values.
    HdrCharset,
    /// `linkpath`: link pathname.
    LinkPath,
    /// `mtime`: file modification time.
    Mtime,
    /// `path`: member pathname.
    Path,
    /// A reserved `realtime.*` attribute; holds the suffix after `realtime.`.
    Realtime(Arc<str>),
    /// A reserved `security.*` attribute; holds the suffix after `security.`.
    Security(Arc<str>),
    /// `size`: member payload size.
    Size,
    /// `uid`: numeric user identifier.
    Uid,
    /// `uname`: user name.
    Uname,
    /// An implementation extension in a `vendor.keyword` namespace.
    Vendor {
        /// Vendor or organization identifier (the text before the first `.`).
        vendor: Arc<str>,
        /// Keyword suffix after the vendor namespace.
        name: Arc<str>,
    },
}

impl PaxKeyword {
    /// Splits the keyword into its leading component and optional suffix.
    ///
    /// Standard keywords yield `(keyword, None)`. Namespaced keywords yield
    /// `(namespace, Some(name))`, where joining the two with `.` gives the
    /// keyword text.
    pub(crate) fn components(&self) -> (&str, Option<&str>) {
        let (head, suffix): (&str, Option<&Arc<str>>) = match self {
            // Namespaced keywords: the suffix is stored in the variant.
            Self::Realtime(name) => ("realtime", Some(name)),
            Self::Security(name) => ("security", Some(name)),
            Self::Vendor { vendor, name } => (&**vendor, Some(name)),
            // Standard keywords: fixed spelling, no suffix.
            Self::Atime => ("atime", None),
            Self::Charset => ("charset", None),
            Self::Comment => ("comment", None),
            Self::Ctime => ("ctime", None),
            Self::Gid => ("gid", None),
            Self::Gname => ("gname", None),
            Self::HdrCharset => ("hdrcharset", None),
            Self::LinkPath => ("linkpath", None),
            Self::Mtime => ("mtime", None),
            Self::Path => ("path", None),
            Self::Size => ("size", None),
            Self::Uid => ("uid", None),
            Self::Uname => ("uname", None),
        };
        (head, suffix.map(|name| &**name))
    }
}

impl fmt::Display for PaxKeyword {
    /// Writes the keyword text: `namespace.name` for namespaced keywords and
    /// the bare keyword otherwise.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.components() {
            (namespace, Some(name)) => write!(formatter, "{namespace}.{name}"),
            (bare, None) => formatter.write_str(bare),
        }
    }
}
144
145/// Like [`PaxRecords`], but with an additional index of `keyword -> effective record index`
146/// to keep lookups cheap, even across pathological pax archives (e.g. multiple
147/// global extensions being merged together).
148#[derive(Debug, Default, Eq, PartialEq)]
149pub(crate) struct GlobalPaxRecords {
150    records: PaxRecords,
151    indices: HashMap<PaxKeyword, usize>,
152}
153
154impl GlobalPaxRecords {
155    fn apply(&mut self, updates: &PaxRecords) {
156        for update in updates.as_slice() {
157            match self.indices.entry(update.keyword()) {
158                Entry::Occupied(entry) => self.records.0[*entry.get()] = update.clone(),
159                Entry::Vacant(entry) => {
160                    let index = self.records.0.len();
161                    self.records.0.push(update.clone());
162                    entry.insert(index);
163                }
164            }
165        }
166    }
167
168    fn get(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
169        self.indices
170            .get(keyword)
171            .and_then(|index| self.records.as_slice().get(*index))
172    }
173
174    pub(super) fn hdrcharset(&self) -> HdrCharset {
175        self.get(&PaxKeyword::HdrCharset)
176            .and_then(|record| match record {
177                PaxRecord::HdrCharset(value) => Some(value),
178                _ => None,
179            })
180            .map_or(HdrCharset::Utf8, |value| match value {
181                PaxValue::Value(value) => *value,
182                PaxValue::Deleted => HdrCharset::Utf8,
183            })
184    }
185}
186
187/// One positioned parsed pax extended header.
188#[derive(Clone, Debug, Eq, PartialEq)]
189pub struct PaxExtension {
190    /// The absolute byte position of the pax extension header block.
191    pub position: u64,
192    /// Whether this extension has local or global scope.
193    pub kind: PaxKind,
194    records: SharedPaxRecords,
195}
196
197impl PaxExtension {
198    pub(crate) fn new(position: u64, kind: PaxKind, records: SharedPaxRecords) -> Self {
199        Self {
200            position,
201            kind,
202            records,
203        }
204    }
205
206    /// Returns the parsed pax records in archive order.
207    pub fn records(&self) -> &[PaxRecord] {
208        self.records.as_slice()
209    }
210}
211
212/// Unified pax metadata state applicable to one ordinary member.
213///
214/// Effective values apply local records over the active global state using
215/// standard last-record-wins and deletion semantics. [`Self::extensions`]
216/// retains the positioned extension headers newly encountered for this member.
217/// The effective global state is borrowed from the originating logical reader,
218/// so retaining this view also prevents that reader from advancing to another
219/// member whose global state could differ.
220#[derive(Clone, Debug, Eq, PartialEq)]
221pub struct PaxState<'global> {
222    global_records: Option<&'global GlobalPaxRecords>,
223    global_extensions: Vec<PaxExtension>,
224    local_extension: Option<PaxExtension>,
225}
226
227impl<'global> PaxState<'global> {
228    pub(crate) fn new(
229        global_records: Option<&'global GlobalPaxRecords>,
230        global_extensions: Vec<PaxExtension>,
231        local_extension: Option<PaxExtension>,
232    ) -> Self {
233        Self {
234            global_records,
235            global_extensions,
236            local_extension,
237        }
238    }
239
240    /// Returns positioned extensions newly encountered for this member.
241    ///
242    /// Global extensions are yielded in source order, followed by the optional
243    /// local extension.
244    pub fn extensions(&self) -> impl Iterator<Item = &PaxExtension> {
245        self.global_extensions
246            .iter()
247            .chain(self.local_extension.iter())
248    }
249
250    /// Returns the final applicable record for `keyword`, including deletions.
251    pub fn effective_record(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
252        let local_records = self
253            .local_extension
254            .as_ref()
255            .map(|extension| extension.records.as_ref());
256        Self::effective_record_from(local_records, self.global_records, keyword)
257    }
258
259    pub(super) fn effective_size<'records>(
260        local_records: Option<&'records PaxRecords>,
261        global_records: Option<&'records GlobalPaxRecords>,
262    ) -> Option<&'records PaxValue<u64>> {
263        Self::effective_record_from(local_records, global_records, &PaxKeyword::Size).and_then(
264            |record| match record {
265                PaxRecord::Size(value) => Some(value),
266                _ => None,
267            },
268        )
269    }
270
271    pub(super) fn effective_record_from<'records>(
272        local_records: Option<&'records PaxRecords>,
273        global_records: Option<&'records GlobalPaxRecords>,
274        keyword: &PaxKeyword,
275    ) -> Option<&'records PaxRecord> {
276        local_records
277            .and_then(|records| records.get(keyword))
278            .or_else(|| global_records.and_then(|records| records.get(keyword)))
279    }
280}
281
282/// A character encoding for PAX pathname and user/group-name values.
283#[derive(Clone, Copy, Debug, Eq, PartialEq)]
284pub enum HdrCharset {
285    /// UTF-8 extended-header text.
286    Utf8,
287    /// Unencoded bytes copied from the originating system.
288    Binary,
289}
290
291impl FromStr for HdrCharset {
292    type Err = String;
293
294    fn from_str(value: &str) -> Result<Self, Self::Err> {
295        match value {
296            UTF8_HDRCHARSET => Ok(Self::Utf8),
297            BINARY_HDRCHARSET => Ok(Self::Binary),
298            _ => Err(value.to_owned()),
299        }
300    }
301}
302
/// A character value governed by the effective PAX [`HdrCharset`].
///
/// The variant records how the value was decoded: as validated UTF-8 text, or
/// as the raw bytes when the effective charset is [`HdrCharset::Binary`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PaxString {
    /// A value declared or defaulted to UTF-8.
    Utf8(Arc<str>),
    /// A value declared as unencoded binary bytes; stored without UTF-8 validation.
    Binary(Arc<[u8]>),
}
311
/// A parsed pax value, or an explicit deletion tombstone.
///
/// pax gives empty record values special meaning: an empty value deletes
/// "any header block field, previously entered extended header value, or global
/// extended header value of the same name."
///
/// A tombstone is therefore different from a record that is simply missing.
/// A missing record still allows fallbacks, e.g. to global pax headers or to
/// the equivalent ustar field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PaxValue<T> {
    /// This record sets or overrides the attribute.
    Value(T),
    /// This record deletes the attribute from its applicable scope.
    Deleted,
}

impl<T: FromStr> FromStr for PaxValue<T> {
    type Err = T::Err;

    /// Maps the empty string to [`PaxValue::Deleted`] and defers every other
    /// input to `T`'s own `FromStr` implementation.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        match value {
            "" => Ok(Self::Deleted),
            text => T::from_str(text).map(Self::Value),
        }
    }
}
340
341impl<T> PaxValue<T> {
342    fn parse_utf8(value: &[u8]) -> Result<&str, PaxError> {
343        std::str::from_utf8(value).map_err(|_| PaxError::InvalidUtf8)
344    }
345}
346
/// A parsed pax extended-header record.
///
/// Each variant pairs a keyword with its typed [`PaxValue`]; an empty value in
/// the archive is represented as [`PaxValue::Deleted`]. Numeric and time
/// values are unsigned 64-bit integers.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PaxRecord {
    /// File access time in integral seconds; fractional seconds are discarded.
    Atime(PaxValue<u64>),
    /// Encoding of the following member's file data.
    // TODO: Consider enforcing known values here, similarly to what we do for `hdrcharset`.
    Charset(PaxValue<Arc<str>>),
    /// An uninterpreted archive comment.
    Comment(PaxValue<Arc<str>>),
    /// File status-change time compatibility extension in integral seconds.
    ///
    /// NOTE: newer versions of the pax spec don't include this record.
    /// We support it for backwards compatibility.
    ///
    /// See: <https://www.opengroup.org/austin/aardvark/finaltext/xcubug.txt>
    /// See: <https://www.opengroup.org/austin/docs/austin_166.txt>
    /// See: <https://www.opengroup.org/austin/docs/austin_206.txt>
    Ctime(PaxValue<u64>),
    /// Numeric group identifier.
    Gid(PaxValue<u64>),
    /// Group name encoded according to the effective [`HdrCharset`].
    Gname(PaxValue<PaxString>),
    /// Encoding of pathname and user/group-name extended-header values.
    HdrCharset(PaxValue<HdrCharset>),
    /// Link pathname encoded according to the effective [`HdrCharset`].
    LinkPath(PaxValue<PaxString>),
    /// File modification time in integral seconds; fractional seconds are discarded.
    Mtime(PaxValue<u64>),
    /// Member pathname encoded according to the effective [`HdrCharset`].
    Path(PaxValue<PaxString>),
    /// A reserved `realtime.*` extended attribute; its value must be UTF-8 text.
    Realtime {
        /// Keyword suffix after `realtime.`.
        name: Arc<str>,
        /// Attribute value or deletion tombstone.
        value: PaxValue<Arc<str>>,
    },
    /// A reserved `security.*` extended attribute; its value must be UTF-8 text.
    Security {
        /// Keyword suffix after `security.`.
        name: Arc<str>,
        /// Attribute value or deletion tombstone.
        value: PaxValue<Arc<str>>,
    },
    /// Member payload size in octets.
    Size(PaxValue<u64>),
    /// Numeric user identifier.
    Uid(PaxValue<u64>),
    /// User name encoded according to the effective [`HdrCharset`].
    Uname(PaxValue<PaxString>),
    /// An implementation extension in a `vendor.keyword` namespace.
    ///
    /// The value is kept as opaque bytes and is not required to be UTF-8.
    Vendor {
        /// Vendor or organization identifier.
        vendor: Arc<str>,
        /// Keyword suffix after the vendor namespace.
        name: Arc<str>,
        /// Opaque attribute bytes or deletion tombstone.
        value: PaxValue<Arc<[u8]>>,
    },
}
408
409impl PaxRecord {
410    /// Returns this record's typed pax keyword.
411    pub fn keyword(&self) -> PaxKeyword {
412        match self {
413            Self::Atime(_) => PaxKeyword::Atime,
414            Self::Charset(_) => PaxKeyword::Charset,
415            Self::Comment(_) => PaxKeyword::Comment,
416            Self::Ctime(_) => PaxKeyword::Ctime,
417            Self::Gid(_) => PaxKeyword::Gid,
418            Self::Gname(_) => PaxKeyword::Gname,
419            Self::HdrCharset(_) => PaxKeyword::HdrCharset,
420            Self::LinkPath(_) => PaxKeyword::LinkPath,
421            Self::Mtime(_) => PaxKeyword::Mtime,
422            Self::Path(_) => PaxKeyword::Path,
423            Self::Realtime { name, .. } => PaxKeyword::Realtime(Arc::clone(name)),
424            Self::Security { name, .. } => PaxKeyword::Security(Arc::clone(name)),
425            Self::Size(_) => PaxKeyword::Size,
426            Self::Uid(_) => PaxKeyword::Uid,
427            Self::Uname(_) => PaxKeyword::Uname,
428            Self::Vendor { vendor, name, .. } => PaxKeyword::Vendor {
429                vendor: Arc::clone(vendor),
430                name: Arc::clone(name),
431            },
432        }
433    }
434
435    fn parse(keyword: &str, value: &[u8], hdrcharset: HdrCharset) -> Result<Self, PaxError> {
436        match keyword {
437            "atime" => PaxValue::parse_time("atime", value).map(Self::Atime),
438            "charset" => PaxValue::parse_text(value).map(Self::Charset),
439            "comment" => PaxValue::parse_text(value).map(Self::Comment),
440            "ctime" => PaxValue::parse_time("ctime", value).map(Self::Ctime),
441            "gid" => PaxValue::parse_integer("gid", value).map(Self::Gid),
442            "gname" => PaxValue::parse_string(value, hdrcharset).map(Self::Gname),
443            "hdrcharset" => PaxValue::parse_hdrcharset(value).map(Self::HdrCharset),
444            "linkpath" => PaxValue::parse_string(value, hdrcharset).map(Self::LinkPath),
445            "mtime" => PaxValue::parse_time("mtime", value).map(Self::Mtime),
446            "path" => PaxValue::parse_string(value, hdrcharset).map(Self::Path),
447            "size" => PaxValue::parse_integer("size", value).map(Self::Size),
448            "uid" => PaxValue::parse_integer("uid", value).map(Self::Uid),
449            "uname" => PaxValue::parse_string(value, hdrcharset).map(Self::Uname),
450            _ => Self::parse_namespaced(keyword, value),
451        }
452    }
453
454    fn parse_namespaced(keyword: &str, value: &[u8]) -> Result<Self, PaxError> {
455        let invalid = || PaxError::InvalidKeyword {
456            keyword: keyword.to_owned(),
457        };
458        let (namespace, name) = match keyword.split_once('.') {
459            Some((namespace, name)) if !name.is_empty() => (namespace, name),
460            _ => return Err(invalid()),
461        };
462        match namespace {
463            "realtime" => Ok(Self::Realtime {
464                name: Arc::from(name),
465                value: PaxValue::parse_text(value)?,
466            }),
467            "security" => Ok(Self::Security {
468                name: Arc::from(name),
469                value: PaxValue::parse_text(value)?,
470            }),
471            vendor if !vendor.is_empty() => Ok(Self::Vendor {
472                vendor: Arc::from(vendor),
473                name: Arc::from(name),
474                value: PaxValue::parse_opaque(value),
475            }),
476            _ => Err(invalid()),
477        }
478    }
479}
480
481impl PaxRecords {
482    pub(crate) fn as_slice(&self) -> &[PaxRecord] {
483        &self.0
484    }
485
486    pub(super) fn parse(
487        payload: &[u8],
488        inherited_hdrcharset: HdrCharset,
489    ) -> Result<Self, PaxError> {
490        if payload.is_empty() {
491            return Err(PaxError::InvalidRecords {
492                reason: "local extended header payload contains no records",
493            });
494        }
495
496        let mut records = Vec::new();
497        let mut cursor = 0;
498        while cursor < payload.len() {
499            let length_end = payload[cursor..]
500                .iter()
501                .position(|byte| *byte == b' ')
502                .ok_or(PaxError::InvalidRecords {
503                    reason: "record is missing its length separator",
504                })?
505                + cursor;
506            if length_end == cursor {
507                return Err(PaxError::InvalidRecords {
508                    reason: "record length is empty",
509                });
510            }
511            let record_len = std::str::from_utf8(&payload[cursor..length_end])
512                .ok()
513                .and_then(decimal_u64)
514                .ok_or(PaxError::InvalidRecords {
515                    reason: "record length is not a valid decimal integer",
516                })?;
517            let record_len =
518                usize::try_from(record_len).map_err(|_| PaxError::ArithmeticOverflow {
519                    context: "pax record length",
520                })?;
521            let record_end =
522                cursor
523                    .checked_add(record_len)
524                    .ok_or(PaxError::ArithmeticOverflow {
525                        context: "pax record end",
526                    })?;
527            if record_end > payload.len() {
528                return Err(PaxError::InvalidRecords {
529                    reason: "record length exceeds extended header payload",
530                });
531            }
532            let record = &payload[cursor..record_end];
533            if record.last() != Some(&b'\n') {
534                return Err(PaxError::InvalidRecords {
535                    reason: "record is not newline terminated",
536                });
537            }
538            let content_start = length_end - cursor + 1;
539            let equals = record[content_start..record.len() - 1]
540                .iter()
541                .position(|byte| *byte == b'=')
542                .ok_or(PaxError::InvalidRecords {
543                    reason: "record is missing its keyword/value separator",
544                })?
545                + content_start;
546            if equals == content_start {
547                return Err(PaxError::InvalidRecords {
548                    reason: "record keyword is empty",
549                });
550            }
551            let keyword = std::str::from_utf8(&record[content_start..equals])
552                .map_err(|_| PaxError::InvalidUtf8)?;
553            records.push((keyword, &record[equals + 1..record.len() - 1]));
554            cursor = record_end;
555        }
556
557        // Per pax spec: the `gname`, `linkpath`, `path`, and `uname` records
558        // are encoded according to `hdrcharset`, so we need to first parse
559        // it (or take it from a parent global pax header) before we can parse
560        // the other pax records, regardless of order.
561        //
562        // See: pax spec, "pax Extended Header"
563        let hdrcharset = Self::resolve_hdrcharset(&records, inherited_hdrcharset)?;
564        records
565            .into_iter()
566            .map(|(keyword, value)| PaxRecord::parse(keyword, value, hdrcharset))
567            .collect::<Result<Vec<_>, _>>()
568            .map(Self)
569    }
570
571    fn resolve_hdrcharset(
572        records: &[(&str, &[u8])],
573        inherited: HdrCharset,
574    ) -> Result<HdrCharset, PaxError> {
575        let mut hdrcharset = inherited;
576        // TODO: Consider finding the last `hdrcharset` with a reverse search to avoid parsing
577        // shadowed values here. All records would still be validated during typed parsing.
578        for (keyword, value) in records {
579            if *keyword == "hdrcharset" {
580                hdrcharset = match PaxValue::parse_hdrcharset(value)? {
581                    PaxValue::Value(value) => value,
582                    PaxValue::Deleted => HdrCharset::Utf8,
583                };
584            }
585        }
586        Ok(hdrcharset)
587    }
588
589    fn get(&self, keyword: &PaxKeyword) -> Option<&PaxRecord> {
590        self.0
591            .iter()
592            .rev()
593            .find(|record| record.keyword() == *keyword)
594    }
595
596    pub(super) fn apply_global(&self, active: &mut Option<GlobalPaxRecords>) {
597        active.get_or_insert_default().apply(self);
598    }
599}
600
601impl PaxValue<Arc<str>> {
602    fn parse_text(value: &[u8]) -> Result<Self, PaxError> {
603        Self::parse_utf8(value).map(|value| match value {
604            "" => Self::Deleted,
605            value => Self::Value(Arc::from(value)),
606        })
607    }
608}
609
610impl PaxValue<Arc<[u8]>> {
611    fn parse_opaque(value: &[u8]) -> Self {
612        if value.is_empty() {
613            Self::Deleted
614        } else {
615            Self::Value(Arc::from(value))
616        }
617    }
618}
619
620impl PaxValue<PaxString> {
621    /// Parses a pax "string", taking the effective [`HdrCharset`] into account.
622    fn parse_string(value: &[u8], hdrcharset: HdrCharset) -> Result<Self, PaxError> {
623        if value.is_empty() {
624            return Ok(Self::Deleted);
625        }
626        match hdrcharset {
627            HdrCharset::Utf8 => Self::parse_utf8(value)
628                .map(Arc::from)
629                .map(PaxString::Utf8)
630                .map(Self::Value),
631            HdrCharset::Binary => Ok(Self::Value(PaxString::Binary(Arc::from(value)))),
632        }
633    }
634}
635
636impl PaxValue<HdrCharset> {
637    fn parse_hdrcharset(value: &[u8]) -> Result<Self, PaxError> {
638        let value = Self::parse_utf8(value)?;
639        value
640            .parse()
641            .map_err(|value| PaxError::UnsupportedCharset { value })
642    }
643}
644
645impl PaxValue<u64> {
646    fn parse_integer(keyword: &'static str, value: &[u8]) -> Result<Self, PaxError> {
647        let value = Self::parse_utf8(value)?;
648        if value.is_empty() {
649            return Ok(Self::Deleted);
650        }
651
652        decimal_u64(value)
653            .map(Self::Value)
654            .ok_or_else(|| PaxError::InvalidInteger {
655                keyword,
656                value: value.to_owned(),
657            })
658    }
659
660    fn parse_time(keyword: &'static str, value: &[u8]) -> Result<Self, PaxError> {
661        let value = Self::parse_utf8(value)?;
662        if value.is_empty() {
663            return Ok(Self::Deleted);
664        }
665
666        let invalid = || PaxError::InvalidTime {
667            keyword,
668            value: value.to_owned(),
669        };
670        let seconds = match value.split_once('.') {
671            Some((seconds, fractional_digits))
672                if !fractional_digits.is_empty()
673                    && fractional_digits.bytes().all(|byte| byte.is_ascii_digit()) =>
674            {
675                seconds
676            }
677            Some(_) => return Err(invalid()),
678            None => value,
679        };
680        decimal_u64(seconds).map(Self::Value).ok_or_else(invalid)
681    }
682}
683
/// Parses `value` as an unsigned decimal integer, returning `None` on failure.
///
/// A leading `+` is rejected explicitly; everything else follows
/// `str::parse::<u64>`, so empty input, non-digit characters and values above
/// `u64::MAX` also yield `None`.
fn decimal_u64(value: &str) -> Option<u64> {
    match value.strip_prefix('+') {
        Some(_) => None,
        None => value.parse::<u64>().ok(),
    }
}
690
691#[cfg(test)]
692mod tests {
693    use std::ptr;
694
695    use super::*;
696    use crate::test_support::{raw_record, record};
697
698    fn text(value: &str) -> Arc<str> {
699        Arc::from(value)
700    }
701
702    fn comment(value: &str) -> PaxRecord {
703        PaxRecord::Comment(PaxValue::Value(text(value)))
704    }
705
706    fn utf8(value: &str) -> PaxString {
707        PaxString::Utf8(text(value))
708    }
709
710    fn binary(value: &[u8]) -> PaxString {
711        PaxString::Binary(Arc::from(value))
712    }
713
714    fn opaque(value: &[u8]) -> Arc<[u8]> {
715        Arc::from(value)
716    }
717
718    fn vendor(name: &str, value: &str) -> PaxRecord {
719        PaxRecord::Vendor {
720            vendor: text("Acme"),
721            name: text(name),
722            value: PaxValue::Value(opaque(value.as_bytes())),
723        }
724    }
725
726    fn security(value: &str) -> PaxRecord {
727        PaxRecord::Security {
728            name: text("label"),
729            value: PaxValue::Value(text(value)),
730        }
731    }
732
733    fn global_state(records: Vec<PaxRecord>) -> Option<GlobalPaxRecords> {
734        let mut active = None;
735        PaxRecords(records).apply_global(&mut active);
736        active
737    }
738
739    fn extension(position: u64, kind: PaxKind, records: Vec<PaxRecord>) -> PaxExtension {
740        PaxExtension::new(position, kind, Arc::new(PaxRecords(records)))
741    }
742
743    #[test]
744    fn resolves_state_precedence_and_preserves_extension_order() {
745        struct Case {
746            name: &'static str,
747            global: Vec<PaxRecord>,
748            local: Option<Vec<PaxRecord>>,
749            expected: Option<PaxRecord>,
750        }
751
752        for case in [
753            Case {
754                name: "missing",
755                global: Vec::new(),
756                local: None,
757                expected: None,
758            },
759            Case {
760                name: "global",
761                global: vec![comment("global")],
762                local: None,
763                expected: Some(comment("global")),
764            },
765            Case {
766                name: "local overrides global",
767                global: vec![comment("global")],
768                local: Some(vec![comment("local")]),
769                expected: Some(comment("local")),
770            },
771            Case {
772                name: "last local duplicate wins",
773                global: Vec::new(),
774                local: Some(vec![comment("first"), comment("last")]),
775                expected: Some(comment("last")),
776            },
777            Case {
778                name: "local deletion suppresses global",
779                global: vec![comment("global")],
780                local: Some(vec![PaxRecord::Comment(PaxValue::Deleted)]),
781                expected: Some(PaxRecord::Comment(PaxValue::Deleted)),
782            },
783        ] {
784            let global = global_state(case.global);
785            let state = PaxState::new(
786                global.as_ref(),
787                Vec::new(),
788                case.local
789                    .map(|records| extension(0, PaxKind::Local, records)),
790            );
791            assert_eq!(
792                state.effective_record(&PaxKeyword::Comment),
793                case.expected.as_ref(),
794                "{}",
795                case.name
796            );
797        }
798
799        let state = PaxState::new(
800            None,
801            vec![
802                extension(3, PaxKind::Global, vec![vendor("first", "value")]),
803                extension(7, PaxKind::Global, vec![vendor("second", "value")]),
804            ],
805            Some(extension(
806                11,
807                PaxKind::Local,
808                vec![vendor("local", "value")],
809            )),
810        );
811        assert_eq!(
812            state
813                .extensions()
814                .map(|extension| (extension.position, extension.kind))
815                .collect::<Vec<_>>(),
816            [
817                (3, PaxKind::Global),
818                (7, PaxKind::Global),
819                (11, PaxKind::Local),
820            ]
821        );
822    }
823
824    #[test]
825    fn updates_effective_global_state_in_place() {
826        let physical_records = Arc::new(PaxRecords(vec![comment("initial")]));
827        let mut active = None;
828        physical_records.apply_global(&mut active);
829        let initial_state = ptr::from_ref(active.as_ref().expect("global state should exist"));
830
831        PaxRecords(vec![vendor("attribute", "value")]).apply_global(&mut active);
832
833        assert_eq!(
834            ptr::from_ref(active.as_ref().expect("global state should exist")),
835            initial_state
836        );
837        assert_eq!(physical_records.as_slice(), [comment("initial")]);
838    }
839
840    #[test]
841    fn global_deletions_remain_effective_tombstones() {
842        let initial = Arc::new(PaxRecords(vec![
843            PaxRecord::Path(PaxValue::Value(utf8("global"))),
844            vendor("kept", "value"),
845        ]));
846        let deletion = Arc::new(PaxRecords(vec![PaxRecord::Path(PaxValue::Deleted)]));
847        let mut active = None;
848        initial.apply_global(&mut active);
849        deletion.apply_global(&mut active);
850
851        let active_records = active.as_ref().expect("global state should exist");
852        assert_eq!(active_records.records.as_slice().len(), 2);
853        let state = PaxState::new(active.as_ref(), Vec::new(), None);
854        assert_eq!(
855            state.effective_record(&PaxKeyword::Path),
856            Some(&PaxRecord::Path(PaxValue::Deleted))
857        );
858    }
859
860    #[test]
861    fn parses_values_and_deletions_through_from_str() {
862        assert!(matches!(
863            "".parse::<PaxValue<String>>(),
864            Ok(PaxValue::Deleted)
865        ));
866        assert!(matches!(
867            "value".parse::<PaxValue<String>>(),
868            Ok(PaxValue::Value(value)) if value == "value"
869        ));
870        assert!(matches!(
871            "12".parse::<PaxValue<u64>>(),
872            Ok(PaxValue::Value(12))
873        ));
874    }
875
876    #[test]
877    fn parses_strict_numeric_and_timestamp_values() {
878        assert!(matches!(
879            PaxValue::parse_integer("uid", b"12"),
880            Ok(PaxValue::Value(12))
881        ));
882        assert!(matches!(
883            PaxValue::parse_integer("uid", b""),
884            Ok(PaxValue::Deleted)
885        ));
886        assert!(matches!(
887            PaxValue::parse_time("mtime", b"12.034"),
888            Ok(PaxValue::Value(12))
889        ));
890        assert!(matches!(
891            PaxValue::parse_time("mtime", b""),
892            Ok(PaxValue::Deleted)
893        ));
894
895        for value in ["+1", "-1", "12x", "18446744073709551616"] {
896            assert!(matches!(
897                PaxValue::parse_integer("gid", value.as_bytes()),
898                Err(PaxError::InvalidInteger { .. })
899            ));
900        }
901        for value in ["+1", "-1", "1.", "1.nanosecond", "18446744073709551616"] {
902            assert!(matches!(
903                PaxValue::parse_time("atime", value.as_bytes()),
904                Err(PaxError::InvalidTime { .. })
905            ));
906        }
907    }
908
909    #[test]
910    fn parses_typed_standard_reserved_and_vendor_records() {
911        let fields = [
912            ("atime", "12.034"),
913            ("charset", "BINARY"),
914            ("comment", "a=b"),
915            ("ctime", "17.500"),
916            ("gid", "7"),
917            ("gname", "group"),
918            ("hdrcharset", UTF8_HDRCHARSET),
919            ("linkpath", "target"),
920            ("mtime", "42"),
921            ("path", "file"),
922            ("realtime.deadline", "soon"),
923            ("security.label", "secure"),
924            ("size", "0"),
925            ("uid", "8"),
926            ("uname", "user"),
927            ("Acme.attribute", "custom"),
928        ];
929        let mut payload = Vec::new();
930        for (keyword, value) in fields {
931            payload.extend_from_slice(&record(keyword, value));
932        }
933
934        let Ok(records) = PaxRecords::parse(&payload, HdrCharset::Utf8) else {
935            panic!("records should parse");
936        };
937        assert_eq!(
938            records.as_slice(),
939            [
940                PaxRecord::Atime(PaxValue::Value(12)),
941                PaxRecord::Charset(PaxValue::Value(text("BINARY"))),
942                comment("a=b"),
943                PaxRecord::Ctime(PaxValue::Value(17)),
944                PaxRecord::Gid(PaxValue::Value(7)),
945                PaxRecord::Gname(PaxValue::Value(utf8("group"))),
946                PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Utf8)),
947                PaxRecord::LinkPath(PaxValue::Value(utf8("target"))),
948                PaxRecord::Mtime(PaxValue::Value(42)),
949                PaxRecord::Path(PaxValue::Value(utf8("file"))),
950                PaxRecord::Realtime {
951                    name: text("deadline"),
952                    value: PaxValue::Value(text("soon")),
953                },
954                security("secure"),
955                PaxRecord::Size(PaxValue::Value(0)),
956                PaxRecord::Uid(PaxValue::Value(8)),
957                PaxRecord::Uname(PaxValue::Value(utf8("user"))),
958                vendor("attribute", "custom"),
959            ]
960        );
961        assert!(
962            records
963                .as_slice()
964                .iter()
965                .zip(fields)
966                .all(|(record, (keyword, _))| record.keyword().to_string() == keyword)
967        );
968    }
969
970    #[test]
971    fn parses_deleted_ctime_compatibility_extension() {
972        let Ok(records) = PaxRecords::parse(&record("ctime", ""), HdrCharset::Utf8) else {
973            panic!("ctime deletion should parse");
974        };
975        assert_eq!(records.as_slice(), [PaxRecord::Ctime(PaxValue::Deleted)]);
976    }
977
978    #[test]
979    fn rejects_invalid_records_and_keywords() {
980        for payload in [
981            b"11 path=name".as_slice(),
982            b"12 pathname\n".as_slice(),
983            b"99 path=name\n".as_slice(),
984            b"+12 path=name\n".as_slice(),
985        ] {
986            assert!(matches!(
987                PaxRecords::parse(payload, HdrCharset::Utf8),
988                Err(PaxError::InvalidRecords { .. })
989            ));
990        }
991
992        for keyword in ["unknown", "VENDOR", "VENDOR.", "realtime.", "security."] {
993            assert!(matches!(
994                PaxRecord::parse(keyword, b"value", HdrCharset::Utf8),
995                Err(PaxError::InvalidKeyword { .. })
996            ));
997        }
998    }
999
1000    #[test]
1001    fn accepts_opaque_vendor_values_but_rejects_invalid_utf8_text() {
1002        let invalid_utf8 = [0xd6, 0xfb, 0x00];
1003        let mut vendor_records = raw_record(b"SCHILY.xattr.user.data", &invalid_utf8);
1004        vendor_records.extend_from_slice(&raw_record(b"SCHILY.xattr.user.deleted", b""));
1005        let expected = PaxRecords(vec![
1006            PaxRecord::Vendor {
1007                vendor: text("SCHILY"),
1008                name: text("xattr.user.data"),
1009                value: PaxValue::Value(opaque(&invalid_utf8)),
1010            },
1011            PaxRecord::Vendor {
1012                vendor: text("SCHILY"),
1013                name: text("xattr.user.deleted"),
1014                value: PaxValue::Deleted,
1015            },
1016        ]);
1017        assert!(matches!(
1018            PaxRecords::parse(&vendor_records, HdrCharset::Utf8),
1019            Ok(records) if records == expected
1020        ));
1021
1022        for keyword in [
1023            b"path".as_slice(),
1024            b"comment",
1025            b"realtime.deadline",
1026            b"security.label",
1027        ] {
1028            assert!(matches!(
1029                PaxRecords::parse(&raw_record(keyword, &invalid_utf8), HdrCharset::Utf8),
1030                Err(PaxError::InvalidUtf8)
1031            ));
1032        }
1033    }
1034
1035    #[test]
1036    fn applies_namespaced_globals_and_accepts_supported_hdrcharset_records() {
1037        let mut active = global_state(vec![
1038            vendor("first", "old"),
1039            vendor("second", "kept"),
1040            security("old"),
1041        ]);
1042        let update = Arc::new(PaxRecords(vec![vendor("first", "new"), security("new")]));
1043        update.apply_global(&mut active);
1044        let active = active.as_ref().expect("global state should exist");
1045        assert_eq!(active.records.as_slice().len(), 3);
1046        assert_eq!(
1047            active.get(&PaxKeyword::Vendor {
1048                vendor: text("Acme"),
1049                name: text("first"),
1050            }),
1051            Some(&vendor("first", "new"))
1052        );
1053        assert_eq!(
1054            active.get(&PaxKeyword::Security(text("label"))),
1055            Some(&security("new"))
1056        );
1057
1058        for (case, payload) in [
1059            (
1060                "supported hdrcharset",
1061                record("hdrcharset", UTF8_HDRCHARSET),
1062            ),
1063            ("deleted hdrcharset", record("hdrcharset", "")),
1064            ("member data charset", record("charset", "BINARY")),
1065        ] {
1066            assert!(
1067                PaxRecords::parse(&payload, HdrCharset::Utf8).is_ok(),
1068                "{case}"
1069            );
1070        }
1071
1072        let mut binary_values = record("hdrcharset", BINARY_HDRCHARSET);
1073        for (keyword, value) in [
1074            (b"gname".as_slice(), [0xfc]),
1075            (b"linkpath".as_slice(), [0xfd]),
1076            (b"path".as_slice(), [0xfe]),
1077            (b"uname".as_slice(), [0xff]),
1078        ] {
1079            binary_values.extend_from_slice(&raw_record(keyword, &value));
1080        }
1081        let Ok(binary_records) = PaxRecords::parse(&binary_values, HdrCharset::Utf8) else {
1082            panic!("binary records should parse");
1083        };
1084        assert_eq!(
1085            binary_records.as_slice(),
1086            [
1087                PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Binary)),
1088                PaxRecord::Gname(PaxValue::Value(binary(&[0xfc]))),
1089                PaxRecord::LinkPath(PaxValue::Value(binary(&[0xfd]))),
1090                PaxRecord::Path(PaxValue::Value(binary(&[0xfe]))),
1091                PaxRecord::Uname(PaxValue::Value(binary(&[0xff]))),
1092            ]
1093        );
1094        let inherited_binary_path = raw_record(b"path", &[0xfe]);
1095        let Ok(inherited_records) = PaxRecords::parse(&inherited_binary_path, HdrCharset::Binary)
1096        else {
1097            panic!("inherited binary records should parse");
1098        };
1099        assert_eq!(
1100            inherited_records.as_slice(),
1101            [PaxRecord::Path(PaxValue::Value(binary(&[0xfe])))]
1102        );
1103        let mut reset_to_utf8 = record("hdrcharset", "");
1104        reset_to_utf8.extend_from_slice(&raw_record(b"path", &[0xfd]));
1105        assert!(matches!(
1106            PaxRecords::parse(&reset_to_utf8, HdrCharset::Binary),
1107            Err(PaxError::InvalidUtf8)
1108        ));
1109        let mut binary_comment = record("hdrcharset", BINARY_HDRCHARSET);
1110        binary_comment.extend_from_slice(&raw_record(b"comment", &[0xff]));
1111        assert!(matches!(
1112            PaxRecords::parse(&binary_comment, HdrCharset::Utf8),
1113            Err(PaxError::InvalidUtf8)
1114        ));
1115
1116        let unsupported_value = "ISO-IR 8859 1 1998";
1117        let mut overridden_unsupported = record("hdrcharset", unsupported_value);
1118        overridden_unsupported.extend_from_slice(&record("hdrcharset", UTF8_HDRCHARSET));
1119        for unsupported in [
1120            record("hdrcharset", unsupported_value),
1121            overridden_unsupported,
1122        ] {
1123            assert!(matches!(
1124                PaxRecords::parse(&unsupported, HdrCharset::Utf8),
1125                Err(PaxError::UnsupportedCharset { .. })
1126            ));
1127        }
1128    }
1129}