mtree2/
parser.rs

//! Types and functions for parsing the individual lines of mtree files.
2use crate::Device;
3use crate::util::FromDec;
4use crate::util::FromHex;
5use crate::util::decode_escapes_path;
6use crate::util::parse_time;
7use smallvec::SmallVec;
8use std::ffi::OsStr;
9use std::fmt;
10use std::os::unix::ffi::OsStrExt;
11use std::path::PathBuf;
12use std::time::Duration;
13
/// One parsed line of an mtree file.
///
/// An mtree file is a sequence of lines, each a semantic unit.
#[derive(Debug)]
pub enum MTreeLine<'a> {
    /// Blank lines are ignored.
    Blank,
    /// Lines starting with a '#' are ignored.
    Comment,
    /// Special commands (starting with '/') alter the behavior of later
    /// entries.
    ///
    /// Holds the kind of command and the keywords that followed it on the line.
    Special(SpecialKind, SmallVec<[Keyword<'a>; 5]>),
    /// Change the current directory to the parent of the current directory.
    DotDot,
    // Design note: `Relative` and `Full` own their path as a `PathBuf`, which is created while
    // the line is parsed. Earlier revisions created the owned path one level up, in the code
    // that consumed these variants (duplicated for both of them).
    /// If the path does not contain a '/', it is regarded as a relative entry
    /// and appended to the current directory in scope.
    ///
    /// Holds the path and the keywords that followed it on the line.
    Relative(PathBuf, SmallVec<[Keyword<'a>; 5]>),
    /// If the first word does contain a '/', it is regarded as a Full Path
    /// specification and no further processing is done.
    ///
    /// Holds the path and the keywords that followed it on the line.
    Full(PathBuf, SmallVec<[Keyword<'a>; 5]>),
}
37
38impl<'a> MTreeLine<'a> {
39    pub fn from_bytes(input: &'a [u8]) -> Result<Self, LineParseError> {
40        // Fast path - empty line
41        if input.is_empty() {
42            return Ok(MTreeLine::Blank);
43        }
44
45        // check for wrapped line and comment
46        let first_byte = input[0];
47        if let Some(&last) = input.last()
48            && last == b'\\'
49        {
50            return Err(LineParseError::WrappedLine(
51                input[..input.len() - 1].to_vec(),
52            ));
53        }
54        if first_byte == b'#' {
55            return Ok(MTreeLine::Comment);
56        }
57
58        // Split into parts, filtering empty words
59        let mut parts =
60            crate::util::MemchrSplitter::new(b' ', input).filter(|word| !word.is_empty());
61
62        let Some(first) = parts.next() else {
63            return Ok(MTreeLine::Blank);
64        };
65
66        // Fast path - dotdot
67        if first == b".." {
68            return Ok(MTreeLine::DotDot);
69        }
70
71        // Fast path - special command -- this way we have an speed increase by 3%
72        if first_byte == b'/' {
73            let kind = SpecialKind::from_bytes(&first[1..])?;
74            // Pre-allocate params with expected size
75            let mut params = SmallVec::with_capacity(5);
76            for part in parts {
77                let keyword = Keyword::from_bytes(part);
78                if let Ok(keyword) = keyword {
79                    params.push(keyword);
80                } else {
81                    return Err(LineParseError::Parser(ParserError(format!(
82                        r#"Could not parse "{}" as a valid mtree field"#,
83                        String::from_utf8_lossy(part)
84                    ))));
85                }
86            }
87            return Ok(MTreeLine::Special(kind, params));
88        }
89
90        // Pre-allocate params with expected size
91        let mut params = SmallVec::with_capacity(5);
92        for part in parts {
93            let keyword = Keyword::from_bytes(part);
94            if let Ok(keyword) = keyword {
95                params.push(keyword);
96            } else {
97                return Err(LineParseError::Parser(ParserError(format!(
98                    r#"Could not parse "{}" as a valid mtree field"#,
99                    String::from_utf8_lossy(part)
100                ))));
101            }
102        }
103
104        // Check for slashes and escapes in one pass
105        let mut has_slash = false;
106        let mut has_escape = false;
107        for &byte in first {
108            match byte {
109                b'/' => has_slash = true,
110                b'\\' => has_escape = true,
111                _ => {}
112            }
113            if has_slash && has_escape {
114                break;
115            }
116        }
117
118        // Create path, avoiding allocation if possible
119        let path_dec = if has_escape {
120            let mut path_enc = first.to_vec();
121            decode_escapes_path(&mut path_enc).ok_or_else(|| {
122                LineParseError::Parser(ParserError(
123                    "Failed to decode escapes in path - you might need to enable the netbsd6 \
124                     feature"
125                        .to_owned(),
126                ))
127            })?
128        } else {
129            PathBuf::from(OsStr::from_bytes(first))
130        };
131
132        if has_slash {
133            Ok(MTreeLine::Full(path_dec, params))
134        } else {
135            Ok(MTreeLine::Relative(path_dec, params))
136        }
137    }
138}
/// A command that alters the behavior of later commands.
///
/// Written in an mtree file as a word starting with '/', e.g. `/set` or `/unset`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum SpecialKind {
    /// `/set`: set a default for future lines.
    Set,
    /// `/unset`: unset a default for future lines.
    Unset,
}
147
148impl SpecialKind {
149    fn from_bytes(input: &[u8]) -> ParserResult<Self> {
150        Ok(match input {
151            b"set" => Self::Set,
152            b"unset" => Self::Unset,
153            _ => {
154                return Err(format!(
155                    r#""{}" is not a special command"#,
156                    String::from_utf8_lossy(input)
157                )
158                .into());
159            }
160        })
161    }
162}
163
/// A keyword attached to an entry or special command.
///
/// Each entry may carry one or more keywords, written as `key` or `key=value` words after the
/// path. Variants holding `&'a [u8]` borrow their value from the parsed line.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Keyword<'a> {
    /// `cksum` The checksum of the file using the default algorithm specified
    /// by the cksum(1) utility.
    // I'm pretty sure u32 is big enough, but I'm using u64 because I'm not sure that this is
    // guaranteed.
    Checksum(u64),
    /// `device` The device number for *block* or *char* file types.
    DeviceRef(DeviceRef<'a>),
    /// `contents` The full pathname of a file that holds the contents of this
    /// file.
    Contents(&'a [u8]),
    /// `flags` The file flags as a symbolic name.
    ///
    /// I think this is bsd-specific.
    Flags(&'a [u8]),
    /// `gid` The file group as a numeric value.
    Gid(u32),
    /// `gname` The file group as a symbolic name.
    Gname(&'a [u8]),
    /// `ignore` Ignore any file hierarchy below this line.
    Ignore,
    /// `inode` The inode number.
    Inode(u64),
    /// `link` The target of the symbolic link when type=link.
    Link(&'a [u8]),
    /// `md5|md5digest` The MD5 message digest of the file.
    Md5(u128),
    /// `mode` The current file's permissions as a numeric (octal) or symbolic
    /// value.
    Mode(FileMode),
    /// `nlink` The number of hard links the file is expected to have.
    NLink(u64),
    /// `nochange` Make sure this file or directory exists but otherwise ignore
    /// all attributes.
    NoChange,
    /// `optional` The file is optional; do not complain about the file if it is
    /// not in the file hierarchy.
    Optional,
    /// `resdevice` The "resident" device number of the file, e.g. the ID of the
    /// device that contains the file. Its format is the same as the one for
    /// `device`.
    ResidentDeviceRef(DeviceRef<'a>),
    /// `rmd160|rmd160digest|ripemd160digest` The RIPEMD160 message digest of
    /// the file.
    Rmd160([u8; 20]),
    /// `sha1|sha1digest` The FIPS 180-1 ("SHA-1") message digest of the file.
    Sha1([u8; 20]),
    /// `sha256|sha256digest` The FIPS 180-2 ("SHA-256") message digest of the
    /// file.
    Sha256([u8; 32]),
    /// `sha384|sha384digest` The FIPS 180-2 ("SHA-384") message digest of the
    /// file.
    Sha384([u8; 48]),
    /// `sha512|sha512digest` The FIPS 180-2 ("SHA-512") message digest of the
    /// file.
    Sha512([u8; 64]),
    /// `size` The size, in bytes, of the file.
    Size(u64),
    /// `time` The last modification time of the file, as a duration since the
    /// unix epoch.
    // In the mtree file this is written in seconds and nanoseconds. The value should include a
    // period character and exactly nine digits after the period.
    Time(Duration),
    /// `type` The type of the file.
    Type(FileType),
    /// `uid` The file owner as a numeric value.
    Uid(u32),
    /// `uname` The file owner as a symbolic name.
    Uname(&'a [u8]),
}
236impl<'a> Keyword<'a> {
237    /// Parse a keyword with optional value.
238    ///
239    /// Input must be a non-empty slice
240    fn from_bytes(input: &'a [u8]) -> ParserResult<Self> {
241        fn next<'a>(field: &'static str, val: Option<&'a [u8]>) -> ParserResult<&'a [u8]> {
242            val.ok_or_else(|| format!(r#""{field}" requires a parameter, none found"#).into())
243        }
244        debug_assert!(!input.len() > 0, "Input must be non-empty");
245        let mut iter = input.splitn(2, |ch| *ch == b'=');
246        // Unwrap cannot fail, as long as input is non-empty, guaranteed by the caller.
247        let key = iter.next().expect("cannot fail");
248        Ok(match key {
249            b"cksum" => Keyword::Checksum(u64::from_dec(next("cksum", iter.next())?)?),
250            b"device" => Keyword::DeviceRef(DeviceRef::from_bytes(next("devices", iter.next())?)?),
251            b"contents" => Keyword::Contents(next("contents", iter.next())?),
252            b"flags" => Keyword::Flags(next("flags", iter.next())?),
253            b"gid" => Keyword::Gid(u32::from_dec(next("gid", iter.next())?)?),
254            b"gname" => Keyword::Gname(next("gname", iter.next())?),
255            b"ignore" => Keyword::Ignore,
256            b"inode" => Keyword::Inode(u64::from_dec(next("inode", iter.next())?)?),
257            b"link" => Keyword::Link(next("link", iter.next())?),
258            b"md5" | b"md5digest" => {
259                Keyword::Md5(u128::from_hex(next("md5|md5digest", iter.next())?)?)
260            }
261            b"mode" => Keyword::Mode(FileMode::from_bytes(next("mode", iter.next())?)?),
262            b"nlink" => Keyword::NLink(u64::from_dec(next("nlink", iter.next())?)?),
263            b"nochange" => Keyword::NoChange,
264            b"optional" => Keyword::Optional,
265            b"resdevice" => {
266                Keyword::ResidentDeviceRef(DeviceRef::from_bytes(next("resdevice", iter.next())?)?)
267            }
268            b"rmd160" | b"rmd160digest" | b"ripemd160digest" => Keyword::Rmd160(
269                <[u8; 20]>::from_hex(next("rmd160|rmd160digest|ripemd160digest", iter.next())?)?,
270            ),
271            b"sha1" | b"sha1digest" => {
272                Keyword::Sha1(<[u8; 20]>::from_hex(next("sha1|sha1digest", iter.next())?)?)
273            }
274            b"sha256" | b"sha256digest" => Keyword::Sha256(<[u8; 32]>::from_hex(next(
275                "sha256|sha256digest",
276                iter.next(),
277            )?)?),
278            b"sha384" | b"sha384digest" => Keyword::Sha384(<[u8; 48]>::from_hex(next(
279                "sha384|sha384digest",
280                iter.next(),
281            )?)?),
282            b"sha512" | b"sha512digest" => Keyword::Sha512(<[u8; 64]>::from_hex(next(
283                "sha512|sha512digest",
284                iter.next(),
285            )?)?),
286            b"size" => Keyword::Size(u64::from_dec(next("size", iter.next())?)?),
287            b"time" => Keyword::Time(parse_time(next("time", iter.next())?)?),
288            b"type" => Keyword::Type(FileType::from_bytes(next("type", iter.next())?)?),
289            b"uid" => Keyword::Uid(u32::from_dec(next("uid", iter.next())?)?),
290            b"uname" => Keyword::Uname(next("uname", iter.next())?),
291            other => {
292                return Err(format!(
293                    r#""{}" is not a valid parameter key (in "{}")"#,
294                    String::from_utf8_lossy(other),
295                    String::from_utf8_lossy(input)
296                )
297                .into());
298            }
299        })
300    }
301}
302
/// A device specification as written in a `device` or `resdevice` keyword.
///
/// The fields borrow from the parsed line; the major, minor and subunit identifiers are kept
/// as the raw bytes found between the commas and are not interpreted here.
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct DeviceRef<'a> {
    /// The device format
    format: Format,
    /// The device major identifier
    major: &'a [u8],
    /// The device minor identifier
    minor: &'a [u8],
    /// The device subunit identifier, if applicable.
    subunit: Option<&'a [u8]>,
}
314
315impl<'a> DeviceRef<'a> {
316    /// Take ownership of the underlying data by copying
317    pub fn to_device(&self) -> Device {
318        Device {
319            format: self.format,
320            major: self.major.to_owned(),
321            minor: self.minor.to_owned(),
322            subunit: self.subunit.map(ToOwned::to_owned),
323        }
324    }
325
326    fn from_bytes(input: &'a [u8]) -> ParserResult<Self> {
327        let mut iter = input.splitn(4, |ch| *ch == b',');
328        let format = Format::from_bytes(iter.next().ok_or_else(|| {
329            format!(
330                r#"could not read format from device "{}""#,
331                String::from_utf8_lossy(input)
332            )
333        })?)?;
334        let major = iter.next().ok_or_else(|| {
335            format!(
336                r#"could not read major field from device "{}""#,
337                String::from_utf8_lossy(input)
338            )
339        })?;
340        let minor = iter.next().ok_or_else(|| {
341            format!(
342                r#"could not read minor field from device "{}""#,
343                String::from_utf8_lossy(input)
344            )
345        })?;
346        // optional, so no '?'
347        let subunit = iter.next();
348        Ok(DeviceRef {
349            format,
350            major,
351            minor,
352            subunit,
353        })
354    }
355}
356
/// The available device formats.
///
/// Each variant is written in an mtree file as the lowercase name given in its description.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Format {
    /// `native`
    Native,
    /// `386bsd`
    Bsd386,
    /// `4bsd`
    Bsd4,
    /// `bsdos`
    BsdOs,
    /// `freebsd`
    FreeBsd,
    /// `hpux`
    Hpux,
    /// `isc`
    Isc,
    /// `linux`
    Linux,
    /// `netbsd`
    NetBsd,
    /// `osf1`
    Osf1,
    /// `sco`
    Sco,
    /// `solaris`
    Solaris,
    /// `sunos`
    SunOs,
    /// `svr3`
    Svr3,
    /// `svr4`
    Svr4,
    /// `ultrix`
    Ultrix,
}
377
378impl Format {
379    fn from_bytes(bytes: &[u8]) -> ParserResult<Self> {
380        Ok(match bytes {
381            b"native" => Self::Native,
382            b"386bsd" => Self::Bsd386,
383            b"4bsd" => Self::Bsd4,
384            b"bsdos" => Self::BsdOs,
385            b"freebsd" => Self::FreeBsd,
386            b"hpux" => Self::Hpux,
387            b"isc" => Self::Isc,
388            b"linux" => Self::Linux,
389            b"netbsd" => Self::NetBsd,
390            b"osf1" => Self::Osf1,
391            b"sco" => Self::Sco,
392            b"solaris" => Self::Solaris,
393            b"sunos" => Self::SunOs,
394            b"svr3" => Self::Svr3,
395            b"svr4" => Self::Svr4,
396            b"ultrix" => Self::Ultrix,
397            other => {
398                return Err(format!(
399                    r#""{}" is not a valid format"#,
400                    String::from_utf8_lossy(other)
401                )
402                .into());
403            }
404        })
405    }
406}
407
/// Every documented format name parses to its variant, and unknown names are rejected.
#[test]
fn test_format_from_bytes() {
    // A plain array is enough here; no heap allocation is needed just to iterate.
    for (input, res) in [
        (&b"native"[..], Format::Native),
        (&b"386bsd"[..], Format::Bsd386),
        (&b"4bsd"[..], Format::Bsd4),
        (&b"bsdos"[..], Format::BsdOs),
        (&b"freebsd"[..], Format::FreeBsd),
        (&b"hpux"[..], Format::Hpux),
        (&b"isc"[..], Format::Isc),
        (&b"linux"[..], Format::Linux),
        (&b"netbsd"[..], Format::NetBsd),
        (&b"osf1"[..], Format::Osf1),
        (&b"sco"[..], Format::Sco),
        (&b"solaris"[..], Format::Solaris),
        (&b"sunos"[..], Format::SunOs),
        (&b"svr3"[..], Format::Svr3),
        (&b"svr4"[..], Format::Svr4),
        (&b"ultrix"[..], Format::Ultrix),
    ] {
        assert_eq!(Format::from_bytes(input), Ok(res));
    }
    assert!(Format::from_bytes(&b"other"[..]).is_err());
}
431
/// The type of an entry.
///
/// In an mtree file, entries can be files, directories, and some other special
/// unix types like block/character devices. The name used in the `type` keyword is given
/// with each variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileType {
    /// `block`: a unix block device.
    BlockDevice,
    /// `char`: a unix character device.
    CharacterDevice,
    /// `dir`: a directory.
    Directory,
    /// `fifo`: a unix fifo (named pipe), useful for IPC.
    Fifo,
    /// `file`: a standard file.
    File,
    /// `link`: a symbolic link.
    SymbolicLink,
    /// `socket`: a unix socket.
    Socket,
}
453
454impl FileType {
455    fn from_bytes(input: &[u8]) -> ParserResult<Self> {
456        Ok(match input {
457            b"block" => Self::BlockDevice,
458            b"char" => Self::CharacterDevice,
459            b"dir" => Self::Directory,
460            b"fifo" => Self::Fifo,
461            b"file" => Self::File,
462            b"link" => Self::SymbolicLink,
463            b"socket" => Self::Socket,
464            _ => {
465                return Err(format!(
466                    r#""{}" is not a valid file type"#,
467                    String::from_utf8_lossy(input)
468                )
469                .into());
470            }
471        })
472    }
473
474    fn as_str(self) -> &'static str {
475        match self {
476            Self::BlockDevice => "block",
477            Self::CharacterDevice => "char",
478            Self::Directory => "dir",
479            Self::Fifo => "fifo",
480            Self::File => "file",
481            Self::SymbolicLink => "link",
482            Self::Socket => "socket",
483        }
484    }
485}
486
487impl fmt::Display for FileType {
488    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
489        write!(f, "{}", self.as_str())
490    }
491}
492
/// Every file type name parses to its variant, and unknown names are rejected.
#[test]
fn test_type_from_bytes() {
    let cases: [(&[u8], FileType); 7] = [
        (b"block", FileType::BlockDevice),
        (b"char", FileType::CharacterDevice),
        (b"dir", FileType::Directory),
        (b"fifo", FileType::Fifo),
        (b"file", FileType::File),
        (b"link", FileType::SymbolicLink),
        (b"socket", FileType::Socket),
    ];
    for (name, expected) in cases {
        assert_eq!(FileType::from_bytes(name), Ok(expected));
    }

    let rejected = FileType::from_bytes(b"other");
    assert!(rejected.is_err());
}
508
bitflags::bitflags! {
    /// Unix file permissions.
    ///
    /// One read/write/execute triplet, with the same bit values as a single octal digit of
    /// a file mode.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Perms: u8 {
        /// Entity has read access.
        const READ = 0b100;
        /// Entity has write access.
        const WRITE = 0b010;
        /// Entity has execute access.
        const EXECUTE = 0b001;
    }
}
521
522impl fmt::Display for Perms {
523    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
524        if self.contains(Self::READ) {
525            f.write_str("r")?;
526        } else {
527            f.write_str("-")?;
528        }
529        if self.contains(Self::WRITE) {
530            f.write_str("w")?;
531        } else {
532            f.write_str("-")?;
533        }
534        if self.contains(Self::EXECUTE) {
535            f.write_str("x")?;
536        } else {
537            f.write_str("-")?;
538        }
539        Ok(())
540    }
541}
542
/// The file/dir permissions for owner/group/everyone else.
///
/// Also carries the setuid, setgid and sticky bits.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct FileMode {
    /// The raw mode bits, as parsed from the octal value of the `mode` keyword.
    mode: u32,
}
548
549impl FileMode {
550    fn from_bytes(input: &[u8]) -> ParserResult<Self> {
551        // file mode can either be symbolic, or octal. For now only support octal
552        if input.len() > 4 {
553            return Err(format!(
554                r#"mode value must be 4 or less octal chars, found "{}""#,
555                String::from_utf8_lossy(input)
556            )
557            .into());
558        }
559        Ok(Self {
560            mode: u32::from_str_radix(
561                std::str::from_utf8(input)
562                    .map_err(|err| ParserError(format!("failed to parse mode value: {err}")))?,
563                8,
564            )
565            .map_err(|err| ParserError(format!("failed to parse mode as integer: {err}")))?,
566        })
567    }
568
569    /// Executable files with this bit set will
570    /// run with effective uid set to the uid of the file owner.
571    pub fn setuid(self) -> bool {
572        self.mode & 0o4000 != 0
573    }
574
575    /// Executable files with this bit set will
576    /// run with effective gid set to the gid of the file owner.
577    pub fn setgid(self) -> bool {
578        self.mode & 0o2000 != 0
579    }
580
581    /// Is the sticky bit set?
582    pub fn sticky(self) -> bool {
583        self.mode & 0o1000 != 0
584    }
585
586    /// The permissions for the owner of the file.
587    pub fn owner(self) -> Perms {
588        const MASK: u32 = 0o700;
589        Perms::from_bits_truncate(((self.mode & MASK) >> 6) as u8)
590    }
591
592    /// The permissions for everyone who is not the owner, but in the group.
593    pub fn group(self) -> Perms {
594        const MASK: u32 = 0o070;
595        Perms::from_bits_truncate(((self.mode & MASK) >> 3) as u8)
596    }
597
598    /// The permissions for everyone who is not the owner and not in the group.
599    pub fn other(self) -> Perms {
600        const MASK: u32 = 0o007;
601        Perms::from_bits_truncate((self.mode & MASK) as u8)
602    }
603}
604
605/// Convert to u32 for compatibility with standard library
606impl From<FileMode> for u32 {
607    fn from(value: FileMode) -> Self {
608        value.mode
609    }
610}
611
612impl fmt::Display for FileMode {
613    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
614        write!(f, "{}{}{}", self.owner(), self.group(), self.other())
615    }
616}
617
618impl fmt::Octal for FileMode {
619    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
620        write!(f, "{:o}{:o}{:o}", self.owner(), self.group(), self.other())
621    }
622}
623
/// Shorthand for results whose error is a [`ParserError`].
pub(crate) type ParserResult<T> = Result<T, ParserError>;

/// An error occurred during parsing a record.
///
/// At the moment this only carries a human-readable report of what went wrong.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ParserError(pub String);

/// Wrap a ready-made message, so that `format!(..).into()` and `?` on a `String` error work.
impl From<String> for ParserError {
    fn from(message: String) -> Self {
        ParserError(message)
    }
}

/// Displays the stored message verbatim.
impl fmt::Display for ParserError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(message) = self;
        write!(f, "{message}")
    }
}

impl std::error::Error for ParserError {}
645
646#[derive(Debug)]
647#[non_exhaustive]
648pub(crate) enum LineParseError {
649    Parser(ParserError),
650    WrappedLine(Vec<u8>),
651    Io(std::io::Error),
652}
653
654impl fmt::Display for LineParseError {
655    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
656        match self {
657            Self::Io(e) => write!(f, "{e}"),
658            Self::Parser(e) => write!(f, "{e}"),
659            Self::WrappedLine(e) => {
660                let s = String::from_utf8_lossy(e);
661                write!(f, "Wrapped Line: {s}")
662            }
663        }
664    }
665}
666
667impl From<std::io::Error> for LineParseError {
668    fn from(e: std::io::Error) -> Self {
669        Self::Io(e)
670    }
671}
672
673impl From<ParserError> for LineParseError {
674    fn from(e: ParserError) -> Self {
675        Self::Parser(e)
676    }
677}
678
679impl std::error::Error for LineParseError {}