nix_nar/
dec.rs

1use std::{
2    cell::{Cell, RefCell},
3    fmt,
4    fs::{self, OpenOptions, Permissions},
5    io::{self, Read},
6    path::Path,
7};
8
9use camino::{Utf8Component, Utf8PathBuf};
10
11use crate::{error::NarError, parser};
12
13/// Decoder that can extract the contents of NAR files.
14pub struct Decoder<R: Read> {
15    inner: DecoderInner<R>,
16}
17
// Both fields use interior mutability (`Cell` / `RefCell`) so the
// state can be updated through a shared reference.  That is what lets
// the decoder be driven through the standard `Iterator` interface.
//
// `pos` records how far into the stream we are, so that the bytes of
// a file the user did not consume can be skipped correctly.
/// Internal state of a [`Decoder`].  It is public only because it
/// appears in the type of a public field.
pub struct DecoderInner<R> {
    /// Number of bytes read from `reader` so far.
    pos: Cell<u64>,
    /// The wrapped byte source.
    reader: RefCell<R>,
}
30
31/// Iterator over the entries in the archive.
32pub struct Entries<'a, R: Read> {
33    decoder: &'a Decoder<R>,
34
35    // What we're currently doing.
36    current_activity: CurrentActivity,
37}
38
39/// A single entry in a NAR file.
40#[derive(Debug)]
41pub struct Entry<'a, R> {
42    /// The path to the entry in the archive. The top-level entry
43    /// doesn't have a path, so the first entry will have `path =
44    /// None`.  This `path` is the full path in the archive with all
45    /// parent paths combined. I.e. this is "some/subdir/my-file", and
46    /// not just "my-file".
47    pub path: Option<Utf8PathBuf>,
48
49    /// The contents of the entry.
50    pub content: Content<'a, R>,
51}
52
53/// The content of an [`Entry`] emitted by [`Decoder`].
54pub enum Content<'a, R> {
55    /// A directory.  Its children will follow as separate
56    /// [`File`](Self::File) entries.
57    Directory,
58
59    /// A symlink with a given path. The NAR format imposes no
60    /// constraints on `target`, so this symlink could point to
61    /// anywhere.
62    Symlink { target: Utf8PathBuf },
63
64    /// A file, either plain or executable, with the given contents.
65    /// The `data` field is a struct implementing [`std::io::Read`],
66    /// so it can be read like any file.  You *must* either read
67    /// `data` before calling [`Entries::next`] on the iterator, or
68    /// not read it all.  Attempting to read `data` after calling
69    /// [`Entries::next`] is undefined behaviour, and will almost
70    /// certainly return garbage data.
71    File {
72        executable: bool,
73        size: u64,
74        offset: u64,
75
76        /// May be used to extract the contents of this file.
77        data: io::Take<&'a DecoderInner<R>>,
78    },
79}
80
81impl<R> Content<'_, R> {
82    /// Returns `true` if the content is [`Directory`].
83    ///
84    /// [`Directory`]: Content::Directory
85    #[must_use]
86    pub fn is_directory(&self) -> bool {
87        matches!(self, Self::Directory)
88    }
89
90    /// Returns `true` if the content is [`Symlink`].
91    ///
92    /// [`Symlink`]: Content::Symlink
93    #[must_use]
94    pub fn is_symlink(&self) -> bool {
95        matches!(self, Self::Symlink { .. })
96    }
97
98    /// Returns `true` if the content is [`File`].
99    ///
100    /// [`File`]: Content::File
101    #[must_use]
102    pub fn is_file(&self) -> bool {
103        matches!(self, Self::File { .. })
104    }
105}
106
107#[derive(Debug)]
108enum CurrentActivity {
109    Finished,
110    ParsingTopLevel,
111    ParsingContent { next: u64, path: Utf8PathBuf },
112    ParsingDirectoryEntries { path: Utf8PathBuf },
113}
114
115impl<R: Read> Decoder<R> {
116    /// Create a new decoder over the given [`Read`](std::io::Read)er.
117    /// Consider wrapping it in a [`std::io::BufReader`] for
118    /// performance.
119    ///
120    /// # Errors
121    ///
122    /// Returns an error if the reader does not contain a valid NAR header.
123    pub fn new(reader: R) -> Result<Self, NarError> {
124        let inner = DecoderInner {
125            pos: Cell::new(0),
126            reader: RefCell::new(reader),
127        };
128        parser::expect_str(&inner, "nix-archive-1")?;
129        Ok(Self { inner })
130    }
131
132    /// Construct an iterator over the entries in this archive.
133    ///
134    /// You must consider each entry within an archive in sequence. If
135    /// entries are processed out of sequence (from what the iterator
136    /// returns), then the contents read for each entry may be
137    /// corrupted.
138    ///
139    /// # Errors
140    ///
141    /// Returns an error if called on a decoder that has already been used.
142    pub fn entries(&self) -> Result<Entries<'_, R>, NarError> {
143        Entries::new(self)
144    }
145
146    /// Unpacks the contents of the NAR file to the given destination
147    /// (which must not already exist).
148    ///
149    /// This operation will not not write files outside of the path
150    /// specified by `dst`. Files in the archive which have a `..` in
151    /// their path are skipped during the unpacking process.
152    ///
153    /// No attempt is made to validate the targets of symlinks.
154    ///
155    /// If the NAR file is invalid, this function returns an error.
156    /// For instance, this happens if an [`Entry`]'s parent doesn't
157    /// exist or is not a directory.
158    ///
159    /// # Errors
160    ///
161    /// Returns an error if the destination already exists, the NAR is invalid,
162    /// or an I/O error occurs during unpacking.
163    pub fn unpack<P: AsRef<Path>>(&self, dst: P) -> Result<(), NarError> {
164        let dst = dst.as_ref();
165        if fs::symlink_metadata(dst).is_ok() {
166            return Err(NarError::UnpackError(format!(
167                "Unpack destination already exists: {}. Delete it first.",
168                dst.display()
169            )));
170        }
171        for entry in self.entries()? {
172            let entry = entry?;
173            match &entry.path {
174                None => {}
175                Some(path) => {
176                    if path
177                        .components()
178                        .any(|c| !matches!(c, Utf8Component::Normal(_)))
179                    {
180                        continue;
181                    }
182                }
183            }
184            let dst_path = entry
185                .path
186                .map_or_else(|| dst.to_path_buf(), |path| dst.join(path));
187            macro_rules! assert_parent_is_dir {
188                ($dst_path:ident) => {
189                    if let Some(parent) = dst_path.parent() {
190                        if !parent.is_dir() {
191                            return Err(NarError::UnpackError(format!(
192                                "Entry {} has a parent which is not a directory",
193                                dst_path.display()
194                            )));
195                        }
196                    }
197                };
198            }
199            match entry.content {
200                Content::Directory => {
201                    fs::create_dir(dst_path)?;
202                }
203                Content::Symlink { target } => {
204                    assert_parent_is_dir!(dst_path);
205                    symlink::symlink_file(target, dst_path)?;
206                }
207                Content::File {
208                    executable,
209                    size: _,
210                    offset: _,
211                    mut data,
212                } => {
213                    assert_parent_is_dir!(dst_path);
214                    let mut file = OpenOptions::new()
215                        .read(true)
216                        .write(true)
217                        .create_new(true)
218                        .open(dst_path)?;
219                    io::copy(&mut data, &mut file)?;
220                    let mut perms = file.metadata()?.permissions();
221                    if executable {
222                        set_mode(&mut perms, 0o555);
223                    } else {
224                        set_mode(&mut perms, 0o444);
225                    }
226                    file.set_permissions(perms)?;
227                }
228            }
229        }
230        Ok(())
231    }
232}
233
/// Sets the Unix mode bits of `perms` to `mode`.
#[cfg(target_family = "unix")]
fn set_mode(perms: &mut Permissions, mode: u32) {
    std::os::unix::fs::PermissionsExt::set_mode(perms, mode);
}
239
/// Windows counterpart of the Unix `set_mode`: changing file
/// permissions is deliberately a no-op on this platform.
#[cfg(target_family = "windows")]
fn set_mode(_perms: &mut Permissions, _mode: u32) {
    // Intentionally empty.
}
243
244impl<R: Read> Read for &DecoderInner<R> {
245    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
246        let i = self.reader.borrow_mut().read(into)?;
247        self.pos.set(self.pos.get() + i as u64);
248        Ok(i)
249    }
250}
251
252impl<'a, R: Read> Entries<'a, R> {
253    fn new(decoder: &'a Decoder<R>) -> Result<Self, NarError> {
254        let decoder_pos = decoder.inner.pos.get();
255        // The 24 comes from the `nar-archive-1` (8 for the length +
256        // 12 rounded up to 16 for the text) that starts every NAR
257        // file.
258        if decoder_pos != 24 {
259            return Err(NarError::ApiError(format!(
260                "Can only call `entries` on a new `Decoder`. This one is at position {decoder_pos}."
261            )));
262        }
263        Ok(Self {
264            decoder,
265            current_activity: CurrentActivity::ParsingTopLevel,
266        })
267    }
268
269    fn handle_parse_regular(
270        &mut self,
271        path: Option<Utf8PathBuf>,
272        executable: bool,
273        size: u64,
274    ) -> Entry<'a, R> {
275        let size_rounded_up = (size + 7) & !7;
276        self.current_activity = CurrentActivity::ParsingContent {
277            next: self.decoder.inner.pos.get() + size_rounded_up,
278            path: path.clone().unwrap_or_default(),
279        };
280        Entry {
281            path,
282            content: Content::File {
283                executable,
284                size,
285                offset: self.decoder.inner.pos.get(),
286                data: self.decoder.inner.take(size),
287            },
288        }
289    }
290
291    fn next_or_err(&mut self) -> Result<Option<Entry<'a, R>>, NarError> {
292        use parser::{Node as N, ParseResult as PR};
293        use CurrentActivity as CA;
294        match self.current_activity {
295            CA::Finished => Ok(None),
296            CA::ParsingTopLevel => match parser::parse_next(&self.decoder.inner)? {
297                PR::Node(N::Regular { executable, size }) => {
298                    Ok(Some(self.handle_parse_regular(None, executable, size)))
299                }
300                PR::Node(N::Symlink { target }) => {
301                    self.current_activity = CA::Finished;
302                    Ok(Some(Entry {
303                        path: None,
304                        content: Content::Symlink {
305                            target: target.into(),
306                        },
307                    }))
308                }
309                PR::Node(N::Directory) => {
310                    self.current_activity = CA::ParsingDirectoryEntries {
311                        path: Utf8PathBuf::new(),
312                    };
313                    Ok(Some(Entry {
314                        path: None,
315                        content: Content::Directory,
316                    }))
317                }
318                PR::DirectoryEntry(path, _) => Err(NarError::ParseError(format!(
319                    "got unexpected directory entry at top-level: '{path}'"
320                ))),
321                PR::ParenClose => {
322                    self.current_activity = CA::Finished;
323                    Ok(None)
324                }
325            },
326            CA::ParsingDirectoryEntries { path: ref dir_path } => {
327                let dir_path = dir_path.to_path_buf();
328                match parser::parse_next(&self.decoder.inner)? {
329                    PR::Node(
330                        node @ (N::Regular { .. } | N::Symlink { .. } | N::Directory),
331                    ) => Err(NarError::ParseError(format!(
332                        "got unexpected {} node at while parsing directory '{}'",
333                        node.variant_name(),
334                        dir_path,
335                    ))),
336                    PR::DirectoryEntry(path, node) => {
337                        let path = dir_path.join(path);
338                        match node {
339                            N::Regular { executable, size } => Ok(Some(
340                                self.handle_parse_regular(Some(path), executable, size),
341                            )),
342                            N::Symlink { target } => {
343                                // Skip the closing parentheses of the directory entry
344                                parser::parse_paren_close(&self.decoder.inner)?;
345                                Ok(Some(Entry {
346                                    path: Some(path),
347                                    content: Content::Symlink {
348                                        target: target.into(),
349                                    },
350                                }))
351                            }
352                            N::Directory => {
353                                self.current_activity =
354                                    CA::ParsingDirectoryEntries { path: path.clone() };
355                                Ok(Some(Entry {
356                                    path: Some(path),
357                                    content: Content::Directory,
358                                }))
359                            }
360                        }
361                    }
362                    PR::ParenClose => {
363                        if let Some(parent) = dir_path.parent() {
364                            // Skip the closing parentheses of the directory entry
365                            parser::parse_paren_close(&self.decoder.inner)?;
366                            self.current_activity = CA::ParsingDirectoryEntries {
367                                path: parent.to_path_buf(),
368                            };
369                            self.next_or_err()
370                        } else {
371                            self.current_activity = CA::Finished;
372                            Ok(None)
373                        }
374                    }
375                }
376            }
377            CA::ParsingContent { next, ref path } => {
378                // Skip any remaining bytes in the current file.
379                skip_bytes(&self.decoder.inner, next - self.decoder.inner.pos.get())?;
380                // Skip the closing parentheses of the node
381                parser::parse_paren_close(&self.decoder.inner)?;
382                if let Some(parent) = path.parent() {
383                    // Skip the closing parentheses of the directory entry
384                    parser::parse_paren_close(&self.decoder.inner)?;
385                    self.current_activity = CA::ParsingDirectoryEntries {
386                        path: parent.to_path_buf(),
387                    };
388                    self.next_or_err()
389                } else {
390                    self.current_activity = CA::Finished;
391                    Ok(None)
392                }
393            }
394        }
395    }
396}
397
398impl<'a, R: Read> Iterator for Entries<'a, R> {
399    type Item = Result<Entry<'a, R>, NarError>;
400
401    fn next(&mut self) -> Option<Self::Item> {
402        match self.next_or_err() {
403            Err(err) => {
404                self.current_activity = CurrentActivity::Finished;
405                Some(Err(err))
406            }
407            Ok(None) => None,
408            Ok(Some(res)) => Some(Ok(res)),
409        }
410    }
411}
412
413impl<R> fmt::Debug for Content<'_, R> {
414    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
415        match self {
416            Content::Directory => f.write_str("Directory"),
417            Content::Symlink { target } => {
418                f.debug_struct("Symlink").field("target", target).finish()
419            }
420            Content::File {
421                executable,
422                size,
423                offset,
424                data: _,
425            } => f
426                .debug_struct("File")
427                .field("executable", executable)
428                .field("size", size)
429                .field("offset", offset)
430                .finish(),
431        }
432    }
433}
434
435#[allow(clippy::large_stack_arrays)]
436fn skip_bytes<R: Read>(
437    mut decoder_inner: &DecoderInner<R>,
438    mut bytes_to_skip: u64,
439) -> Result<(), NarError> {
440    if bytes_to_skip > 0 {
441        use std::cmp;
442        while bytes_to_skip > 0 {
443            let mut buf = [0u8; 4096 * 8];
444            let n = cmp::min(bytes_to_skip, buf.len() as u64);
445            #[allow(clippy::cast_possible_truncation)]
446            let read_len = n as usize;
447            match decoder_inner
448                .read(&mut buf[..read_len])
449                .map_err(Into::<NarError>::into)?
450            {
451                0 => {
452                    return Err(NarError::ParseError(
453                        "unexpected EOF during skip".to_string(),
454                    ));
455                }
456                n => {
457                    bytes_to_skip -= n as u64;
458                }
459            }
460        }
461    }
462    Ok(())
463}
464
465impl<R> Entry<'_, R> {
466    #[must_use]
467    pub fn abs_path(&self) -> Utf8PathBuf {
468        let mut p =
469            Utf8PathBuf::from(std::path::MAIN_SEPARATOR.to_string()).to_path_buf();
470        if let Some(ref path) = self.path {
471            p.push(path);
472        }
473        p
474    }
475}