nix-nar 0.4.0

Library to manipulate Nix Archive (nar) files
Documentation
use std::{
    cmp::min,
    fs::{self, File},
    io::{self, Read},
    path::Path,
};

use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
use is_executable::IsExecutable;

use crate::{coder, NarError};

/// Encoder which can archive a given path as a NAR file.
///
/// The archive bytes are produced incrementally through this type's
/// [`Read`] implementation; each call to `read` pops one pending
/// activity from an internal stack and emits the bytes for it.
pub struct Encoder {
    /// Pending work.  Activities are popped from the back, so the
    /// last element is the next one to run.
    stack: Vec<CurrentActivity>,
    /// Size, in bytes, of the temporary buffer allocated when the
    /// caller's buffer is too small to be written into directly.
    internal_buffer_size: usize,
}

/// Builder for [`Encoder`].
///
/// Obtained from [`Encoder::builder`]; holds the options until
/// `build` is called.
pub struct EncoderBuilder<P: AsRef<Path>> {
    /// Root of the file hierarchy to archive.  It is only converted to
    /// a UTF-8 path when the encoder is built.
    path: P,
    /// Value that will be copied into the encoder's
    /// `internal_buffer_size` field.
    internal_buffer_size: usize,
}

/// One unit of pending work on the encoder's stack.
///
/// The `Read` implementation pops a single activity per call, emits
/// the bytes for it and, where needed, pushes follow-up activities.
#[derive(Debug)]
enum CurrentActivity {
    /// Emit the bytes produced by `coder::start_archive`.
    StartArchive,
    /// Emit the bytes produced by `coder::start_entry`.
    StartEntry,
    /// Inspect the root path and start encoding it as a directory,
    /// symlink or regular file.
    Toplevel {
        path: Utf8PathBuf,
    },
    /// Iterate over the remaining children of a directory.
    WalkingDir {
        /// Directory whose children are being visited.
        dir_path: Utf8PathBuf,
        /// Names still to visit, sorted in descending order so that
        /// popping from the back yields them in ascending order.
        files_rev: Vec<String>,
    },
    /// Stream the remaining contents of an open regular file.
    EncodingFile {
        file: File,
    },
    /// Emit `padding` zero bytes (used after file contents to reach a
    /// multiple of 8 bytes).
    WritePadding {
        padding: u64,
    },
    /// Emit bytes that were already encoded but did not fit in the
    /// caller's buffer on a previous call.
    WriteMoreBytes {
        bytes: Vec<u8>,
    },
    /// Emit the bytes produced by `coder::close_dir_entry`.
    CloseDirEntry,
    /// Emit the bytes produced by `coder::close_entry`.
    CloseEntry,
}

impl Encoder {
    /// Buffer size used unless the builder overrides it.
    const DEFAULT_INTERNAL_BUFFER_SIZE: usize = 1024;

    /// Create a new encoder for file hierarchy at the given path.
    ///
    /// The file system is not touched here; the path is only inspected
    /// once the encoder is read from.
    ///
    /// # Errors
    ///
    /// Returns an error if the path is not valid UTF-8.
    pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, NarError> {
        let path = to_utf8_path(path)?;
        Ok(Self {
            // The stack is popped from the back, so the activities are
            // listed in reverse order of execution.
            stack: vec![
                CurrentActivity::CloseEntry,
                CurrentActivity::Toplevel { path },
                CurrentActivity::StartEntry,
                CurrentActivity::StartArchive,
            ],
            internal_buffer_size: Self::DEFAULT_INTERNAL_BUFFER_SIZE,
        })
    }

    /// Create a builder for this encoder that can take additional
    /// options.
    pub fn builder<P: AsRef<Path>>(path: P) -> EncoderBuilder<P> {
        EncoderBuilder {
            path,
            internal_buffer_size: Self::DEFAULT_INTERNAL_BUFFER_SIZE,
        }
    }

    /// Archive to the given path.
    ///
    /// This is equivalent to creating the file, and [`io::copy`]ing
    /// to it.
    ///
    /// # Errors
    ///
    /// Returns an error if the destination is not valid UTF-8, if it
    /// already exists, or if an I/O error occurs.
    pub fn pack<P: AsRef<Path>>(&mut self, dst: P) -> Result<(), NarError> {
        let dst = to_utf8_path(dst)?;
        let already_exists = || {
            NarError::PackError(format!(
                "Destination {dst} already exists. Delete it first."
            ))
        };
        // `symlink_metadata` also reports dangling symlinks, which we
        // refuse to write through.
        if dst.symlink_metadata().is_ok() {
            return Err(already_exists());
        }
        // `create_new` makes the "must not exist" guarantee atomic: if
        // something creates the destination between the check above and
        // this call, we fail instead of truncating that file.
        let created = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&dst);
        let mut nar = match created {
            Ok(file) => file,
            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
                return Err(already_exists());
            }
            Err(err) => return Err(err.into()),
        };
        io::copy(self, &mut nar)?;
        Ok(())
    }
}

impl<P: AsRef<Path>> EncoderBuilder<P> {
    /// Build the encoder from the options collected so far.
    ///
    /// # Errors
    ///
    /// Returns an error if the path is not valid UTF-8.
    pub fn build(&self) -> Result<Encoder, NarError> {
        let mut encoder = Encoder::new(&self.path)?;
        encoder.internal_buffer_size = self.internal_buffer_size;
        Ok(encoder)
    }

    /// Configure the internal buffer size.  This should be at least
    /// 200 bytes larger than the longest filename.
    ///
    /// # Panics
    ///
    /// Panics if the given number is smaller than 200.
    pub fn internal_buffer_size(&mut self, x: usize) -> &mut Self {
        // Smallest size accepted by this setter.
        const MINIMUM: usize = 200;
        assert!(
            MINIMUM <= x,
            "internal_buffer_size should be at least 200 bytes larger than the longest filename you have"
        );
        self.internal_buffer_size = x;
        self
    }
}

impl Encoder {
    /// Begin encoding the regular file at `path`.
    ///
    /// Opens the file and schedules its contents, followed by the zero
    /// padding needed to reach a multiple of 8 bytes, on the stack.
    /// Then writes the header: the directory-entry prefix when
    /// `dir_entry` is given, the regular-file marker and the file length.
    ///
    /// Returns the number of bytes written into `buf`.
    fn start_encoding_file<P: AsRef<Utf8Path>>(
        &mut self,
        buf: &mut [u8],
        path: P,
        dir_entry: Option<String>,
    ) -> Result<usize, io::Error> {
        let path = path.as_ref();
        let executable = path.as_std_path().is_executable();
        let file_handle = File::open(path).map_err(annotate_err_with_path(path))?;
        let file_len = file_handle
            .metadata()
            .map_err(annotate_err_with_path(path))?
            .len();
        // Bytes needed to round the contents up to a multiple of 8.
        let padding = (8 - file_len % 8) % 8;
        // Pushed in reverse order of execution: contents first, then padding.
        if padding > 0 {
            self.stack.push(CurrentActivity::WritePadding { padding });
        }
        self.stack
            .push(CurrentActivity::EncodingFile { file: file_handle });
        self.write_with_buffer(buf, move |buf| {
            let mut len = 0;
            if let Some(ref file) = dir_entry {
                len += coder::start_dir_entry(&mut buf[len..], file)?;
            }
            len += coder::write_file_regular(&mut buf[len..], executable)?;
            len += coder::write_u64_le(&mut buf[len..], file_len)?;
            Ok(len)
        })
    }

    /// Begin encoding the directory at `path`.
    ///
    /// Lists the directory, schedules the walk over its children on the
    /// stack, then writes the directory-entry prefix (when `dir_entry`
    /// is given) and the directory header.
    ///
    /// Returns the number of bytes written into `buf`.
    fn start_encoding_dir<P: AsRef<Utf8Path>>(
        &mut self,
        buf: &mut [u8],
        path: P,
        dir_entry: Option<String>,
    ) -> Result<usize, io::Error> {
        self.stack.push(CurrentActivity::WalkingDir {
            files_rev: list_dir_files(path.as_ref())
                .map_err(annotate_err_with_path(&path))?,
            dir_path: path.as_ref().into(),
        });
        self.write_with_buffer(buf, move |buf| {
            let mut len = 0;
            if let Some(ref file) = dir_entry {
                len += coder::start_dir_entry(&mut buf[len..], file)?;
            }
            len += coder::start_dir(&mut buf[len..])?;
            Ok(len)
        })
    }

    /// Encode the symlink at `link_path`.
    ///
    /// Reads the link target, then writes the directory-entry prefix
    /// (when `dir_entry` is given) and the symlink record.
    ///
    /// Returns the number of bytes written into `buf`.
    fn start_encoding_symlink<P: AsRef<Utf8Path>>(
        &mut self,
        buf: &mut [u8],
        link_path: P,
        dir_entry: Option<String>,
    ) -> Result<usize, io::Error> {
        let link_path = link_path.as_ref();
        let target_path: Utf8PathBuf = link_path
            .read_link_utf8()
            .map_err(annotate_err_with_path(link_path))?;
        self.write_with_buffer(buf, move |buf| {
            let mut len = 0;
            if let Some(ref file) = dir_entry {
                len += coder::start_dir_entry(&mut buf[len..], file)?;
            }
            len += coder::write_symlink(&mut buf[len..], &target_path)?;
            Ok(len)
        })
    }

    /// Execute a write-into-buffer operation.  If the given buffer is
    /// at least `internal_buffer_size` bytes long, then we just write
    /// into that.  Otherwise, we create a temporary buffer of that
    /// size, we write into that, we copy as much data as we can to the
    /// given buffer, and store the remainder into a
    /// `CurrentActivity::WriteMoreBytes` on the `self.stack`.
    ///
    /// Returns the number of bytes placed into `dst_buf`.
    fn write_with_buffer<F>(&mut self, dst_buf: &mut [u8], f: F) -> io::Result<usize>
    where
        F: FnOnce(&mut [u8]) -> io::Result<usize>,
    {
        // Compare against the configured size rather than a fixed 1024:
        // when a larger size was requested, a caller buffer between 1024
        // bytes and that size may be too small for `f`.
        if dst_buf.len() >= self.internal_buffer_size {
            return f(dst_buf);
        }
        let mut scratch = vec![0u8; self.internal_buffer_size];
        let len = f(&mut scratch)?;
        let to_write_len = min(len, dst_buf.len());
        dst_buf[..to_write_len].copy_from_slice(&scratch[..to_write_len]);
        if len > to_write_len {
            // Keep only the unsent tail, reusing the scratch allocation
            // instead of copying it into a fresh vector.
            scratch.truncate(len);
            scratch.drain(..to_write_len);
            self.stack
                .push(CurrentActivity::WriteMoreBytes { bytes: scratch });
        }
        Ok(to_write_len)
    }
}

impl Read for Encoder {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self.stack.pop() {
            None => Ok(0),
            Some(CurrentActivity::StartArchive) => {
                self.write_with_buffer(buf, coder::start_archive)
            }
            Some(CurrentActivity::StartEntry) => {
                self.write_with_buffer(buf, coder::start_entry)
            }
            Some(CurrentActivity::CloseDirEntry) => {
                self.write_with_buffer(buf, coder::close_dir_entry)
            }
            Some(CurrentActivity::CloseEntry) => {
                self.write_with_buffer(buf, coder::close_entry)
            }
            Some(CurrentActivity::Toplevel { path }) => {
                let metadata =
                    fs::symlink_metadata(&path).map_err(annotate_err_with_path(&path))?;
                if metadata.is_dir() {
                    self.start_encoding_dir(buf, path, None)
                } else if metadata.is_symlink() {
                    self.start_encoding_symlink(buf, path, None)
                } else if metadata.is_file() {
                    self.start_encoding_file(buf, path, None)
                } else {
                    Err(other_io_error(format!("unknown file type {path}")))
                }
            }
            Some(CurrentActivity::WalkingDir {
                dir_path,
                mut files_rev,
            }) => match files_rev.pop() {
                None => self.read(buf),
                Some(file) => {
                    let path = dir_path.join(&file);

                    self.stack.push(CurrentActivity::WalkingDir {
                        dir_path,
                        files_rev,
                    });

                    self.stack.push(CurrentActivity::CloseDirEntry);

                    let metadata = fs::symlink_metadata(&path)
                        .map_err(annotate_err_with_path(&file))?;
                    if metadata.is_dir() {
                        self.start_encoding_dir(buf, path, Some(file))
                    } else if metadata.is_symlink() {
                        self.start_encoding_symlink(buf, path, Some(file))
                    } else if metadata.is_file() {
                        self.start_encoding_file(buf, path, Some(file))
                    } else {
                        Err(other_io_error(format!("unknown file type {path}",)))
                    }
                }
            },
            Some(CurrentActivity::EncodingFile { mut file }) => {
                let len = file.read(buf)?;
                if len != 0 {
                    self.stack.push(CurrentActivity::EncodingFile { file });
                    Ok(len)
                } else {
                    self.read(buf)
                }
            }
            Some(CurrentActivity::WritePadding { padding }) => {
                #[allow(clippy::cast_possible_truncation)]
                let len = min(padding, buf.len() as u64) as usize;
                buf.fill(0);
                if (len as u64) < padding {
                    self.stack.push(CurrentActivity::WritePadding {
                        padding: padding - len as u64,
                    });
                }
                Ok(len)
            }
            Some(CurrentActivity::WriteMoreBytes { bytes }) => {
                let len = min(bytes.len(), buf.len());
                buf[..len].copy_from_slice(&bytes[..len]);
                if len < bytes.len() {
                    self.stack.push(CurrentActivity::WriteMoreBytes {
                        bytes: bytes[len..].to_vec(),
                    });
                }
                Ok(len)
            }
        }
    }
}

/// List the names of the entries in the directory at `path`.
///
/// The names are returned in descending order, so that popping from
/// the back of the vector yields them in ascending order.
///
/// # Errors
///
/// Returns an error if the directory cannot be opened or if reading
/// any of its entries fails.
fn list_dir_files(path: &Utf8Path) -> Result<Vec<String>, io::Error> {
    let entries = path
        .read_dir_utf8()
        .map_err(annotate_err_with_path(path))?;
    // Stops at the first entry that fails to be read.
    let mut names = entries
        .map(|entry| entry.map(|e| e.file_name().to_owned()))
        .collect::<Result<Vec<String>, io::Error>>()?;
    names.sort_unstable_by(|lhs, rhs| rhs.cmp(lhs));
    Ok(names)
}

/// Build a closure, suitable for `Result::map_err`, that wraps an I/O
/// error in a new one whose message names the offending `path`.
fn annotate_err_with_path<P: AsRef<Utf8Path>>(
    path: P,
) -> impl FnOnce(io::Error) -> io::Error {
    // Own the path so the closure does not borrow from the caller.
    let location = path.as_ref().to_path_buf();
    move |cause: io::Error| {
        let message = format!("IO error on {location}: {cause}");
        other_io_error(message)
    }
}

/// Create an [`io::Error`] of kind `Other` carrying `message` as its
/// payload.
fn other_io_error<S: AsRef<str>>(message: S) -> io::Error {
    let text: &str = message.as_ref();
    io::Error::other(text)
}

/// Convert a standard path into an owned UTF-8 path.
///
/// # Errors
///
/// Returns `NarError::Utf8PathError` if the path is not valid UTF-8.
fn to_utf8_path<P: AsRef<Path>>(path: P) -> Result<Utf8PathBuf, NarError> {
    let std_path = path.as_ref();
    let converted: Result<&Utf8Path, _> = std_path.try_into();
    match converted {
        Ok(utf8) => Ok(utf8.to_path_buf()),
        Err(err) => Err(NarError::Utf8PathError(format!(
            "Failed to convert '{}' to UTF-8: {err}",
            std_path.display()
        ))),
    }
}