nix-nar 0.4.0

Library to manipulate Nix Archive (nar) files
Documentation
//! Actual implementation of the NAR parser.
//!
//! The Nix Archive format, according to page 93 of "The Purely
//! Functional Deployment Model" by Eelco Dolstra.
//!
//! Note: This module intentionally uses `as usize` casts from u64 because
//! NAR files are designed for reasonable file sizes where truncation won't occur.
#![allow(clippy::cast_possible_truncation)]
//!
//! ```text
//!     serialise(fso) = str("nix-archive-1") + serialise'(fso)
//!
//!     serialise'(fso) = str("(") + serialise''(fso) + str(")")
//!
//!     serialise''(Regular exec contents) =
//!       str("type") + str("regular")
//!       + {  str("executable") + str(""), if exec = Executable
//!         OR ""                         , if exec = NonExecutable
//!         }
//!       + str("contents") + str(contents)
//!
//!     serialise''(SymLink target) =
//!       str("type") + str("symlink") + str("target") + str(target)
//!
//!     serialise''(Directory entries) =
//!       str("type") + str("directory") + concatMap(serialiseEntry, sortEntries(entries))
//!
//!     serialiseEntry((name, fso)) =
//!       str("entry") + str("(")
//!       + str("name") + str(name)
//!       + str("node") + serialise'(fso)
//!       + str(")")
//!
//!     str(s) = int(|s|) + pad(s)
//!
//!     int(n) = the 64-bit little endian representation of the number n
//!
//!     pad(s) = the byte sequence s, padded with 0s to a multiple of 8 bytes
//! ```

use std::io::Read;
use std::str::FromStr;

use camino::Utf8PathBuf;

use crate::dec::DecoderInner;
use crate::error::NarError;

/// Outcome of a single [`parse_next`] step over the NAR token stream.
pub enum ParseResult {
    /// A `(` token was read and the typed node following it was parsed.
    Node(Node),
    /// An `entry` token was read: the entry's name together with the node
    /// header that follows it.
    DirectoryEntry(Utf8PathBuf, Node),
    /// A `)` token was read.
    ParenClose,
}

/// Header of a file system object as it appears in a NAR stream.
pub enum Node {
    /// A regular file.
    ///
    /// `size` is the length prefix of the file contents; the parser in this
    /// module reads only that prefix, not the contents themselves.
    Regular { executable: bool, size: u64 },
    /// A symbolic link pointing at `target`.
    Symlink { target: String },
    /// A directory; its entries follow as separate tokens.
    Directory,
}

impl Node {
    /// Returns the NAR type tag (`"regular"`, `"symlink"` or `"directory"`)
    /// corresponding to this node's variant.
    pub fn variant_name(&self) -> &'static str {
        match *self {
            Self::Directory => "directory",
            Self::Symlink { .. } => "symlink",
            Self::Regular { .. } => "regular",
        }
    }
}

/// Consumes the next token and requires it to be a closing parenthesis `)`.
///
/// # Errors
///
/// Propagates whatever [`expect_str`] returns when the next token cannot be
/// read or is not `)`.
pub fn parse_paren_close<R: Read>(reader: &DecoderInner<R>) -> Result<(), NarError> {
    expect_str(reader, ")")
}

/// Reads the next token and dispatches on it.
///
/// * `(` starts a node: the `type` header that follows is parsed and returned
///   as [`ParseResult::Node`].
/// * `)` yields [`ParseResult::ParenClose`].
/// * `entry` starts a directory entry, parsed by [`parse_directory_entry`].
///
/// # Errors
///
/// Returns [`NarError::ParseError`] for any other token, and propagates
/// errors from reading the token or from the delegated parsers.
pub fn parse_next<R: Read>(reader: &DecoderInner<R>) -> Result<ParseResult, NarError> {
    let next = read_str(reader)?;
    match next.as_str() {
        "(" => Ok(ParseResult::Node(parse_typed_node(reader)?)),
        ")" => Ok(ParseResult::ParenClose),
        "entry" => parse_directory_entry(reader),
        // The message previously lacked its closing parenthesis.
        other => Err(NarError::ParseError(format!(
            "unexpected str in parse_next: '{other}' (expected '(', ')', or 'entry')"
        ))),
    }
}

/// Parses a node header that must begin with the `type` keyword.
///
/// The keyword is consumed here; everything after it is handled by
/// [`parse_node`].
///
/// # Errors
///
/// Returns [`NarError::ParseError`] when the next token is not `type`, and
/// propagates errors from reading the token or from [`parse_node`].
pub fn parse_typed_node<R: Read>(reader: &DecoderInner<R>) -> Result<Node, NarError> {
    let keyword = read_str(reader)?;
    if keyword == "type" {
        return parse_node(reader);
    }
    Err(NarError::ParseError(format!(
        "unexpected str in parse_typed_node: '{str}' (expected 'type')",
        str = keyword
    )))
}

/// Parses the part of a node header that follows the `type` keyword.
///
/// * `regular`: reads the optional `executable` marker and the `contents`
///   keyword, then only the length prefix of the contents. The contents
///   themselves and the closing `)` are left in the stream.
/// * `symlink`: reads `target`, the target string and the closing `)`.
/// * `directory`: reads nothing further.
///
/// # Errors
///
/// Returns [`NarError::ParseError`] for an unknown type tag or an unexpected
/// token inside a regular-file header, and propagates read errors.
pub fn parse_node<R: Read>(reader: &DecoderInner<R>) -> Result<Node, NarError> {
    let kind = read_str(reader)?;
    match kind.as_str() {
        "directory" => Ok(Node::Directory),
        "symlink" => {
            expect_str(reader, "target")?;
            let target = read_str(reader)?;
            expect_str(reader, ")")?;
            Ok(Node::Symlink { target })
        }
        "regular" => {
            let marker = read_str(reader)?;
            let executable = match marker.as_str() {
                "contents" => false,
                "executable" => {
                    // The executable marker is followed by an empty string
                    // and only then by the `contents` keyword.
                    expect_str(reader, "")?;
                    expect_str(reader, "contents")?;
                    true
                }
                file_type => {
                    return Err(NarError::ParseError(format!(
                        "unknown type of regular file: '{file_type}'",
                    )));
                }
            };
            let size = read_str_len(reader)?;
            Ok(Node::Regular { executable, size })
        }
        top_level_tag => Err(NarError::ParseError(format!(
            "unknown top-level entry: '{top_level_tag}'",
        ))),
    }
}

/// Parses a directory entry after its leading `entry` token has been read.
///
/// Consumes `(`, `name`, the entry name, `node`, `(` and the typed node
/// header, and returns them as [`ParseResult::DirectoryEntry`].
///
/// # Errors
///
/// Returns [`NarError::ParseError`] when the name is not valid UTF-8 or
/// cannot be turned into a path, and propagates errors from the token
/// readers and from [`parse_typed_node`].
pub fn parse_directory_entry<R: Read>(
    reader: &DecoderInner<R>,
) -> Result<ParseResult, NarError> {
    expect_str(reader, "(")?;
    expect_str(reader, "name")?;

    // The name is read as raw bytes so that invalid UTF-8 gets a
    // filename-specific error message.
    let raw_name = read_vec(reader)?;
    let name_str = match String::from_utf8(raw_name) {
        Ok(decoded) => decoded,
        Err(e) => {
            return Err(NarError::ParseError(format!(
                "failed to parse filename: {e}",
            )));
        }
    };

    expect_str(reader, "node")?;
    expect_str(reader, "(")?;
    let node = parse_typed_node(reader)?;

    let path = match Utf8PathBuf::from_str(name_str.as_str()) {
        Ok(path) => path,
        Err(e) => return Err(NarError::ParseError(format!("failed to parse: {e}"))),
    };
    Ok(ParseResult::DirectoryEntry(path, node))
}

/// Reads an `int(n)` value: eight bytes interpreted as a little-endian `u64`.
///
/// # Errors
///
/// Propagates the error from `read_exact` when eight bytes cannot be read.
pub fn read_str_len<R: Read>(mut reader: &DecoderInner<R>) -> Result<u64, NarError> {
    let mut raw = [0u8; 8];
    reader.read_exact(&mut raw)?;
    let value = u64::from_le_bytes(raw);
    Ok(value)
}

/// Reads a complete `str(s)` value: the length prefix followed by the padded
/// payload.
///
/// # Errors
///
/// Propagates errors from [`read_str_len`] and [`read_str_with_len`].
pub fn read_str<R: Read>(reader: &DecoderInner<R>) -> Result<String, NarError> {
    read_str_len(reader).and_then(|len| read_str_with_len(reader, len))
}

/// Reads the payload of a `str(s)` value whose length prefix `len` has
/// already been consumed.
///
/// The payload occupies `len` bytes rounded up to a multiple of 8; the extra
/// bytes are padding and must all be zero. A `len` of zero reads nothing.
///
/// # Errors
///
/// Returns [`NarError::ParseError`] when
/// * `len` rounded up does not fit in `usize` (or overflows `u64`),
/// * the payload is not valid UTF-8, or
/// * any padding byte is non-zero.
///
/// Read errors from the underlying reader are propagated.
pub fn read_str_with_len<R: Read>(
    mut reader: &DecoderInner<R>,
    len: u64,
) -> Result<String, NarError> {
    if len == 0 {
        return Ok(String::new());
    }

    // Round up to the next multiple of 8 without overflowing, and make sure
    // the result is addressable on this platform.
    let len_rounded_up = match len.checked_add(7) {
        Some(bumped) if (bumped & !7) <= usize::MAX as u64 => bumped & !7,
        _ => {
            return Err(NarError::ParseError(format!(
                "str length {len} is too large"
            )));
        }
    };
    // Both casts are lossless: len <= len_rounded_up <= usize::MAX.
    let payload_len = len as usize;
    // NOTE(review): the length comes straight from the input, so a hostile
    // archive can still request a very large allocation here.
    let mut buf: Vec<u8> = vec![0; len_rounded_up as usize];
    reader.read_exact(&mut buf)?;

    let (payload, padding) = buf.split_at(payload_len);
    let parsed = match std::str::from_utf8(payload) {
        Ok(parsed) => parsed,
        Err(err) => {
            return Err(NarError::ParseError(format!(
                "error interpreting '{:?}' as str: {err}",
                payload
            )));
        }
    };

    // Check every padding byte, including the last one.
    if let Some(offset) = padding.iter().position(|&byte| byte != 0) {
        return Err(NarError::ParseError(format!(
            "expected only zeroes in str padding, found '{}' at {}",
            padding[offset],
            payload_len + offset
        )));
    }
    Ok(parsed.to_string())
}

pub fn read_vec<R: Read>(mut reader: &DecoderInner<R>) -> Result<Vec<u8>, NarError> {
    let len = read_str_len(reader)?;
    if len == 0 {
        return Ok("".as_bytes().into());
    }
    let len_rounded_up = (len + 7) & !7;
    let mut buf: Vec<u8> = vec![0; len_rounded_up as usize];
    reader.read_exact(&mut buf)?;
    buf.truncate(len as usize);
    Ok(buf)
}

/// Reads the next `str(s)` value and requires it to equal `expected_str`.
///
/// The length prefix is checked first, so a mismatching token is rejected
/// before its payload is read.
///
/// # Errors
///
/// Returns [`NarError::ParseError`] when the length or the content differs
/// from `expected_str`, and propagates errors from [`read_str_len`] and
/// [`read_str_with_len`].
pub fn expect_str<'a, R: Read>(
    reader: &'a DecoderInner<R>,
    expected_str: &'a str,
) -> Result<(), NarError> {
    let found_len = read_str_len(reader)?;
    // Compare in u64 space: narrowing `found_len` to usize could truncate on
    // 32-bit targets and let an oversized length pass this check.
    if found_len != expected_str.len() as u64 {
        return Err(NarError::ParseError(format!(
            "expected '{expected_str}', got a str of length {found_len}",
        )));
    }
    let found_str = read_str_with_len(reader, found_len)?;
    if found_str == expected_str {
        Ok(())
    } else {
        Err(NarError::ParseError(format!(
            "expected '{expected_str}', got: '{found_str}'",
        )))
    }
}