pub mod modification;
use std::{borrow::Cow, cmp::Ordering, ffi::OsStr, fmt::Debug, path::Path};
#[cfg(not(windows))]
use std::fmt::Write;
#[cfg(not(windows))]
use std::num::ParseIntError;
#[cfg(not(windows))]
use std::os::unix::ffi::OsStrExt;
use derive_more::Constructor;
use jiff::Timestamp;
use serde_aux::prelude::*;
use serde_derive::{Deserialize, Serialize};
use serde_with::{
DefaultOnNull,
base64::{Base64, Standard},
formats::Padded,
serde_as, skip_serializing_none,
};
use crate::blob::{DataId, tree::TreeId};
use crate::repofile::RusticTime;
/// [`NodeErrorKind`] describes the errors that can occur while unescaping the stored name of a node
// NOTE: `displaydoc::Display` generates the `Display` text from the doc comment of each variant,
// so every variant carries exactly one `///` line; `{field}` placeholders refer to its fields.
#[cfg(not(windows))]
#[derive(thiserror::Error, Debug, displaydoc::Display)]
#[non_exhaustive]
pub enum NodeErrorKind<'a> {
/// Unexpected EOF while parsing filename: `{file_name}`
#[cfg(not(windows))]
UnexpectedEOF {
/// The (escaped) filename that was being parsed
file_name: String,
},
/// Invalid unicode code point in filename: `{file_name}`
#[cfg(not(windows))]
InvalidUnicode {
/// The (escaped) filename that was being parsed
file_name: String,
},
/// Unrecognized escape while parsing filename: `{file_name}`
#[cfg(not(windows))]
UnrecognizedEscape {
/// The (escaped) filename that was being parsed
file_name: String,
},
/// Parsing hex chars `{hex}` failed in filename: `{file_name}` (remaining: {chars:?}): `{source}`
#[cfg(not(windows))]
ParsingHexFailed {
/// The (escaped) filename that was being parsed
file_name: String,
/// The characters that were expected to be two hex digits
hex: String,
/// The characters of the filename that had not been consumed yet
chars: std::str::Chars<'a>,
/// The underlying integer parsing error
source: ParseIntError,
},
/// Parsing unicode chars as `{target}` failed in filename: `{file_name}` (remaining: {chars:?}): `{source}`
#[cfg(not(windows))]
ParsingUnicodeFailed {
/// The (escaped) filename that was being parsed
file_name: String,
/// Name of the integer type the hex digits were parsed into
target: String,
/// The characters of the filename that had not been consumed yet
chars: std::str::Chars<'a>,
/// The underlying integer parsing error
source: ParseIntError,
},
}
/// Shorthand for a `Result` whose error type is [`NodeErrorKind`].
///
/// The lifetime `'a` is the lifetime of the string borrowed by the error's `chars` fields.
#[cfg(not(windows))]
pub(crate) type NodeResult<'a, T> = Result<T, NodeErrorKind<'a>>;
/// A node of a tree: a named entry with a type, metadata and optional content/subtree ids.
///
/// The derived `Constructor` and `Ord` implementations depend on the order of the fields below.
#[derive(
Default, Serialize, Deserialize, Clone, Debug, PartialEq, Eq, Constructor, PartialOrd, Ord,
)]
pub struct Node {
/// Name of the node.
///
/// `Node::new_node` stores the name in escaped form; use `Node::name` to get the unescaped name.
pub name: String,
/// Type of the node; its fields are flattened into the serialized node.
#[serde(flatten)]
pub node_type: NodeType,
/// Metadata of the node; its fields are flattened into the serialized node.
#[serde(flatten)]
pub meta: Metadata,
/// Ids of the data belonging to this node, if any.
///
/// A missing or `null` value deserializes to the default (`None`).
#[serde(default, deserialize_with = "deserialize_default_from_null")]
pub content: Option<Vec<DataId>>,
/// Id of the subtree belonging to this node, if any; not serialized when `None`.
// NOTE(review): presumably only set for directory nodes - confirm against the callers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub subtree: Option<TreeId>,
}
/// The type of a [`Node`].
///
/// Serialized with an internal `type` tag holding the lowercase variant name.
/// The `Display` implementation (via `strum`) prints the strings given in the `strum` attributes.
#[serde_as]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, strum::Display)]
#[serde(tag = "type", rename_all = "lowercase")]
#[derive(Default)]
pub enum NodeType {
/// Node is a regular file (this is the default variant)
#[strum(to_string = "file")]
#[default]
File,
/// Node is a directory
#[strum(to_string = "dir")]
Dir,
/// Node is a symlink
#[strum(to_string = "symlink:{linktarget}")]
Symlink {
/// The target of the symlink as a string.
///
/// `NodeType::from_link` fills this with a lossy conversion if the target is not valid unicode.
linktarget: String,
/// The raw bytes of the link target, serialized as padded standard base64.
///
/// Only set by `NodeType::from_link` (non-Windows) when the target is not valid unicode;
/// a `null` value deserializes to `None` and `None` is not serialized.
#[serde_as(as = "DefaultOnNull<Option<Base64::<Standard,Padded>>>")]
#[serde(default, skip_serializing_if = "Option::is_none")]
linktarget_raw: Option<Vec<u8>>,
},
/// Node is a device
// NOTE(review): presumably a block device (as opposed to `Chardev`) - confirm.
#[strum(to_string = "dev:{device}")]
Dev {
/// Device number; defaults to 0 when missing in the serialized form
#[serde(default)]
device: u64,
},
/// Node is a character device
#[strum(to_string = "chardev:{device}")]
Chardev {
/// Device number; defaults to 0 when missing in the serialized form
#[serde(default)]
device: u64,
},
/// Node is a fifo
#[strum(to_string = "fifo")]
Fifo,
/// Node is a socket
#[strum(to_string = "socket")]
Socket,
}
impl NodeType {
#[cfg(not(windows))]
#[must_use]
pub fn from_link(target: &Path) -> Self {
let (linktarget, linktarget_raw) = target.to_str().map_or_else(
|| {
(
target.as_os_str().to_string_lossy().to_string(),
Some(target.as_os_str().as_bytes().to_vec()),
)
},
|t| (t.to_string(), None),
);
Self::Symlink {
linktarget,
linktarget_raw,
}
}
#[cfg(windows)]
#[must_use]
pub fn from_link(target: &Path) -> Self {
Self::Symlink {
linktarget: target.as_os_str().to_string_lossy().to_string(),
linktarget_raw: None,
}
}
#[cfg(not(windows))]
#[must_use]
pub fn to_link(&self) -> &Path {
match self {
Self::Symlink {
linktarget,
linktarget_raw,
} => linktarget_raw.as_ref().map_or_else(
|| Path::new(linktarget),
|t| Path::new(OsStr::from_bytes(t)),
),
_ => panic!("called method to_link on non-symlink!"),
}
}
#[cfg(windows)]
#[must_use]
pub fn to_link(&self) -> &Path {
match self {
Self::Symlink { linktarget, .. } => Path::new(linktarget),
_ => panic!("called method to_link on non-symlink!"),
}
}
}
/// Metadata of a [`Node`].
///
/// Serialization notes, as configured by the attributes below:
/// * `Option` fields that are `None` are not serialized (`skip_serializing_none`).
/// * `u64` fields default to 0 when missing and are not serialized when they equal the default
///   (`serde_with::apply` with `is_default`).
/// * Timestamps are (de)serialized through `RusticTime`.
#[serde_as]
#[skip_serializing_none]
#[serde_with::apply(
u64 => #[serde(default, skip_serializing_if = "is_default")],
)]
#[derive(Serialize, Deserialize, Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct Metadata {
/// Mode of the node, if known
// NOTE(review): presumably the Unix mode/permission bits - confirm.
pub mode: Option<u32>,
/// Modification time, if known (used by `last_modified_node` for ordering)
#[serde_as(as = "Option<RusticTime>")]
pub mtime: Option<Timestamp>,
/// Access time, if known
#[serde_as(as = "Option<RusticTime>")]
pub atime: Option<Timestamp>,
/// Change time, if known
#[serde_as(as = "Option<RusticTime>")]
pub ctime: Option<Timestamp>,
/// Numeric user id, if known
pub uid: Option<u32>,
/// Numeric group id, if known
pub gid: Option<u32>,
/// User name, if known
pub user: Option<String>,
/// Group name, if known
pub group: Option<String>,
/// Inode number
pub inode: u64,
/// Device id
pub device_id: u64,
/// Size
// NOTE(review): presumably the size in bytes - confirm.
pub size: u64,
/// Number of links
// NOTE(review): presumably the hard link count - confirm.
pub links: u64,
/// Extended attributes; defaults to empty and is not serialized when empty
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub extended_attributes: Vec<ExtendedAttribute>,
}
/// Checks whether a value equals the default value of its type.
///
/// Referenced by name as the `skip_serializing_if` predicate for the `u64` fields of `Metadata`.
///
/// # Arguments
///
/// * `t` - The value to check
///
/// # Returns
///
/// `true` if `t` compares equal to `T::default()`, `false` otherwise.
pub(crate) fn is_default<T: Default + PartialEq>(t: &T) -> bool {
    let default_value = T::default();
    *t == default_value
}
/// An extended attribute of a node, stored in `Metadata::extended_attributes`.
#[serde_as]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
pub struct ExtendedAttribute {
/// Name of the extended attribute
pub name: String,
/// Value of the extended attribute, serialized as padded standard base64.
///
/// A `null` value deserializes to the default (`None`).
#[serde_as(as = "DefaultOnNull<Option<Base64::<Standard,Padded>>>")]
pub value: Option<Vec<u8>>,
}
impl Node {
    /// Creates a new [`Node`] without content and without a subtree.
    ///
    /// # Arguments
    ///
    /// * `name` - Name of the node; it is stored in escaped form
    /// * `node_type` - Type of the node
    /// * `meta` - Metadata of the node
    #[must_use]
    pub fn new_node(name: &OsStr, node_type: NodeType, meta: Metadata) -> Self {
        let name = escape_filename(name);
        Self {
            name,
            node_type,
            meta,
            content: None,
            subtree: None,
        }
    }

    /// Returns `true` if the node is a [`NodeType::Dir`].
    #[must_use]
    pub const fn is_dir(&self) -> bool {
        let node_type = &self.node_type;
        matches!(node_type, NodeType::Dir)
    }

    /// Returns `true` if the node is a [`NodeType::Symlink`].
    #[must_use]
    pub const fn is_symlink(&self) -> bool {
        let node_type = &self.node_type;
        matches!(node_type, NodeType::Symlink { .. })
    }

    /// Returns `true` if the node is a [`NodeType::File`].
    #[must_use]
    pub const fn is_file(&self) -> bool {
        let node_type = &self.node_type;
        matches!(node_type, NodeType::File)
    }

    /// Returns `true` if the node is neither a file nor a directory, i.e. it is a
    /// symlink, device, character device, fifo or socket.
    #[must_use]
    pub const fn is_special(&self) -> bool {
        // Every variant is listed so that adding a new one forces a decision here.
        match self.node_type {
            NodeType::File | NodeType::Dir => false,
            NodeType::Symlink { .. }
            | NodeType::Dev { .. }
            | NodeType::Chardev { .. }
            | NodeType::Fifo
            | NodeType::Socket => true,
        }
    }

    /// Returns the unescaped name of the node.
    ///
    /// If the stored name cannot be unescaped, the stored name is returned as-is.
    #[must_use]
    pub fn name(&self) -> Cow<'_, OsStr> {
        let stored = &self.name;
        unescape_filename(stored).unwrap_or_else(|_| Cow::Borrowed(OsStr::new(stored)))
    }
}
/// Compares two nodes by their modification time (`meta.mtime`).
///
/// # Arguments
///
/// * `n1` - First node
/// * `n2` - Second node
///
/// # Returns
///
/// The ordering of `n1.meta.mtime` relative to `n2.meta.mtime`. As with any `Option`,
/// a missing `mtime` (`None`) orders before a present one.
#[must_use]
pub fn last_modified_node(n1: &Node, n2: &Node) -> Ordering {
    let (first, second) = (&n1.meta.mtime, &n2.meta.mtime);
    Ord::cmp(first, second)
}
/// Converts a filename into the string stored in a node (Windows variant).
///
/// No escaping is done on Windows; the name is converted lossily into a `String`.
///
/// # Arguments
///
/// * `name` - The filename to convert
#[cfg(windows)]
fn escape_filename(name: &OsStr) -> String {
    let lossy = name.to_string_lossy();
    lossy.to_string()
}
/// Converts a stored node name back into a filename (Windows variant).
///
/// No unescaping is done on Windows; the string is borrowed as-is, so this never fails.
///
/// # Arguments
///
/// * `s` - The stored name
#[cfg(windows)]
fn unescape_filename(s: &str) -> Result<Cow<'_, OsStr>, core::convert::Infallible> {
    let borrowed: &OsStr = s.as_ref();
    Ok(Cow::Borrowed(borrowed))
}
/// Escapes a filename so that it can be stored as a valid unicode string.
///
/// Backslash, double quote and the control characters BEL, BS, FF, LF, CR, TAB and VT are
/// written as backslash escapes (`\\`, `\"`, `\a`, `\b`, `\f`, `\n`, `\r`, `\t`, `\v`).
/// Bytes that are not part of valid UTF-8 are written as `\xNN` with two lowercase hex digits.
/// All other characters are copied unchanged.
///
/// # Arguments
///
/// * `name` - The filename to escape
#[cfg(not(windows))]
fn escape_filename(name: &OsStr) -> String {
    /// Appends valid text to `out`, replacing the characters that need a backslash escape.
    fn push_text(out: &mut String, text: &str) {
        for ch in text.chars() {
            let replacement = match ch {
                '\\' => "\\\\",
                '\"' => "\\\"",
                '\u{7}' => "\\a",
                '\u{8}' => "\\b",
                '\u{c}' => "\\f",
                '\n' => "\\n",
                '\r' => "\\r",
                '\t' => "\\t",
                '\u{b}' => "\\v",
                plain => {
                    out.push(plain);
                    continue;
                }
            };
            out.push_str(replacement);
        }
    }

    /// Appends every byte of `bytes` to `out` as a `\xNN` escape.
    fn push_hex(out: &mut String, bytes: &[u8]) {
        for byte in bytes {
            write!(out, "\\x{byte:02x}").unwrap();
        }
    }

    let mut remaining = name.as_bytes();
    let mut escaped = String::with_capacity(name.len());

    loop {
        let error = match std::str::from_utf8(remaining) {
            Ok(text) => {
                push_text(&mut escaped, text);
                return escaped;
            }
            Err(error) => error,
        };

        // Everything before `valid_up_to` is known to be valid UTF-8.
        let (valid, tail) = remaining.split_at(error.valid_up_to());
        push_text(&mut escaped, std::str::from_utf8(valid).unwrap());

        match error.error_len() {
            // An invalid sequence of known length: escape it and continue behind it.
            Some(invalid_len) => {
                let (invalid, rest) = tail.split_at(invalid_len);
                push_hex(&mut escaped, invalid);
                remaining = rest;
            }
            // The input ended in the middle of a sequence: escape all that is left.
            None => {
                push_hex(&mut escaped, tail);
                return escaped;
            }
        }
    }
}
/// Unescapes a stored node name back into a filename.
///
/// The following escapes are recognized after a backslash: `\\`, `\"`, `\'`, `` \` ``, `\a`,
/// `\b`, `\f`, `\n`, `\r`, `\t`, `\v`, `\xHH` (a single raw byte), as well as `\uHHHH` and
/// `\UHHHHHHHH` (a unicode scalar value which is appended UTF-8 encoded).
///
/// # Arguments
///
/// * `s` - The escaped name
///
/// # Returns
///
/// The borrowed input if it contains no backslash, otherwise an owned `OsString` built from
/// the unescaped bytes.
///
/// # Errors
///
/// * [`NodeErrorKind::UnexpectedEOF`] - If the name ends directly after a backslash
/// * [`NodeErrorKind::UnrecognizedEscape`] - If a backslash is followed by an unknown character
/// * [`NodeErrorKind::ParsingHexFailed`] - If the characters after `\x` cannot be parsed as hex
/// * [`NodeErrorKind::ParsingUnicodeFailed`] - If the characters after `\u`/`\U` cannot be
///   parsed as hex
/// * [`NodeErrorKind::InvalidUnicode`] - If the parsed number is not a valid `char`
#[cfg(not(windows))]
fn unescape_filename(s: &str) -> NodeResult<'_, Cow<'_, OsStr>> {
    /// Reads `digits` characters from `chars`, parses them as a hexadecimal number and
    /// converts that number into a `char`. Shared by the `\u` and `\U` escapes.
    fn unicode_escape<'a>(
        file_name: &str,
        chars: &mut std::str::Chars<'a>,
        digits: usize,
    ) -> NodeResult<'a, char> {
        let number = u32::from_str_radix(&take(&mut *chars, digits), 16).map_err(|err| {
            NodeErrorKind::ParsingUnicodeFailed {
                file_name: file_name.to_string(),
                target: "u32".to_string(),
                chars: chars.clone(),
                source: err,
            }
        })?;
        std::char::from_u32(number).ok_or_else(|| NodeErrorKind::InvalidUnicode {
            file_name: file_name.to_string(),
        })
    }

    // Fast path: without a backslash there is nothing to unescape.
    if !s.contains('\\') {
        return Ok(Cow::Borrowed(OsStr::new(s)));
    }

    let mut chars = s.chars();
    let mut u = Vec::with_capacity(s.len());
    // Scratch space for UTF-8 encoding; a `char` never needs more than 4 bytes, so no
    // allocation per character is required.
    let mut utf8 = [0u8; 4];

    while let Some(c) = chars.next() {
        if c != '\\' {
            u.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            continue;
        }

        let Some(escape) = chars.next() else {
            return Err(NodeErrorKind::UnexpectedEOF {
                file_name: s.to_string(),
            });
        };

        match escape {
            '\\' => u.push(b'\\'),
            '"' => u.push(b'"'),
            '\'' => u.push(b'\''),
            '`' => u.push(b'`'),
            'a' => u.push(b'\x07'),
            'b' => u.push(b'\x08'),
            'f' => u.push(b'\x0c'),
            'n' => u.push(b'\n'),
            'r' => u.push(b'\r'),
            't' => u.push(b'\t'),
            'v' => u.push(b'\x0b'),
            'x' => {
                let hex = take(&mut chars, 2);
                let parsed = u8::from_str_radix(&hex, 16);
                match parsed {
                    Ok(byte) => u.push(byte),
                    Err(source) => {
                        return Err(NodeErrorKind::ParsingHexFailed {
                            file_name: s.to_string(),
                            hex,
                            chars,
                            source,
                        });
                    }
                }
            }
            'u' | 'U' => {
                // `\u` is followed by 4 hex digits, `\U` by 8.
                let digits = if escape == 'u' { 4 } else { 8 };
                let decoded = unicode_escape(s, &mut chars, digits)?;
                u.extend_from_slice(decoded.encode_utf8(&mut utf8).as_bytes());
            }
            _ => {
                return Err(NodeErrorKind::UnrecognizedEscape {
                    file_name: s.to_string(),
                });
            }
        }
    }

    // Hand the buffer over to the `OsString` instead of copying it once more.
    Ok(Cow::Owned(
        <std::ffi::OsString as std::os::unix::ffi::OsStringExt>::from_vec(u),
    ))
}
/// Takes `n` characters from an iterator and collects them into a `String`.
///
/// The iterator is always advanced `n` times. Positions for which the iterator yields nothing
/// are filled with `char::default()` (the NUL character), so the result always holds `n` chars.
///
/// # Arguments
///
/// * `iterator` - The iterator to take the characters from
/// * `n` - The number of characters to take
#[cfg(not(windows))]
#[inline]
fn take<I: Iterator<Item = char>>(iterator: &mut I, n: usize) -> String {
    let mut taken = String::with_capacity(n);
    taken.extend((0..n).map(|_| iterator.next().unwrap_or_default()));
    taken
}
// Tests for the (non-Windows) filename escaping and for the symlink target conversion.
#[cfg(not(windows))]
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
use rstest::rstest;
// Property: unescaping an escaped name yields the original bytes, for arbitrary byte strings.
proptest! {
#[test]
fn escape_unescape_is_identity(bytes in prop::collection::vec(prop::num::u8::ANY, 0..65536)) {
let name = OsStr::from_bytes(&bytes);
let escaped = escape_filename(name);
prop_assert_eq!(name, unescape_filename(escaped.as_ref()).unwrap());
}
}
// Each case is (raw input bytes, expected escaped string).
// Covers the backslash escapes, unescaped quotes, invalid bytes (`\xNN`) and valid
// multi-byte UTF-8 sequences, which must be kept unchanged.
#[rstest]
#[case(b"\\", r#"\\"#)]
#[case(b"\"", r#"\""#)]
#[case(b"'", r#"'"#)]
#[case(b"`", r#"`"#)]
#[case(b"\x07", r#"\a"#)]
#[case(b"\x08", r#"\b"#)]
#[case(b"\x0b", r#"\v"#)]
#[case(b"\x0c", r#"\f"#)]
#[case(b"\n", r#"\n"#)]
#[case(b"\r", r#"\r"#)]
#[case(b"\t", r#"\t"#)]
#[case(b"\xab", r#"\xab"#)]
#[case(b"\xc2", r#"\xc2"#)]
#[case(b"\xff", r#"\xff"#)]
#[case(b"\xc3\x9f", "\u{00df}")]
#[case(b"\xe2\x9d\xa4", "\u{2764}")]
#[case(b"\xf0\x9f\x92\xaf", "\u{01f4af}")]
fn escape_cases(#[case] input: &[u8], #[case] expected: &str) {
let name = OsStr::from_bytes(input);
assert_eq!(expected, escape_filename(name));
}
// Each case is (escaped string, expected raw bytes).
// Also covers escapes that `escape_filename` never produces but `unescape_filename`
// accepts: `\'`, `` \` ``, uppercase hex digits, `\u` and `\U`.
#[rstest]
#[case(r#"\\"#, b"\\")]
#[case(r#"\""#, b"\"")]
#[case(r#"\'"#, b"\'")]
#[case(r#"\`"#, b"`")]
#[case(r#"\a"#, b"\x07")]
#[case(r#"\b"#, b"\x08")]
#[case(r#"\v"#, b"\x0b")]
#[case(r#"\f"#, b"\x0c")]
#[case(r#"\n"#, b"\n")]
#[case(r#"\r"#, b"\r")]
#[case(r#"\t"#, b"\t")]
#[case(r#"\xab"#, b"\xab")]
#[case(r#"\xAB"#, b"\xab")]
#[case(r#"\xFF"#, b"\xff")]
#[case(r#"\u00df"#, b"\xc3\x9f")]
#[case(r#"\u00DF"#, b"\xc3\x9f")]
#[case(r#"\u2764"#, b"\xe2\x9d\xa4")]
#[case(r#"\U0001f4af"#, b"\xf0\x9f\x92\xaf")]
fn unescape_cases(#[case] input: &str, #[case] expected: &[u8]) {
let expected = OsStr::from_bytes(expected);
assert_eq!(expected, unescape_filename(input).unwrap());
}
// Property: a symlink built with `from_link` returns the same path from `to_link`,
// for arbitrary (also non-unicode) byte strings.
proptest! {
#[test]
fn from_link_to_link_is_identity(bytes in prop::collection::vec(prop::num::u8::ANY, 0..65536)) {
let path = Path::new(OsStr::from_bytes(&bytes));
let node = NodeType::from_link(path);
prop_assert_eq!(path, node.to_link());
}
}
}