use super::{PackValidationError, Validator};
use crate::pack::{ChildRef, PackManifest};
pub(crate) struct ChildPathValidator;
impl Validator for ChildPathValidator {
fn name(&self) -> &'static str {
"child_path_bare_name"
}
fn check(&self, pack: &PackManifest) -> Vec<PackValidationError> {
let mut errs = Vec::new();
for child in &pack.children {
if let Some(err) = check_one(child) {
errs.push(err);
}
}
errs
}
}
/// Validates a single child's path with [`reject_reason`].
///
/// When the child carries an explicit `path`, that string is validated and
/// is reported as both `child_name` and `path`. Otherwise the value of
/// `child.effective_path()` is validated and the error is attributed to the
/// child's `url`.
///
/// Returns `None` when the path is accepted, otherwise a
/// `PackValidationError::ChildPathInvalid` carrying the rejection reason.
#[must_use]
pub(crate) fn check_one(child: &ChildRef) -> Option<PackValidationError> {
    // Decide first who the error would be attributed to.
    let attribution = match child.path.as_deref() {
        Some(explicit) => Attribution::Explicit(explicit.to_string()),
        None => Attribution::UrlDerived(child.url.clone()),
    };
    // Then resolve the (reported name, validated path) pair from it.
    let (child_name, path) = match attribution {
        Attribution::Explicit(label) => (label.clone(), label),
        Attribution::UrlDerived(url) => (url, child.effective_path()),
    };
    let reason = reject_reason(&path)?.to_string();
    Some(PackValidationError::ChildPathInvalid { child_name, path, reason })
}
/// Records where a validated child path came from, so that `check_one` can
/// choose what to report as the offending child's name.
enum Attribution {
/// The manifest supplied an explicit `path`; the payload is that path string.
Explicit(String),
/// No explicit `path` was given; the payload is the child's `url`.
UrlDerived(String),
}
/// Validator that reports effective child paths shared by two or more
/// children of the same pack.
pub(crate) struct DupChildPathValidator;

impl Validator for DupChildPathValidator {
    /// Returns this validator's name, `child_path_no_duplicates`.
    fn name(&self) -> &'static str {
        "child_path_no_duplicates"
    }

    /// Groups the children's URLs by `effective_path()` and emits one
    /// `ChildPathDuplicate` error per path that has at least two URLs.
    ///
    /// Children whose effective path is rejected by [`reject_reason`] are
    /// left out of the grouping. Errors come out in the `BTreeMap`'s key
    /// order, i.e. sorted by path.
    fn check(&self, pack: &PackManifest) -> Vec<PackValidationError> {
        use std::collections::BTreeMap;

        let mut urls_by_path: BTreeMap<String, Vec<String>> = BTreeMap::new();
        for child in &pack.children {
            let effective = child.effective_path();
            if reject_reason(&effective).is_none() {
                urls_by_path.entry(effective).or_default().push(child.url.clone());
            }
        }

        urls_by_path
            .into_iter()
            .filter(|(_, urls)| urls.len() > 1)
            .map(|(path, urls)| PackValidationError::ChildPathDuplicate { path, urls })
            .collect()
    }
}
/// Explains why `path` is not an acceptable bare child name, or returns
/// `None` when it is acceptable.
///
/// The checks run in a fixed order and the first one that fails decides the
/// message: empty string, path separators (`/` or `\`), the special names
/// `.` / `..`, and finally the `^[a-z][a-z0-9-]*$` shape.
pub(crate) fn reject_reason(path: &str) -> Option<&'static str> {
    match path {
        "" => Some("empty string is not a valid child path"),
        p if p.contains(|c: char| matches!(c, '/' | '\\')) => {
            Some("path separators are not allowed (children[].path must be a bare name)")
        }
        "." | ".." => Some("`.` and `..` are not allowed (children[].path must be a bare name)"),
        p if !matches_bare_name_regex(p) => Some(
            "must match `^[a-z][a-z0-9-]*$` (letter-led, lowercase, digits and hyphens allowed)",
        ),
        _ => None,
    }
}
/// Hand-rolled equivalent of the pattern `^[a-z][a-z0-9-]*$`.
///
/// Works on bytes: every byte of a non-ASCII character is `>= 0x80`, so such
/// characters fail the ASCII range checks just as they would char-by-char.
fn matches_bare_name_regex(s: &str) -> bool {
    // An empty string has no leading letter and therefore cannot match.
    let Some((&head, tail)) = s.as_bytes().split_first() else {
        return false;
    };
    head.is_ascii_lowercase()
        && tail.iter().all(|&b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
}
// Windows reserved device names (CON, PRN, AUX, NUL, COM1-COM9, LPT1-LPT9),
// stored in upper case; `is_windows_reserved_name` compares against them
// ASCII-case-insensitively.
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8",
"COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
];
/// Explains why `path` is hazardous as a child path, or returns `None` when
/// none of the hazards below apply.
///
/// Checked in order, first hit wins: a colon anywhere, a dollar sign
/// anywhere, a `~` immediately followed by an ASCII digit, and a Windows
/// reserved device name (with or without an extension).
#[must_use]
pub(crate) fn boundary_reject_reason(path: &str) -> Option<&'static str> {
    let reason = if path.contains(':') {
        "colon `:` is not allowed in a child path (Windows drive / ADS hazard)"
    } else if path.contains('$') {
        "dollar `$` is not allowed in a child path (env-var interpolation hazard)"
    } else if has_tilde_digit_pattern(path) {
        "tilde-digit (`~1`/`~9`/...) is not allowed (Windows short-name hazard)"
    } else if is_windows_reserved_name(path) {
        "child path is a Windows reserved device name (CON/PRN/AUX/NUL/COM1-9/LPT1-9)"
    } else {
        return None;
    };
    Some(reason)
}
/// Returns `true` when `path` contains a `~` byte immediately followed by an
/// ASCII digit (for example `foo~1`).
fn has_tilde_digit_pattern(path: &str) -> bool {
    // Slide a two-byte window over the input; inputs shorter than two bytes
    // produce no windows and therefore never match.
    path.as_bytes()
        .windows(2)
        .any(|pair| pair[0] == b'~' && pair[1].is_ascii_digit())
}
/// Returns `true` when the part of `path` before its first `.` (or all of
/// `path` when it has no `.`) equals an entry of `WINDOWS_RESERVED`,
/// ignoring ASCII case.
fn is_windows_reserved_name(path: &str) -> bool {
    let stem = path.split_once('.').map_or(path, |(before_dot, _)| before_dot);
    WINDOWS_RESERVED.iter().any(|device| device.eq_ignore_ascii_case(stem))
}
/// Looks for two children whose effective paths become identical after
/// Unicode NFC normalisation.
///
/// Children are visited in order. Empty, `.` and `..` effective paths are
/// skipped. The return value is the original (un-normalised) effective path
/// of the first child whose normalised form was already seen, or `None` when
/// no such child exists.
#[must_use]
pub(crate) fn nfc_duplicate_path(children: &[ChildRef]) -> Option<String> {
    use std::collections::BTreeSet;
    use unicode_normalization::UnicodeNormalization;

    let mut normalised_seen: BTreeSet<String> = BTreeSet::new();
    children
        .iter()
        .map(|child| child.effective_path())
        .filter(|candidate| !matches!(candidate.as_str(), "" | "." | ".."))
        // `insert` returns false when the normalised form is already present.
        .find(|candidate| !normalised_seen.insert(candidate.as_str().nfc().collect::<String>()))
}
/// Inspects the on-disk state of `dest` and explains why it must not be
/// walked into, or returns `None` when nothing objectionable is found.
///
/// * If `dest` cannot be stat'ed (for example because it does not exist)
///   the result is `None`.
/// * A symlink at `dest` is rejected; the metadata is read without
///   following links.
/// * On Windows, a reparse point at `dest` is rejected.
/// * A regular file at `dest/.git` whose content looks like a gitfile
///   (`gitdir:` redirect) is rejected.
#[must_use]
pub(crate) fn boundary_fs_reject_reason(dest: &std::path::Path) -> Option<&'static str> {
    let dest_meta = std::fs::symlink_metadata(dest).ok()?;
    if dest_meta.file_type().is_symlink() {
        return Some(
            "child destination is a symlink — refusing to walk into it (boundary escape hazard)",
        );
    }

    #[cfg(target_os = "windows")]
    {
        if is_windows_reparse_point(&dest_meta) {
            return Some(
                "child destination is a Windows junction or reparse point — refusing to walk into it",
            );
        }
    }

    let dot_git = dest.join(".git");
    // Only a regular file can be a gitfile; a `.git` directory is fine.
    let dot_git_is_regular_file =
        std::fs::symlink_metadata(&dot_git).is_ok_and(|meta| meta.file_type().is_file());
    (dot_git_is_regular_file && file_is_gitfile(&dot_git))
        .then_some("child destination's `.git` is a gitfile redirect (boundary escape hazard)")
}
/// Returns `true` when the reparse-point attribute bit (`0x400`) is set in
/// the file attributes carried by `meta`. Only compiled on Windows.
#[cfg(target_os = "windows")]
fn is_windows_reparse_point(meta: &std::fs::Metadata) -> bool {
    use std::os::windows::fs::MetadataExt;

    const REPARSE_POINT_BIT: u32 = 0x0000_0400;
    let attributes = meta.file_attributes();
    (attributes & REPARSE_POINT_BIT) == REPARSE_POINT_BIT
}
/// Returns `true` when the file at `path` looks like a gitfile, i.e. its
/// content, after optional leading whitespace, starts with `gitdir:`.
///
/// Only the first 32 bytes are inspected. Any failure to open or read the
/// file yields `false`.
///
/// The sniffed bytes are decoded lossily on purpose: a fixed-size window can
/// cut a multi-byte UTF-8 character in half (for example a non-ASCII path
/// after `gitdir:`), and strict decoding would then fail and let a genuine
/// gitfile go undetected.
fn file_is_gitfile(path: &std::path::Path) -> bool {
    use std::io::Read;

    /// Number of leading bytes inspected.
    const SNIFF_LEN: u8 = 32;

    let Ok(file) = std::fs::File::open(path) else { return false };
    let mut head = Vec::with_capacity(usize::from(SNIFF_LEN));
    // `take` + `read_to_end` keeps reading until the window is full or EOF is
    // reached, so a short first read cannot truncate the sniffed prefix.
    if file.take(u64::from(SNIFF_LEN)).read_to_end(&mut head).is_err() {
        return false;
    }
    String::from_utf8_lossy(&head).trim_start().starts_with("gitdir:")
}
// Unit tests for the child-path validators and the boundary helpers above.
#[cfg(test)]
mod tests {
use super::*;
use crate::pack::{ChildRef, PackManifest, PackType, SchemaVersion};
use std::collections::BTreeMap;
// Builds a `Meta` manifest named "p" with one child per entry of `paths`;
// each child has an explicit `path` equal to the entry and a URL of the form
// `https://example.invalid/<entry>`.
fn pack_with_child_paths(paths: &[&str]) -> PackManifest {
let children = paths
.iter()
.map(|p| ChildRef {
url: format!("https://example.invalid/{p}"),
path: Some((*p).to_string()),
r#ref: None,
})
.collect();
PackManifest {
schema_version: SchemaVersion::current(),
name: "p".to_string(),
r#type: PackType::Meta,
version: None,
depends_on: Vec::new(),
children,
actions: Vec::new(),
teardown: None,
extensions: BTreeMap::new(),
}
}
// Runs `ChildPathValidator` against a manifest with a single child at `path`.
fn validate_path(path: &str) -> Vec<PackValidationError> {
ChildPathValidator.check(&pack_with_child_paths(&[path]))
}
// Each case pairs a rejected input with a substring that must appear in the
// reported reason; exactly one `ChildPathInvalid` error is expected per input.
#[test]
fn rejection_table() {
let cases: &[(&str, &str)] = &[
("", "empty"),
("foo/bar", "separator"),
("foo\\bar", "separator"),
("/abs", "separator"),
("../escape", "separator"),
(".", "`.` and `..`"),
("..", "`.` and `..`"),
("Foo", "`^[a-z]"),
("1foo", "letter-led"),
];
for (input, expected_reason_substr) in cases {
let errs = validate_path(input);
assert_eq!(errs.len(), 1, "input {input:?}");
match &errs[0] {
PackValidationError::ChildPathInvalid { path, reason, .. } => {
assert_eq!(path, input, "input {input:?}");
assert!(
reason.contains(expected_reason_substr),
"input {input:?} reason: {reason}",
);
}
other => panic!("input {input:?} wrong variant: {other:?}"),
}
}
}
// Names matching the bare-name shape must produce no errors.
#[test]
fn accept_table() {
for ok in ["foo", "a", "algo-leet", "foo-bar", "foo123", "a1-b2"] {
assert!(validate_path(ok).is_empty(), "input {ok:?} should accept");
}
}
// With no explicit `path`, the value derived from the URL is what gets
// validated, and the error is attributed to the URL.
#[test]
fn url_derived_tail_is_validated_when_path_absent() {
let ok = PackManifest {
schema_version: SchemaVersion::current(),
name: "p".to_string(),
r#type: PackType::Meta,
version: None,
depends_on: Vec::new(),
children: vec![ChildRef {
url: "https://example.invalid/foo.git".to_string(),
path: None,
r#ref: None,
}],
actions: Vec::new(),
teardown: None,
extensions: BTreeMap::new(),
};
assert!(ChildPathValidator.check(&ok).is_empty());
// A URL ending in `...git` is expected to yield the effective path `..`.
let bad = PackManifest {
schema_version: SchemaVersion::current(),
name: "p".to_string(),
r#type: PackType::Meta,
version: None,
depends_on: Vec::new(),
children: vec![ChildRef {
url: "https://example.invalid/...git".to_string(),
path: None,
r#ref: None,
}],
actions: Vec::new(),
teardown: None,
extensions: BTreeMap::new(),
};
let errs = ChildPathValidator.check(&bad);
assert_eq!(errs.len(), 1, "errs: {errs:?}");
match &errs[0] {
PackValidationError::ChildPathInvalid { child_name, path, .. } => {
assert_eq!(child_name, "https://example.invalid/...git");
assert_eq!(path, "..");
}
other => panic!("wrong variant: {other:?}"),
}
}
// One valid child and three invalid ones must yield exactly three errors.
#[test]
fn aggregates_errors_across_multiple_children() {
let pack = pack_with_child_paths(&["good", "foo/bar", "..", "ALSO-BAD"]);
let errs = ChildPathValidator.check(&pack);
assert_eq!(errs.len(), 3, "errs: {errs:?}");
}
// Builds a `Meta` manifest named "p" from `(url, optional explicit path)`
// pairs.
fn pack_with_children(entries: &[(&str, Option<&str>)]) -> PackManifest {
let children = entries
.iter()
.map(|(url, path)| ChildRef {
url: (*url).to_string(),
path: path.map(str::to_string),
r#ref: None,
})
.collect();
PackManifest {
schema_version: SchemaVersion::current(),
name: "p".to_string(),
r#type: PackType::Meta,
version: None,
depends_on: Vec::new(),
children,
actions: Vec::new(),
teardown: None,
extensions: BTreeMap::new(),
}
}
// Distinct explicit paths produce no duplicate error.
#[test]
fn dup_validator_passes_on_distinct_paths() {
let pack =
pack_with_children(&[("https://x/a.git", Some("a")), ("https://x/b.git", Some("b"))]);
assert!(DupChildPathValidator.check(&pack).is_empty());
}
// Two children sharing an explicit path produce one error listing both URLs.
#[test]
fn dup_validator_flags_two_children_at_same_explicit_path() {
let pack = pack_with_children(&[
("https://x/a.git", Some("foo")),
("https://y/b.git", Some("foo")),
]);
let errs = DupChildPathValidator.check(&pack);
assert_eq!(errs.len(), 1, "errs: {errs:?}");
match &errs[0] {
PackValidationError::ChildPathDuplicate { path, urls } => {
assert_eq!(path, "foo");
assert_eq!(urls.len(), 2);
assert!(urls.contains(&"https://x/a.git".to_string()));
assert!(urls.contains(&"https://y/b.git".to_string()));
}
other => panic!("wrong variant: {other:?}"),
}
}
// A URL-derived path and an explicit path with the same value collide.
#[test]
fn dup_validator_collides_explicit_path_with_url_tail() {
let pack = pack_with_children(&[
("https://x/foo.git", None),
("https://y/elsewhere.git", Some("foo")),
]);
let errs = DupChildPathValidator.check(&pack);
assert_eq!(errs.len(), 1, "errs: {errs:?}");
match &errs[0] {
PackValidationError::ChildPathDuplicate { path, urls } => {
assert_eq!(path, "foo");
assert_eq!(urls.len(), 2);
}
other => panic!("wrong variant: {other:?}"),
}
}
// Children with an invalid path are ignored by the duplicate check.
#[test]
fn dup_validator_skips_children_with_invalid_path() {
let pack = pack_with_children(&[
("https://x/a.git", Some("../escape")),
("https://x/b.git", Some("good")),
]);
assert!(DupChildPathValidator.check(&pack).is_empty());
}
// `boundary_reject_reason` must reject a colon and mention it in the reason.
#[test]
fn test_validator_rejects_colon_in_segment() {
let reason = boundary_reject_reason("child:foo")
.expect("colon must be rejected as a boundary-preservation hazard");
assert!(
reason.to_ascii_lowercase().contains("colon"),
"reason should mention `colon`: {reason}",
);
}
// `boundary_reject_reason` must reject a dollar sign and mention it.
#[test]
fn test_validator_rejects_dollar_in_segment() {
let reason = boundary_reject_reason("$home")
.expect("dollar must be rejected as a boundary-preservation hazard");
assert!(
reason.contains('$') || reason.to_ascii_lowercase().contains("dollar"),
"reason should mention `$`/dollar: {reason}",
);
}
// A tilde followed by a digit must be rejected.
#[test]
fn test_validator_rejects_tilde_digit_segment() {
for bad in ["foo~1", "bar~9", "x~12", "abc~3"] {
let reason = boundary_reject_reason(bad)
.unwrap_or_else(|| panic!("`{bad}` must be rejected (Windows 8.3 short-name)"));
assert!(
reason.contains('~') || reason.to_ascii_lowercase().contains("short"),
"reason should mention `~`/short-name: {reason}",
);
}
}
// A tilde not followed by a digit is accepted.
#[test]
fn test_validator_accepts_tilde_without_digit() {
assert!(boundary_reject_reason("foo~bar").is_none());
}
// Reserved device names are rejected regardless of ASCII case.
#[test]
fn test_validator_rejects_windows_reserved_name_bare() {
for variant in ["CON", "con", "Con", "PRN", "prn", "AUX", "NUL", "COM1", "com9", "LPT5"] {
let reason = boundary_reject_reason(variant)
.unwrap_or_else(|| panic!("`{variant}` must be rejected (Windows reserved)"));
assert!(
reason.to_ascii_lowercase().contains("reserved")
|| reason.to_ascii_lowercase().contains("windows"),
"reason should mention `reserved`/`windows` for {variant}: {reason}",
);
}
}
// Reserved device names followed by an extension are rejected as well.
#[test]
fn test_validator_rejects_windows_reserved_name_with_ext() {
for variant in ["con.txt", "CON.TXT", "nul.dat", "lpt1.log", "com3.bak"] {
let reason = boundary_reject_reason(variant)
.unwrap_or_else(|| panic!("`{variant}` must be rejected (Windows reserved + ext)"));
assert!(
reason.to_ascii_lowercase().contains("reserved")
|| reason.to_ascii_lowercase().contains("windows"),
"reason should mention `reserved`/`windows` for {variant}: {reason}",
);
}
}
// Names that merely start with a reserved name are not flagged.
#[test]
fn test_validator_accepts_windows_reserved_name_as_substring() {
for ok in ["concert", "console", "comic", "lpton", "auxiliary", "nullable"] {
assert!(
boundary_reject_reason(ok).is_none(),
"`{ok}` is a normal name, must NOT be flagged as Windows-reserved",
);
}
}
// Ordinary bare names pass the boundary check.
#[test]
fn test_validator_accepts_clean_paths() {
for ok in ["foo", "a", "algo-leet", "foo-bar", "foo123", "a1-b2", "pkg-name"] {
assert!(boundary_reject_reason(ok).is_none(), "`{ok}` should pass boundary check",);
}
}
// The precomposed and decomposed spellings of "café" must be reported as a
// duplicate by `nfc_duplicate_path`.
#[test]
fn test_validator_rejects_unicode_nfc_duplicate() {
let nfc = "caf\u{00e9}";
let nfd = "cafe\u{0301}";
let children = vec![
ChildRef {
url: "https://x/a.git".to_string(),
path: Some(nfc.to_string()),
r#ref: None,
},
ChildRef {
url: "https://x/b.git".to_string(),
path: Some(nfd.to_string()),
r#ref: None,
},
];
let dup = nfc_duplicate_path(&children)
.expect("NFC vs NFD siblings must be flagged as a duplicate");
assert!(
dup == nfc || dup == nfd,
"duplicate path must be one of the offending pair, got {dup:?}",
);
}
// "café" and "cafe" are different names and must not be reported.
#[test]
fn test_validator_accepts_distinct_unicode_paths() {
let children = vec![
ChildRef {
url: "https://x/a.git".to_string(),
path: Some("caf\u{00e9}".to_string()),
r#ref: None,
},
ChildRef {
url: "https://x/b.git".to_string(),
path: Some("cafe".to_string()),
r#ref: None,
},
];
assert!(nfc_duplicate_path(&children).is_none());
}
// A destination that does not exist yet is accepted.
#[test]
fn test_validator_fs_accepts_nonexistent_path() {
let outer = tempfile::tempdir().unwrap();
let dest = outer.path().join("not-yet-cloned");
assert!(boundary_fs_reject_reason(&dest).is_none());
}
// An ordinary empty directory is accepted.
#[test]
fn test_validator_fs_accepts_plain_directory() {
let outer = tempfile::tempdir().unwrap();
let dest = outer.path().join("plain-dir");
std::fs::create_dir(&dest).unwrap();
assert!(boundary_fs_reject_reason(&dest).is_none());
}
// A `.git` regular file starting with `gitdir:` is rejected.
#[test]
fn test_validator_rejects_gitfile_reference() {
let outer = tempfile::tempdir().unwrap();
let dest = outer.path().join("gitfile-child");
std::fs::create_dir(&dest).unwrap();
std::fs::write(dest.join(".git"), "gitdir: ../elsewhere/.git\n").unwrap();
let reason = boundary_fs_reject_reason(&dest)
.expect("gitfile-style `.git` reference must be rejected");
assert!(
reason.to_ascii_lowercase().contains("gitfile")
|| reason.to_ascii_lowercase().contains(".git"),
"reason should mention `.git`/`gitfile`: {reason}",
);
}
// A `.git` directory is accepted.
#[test]
fn test_validator_accepts_gitdir_directory() {
let outer = tempfile::tempdir().unwrap();
let dest = outer.path().join("normal-clone");
std::fs::create_dir_all(dest.join(".git")).unwrap();
assert!(boundary_fs_reject_reason(&dest).is_none());
}
// Windows only: a directory symlink destination must be rejected. The test
// returns early without asserting when the symlink cannot be created.
#[cfg(target_os = "windows")]
#[test]
fn test_validator_rejects_ntfs_reparse_point() {
let outer = tempfile::tempdir().unwrap();
let real = outer.path().join("real-target");
std::fs::create_dir(&real).unwrap();
let link = outer.path().join("via-reparse");
if std::os::windows::fs::symlink_dir(&real, &link).is_err() {
return;
}
let reason =
boundary_fs_reject_reason(&link).expect("Windows reparse-point dest must be rejected");
assert!(
reason.to_ascii_lowercase().contains("reparse")
|| reason.to_ascii_lowercase().contains("symlink")
|| reason.to_ascii_lowercase().contains("junction"),
"reason should mention reparse/symlink/junction: {reason}",
);
}
// Non-Windows counterpart of the reparse-point test; it only checks that a
// missing destination is accepted.
#[cfg(not(target_os = "windows"))]
#[test]
fn test_validator_ntfs_reparse_point_stub_non_windows() {
let outer = tempfile::tempdir().unwrap();
let dest = outer.path().join("missing");
assert!(boundary_fs_reject_reason(&dest).is_none());
}
}