use std::collections::HashMap;
use crate::error::{Error, Result};
use crate::record::RecordId;
/// Upper bound, in bytes, on the length of a slug returned by [`slugify_title`].
///
/// `slugify_title` also uses four times this value as the maximum number of
/// input characters it inspects before building the slug.
pub const SLUG_MAX_BYTES: usize = 60;
pub fn validate_path_component(name: &str) -> Result<&str> {
if name.is_empty() {
return Err(Error::InvalidPath(String::from("<empty>")));
}
if name == "." || name == ".." {
return Err(Error::InvalidPath(name.to_owned()));
}
for b in name.bytes() {
if b == b'/' || b == 0 {
return Err(Error::InvalidPath(name.to_owned()));
}
}
Ok(name)
}
pub fn validate_record_filename(name: &str) -> Result<RecordId> {
validate_path_component(name)?;
let prefix = name
.strip_suffix(".md")
.ok_or_else(|| Error::InvalidPath(name.to_owned()))?;
if prefix.is_empty() {
return Err(Error::InvalidPath(name.to_owned()));
}
if !prefix.bytes().all(|b| b.is_ascii_digit()) {
return Err(Error::InvalidPath(name.to_owned()));
}
let n = prefix
.parse::<u64>()
.map_err(|_| Error::InvalidPath(name.to_owned()))?;
Ok(RecordId(n))
}
/// Turns a free-form title into a lowercase slug of ASCII letters, digits and `-`.
///
/// Only the first `SLUG_MAX_BYTES * 4` characters of `title` are considered.
/// They are lowercased, every run of characters that is not ASCII
/// alphanumeric becomes a single `-`, and leading/trailing dashes are
/// dropped. The result is cut to at most [`SLUG_MAX_BYTES`] bytes and never
/// ends with `-`. The returned string may be empty (for example when the
/// title contains no ASCII letters or digits).
#[must_use]
pub fn slugify_title(title: &str) -> String {
    // Bound the amount of input we lowercase and scan.
    let lowered = title
        .chars()
        .take(SLUG_MAX_BYTES * 4)
        .collect::<String>()
        .to_lowercase();

    // Alphanumeric runs joined by one dash: no leading, trailing or doubled dashes.
    let mut slug = lowered
        .split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|run| !run.is_empty())
        .collect::<Vec<_>>()
        .join("-");

    if slug.len() > SLUG_MAX_BYTES {
        // The slug is pure ASCII here, so any byte index is a char boundary.
        slug.truncate(SLUG_MAX_BYTES);
        // The cut may land right after a separator; drop it.
        let kept = slug.trim_end_matches('-').len();
        slug.truncate(kept);
    }
    slug
}
/// Returns the slug for `title`, or an id-derived name when the slug is unusable.
///
/// The slug comes from [`slugify_title`]. If it is empty, equals `.` or `..`,
/// or consists only of dashes, the result is `page-` followed by the record
/// id zero-padded to 11 digits.
#[must_use]
pub fn slug_or_fallback(title: &str, id: RecordId) -> String {
    let slug = slugify_title(title);

    // Defensive: these shapes must never be used as a name, whatever the slugifier produced.
    let unusable = matches!(slug.as_str(), "" | "." | "..") || slug.bytes().all(|b| b == b'-');
    if unusable {
        return format!("page-{:011}", id.0);
    }
    slug
}
/// Makes the slugs of sibling records unique.
///
/// The entries are sorted by [`RecordId`] (stable, so equal ids keep their
/// input order) and returned in that order. The first record to claim a slug
/// keeps it unchanged; each later record with the same slug receives
/// `"{slug}-{n}"` with `n` starting at 2 and increasing.
///
/// A generated name is never handed out if it is already in use, whether by
/// a record whose own slug happens to look like `foo-2` or by an earlier
/// generated name; in that case `n` keeps increasing until the name is free.
/// Every slug in the result is therefore distinct.
///
/// No entries are dropped, and the output depends only on the input.
#[must_use]
pub fn dedupe_siblings(mut siblings: Vec<(RecordId, String)>) -> Vec<(RecordId, String)> {
    siblings.sort_by_key(|(id, _)| *id);

    // Key: every name emitted so far. Value: the highest suffix already tried
    // for that name when it is used as a base (1 means "only the bare name").
    let mut taken: HashMap<String, u32> = HashMap::with_capacity(siblings.len());
    let mut out = Vec::with_capacity(siblings.len());

    for (id, slug) in siblings {
        let final_slug = if taken.contains_key(&slug) {
            let mut n = taken[&slug];
            let candidate = loop {
                n += 1;
                let candidate = format!("{slug}-{n}");
                // Skip suffixed names that some other record already owns.
                if !taken.contains_key(&candidate) {
                    break candidate;
                }
            };
            // Remember where to resume so the next collider does not rescan.
            taken.insert(slug, n);
            candidate
        } else {
            slug
        };
        taken.insert(final_slug.clone(), 1);
        out.push((id, final_slug));
    }
    out
}
// Unit tests for the path validators, the slug generator, the id-based
// fallback and sibling de-duplication defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// Digit-only stems followed by `.md` parse to the numeric id; leading zeros are tolerated.
// NOTE(review): the `validate_issue_filename_*` test names exercise
// `validate_record_filename` — the names look stale; confirm before renaming.
#[test]
fn validate_issue_filename_accepts_digits_md() {
assert_eq!(validate_record_filename("0.md").unwrap().0, 0);
assert_eq!(validate_record_filename("123.md").unwrap().0, 123);
assert_eq!(validate_record_filename("00042.md").unwrap().0, 42);
}
// Anything that is not `<digits>.md` (title-like names, traversal, mixed
// alphanumerics) must be rejected with `Error::InvalidPath`.
#[test]
fn filename_is_id_derived_not_title_derived() {
for bad in [
"../etc/passwd.md",
"my bug.md",
"thing is broken.md",
"readme.md",
"abc.md",
"12a.md",
] {
let err = validate_record_filename(bad).unwrap_err();
assert!(
matches!(err, Error::InvalidPath(_)),
"{bad:?} must be rejected; got {err:?}"
);
}
}
// Each hostile name must be refused by at least one of the two validators;
// the trailing asserts pin two specific traversal cases.
#[test]
fn path_with_dotdot_or_nul_is_rejected() {
for bad in [
"..", ".", "", "a/b", "a\0b", "/", "\0", "/123.md", "123.md/", "\0.md",
] {
let err = validate_path_component(bad);
let err2 = validate_record_filename(bad);
assert!(
err.is_err() || err2.is_err(),
"{bad:?} must be rejected by at least one validator"
);
}
assert!(validate_path_component("..").is_err());
assert!(validate_record_filename("../123.md").is_err());
}
// Missing stem, missing or wrong suffix, separators, NUL and trailing
// newline are all `Error::InvalidPath` for the record-filename validator.
#[test]
fn validate_issue_filename_rejects_junk() {
for bad in [
"", ".md", "..md", "123", "123.txt", "123.md/", "/123.md", "\0.md", "123.md\n", ".",
"..",
] {
let err = validate_record_filename(bad).unwrap_err();
assert!(
matches!(err, Error::InvalidPath(_)),
"{bad:?} must be rejected"
);
}
}
// A digit string too large for `u64` is reported as `InvalidPath`, not a panic.
#[test]
fn validate_issue_filename_rejects_overflow() {
let too_big = "999999999999999999999999999999.md";
let err = validate_record_filename(too_big).unwrap_err();
assert!(matches!(err, Error::InvalidPath(_)));
}
// Ordinary names are returned unchanged.
#[test]
fn validate_path_component_accepts_normal() {
assert_eq!(validate_path_component("foo").unwrap(), "foo");
assert_eq!(validate_path_component("foo.md").unwrap(), "foo.md");
assert_eq!(validate_path_component("issue-123").unwrap(), "issue-123");
}
// Empty, dot, dot-dot, and names containing `/` or NUL are refused.
#[test]
fn validate_path_component_rejects_danger() {
for bad in ["", ".", "..", "a/b", "a\0b", "/", "\0"] {
assert!(
validate_path_component(bad).is_err(),
"{bad:?} must be rejected"
);
}
}
// Punctuation and spaces collapse to a single dash; output is lowercase.
#[test]
fn slug_simple_ascii() {
assert_eq!(slugify_title("Hello, World!"), "hello-world");
}
// Surrounding whitespace does not produce leading or trailing dashes.
#[test]
fn slug_strips_leading_and_trailing_whitespace() {
assert_eq!(
slugify_title(" leading and trailing "),
"leading-and-trailing"
);
}
// NOTE(review): the input below contains only a single space, so this does
// not exercise collapsing of repeated separators — confirm the intended literal.
#[test]
fn slug_collapses_multiple_spaces() {
assert_eq!(slugify_title("multiple spaces"), "multiple-spaces");
}
// Mixed-case multi-word title.
#[test]
fn slug_welcome_to_reposix() {
assert_eq!(slugify_title("Welcome to reposix"), "welcome-to-reposix");
}
// Empty title yields an empty slug.
#[test]
fn slug_empty_input_is_empty() {
assert_eq!(slugify_title(""), "");
}
// A title with no ASCII alphanumerics yields an empty slug.
#[test]
fn slug_full_multibyte_is_empty() {
assert_eq!(slugify_title("日本語"), "");
}
// Non-ASCII characters are dropped; ASCII letters survive.
#[test]
fn slug_emoji_stripped_alnum_preserved() {
assert_eq!(slugify_title("🚀 Rocket"), "rocket");
}
// Dashes alone leave nothing once leading/trailing dashes are removed.
#[test]
fn slug_all_dashes_is_empty() {
assert_eq!(slugify_title("---"), "");
}
// `.` can never come out of the slugifier.
#[test]
fn slug_single_dot_is_empty() {
assert_eq!(slugify_title("."), "");
}
// `..` can never come out of the slugifier.
#[test]
fn slug_double_dot_is_empty() {
assert_eq!(slugify_title(".."), "");
}
// Pins the constant's value and checks that a 100-character title is cut to exactly that length.
#[test]
fn slug_respects_max_bytes_and_const_is_60() {
assert_eq!(SLUG_MAX_BYTES, 60);
let long = "A".repeat(100);
let s = slugify_title(&long);
assert!(
s.len() <= SLUG_MAX_BYTES,
"slug length {} exceeded SLUG_MAX_BYTES {}",
s.len(),
SLUG_MAX_BYTES
);
assert_eq!(s.len(), SLUG_MAX_BYTES);
assert!(s.chars().all(|c| c == 'a'));
}
// A non-ASCII letter between dashes is treated as a separator and merged with them.
#[test]
fn slug_non_ascii_alphanumeric_becomes_separator() {
assert_eq!(slugify_title("a-\u{00e9}-b"), "a-b");
}
// Smoke test: truncating a long title must not panic.
// NOTE(review): the input is pure ASCII and the result is discarded, so
// this only checks for absence of a panic.
#[test]
fn slug_truncation_is_char_boundary_safe_on_long_alpha() {
let long = "A".repeat(100);
let _ = slugify_title(&long);
}
// When the byte limit falls right after a separator, the dangling dash is removed.
#[test]
fn slug_truncation_trims_trailing_dash_after_cut() {
let input = format!("{}-tail", "a".repeat(59));
let s = slugify_title(&input);
assert!(!s.ends_with('-'), "slug must not end with '-': {s:?}");
assert!(s.len() <= SLUG_MAX_BYTES);
}
// Hostile titles (traversal, NUL, shell metacharacters, bidi override, tab)
// must still yield only ASCII alphanumerics and dashes.
#[test]
fn slug_is_ascii_alnum_dash_only_over_adversarial_inputs() {
let adversarial = [
"../../../etc/passwd",
"foo/bar",
"foo\0bar",
"$(rm -rf /)",
"`whoami`",
"hello;ls",
"\u{202e}reverse", "tab\there",
];
for input in adversarial {
let s = slugify_title(input);
assert!(
s.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'),
"slugify_title({input:?}) = {s:?} contains forbidden char"
);
assert!(
s != "." && s != "..",
"slug must not be '.' or '..'; got {s:?} from {input:?}"
);
assert!(!s.contains('/'), "slug must not contain '/'; got {s:?}");
assert!(!s.contains('\0'), "slug must not contain NUL; got {s:?}");
}
}
// Empty title falls back to `page-` plus the id zero-padded to 11 digits.
#[test]
fn fallback_on_empty_input() {
assert_eq!(slug_or_fallback("", RecordId(42)), "page-00000000042");
}
// A dashes-only title slugifies to nothing and therefore falls back.
#[test]
fn fallback_on_all_dashes() {
assert_eq!(slug_or_fallback("---", RecordId(7)), "page-00000000007");
}
// `..` as a title falls back rather than producing a traversal name.
#[test]
fn fallback_on_double_dot() {
assert_eq!(slug_or_fallback("..", RecordId(3)), "page-00000000003");
}
// A title without ASCII alphanumerics falls back.
#[test]
fn fallback_on_all_multibyte() {
assert_eq!(
slug_or_fallback("日本語", RecordId(100)),
"page-00000000100"
);
}
// A usable slug is returned as-is and the id is ignored.
#[test]
fn fallback_passthrough_for_nonempty_slug() {
assert_eq!(slug_or_fallback("Welcome", RecordId(1)), "welcome");
}
// Output is ordered by id; the lowest id keeps the bare slug and the next collider gets `-2`.
#[test]
fn dedupe_assigns_suffix_to_lower_id_first() {
let input = vec![
(RecordId(5), "foo".to_owned()),
(RecordId(3), "foo".to_owned()),
(RecordId(4), "bar".to_owned()),
];
let got = dedupe_siblings(input);
assert_eq!(
got,
vec![
(RecordId(3), "foo".to_owned()),
(RecordId(4), "bar".to_owned()),
(RecordId(5), "foo-2".to_owned()),
]
);
}
// Three records sharing a slug receive the bare slug, `-2` and `-3` in id order.
#[test]
fn dedupe_three_colliders_get_ascending_suffixes() {
let input = vec![
(RecordId(30), "same".to_owned()),
(RecordId(10), "same".to_owned()),
(RecordId(20), "same".to_owned()),
];
let got = dedupe_siblings(input);
assert_eq!(
got,
vec![
(RecordId(10), "same".to_owned()),
(RecordId(20), "same-2".to_owned()),
(RecordId(30), "same-3".to_owned()),
]
);
}
// No siblings in, no siblings out.
#[test]
fn dedupe_empty_input_is_empty() {
let got = dedupe_siblings(Vec::new());
assert!(got.is_empty());
}
// Every input id appears exactly once in the output, whatever the collisions.
#[test]
fn dedupe_preserves_all_entries() {
let input: Vec<(RecordId, String)> = (1..=10)
.map(|i| (RecordId(i), format!("slug-{}", i % 3)))
.collect();
let got = dedupe_siblings(input.clone());
assert_eq!(got.len(), input.len(), "no entries may be dropped");
let mut ids: Vec<u64> = got.iter().map(|(id, _)| id.0).collect();
ids.sort_unstable();
assert_eq!(ids, (1..=10).collect::<Vec<_>>());
}
// Two runs over the same input must agree.
#[test]
fn dedupe_is_deterministic() {
let input = vec![
(RecordId(2), "foo".to_owned()),
(RecordId(1), "foo".to_owned()),
(RecordId(4), "bar".to_owned()),
(RecordId(3), "foo".to_owned()),
];
let a = dedupe_siblings(input.clone());
let b = dedupe_siblings(input);
assert_eq!(a, b, "dedupe must be deterministic");
}
}