use std::collections::{HashMap, HashSet};
/// Produces the canonical form of a label, used as the lookup key when two
/// labels are compared.
///
/// Every character is lowercased with `char::to_lowercase`, leading and
/// trailing whitespace is dropped, and each internal run of whitespace is
/// replaced by a single ASCII space.
pub(super) fn normalize_label(label: &str) -> String {
    let mut normalized = String::with_capacity(label.len());
    for word in label.split_whitespace() {
        // Separate words with exactly one space; nothing precedes the first.
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        // Lowercasing may expand one character into several, hence `flat_map`.
        normalized.extend(word.chars().flat_map(char::to_lowercase));
    }
    normalized
}
/// Turns arbitrary text into a lowercase, dash-separated slug.
///
/// Maximal runs of alphanumeric characters (per `char::is_alphanumeric`) are
/// lowercased and joined with a single `-`; everything else acts purely as a
/// separator. The result never starts or ends with `-` and is empty when the
/// input contains no alphanumeric character.
pub(super) fn slugify(text: &str) -> String {
    let runs = text
        .split(|c: char| !c.is_alphanumeric())
        // Adjacent separators produce empty pieces; they must not emit dashes.
        .filter(|run| !run.is_empty());

    let mut slug = String::with_capacity(text.len());
    for (index, run) in runs.enumerate() {
        if index > 0 {
            slug.push('-');
        }
        slug.extend(run.chars().flat_map(char::to_lowercase));
    }
    slug
}
/// Outcome of asking `LabelGenerator` for a label.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) struct LabelChoice {
/// Label text as it should be written; this is the original spelling, not
/// the normalized lookup form.
pub label: String,
/// `false` when the choice reuses a definition the generator was seeded
/// with via `LabelGenerator::from_existing`, `true` otherwise.
/// NOTE(review): presumably callers use this to decide whether a new
/// definition has to be emitted — confirm against the call sites.
pub is_new: bool,
}
/// Identity of a destination: `(url, optional title)`. Two destinations with
/// the same url but different titles (or title vs. no title) are distinct keys.
type DefKey = (String, Option<String>);
/// Destination that a normalized label is bound to inside `LabelGenerator`.
#[derive(Debug, Clone)]
struct ExistingTarget {
/// Destination url, compared verbatim (no normalization is applied).
url: String,
/// Optional title; `None` and `Some(..)` are treated as different targets.
title: Option<String>,
}
/// Allocates labels for `(url, title)` destinations.
///
/// Labels are kept unique under `normalize_label`, and a destination that
/// already has a label gets that same label back on later requests.
pub(super) struct LabelGenerator {
    /// Normalized label -> destination it is bound to. Holds the seeded
    /// definitions as well as every label handed out since.
    by_label: HashMap<String, ExistingTarget>,
    /// Destination -> label text recorded for it. The first label recorded
    /// for a destination wins and is never overwritten.
    by_def: HashMap<DefKey, String>,
    /// Destinations that came from the seed definitions.
    existing_defs: HashSet<DefKey>,
    /// Normalized labels that came from the seed definitions. Tracked
    /// separately from `existing_defs` because `reserve_exact` can bind an
    /// additional, unseeded label to a seeded destination.
    existing_labels: HashSet<String>,
}

impl LabelGenerator {
    /// Builds a generator seeded with already-present definitions, given as
    /// `(label, url, optional title)` triples.
    ///
    /// When two definitions share a normalized label only the first is kept;
    /// later ones are ignored entirely, so their destinations are not
    /// considered pre-existing.
    pub(super) fn from_existing<I, L, U, T>(existing: I) -> Self
    where
        I: IntoIterator<Item = (L, U, Option<T>)>,
        L: AsRef<str>,
        U: AsRef<str>,
        T: AsRef<str>,
    {
        let mut by_label: HashMap<String, ExistingTarget> = HashMap::new();
        let mut by_def: HashMap<DefKey, String> = HashMap::new();
        let mut existing_defs: HashSet<DefKey> = HashSet::new();
        let mut existing_labels: HashSet<String> = HashSet::new();

        for (label, url, title) in existing {
            let label = label.as_ref();
            let normalized = normalize_label(label);
            // A later definition with the same normalized label is shadowed.
            if by_label.contains_key(&normalized) {
                continue;
            }

            // Only allocate the owned url/title once the definition is kept.
            let key: DefKey = (
                url.as_ref().to_string(),
                title.as_ref().map(|t| t.as_ref().to_string()),
            );
            existing_labels.insert(normalized.clone());
            by_label.insert(
                normalized,
                ExistingTarget {
                    url: key.0.clone(),
                    title: key.1.clone(),
                },
            );
            // Several labels may point at one destination; prefer the first.
            by_def.entry(key.clone()).or_insert_with(|| label.to_string());
            existing_defs.insert(key);
        }

        Self {
            by_label,
            by_def,
            existing_defs,
            existing_labels,
        }
    }

    /// Tries to bind exactly `label` to the destination `(url, title)`.
    ///
    /// Returns `None` when the normalized label is already bound to a
    /// different destination. Otherwise returns the caller's spelling of the
    /// label; `is_new` is `false` only when that label was part of the seed
    /// definitions, and stays `true` on repeated calls for a label that was
    /// first reserved through this generator.
    pub(super) fn reserve_exact(
        &mut self,
        label: &str,
        url: &str,
        title: Option<&str>,
    ) -> Option<LabelChoice> {
        let normalized = normalize_label(label);
        let is_new = match self.by_label.get(&normalized) {
            Some(bound) if bound.url == url && bound.title.as_deref() == title => {
                // Decide by label, not by destination: the destination may be
                // seeded under another label while this one was added later.
                !self.existing_labels.contains(&normalized)
            }
            Some(_) => return None,
            None => {
                self.by_label.insert(
                    normalized,
                    ExistingTarget {
                        url: url.to_string(),
                        title: title.map(str::to_string),
                    },
                );
                true
            }
        };

        let key: DefKey = (url.to_string(), title.map(str::to_string));
        self.by_def.entry(key).or_insert_with(|| label.to_string());
        Some(LabelChoice {
            label: label.to_string(),
            is_new,
        })
    }

    /// Returns the label to use for the destination `(url, title)`.
    ///
    /// A destination that already has a recorded label gets it back, with
    /// `is_new == false` only if the destination was seeded. Otherwise a label
    /// is derived from `slugify(text)`, then `slugify(url)`, then the literal
    /// `"ref"`, and `-2`, `-3`, ... is appended until the normalized label is
    /// not bound to a different destination.
    pub(super) fn label_for(&mut self, text: &str, url: &str, title: Option<&str>) -> LabelChoice {
        let key: DefKey = (url.to_string(), title.map(str::to_string));
        if let Some(recorded) = self.by_def.get(&key) {
            // For a seeded destination the recorded label is always a seeded
            // one, because `by_def` entries are never overwritten.
            return LabelChoice {
                label: recorded.clone(),
                is_new: !self.existing_defs.contains(&key),
            };
        }

        let mut base = slugify(text);
        if base.is_empty() {
            base = slugify(url);
        }
        if base.is_empty() {
            base = "ref".to_string();
        }

        let mut candidate = base.clone();
        let mut suffix = 2u32;
        loop {
            let normalized = normalize_label(&candidate);
            match self.by_label.get(&normalized) {
                // Defensive arm: every path that inserts into `by_label` also
                // records the destination in `by_def`, so the early return
                // above normally handles this case.
                Some(bound) if bound.url == url && bound.title.as_deref() == title => {
                    let is_new = !self.existing_labels.contains(&normalized);
                    self.by_def.insert(key, candidate.clone());
                    return LabelChoice {
                        label: candidate,
                        is_new,
                    };
                }
                // Taken by another destination: try the next numbered variant.
                Some(_) => {
                    candidate = format!("{base}-{suffix}");
                    suffix += 1;
                }
                None => {
                    self.by_label.insert(
                        normalized,
                        ExistingTarget {
                            url: url.to_string(),
                            title: title.map(str::to_string),
                        },
                    );
                    self.by_def.insert(key, candidate.clone());
                    return LabelChoice {
                        label: candidate,
                        is_new: true,
                    };
                }
            }
        }
    }
}
// Unit tests for the slug/label helpers and for `LabelGenerator`.
// Several generator tests depend on the order of calls, because each call
// mutates the generator's internal tables.
#[cfg(test)]
mod tests {
use super::*;
// Whitespace becomes a single dash and letters are lowercased.
#[test]
fn slugify_basic() {
assert_eq!(slugify("Hello World"), "hello-world");
assert_eq!(slugify("documentation"), "documentation");
assert_eq!(slugify("API Reference Guide"), "api-reference-guide");
}
// Runs of punctuation collapse to one dash; leading/trailing runs vanish.
#[test]
fn slugify_punctuation_collapses_to_dash() {
assert_eq!(slugify("foo!bar"), "foo-bar");
assert_eq!(slugify("a.b.c"), "a-b-c");
assert_eq!(slugify("!!!hello!!!"), "hello");
}
// Non-ASCII letters count as alphanumeric and are kept verbatim.
#[test]
fn slugify_unicode_kept_as_alphanumeric() {
assert_eq!(slugify("café"), "café");
assert_eq!(slugify("日本語"), "日本語");
assert_eq!(slugify("hello 日本"), "hello-日本");
}
// Input without any alphanumeric character yields an empty slug.
#[test]
fn slugify_empty_for_punctuation_only() {
assert_eq!(slugify(""), "");
assert_eq!(slugify("!!!"), "");
assert_eq!(slugify("---"), "");
}
// Normalization lowercases, trims and collapses whitespace; dashes are kept.
#[test]
fn normalize_label_case_and_whitespace() {
assert_eq!(normalize_label("Hello World"), "hello world");
assert_eq!(normalize_label("\tfoo\nbar "), "foo bar");
assert_eq!(normalize_label("ALREADY-LOW"), "already-low");
}
// Helper: generator with no seeded definitions.
fn no_existing() -> LabelGenerator {
LabelGenerator::from_existing(std::iter::empty::<(&str, &str, Option<&str>)>())
}
// Helper: generator seeded with `(label, url, title)` definitions.
fn with_existing(defs: Vec<(&str, &str, Option<&str>)>) -> LabelGenerator {
LabelGenerator::from_existing(defs)
}
// The same destination gets the same label regardless of the link text.
// Both results stay `is_new` because nothing was seeded.
#[test]
fn generator_reuses_label_for_same_url_and_title() {
let mut g = no_existing();
let l1 = g.label_for("docs", "https://example.com/x", None);
let l2 = g.label_for("documentation", "https://example.com/x", None);
assert_eq!(l1.label, l2.label);
assert!(l1.is_new);
assert!(l2.is_new);
}
// The title is part of the destination identity: same url, different titles
// must end up with pairwise different labels.
#[test]
fn generator_disambiguates_same_url_different_titles() {
let mut g = no_existing();
let a = g.label_for("first", "https://example.com", Some("Title A"));
let b = g.label_for("second", "https://example.com", Some("Title B"));
assert_ne!(a.label, b.label, "different titles must get different labels");
assert!(a.is_new);
assert!(b.is_new);
let c = g.label_for("third", "https://example.com", Some("Title C"));
assert_ne!(c.label, a.label);
assert_ne!(c.label, b.label);
}
// `Some(title)` and `None` are different destinations even with identical
// text and url. NOTE: the `Some("")` case mentioned in the test name is not
// exercised here.
#[test]
fn generator_treats_no_title_distinctly_from_empty_or_present_title() {
let mut g = no_existing();
let with_title = g.label_for("a", "https://example.com", Some("T"));
let without_title = g.label_for("a", "https://example.com", None);
assert_ne!(
with_title.label, without_title.label,
"presence vs absence of title must produce distinct labels"
);
}
// Colliding slugs for different destinations get numeric suffixes that
// start at 2 and increase by one.
#[test]
fn generator_disambiguates_collision() {
let mut g = no_existing();
let a = g.label_for("docs", "https://a.example.com", None);
let b = g.label_for("docs", "https://b.example.com", None);
assert_eq!(a.label, "docs");
assert_eq!(b.label, "docs-2");
let c = g.label_for("docs", "https://c.example.com", None);
assert_eq!(c.label, "docs-3");
}
// A seeded definition is reused for its own destination (not new) and
// blocks its label for any other destination.
#[test]
fn generator_respects_existing_labels() {
let mut g = with_existing(vec![("docs", "https://existing.com/docs", None)]);
let same = g.label_for("documentation", "https://existing.com/docs", None);
assert_eq!(same.label, "docs");
assert!(!same.is_new, "reusing pre-existing ref def must report is_new=false");
let diff = g.label_for("docs", "https://other.com/docs", None);
assert_eq!(diff.label, "docs-2");
assert!(diff.is_new);
}
// Empty link text: the label is derived from the url instead.
#[test]
fn generator_falls_back_to_url_when_text_empty() {
let mut g = no_existing();
let choice = g.label_for("", "https://example.com/page", None);
assert_eq!(choice.label, "https-example-com-page");
assert!(choice.is_new);
}
// Empty text and empty url: the literal "ref" is used, and the second call
// reuses it because the destination is the same.
#[test]
fn generator_falls_back_to_ref_when_both_empty() {
let mut g = no_existing();
let first = g.label_for("", "", None);
assert_eq!(first.label, "ref");
assert!(first.is_new);
let again = g.label_for("", "", None);
assert_eq!(again.label, "ref");
assert!(again.is_new);
}
// Collisions are decided on normalized labels: "Hello World" normalizes to
// "hello world", which does not clash with the slug "hello-world", whereas a
// seeded "hello-world" does clash and forces the "-2" suffix.
#[test]
fn generator_normalizes_when_checking_collision() {
let mut g = with_existing(vec![("Hello World", "https://existing.com", None)]);
let choice = g.label_for("Hello World", "https://other.com", None);
assert_eq!(choice.label, "hello-world");
assert!(choice.is_new);
let mut g2 = with_existing(vec![("hello-world", "https://existing.com", None)]);
let choice2 = g2.label_for("Hello World", "https://other.com", None);
assert_eq!(choice2.label, "hello-world-2");
assert!(choice2.is_new);
}
// A seeded destination returns its seeded label even when the link text
// would slugify to something else.
#[test]
fn generator_marks_existing_url_match_as_not_new() {
let mut g = with_existing(vec![("site", "https://example.com", None)]);
let choice = g.label_for("docs", "https://example.com", None);
assert_eq!(choice.label, "site");
assert!(!choice.is_new);
}
// "DOCS" normalizes to the same label as the earlier "docs", so the second
// seed definition is dropped and its destination is treated as unseeded.
#[test]
fn generator_skips_shadowed_existing_defs() {
let mut g = with_existing(vec![
("docs", "https://first.com", None),
("DOCS", "https://second.com", None),
]);
let choice = g.label_for("anchor", "https://second.com", None);
assert_ne!(choice.label, "docs");
assert_ne!(choice.label, "DOCS");
assert!(choice.is_new);
}
// `reserve_exact` refuses a label bound to the same url with another title,
// and reuses the seeded label (not new) when url and title both match.
#[test]
fn reserve_exact_treats_title_as_part_of_identity() {
let mut g = with_existing(vec![("anchor", "https://example.com", Some("T1"))]);
assert!(g.reserve_exact("anchor", "https://example.com", Some("T2")).is_none());
let reuse = g
.reserve_exact("anchor", "https://example.com", Some("T1"))
.expect("same destination must reuse");
assert_eq!(reuse.label, "anchor");
assert!(!reuse.is_new);
}
}