use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, LazyLock, RwLock};
use crate::transliterate;
use crate::limits::{MAX_REGEX_DFA_BYTES, MAX_REGEX_PATTERN_BYTES};
/// Upper bound on the number of digit characters buffered while parsing a
/// single numeric character reference (`&#...;` / `&#x...;`).
const MAX_ENTITY_DIGITS: usize = 10;
/// Compiles `pattern` into a [`regex::Regex`] under the crate's resource limits.
///
/// # Errors
///
/// * `ErrorRepr::RegexTooLong` when the pattern is longer than
///   `MAX_REGEX_PATTERN_BYTES` bytes (checked before any compilation work).
/// * `ErrorRepr::RegexCompile` when the regex builder rejects the pattern,
///   including when the compiled program exceeds `MAX_REGEX_DFA_BYTES`.
fn compile_regex(pattern: &str) -> Result<regex::Regex, crate::ErrorRepr> {
    let len = pattern.len();
    if len > MAX_REGEX_PATTERN_BYTES {
        return Err(crate::ErrorRepr::RegexTooLong {
            len,
            max: MAX_REGEX_PATTERN_BYTES,
        });
    }

    let built = regex::RegexBuilder::new(pattern)
        .size_limit(MAX_REGEX_DFA_BYTES)
        .build();
    match built {
        Ok(compiled) => Ok(compiled),
        // Echo the offending pattern back so the caller can report it.
        Err(source) => Err(crate::ErrorRepr::RegexCompile {
            pattern: pattern.to_owned(),
            source,
        }),
    }
}
/// Entry count at which `compile_regex_cached` clears the whole cache before
/// inserting a pattern that is not already present.
const REGEX_CACHE_MAX: usize = 32;
/// Process-wide cache of compiled regexes, keyed by the pattern source text.
static REGEX_CACHE: LazyLock<RwLock<HashMap<String, regex::Regex>>> =
LazyLock::new(|| RwLock::new(HashMap::new()));
/// Returns a compiled regex for `pattern`, reusing a cached copy when one exists.
///
/// On a miss the pattern is compiled with [`compile_regex`] and stored. When
/// the cache already holds `REGEX_CACHE_MAX` entries and `pattern` is not one
/// of them, the whole cache is cleared before the insert.
///
/// # Errors
///
/// Propagates any error from [`compile_regex`]; failed patterns are not cached.
fn compile_regex_cached(pattern: &str) -> Result<regex::Regex, crate::ErrorRepr> {
    // Fast path: look the pattern up under the read lock only.
    {
        let cache = crate::recover_lock(REGEX_CACHE.read(), "REGEX_CACHE");
        if let Some(hit) = cache.get(pattern) {
            return Ok(hit.clone());
        }
    }

    // Compile outside of any lock so other readers are not blocked.
    let compiled = compile_regex(pattern)?;

    let mut cache = crate::recover_lock(REGEX_CACHE.write(), "REGEX_CACHE");
    let is_new_key = !cache.contains_key(pattern);
    if is_new_key && cache.len() >= REGEX_CACHE_MAX {
        cache.clear();
    }
    cache.insert(pattern.to_owned(), compiled.clone());
    Ok(compiled)
}
use crate::utils::floor_char_boundary;
/// A multi-pattern matcher for slug replacements together with the text each
/// pattern is replaced by.
struct SlugReplacementAutomaton {
/// Aho-Corasick matcher built over the replacement keys.
ac: aho_corasick::AhoCorasick,
/// Replacement text, indexed by the pattern id reported by `ac`.
values: Vec<String>,
}
/// Builds a leftmost-first Aho-Corasick matcher over the non-empty keys of `pairs`.
///
/// Pairs with an empty key are skipped. Returns `None` when fewer than two
/// usable keys remain, in which case no automaton is constructed.
///
/// # Panics
///
/// Panics if the Aho-Corasick builder rejects the collected keys.
fn build_slug_replacement_automaton(
    pairs: &[(String, String)],
) -> Option<SlugReplacementAutomaton> {
    // Keep keys and values in lock-step so a pattern id indexes its replacement.
    let (patterns, values): (Vec<&str>, Vec<String>) = pairs
        .iter()
        .filter(|(from, _)| !from.is_empty())
        .map(|(from, to)| (from.as_str(), to.clone()))
        .unzip();

    if patterns.len() < 2 {
        return None;
    }

    let mut builder = aho_corasick::AhoCorasick::builder();
    builder.match_kind(aho_corasick::MatchKind::LeftmostFirst);
    let ac = builder
        .build(&patterns)
        .expect("slug replacement keys are valid aho-corasick patterns");

    Some(SlugReplacementAutomaton { ac, values })
}
/// Entry count at which `cached_slug_replacement_automaton` clears the whole
/// cache before inserting a replacement list that is not already present.
const REPLACEMENT_AUTOMATON_CACHE_MAX: usize = 32;
/// Process-wide cache of replacement automatons, keyed by the full list of
/// `(from, to)` pairs they were built from.
#[allow(clippy::type_complexity)]
static REPLACEMENT_AUTOMATON_CACHE: LazyLock<
RwLock<HashMap<Vec<(String, String)>, Arc<SlugReplacementAutomaton>>>,
> = LazyLock::new(|| RwLock::new(HashMap::new()));
/// Returns a shared automaton for `pairs`, building and caching it on first use.
///
/// Returns `None` when [`build_slug_replacement_automaton`] declines to build
/// one; such results are not cached. When the cache already holds
/// `REPLACEMENT_AUTOMATON_CACHE_MAX` entries and `pairs` is not among them,
/// the whole cache is cleared before the insert.
fn cached_slug_replacement_automaton(
    pairs: &[(String, String)],
) -> Option<Arc<SlugReplacementAutomaton>> {
    const LOCK_NAME: &str = "REPLACEMENT_AUTOMATON_CACHE";

    // Fast path: shared lookup under the read lock.
    {
        let cache = crate::recover_lock(REPLACEMENT_AUTOMATON_CACHE.read(), LOCK_NAME);
        if let Some(hit) = cache.get(pairs) {
            return Some(Arc::clone(hit));
        }
    }

    // Build outside of any lock; bail out if no automaton is warranted.
    let built = Arc::new(build_slug_replacement_automaton(pairs)?);

    let mut cache = crate::recover_lock(REPLACEMENT_AUTOMATON_CACHE.write(), LOCK_NAME);
    let is_new_key = !cache.contains_key(pairs);
    if is_new_key && cache.len() >= REPLACEMENT_AUTOMATON_CACHE_MAX {
        cache.clear();
    }
    cache.insert(pairs.to_vec(), Arc::clone(&built));
    Some(built)
}
/// Rewrites `text`, substituting every match reported by `automaton` with the
/// replacement registered for the matched pattern.
///
/// Text between matches is copied through unchanged.
fn slug_replace_with_automaton(text: &str, automaton: &SlugReplacementAutomaton) -> String {
    let mut out = String::with_capacity(text.len());

    // Thread the end of the previous match through the fold as the copy cursor.
    let tail_start = automaton.ac.find_iter(text).fold(0, |cursor, hit| {
        out.push_str(&text[cursor..hit.start()]);
        out.push_str(&automaton.values[hit.pattern().as_usize()]);
        hit.end()
    });

    out.push_str(&text[tail_start..]);
    out
}
/// Options controlling how text is converted into a slug.
///
/// Obtain a baseline via [`SlugConfig::new`] or [`Default`] and adjust it with
/// the `with_*` builder methods.
#[non_exhaustive]
pub struct SlugConfig {
/// String emitted between runs of kept characters; when empty, no separator is emitted.
pub separator: String,
/// Whether the text is lowercased before the slug is assembled.
pub lowercase: bool,
/// Maximum slug length in bytes; `0` disables truncation.
pub max_length: usize,
/// When truncating, cut at a separator instead of at the raw byte limit.
pub word_boundary: bool,
/// Stopword mode: when `true` only leading and trailing stopwords are
/// removed; when `false` every stopword is removed.
pub save_order: bool,
/// Words removed from the separator-delimited slug.
pub stopwords: Vec<String>,
/// Optional regex; every match is deleted from the text before the slug is assembled.
pub regex_pattern: Option<regex::Regex>,
/// `(from, to)` substring substitutions applied to the raw input before any other step.
pub replacements: Vec<(String, String)>,
/// When `false` the text is transliterated first; when `true` transliteration
/// is skipped and non-ASCII, non-whitespace characters are kept in the slug.
pub allow_unicode: bool,
/// Optional language identifier forwarded to the transliterator.
pub lang: Option<String>,
/// Whether HTML entities are decoded before transliteration.
pub entities: bool,
/// Whether decimal numeric references are decoded (consulted only when `entities` is set).
pub decimal: bool,
/// Whether hexadecimal numeric references are decoded (consulted only when `entities` is set).
pub hexadecimal: bool,
/// Extra characters kept verbatim in the slug instead of acting as separators.
pub safe_chars: String,
}
impl Default for SlugConfig {
fn default() -> Self {
Self {
separator: "-".to_owned(),
lowercase: true,
max_length: 0,
word_boundary: false,
save_order: false,
stopwords: Vec::new(),
regex_pattern: None,
replacements: Vec::new(),
allow_unicode: false,
lang: None,
entities: true,
decimal: true,
hexadecimal: true,
safe_chars: String::new(),
}
}
}
impl SlugConfig {
pub(crate) fn from_pyargs(
separator: &str,
lowercase: bool,
max_length: usize,
word_boundary: bool,
save_order: bool,
stopwords: Vec<String>,
regex_pattern: Option<&str>,
replacements: Vec<(String, String)>,
allow_unicode: bool,
lang: Option<&str>,
entities: bool,
decimal: bool,
hexadecimal: bool,
) -> Result<Self, crate::ErrorRepr> {
let compiled_regex = regex_pattern.map(compile_regex_cached).transpose()?;
Ok(Self {
separator: separator.to_owned(),
lowercase,
max_length,
word_boundary,
save_order,
stopwords,
regex_pattern: compiled_regex,
replacements,
allow_unicode,
lang: lang.map(std::borrow::ToOwned::to_owned),
entities,
decimal,
hexadecimal,
safe_chars: String::new(),
})
}
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
self.separator = separator.into();
self
}
#[must_use]
pub fn with_lowercase(mut self, lowercase: bool) -> Self {
self.lowercase = lowercase;
self
}
#[must_use]
pub fn with_max_length(mut self, max_length: usize) -> Self {
self.max_length = max_length;
self
}
#[must_use]
pub fn with_word_boundary(mut self, word_boundary: bool) -> Self {
self.word_boundary = word_boundary;
self
}
#[must_use]
pub fn with_save_order(mut self, save_order: bool) -> Self {
self.save_order = save_order;
self
}
#[must_use]
pub fn with_stopwords<I, S>(mut self, stopwords: I) -> Self
where
I: IntoIterator<Item = S>,
S: Into<String>,
{
self.stopwords = stopwords.into_iter().map(Into::into).collect();
self
}
#[must_use]
pub fn with_allow_unicode(mut self, allow_unicode: bool) -> Self {
self.allow_unicode = allow_unicode;
self
}
#[must_use]
pub fn with_lang(mut self, lang: impl Into<String>) -> Self {
self.lang = Some(lang.into());
self
}
#[must_use]
pub fn with_safe_chars(mut self, safe_chars: impl Into<String>) -> Self {
self.safe_chars = safe_chars.into();
self
}
#[must_use]
pub fn with_replacements<I, F, T>(mut self, replacements: I) -> Self
where
I: IntoIterator<Item = (F, T)>,
F: Into<String>,
T: Into<String>,
{
self.replacements = replacements
.into_iter()
.map(|(from, to)| (from.into(), to.into()))
.collect();
self
}
#[must_use]
pub fn with_entities(mut self, entities: bool) -> Self {
self.entities = entities;
self
}
#[must_use]
pub fn with_decimal(mut self, decimal: bool) -> Self {
self.decimal = decimal;
self
}
#[must_use]
pub fn with_hexadecimal(mut self, hexadecimal: bool) -> Self {
self.hexadecimal = hexadecimal;
self
}
}
/// Slugifies `text` according to `config`.
///
/// Delegates to [`slugify_impl_with_stopset`] without a prebuilt stopword set.
pub(crate) fn slugify_impl(text: &str, config: &SlugConfig) -> String {
slugify_impl_with_stopset(text, config, None)
}
/// Applies `pairs` to `text` with a left-to-right scan: at each position the
/// first pair (in list order) whose non-empty key matches is substituted.
///
/// Empty keys are ignored. They match at every position without consuming any
/// input, so honouring them would never advance the scan.
fn replace_by_scan(text: &str, pairs: &[(String, String)]) -> String {
    let bytes = text.as_bytes();
    let mut out = String::with_capacity(text.len());
    let mut i = 0;
    while i < text.len() {
        let hit = pairs
            .iter()
            .find(|(from, _)| !from.is_empty() && bytes[i..].starts_with(from.as_bytes()));
        match hit {
            Some((from, to)) => {
                out.push_str(to);
                i += from.len();
            }
            None => {
                // `i` only advances by whole keys or whole chars, so it is
                // always on a char boundary and a next char exists.
                let Some(ch) = text[i..].chars().next() else {
                    break;
                };
                out.push(ch);
                i += ch.len_utf8();
            }
        }
    }
    out
}

/// Slugifies `text` according to `config`, optionally reusing a stopword set.
///
/// Processing order:
/// 1. apply `config.replacements` to the raw input;
/// 2. decode HTML entities (when `config.entities`);
/// 3. transliterate (unless `config.allow_unicode`);
/// 4. lowercase (when `config.lowercase`);
/// 5. delete every match of `config.regex_pattern`;
/// 6. keep alphanumeric / allowed characters and collapse everything else into
///    single separators, with no leading or trailing separator;
/// 7. filter stopwords;
/// 8. truncate to `config.max_length` bytes (`0` means unlimited).
///
/// `prebuilt_stopset`, when given, is used instead of building a set from
/// `config.stopwords`; it is only consulted when `config.stopwords` is
/// non-empty. Empty input yields an empty string.
pub(crate) fn slugify_impl_with_stopset(
    text: &str,
    config: &SlugConfig,
    prebuilt_stopset: Option<&HashSet<String>>,
) -> String {
    if text.is_empty() {
        return String::new();
    }
    let mut value: Cow<str> = Cow::Borrowed(text);

    // 1. User-supplied substitutions.
    match config.replacements.as_slice() {
        [] => {}
        [(from, to)] => {
            // A single pair uses plain `str::replace` semantics.
            value = Cow::Owned(value.replace(from.as_str(), to.as_str()));
        }
        pairs => {
            let replaced = match cached_slug_replacement_automaton(pairs) {
                Some(automaton) => slug_replace_with_automaton(&value, &automaton),
                // No automaton is built when fewer than two keys are
                // non-empty; fall back to a scan that skips empty keys too.
                None => replace_by_scan(&value, pairs),
            };
            value = Cow::Owned(replaced);
        }
    }

    // 2. Entity decoding. Only replace `value` when something actually changed.
    if config.entities {
        let owned = match decode_entities(&value, config.decimal, config.hexadecimal) {
            Cow::Borrowed(_) => None,
            Cow::Owned(s) => Some(s),
        };
        if let Some(s) = owned {
            value = Cow::Owned(s);
        }
    }

    // 3. Transliteration.
    if !config.allow_unicode {
        let owned = match transliterate::transliterate_impl(
            &value,
            config.lang.as_deref(),
            crate::ErrorMode::Ignore,
            "",
            false,
            false,
            false,
        ) {
            Cow::Borrowed(_) => None,
            Cow::Owned(s) => Some(s),
        };
        if let Some(s) = owned {
            value = Cow::Owned(s);
        }
    }

    // 4. Lowercasing. ASCII text is lowercased in place and only when needed.
    if config.lowercase {
        if value.is_ascii() {
            if value.bytes().any(|b| b.is_ascii_uppercase()) {
                let mut s = value.into_owned();
                s.make_ascii_lowercase();
                value = Cow::Owned(s);
            }
        } else {
            value = Cow::Owned(value.to_lowercase());
        }
    }

    // 5. Regex-based deletion.
    if let Some(ref re) = config.regex_pattern {
        value = Cow::Owned(re.replace_all(&value, "").into_owned());
    }

    // 6. Assemble the slug.
    let separator = &config.separator;
    let mut slug = String::with_capacity(value.len());
    // Starting as "just emitted a separator" suppresses a leading separator.
    let mut prev_was_sep = true;
    let has_safe_chars = !config.safe_chars.is_empty();
    let safe_set: HashSet<char> = if has_safe_chars {
        config.safe_chars.chars().collect()
    } else {
        HashSet::new()
    };
    for ch in value.chars() {
        if ch.is_alphanumeric()
            || (config.allow_unicode && !ch.is_ascii() && !ch.is_whitespace())
            || (has_safe_chars && safe_set.contains(&ch))
        {
            slug.push(ch);
            prev_was_sep = false;
        } else if !prev_was_sep && !separator.is_empty() {
            slug.push_str(separator);
            prev_was_sep = true;
        }
    }
    if slug.ends_with(separator) && !separator.is_empty() {
        slug.truncate(slug.len() - separator.len());
    }

    // 7. Stopwords.
    if !config.stopwords.is_empty() {
        let tmp_stopset;
        let stopset: &HashSet<String> = if let Some(s) = prebuilt_stopset {
            s
        } else {
            tmp_stopset = config.stopwords.iter().cloned().collect();
            &tmp_stopset
        };
        slug = filter_stopwords(&slug, separator, stopset, config.save_order);
    }

    // 8. Truncation (byte-based, never splitting a character).
    if config.max_length > 0 && slug.len() > config.max_length {
        if config.word_boundary {
            slug = truncate_at_boundary(&slug, config.max_length, separator);
        } else {
            let boundary = floor_char_boundary(&slug, config.max_length);
            slug.truncate(boundary);
            if slug.ends_with(separator) && !separator.is_empty() {
                slug.truncate(slug.len() - separator.len());
            }
        }
    }
    slug
}
/// Removes stopwords from a separator-delimited `slug`.
///
/// * `save_order == false`: every segment found in `stopset` is dropped.
/// * `save_order == true`: only the leading and trailing runs of stopwords are
///   dropped; segments between the first and last non-stopword are kept as-is.
///
/// The surviving segments are re-joined with `separator`.
fn filter_stopwords(
    slug: &str,
    separator: &str,
    stopset: &HashSet<String>,
    save_order: bool,
) -> String {
    let is_kept = |word: &str| !stopset.contains(word);

    if !save_order {
        return slug
            .split(separator)
            .filter(|word| is_kept(*word))
            .collect::<Vec<_>>()
            .join(separator);
    }

    let words: Vec<&str> = slug.split(separator).collect();
    let first_kept = words.iter().position(|word| is_kept(*word));
    let last_kept = words.iter().rposition(|word| is_kept(*word));
    match (first_kept, last_kept) {
        (Some(first), Some(last)) => words[first..=last].join(separator),
        // Every segment is a stopword: nothing survives.
        _ => String::new(),
    }
}
/// Shortens `slug` to at most `max_length` bytes without ending in a partial word.
///
/// The cut is first moved back to a character boundary. Then:
/// * if the cut lands exactly in front of a separator, everything before it is
///   already made of whole words and is returned unchanged;
/// * otherwise the trailing partial word is dropped by cutting at the last
///   separator before the limit;
/// * if there is no separator before the limit, the hard-truncated prefix is
///   returned (a single over-long word is cut mid-word).
///
/// A slug that already fits is returned as-is.
fn truncate_at_boundary(slug: &str, max_length: usize, separator: &str) -> String {
    if slug.len() <= max_length {
        return slug.to_owned();
    }
    let boundary = floor_char_boundary(slug, max_length);
    let (head, tail) = slug.split_at(boundary);

    // The limit coincides with a word boundary: the last word in `head` is
    // complete and must not be discarded.
    if tail.starts_with(separator) {
        return head.to_owned();
    }

    match head.rfind(separator) {
        Some(pos) => head[..pos].to_owned(),
        None => head.to_owned(),
    }
}
/// Parses the numeric character reference that starts at `bytes[pos]`
/// (the caller has already verified that it begins with `&#`).
///
/// `num_buf` is scratch space for the digits; it is cleared on entry so it can
/// be reused across calls.
///
/// Returns `(decoded, consumed)`:
/// * `decoded` is `Some(ch)` when the digits name a valid, non-control Unicode
///   scalar value. A terminating `;` is consumed when present but not required.
/// * `decoded` is `None` for anything else (no digits, out-of-range value,
///   control character, or more than `MAX_ENTITY_DIGITS` digits). In that case
///   `consumed` covers the malformed reference so the caller can drop it: ASCII
///   bytes up to and including the next `;`, capped at
///   `MAX_ENTITY_DIGITS + 4` bytes from `pos`.
///
/// `consumed` always ends on a character boundary because only ASCII bytes are
/// ever stepped over.
fn decode_numeric_entity(bytes: &[u8], pos: usize, num_buf: &mut String) -> (Option<char>, usize) {
    let len = bytes.len();
    let mut i = pos + 2;
    let is_hex = i < len && (bytes[i] == b'x' || bytes[i] == b'X');
    if is_hex {
        i += 1;
    }

    num_buf.clear();
    // Set when the digit run is longer than the buffer limit. Such a reference
    // must not be decoded from its first few digits only.
    let mut overlong = false;
    while i < len {
        let b = bytes[i];
        if b == b';' {
            i += 1;
            break;
        }
        let valid_digit = if is_hex {
            b.is_ascii_hexdigit()
        } else {
            b.is_ascii_digit()
        };
        if !valid_digit {
            break;
        }
        if num_buf.len() >= MAX_ENTITY_DIGITS {
            overlong = true;
            break;
        }
        num_buf.push(b as char);
        i += 1;
    }

    if !overlong {
        let parsed = if is_hex {
            u32::from_str_radix(num_buf.as_str(), 16).ok()
        } else {
            num_buf.parse::<u32>().ok()
        };
        if let Some(ch) = parsed.and_then(char::from_u32).filter(|c| !c.is_control()) {
            return (Some(ch), i - pos);
        }
    }

    // Malformed reference: work out how much of it to skip.
    let mut j = pos + 2;
    if j < len && (bytes[j] == b'x' || bytes[j] == b'X') {
        j += 1;
    }
    while j < len && bytes[j].is_ascii() && bytes[j] != b';' && (j - pos) < MAX_ENTITY_DIGITS + 4 {
        j += 1;
    }
    if j < len && bytes[j] == b';' {
        j += 1;
    }
    (None, j - pos)
}
fn decode_entities(text: &str, decimal: bool, hexadecimal: bool) -> Cow<'_, str> {
let Some(first) = text.find('&') else {
return Cow::Borrowed(text);
};
let mut result = String::with_capacity(text.len());
result.push_str(&text[..first]);
let bytes = text.as_bytes();
let len = bytes.len();
let mut i = first;
let mut num_buf = String::with_capacity(MAX_ENTITY_DIGITS);
while i < len {
if bytes[i] != b'&' {
let rel = text[i..].find('&').unwrap_or(len - i);
result.push_str(&text[i..i + rel]);
i += rel;
continue;
}
if text[i..].starts_with("&") {
result.push('&');
i += "&".len();
} else if text[i..].starts_with("<") {
result.push('<');
i += "<".len();
} else if text[i..].starts_with(">") {
result.push('>');
i += ">".len();
} else if text[i..].starts_with(""") {
result.push('"');
i += """.len();
} else if text[i..].starts_with("'") {
result.push('\'');
i += "'".len();
} else if text[i..].starts_with("&#") {
let is_hex = i + 2 < len && (bytes[i + 2] == b'x' || bytes[i + 2] == b'X');
let decode = if is_hex { hexadecimal } else { decimal };
if decode {
let (decoded, consumed) = decode_numeric_entity(bytes, i, &mut num_buf);
if let Some(ch) = decoded {
result.push(ch);
}
i += consumed;
} else {
result.push('&');
i += 1;
}
} else {
result.push('&');
i += 1;
}
}
Cow::Owned(result)
}
/// Unit and property tests for the slugification pipeline and its helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline configuration spelled out field by field so each test can
    /// tweak exactly what it needs.
    fn default_config() -> SlugConfig {
        SlugConfig {
            separator: "-".to_owned(),
            lowercase: true,
            max_length: 0,
            word_boundary: false,
            save_order: false,
            stopwords: vec![],
            regex_pattern: None,
            replacements: vec![],
            allow_unicode: false,
            lang: None,
            entities: true,
            decimal: true,
            hexadecimal: true,
            safe_chars: String::new(),
        }
    }

    #[test]
    fn test_empty_input() {
        let config = default_config();
        assert_eq!(slugify_impl("", &config), "");
    }

    /// The automaton-based replacement must agree with a naive
    /// first-match-wins scan for every key list and input below.
    #[test]
    fn slug_automaton_matches_scan() {
        fn scan(text: &str, pairs: &[(String, String)]) -> String {
            let mut result = String::with_capacity(text.len());
            let mut i = 0;
            let b = text.as_bytes();
            while i < text.len() {
                let mut matched = false;
                for (from, to) in pairs {
                    if !from.is_empty() && b[i..].starts_with(from.as_bytes()) {
                        result.push_str(to);
                        i += from.len();
                        matched = true;
                        break;
                    }
                }
                if !matched {
                    let ch = text[i..].chars().next().unwrap();
                    result.push(ch);
                    i += ch.len_utf8();
                }
            }
            result
        }
        let pair = |a: &str, b: &str| (a.to_owned(), b.to_owned());
        let lists = [
            vec![pair("ab", "X"), pair("a", "Y")],
            vec![pair("a", "Y"), pair("ab", "X")],
            vec![pair("the", "T"), pair("he", "H")],
            vec![pair("&", "and"), pair("@", "at"), pair("%", "pct")],
            vec![pair("\u{5317}", "N"), pair("\u{5317}\u{4eac}", "BJ")],
        ];
        let inputs = [
            "",
            "abc",
            "ab",
            "abab",
            "the heat",
            "a&b@c%d",
            "\u{5317}\u{4eac}\u{5e02}",
            "no-op",
            "aaa",
        ];
        for pairs in &lists {
            let automaton = build_slug_replacement_automaton(pairs);
            for inp in inputs {
                let reference = scan(inp, pairs);
                let got = automaton
                    .as_ref()
                    .map_or_else(|| scan(inp, pairs), |a| slug_replace_with_automaton(inp, a));
                assert_eq!(got, reference, "slug automaton != scan for input {inp:?}");
            }
        }
    }

    #[test]
    fn test_ascii_passthrough() {
        let config = default_config();
        assert_eq!(slugify_impl("hello world", &config), "hello-world");
    }

    /// A custom language table may map to non-ASCII output; lowercasing must
    /// still apply to it (capital psi becomes small psi).
    #[test]
    fn custom_lang_non_ascii_value_is_lowercased() {
        let mut mappings = std::collections::HashMap::new();
        mappings.insert("\u{03A9}".to_owned(), "\u{03A8}".to_owned());
        crate::tables::register_lang("slugtest_psi_rs", mappings).unwrap();
        let config = SlugConfig {
            lang: Some("slugtest_psi_rs".to_owned()),
            ..default_config()
        };
        assert_eq!(slugify_impl("\u{03A9}", &config), "\u{03C8}");
    }

    #[test]
    fn test_separator() {
        let mut config = default_config();
        config.separator = "_".to_owned();
        assert_eq!(slugify_impl("hello world", &config), "hello_world");
    }

    #[test]
    fn test_safe_chars_preserved_in_place() {
        let mut config = default_config();
        config.lowercase = false;
        config.separator = "_".to_owned();
        config.safe_chars = "-.".to_owned();
        assert_eq!(slugify_impl("My Report.pdf", &config), "My_Report.pdf");
        assert_eq!(slugify_impl("Foo-Bar Baz.txt", &config), "Foo-Bar_Baz.txt");
    }

    #[test]
    fn test_safe_chars_empty_is_default_behavior() {
        let config = default_config();
        assert_eq!(slugify_impl("My Report.pdf", &config), "my-report-pdf");
    }

    #[test]
    fn test_slugify_rejects_negative_max_length() {
        let err = crate::error::checked_max_length(-1).unwrap_err();
        assert!(err
            .to_string()
            .contains("max_length must be non-negative, got -1"));
        assert_eq!(crate::error::checked_max_length(0).unwrap(), 0);
        assert_eq!(crate::error::checked_max_length(255).unwrap(), 255);
    }

    #[test]
    fn test_no_lowercase() {
        let mut config = default_config();
        config.lowercase = false;
        assert_eq!(slugify_impl("Hello World", &config), "Hello-World");
    }

    #[test]
    fn test_max_length() {
        let mut config = default_config();
        config.max_length = 5;
        let result = slugify_impl("hello world", &config);
        assert!(result.len() <= 5);
    }

    #[test]
    fn test_max_length_word_boundary() {
        let mut config = default_config();
        config.max_length = 8;
        config.word_boundary = true;
        assert_eq!(slugify_impl("hello world foo", &config), "hello");
    }

    #[test]
    fn test_stopwords() {
        let mut config = default_config();
        config.stopwords = vec!["the".to_owned(), "a".to_owned()];
        assert_eq!(slugify_impl("the big a fox", &config), "big-fox");
    }

    #[test]
    fn test_stopwords_uses_hashset() {
        let mut config = default_config();
        config.stopwords = (0..100).map(|i| format!("stop{i}")).collect();
        config.stopwords.push("the".to_owned());
        assert_eq!(slugify_impl("the big fox", &config), "big-fox");
    }

    #[test]
    fn test_replacements() {
        let mut config = default_config();
        config.replacements = vec![("C++".to_owned(), "cpp".to_owned())];
        assert_eq!(slugify_impl("C++ Code", &config), "cpp-code");
    }

    #[test]
    fn test_allow_unicode() {
        let mut config = default_config();
        config.allow_unicode = true;
        let result = slugify_impl("café latte", &config);
        assert!(result.contains("café"));
    }

    /// Entity decoding must slice on character boundaries when the
    /// surrounding text is multi-byte UTF-8.
    #[test]
    fn test_decode_entities_multibyte_utf8() {
        assert_eq!(
            decode_entities("café &amp; résumé", true, true),
            "café & résumé"
        );
        assert_eq!(decode_entities("über &lt; cool", true, true), "über < cool");
        assert_eq!(
            decode_entities("中文 &amp; 日本語", true, true),
            "中文 & 日本語"
        );
        assert_eq!(
            decode_entities("emoji 🎉 &amp; fun", true, true),
            "emoji 🎉 & fun"
        );
        assert_eq!(decode_entities("café", true, true), "café");
    }

    #[test]
    fn test_decode_named_entities() {
        assert_eq!(decode_entities("AT&amp;T", true, true), "AT&T");
        assert_eq!(decode_entities("5 &lt; 10", true, true), "5 < 10");
        assert_eq!(
            decode_entities("&quot;hello&quot;", true, true),
            "\"hello\""
        );
    }

    #[test]
    fn test_decode_numeric_entity_decimal() {
        assert_eq!(decode_entities("&#65;", true, true), "A");
        assert_eq!(decode_entities("&#38;", true, true), "&");
    }

    #[test]
    fn test_decode_numeric_entity_hex() {
        assert_eq!(decode_entities("&#x41;", true, true), "A");
        assert_eq!(decode_entities("&#x26;", true, true), "&");
    }

    #[test]
    fn test_decode_malformed_entity() {
        assert_eq!(decode_entities("&#xyz;", true, true), "");
    }

    /// Malformed or undecodable numeric references are dropped together with
    /// their terminating semicolon.
    #[test]
    fn test_decode_malformed_entity_semicolon_preserved() {
        assert_eq!(decode_entities("&#;", true, true), "");
        assert_eq!(decode_entities("&#x;", true, true), "");
        // NUL is a control character and is never emitted.
        assert_eq!(decode_entities("&#0;", true, true), "");
        // Beyond the Unicode range: not a valid scalar value.
        let result = decode_entities("&#x110000;", true, true);
        assert_eq!(result, "");
    }

    #[test]
    fn test_decode_entity_digit_limit() {
        let long = format!("&#{}1;", "9".repeat(100));
        let result = decode_entities(&long, true, true);
        assert!(!result.contains("&#"));
    }

    #[test]
    fn test_decode_decimal_disabled() {
        assert_eq!(decode_entities("&#65;", false, true), "&#65;");
        assert_eq!(decode_entities("&#x41;", false, true), "A");
    }

    #[test]
    fn test_decode_hex_disabled() {
        assert_eq!(decode_entities("&#65;", true, false), "A");
        assert_eq!(decode_entities("&#x41;", true, false), "&#x41;");
    }

    #[test]
    fn test_decode_both_disabled() {
        assert_eq!(
            decode_entities("&#65; &amp; &#x41;", false, false),
            "&#65; & &#x41;"
        );
    }

    #[test]
    fn test_truncate_at_boundary_no_truncation_needed() {
        assert_eq!(truncate_at_boundary("abc", 10, "-"), "abc");
    }

    #[test]
    fn test_truncate_at_boundary_with_separator() {
        assert_eq!(
            truncate_at_boundary("hello-world-foo", 12, "-"),
            "hello-world"
        );
    }

    #[test]
    fn test_truncate_at_boundary_no_separator_found() {
        assert_eq!(truncate_at_boundary("helloworld", 5, "-"), "hello");
    }

    /// `max_length` is in bytes; a limit that falls inside a two-byte
    /// character must round down instead of panicking.
    #[test]
    fn test_allow_unicode_max_length_no_panic() {
        let mut config = default_config();
        config.allow_unicode = true;
        config.max_length = 3;
        let result = slugify_impl("éééé", &config);
        assert!(result.len() <= 3);
        assert_eq!(result, "é");
    }

    #[test]
    fn test_allow_unicode_max_length_exact_boundary() {
        let mut config = default_config();
        config.allow_unicode = true;
        config.max_length = 4;
        let result = slugify_impl("éééé", &config);
        assert!(result.len() <= 4);
        assert_eq!(result, "éé");
    }

    #[test]
    fn test_allow_unicode_word_boundary_no_panic() {
        let mut config = default_config();
        config.allow_unicode = true;
        config.max_length = 5;
        config.word_boundary = true;
        let result = slugify_impl("café latte", &config);
        assert!(result.len() <= 5);
        assert_eq!(result, "café");
    }

    #[test]
    fn test_strip_trailing_separator() {
        let config = default_config();
        let result = slugify_impl("hello!", &config);
        assert!(!result.ends_with('-'));
    }

    #[test]
    fn test_consecutive_separators_collapsed() {
        let config = default_config();
        let result = slugify_impl("hello   world", &config);
        assert_eq!(result, "hello-world");
    }

    /// With entity decoding off, the entity name survives as ordinary text.
    #[test]
    fn test_entities_disabled() {
        let mut config = default_config();
        config.entities = false;
        let result = slugify_impl("AT&amp;T", &config);
        assert!(result.contains("amp"));
    }

    #[test]
    fn test_regex_pattern() {
        let mut config = default_config();
        config.regex_pattern = Some(regex::Regex::new(r"\d").unwrap());
        assert_eq!(slugify_impl("abc123def", &config), "abcdef");
    }

    #[test]
    fn test_compile_regex_valid() {
        assert!(compile_regex(r"\d+").is_ok());
    }

    #[test]
    fn test_compile_regex_too_long() {
        let long_pattern = "a".repeat(MAX_REGEX_PATTERN_BYTES + 1);
        let err = compile_regex(&long_pattern).unwrap_err().to_string();
        assert!(err.contains("too long"), "unexpected error: {err}");
    }

    #[test]
    fn test_compile_regex_at_limit() {
        let pattern = "a".repeat(MAX_REGEX_PATTERN_BYTES);
        assert!(compile_regex(&pattern).is_ok());
    }

    #[test]
    fn test_compile_regex_invalid() {
        let err = compile_regex(r"[unclosed").unwrap_err().to_string();
        assert!(err.contains("regex_pattern"), "unexpected error: {err}");
        assert!(err.contains("[unclosed"), "pattern not echoed: {err}");
    }

    /// Property-based checks of the invariants the slug output must satisfy.
    mod proptest_properties {
        use super::*;
        use proptest::prelude::*;

        proptest! {
            #![proptest_config(ProptestConfig::with_cases(1000))]

            #[test]
            fn slugify_output_is_ascii(s in "\\PC*") {
                let config = default_config();
                let result = slugify_impl(&s, &config);
                prop_assert!(result.is_ascii());
            }

            #[test]
            fn slugify_output_charset(s in "\\PC*") {
                let config = default_config();
                let result = slugify_impl(&s, &config);
                if !result.is_empty() {
                    for ch in result.chars() {
                        prop_assert!(
                            ch.is_ascii_lowercase() || ch.is_ascii_digit() || ch == '-',
                            "bad char {ch:?} in {result:?}"
                        );
                    }
                    prop_assert!(!result.starts_with('-'));
                    prop_assert!(!result.ends_with('-'));
                    prop_assert!(!result.contains("--"));
                }
            }

            #[test]
            fn slugify_max_length(s in "\\PC*", max_len in 1..200usize) {
                let mut config = default_config();
                config.max_length = max_len;
                let result = slugify_impl(&s, &config);
                prop_assert!(result.len() <= max_len);
            }

            #[test]
            fn slugify_unicode_max_length_no_panic(s in "\\PC*", max_len in 1..200usize) {
                let mut config = default_config();
                config.allow_unicode = true;
                config.max_length = max_len;
                let result = slugify_impl(&s, &config);
                prop_assert!(result.len() <= max_len);
                prop_assert!(std::str::from_utf8(result.as_bytes()).is_ok());
            }

            #[test]
            fn slugify_empty_is_empty(_unused in 0..1u8) {
                let config = default_config();
                prop_assert_eq!(slugify_impl("", &config), "");
            }

            #[test]
            fn slugify_idempotent_on_slugs(s in "[a-z][a-z0-9]{0,10}(-[a-z0-9]{1,10}){0,5}") {
                let config = default_config();
                let result = slugify_impl(&s, &config);
                prop_assert_eq!(&result, &s, "slug changed on re-slugify");
            }
        }
    }
}