// slugrs 0.2.3
//
// A fast, locale-aware slugify library for Rust
// Documentation
use deunicode::deunicode;
use regex::Regex;
use unicode_categories::UnicodeCategories;

use crate::options::Options;

/// Slugifier processes strings into URL-friendly slugs.
///
/// A `Slugifier` is a thin, reusable wrapper around an [`Options`] value: it
/// stores the options once and applies them to every string passed to
/// [`Slugifier::slugify`]. It can be configured either by handing in a complete
/// `Options` via [`Slugifier::with_options`] or by chaining the consuming
/// setter methods (`separator`, `locale`, `remove`, `lowercase`, `trim`,
/// `max_length`).
#[derive(Debug, Clone)]
pub struct Slugifier {
    /// Settings applied on every call to `slugify`.
    options: Options,
}

impl Slugifier {
    /// Creates a slugifier configured with `Options::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            options: Options::default(),
        }
    }

    /// Creates a slugifier that uses the given `options` as-is.
    #[must_use]
    pub fn with_options(options: Options) -> Self {
        Self { options }
    }

    /// Returns a shared reference to the current options.
    #[must_use]
    pub fn options(&self) -> &Options {
        &self.options
    }

    /// Returns a mutable reference to the current options so they can be
    /// adjusted in place without rebuilding the slugifier.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }

    /// Sets the string that replaces every run of non-alphanumeric characters.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn separator(mut self, separator: impl Into<String>) -> Self {
        self.options.separator = separator.into();
        self
    }

    /// Sets the locale whose mappings are applied before transliteration;
    /// `None` disables locale-specific handling.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn locale(mut self, locale: Option<crate::Locale>) -> Self {
        self.options.locale = locale;
        self
    }

    /// Sets a regex whose matches are deleted from the input before any other
    /// processing; `None` disables the pre-removal step.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn remove(mut self, remove: Option<Regex>) -> Self {
        self.options.remove = remove;
        self
    }

    /// Chooses whether the finished slug is converted to lowercase.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn lowercase(mut self, lowercase: bool) -> Self {
        self.options.lowercase = lowercase;
        self
    }

    /// Chooses whether leading and trailing separator characters are stripped
    /// from the slug.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn trim(mut self, trim: bool) -> Self {
        self.options.trim = trim;
        self
    }

    /// Sets the maximum slug length, measured in bytes; `None` means
    /// unlimited.
    ///
    /// Consumes and returns the slugifier so calls can be chained.
    #[must_use]
    pub fn max_length(mut self, max_length: Option<usize>) -> Self {
        self.options.max_length = max_length;
        self
    }

    /// Converts `input` into a slug using this slugifier's options.
    #[must_use]
    pub fn slugify(&self, input: &str) -> String {
        slugify_impl(input, &self.options)
    }
}

impl Default for Slugifier {
    fn default() -> Self {
        Self::new()
    }
}

/// Converts `input` into a slug using the supplied `options`.
///
/// This is the free-function counterpart of [`Slugifier::slugify`] for callers
/// that already hold an [`Options`] value and do not need a `Slugifier`
/// instance; both delegate to the same internal routine.
pub fn slugify_with_options(input: &str, options: &Options) -> String {
    slugify_impl(input, options)
}

fn slugify_impl(input: &str, options: &Options) -> String {
    let mut text = input.to_string();

    // Pre-remove using custom regex if provided
    if let Some(ref remove_regex) = options.remove {
        text = remove_regex.replace_all(&text, "").into_owned();
    }

    // Locale-specific mappings first so deunicode doesn't lose semantics
    if let Some(locale) = options.locale {
        text = locale.apply(&text);
    }

    // Strip emoji and symbols before transliteration. Also drop apostrophes by default
    // to avoid creating separators between elisions.
    text = text
        .chars()
        .filter(|c| {
            // Keep letters, numbers, whitespace, and common punctuation; drop symbols like emoji
            c.is_letter() || c.is_number() || c.is_whitespace() || matches!(c, '-' | '_')
        })
        .collect::<String>();

    // Transliterate unicode to ASCII where possible
    text = deunicode(&text);

    // Replace all non-alphanumeric sequences with separator
    let sep = &options.separator;
    let pattern = Regex::new(r"[^A-Za-z0-9]+").unwrap();
    text = pattern.replace_all(&text, sep.as_str()).into_owned();

    // Trim repeated separators
    let repeated_sep_pattern = format!("(?:{})+", regex::escape(sep));
    let multi_sep = Regex::new(&repeated_sep_pattern).unwrap();
    text = multi_sep.replace_all(&text, sep.as_str()).into_owned();

    // Trim leading/trailing separators
    if options.trim {
        text = text
            .trim_matches(|c| sep.chars().any(|s| s == c))
            .to_string();
    }

    if options.lowercase {
        text = text.to_lowercase();
    }

    // Enforce max length if specified
    if let Some(max_len) = options.max_length {
        if text.len() > max_len {
            text.truncate(max_len);
            // Trim trailing separator fragment after truncation
            if options.trim {
                text = text
                    .trim_matches(|c| sep.chars().any(|s| s == c))
                    .to_string();
            }
        }
    }

    text
}