use deunicode::deunicode;
use regex::Regex;
use unicode_categories::UnicodeCategories;
use crate::options::Options;
/// Reusable slug generator that owns the [`Options`] applied on every
/// `slugify` call.
#[derive(Clone, Debug)]
pub struct Slugifier {
    /// Settings consulted each time a slug is produced.
    options: Options,
}
impl Slugifier {
    /// Creates a slugifier configured with `Options::default()`.
    pub fn new() -> Self {
        Self::with_options(Options::default())
    }

    /// Creates a slugifier that uses the given `options`.
    pub fn with_options(options: Options) -> Self {
        Self { options }
    }

    /// Borrows the options currently in effect.
    pub fn options(&self) -> &Options {
        &self.options
    }

    /// Mutably borrows the options so they can be adjusted in place.
    pub fn options_mut(&mut self) -> &mut Options {
        &mut self.options
    }

    /// Applies `change` to the owned options and hands the slugifier back.
    ///
    /// Every builder-style setter below has this exact shape, so they all
    /// funnel through here.
    fn configure(mut self, change: impl FnOnce(&mut Options)) -> Self {
        change(&mut self.options);
        self
    }

    /// Sets the string emitted in place of runs of non-alphanumeric characters.
    pub fn separator(self, separator: impl Into<String>) -> Self {
        self.configure(|opts| opts.separator = separator.into())
    }

    /// Sets the locale whose `apply` step runs over the input before
    /// slugifying; `None` skips that step.
    pub fn locale(self, locale: Option<crate::Locale>) -> Self {
        self.configure(|opts| opts.locale = locale)
    }

    /// Sets the pattern whose matches are deleted from the input before
    /// slugifying; `None` disables the removal pass.
    pub fn remove(self, remove: Option<Regex>) -> Self {
        self.configure(|opts| opts.remove = remove)
    }

    /// Chooses whether the finished slug is lowercased.
    pub fn lowercase(self, lowercase: bool) -> Self {
        self.configure(|opts| opts.lowercase = lowercase)
    }

    /// Chooses whether leading and trailing separators are stripped.
    pub fn trim(self, trim: bool) -> Self {
        self.configure(|opts| opts.trim = trim)
    }

    /// Sets the byte budget enforced while the slug is being assembled;
    /// `None` means unlimited.
    pub fn max_length(self, max_length: Option<usize>) -> Self {
        self.configure(|opts| opts.max_length = max_length)
    }

    /// Produces a slug for `input` using this slugifier's options.
    pub fn slugify(&self, input: &str) -> String {
        slugify_impl(input, self.options())
    }
}
impl Default for Slugifier {
fn default() -> Self {
Self::new()
}
}
/// Builds a slug from `input` using the supplied `options`.
///
/// This is the free-function counterpart of `Slugifier::slugify`; both
/// delegate to `slugify_impl`, so they produce the same result for the same
/// options.
pub fn slugify_with_options(input: &str, options: &Options) -> String {
slugify_impl(input, options)
}
/// Core slug builder shared by every public entry point.
///
/// Steps, in order:
/// 1. Pre-process `input` with `apply_remove_and_locale`.
/// 2. Walk the result character by character:
///    * characters flagged by `should_drop_char` are skipped outright;
///    * ASCII alphanumerics are appended unchanged;
///    * non-ASCII alphanumerics are appended in transliterated form;
///    * everything else requests a separator (consecutive requests collapse
///      into one inside `push_separator`).
///    The walk stops at the first piece that would not fit in
///    `options.max_length`.
/// 3. Strip leading/trailing separators when `options.trim` is set.
/// 4. Lowercase the result when `options.lowercase` is set.
fn slugify_impl(input: &str, options: &Options) -> String {
    let pre = apply_remove_and_locale(input, options);
    let sep = options.separator.as_str();
    let mut state = BuildState::with_capacity(pre.len());

    for ch in pre.chars() {
        // `should_drop_char` never reports `-` or `_`, so those two always
        // reach the separator branch below; no special case is needed here.
        if should_drop_char(ch, options) {
            continue;
        }

        let fits = if !ch.is_alphanumeric() {
            push_separator(&mut state, sep, options)
        } else if ch.is_ascii() {
            append_ascii_char(&mut state, ch, options)
        } else {
            append_transliterated_char(&mut state, ch, options)
        };

        // A `false` from any helper means the length budget is exhausted.
        if !fits {
            break;
        }
    }

    let mut text = state.builder;
    if options.trim {
        trim_separators(&mut text, sep);
    }
    if options.lowercase {
        text = text.to_lowercase();
    }
    text
}
/// Mutable scratch state threaded through the append helpers while a slug is
/// being assembled.
struct BuildState {
    /// The slug text produced so far.
    builder: String,
    /// `true` when the most recent append was a separator; used to collapse
    /// consecutive separators into one.
    prev_was_sep: bool,
    /// Number of bytes appended so far, compared against the length budget.
    bytes_so_far: usize,
}

impl BuildState {
    /// Starts an empty state whose buffer is pre-allocated for `cap` bytes.
    fn with_capacity(cap: usize) -> Self {
        let builder = String::with_capacity(cap);
        Self {
            builder,
            bytes_so_far: 0,
            prev_was_sep: false,
        }
    }
}
fn apply_remove_and_locale(input: &str, options: &Options) -> String {
let removed = if let Some(ref re) = options.remove {
re.replace_all(input, "").into_owned()
} else {
input.to_string()
};
if let Some(locale) = options.locale {
locale.apply(&removed)
} else {
removed
}
}
/// Returns `true` for any character that is not a letter, not a number and
/// not whitespace.
fn is_symbol_like(c: char) -> bool {
    !c.is_letter() && !c.is_number() && !c.is_whitespace()
}
/// Decides whether `c` is discarded without leaving a separator behind.
///
/// A character is dropped when either:
/// * `options.drop_apostrophes` is set and `c` is `'` or `’`; or
/// * `options.drop_emoji` is set and `c` is symbol-like, except for `-` and
///   `_`, which are never dropped by this function.
fn should_drop_char(c: char, options: &Options) -> bool {
    if options.drop_apostrophes && matches!(c, '\'' | '’') {
        return true;
    }

    let is_joiner = matches!(c, '-' | '_');
    options.drop_emoji && !is_joiner && is_symbol_like(c)
}
/// Reports whether appending `add_len` bytes to a slug that already holds
/// `current` bytes would go past `options.max_length`.
///
/// Always `false` when no maximum length is configured.
fn would_exceed(current: usize, add_len: usize, options: &Options) -> bool {
    match options.max_length {
        Some(limit) => current + add_len > limit,
        None => false,
    }
}
/// Appends `sep` to the slug unless the previous append was already a
/// separator.
///
/// Returns `false` only when the separator does not fit in the length
/// budget (nothing is appended in that case); returns `true` otherwise,
/// including when the separator was skipped as a duplicate.
fn push_separator(state: &mut BuildState, sep: &str, options: &Options) -> bool {
    if !state.prev_was_sep {
        let sep_bytes = sep.len();
        if would_exceed(state.bytes_so_far, sep_bytes, options) {
            return false;
        }
        state.builder += sep;
        state.bytes_so_far += sep_bytes;
        state.prev_was_sep = true;
    }
    true
}
/// Appends a single character, accounted as one byte, to the slug.
///
/// The caller only passes ASCII characters, which is why the cost is fixed
/// at one byte. Returns `false` (appending nothing) when that byte would
/// exceed the length budget, `true` once the character has been appended.
fn append_ascii_char(state: &mut BuildState, ch: char, options: &Options) -> bool {
    let fits = !would_exceed(state.bytes_so_far, 1, options);
    if fits {
        state.builder.push(ch);
        state.bytes_so_far += 1;
        state.prev_was_sep = false;
    }
    fits
}
/// Transliterates `ch` with `deunicode` and appends the result to the slug.
///
/// * An empty transliteration appends nothing and returns `true`, leaving
///   the state untouched.
/// * A transliteration that does not fit in the length budget appends
///   nothing and returns `false`; it is never cut part-way.
/// * Otherwise the whole transliteration is appended and `true` is returned.
///
/// NOTE(review): the transliteration is appended verbatim. If `deunicode`
/// can yield non-alphanumeric characters for some inputs they would end up
/// in the slug unfiltered — confirm against the crate's mapping tables.
fn append_transliterated_char(state: &mut BuildState, ch: char, options: &Options) -> bool {
    // `deunicode` works on string slices, so encode the char into a scratch
    // buffer first.
    let ascii = deunicode(ch.encode_utf8(&mut [0u8; 4]));
    let add_len = ascii.len();

    if add_len == 0 {
        return true;
    }
    if would_exceed(state.bytes_so_far, add_len, options) {
        return false;
    }

    state.builder += ascii.as_str();
    state.bytes_so_far += add_len;
    state.prev_was_sep = false;
    true
}
/// Strips every leading and then every trailing occurrence of `sep` from
/// `text`, in place.
///
/// An empty `sep` leaves `text` untouched. Without that guard the trimming
/// could never terminate, because every string starts and ends with the
/// empty string.
///
/// Leading separators are removed before trailing ones; the order matters
/// when a separator can overlap itself (for example `"aba"` in `"ababa"`).
fn trim_separators(text: &mut String, sep: &str) {
    if sep.is_empty() {
        return;
    }

    // Measure the whole leading run first so the buffer is shifted once
    // rather than once per separator.
    let leading = text.len() - text.trim_start_matches(sep).len();
    text.drain(..leading);

    let kept = text.trim_end_matches(sep).len();
    text.truncate(kept);
}