use std::sync::LazyLock;
use std::sync::RwLock;
use pyo3::prelude::*;
use pyo3::types::PyList;
use crate::emoji::{
is_emoji_codepoint, is_emoji_modifier, match_emoji_at, pad_emoji_replacement,
strip_modifier_suffix, CharWindow, VS15, VS16, ZWJ,
};
use crate::tables;
use crate::ErrorMode;
/// Process-wide default emoji provider.
///
/// Holds `None` until a provider is installed through [`set_provider`].
/// `_demojize` reads this slot only when the caller did not pass a per-call
/// provider. The `RwLock` is created lazily on first access.
static GLOBAL_PROVIDER: LazyLock<RwLock<Option<Py<PyAny>>>> = LazyLock::new(|| RwLock::new(None));
/// Installs `provider` as the process-wide default emoji provider, or clears
/// the default when `provider` is `None`.
///
/// The previously stored provider is moved out of the slot and dropped only
/// after the write lock has been released. Dropping a `Py<PyAny>` may release
/// the last reference to a Python object; when that happens with the GIL held
/// the object's finalizer can run arbitrary Python code. Running it while the
/// lock is still held could deadlock if that code re-enters this module (or
/// yields the GIL to another thread that is waiting on the lock).
pub fn set_provider(provider: Option<Py<PyAny>>) {
    let mut slot = crate::recover_lock(GLOBAL_PROVIDER.write(), "GLOBAL_PROVIDER");
    let previous = std::mem::replace(&mut *slot, provider);
    // Release the lock first, then let the old provider go.
    drop(slot);
    drop(previous);
}
/// Asks a Python emoji provider for a name matching the start of `window`.
///
/// Prefixes of `window` are tried longest-first, starting at
/// `min(max_len, window.len())` characters and shrinking down to one. Each
/// candidate is handed to `provider.lookup(seq)` as a Python list of integer
/// code points.
///
/// # Returns
///
/// * `Some((name, len))` for the first prefix whose lookup returns a
///   non-`None` string; `len` is the number of characters in that prefix.
/// * `None` when every lookup returns `None`, when `window` is empty or
///   `max_len` is zero, or when building the Python list fails.
/// * `None` as well when `lookup` raises or returns a non-string value. In
///   those two cases a warning is emitted through `crate::emit_py_warning`
///   and the remaining shorter prefixes are not tried.
fn try_python_provider(
    py: Python<'_>,
    provider: &Py<PyAny>,
    window: &[char],
    max_len: usize,
) -> Option<(String, usize)> {
    let try_len = max_len.min(window.len());
    // Convert the candidate characters to code points once; every prefix
    // tried below is just a sub-slice of this buffer.
    let codepoints: Vec<u32> = window[..try_len].iter().map(|&c| u32::from(c)).collect();

    for len in (1..=try_len).rev() {
        let py_seq = PyList::new(py, &codepoints[..len]).ok()?;
        let result = match provider.call_method1(py, "lookup", (py_seq,)) {
            Ok(value) => value,
            Err(e) => {
                let msg = format!(
                    "disarm: EmojiProvider.lookup() raised an exception and will be ignored: {e}"
                );
                crate::emit_py_warning(py, &msg);
                return None;
            }
        };

        // `None` from the provider means "no match for this prefix"; fall
        // through to the next shorter one.
        if result.is_none(py) {
            continue;
        }

        return match result.extract::<String>(py) {
            Ok(name) => Some((name, len)),
            Err(e) => {
                let msg = format!(
                    "disarm: EmojiProvider.lookup() returned a non-string value \
                     and will be ignored: {e}"
                );
                crate::emit_py_warning(py, &msg);
                None
            }
        };
    }
    None
}
/// Walks `text` character by character and rewrites emoji as names.
///
/// At each position the following are tried in order:
///
/// 1. A stray `VS16`, `VS15` or `ZWJ` is skipped and produces no output.
/// 2. If `provider` is given, `try_python_provider` is consulted with at
///    most `tables::max_emoji_seq_len()` characters; a hit is appended via
///    `pad_emoji_replacement`.
/// 3. The built-in matcher `match_emoji_at`; its name is passed through
///    `strip_modifier_suffix(name, strip_modifiers)` before being appended
///    via `pad_emoji_replacement`.
/// 4. A character for which `is_emoji_codepoint` is true but nothing matched
///    is handled according to `error_mode`: `Replace` appends `replace_with`,
///    `Ignore` appends nothing, `Preserve` appends the raw character.
/// 5. Anything else is copied through unchanged.
///
/// Emoji modifiers directly following a handled emoji are consumed with it
/// (and copied to the output only in case 4 under `Preserve`). A single space
/// is inserted before an alphanumeric character that directly follows emoji
/// output. Pure-ASCII input is returned as-is.
fn demojize_impl(
    py: Python<'_>,
    text: &str,
    strip_modifiers: bool,
    error_mode: ErrorMode,
    replace_with: &str,
    provider: Option<&Py<PyAny>>,
) -> String {
    // Fast path: the loop below is skipped entirely for pure-ASCII input.
    if text.is_ascii() {
        return text.to_owned();
    }

    let mut cursor = CharWindow::new(text.chars());
    let mut out = String::with_capacity(text.len());
    // Tracks whether the latest output came from an emoji, so that a
    // following alphanumeric character can be separated by a space.
    let mut after_emoji = false;

    while let Some(ch) = cursor.current() {
        // Variation selectors and joiners outside a matched sequence are dropped.
        if [VS16, VS15, ZWJ].contains(&ch) {
            cursor.advance(1);
            continue;
        }

        // The caller-supplied provider takes precedence over the built-in table.
        let custom_hit = provider.and_then(|prov| {
            try_python_provider(py, prov, cursor.as_slice(), tables::max_emoji_seq_len())
        });
        if let Some((name, consumed)) = custom_hit {
            pad_emoji_replacement(&mut out, &name);
            cursor.advance(consumed);
            while cursor.current().is_some_and(is_emoji_modifier) {
                cursor.advance(1);
            }
            after_emoji = true;
            continue;
        }

        // Built-in emoji table.
        if let Some((name, consumed)) = match_emoji_at(cursor.as_slice()) {
            let replacement = strip_modifier_suffix(name, strip_modifiers);
            pad_emoji_replacement(&mut out, replacement);
            cursor.advance(consumed);
            while cursor.current().is_some_and(is_emoji_modifier) {
                cursor.advance(1);
            }
            after_emoji = true;
            continue;
        }

        // Emoji code point with no known name: apply the error policy.
        if is_emoji_codepoint(ch) {
            let produced_output = match error_mode {
                ErrorMode::Replace => {
                    out.push_str(replace_with);
                    !replace_with.is_empty()
                }
                ErrorMode::Ignore => false,
                ErrorMode::Preserve => {
                    out.push(ch);
                    true
                }
            };
            cursor.advance(1);

            // Trailing modifiers share the fate of the emoji they modify.
            let keep_raw = matches!(error_mode, ErrorMode::Preserve);
            while let Some(modifier) = cursor.current().filter(|&c| is_emoji_modifier(c)) {
                if keep_raw {
                    out.push(modifier);
                }
                cursor.advance(1);
            }

            after_emoji = produced_output;
            continue;
        }

        // Ordinary character.
        if after_emoji && ch.is_alphanumeric() {
            out.push(' ');
        }
        out.push(ch);
        after_emoji = false;
        cursor.advance(1);
    }

    out
}
// Python entry point `_demojize(text, *, strip_modifiers=False,
// errors="replace", replace_with="[?]", provider=None)`.
//
// Parses `errors` into an `ErrorMode` (propagating the parse error to
// Python), picks the provider to use, and delegates to `demojize_impl`.
// A provider passed by the caller wins; otherwise the one stored in
// `GLOBAL_PROVIDER`, if any, is used.
#[pyfunction]
#[pyo3(name = "_demojize")]
#[pyo3(signature = (text, *, strip_modifiers=false, errors="replace", replace_with="[?]", provider=None))]
pub fn _demojize(
    py: Python<'_>,
    text: &str,
    strip_modifiers: bool,
    errors: &str,
    replace_with: &str,
    provider: Option<Py<PyAny>>,
) -> PyResult<String> {
    let error_mode = ErrorMode::parse(errors)?;

    // Fall back to the global provider only when none was given. The read
    // guard lives just long enough to clone the reference, so the lock is
    // not held while the conversion runs.
    let effective_provider = provider.or_else(|| {
        let guard = crate::recover_lock(GLOBAL_PROVIDER.read(), "GLOBAL_PROVIDER");
        guard.as_ref().map(|stored| stored.clone_ref(py))
    });

    let converted = demojize_impl(
        py,
        text,
        strip_modifiers,
        error_mode,
        replace_with,
        effective_provider.as_ref(),
    );
    Ok(converted)
}
// Python entry point `_set_emoji_provider(provider=None)`.
//
// Stores `provider` as the global default via `set_provider`; calling it
// with no argument (or `None`) clears the default. Returns the error from
// `check_not_sealed` without changing anything if that check fails.
#[pyfunction]
#[pyo3(name = "_set_emoji_provider")]
#[pyo3(signature = (provider=None))]
pub fn _set_emoji_provider(provider: Option<Py<PyAny>>) -> PyResult<()> {
// NOTE(review): presumably rejects changes once the library configuration
// has been sealed — confirm against `transliterate::check_not_sealed`.
crate::transliterate::check_not_sealed("set_emoji_provider")?;
set_provider(provider);
Ok(())
}