use pyo3::exceptions::PyRuntimeError;
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList, PyString};
use std::borrow::Cow;
use std::collections::HashMap;
use std::sync::{LazyLock, RwLock};
use crate::ErrorMode;
/// Python-facing wrapper around `crate::transliterate::strip_accents`.
///
/// Forwards `text` unchanged and returns whatever string the helper produces.
#[pyfunction]
#[pyo3(signature = (text,))]
pub fn _strip_accents(text: &str) -> String {
    crate::transliterate::strip_accents(text)
}
/// Report whether `text` consists solely of ASCII characters.
///
/// An empty string counts as ASCII.
#[pyfunction]
#[pyo3(signature = (text,))]
pub fn _is_ascii(text: &str) -> bool {
    // A `str` is ASCII exactly when every one of its UTF-8 bytes is ASCII.
    text.as_bytes().is_ascii()
}
/// Return the language codes reported by `crate::tables::list_langs`.
///
/// The list is passed through as is; ordering is whatever the helper yields.
#[pyfunction]
#[pyo3(signature = ())]
pub fn _list_langs() -> Vec<String> {
    crate::tables::list_langs()
}
/// Invoke `crate::tables::seal_registrations` on behalf of Python callers.
///
/// Always returns `None` to Python; any value produced by the helper is discarded.
// NOTE(review): presumably this freezes further language/replacement registration —
// confirm against `crate::tables`.
#[pyfunction]
#[pyo3(signature = ())]
pub fn _seal_registrations() {
    crate::tables::seal_registrations();
}
/// Return the flag reported by `crate::tables::registrations_sealed`.
// NOTE(review): presumably `true` once `_seal_registrations` has been called — confirm
// against `crate::tables`.
#[pyfunction]
#[pyo3(signature = ())]
pub fn _registrations_sealed() -> bool {
    crate::tables::registrations_sealed()
}
/// Register `mappings` under the language `code` via `crate::transliterate::register_lang`.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
#[pyfunction]
#[pyo3(signature = (code, mappings))]
pub fn _register_lang(code: &str, mappings: HashMap<String, String>) -> PyResult<()> {
    crate::transliterate::register_lang(code, mappings)?;
    Ok(())
}
/// Hand `replacements` to `crate::transliterate::register_replacements`.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
#[pyfunction]
#[pyo3(signature = (replacements,))]
pub fn _register_replacements(replacements: HashMap<String, String>) -> PyResult<()> {
    crate::transliterate::register_replacements(replacements)?;
    Ok(())
}
/// Remove the replacement registered for `key` via `crate::transliterate::remove_replacement`.
///
/// Returns the boolean reported by the helper.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
// NOTE(review): the boolean presumably says whether `key` was present — confirm.
#[pyfunction]
#[pyo3(signature = (key,))]
pub fn _remove_replacement(key: &str) -> PyResult<bool> {
    let removed = crate::transliterate::remove_replacement(key)?;
    Ok(removed)
}
/// Call `crate::transliterate::clear_replacements` on behalf of Python callers.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
#[pyfunction]
#[pyo3(signature = ())]
pub fn _clear_replacements() -> PyResult<()> {
    crate::transliterate::clear_replacements()?;
    Ok(())
}
/// Python callable used by `_transliterate_entry` for every call it does not handle natively.
///
/// Starts out as `None`; `_set_transliterate_fallback` stores the callable. The slot is
/// created lazily on first access and guarded by an `RwLock`.
static TRANSLITERATE_FALLBACK: LazyLock<RwLock<Option<Py<PyAny>>>> =
LazyLock::new(|| RwLock::new(None));
/// Store `f` in `TRANSLITERATE_FALLBACK`, replacing any callable stored earlier.
///
/// # Errors
///
/// Raises `RuntimeError` when `f` is not callable; the stored value is left untouched then.
#[pyfunction]
pub fn _set_transliterate_fallback(f: Bound<'_, PyAny>) -> PyResult<()> {
    if f.is_callable() {
        // NOTE(review): `recover_lock` presumably also copes with a poisoned lock — confirm.
        let mut guard =
            crate::recover_lock(TRANSLITERATE_FALLBACK.write(), "TRANSLITERATE_FALLBACK");
        *guard = Some(f.unbind());
        Ok(())
    } else {
        Err(PyRuntimeError::new_err(
            "transliterate fallback must be callable",
        ))
    }
}
/// Transliterate natively when possible, otherwise defer to the registered Python fallback.
///
/// * Native path: taken only when `target` is `None`, `context` is `false` and `text` is
///   exactly a Python `str` (checked with `cast_exact`). The call is forwarded to
///   `_transliterate`.
/// * Fallback path: everything else. The callable stored in `TRANSLITERATE_FALLBACK` is
///   invoked as `fallback(text, lang=..., target=..., errors=..., replace_with=...,
///   strict_iso9=..., gost7034=..., tones=..., context=...)` and its result is returned as is.
///
/// # Errors
///
/// Raises `RuntimeError` when the fallback path is needed but no callable has been
/// registered. Errors from `_transliterate` or from the fallback propagate unchanged.
#[pyfunction]
#[pyo3(
    signature = (text, *, lang=None, target=None, errors="replace", replace_with="[?]", strict_iso9=false, gost7034=false, tones=false, context=false),
    text_signature = "(text, *, lang=None, target=None, errors='replace', replace_with='[?]', strict_iso9=False, gost7034=False, tones=False, context=False)"
)]
// The parameter list mirrors the Python keyword arguments one to one.
#[allow(clippy::too_many_arguments)]
pub fn _transliterate_entry<'py>(
    text: &Bound<'py, PyAny>,
    lang: Option<&str>,
    target: Option<&str>,
    errors: &str,
    replace_with: &str,
    strict_iso9: bool,
    gost7034: bool,
    tones: bool,
    context: bool,
) -> PyResult<Bound<'py, PyAny>> {
    // `target` and `context` are only understood by the Python-side dispatcher.
    let needs_dispatcher = target.is_some() || context;
    if !needs_dispatcher {
        if let Ok(exact_str) = text.cast_exact::<PyString>() {
            let native = _transliterate(
                exact_str,
                lang,
                errors,
                replace_with,
                strict_iso9,
                gost7034,
                tones,
            )?;
            return Ok(native.into_any());
        }
    }

    let py = text.py();

    // The read guard is a temporary of this statement, so the lock is released before the
    // callable is invoked.
    let registered = crate::recover_lock(TRANSLITERATE_FALLBACK.read(), "TRANSLITERATE_FALLBACK")
        .as_ref()
        .map(|callable| callable.clone_ref(py));
    let dispatcher = registered.ok_or_else(|| {
        PyRuntimeError::new_err(
            "disarm internal error: transliterate dispatcher not registered — \
             import the `disarm` package rather than `disarm._core` directly",
        )
    })?;

    // Forward every option by keyword so the fallback sees the caller's exact arguments.
    let keywords = PyDict::new(py);
    keywords.set_item("lang", lang)?;
    keywords.set_item("target", target)?;
    keywords.set_item("errors", errors)?;
    keywords.set_item("replace_with", replace_with)?;
    keywords.set_item("strict_iso9", strict_iso9)?;
    keywords.set_item("gost7034", gost7034)?;
    keywords.set_item("tones", tones)?;
    keywords.set_item("context", context)?;

    dispatcher.bind(py).call((text,), Some(&keywords))
}
/// Transliterate a single Python `str`.
///
/// Steps, in order:
/// 1. reject `strict_iso9` combined with `gost7034`;
/// 2. validate `lang`;
/// 3. run the registered replacements over the text (`apply_replacements_bounded`);
/// 4. if `errors == "strict"`, use `transliterate_strict`; otherwise parse `errors` into an
///    `ErrorMode` and use `transliterate_impl` with `replace_with`.
///
/// In the non-strict case, when both the replacement pass and the transliteration returned
/// borrowed data, the original Python string object is returned instead of a new one.
///
/// # Errors
///
/// Propagates the mutually-exclusive-flags error, `lang` validation errors, UTF-8 access
/// errors from `to_str`, replacement errors, strict-mode errors and `ErrorMode::parse` errors.
#[pyfunction]
#[pyo3(signature = (text, lang=None, errors="replace", replace_with="[?]", strict_iso9=false, gost7034=false, tones=false))]
pub fn _transliterate<'py>(
    text: &Bound<'py, PyString>,
    lang: Option<&str>,
    errors: &str,
    replace_with: &str,
    strict_iso9: bool,
    gost7034: bool,
    tones: bool,
) -> PyResult<Bound<'py, PyString>> {
    if strict_iso9 && gost7034 {
        return Err(crate::ErrorRepr::MutuallyExclusiveBare.into());
    }
    crate::transliterate::validate_lang(lang)?;

    let py = text.py();
    let source = text.to_str()?;
    let replaced = crate::transliterate::apply_replacements_bounded(source)?;

    if errors == "strict" {
        let strict_out = crate::transliterate::transliterate_strict(
            &replaced,
            lang,
            strict_iso9,
            gost7034,
            tones,
        )?;
        return Ok(PyString::new(py, &strict_out));
    }

    let error_mode = ErrorMode::parse(errors)?;
    let out = crate::transliterate::transliterate_impl(
        &replaced,
        lang,
        error_mode,
        replace_with,
        strict_iso9,
        gost7034,
        tones,
    );

    // Both stages borrowed: hand back the caller's own string object rather than a copy.
    // NOTE(review): this presumes a borrowed result always equals the whole input — confirm
    // against `transliterate_impl`.
    let untouched = matches!(&replaced, Cow::Borrowed(_)) && matches!(&out, Cow::Borrowed(_));
    if untouched {
        Ok(text.clone())
    } else {
        Ok(PyString::new(py, &out))
    }
}
/// Validate a set of transliterate options without transliterating anything.
///
/// All parameters are keyword-only on the Python side and are forwarded unchanged to
/// `crate::transliterate::validate_transliterate_args`. Returns `None` on success.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
#[pyfunction]
#[pyo3(signature = (
    *,
    lang=None,
    target=None,
    errors="replace",
    replace_with="[?]",
    strict_iso9=false,
    gost7034=false,
    tones=false,
    context=false,
))]
// The parameter list mirrors the Python keyword arguments one to one.
#[allow(clippy::too_many_arguments)]
pub fn _validate_transliterate_args(
    lang: Option<&str>,
    target: Option<&str>,
    errors: &str,
    replace_with: &str,
    strict_iso9: bool,
    gost7034: bool,
    tones: bool,
    context: bool,
) -> PyResult<()> {
    let verdict = crate::transliterate::validate_transliterate_args(
        lang,
        target,
        errors,
        replace_with,
        strict_iso9,
        gost7034,
        tones,
        context,
    );
    match verdict {
        Ok(_) => Ok(()),
        Err(problem) => Err(problem.into()),
    }
}
/// List the characters of `text` that `find_untranslatable_impl` reports.
///
/// The registered replacements are applied first, so the scan runs on the post-replacement
/// text rather than on the raw input.
///
/// Returns `(char, usize)` pairs exactly as produced by the helper.
///
/// # Errors
///
/// Raises when `strict_iso9` and `gost7034` are both set, when `lang` fails validation, or
/// when applying the replacements fails.
// NOTE(review): the `usize` is presumably a position in the post-replacement text; whether it
// counts bytes or characters must be confirmed against `find_untranslatable_impl`.
#[pyfunction]
#[pyo3(signature = (text, *, lang=None, strict_iso9=false, gost7034=false, tones=false))]
pub fn _find_untranslatable(
    text: &str,
    lang: Option<&str>,
    strict_iso9: bool,
    gost7034: bool,
    tones: bool,
) -> PyResult<Vec<(char, usize)>> {
    if strict_iso9 && gost7034 {
        return Err(crate::ErrorRepr::MutuallyExclusiveBare.into());
    }
    crate::transliterate::validate_lang(lang)?;

    let replaced = crate::transliterate::apply_replacements_bounded(text)?;
    let findings = crate::transliterate::find_untranslatable_impl(
        &replaced,
        lang,
        strict_iso9,
        gost7034,
        tones,
    );
    Ok(findings)
}
/// Python-facing wrapper around `crate::transliterate::transliterate_context`.
///
/// Every argument is forwarded unchanged; all validation is left to the helper.
///
/// # Errors
///
/// Any error returned by the helper is converted into a Python exception and raised.
#[pyfunction]
#[pyo3(signature = (text, *, lang=None, errors="replace", replace_with="[?]", strict_iso9=false, gost7034=false))]
pub fn _transliterate_context(
    text: &str,
    lang: Option<&str>,
    errors: &str,
    replace_with: &str,
    strict_iso9: bool,
    gost7034: bool,
) -> PyResult<String> {
    let rendered = crate::transliterate::transliterate_context(
        text,
        lang,
        errors,
        replace_with,
        strict_iso9,
        gost7034,
    )?;
    Ok(rendered)
}
/// Transliterate every string of the Python list `texts`, preserving input order.
///
/// Up-front checks, in order: `strict_iso9` with `gost7034` is rejected, the batch length is
/// compared against `crate::MAX_BATCH_SIZE`, `lang` is validated, and (unless
/// `errors == "strict"`) `errors` is parsed into an `ErrorMode`.
///
/// The items are then handled in chunks of `crate::BATCH_CHUNK_SIZE`: each chunk is extracted
/// into owned `String`s, and the conversion itself runs inside `py.detach`, i.e. while this
/// thread is detached from the Python interpreter.
///
/// # Errors
///
/// Raises on any failed up-front check, when an item is not a `str`, or when replacement /
/// strict transliteration fails for an item. The first failure aborts the whole batch.
#[pyfunction]
#[pyo3(signature = (texts, lang=None, errors="replace", replace_with="[?]", strict_iso9=false, gost7034=false, tones=false))]
pub fn _transliterate_batch(
    py: Python<'_>,
    texts: &Bound<'_, PyList>,
    lang: Option<&str>,
    errors: &str,
    replace_with: &str,
    strict_iso9: bool,
    gost7034: bool,
    tones: bool,
) -> PyResult<Vec<String>> {
    if strict_iso9 && gost7034 {
        return Err(crate::ErrorRepr::MutuallyExclusiveBare.into());
    }

    // Work on a tuple copy of the list.
    // NOTE(review): presumably so that the list being mutated while we are detached cannot
    // change the length or the items we iterate — confirm.
    let texts = texts.to_tuple();
    let len = texts.len();
    if len > crate::MAX_BATCH_SIZE {
        return Err(crate::ErrorRepr::BatchTooLarge {
            len,
            max: crate::MAX_BATCH_SIZE,
        }
        .into());
    }
    crate::transliterate::validate_lang(lang)?;

    let strict = errors == "strict";
    // In strict mode `errors` is not parsed; `Ignore` is a placeholder the strict branch
    // below never reads.
    let error_mode = if strict {
        ErrorMode::Ignore
    } else {
        ErrorMode::parse(errors)?
    };

    // Owned copies of the options for use inside the detached closure.
    let lang = lang.map(str::to_owned);
    let replace_with = replace_with.to_owned();

    let mut results: Vec<String> = Vec::with_capacity(len);
    let mut cursor = 0;
    while cursor < len {
        let stop = (cursor + crate::BATCH_CHUNK_SIZE).min(len);

        // Extraction touches Python objects, so it stays outside `detach`.
        let mut pending: Vec<String> = Vec::with_capacity(stop - cursor);
        for index in cursor..stop {
            let item = texts.get_item(index)?;
            pending.push(item.extract::<String>()?);
        }

        let converted: Vec<String> = py.detach(|| -> PyResult<Vec<String>> {
            let mut done = Vec::with_capacity(pending.len());
            for text in &pending {
                let replaced = crate::transliterate::apply_replacements_bounded(text)?;
                let rendered = if strict {
                    crate::transliterate::transliterate_strict(
                        &replaced,
                        lang.as_deref(),
                        strict_iso9,
                        gost7034,
                        tones,
                    )?
                } else {
                    crate::transliterate::transliterate_impl(
                        &replaced,
                        lang.as_deref(),
                        error_mode,
                        &replace_with,
                        strict_iso9,
                        gost7034,
                        tones,
                    )
                    .into_owned()
                };
                done.push(rendered);
            }
            Ok(done)
        })?;

        results.extend(converted);
        cursor = stop;
    }
    Ok(results)
}
/// Strip combining marks from every string in `texts`, preserving input order.
///
/// Pure-ASCII strings are returned untouched. Any other string is decomposed (NFD), has its
/// combining marks dropped, and is recomposed (NFC). The work runs inside `py.detach`, i.e.
/// while this thread is detached from the Python interpreter.
///
/// # Errors
///
/// Raises the `BatchTooLarge` error when `texts` holds more than `crate::MAX_BATCH_SIZE`
/// items.
// NOTE(review): this re-implements accent stripping inline instead of calling
// `crate::transliterate::strip_accents` as `_strip_accents` does — confirm the two agree.
#[pyfunction]
#[pyo3(signature = (texts,))]
pub fn _strip_accents_batch(py: Python<'_>, texts: Vec<String>) -> PyResult<Vec<String>> {
    use unicode_normalization::UnicodeNormalization;

    let count = texts.len();
    if count > crate::MAX_BATCH_SIZE {
        return Err(crate::ErrorRepr::BatchTooLarge {
            len: count,
            max: crate::MAX_BATCH_SIZE,
        }
        .into());
    }

    let stripped: Vec<String> = py.detach(move || {
        let mut cleaned_all: Vec<String> = Vec::with_capacity(texts.len());
        for text in texts {
            let cleaned = if text.is_ascii() {
                // ASCII never carries combining marks; reuse the allocation.
                text
            } else {
                text.nfd()
                    .filter(|&c| !unicode_normalization::char::is_combining_mark(c))
                    .nfc()
                    .collect::<String>()
            };
            cleaned_all.push(cleaned);
        }
        cleaned_all
    });
    Ok(stripped)
}