#![cfg_attr(not(feature = "extension-module"), allow(dead_code))]
#[cfg(feature = "extension-module")]
use pyo3::prelude::*;
#[macro_use]
mod obs;
#[doc(hidden)]
pub mod utils;
pub(crate) mod error;
pub(crate) use error::ErrorRepr;
pub use error::{Error, ErrorKind};
/// Error-handling mode whose string forms are `"replace"`, `"ignore"` and
/// `"preserve"`.
///
/// Converts to text through `as_str` / `Display` and from text through
/// `FromStr`.
///
/// NOTE(review): what each mode does to the processed text is decided by code
/// outside this file — confirm at the call sites before relying on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorMode {
/// String form: `"replace"`.
Replace,
/// String form: `"ignore"`.
Ignore,
/// String form: `"preserve"`.
Preserve,
}
impl ErrorMode {
#[must_use]
pub fn as_str(self) -> &'static str {
match self {
Self::Replace => "replace",
Self::Ignore => "ignore",
Self::Preserve => "preserve",
}
}
pub(crate) fn parse(s: &str) -> Result<Self, crate::ErrorRepr> {
match s {
"replace" => Ok(Self::Replace),
"ignore" => Ok(Self::Ignore),
"preserve" => Ok(Self::Preserve),
_ => Err(crate::ErrorRepr::InvalidErrorMode { got: s.to_owned() }),
}
}
}
/// Formats the mode as its canonical name (the value of `as_str`).
impl std::fmt::Display for ErrorMode {
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Written verbatim: width/precision flags on the formatter are not applied.
        let label: &'static str = (*self).as_str();
        out.write_str(label)
    }
}
impl std::str::FromStr for ErrorMode {
type Err = crate::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s).map_err(crate::Error::from)
}
}
pub mod api;
pub use api::DisarmStr;
pub(crate) mod case_fold;
pub(crate) mod confusables;
pub(crate) mod context;
pub(crate) mod encoders;
pub(crate) mod encoding;
pub(crate) mod filename;
pub(crate) mod grapheme;
pub(crate) mod hostname;
pub(crate) mod limits;
pub(crate) mod log_injection;
pub(crate) mod normalize;
pub(crate) mod pipeline;
pub(crate) mod presets;
pub(crate) mod reverse;
pub(crate) mod scripts;
pub(crate) mod slugify;
pub(crate) mod unicode_ranges;
pub(crate) mod whitespace;
pub(crate) mod width;
pub(crate) mod zalgo;
#[doc(hidden)]
pub mod emoji;
#[doc(hidden)]
pub mod transliterate;
#[allow(clippy::unreadable_literal, clippy::unicode_not_nfc)]
#[doc(hidden)]
pub mod tables;
#[cfg(feature = "extension-module")]
#[doc(hidden)]
mod py;
// Initialiser for the Python extension module, exposed under the name `_core`.
//
// Registers every `py::*` wrapper function and class, the four exception
// types, and the `_MAX_BATCH_SIZE` constant on the module object `m`. Any
// failing registration is propagated to the caller through `?`.
// Only compiled when the `extension-module` feature is enabled.
//
// Plain `//` comments are used here on purpose: a `///` doc comment on a
// `#[pymodule]` function would become the Python module's docstring.
#[cfg(feature = "extension-module")]
#[pymodule]
#[pyo3(name = "_core")]
fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
// --- py::transliterate: single-input functions and registration management ---
m.add_function(wrap_pyfunction!(py::transliterate::_transliterate, m)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_transliterate_entry,
m
)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_set_transliterate_fallback,
m
)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_validate_transliterate_args,
m
)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_find_untranslatable,
m
)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_transliterate_context,
m
)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_strip_accents, m)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_is_ascii, m)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_list_langs, m)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_register_lang, m)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_register_replacements,
m
)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_remove_replacement, m)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_clear_replacements, m)?)?;
m.add_function(wrap_pyfunction!(py::transliterate::_seal_registrations, m)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_registrations_sealed,
m
)?)?;
// --- py::slugify, py::log_injection, py::normalize, py::confusables ---
m.add_function(wrap_pyfunction!(py::slugify::_slugify, m)?)?;
m.add_function(wrap_pyfunction!(
py::log_injection::_strip_log_injection,
m
)?)?;
m.add_function(wrap_pyfunction!(py::normalize::_normalize, m)?)?;
m.add_function(wrap_pyfunction!(py::normalize::_is_normalized, m)?)?;
m.add_function(wrap_pyfunction!(
py::confusables::_normalize_confusables,
m
)?)?;
m.add_function(wrap_pyfunction!(py::confusables::_is_confusable, m)?)?;
// --- py::encoders, py::filename, py::case_fold, py::whitespace, py::scripts ---
m.add_function(wrap_pyfunction!(py::encoders::_escape_html, m)?)?;
m.add_function(wrap_pyfunction!(py::encoders::_percent_encode, m)?)?;
m.add_function(wrap_pyfunction!(py::filename::_sanitize_filename, m)?)?;
m.add_function(wrap_pyfunction!(py::case_fold::_fold_case, m)?)?;
m.add_function(wrap_pyfunction!(py::whitespace::_collapse_whitespace, m)?)?;
m.add_function(wrap_pyfunction!(py::scripts::_detect_scripts, m)?)?;
m.add_function(wrap_pyfunction!(py::scripts::_is_mixed_script, m)?)?;
m.add_function(wrap_pyfunction!(py::scripts::_inspect_auto_lang, m)?)?;
// --- `*_batch` variants ---
m.add_function(wrap_pyfunction!(
py::transliterate::_transliterate_batch,
m
)?)?;
m.add_function(wrap_pyfunction!(
py::transliterate::_strip_accents_batch,
m
)?)?;
m.add_function(wrap_pyfunction!(py::slugify::_slugify_batch, m)?)?;
m.add_function(wrap_pyfunction!(py::normalize::_normalize_batch, m)?)?;
// --- Classes from py::slugify / py::pipeline, plus pipeline functions ---
m.add_class::<py::slugify::_Slugifier>()?;
m.add_class::<py::slugify::_UniqueSlugifier>()?;
m.add_class::<py::pipeline::_TextPipeline>()?;
m.add_function(wrap_pyfunction!(py::pipeline::_get_pipeline, m)?)?;
m.add_function(wrap_pyfunction!(py::pipeline::_list_profiles, m)?)?;
// --- py::presets ---
m.add_function(wrap_pyfunction!(py::presets::_security_clean, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_ml_normalize, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_catalog_key, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_display_clean, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_search_key, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_sort_key, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_strip_bidi, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_normalize_user_input, m)?)?;
m.add_function(wrap_pyfunction!(py::presets::_strip_obfuscation, m)?)?;
// --- py::zalgo, py::grapheme, py::width ---
m.add_function(wrap_pyfunction!(py::zalgo::_is_zalgo, m)?)?;
m.add_function(wrap_pyfunction!(py::zalgo::_strip_zalgo, m)?)?;
m.add_function(wrap_pyfunction!(py::grapheme::_grapheme_len, m)?)?;
m.add_function(wrap_pyfunction!(py::grapheme::_grapheme_split, m)?)?;
m.add_function(wrap_pyfunction!(py::grapheme::_grapheme_truncate, m)?)?;
m.add_function(wrap_pyfunction!(py::width::_terminal_width, m)?)?;
m.add_function(wrap_pyfunction!(py::width::_grapheme_width, m)?)?;
// --- py::hostname, py::encoding, py::reverse, py::emoji ---
m.add_function(wrap_pyfunction!(py::hostname::_is_suspicious_hostname, m)?)?;
m.add_class::<py::hostname::HostnameAnalysis>()?;
m.add_function(wrap_pyfunction!(py::encoding::_detect_encoding, m)?)?;
m.add_function(wrap_pyfunction!(py::encoding::_decode_to_utf8, m)?)?;
m.add_function(wrap_pyfunction!(py::reverse::_reverse_transliterate, m)?)?;
m.add_function(wrap_pyfunction!(py::reverse::_reverse_langs, m)?)?;
m.add_function(wrap_pyfunction!(py::emoji::_demojize, m)?)?;
m.add_function(wrap_pyfunction!(py::emoji::_set_emoji_provider, m)?)?;
// --- Exception types, added as module attributes under their Python names ---
m.add("DisarmError", m.py().get_type::<DisarmError>())?;
m.add(
"InvalidArgumentError",
m.py().get_type::<InvalidArgumentError>(),
)?;
m.add(
"ResourceLimitError",
m.py().get_type::<ResourceLimitError>(),
)?;
m.add("UnsupportedError", m.py().get_type::<UnsupportedError>())?;
// --- Module-level constant: the Rust `MAX_BATCH_SIZE` value ---
m.add("_MAX_BATCH_SIZE", MAX_BATCH_SIZE)?;
Ok(())
}
/// Batch-size constant; the Python module initialiser exports this value as
/// the module attribute `_MAX_BATCH_SIZE`.
///
/// NOTE(review): presumably the maximum number of items a `*_batch` call
/// accepts — the enforcing code is not in this file; confirm at the use sites.
pub(crate) const MAX_BATCH_SIZE: usize = 100_000;
/// NOTE(review): not referenced in this file; presumably the number of items
/// handled per chunk when a batch is processed — confirm at the use sites.
pub(crate) const BATCH_CHUNK_SIZE: usize = 64;
/// Unwraps a lock result, recovering the inner value when the lock is poisoned.
///
/// A successful result is returned untouched. For a poisoned result the
/// function logs through `tl_error!`, emits a human-readable warning, and then
/// returns the value carried inside the poison error.
///
/// With the `extension-module` feature the warning is raised through Python
/// (`emit_py_warning`); if that attempt panics, the message is written to
/// stderr instead. Without the feature the message always goes to stderr.
///
/// `table_name` only labels the log line and the warning text.
pub(crate) fn recover_lock<T>(result: std::sync::LockResult<T>, table_name: &str) -> T {
    // Fast path: nothing to report when the lock is healthy.
    let poisoned = match result {
        Ok(value) => return value,
        Err(poisoned) => poisoned,
    };

    tl_error!("lock poisoned, recovered: table={table_name:?}");
    let msg = format!(
        "disarm: lock for {table_name:?} poisoned (a thread panicked while holding the \
         lock). Recovering from poisoned state — data may be inconsistent. This is a bug; \
         please report it."
    );

    #[cfg(feature = "extension-module")]
    {
        // Attaching to the interpreter may panic; contain that so recovery
        // still completes, and fall back to stderr in that case.
        let attempt = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            pyo3::Python::attach(|py| emit_py_warning(py, &msg));
        }));
        if attempt.is_err() {
            emit_warning_stderr(&msg);
        }
    }
    #[cfg(not(feature = "extension-module"))]
    emit_warning_stderr(&msg);

    poisoned.into_inner()
}
/// Writes `msg` followed by a newline to standard error, best-effort.
///
/// This is the last-resort channel for warnings, so it must never fail
/// loudly: `eprintln!` panics when the write to stderr fails (for example when
/// the stream has been closed), which would turn a diagnostic into a crash on
/// the lock-recovery path. The write error is therefore deliberately
/// discarded.
pub(crate) fn emit_warning_stderr(msg: &str) {
    let stderr = std::io::stderr();
    let mut handle = stderr.lock();
    // Fully-qualified call so no `use std::io::Write` is needed at file level.
    let _ = std::io::Write::write_fmt(&mut handle, format_args!("{msg}\n"));
}
/// Emits `msg` through Python's `warnings.warn`, falling back to stderr.
///
/// If importing the `warnings` module or calling `warn` returns an error,
/// the message is written with `emit_warning_stderr` instead, so the warning
/// is not lost.
#[cfg(feature = "extension-module")]
pub(crate) fn emit_py_warning(py: pyo3::Python<'_>, msg: &str) {
    let warn_failed = py
        .import("warnings")
        .and_then(|warnings| warnings.call_method1("warn", (msg,)))
        .is_err();

    if warn_failed {
        emit_warning_stderr(msg);
    }
}
// Declares the Python exception type `DisarmError` with `ValueError`
// (`PyValueError`) as its base class; `disarm` is the module name passed to
// the macro and the trailing string literal is the exception's docstring.
// Only compiled with the `extension-module` feature.
#[cfg(feature = "extension-module")]
pyo3::create_exception!(
disarm,
DisarmError,
pyo3::exceptions::PyValueError,
"Base exception for every error disarm raises.\n\
Subclass of ``ValueError`` (so existing ``except ValueError`` code keeps\n\
working); catch ``DisarmError`` to handle any disarm failure. The\n\
subclasses below categorise the failure (#183)."
);
// Declares the Python exception type `InvalidArgumentError`, whose base class
// is `DisarmError`; the trailing string literal is its docstring.
// Only compiled with the `extension-module` feature.
#[cfg(feature = "extension-module")]
pyo3::create_exception!(
disarm,
InvalidArgumentError,
DisarmError,
"An argument had an invalid value or a combination of arguments was\n\
contradictory (e.g. an unknown ``errors``/``form``/``lang`` value, or two\n\
mutually-exclusive flags). Subclass of ``disarm.DisarmError``."
);
// Declares the Python exception type `ResourceLimitError`, whose base class
// is `DisarmError`; the trailing string literal is its docstring.
// Only compiled with the `extension-module` feature.
#[cfg(feature = "extension-module")]
pyo3::create_exception!(
disarm,
ResourceLimitError,
DisarmError,
"A configured resource limit was exceeded (batch size, registration cap,\n\
regex length, unique-slug attempts). Subclass of ``disarm.DisarmError``."
);
// Declares the Python exception type `UnsupportedError`, whose base class is
// `DisarmError`; the trailing string literal is its docstring.
// Only compiled with the `extension-module` feature.
#[cfg(feature = "extension-module")]
pyo3::create_exception!(
disarm,
UnsupportedError,
DisarmError,
"A requested operation is not supported (e.g. reverse transliteration for a\n\
language, or auto-detecting an encoding). Subclass of ``disarm.DisarmError``."
);