use crate::Error;
use std::borrow::Cow;
/// Target script selector accepted by this module's confusable helpers
/// (`normalize_confusables` and `is_confusable`).
///
/// Each variant has a fixed lowercase name, available through
/// [`TargetScript::as_str`] and the [`Display`](std::fmt::Display) impl.
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum TargetScript {
    /// Named `"latin"`.
    Latin,
    /// Named `"cyrillic"`.
    Cyrillic,
}

impl TargetScript {
    /// Returns the name of this script: `"latin"` or `"cyrillic"`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Latin => "latin",
            Self::Cyrillic => "cyrillic",
        }
    }
}

/// Formats the script as the name returned by [`TargetScript::as_str`].
///
/// Width, fill and alignment from the format spec are honored; a precision
/// truncates the name, exactly as it does for `str`.
impl std::fmt::Display for TargetScript {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the caller's formatting flags; `write_str` would
        // silently ignore them, so `format!("{:>10}", script)` would not align.
        f.pad(self.as_str())
    }
}
impl std::str::FromStr for TargetScript {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"latin" => Ok(Self::Latin),
"cyrillic" => Ok(Self::Cyrillic),
_ => Err(Error::from(crate::ErrorRepr::InvalidTargetScript {
got: s.to_owned(),
})),
}
}
}
/// Normalizes confusable characters in `text` with respect to `target`.
///
/// Delegates to the crate-internal `confusables::normalize_confusables_cow`,
/// passing the script by its string name.
/// NOTE(review): the `Cow` return presumably borrows `text` when nothing
/// needs replacing — confirm against the internal implementation.
///
/// # Panics
///
/// Panics if the internal routine rejects the name produced by
/// [`TargetScript::as_str`]. That is treated as a broken internal invariant,
/// not a recoverable condition.
#[must_use]
pub fn normalize_confusables(text: &str, target: TargetScript) -> Cow<'_, str> {
    let script_name = target.as_str();
    let outcome = crate::confusables::normalize_confusables_cow(text, script_name);
    outcome.expect("TargetScript always maps to a supported target script")
}
/// Reports the result of the crate-internal confusable check for `text`
/// with respect to `target`.
///
/// Delegates to `confusables::is_confusable`, passing the script by its
/// string name.
///
/// # Panics
///
/// Panics if the internal routine rejects the name produced by
/// [`TargetScript::as_str`]. That is treated as a broken internal invariant,
/// not a recoverable condition.
#[must_use]
pub fn is_confusable(text: &str, target: TargetScript) -> bool {
    let script_name = target.as_str();
    let verdict = crate::confusables::is_confusable(text, script_name);
    verdict.expect("TargetScript always maps to a supported target script")
}
/// Language selector accepted by `reverse_transliterate`.
///
/// Each variant has a fixed two-letter code, available through
/// [`ReverseLang::as_str`] and the [`Display`](std::fmt::Display) impl.
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ReverseLang {
    /// Code `"el"`.
    Greek,
    /// Code `"ru"`.
    Russian,
    /// Code `"uk"`.
    Ukrainian,
}

impl ReverseLang {
    /// Returns the code of this language: `"el"`, `"ru"` or `"uk"`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Greek => "el",
            Self::Russian => "ru",
            Self::Ukrainian => "uk",
        }
    }
}

/// Formats the language as the code returned by [`ReverseLang::as_str`].
///
/// Width, fill and alignment from the format spec are honored; a precision
/// truncates the code, exactly as it does for `str`.
impl std::fmt::Display for ReverseLang {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the caller's formatting flags; `write_str` would
        // silently ignore them, so `format!("{:>4}", lang)` would not align.
        f.pad(self.as_str())
    }
}
impl std::str::FromStr for ReverseLang {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"el" => Ok(Self::Greek),
"ru" => Ok(Self::Russian),
"uk" => Ok(Self::Ukrainian),
_ => Err(Error::from(crate::ErrorRepr::InvalidReverseLang {
got: s.to_owned(),
})),
}
}
}
/// Reverse-transliterates `text` for the given `lang`.
///
/// Delegates to the crate-internal `reverse::reverse_transliterate_impl`,
/// passing the language by the code from [`ReverseLang::as_str`].
#[must_use]
pub fn reverse_transliterate(text: &str, lang: ReverseLang) -> String {
    let lang_code = lang.as_str();
    crate::reverse::reverse_transliterate_impl(text, lang_code)
}
/// Returns the language list reported by the crate-internal
/// `reverse::reverse_langs`.
///
/// NOTE(review): presumably these are the codes accepted when parsing a
/// `ReverseLang` — confirm against the internal module.
#[must_use]
pub fn reverse_langs() -> Vec<String> {
crate::reverse::reverse_langs()
}
/// Returns the script names that the crate-internal
/// `scripts::detect_scripts` reports for `text`.
///
/// The returned names are `'static` strings, so they do not borrow from `text`.
#[must_use]
pub fn detect_scripts(text: &str) -> Vec<&'static str> {
crate::scripts::detect_scripts(text)
}
/// Returns the verdict of the crate-internal `scripts::is_mixed_script`
/// check for `text`.
///
/// NOTE(review): presumably `true` means `text` contains characters from
/// more than one script — confirm against the internal module.
#[must_use]
pub fn is_mixed_script(text: &str) -> bool {
crate::scripts::is_mixed_script(text)
}
/// Owned report returned by `inspect_auto_lang`.
///
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct AutoLangInspection {
/// Script reported by the internal inspector, if any.
pub script: Option<String>,
/// Language reported by the internal inspector, if any.
/// NOTE(review): presumably the language auto-detection would pick — confirm.
pub chosen_lang: Option<String>,
/// Reason string reported by the internal inspector.
pub reason: String,
/// NOTE(review): presumably the discriminating features that matched
/// during detection — confirm against `crate::scripts`.
pub discriminators_hit: Vec<String>,
}
/// Runs the crate-internal `scripts::inspect_auto_lang` on `text` and
/// repackages its tuple result as an owned [`AutoLangInspection`].
///
/// The script name and the reason are copied into owned `String`s; the
/// chosen language and the discriminator list are moved through unchanged.
#[must_use]
pub fn inspect_auto_lang(text: &str) -> AutoLangInspection {
    let (detected_script, lang, why, hits) = crate::scripts::inspect_auto_lang(text);
    let script = detected_script.map(|name| name.to_owned());
    let reason = why.to_owned();
    AutoLangInspection {
        script,
        chosen_lang: lang,
        reason,
        discriminators_hit: hits,
    }
}
/// Report returned by `is_suspicious_hostname`.
///
/// Every field is copied from the crate-internal hostname analysis result.
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct HostnameAnalysis {
/// Overall verdict from the internal analysis.
pub suspicious: bool,
/// NOTE(review): presumably the names of the scripts found in the
/// hostname — confirm against `crate::hostname`.
pub scripts: Vec<String>,
/// NOTE(review): presumably `true` when more than one script is present — confirm.
pub mixed_script: bool,
/// NOTE(review): presumably `true` when confusable characters were found — confirm.
pub has_confusables: bool,
/// Canonical form of the hostname as reported by the internal analysis.
/// NOTE(review): the exact canonicalization rules are not visible here — confirm.
pub canonical: String,
}
/// Analyzes `hostname` with the crate-internal
/// `hostname::is_suspicious_hostname` and returns its findings as a
/// [`HostnameAnalysis`].
///
/// The internal function returns a pair; only its second element is used
/// here, and the first is discarded.
#[must_use]
pub fn is_suspicious_hostname(hostname: &str) -> HostnameAnalysis {
    // Keep only the detailed report (second tuple element).
    let report = crate::hostname::is_suspicious_hostname(hostname).1;
    HostnameAnalysis {
        suspicious: report.suspicious,
        scripts: report.scripts,
        mixed_script: report.mixed_script,
        has_confusables: report.has_confusables,
        canonical: report.canonical,
    }
}
/// Platform selector accepted by `sanitize_filename`.
///
/// Each variant has a fixed lowercase name, available through
/// [`Platform::as_str`] and the [`Display`](std::fmt::Display) impl.
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Platform {
    /// Named `"universal"`.
    Universal,
    /// Named `"windows"`.
    Windows,
    /// Named `"posix"`.
    Posix,
}

impl Platform {
    /// Returns the name of this platform: `"universal"`, `"windows"` or
    /// `"posix"`.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Universal => "universal",
            Self::Windows => "windows",
            Self::Posix => "posix",
        }
    }
}

/// Formats the platform as the name returned by [`Platform::as_str`].
///
/// Width, fill and alignment from the format spec are honored; a precision
/// truncates the name, exactly as it does for `str`.
impl std::fmt::Display for Platform {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `pad` applies the caller's formatting flags; `write_str` would
        // silently ignore them, so `format!("{:>9}", platform)` would not align.
        f.pad(self.as_str())
    }
}
impl std::str::FromStr for Platform {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"universal" => Ok(Self::Universal),
"windows" => Ok(Self::Windows),
"posix" => Ok(Self::Posix),
_ => Err(Error::from(crate::ErrorRepr::InvalidPlatform {
got: s.to_owned(),
})),
}
}
}
pub fn sanitize_filename(
text: &str,
separator: &str,
max_length: usize,
platform: Platform,
lang: Option<&str>,
preserve_extension: bool,
) -> Result<String, Error> {
crate::filename::sanitize_filename(
text,
separator,
max_length,
platform.as_str(),
lang,
preserve_extension,
)
.map_err(Error::from)
}
/// Result returned by `detect_encoding`.
///
/// Only `PartialEq` is derived (not `Eq`) because `confidence` is an `f64`.
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct EncodingDetection {
/// Encoding label reported by the internal detector.
pub label: String,
/// Confidence reported by the internal detector.
/// NOTE(review): presumably in the range 0.0..=1.0 — confirm against `crate::encoding`.
pub confidence: f64,
}
/// Detects the encoding of `bytes` with the crate-internal
/// `encoding::detect_encoding_impl` and wraps its `(label, confidence)`
/// pair in an [`EncodingDetection`].
#[must_use]
pub fn detect_encoding(bytes: &[u8]) -> EncodingDetection {
    let (name, score) = crate::encoding::detect_encoding_impl(bytes);
    EncodingDetection {
        label: name,
        confidence: score,
    }
}
/// Successful result of `decode_to_utf8`.
///
/// The struct is `#[non_exhaustive]`: code outside this crate cannot build
/// it with a struct literal and must use `..` when destructuring.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DecodedText {
/// The decoded text.
pub text: String,
/// Error flag reported by the internal decoder.
/// NOTE(review): presumably `true` when malformed input was encountered
/// (e.g. replaced) during decoding — confirm against `crate::encoding`.
pub had_errors: bool,
}
/// Decodes `bytes` to UTF-8 text with the crate-internal
/// `encoding::decode_to_utf8_impl`, forwarding every argument unchanged.
///
/// On success the internal `(text, had_errors)` pair is wrapped in a
/// [`DecodedText`].
/// NOTE(review): presumably `encoding = None` requests auto-detection, with
/// `min_confidence` and `strict` controlling acceptance — confirm against
/// `crate::encoding`.
///
/// # Errors
///
/// Returns the internal decoder's error, converted into [`Error`].
pub fn decode_to_utf8(
    bytes: &[u8],
    encoding: Option<&str>,
    min_confidence: f64,
    strict: bool,
) -> Result<DecodedText, Error> {
    // `?` performs the same `From` conversion into `Error` on failure.
    let (text, had_errors) =
        crate::encoding::decode_to_utf8_impl(bytes, encoding, min_confidence, strict)?;
    Ok(DecodedText { text, had_errors })
}
pub fn strip_log_injection<'a>(
text: &'a str,
replacement: &str,
keep_tab: bool,
) -> Result<Cow<'a, str>, Error> {
crate::log_injection::validate_log_replacement(replacement, keep_tab).map_err(Error::from)?;
Ok(crate::log_injection::strip_log_injection_str(
text,
replacement,
keep_tab,
))
}