#[allow(unused_imports, reason = "used by docs")]
use crate::ErrorKind;
use crate::transform::advanced::{MaybeMarkerCodepoint, TransformBufferError};
use crate::transform::buffer::{MaybeUninitSlice, SplitInitBuffer};
use bstr::BStr;
use buffer::MaybeUninitSliceExt;
use std::alloc::Layout;
use std::fmt::{Debug, Display, Formatter};
use std::mem::MaybeUninit;
pub mod advanced;
pub mod buffer;
mod options;
#[cfg(feature = "unstable-redundant")]
pub mod redundant;
pub use options::*;
/// A caller-supplied hook that maps one `char` to another `char`.
///
/// The alias is a mutable borrow of an [`FnMut`] trait object. Functions in this module that
/// take a callback accept it as `Option<TransformCallback>`.
///
/// The trait-object lifetime bound is written out explicitly; it is the same bound the
/// compiler infers for `&'a mut dyn FnMut(char) -> char`.
pub type TransformCallback<'a> = &'a mut (dyn FnMut(char) -> char + 'a);
#[derive(Clone, Debug, thiserror::Error)]
#[error("Insufficient space: Need room for {needed_space} elements, but only have space for {actual_space}")]
pub struct InsufficientSpaceError {
pub(crate) actual_space: usize,
pub(crate) needed_space: usize,
}
impl InsufficientSpaceError {
    /// Returns the number of elements that room was needed for.
    #[inline]
    pub fn needed_space(&self) -> usize {
        let Self { needed_space, .. } = self;
        *needed_space
    }
}
/// Owned, fixed-capacity result of decomposing a single codepoint.
///
/// Storage always has [`advanced::MAX_DECOMPOSE_CHAR_LENGTH`] slots, but only the first
/// `len` of them are exposed through the accessors and iterators.
#[derive(Clone)]
pub struct DecomposedChar {
    /// Backing storage; slots at index `len` and beyond are filler and never exposed.
    codepoints: [char; advanced::MAX_DECOMPOSE_CHAR_LENGTH],
    /// Number of leading slots in `codepoints` that hold real output.
    len: usize,
}
impl DecomposedChar {
    /// Returns an iterator yielding the stored codepoints by value, in order.
    ///
    /// Produces the same iterator as iterating over `&DecomposedChar`.
    #[inline]
    pub fn iter(&self) -> std::iter::Copied<std::slice::Iter<'_, char>> {
        self.as_slice().iter().copied()
    }

    /// Borrows the stored codepoints, i.e. the first `len` slots of the backing array.
    #[inline]
    pub fn as_slice(&self) -> &[char] {
        let populated = ..self.len;
        &self.codepoints[populated]
    }
}
impl AsRef<[char]> for DecomposedChar {
#[inline]
fn as_ref(&self) -> &[char] {
self.as_slice()
}
}
impl Debug for DecomposedChar {
    /// Formats the stored codepoints as a list, omitting the unused trailing slots.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // A slice's `Debug` impl already renders as a debug list of its elements.
        let visible: &[char] = self.as_slice();
        Debug::fmt(visible, f)
    }
}
impl<'a> IntoIterator for &'a DecomposedChar {
    type Item = char;
    type IntoIter = std::iter::Copied<std::slice::Iter<'a, char>>;

    /// Iterates over the stored codepoints by value without consuming the container.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        let populated: &'a [char] = self.as_slice();
        populated.iter().copied()
    }
}
impl IntoIterator for DecomposedChar {
    type Item = char;
    type IntoIter = std::iter::Take<std::array::IntoIter<char, { advanced::MAX_DECOMPOSE_CHAR_LENGTH }>>;

    /// Consumes the container and yields its stored codepoints by value.
    ///
    /// The whole backing array is turned into an iterator, which is then cut off after
    /// `len` items so the filler slots are never yielded.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        debug_assert!(self.len <= advanced::MAX_DECOMPOSE_CHAR_LENGTH);
        let Self { codepoints, len } = self;
        IntoIterator::into_iter(codepoints).take(len)
    }
}
#[cfg_attr(feature = "inline-more", inline)] pub fn decompose_char(codepoint: char, options: &TransformOptions) -> Result<DecomposedChar, crate::Error> {
options.validate_utf8();
let mut buffer = MaybeUninit::<[MaybeMarkerCodepoint; advanced::MAX_DECOMPOSE_CHAR_LENGTH]>::uninit();
let buffer = MaybeUninitSliceExt::from_uninit_array_mut(&mut buffer);
match advanced::decompose_char(codepoint, buffer, options, None) {
Ok((init, _uninit)) => {
assert!(!init.is_empty(), "result has zero length");
#[cfg(debug_assertions)]
{
for value in &*init {
debug_assert!(value.to_char().is_ok(), "produced invalid codepoint");
}
}
let mut result = [0 as char; advanced::MAX_DECOMPOSE_CHAR_LENGTH];
assert_eq!(Layout::new::<MaybeMarkerCodepoint>(), Layout::new::<char>(),);
unsafe {
init.as_ptr()
.cast::<char>()
.copy_to_nonoverlapping(result.as_mut_ptr(), init.len());
}
Ok(DecomposedChar {
codepoints: result,
len: init.len(),
})
}
Err(TransformBufferError::InsufficientSpace(cause)) => {
unreachable!("{cause}")
}
Err(TransformBufferError::Other(cause)) => Err(cause),
}
}
/// Decomposes `text` into the caller-provided `buffer`, exposing the output as `char`s.
///
/// `buffer` is reinterpreted as storage for [`MaybeMarkerCodepoint`] and handed to
/// [`advanced::decompose_buffer`] together with `options` and `func`. The two halves that
/// call returns (initialized and still-uninitialized) are then reinterpreted back as `char`
/// storage and returned.
///
/// # Errors
/// Propagates any [`TransformBufferError`] from the underlying call.
///
/// # Panics
/// - If `char` and [`MaybeMarkerCodepoint`] do not share a layout.
/// - In debug builds, if any produced entry fails to convert to a `char`.
/// - NOTE(review): `options.validate_utf8()` runs first and presumably panics for options
///   that could yield non-`char` output — confirm against its definition.
#[cfg_attr(feature = "inline-more", inline)]
pub fn decompose_buffer<'a>(
    text: &BStr,
    buffer: &'a mut MaybeUninitSlice<char>,
    options: &TransformOptions,
    func: Option<TransformCallback>,
) -> Result<SplitInitBuffer<'a, char>, TransformBufferError> {
    options.validate_utf8();
    assert_eq!(Layout::new::<char>(), Layout::new::<MaybeMarkerCodepoint>());
    let capacity = buffer.len();
    let marker_ptr = buffer.as_mut_ptr().cast::<MaybeUninit<MaybeMarkerCodepoint>>();
    // SAFETY: pointer and length come from the exclusive borrow `buffer`, and the element
    // layouts were asserted equal above. The storage is uninitialized either way, so no
    // validity requirement is placed on its current contents.
    let marker_buffer: &mut MaybeUninitSlice<MaybeMarkerCodepoint> =
        unsafe { std::slice::from_raw_parts_mut(marker_ptr, capacity) };
    let (init, uninit) = advanced::decompose_buffer(text, marker_buffer, options, func)?;
    // Only checked in debug builds: every produced entry must convert to a real `char`.
    debug_assert!(
        init.iter().all(|val| val.to_char().is_ok()),
        "produced invalid codepoint"
    );
    let (init_len, uninit_len) = (init.len(), uninit.len());
    let init_ptr = init.as_mut_ptr().cast::<char>();
    let uninit_ptr = uninit.as_mut_ptr().cast::<MaybeUninit<char>>();
    // SAFETY: both halves are rebuilt from the pointers and lengths of the slices just
    // returned, with layouts asserted equal above.
    // NOTE(review): the initialized half must hold only valid `char` values; that is verified
    // only in debug builds — presumably guaranteed by `validate_utf8`; confirm.
    unsafe {
        Ok((
            std::slice::from_raw_parts_mut(init_ptr, init_len),
            std::slice::from_raw_parts_mut(uninit_ptr, uninit_len),
        ))
    }
}
/// Transforms `text` according to `options` and returns the result as a new `String`.
///
/// This is a convenience wrapper that calls [`map_into`] with a fresh, empty `String` and no
/// callback.
///
/// # Errors
/// Returns whatever error [`map_into`] reports.
#[inline]
pub fn map(text: impl AsRef<BStr>, options: &TransformOptions) -> Result<String, crate::Error> {
    let mut output = String::new();
    map_into(text.as_ref(), &mut output, options, None).map(|()| output)
}
/// Transforms `text` according to `options`, writing the output into `dest`.
///
/// The byte-level work is done by [`advanced::map_into`], which is given direct access to the
/// bytes backing `dest`, along with the optional `func` callback. How existing contents of
/// `dest` are treated is decided by that function.
///
/// # Errors
/// Returns whatever error [`advanced::map_into`] reports.
///
/// # Panics
/// NOTE(review): `options.validate_utf8()` runs first and presumably panics for options that
/// could produce non-UTF-8 output — confirm against its definition.
#[inline]
pub fn map_into(
    text: impl AsRef<BStr>,
    dest: &mut String,
    options: &TransformOptions,
    func: Option<TransformCallback>,
) -> Result<(), crate::Error> {
    options.validate_utf8();
    // SAFETY: NOTE(review): soundness requires that only valid UTF-8 is left in `dest`;
    // this presumably follows from the `validate_utf8` check above — confirm, including on
    // the error path of `advanced::map_into`.
    let raw_bytes: &mut Vec<u8> = unsafe { dest.as_mut_vec() };
    let source = text.as_ref();
    advanced::map_into(source, raw_bytes, options, func)
}
#[cfg_attr(feature = "inline-more", inline)] pub fn normalize_utf32(items: &mut [char], options: &TransformOptions) -> Result<usize, crate::Error> {
let options = unsafe { options.to_ffi() };
assert_eq!(Layout::new::<MaybeMarkerCodepoint>(), Layout::new::<i32>());
let res_code = unsafe {
utf8proc_sys::utf8proc_normalize_utf32(items.as_mut_ptr().cast::<i32>(), items.len().cast_signed(), options)
};
if res_code < 0 {
Err(crate::Error::from_code(res_code))
} else {
let len = res_code.cast_unsigned();
assert!(len <= items.len());
Ok(len)
}
}
/// The Unicode normalization form to apply when normalizing text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnicodeNormalizationForm {
    /// Normalization Form D: decomposition without compatibility mappings.
    NFD,
    /// Normalization Form C: composition without compatibility mappings.
    NFC,
    /// Normalization Form KD: decomposition with compatibility mappings.
    NFKD,
    /// Normalization Form KC: composition with compatibility mappings.
    NFKC,
}
impl Display for UnicodeNormalizationForm {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{self:?}")
}
}
#[inline] pub fn normalize(text: &str, form: UnicodeNormalizationForm) -> Result<String, crate::Error> {
let mut options = TransformOptions::default();
match form {
UnicodeNormalizationForm::NFD => {
options.composition = Some(CompositionOptions::decompose());
}
UnicodeNormalizationForm::NFC => {
options.composition = Some(CompositionOptions::compose());
}
UnicodeNormalizationForm::NFKD => {
options.composition = Some(CompositionOptions::decompose());
options.compat = true;
}
UnicodeNormalizationForm::NFKC => {
options.composition = Some(CompositionOptions::compose());
options.compat = true;
}
}
map(text, &options)
}