#![no_std]
#![allow(unused_unsafe)]
extern crate alloc;
mod arabic;
mod ffi;
mod global;
mod syriac;
use alloc::{
borrow::{Cow, ToOwned},
boxed::Box,
vec::Vec,
};
/// Zero-sized marker type that `get_segmenters` passes by reference to the
/// `icu_segmenter` constructors as their data provider.
///
/// NOTE(review): the provider trait implementations are presumably generated by the
/// `impl_data_provider!` invocation in the `private` module — confirm against
/// `segmentation_data/mod.rs`.
struct SegmentationDataProvider;
// Textually includes `../segmentation_data/mod.rs` and then invokes
// `impl_data_provider!` on `SegmentationDataProvider`. Keeping both inside a private
// module stops whatever the included file declares from leaking into the crate root.
// NOTE(review): the macro is presumably defined by the included file and implements the
// data-provider traits required by the segmenters — confirm.
mod private {
include!("../segmentation_data/mod.rs");
impl_data_provider!(super::SegmentationDataProvider);
}
/// Builds the pair of segmenters: a word segmenter (non-complex-scripts variant) and a
/// grapheme-cluster segmenter, both backed by [`SegmentationDataProvider`].
///
/// The word segmenter is constructed first, with default options.
///
/// # Panics
///
/// Panics if either segmenter constructor returns an error.
fn get_segmenters() -> (icu_segmenter::WordSegmenter, icu_segmenter::GraphemeClusterSegmenter) {
    // One shared borrow of the unit provider serves both constructors.
    let provider = &SegmentationDataProvider;

    // Tuple fields are evaluated left to right, so the word segmenter is still built
    // (and would still panic) before the grapheme-cluster segmenter.
    (
        icu_segmenter::WordSegmenter::try_new_for_non_complex_scripts_unstable(
            provider,
            Default::default(),
        )
        .unwrap(),
        icu_segmenter::GraphemeClusterSegmenter::try_new_unstable(provider).unwrap(),
    )
}
/// A possible kashida insertion point paired with a priority value.
///
/// NOTE(review): `breakpoint` is presumably a byte offset into the analysed text and
/// `bp_priority` a ranking used by the script modules to order candidates — confirm
/// against `arabic`, `syriac` and `global`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct KashidaCandidate {
    /// Position of the candidate.
    breakpoint: usize,
    /// Priority attached to this candidate.
    bp_priority: usize,
}

impl KashidaCandidate {
    /// Creates a candidate from a position and its priority.
    fn new(breakpoint: usize, bp_priority: usize) -> Self {
        KashidaCandidate {
            breakpoint,
            bp_priority,
        }
    }
}
/// Script selector for [`find_kashidas`]: chooses which script-specific
/// implementation analyses the input.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a wildcard
/// arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy)]
pub enum Script {
/// Analyse the input with the `arabic` module.
Arabic,
/// Analyse the input with the `syriac` module.
Syriac,
/// No specific script: analyse the input with the `global` module.
Unknown,
}
/// Finds kashida candidate positions in `input` using the rules for `script`.
///
/// This is a thin dispatcher: [`Script::Arabic`] is handled by the `arabic` module,
/// [`Script::Syriac`] by `syriac`, and [`Script::Unknown`] by `global`.
///
/// The returned slice has the shape [`place_kashidas`] expects for its `kashida_locs`
/// argument.
/// NOTE(review): the entries are presumably byte offsets into `input`, ordered by
/// priority — confirm in the script modules.
#[must_use]
pub fn find_kashidas(input: &str, script: Script) -> Box<[usize]> {
    match script {
        Script::Unknown => global::find_kashidas(input),
        Script::Syriac => syriac::find_kashidas(input),
        Script::Arabic => arabic::find_kashidas(input),
    }
}
/// Inserts `kashida_count` kashida characters (`ـ`) into `text` at the given locations.
///
/// Kashidas are dealt out over `kashida_locs` round-robin in slice order: the first
/// entry receives the first kashida, the second entry the next one, and so on, wrapping
/// back to the first entry until `kashida_count` have been handed out. Earlier entries
/// therefore never receive fewer kashidas than later ones. An offset that appears more
/// than once in `kashida_locs` accumulates the kashidas of every occurrence.
///
/// Every entry of `kashida_locs` is a byte offset into the *original* `text`; the
/// entries may be in any order.
///
/// Returns `Cow::Borrowed(text)` when `kashida_count` is zero or `kashida_locs` is
/// empty, and a freshly allocated `Cow::Owned` string otherwise.
///
/// # Panics
///
/// Panics if an offset that receives at least one kashida is greater than `text.len()`
/// or does not fall on a UTF-8 character boundary. Offsets that receive no kashida
/// (possible when `kashida_count < kashida_locs.len()`) are not checked.
#[must_use]
pub fn place_kashidas<'a>(
    text: &'a str,
    kashida_locs: &'_ [usize],
    kashida_count: usize,
) -> Cow<'a, str> {
    const KASHIDA: char = 'ـ';

    if kashida_count == 0 || kashida_locs.is_empty() {
        return Cow::Borrowed(text);
    }

    // Round-robin distribution without materialising one entry per kashida: every
    // location gets `base` kashidas and the first `leftover` locations get one more.
    let base = kashida_count / kashida_locs.len();
    let leftover = kashida_count % kashida_locs.len();
    let mut plan: Vec<(usize, usize)> = kashida_locs
        .iter()
        .enumerate()
        .map(|(slot, &loc)| (loc, base + usize::from(slot < leftover)))
        // Locations that receive nothing are dropped so they are never validated.
        .filter(|&(_, repeat)| repeat > 0)
        .collect();
    plan.sort_unstable_by_key(|&(loc, _)| loc);

    // Build the result in a single left-to-right pass into a buffer sized up front,
    // instead of shifting the tail of the string once per inserted kashida.
    let capacity = kashida_count
        .saturating_mul(KASHIDA.len_utf8())
        .saturating_add(text.len());
    let mut buffer = alloc::string::String::with_capacity(capacity);
    let mut cursor = 0;
    for (loc, repeat) in plan {
        // Slicing panics on an out-of-range or non-boundary offset; for a repeated
        // offset the slice is simply empty.
        buffer.push_str(&text[cursor..loc]);
        for _ in 0..repeat {
            buffer.push(KASHIDA);
        }
        cursor = loc;
    }
    buffer.push_str(&text[cursor..]);

    Cow::Owned(buffer)
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec;

    /// Runs the Arabic finder on the basmala, then spreads 25 kashidas over the
    /// candidates it returns and checks both the candidates and the justified text.
    #[test]
    fn basmala_placement() {
        let basmala = "بسم الله الرحمن الرحيم";
        let expected_candidates = vec![4, 37, 26].into_boxed_slice();
        let expected_output = "بســـــــــم الله الرحمــــــــن الرحــــــــيم";

        let found = crate::arabic::find_kashidas(basmala);
        let justified = place_kashidas(basmala, &found, 25);

        assert_eq!(found, expected_candidates);
        assert_eq!(justified, expected_output);
    }
}