1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
#![no_std]
extern crate alloc;
mod arabic;
mod ffi;
mod global;
mod syriac;
use alloc::{
borrow::{Cow, ToOwned},
boxed::Box,
vec::Vec,
};
/// A possible kashida insertion point paired with its ranking.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct KashidaCandidate {
    /// Location of the candidate.
    breakpoint: usize,
    /// Ranking of the candidate; a smaller value is a better candidate.
    bp_priority: usize,
}

impl KashidaCandidate {
    /// Builds a candidate from its location and its priority.
    fn new(breakpoint: usize, bp_priority: usize) -> Self {
        KashidaCandidate {
            bp_priority,
            breakpoint,
        }
    }
}
/// Script to find Kashidas in. Only Arabic and Syriac for now.
/// Use [`Script::Unknown`] to get the generic function.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Script {
    /// Use the Arabic-specific search.
    Arabic,
    /// Use the Syriac-specific search.
    Syriac,
    /// Use the generic, script-agnostic search.
    Unknown,
}
/// Main entry point: finds kashida insertion points in `input`.
///
/// The search routine is selected by `script`. No check is made that `input`
/// is actually valid text for the chosen script.
///
/// Returns the byte positions at which a Kashida may be inserted, sorted by
/// priority.
///
/// The ordering is not guaranteed to be stable for the same string. However,
/// every returned position is guaranteed to be valid. If a Kashida is
/// suggested at a wrong position, please report the bug.
#[must_use]
pub fn find_kashidas(input: &str, script: Script) -> Box<[usize]> {
    // Pick the script-specific implementation first, then run it once.
    let finder: fn(&str) -> Box<[usize]> = match script {
        Script::Arabic => arabic::find_kashidas,
        Script::Syriac => syriac::find_kashidas,
        Script::Unknown => global::find_kashidas,
    };
    finder(input)
}
/// Convenience function to place the kashidas you found into your string.
///
/// To be used after `find_kashidas`. Make sure the same text is passed to both,
/// and that the output of the first function is used as `kashida_locs`.
///
/// Exactly `kashida_count` kashidas are inserted. The positions are taken from
/// the front of `kashida_locs`; if `kashida_count` exceeds the number of
/// positions, the list is cycled, so one position may receive several kashidas.
///
/// Doesn't allocate if it does not have to: when `kashida_count` is zero or
/// `kashida_locs` is empty, `text` is returned borrowed.
///
/// Uses U+0640 ARABIC TATWEEL, which is used for most connected scripts.
///
/// # Panics
///
/// Panics if a selected position is greater than `text.len()` or does not lie
/// on a UTF-8 character boundary of `text`.
pub fn place_kashidas<'a>(
    text: &'a str,
    kashida_locs: &'_ [usize],
    kashida_count: usize,
) -> Cow<'a, str> {
    // e.g. N'Ko uses a different character (U+07FA: NKO LAJANYALAN)
    const TATWEEL: char = 'ـ';

    if kashida_count == 0 || kashida_locs.is_empty() {
        return Cow::Borrowed(text);
    }

    // Take the `kashida_count` highest-priority positions (wrapping around when
    // more are requested than exist) and order them by position, so the output
    // can be assembled in one left-to-right pass.
    let mut locs: Vec<usize> = kashida_locs
        .iter()
        .copied()
        .cycle()
        .take(kashida_count)
        .collect();
    locs.sort_unstable();

    // Empty owned string, sized once for the final output. Building it
    // front-to-back avoids shifting the tail of the buffer on every insertion.
    let mut buffer = "".to_owned();
    buffer.reserve(text.len() + locs.len() * TATWEEL.len_utf8());

    let mut copied_upto = 0;
    for loc in locs {
        // Slicing panics on out-of-range or non-boundary positions.
        // For a repeated position the slice is empty and only the kashida is added.
        buffer.push_str(&text[copied_upto..loc]);
        buffer.push(TATWEEL);
        copied_upto = loc;
    }
    buffer.push_str(&text[copied_upto..]);

    Cow::Owned(buffer)
}