1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#![no_std]

extern crate alloc;

mod arabic;
mod ffi;
mod global;
mod syriac;

use alloc::{
    borrow::{Cow, ToOwned},
    boxed::Box,
    string::String,
    vec::Vec,
};

/// A possible kashida position paired with a rank used to order candidates.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct KashidaCandidate {
    /// Where the candidate is.
    breakpoint: usize,

    /// Rank of the candidate; lower is better.
    bp_priority: usize,
}

impl KashidaCandidate {
    /// Builds a candidate from its position and its priority.
    fn new(breakpoint: usize, bp_priority: usize) -> Self {
        KashidaCandidate {
            breakpoint,
            bp_priority,
        }
    }
}

/// Script to find Kashidas in. Only Arabic and Syriac for now.
/// Use [`Script::Unknown`] to get the generic function.
///
/// The enum is `#[non_exhaustive]`: code outside this crate must include a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Script {
    /// Selects the Arabic-specific finder.
    Arabic,
    /// Selects the Syriac-specific finder.
    Syriac,
    /// Selects the generic finder that is not tied to one script.
    Unknown,
}

/// Main entry point: finds kashida insertion points in `input` for `script`.
///
/// The string is not checked for being valid text in the chosen language.
///
/// Returns a list of byte-positions to insert the Kashida in, sorted by priority.
///
/// The ordering is not guaranteed to be stable for the same string. However, all
/// positions are guaranteed to be valid. If a Kashida is suggested at a wrong
/// position, please report the bug.
#[must_use]
pub fn find_kashidas(input: &str, script: Script) -> Box<[usize]> {
    // Pick the script-specific implementation first, then run it once.
    let finder: fn(&str) -> Box<[usize]> = match script {
        Script::Arabic => arabic::find_kashidas,
        Script::Syriac => syriac::find_kashidas,
        Script::Unknown => global::find_kashidas,
    };
    finder(input)
}

/// Convenience function to place the kashidas you found into your string.
///
/// To be used after `find_kashidas`. Make sure the same text is passed to both,
/// and the output of the first function is used. Doesn't allocate if it does not
/// have to: when `kashida_count` is zero or `kashida_locs` is empty, `text` is
/// returned borrowed.
///
/// Exactly `kashida_count` kashidas are inserted otherwise. Positions are taken
/// from the front of `kashida_locs`; if `kashida_count` exceeds the number of
/// positions, the list is cycled, so one position may receive several kashidas.
/// All positions are byte offsets into the original `text`.
///
/// Uses U+0640 ARABIC TATWEEL, which is used for most connected scripts.
///
/// # Panics
///
/// Panics if a position that gets used is greater than `text.len()` or does not
/// lie on a `char` boundary of `text`.
#[must_use]
pub fn place_kashidas<'a>(
    text: &'a str,
    kashida_locs: &'_ [usize],
    kashida_count: usize,
) -> Cow<'a, str> {
    // U+0640 ARABIC TATWEEL.
    // e.g. N'Ko uses a different character (U+07FA: NKO LAJANYALAN)
    const TATWEEL: char = '\u{0640}';

    if kashida_count == 0 || kashida_locs.is_empty() {
        return Cow::Borrowed(text);
    }

    // Ascending order lets the result be assembled in one left-to-right pass,
    // instead of shifting the tail of the string on every insertion.
    let mut locs: Vec<usize> = kashida_locs
        .iter()
        .copied()
        .cycle()
        .take(kashida_count)
        .collect();
    locs.sort_unstable();

    let mut buffer = String::with_capacity(text.len() + locs.len() * TATWEEL.len_utf8());
    let mut copied_up_to = 0;
    for loc in locs {
        // Slicing panics on an out-of-range or non-boundary position.
        buffer.push_str(&text[copied_up_to..loc]);
        buffer.push(TATWEEL);
        copied_up_to = loc;
    }
    buffer.push_str(&text[copied_up_to..]);

    Cow::Owned(buffer)
}