vidyut_chandas/
akshara.rs

1use crate::sounds;
2
/// The prosodic weight of an akshara.
///
/// A light akshara lasts one matra and a heavy akshara lasts two.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Weight {
    /// Guru: a heavy syllable.
    G,
    /// Laghu: a light syllable.
    L,
}
11
12/// A Sanskrit syllable.
13///
14/// An akshara follows the following rules:
15///
16/// - It must contain exactly one vowel.
17/// - It must end with a vowel, an anusvara, or a visarga.
18/// - It must not start with an anusvara or visarga.
19///
20/// Together, these three rurles mean that an input string has exactly one division into aksharas.
21#[derive(Debug, Clone, PartialEq)]
22pub struct Akshara {
23    pub(crate) text: String,
24    pub(crate) weight: Weight,
25}
26
27impl Akshara {
28    /// Creates a new akshara.
29    ///
30    /// This function assumes that `text` contains exactly one vowel.
31    pub(crate) fn new(text: String, weight: Weight) -> Self {
32        Self { text, weight }
33    }
34
35    /// The text of this akshara.
36    pub fn text(&self) -> &str {
37        &self.text
38    }
39
40    /// The weight of this akshara.
41    pub fn weight(&self) -> Weight {
42        self.weight
43    }
44
45    /// The length of this akshara in matras.
46    pub fn num_matras(&self) -> usize {
47        match self.weight {
48            Weight::L => 1,
49            Weight::G => 2,
50        }
51    }
52}
53
54/// Scans the given string into aksharas.
55///
56/// Any text that is not a valid Sanskrit sound in SLP1 will be ignored.
57pub fn scan_line(text: impl AsRef<str>) -> Vec<Akshara> {
58    let mut akshara_strs = Vec::new();
59    let mut cur = String::new();
60    for c in text.as_ref().chars() {
61        if sounds::is_hal(c) {
62            cur.push(c);
63        } else if sounds::is_ac(c) {
64            // Each akshara has exactly one vowel.
65            cur.push(c);
66            // Generally, a vowel ends an akshara.
67            akshara_strs.push(cur.clone());
68            cur.clear();
69        } else if matches!(c, 'M' | 'H') {
70            // Add to the end of the previous akshara.
71            if let Some(prev) = akshara_strs.last_mut() {
72                prev.push(c);
73            }
74        }
75
76        // Skip all other punctuation, spaces, etc.
77        // TODO: consider including nasal vowels and accent
78    }
79
80    if !cur.is_empty() {
81        if cur.chars().any(sounds::is_ac) {
82            // Case 1: push new syllable
83            akshara_strs.push(cur);
84        } else if let Some(last) = akshara_strs.last_mut() {
85            // Case 2: extend old syllable
86            last.push_str(&cur);
87        }
88    }
89
90    akshara_strs
91        .iter()
92        .enumerate()
93        .map(|(i, cur)| {
94            let next_is_samyogadi = if let Some(next) = akshara_strs.get(i + 1) {
95                sounds::is_samyogadi(next)
96            } else {
97                false
98            };
99
100            let weight = if !cur.ends_with(sounds::is_hrasva) || next_is_samyogadi {
101                Weight::G
102            } else {
103                Weight::L
104            };
105            Akshara::new(cur.to_string(), weight)
106        })
107        .collect()
108}
109
110/// Scans the given multi-line string into aksharas.
111///
112/// Any text that is not a valid Sanskrit sound in SLP1 will be ignored.
113pub fn scan_lines<'a>(lines: impl Iterator<Item = &'a str>) -> Vec<Vec<Akshara>> {
114    use sounds::{is_hal, is_sanskrit};
115
116    let clean_lines: Vec<_> = lines
117        .map(|line| line.trim())
118        .filter(|line| !line.is_empty())
119        .collect();
120
121    let mut ret = Vec::new();
122    for (i, line) in clean_lines.iter().enumerate() {
123        let mut scan = scan_line(line);
124        if scan.is_empty() {
125            continue;
126        }
127
128        // If the first sound of the next line is heavy and in contact with this line, make the
129        // last akshara of `scan` heavy.
130        if let Some(next) = clean_lines.get(i + 1) {
131            let touches_next = line.ends_with(is_sanskrit) && next.starts_with(is_sanskrit);
132            if touches_next
133                && (sounds::is_samyogadi(next)
134                    || (line.ends_with(is_hal) && next.starts_with(is_hal)))
135            {
136                scan.last_mut().expect("checked non-empty").weight = Weight::G;
137            }
138        }
139
140        ret.push(scan);
141    }
142    ret
143}
144
#[cfg(test)]
mod tests {
    use super::*;
    use Weight::*;

    /// Collects the text of each akshara, in order.
    fn strings(aksharas: &[Akshara]) -> Vec<String> {
        aksharas.iter().map(|x| x.text.clone()).collect()
    }

    /// Collects the weight of each akshara, in order.
    fn weights(aksharas: &[Akshara]) -> Vec<Weight> {
        aksharas.iter().map(|x| x.weight).collect()
    }

    #[test]
    fn test_akshara() {
        let laghu = Akshara::new("ta".to_string(), L);
        assert_eq!(laghu.num_matras(), 1);
    }

    #[test]
    fn test_scan_line_to_text() {
        let akshara_text = |text: &str| -> Vec<String> { strings(&scan_line(text)) };

        // Basic cases.
        for text in &["a", "ma", "am", "mam", "aH", "aM"] {
            assert_eq!(akshara_text(text), vec![text.to_string()]);
        }

        // Each vowel should be its own akshara.
        assert_eq!(akshara_text("aaaa"), vec!["a", "a", "a", "a"]);

        // I'm not sure how to handle invalid text. For now, ignore it.
        for text in &["1", " ", "!", "M", "H", "k"] {
            assert_eq!(akshara_text(text), Vec::<String>::new());
        }

        assert_eq!(
            akshara_text("agnimILe purohitaM yajYasya devamftvijam"),
            vec![
                "a", "gni", "mI", "Le", "pu", "ro", "hi", "taM", "ya", "jYa", "sya", "de", "va",
                "mf", "tvi", "jam"
            ]
        );
    }

    #[test]
    fn test_scan_line_to_weights() {
        let akshara_weights = |text: &str| -> Vec<Weight> { weights(&scan_line(text)) };

        assert_eq!(
            akshara_weights("vAgarTAviva sampfktO"),
            vec![G, G, G, L, L, G, G, G]
        );

        assert_eq!(
            akshara_weights("mAtaH samastajagatAM maDukEwaBAre"),
            vec![G, G, L, G, L, L, L, G, L, L, G, L, G, G]
        );
        assert_eq!(
            akshara_weights("yakzaScakre janakatanayAsnAnapuRyodakezu"),
            vec![G, G, G, G, L, L, L, L, L, G, G, L, G, G, L, G, L]
        );
    }

    #[test]
    fn test_scan_lines() {
        let scan = scan_lines(
            "vAgarTAviva saMpfktO
                vAgarTapratipattaye .
                jagataH pitarO vande
                pArvatIparameSvarO .. 1 .."
                .lines(),
        );
        assert_eq!(
            strings(&scan[0]),
            vec!["vA", "ga", "rTA", "vi", "va", "saM", "pf", "ktO"]
        );
        assert_eq!(weights(&scan[0]), vec![G, G, G, L, L, G, G, G]);
        assert_eq!(
            strings(&scan[1]),
            vec!["vA", "ga", "rTa", "pra", "ti", "pa", "tta", "ye"]
        );
        assert_eq!(weights(&scan[1]), vec![G, G, G, L, L, G, L, G]);
        assert_eq!(
            strings(&scan[2]),
            vec!["ja", "ga", "taH", "pi", "ta", "rO", "va", "nde"]
        );
        assert_eq!(weights(&scan[2]), vec![L, L, G, L, L, G, G, G]);
        assert_eq!(
            strings(&scan[3]),
            vec!["pA", "rva", "tI", "pa", "ra", "me", "Sva", "rO"]
        );
        assert_eq!(weights(&scan[3]), vec![G, L, G, L, L, G, L, G]);
    }

    #[test]
    fn test_scan_lines_with_hrasva_weight_change() {
        let scan = scan_lines("ASramezu".lines());
        assert_eq!(weights(&scan[0]), vec![G, L, G, L]);

        // Last syllable of `ASramezu` becomes guru due to following samyoga.
        let scan = scan_lines("ASramezu\nsnigDa".lines());
        assert_eq!(weights(&scan[0]), vec![G, L, G, G]);

        // Last syllable of `ASramezu` stays laghu.
        let scan = scan_lines("ASramezu\ntasya".lines());
        assert_eq!(weights(&scan[0]), vec![G, L, G, L]);
    }

    #[test]
    fn test_scan_lines_with_laghu_weight_change() {
        // The closing consonant already makes the last syllable of `anIkam` guru.
        let scan = scan_lines("anIkam".lines());
        assert_eq!(weights(&scan[0]), vec![L, G, G]);

        // Last syllable of `anIkam` stays guru before a following samyoga.
        let scan = scan_lines("anIkam\nvyUQam".lines());
        assert_eq!(weights(&scan[0]), vec![L, G, G]);
    }

    // Kept as an ignored test rather than commented-out code so the known gap stays visible and
    // keeps compiling against the current API.
    #[test]
    #[ignore = "known bug: a line-final consonant is not re-attached to a following vowel"]
    fn test_scan_lines_with_following_vowel() {
        // Last syllable of `anIkam` should become laghu due to the following vowel.
        let scan = scan_lines("anIkam\neva".lines());
        assert_eq!(weights(&scan[0]), vec![L, G, L]);
    }
}