asca/
lib.rs

1mod trie;
2pub mod word;
3pub mod rule;
4pub mod error;
5mod alias;
6
7use indexmap::IndexMap;
8use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
9use serde::Deserialize;
10use std::collections::HashMap;
11use lazy_static::lazy_static;
12use wasm_bindgen::prelude::*;
13
14use alias :: Transformation;
15use trie  :: *;
16use word  :: { DiaMods, Diacritic, * };
17use error :: { ASCAError,  * };
18use rule  :: { trace::Change, BinMod, ModKind, Rule, RuleGroup };
19
20const CARDINALS_FILE: &str = include_str!("cardinals.json");
21const DIACRITIC_FILE: &str = include_str!("diacritics.json");
22lazy_static! {
23    static ref CARDINALS_MAP: IndexMap<String, Segment> = serde_json::from_str(CARDINALS_FILE).unwrap();
24    static ref DIACRITS: Vec<Diacritic> = {
25        // this seems very unnecessary, but I don't know enough about serde
26        // at least it works
27        #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserialize, Hash)]
28        pub enum DiaFeatType {
29            Root, Manner, Laryngeal, Place, Labial, Coronal, Dorsal, Pharyngeal, 
30            /*RUT*/ Consonantal, Sonorant, Syllabic,      
31            /*MAN*/ Continuant, Approximant, Lateral, Nasal, DelayedRelease, Strident, Rhotic, Click,          
32            /*LAR*/ Voice, SpreadGlottis, ConstrGlottis,   
33            /*LAB*/ Labiodental, Round,          
34            /*COR*/ Anterior, Distributed,     
35            /*DOR*/ Front, Back, High, Low, Tense, Reduced,        
36            /*PHR*/ AdvancedTongueRoot, RetractedTongueRoot, 
37        }
38        
39        #[derive(Deserialize)]
40        struct DT {
41            pub name: String,
42            pub diacrit: char,
43            pub prereqs: Option<HashMap<DiaFeatType, bool>>,
44            pub payload: Option<HashMap<DiaFeatType, bool>>,
45        }
46
47        impl DT {
48            pub fn hm_to_mod(&self, hm: &Option<HashMap<DiaFeatType, bool>>) -> DiaMods {
49                let mut args = DiaMods::new();
50                // if hm.is_none() {return args};
51                let Some(s) = hm else {return args};
52                for (key, value) in s.iter() {
53                    let x = *key as usize;
54                    match value {
55                        true =>{
56                            if x > 7 { args.feats[x - 8] = Some(ModKind::Binary(BinMod::Positive)) }
57                            else { args.nodes[x] = Some(ModKind::Binary(BinMod::Positive)) };
58                        },
59                        false => {
60                            if x > 7 { args.feats[x - 8] = Some(ModKind::Binary(BinMod::Negative)) } 
61                            else { args.nodes[x] = Some(ModKind::Binary(BinMod::Negative)) };
62                        }
63                    }
64                }
65                args
66            }
67
68            pub fn to_diacritic(&self) ->  Diacritic {
69                Diacritic { 
70                    name: self.name.clone(), 
71                    diacrit: self.diacrit, 
72                    prereqs: self.hm_to_mod(&self.prereqs), 
73                    payload: self.hm_to_mod(&self.payload)
74                }
75            }
76        }
77
78        let dt: Vec<DT> = serde_json::from_str(DIACRITIC_FILE).unwrap();
79
80        dt.iter().map(|x| x.to_diacritic()).collect()
81    };
82    static ref CARDINALS_VEC: Vec<String> = CARDINALS_MAP.keys().cloned().collect();
83    static ref CARDINALS_TRIE: Trie = {
84        let mut m = Trie::new();
85        CARDINALS_MAP.keys().for_each(|k| m.insert(k.as_str()));
86        m
87    };    
88}
89
90fn apply_rule_groups(rules: &[Vec<Rule>], phrases: &[Phrase]) -> Result<Vec<Phrase>, ASCAError> {
91    let mut transformed_phrases: Vec<Phrase> = Vec::with_capacity(phrases.len());
92
93    for phrase in phrases {
94        transformed_phrases.push(phrase.apply_all(rules)?);
95    }
96
97    Ok(transformed_phrases)
98}
99
100fn apply_rules_trace(rules: &[Vec<Rule>], phrase: &Phrase) -> Result<Vec<Change>, ASCAError> {   
101    let mut changes: Vec<Change> = Vec::new();
102
103    let mut res_phrase = phrase.clone();
104    for (i, rule_group) in rules.iter().enumerate() {
105        let res_before = res_phrase.clone();
106        for rule in rule_group {
107            res_phrase = rule.apply(res_phrase)?;
108        }
109        if res_phrase != res_before {
110            changes.push(Change { rule_index: i, after: res_phrase.clone() });
111        }
112    }
113
114    Ok(changes)
115}
116
117fn phrases_to_string(phrases: Vec<Phrase>, alias_from: Vec<Transformation>) -> (Vec<String>, Vec<Segment>) {
118    let mut res = Vec::with_capacity(phrases.len());
119    let mut unknowns = Vec::new();
120
121    let mut phr_res = String::with_capacity(1);
122    for phrase in phrases {
123        phr_res.clear();
124        // let mut phr_res = String::with_capacity(phrase.len());
125        for word in phrase.iter() {
126            let (w, u) = word.render_debug(&alias_from);
127            phr_res.push(' ');
128            phr_res.push_str(&w);
129            unknowns.extend(u);
130        }
131        res.push(phr_res.trim().to_string());
132    }
133
134    (res, unknowns)
135}
136
137fn parse_rule_groups(unparsed_rule_groups: &[RuleGroup]) -> Result<Vec<Vec<Rule>>, RuleSyntaxError> {
138    unparsed_rule_groups.iter().enumerate().map(|(rgi, rg)| {
139        rg.rule.iter().enumerate().filter_map(|(ri, r)| {
140            match rule::Lexer::new(&r.chars().collect::<Vec<_>>(), rgi, ri).get_line() {
141                Ok(tokens) => {
142                    match rule::Parser::new(tokens, rgi, ri).parse() {
143                        Ok(rule) => rule.map(Ok),
144                        Err(e) => Some(Err(e)),
145                    }
146                }
147                Err(e) => Some(Err(e)),
148            }
149        }).collect::<Result<Vec<Rule>, RuleSyntaxError>>()
150    }).collect::<Result<Vec<Vec<Rule>>, RuleSyntaxError>>()
151}
152
153pub fn run_unparsed(unparsed_rules: &[RuleGroup], unparsed_phrases: &[String], unparsed_into: &[String], unparsed_from: &[String]) -> Result<Vec<String>, ASCAError> {
154    let rules = parse_rule_groups(unparsed_rules)?;
155    let alias_into = alias::parse_into(unparsed_into)?;
156    let alias_from = alias::parse_from(unparsed_from)?;
157
158    unparsed_phrases.iter().map(|up| {
159        let phrase = match up.split(' ').map(|w| Word::new(w, &alias_into)).collect::<Result<Phrase, _>>() {
160            Ok(ph) => ph,
161            Err(e) => return Err(e),
162        };
163
164        let applied = match phrase.apply_all(&rules) {
165            Ok(ap) => ap,
166            Err(e) => return Err(e),
167        };
168
169        Ok(applied.iter().map(|word| word.render(&alias_from)).collect::<Vec<_>>().join(" "))
170    }).collect()
171}
172
173type ParsedPhrases = Vec<String>;
174type OutputPhrases = Vec<String>;
175type UnknownSegments = Vec<String>;
176
177pub fn run_unparsed_debug(unparsed_rules: &[RuleGroup], unparsed_phrases: &[String], unparsed_into: &[String], unparsed_from: &[String]) -> Result<(ParsedPhrases, OutputPhrases, UnknownSegments), ASCAError> {
178    let rules = parallel_parse_rule_groups(unparsed_rules)?;
179    let alias_into = alias::parse_into(unparsed_into)?;
180    let alias_from = alias::parse_from(unparsed_from)?;
181    
182    let (input, applied) = unparsed_phrases.par_iter().map(|up| {
183        let phrase = match up.trim_end().split(' ').map(|w| Word::new(w, &alias_into)).collect::<Result<Phrase, _>>() {
184            Ok(ph) => ph,
185            Err(e) => return Err(e),
186        };
187
188        let rendered_input = phrase.iter().map(|word| word.render(&[])).collect::<Vec<_>>().join(" ");
189
190        let applied = match phrase.apply_all(&rules) {
191            Ok(ap) => ap,
192            Err(e) => return Err(e),
193        };
194
195        Ok((rendered_input, applied))
196    }).collect::<Result<(Vec<String>, Vec<Phrase>), ASCAError>>()?;
197
198    let (output, unknowns) = phrases_to_string(applied, alias_from);
199
200    let unknowns = unknowns.iter().map(|seg| format!("{seg:?}")).collect();
201
202    Ok((input, output, unknowns))
203}
204
205pub fn par_run_unparsed(unparsed_rules: &[RuleGroup], unparsed_phrases: &[String], unparsed_into: &[String], unparsed_from: &[String]) -> Result<Vec<String>, ASCAError> {
206    let rules = parallel_parse_rule_groups(unparsed_rules)?;
207    let alias_into = alias::parse_into(unparsed_into)?;
208    let alias_from = alias::parse_from(unparsed_from)?;
209
210    unparsed_phrases.par_iter().map(|up| {
211        let phrase = match up.trim_end().split(' ').map(|w| Word::new(w, &alias_into)).collect::<Result<Phrase, _>>() {
212            Ok(ph) => ph,
213            Err(e) => return Err(e),
214        };
215
216        let applied = match phrase.apply_all(&rules) {
217            Ok(ap) => ap,
218            Err(e) => return Err(e),
219        };
220
221        Ok(applied.iter().map(|word| word.render(&alias_from)).collect::<Vec<_>>().join(" "))
222    }).collect()
223}
224
225fn parallel_parse_rule_groups(unparsed_rule_groups: &[RuleGroup]) -> Result<Vec<Vec<Rule>>, RuleSyntaxError> {
226    unparsed_rule_groups.par_iter().enumerate().map(|(rgi, rg)| {
227        rg.rule.par_iter().enumerate().filter_map(|(ri, r): (usize, &String)| {
228            match rule::Lexer::new(&r.chars().collect::<Vec<_>>(), rgi, ri).get_line() {
229                Ok(tokens) => {
230                    match rule::Parser::new(tokens, rgi, ri).parse() {
231                        Ok(rule) => rule.map(Ok),
232                        Err(e) => Some(Err(e)),
233                    }
234                }
235                Err(e) => Some(Err(e)),
236            }
237        }).collect::<Result<Vec<Rule>, RuleSyntaxError>>()
238    }).collect::<Result<Vec<Vec<Rule>>, RuleSyntaxError>>()
239}
240
241// For interop with WebASCA
242
243#[doc(hidden)]
244#[wasm_bindgen]
245pub struct WasmResult {
246    input: Vec<String>,         // parsed input
247    output: Vec<String>,
248    unknowns: Vec<String>,      // Any segments that were unable to be parsed
249    trace_rules: Vec<usize>,    // Indices of rules which were applied
250    was_ok: bool                // Did we error, or succeed?
251}
252
253#[wasm_bindgen]
254impl WasmResult {
255    pub fn get_input(&self) -> Vec<String> {
256        self.input.clone()
257    }
258
259    pub fn get_output(&self) -> Vec<String> {
260        self.output.clone()
261    }
262
263    pub fn get_unknowns(&self) -> Vec<String> {
264        self.unknowns.clone()
265    }
266
267    pub fn get_traces(&self) -> Vec<usize> {
268        self.trace_rules.clone()
269    }
270
271    pub fn was_ok(&self) -> bool {
272        self.was_ok
273    }
274}
275
276#[doc(hidden)]
277#[wasm_bindgen]
278pub fn run_wasm(val: JsValue, unparsed_phrases: Vec<String>, unparsed_into: Vec<String>, unparsed_from: Vec<String>, trace_index: Option<usize>) -> WasmResult {
279    let unparsed_rules: Vec<RuleGroup> = serde_wasm_bindgen::from_value(val).expect("Rules are in valid JSObject format");
280    
281    match trace_index {
282        Some(ti) => match run_trace_wasm(&unparsed_rules, &unparsed_phrases, &unparsed_into, ti) {
283            Ok((input, output, unknowns, trace_rules)) => WasmResult { input: vec![input], output, unknowns, trace_rules, was_ok: true },
284            Err(e) => parse_error_web(&e, &unparsed_rules, &unparsed_into, &unparsed_from, &unparsed_phrases),
285        }
286        None => match run_unparsed_debug(&unparsed_rules, &unparsed_phrases, &unparsed_into, &unparsed_from) {
287            Ok((input, output, unknowns)) => WasmResult { input, output, unknowns, trace_rules: vec![], was_ok: true },
288            Err(e) => parse_error_web(&e, &unparsed_rules, &unparsed_into, &unparsed_from, &unparsed_phrases),
289        }
290    }
291}
292
293#[inline]
294fn get_trace_phrase(unparsed_phrases: &[String], alias_into: &[String], trace_index: usize) -> Result<Option<Phrase>, ASCAError> {
295    match unparsed_phrases.get(trace_index) {
296        Some(phrase) => Ok(Some(Phrase::try_from(phrase, alias_into)?)),
297        None => Ok(None),
298    }
299}
300
301
302type RuleIndices = Vec<usize>;
303
304fn run_trace_wasm(unparsed_rules: &[RuleGroup], unparsed_phrase: &[String], alias_into: &[String], trace_index: usize) -> Result<(String, Vec<String>, UnknownSegments, RuleIndices), ASCAError> {
305    let rules = parallel_parse_rule_groups(unparsed_rules)?;
306    let phrase = get_trace_phrase(unparsed_phrase, alias_into, trace_index)?.unwrap_or_default();
307    let res = apply_rules_trace(&rules, &phrase)?;
308    
309    let rendered_input = phrase.iter().map(|word| word.render(&[])).collect::<Vec<_>>().join(" ");
310    
311    let (output, unknowns, rule_indices) = rule::trace::to_string_wasm(&phrase, res, unparsed_rules);
312
313    Ok((rendered_input, output, unknowns, rule_indices))
314}
315
316fn parse_error_web(err: &ASCAError, unparsed_rules: &[RuleGroup], unparsed_into: &[String], unparsed_from: &[String], unparsed_phrases: &[String]) -> WasmResult {
317    let output = match err {
318        ASCAError::WordSyn(e) => e.format(),
319        ASCAError::AliasSyn(e) => e.format(unparsed_into, unparsed_from),
320        ASCAError::AliasRun(e) => e.format(unparsed_into, unparsed_from),
321        ASCAError::RuleSyn(e) => e.format(unparsed_rules),
322        ASCAError::RuleRun(e) => e.format(unparsed_rules),
323    };
324
325    WasmResult { input: unparsed_phrases.to_vec(), output: vec![output], unknowns: vec![], trace_rules: vec![], was_ok: false }
326}