name_engine/
lib.rs

1/*
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
5 */
6
7use std::collections::HashMap;
8use thiserror::Error;
9
10use sorted_vec::{SortedVec, SortedVecBuilder};
11
/// The written form (spelling) of a syllable, e.g. `bed` in `Bedford`
13type Letter = String;
14/// The phonetic representation of the letter
15type Phonics = String;
/// Whether name generation continues after this syllable (`true`) or the name ends with it (`false`)
17type ToRestore = bool;
18
19mod sorted_vec;
20
21#[derive(Error, Debug)]
22pub enum NameError {
23    #[error("empty string detected")]
24    EmptyString,
25}
26
27struct PhoneticConnectionBuilder {
28    conn: HashMap<char, HashMap<char, usize>>,
29}
30
31impl PhoneticConnectionBuilder {
32    fn new() -> PhoneticConnectionBuilder {
33        PhoneticConnectionBuilder {
34            conn: HashMap::new(),
35        }
36    }
37
38    fn add_char_pair(&mut self, incoming_char: char, outgoing_char: char) {
39        self.conn
40            .entry(incoming_char)
41            .or_default()
42            .entry(outgoing_char)
43            .and_modify(|e| *e += 1)
44            .or_insert(1);
45    }
46
47    fn build(self) -> PhoneticConnection {
48        let mut builder = SortedVecBuilder::new();
49        self.conn.iter().for_each(|(k, v)| {
50            let mut sum = 0;
51            for v2 in v.values() {
52                sum += v2;
53            }
54
55            let mut v = v.iter().collect::<Vec<(&char, &usize)>>();
56            v.sort_by(|a, b| a.0.cmp(b.0));
57
58            let mut set = SortedVecBuilder::new();
59            let mut prop = 0.0;
60
61            for (k2, v2) in v {
62                prop += *v2 as f64 / sum as f64;
63                set.push(prop, *k2);
64            }
65
66            let set = set.build();
67            builder.push(*k, set);
68        });
69
70        PhoneticConnection {
71            conn: builder.build(),
72        }
73    }
74}
75
76struct PhoneticConnection {
77    conn: SortedVec<char, SortedVec<f64, char>>,
78}
79
80impl PhoneticConnection {
81    fn extract_forward(&self, character: char, prop: f64) -> char {
82        let found = {
83            let set = &self.conn.find(character).1;
84            let found = set.find(prop);
85            found
86        };
87        found.1
88    }
89}
90
91/// The struct that represents the name.
92///  names are composed of syllables, and each syllable has a letter as `Letter`, and a phonetic representation as `Phonics`.
93///
94/// Example: Bedford -> Name::new(vec![("bed", "ˈbɛd"), ("ford", "fərd")])
95#[derive(Debug)]
96pub struct Name {
97    syllables: Vec<(Letter, Phonics)>,
98}
99
100impl Name {
101    pub fn new(syllables: Vec<(&str, &str)>) -> Result<Self, NameError> {
102        Self::from_string(
103            syllables
104                .iter()
105                .map(|(k, r)| (k.to_string(), r.to_string()))
106                .collect(),
107        )
108    }
109
110    pub fn from_string(syllables: Vec<(String, String)>) -> Result<Self, NameError> {
111        for syllable in &syllables {
112            if syllable.1.is_empty() {
113                return Err(NameError::EmptyString);
114            }
115        }
116        Ok(Self { syllables })
117    }
118
119    fn connection_pairs(&self) -> Vec<(char, char)> {
120        let mut pairs = vec![];
121        for i in 0..self.syllables.len() - 1 {
122            pairs.push((
123                self.syllables[i].1.chars().last().unwrap(),
124                self.syllables[i + 1].1.chars().next().unwrap(),
125            ))
126        }
127        pairs
128    }
129
130    fn last_char_of_syllable(&self, i: usize) -> char {
131        self.syllables[i].1.chars().last().unwrap()
132    }
133
134    pub fn content(&self) -> Letter {
135        self.syllables.iter().map(|p| p.0.clone()).collect()
136    }
137
138    pub fn script(&self) -> Phonics {
139        self.syllables.iter().map(|p| p.1.clone()).collect()
140    }
141
142    pub fn syllables(&self) -> &Vec<(Letter, Phonics)> {
143        &self.syllables
144    }
145}
146
147/// The builder for the NameGenerator.
148pub struct NameGeneratorBuilder {
149    names: Vec<Name>,
150}
151
152impl Default for NameGeneratorBuilder {
153    fn default() -> Self {
154        Self::new()
155    }
156}
157
158impl NameGeneratorBuilder {
159    pub fn new() -> Self {
160        Self { names: vec![] }
161    }
162
163    pub fn add_name(mut self, name: Name) -> Self {
164        self.names.push(name);
165        self
166    }
167
168    pub fn bulk_add_names(mut self, names: Vec<Name>) -> Self {
169        self.names.extend(names);
170        self
171    }
172
173    pub fn build(self) -> NameGenerator {
174        let mut conn_builder = PhoneticConnectionBuilder::new();
175        let mut outgoing_tree = HashMap::new();
176        let mut incoming_syllables = vec![];
177        let mut outgoing_syllables = vec![];
178        self.names.iter().enumerate().for_each(|(ipn, name)| {
179            name.connection_pairs()
180                .iter()
181                .enumerate()
182                .for_each(|(ipc, pair)| {
183                    conn_builder.add_char_pair(pair.0, pair.1);
184                    if ipc == 0 {
185                        incoming_syllables.push((ipn, ipc));
186                    }
187                    let to_restore = ipc + 1 != name.syllables.len() - 1;
188                    outgoing_syllables.push((ipn, ipc + 1, to_restore));
189                    outgoing_tree
190                        .entry(pair.1)
191                        .and_modify(|v: &mut Vec<usize>| v.push(outgoing_syllables.len() - 1))
192                        .or_insert(vec![outgoing_syllables.len() - 1]);
193                });
194        });
195
196        NameGenerator {
197            names: self.names,
198            incoming_syllables,
199            outgoing_syllables,
200            outgoing_tree,
201            conn: conn_builder.build(),
202        }
203    }
204}
205
206/// The generator for the names.
207pub struct NameGenerator {
208    // list of the names
209    names: Vec<Name>,
210    // syllables that can be the first syllable
211    incoming_syllables: Vec<(usize, usize)>,
212    // syllables that can be the next syllable
213    outgoing_syllables: Vec<(usize, usize, ToRestore)>,
214    // list of the index of the outgoing_syllables which has the same first character
215    outgoing_tree: HashMap<char, Vec<usize>>,
216    // phonetic connection between the last character of the previous syllable and the first character of the next syllable
217    conn: PhoneticConnection,
218}
219
/// The detailed information of a syllable: where in the dataset it came from.
///
/// This is a plain pair of indices, so it is cheap to copy and can be compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SyllableInfo {
    /// The index of the name in the dataset
    pub name_index: usize,
    /// The index of the syllable in the name
    pub syllable_index: usize,
}
228
229impl NameGenerator {
230    /// Generate a name with detailed information of the syllables.
231    /// Random number generator is required as argument `rand_fn`.
232    pub fn generate_verbose(
233        &self,
234        mut rand_fn: impl FnMut() -> f64,
235    ) -> (Letter, Phonics, Vec<SyllableInfo>) {
236        let query_next = |incoming_syllable: (usize, usize), p0: f64, p1: f64| {
237            let connection_syllable = self.conn.extract_forward(
238                self.names[incoming_syllable.0].last_char_of_syllable(incoming_syllable.1),
239                p0,
240            );
241            let outgoing_syllable_list = &self.outgoing_tree[&connection_syllable];
242            &self.outgoing_syllables
243                [outgoing_syllable_list[(p1 * outgoing_syllable_list.len() as f64) as usize]]
244        };
245
246        let incoming_syllable =
247            &self.incoming_syllables[(rand_fn() * self.incoming_syllables.len() as f64) as usize];
248        let mut syllables_vec = vec![(incoming_syllable.0, incoming_syllable.1)];
249
250        let mut restore_flag = true;
251        while restore_flag {
252            let (k, r, to_restore) =
253                query_next(syllables_vec[syllables_vec.len() - 1], rand_fn(), rand_fn());
254            syllables_vec.push((*k, *r));
255            restore_flag = *to_restore;
256        }
257
258        let syllable_info = syllables_vec
259            .iter()
260            .map(|p| SyllableInfo {
261                name_index: p.0,
262                syllable_index: p.1,
263            })
264            .collect::<Vec<SyllableInfo>>();
265
266        let content = syllables_vec
267            .iter()
268            .map(|p| self.names[p.0].syllables[p.1].0.clone())
269            .collect::<Vec<Letter>>()
270            .join("");
271        let script = syllables_vec
272            .iter()
273            .map(|p| self.names[p.0].syllables[p.1].1.clone())
274            .collect::<Vec<Phonics>>()
275            .join("");
276
277        (content, script, syllable_info)
278    }
279
280    /// Generate a name.
281    /// Random number generator is required as argument `rand_fn`.
282    pub fn generate(&self, rand_fn: impl FnMut() -> f64) -> (Letter, Phonics) {
283        let (content, script, _) = self.generate_verbose(rand_fn);
284        (content, script)
285    }
286
287    /// Get the list of the names as reference
288    pub fn names(&self) -> &Vec<Name> {
289        &self.names
290    }
291}