1use crate::common;
2use crate::error::Error;
3use crate::file_writer;
4use crate::generators::CodeGen;
5use crate::ucd_parsers;
6use std::collections::HashSet;
7use std::fs::File;
8use std::path::{Path, PathBuf};
9use ucd_parse::Codepoints;
10use ucd_parse::CoreProperty;
11use ucd_parse::Property;
12use ucd_parse::Script;
13use ucd_parse::UnicodeDataDecompositionTag;
14use ucd_parsers::DerivedJoiningType;
15use ucd_parsers::HangulSyllableType;
16
17fn parse_unicode_file<U: ucd_parse::UcdFile, F>(path: &Path, mut f: F) -> Result<(), Error>
18where
19 F: FnMut(&U) -> Result<(), Error>,
20{
21 let lines: Vec<U> = ucd_parse::parse(path)?;
22 for line in lines.iter() {
23 f(line)?;
24 }
25 Ok(())
26}
27
28pub struct UcdFileGen {
30 ucd_path: PathBuf,
31 generators: Vec<Box<dyn UcdCodeGen>>,
32}
33
34impl UcdFileGen {
35 pub fn new<P: AsRef<Path>>(path: P) -> Self {
39 let path = path.as_ref();
40 Self {
41 ucd_path: path.to_path_buf(),
42 generators: Vec::new(),
43 }
44 }
45
46 pub fn add(&mut self, gen: Box<dyn UcdCodeGen>) {
48 self.generators.push(gen);
49 }
50}
51
52impl CodeGen for UcdFileGen {
53 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
54 let it = self.generators.iter_mut();
55 for gen in it {
56 gen.parse_unicode_file(&self.ucd_path)?;
57 gen.generate_code(file)?;
58 }
59 Ok(())
60 }
61}
62
/// A [`CodeGen`] that takes its input from UCD data.
///
/// `UcdFileGen` calls `parse_unicode_file` on each implementor before asking
/// it to generate code.
pub trait UcdCodeGen: CodeGen {
    /// Reads the UCD data found at `ucd_path` into the implementor's state.
    ///
    /// # Errors
    ///
    /// Returns an `Error` when the data cannot be parsed or processed.
    fn parse_unicode_file(&mut self, ucd_path: &Path) -> Result<(), Error>;
}
70
/// A [`CodeGen`] that consumes parsed UCD entries of type `U` one at a time.
pub trait UcdLineParser<U>: CodeGen {
    /// Processes a single parsed entry, updating the implementor's state.
    ///
    /// # Errors
    ///
    /// Returns an `Error` when the entry cannot be processed.
    fn process_entry(&mut self, line: &U) -> Result<(), Error>;
}
78
/// Collects the code points of entries whose property value equals `name`
/// and emits them as a table named `table_name`.
pub struct UcdTableGen {
    /// Value compared against each entry's category / property / script.
    name: String,
    /// Name passed to the file writer when the table is generated.
    table_name: String,
    /// Code points collected so far.
    cps: HashSet<u32>,
}

impl UcdTableGen {
    /// Creates an empty table generator matching entries on `name` and
    /// writing its output under `table_name`.
    pub fn new(name: &str, table_name: &str) -> Self {
        Self {
            name: name.to_owned(),
            table_name: table_name.to_owned(),
            cps: HashSet::new(),
        }
    }
}
97
98impl CodeGen for UcdTableGen {
99 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
100 file_writer::generate_code_from_hashset(file, &self.table_name, &self.cps)
101 }
102}
103
104impl UcdLineParser<ucd_parsers::UnicodeData> for UcdTableGen {
105 fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
106 if self.name == udata.general_category {
107 match udata.codepoints {
108 Codepoints::Single(ref cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
109 Codepoints::Range(ref r) => common::insert_codepoint_range(r, &mut self.cps)?,
110 }
111 }
112 Ok(())
113 }
114}
115
116impl UcdLineParser<HangulSyllableType> for UcdTableGen {
117 fn process_entry(&mut self, line: &HangulSyllableType) -> Result<(), Error> {
118 if self.name == line.prop.property {
119 match line.prop.codepoints {
120 Codepoints::Single(cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
121 Codepoints::Range(r) => common::insert_codepoint_range(&r, &mut self.cps)?,
122 }
123 }
124 Ok(())
125 }
126}
127
128impl UcdLineParser<Property> for UcdTableGen {
129 fn process_entry(&mut self, line: &Property) -> Result<(), Error> {
130 if self.name == line.property {
131 match line.codepoints {
132 Codepoints::Single(cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
133 Codepoints::Range(r) => common::insert_codepoint_range(&r, &mut self.cps)?,
134 }
135 }
136 Ok(())
137 }
138}
139
140impl UcdLineParser<CoreProperty> for UcdTableGen {
141 fn process_entry(&mut self, line: &CoreProperty) -> Result<(), Error> {
142 if self.name == line.property {
143 match line.codepoints {
144 Codepoints::Single(cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
145 Codepoints::Range(r) => common::insert_codepoint_range(&r, &mut self.cps)?,
146 }
147 }
148 Ok(())
149 }
150}
151
152impl UcdLineParser<Script> for UcdTableGen {
153 fn process_entry(&mut self, line: &Script) -> Result<(), Error> {
154 if self.name == line.script {
155 match line.codepoints {
156 Codepoints::Single(ref cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
157 Codepoints::Range(ref r) => common::insert_codepoint_range(r, &mut self.cps)?,
158 }
159 }
160 Ok(())
161 }
162}
163
164impl UcdLineParser<DerivedJoiningType> for UcdTableGen {
165 fn process_entry(&mut self, line: &DerivedJoiningType) -> Result<(), Error> {
166 if self.name == line.prop.property {
167 match line.prop.codepoints {
168 Codepoints::Single(ref cp) => common::insert_codepoint(cp.value(), &mut self.cps)?,
169 Codepoints::Range(ref r) => common::insert_codepoint_range(r, &mut self.cps)?,
170 }
171 }
172 Ok(())
173 }
174}
175
176pub struct UnicodeGen<T: ucd_parse::UcdFile> {
178 generators: Vec<Box<dyn UcdLineParser<T>>>,
179}
180
181impl<T: ucd_parse::UcdFile> UnicodeGen<T> {
182 pub fn new() -> Self {
184 Self {
185 generators: Vec::new(),
186 }
187 }
188
189 pub fn add(&mut self, gen: Box<dyn UcdLineParser<T>>) {
191 self.generators.push(gen);
192 }
193}
194
195impl<T: ucd_parse::UcdFile> Default for UnicodeGen<T> {
196 fn default() -> Self {
197 Self::new()
198 }
199}
200
201impl<T: ucd_parse::UcdFile> UcdCodeGen for UnicodeGen<T> {
202 fn parse_unicode_file(&mut self, ucd_path: &Path) -> Result<(), Error> {
203 parse_unicode_file(ucd_path, |line: &T| {
204 let it = self.generators.iter_mut();
205 for gen in it {
206 gen.process_entry(line)?;
207 }
208 Ok(())
209 })
210 }
211}
212
213impl<T: ucd_parse::UcdFile> CodeGen for UnicodeGen<T> {
214 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
215 let it = self.generators.iter_mut();
216 for gen in it {
217 gen.generate_code(file)?;
218 }
219 Ok(())
220 }
221}
222
223pub struct GeneralCategoryGen {
226 generators: Vec<Box<dyn UcdLineParser<ucd_parsers::UnicodeData>>>,
227}
228
229impl GeneralCategoryGen {
230 pub fn new() -> Self {
232 Self {
233 generators: Vec::new(),
234 }
235 }
236
237 pub fn add(&mut self, gen: Box<dyn UcdLineParser<ucd_parsers::UnicodeData>>) {
239 self.generators.push(gen);
240 }
241}
242
243impl Default for GeneralCategoryGen {
244 fn default() -> Self {
245 Self::new()
246 }
247}
248
249impl UcdCodeGen for GeneralCategoryGen {
250 fn parse_unicode_file(&mut self, ucd_path: &Path) -> Result<(), Error> {
251 let cps: Vec<ucd_parsers::UnicodeData> = ucd_parsers::UnicodeData::parse(ucd_path)?;
252 for udata in cps.iter() {
253 let it = self.generators.iter_mut();
254 for gen in it {
255 gen.process_entry(udata)?;
256 }
257 }
258 Ok(())
259 }
260}
261
262impl CodeGen for GeneralCategoryGen {
263 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
264 let it = self.generators.iter_mut();
265 for gen in it {
266 gen.generate_code(file)?;
267 }
268 Ok(())
269 }
270}
271
/// Canonical combining class value that `ViramaTableGen` treats as marking a
/// virama code point.
const CANONICAL_COMBINING_CLASS_VIRAMA: u8 = 9;
273
/// Collects the code points whose canonical combining class is
/// `CANONICAL_COMBINING_CLASS_VIRAMA` and emits them as a table.
pub struct ViramaTableGen {
    /// Name passed to the file writer when the table is generated.
    table_name: String,
    /// Code points collected so far.
    cps: HashSet<u32>,
}

impl ViramaTableGen {
    /// Creates an empty generator writing its output under `table_name`.
    pub fn new(table_name: &str) -> Self {
        Self {
            table_name: table_name.to_owned(),
            cps: HashSet::new(),
        }
    }
}
290
291impl CodeGen for ViramaTableGen {
292 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
293 file_writer::generate_code_from_hashset(file, &self.table_name, &self.cps)
294 }
295}
296
297impl UcdLineParser<ucd_parsers::UnicodeData> for ViramaTableGen {
298 fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
299 match udata.codepoints {
300 Codepoints::Range(ref r) => {
301 if udata.canonical_combining_class == CANONICAL_COMBINING_CLASS_VIRAMA {
302 common::insert_codepoint_range(r, &mut self.cps)?;
303 }
304 }
305 Codepoints::Single(ref cp) => {
306 if udata.canonical_combining_class == CANONICAL_COMBINING_CLASS_VIRAMA {
307 common::insert_codepoint(cp.value(), &mut self.cps)?;
308 }
309 }
310 }
311 Ok(())
312 }
313}
314
315pub struct WidthMappingTableGen {
318 name: String,
319 vec: Vec<(Codepoints, ucd_parse::Codepoint)>,
320}
321
322impl WidthMappingTableGen {
323 pub fn new(name: &str) -> Self {
325 Self {
326 name: String::from(name),
327 vec: Vec::new(),
328 }
329 }
330}
331
332impl UcdLineParser<ucd_parsers::UnicodeData> for WidthMappingTableGen {
333 fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
334 if udata.decomposition.len == 0 {
335 return err!("No decomposition mappings");
336 }
337
338 if let Some(tag) = &udata.decomposition.tag {
339 if *tag == UnicodeDataDecompositionTag::Wide
340 || *tag == UnicodeDataDecompositionTag::Narrow
341 {
342 self.vec
343 .push((udata.codepoints, udata.decomposition.mapping[0]));
344 }
345 }
346 Ok(())
347 }
348}
349
350impl CodeGen for WidthMappingTableGen {
351 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
352 file_writer::generate_width_mapping_vector(file, &self.name, &self.vec)
353 }
354}
355
356pub struct UnassignedTableGen {
358 name: String,
359 range: ucd_parse::CodepointRange,
360 vec: Vec<Codepoints>,
361}
362
363impl UnassignedTableGen {
364 pub fn new(table_name: &str) -> Self {
366 Self {
367 name: String::from(table_name),
368 range: ucd_parse::CodepointRange {
369 start: ucd_parse::Codepoint::from_u32(0).unwrap(),
370 end: ucd_parse::Codepoint::from_u32(0).unwrap(),
371 },
372 vec: Vec::new(),
373 }
374 }
375}
376
377impl UcdLineParser<ucd_parsers::UnicodeData> for UnassignedTableGen {
378 fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
379 match udata.codepoints {
380 Codepoints::Range(ref r) => {
381 if r.start.value() - self.range.end.value() > 0 {
382 self.range.end = ucd_parse::Codepoint::from_u32(r.start.value() - 1)?;
383 common::add_codepoints(&self.range, &mut self.vec);
384 }
385 self.range.start = ucd_parse::Codepoint::from_u32(r.end.value() + 1)?;
386 self.range.end = r.start;
387 }
388 Codepoints::Single(ref cp) => {
389 let next_cp = ucd_parse::Codepoint::from_u32(cp.value() + 1)?;
390 if cp.value() - self.range.end.value() != 0 {
391 self.range.end = ucd_parse::Codepoint::from_u32(cp.value() - 1)?;
392 common::add_codepoints(&self.range, &mut self.vec);
393 }
394
395 self.range.start = next_cp;
396 self.range.end = next_cp;
397 }
398 }
399 Ok(())
400 }
401}
402
403impl CodeGen for UnassignedTableGen {
404 fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
405 file_writer::generate_code_from_vec(file, &self.name, &self.vec)
406 }
407}