precis_tools/generators/
bidi_class.rs

1use crate::file_writer;
2use crate::ucd_parsers;
3use crate::Error;
4use crate::{CodeGen, UcdLineParser};
5use std::fs::File;
6use std::io::Write;
7use ucd_parse::{CodepointRange, Codepoints};
8
9/// Generates a table of tuples (`Codepoints`, `BidiClass`) representing
10/// the values of the Unicode character property
11/// [`Bidi_Class`](http://www.unicode.org/reports/tr44/#Bidi_Class).
12/// Possible values are listed in
13/// [`UAX44`, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values).
14pub struct BidiClassGen {
15    table_name: String,
16    vec: Vec<(Codepoints, String)>,
17}
18
19impl BidiClassGen {
20    /// Creates a new [`BidiClassGen`]
21    pub fn new(table_name: &str) -> Self {
22        Self {
23            table_name: String::from(table_name),
24            vec: Vec::new(),
25        }
26    }
27}
28
29impl CodeGen for BidiClassGen {
30    fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
31        generate_bidi_class_enum(file)?;
32        self.generate_bidi_class_table(file)
33    }
34}
35
36impl UcdLineParser<ucd_parsers::UnicodeData> for BidiClassGen {
37    fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
38        self.vec.push((udata.codepoints, udata.bidi_class.clone()));
39        Ok(())
40    }
41}
42
43fn add_range(range: &CodepointRange, bidi: &str, vec: &mut Vec<(Codepoints, String)>) {
44    if range.start.value() == range.end.value() {
45        vec.push((Codepoints::Single(range.start), String::from(bidi)));
46    } else {
47        vec.push((Codepoints::Range(*range), String::from(bidi)));
48    }
49}
50
51fn generate_bidi_class_enum(file: &mut File) -> Result<(), Error> {
52    writeln!(
53        file,
54        "/// Represents values of the Unicode character property"
55    )?;
56    writeln!(
57        file,
58        "/// [Bidi_Class](http://www.unicode.org/reports/tr44/#Bidi_Class),"
59    )?;
60    writeln!(
61        file,
62        "/// also known as the bidirectional character type. Possible values"
63    )?;
64    writeln!(file,
65		"/// are listed in [UAX44, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values)"
66		)?;
67
68    // Values in `BidiClass` `enum` are generated by parsing the `UCD` files, they are upper case
69    // which goes against the rust naming rules. Let's allow it just for this time
70    writeln!(file, "#[allow(clippy::upper_case_acronyms)]")?;
71
72    writeln!(file, "#[derive(Copy, Clone, PartialEq, Eq, Debug)]")?;
73    writeln!(file, "pub enum BidiClass {{")?;
74    writeln!(file, "\tAL,")?;
75    writeln!(file, "\tAN,")?;
76    writeln!(file, "\tB,")?;
77    writeln!(file, "\tBN,")?;
78    writeln!(file, "\tCS,")?;
79    writeln!(file, "\tEN,")?;
80    writeln!(file, "\tES,")?;
81    writeln!(file, "\tET,")?;
82    writeln!(file, "\tFSI,")?;
83    writeln!(file, "\tL,")?;
84    writeln!(file, "\tLRE,")?;
85    writeln!(file, "\tLRI,")?;
86    writeln!(file, "\tLRO,")?;
87    writeln!(file, "\tNSM,")?;
88    writeln!(file, "\tON,")?;
89    writeln!(file, "\tPDF,")?;
90    writeln!(file, "\tPDI,")?;
91    writeln!(file, "\tR,")?;
92    writeln!(file, "\tRLE,")?;
93    writeln!(file, "\tRLI,")?;
94    writeln!(file, "\tRLO,")?;
95    writeln!(file, "\tS,")?;
96    writeln!(file, "\tWS,")?;
97    writeln!(file, "}}")?;
98
99    Ok(writeln!(file)?)
100}
101impl BidiClassGen {
102    fn generate_bidi_class_table(&mut self, file: &mut File) -> Result<(), Error> {
103        self.compress_into_ranges();
104        self.write_table_to_file(file)
105    }
106
107    fn compress_into_ranges(&mut self) {
108        let mut out = Vec::new();
109        let mut range: Option<CodepointRange> = None;
110        let mut val: Option<String> = None;
111
112        for (cp, bidi) in self.vec.iter() {
113            if val.is_none() {
114                val = Some(bidi.clone());
115            }
116
117            if val.as_ref() != Some(bidi) {
118                // No same `bidi` class
119                match range.as_ref() {
120                    Some(r) => {
121                        add_range(r, &val.unwrap(), &mut out);
122                        range = None;
123                    }
124                    None => out.push((*cp, val.unwrap())),
125                }
126                val = Some(bidi.clone());
127            }
128
129            match cp {
130                Codepoints::Single(cp) => {
131                    match range.as_mut() {
132                        Some(r) => {
133                            if cp.value() - r.end.value() == 1 {
134                                r.end = *cp;
135                            } else {
136                                // there is a gap, non-consecutive numbers
137                                add_range(r, bidi, &mut out);
138                                // Start a new range
139                                range = Some(CodepointRange {
140                                    start: *cp,
141                                    end: *cp,
142                                });
143                            }
144                        }
145                        None => {
146                            range = Some(CodepointRange {
147                                start: *cp,
148                                end: *cp,
149                            });
150                        }
151                    }
152                }
153                Codepoints::Range(cp) => {
154                    match range.as_mut() {
155                        Some(r) => {
156                            if cp.start.value() - r.end.value() == 1 {
157                                // This range can be included in the previous one
158                                r.end = cp.end;
159                            } else {
160                                // no consecutive ranges
161                                out.push((Codepoints::Range(*r), bidi.clone()));
162                                out.push((Codepoints::Range(*cp), bidi.clone()));
163                                range = None;
164                            }
165                        }
166                        None => {
167                            range = Some(*cp);
168                        }
169                    }
170                }
171            }
172        }
173
174        self.vec = out;
175    }
176
177    fn write_table_to_file(&mut self, file: &mut File) -> Result<(), Error> {
178        writeln!(
179            file,
180            "static {}: [(Codepoints, BidiClass); {}] = [",
181            self.table_name.to_uppercase(),
182            self.vec.len()
183        )?;
184
185        for (cp, bidi) in self.vec.iter() {
186            writeln!(
187                file,
188                "\t({}, BidiClass::{}),",
189                file_writer::generate_codepoint_str(cp),
190                bidi
191            )?;
192        }
193
194        writeln!(file, "];")?;
195        Ok(writeln!(file)?)
196    }
197}