// precis-tools 0.1.9
//
// Tools and parsers to generate PRECIS tables from the Unicode Character Database (UCD).
// Documentation
use crate::file_writer;
use crate::ucd_parsers;
use crate::Error;
use crate::{CodeGen, UcdLineParser};
use std::fs::File;
use std::io::Write;
use ucd_parse::{CodepointRange, Codepoints};

/// Generates a table of tuples (`Codepoints`, `BidiClass`) representing
/// the values of the Unicode character property
/// [`Bidi_Class`](http://www.unicode.org/reports/tr44/#Bidi_Class).
/// Possible values are listed in
/// [`UAX44`, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values).
///
/// Entries are accumulated one at a time while the `UCD` data is parsed and
/// are compressed into ranges right before the table is written out.
pub struct BidiClassGen {
    /// Name of the generated static table; it is upper-cased when written.
    table_name: String,
    /// Collected (`Codepoints`, `Bidi_Class` value) pairs, in parsing order.
    vec: Vec<(Codepoints, String)>,
}

impl BidiClassGen {
    /// Builds an empty [`BidiClassGen`].
    ///
    /// # Arguments
    /// * `table_name` - name stored for the table this generator produces
    pub fn new(table_name: &str) -> Self {
        let table_name = table_name.to_owned();
        let vec = Vec::new();
        Self { table_name, vec }
    }
}

impl CodeGen for BidiClassGen {
    /// Emits the `BidiClass` enum definition first, then the table built
    /// from the entries collected so far. Stops at the first write error.
    fn generate_code(&mut self, file: &mut File) -> Result<(), Error> {
        generate_bidi_class_enum(file)?;
        self.generate_bidi_class_table(file)?;
        Ok(())
    }
}

impl UcdLineParser<ucd_parsers::UnicodeData> for BidiClassGen {
    /// Records the code point(s) of `udata` together with its `Bidi_Class`
    /// value. This step only appends to the internal list and never fails.
    fn process_entry(&mut self, udata: &ucd_parsers::UnicodeData) -> Result<(), Error> {
        let entry = (udata.codepoints, udata.bidi_class.clone());
        self.vec.push(entry);
        Ok(())
    }
}

/// Appends `range` tagged with `bidi` to `vec`.
///
/// A range whose start and end hold the same value is stored as
/// `Codepoints::Single`; anything else is stored as `Codepoints::Range`.
fn add_range(range: &CodepointRange, bidi: &str, vec: &mut Vec<(Codepoints, String)>) {
    let is_single = range.start.value() == range.end.value();
    let codepoints = if is_single {
        Codepoints::Single(range.start)
    } else {
        Codepoints::Range(*range)
    };
    vec.push((codepoints, bidi.to_owned()));
}

/// Writes the source of the `BidiClass` enum (doc comment, attributes and
/// one variant per `Bidi_Class` value) to `file`, followed by an empty line.
///
/// Returns an error as soon as any write to `file` fails.
fn generate_bidi_class_enum(file: &mut File) -> Result<(), Error> {
    // Everything emitted before the variant list, one entry per output line.
    const HEADER: &[&str] = &[
        "/// Represents values of the Unicode character property",
        "/// [Bidi_Class](http://www.unicode.org/reports/tr44/#Bidi_Class),",
        "/// also known as the bidirectional character type. Possible values",
        "/// are listed in [UAX44, Table 13](http://www.unicode.org/reports/tr44/#Bidi_Class_Values)",
        // Variant names are taken verbatim from the `UCD` files, so they are
        // upper case, which goes against the rust naming rules. The generated
        // code silences the corresponding lint for this enum only.
        "#[allow(clippy::upper_case_acronyms)]",
        "#[derive(Copy, Clone, PartialEq, Eq, Debug)]",
        "pub enum BidiClass {",
    ];

    // Variant names, in the order they appear in the generated enum.
    const VARIANTS: &[&str] = &[
        "AL", "AN", "B", "BN", "CS", "EN", "ES", "ET", "FSI", "L", "LRE", "LRI", "LRO", "NSM",
        "ON", "PDF", "PDI", "R", "RLE", "RLI", "RLO", "S", "WS",
    ];

    for line in HEADER.iter() {
        writeln!(file, "{}", line)?;
    }
    for variant in VARIANTS.iter() {
        writeln!(file, "\t{},", variant)?;
    }
    writeln!(file, "}}")?;

    // Blank separator line after the enum
    writeln!(file)?;
    Ok(())
}
impl BidiClassGen {
    fn generate_bidi_class_table(&mut self, file: &mut File) -> Result<(), Error> {
        self.compress_into_ranges();
        self.write_table_to_file(file)
    }

    fn compress_into_ranges(&mut self) {
        let mut out = Vec::new();
        let mut range: Option<CodepointRange> = None;
        let mut val: Option<String> = None;

        for (cp, bidi) in self.vec.iter() {
            if val.is_none() {
                val = Some(bidi.clone());
            }

            if val.as_ref() != Some(bidi) {
                // No same `bidi` class
                match range.as_ref() {
                    Some(r) => {
                        add_range(r, &val.unwrap(), &mut out);
                        range = None;
                    }
                    None => out.push((*cp, val.unwrap())),
                }
                val = Some(bidi.clone());
            }

            match cp {
                Codepoints::Single(cp) => {
                    match range.as_mut() {
                        Some(r) => {
                            if cp.value() - r.end.value() == 1 {
                                r.end = *cp;
                            } else {
                                // there is a gap, non-consecutive numbers
                                add_range(r, bidi, &mut out);
                                // Start a new range
                                range = Some(CodepointRange {
                                    start: *cp,
                                    end: *cp,
                                });
                            }
                        }
                        None => {
                            range = Some(CodepointRange {
                                start: *cp,
                                end: *cp,
                            });
                        }
                    }
                }
                Codepoints::Range(cp) => {
                    match range.as_mut() {
                        Some(r) => {
                            if cp.start.value() - r.end.value() == 1 {
                                // This range can be included in the previous one
                                r.end = cp.end;
                            } else {
                                // no consecutive ranges
                                out.push((Codepoints::Range(*r), bidi.clone()));
                                out.push((Codepoints::Range(*cp), bidi.clone()));
                                range = None;
                            }
                        }
                        None => {
                            range = Some(*cp);
                        }
                    }
                }
            }
        }

        self.vec = out;
    }

    fn write_table_to_file(&mut self, file: &mut File) -> Result<(), Error> {
        writeln!(
            file,
            "static {}: [(Codepoints, BidiClass); {}] = [",
            self.table_name.to_uppercase(),
            self.vec.len()
        )?;

        for (cp, bidi) in self.vec.iter() {
            writeln!(
                file,
                "\t({}, BidiClass::{}),",
                file_writer::generate_codepoint_str(cp),
                bidi
            )?;
        }

        writeln!(file, "];")?;
        Ok(writeln!(file)?)
    }
}