#![allow(
clippy::too_many_lines,
clippy::redundant_clone,
clippy::format_collect,
clippy::missing_assert_message,
clippy::absolute_paths,
reason = "build.rs is dev-tooling code that emits source to OUT_DIR; \
the workspace's pedantic lint profile aimed at library APIs \
fires noisy here without improving anything downstream"
)]
use std::env;
use std::fs;
use std::io::{BufWriter, Write};
use std::path::PathBuf;
/// Panic message passed to `.expect(..)` on the writes that emit the generated table source.
///
/// NOTE(review): the wording assumes the writer wraps an in-memory `Vec<u8>`; confirm that the
/// sink it is used with really is in-memory, otherwise a real I/O failure is reported with a
/// misleading message.
const INFALLIBLE: &str = "BufWriter::write_all over Vec<u8> is infallible";
/// Build-script entry point.
///
/// Reads the four TSV tables under `<CARGO_MANIFEST_DIR>/data`, merges the two
/// description tables, and writes `jisx0213_table.rs` into `OUT_DIR`. The generated
/// file contains three `phf` maps (`JISX0213_MENCODE_TO_CHAR`,
/// `JISX0213_MENCODE_TO_STR`, `DESCRIPTION_TO_CHAR`), four entry-count constants and
/// a trailing summary comment.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` or `OUT_DIR` is unset, if any TSV cannot be read or
/// parsed, or if the generated file cannot be written.
fn main() {
    let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR"));
    let data_dir = manifest_dir.join("data");
    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR"));
    let out_path = out_dir.join("jisx0213_table.rs");

    let single_tsv = data_dir.join("jisx0213-2004.tsv");
    let combo_tsv = data_dir.join("jisx0213-combo.tsv");
    let dict_tsv = data_dir.join("aozora-gaiji-chuki.tsv");
    let special_tsv = data_dir.join("aozora-gaiji-special.tsv");

    // Ask cargo to rerun this script when the script itself or any input table changes.
    println!("cargo:rerun-if-changed=build.rs");
    for tsv in [&single_tsv, &combo_tsv, &dict_tsv, &special_tsv] {
        println!("cargo:rerun-if-changed={}", tsv.display());
    }

    let single = parse_single_tsv(&single_tsv);
    let combo = parse_combo_tsv(&combo_tsv);
    let dict = parse_description_tsv(&dict_tsv);
    let special = parse_description_tsv(&special_tsv);

    // Entries from the special table win: a dictionary entry is only added when its
    // description does not already appear in the special table.
    let mut description: Vec<DescriptionEntry> = special;
    let seen: std::collections::HashSet<String> =
        description.iter().map(|e| e.description.clone()).collect();
    description.extend(
        dict.into_iter()
            .filter(|entry| !seen.contains(&entry.description)),
    );
    description.sort_by(|a, b| a.description.cmp(&b.description));

    let plane1_count = single.iter().filter(|e| e.plane == 1).count();
    let plane2_count = single.iter().filter(|e| e.plane == 2).count();
    let combo_count = combo.len();
    let description_count = description.len();

    // Render into memory first. Writes into a `Vec<u8>` cannot fail, which is exactly
    // what `INFALLIBLE` claims, and the file on disk is only touched once the whole
    // table has been rendered, so a panic half-way through never leaves a truncated
    // `jisx0213_table.rs` behind.
    let mut out = BufWriter::new(Vec::<u8>::new());
    writeln!(
        out,
        "// AUTO-GENERATED by `crates/aozora-encoding/build.rs`. \
         DO NOT EDIT BY HAND.\n\
         // Source TSVs in `crates/aozora-encoding/data/`; rerun by \
         touching any of them.\n",
    )
    .expect(INFALLIBLE);

    // Map 1: mencode key -> single `char` literal.
    // The key strings must outlive the builder because it stores `&str` keys.
    let mut single_builder = phf_codegen::Map::<&str>::new();
    let single_keys: Vec<String> = single
        .iter()
        .map(|e| mencode(e.plane, e.row, e.cell))
        .collect();
    let single_values: Vec<String> = single
        .iter()
        .map(|e| format!("'\\u{{{:04X}}}'", e.codepoint))
        .collect();
    for (k, v) in single_keys.iter().zip(single_values.iter()) {
        single_builder.entry(k.as_str(), v.as_str());
    }
    writeln!(
        out,
        "pub(crate) static JISX0213_MENCODE_TO_CHAR: phf::Map<&'static str, char> = {};",
        single_builder.build(),
    )
    .expect(INFALLIBLE);
    writeln!(out).expect(INFALLIBLE);

    // Map 2: mencode key -> string literal made of several `\u{..}` escapes.
    let mut combo_builder = phf_codegen::Map::<&str>::new();
    let combo_keys: Vec<String> = combo
        .iter()
        .map(|e| mencode(e.plane, e.row, e.cell))
        .collect();
    let combo_values: Vec<String> = combo
        .iter()
        .map(|e| {
            let body: String = e
                .codepoints
                .iter()
                .map(|cp| format!("\\u{{{cp:04X}}}"))
                .collect();
            format!("\"{body}\"")
        })
        .collect();
    for (k, v) in combo_keys.iter().zip(combo_values.iter()) {
        combo_builder.entry(k.as_str(), v.as_str());
    }
    writeln!(
        out,
        "pub(crate) static JISX0213_MENCODE_TO_STR: phf::Map<&'static str, &'static str> = {};",
        combo_builder.build(),
    )
    .expect(INFALLIBLE);
    writeln!(out).expect(INFALLIBLE);

    // Map 3: description text -> single `char` literal.
    let mut description_builder = phf_codegen::Map::<&str>::new();
    let description_values: Vec<String> = description
        .iter()
        .map(|e| format!("'\\u{{{:04X}}}'", e.codepoint))
        .collect();
    for (entry, value) in description.iter().zip(description_values.iter()) {
        description_builder.entry(entry.description.as_str(), value.as_str());
    }
    writeln!(
        out,
        "pub(crate) static DESCRIPTION_TO_CHAR: phf::Map<&'static str, char> = {};",
        description_builder.build(),
    )
    .expect(INFALLIBLE);
    writeln!(out).expect(INFALLIBLE);

    writeln!(
        out,
        "#[allow(dead_code, reason = \"pinned for table-size tests\")]\n\
         pub(crate) const JISX0213_PLANE1_COUNT: usize = {plane1_count};\n\
         #[allow(dead_code, reason = \"pinned for table-size tests\")]\n\
         pub(crate) const JISX0213_PLANE2_COUNT: usize = {plane2_count};\n\
         #[allow(dead_code, reason = \"pinned for table-size tests\")]\n\
         pub(crate) const JISX0213_COMBO_COUNT: usize = {combo_count};\n\
         #[allow(dead_code, reason = \"pinned for table-size tests\")]\n\
         pub(crate) const DESCRIPTION_COUNT: usize = {description_count};",
    )
    .expect(INFALLIBLE);
    writeln!(out).expect(INFALLIBLE);
    writeln!(
        out,
        "// Summary: 第3水準={plane1_count}, 第4水準={plane2_count}, \
         combo={combo_count}, description={description_count}, \
         total={total}.",
        total = plane1_count + plane2_count + combo_count + description_count,
    )
    .expect(INFALLIBLE);

    // Flushing into the in-memory buffer cannot fail; the one fallible step is the
    // write to disk, which is reported with the real path and OS error instead of
    // being swallowed by `BufWriter`'s `Drop`.
    let generated = out.into_inner().expect(INFALLIBLE);
    fs::write(&out_path, generated)
        .unwrap_or_else(|err| panic!("write {}: {err}", out_path.display()));
}
/// One data row of the single-codepoint TSV, as produced by `parse_single_tsv`.
///
/// `plane`, `row` and `cell` are combined by `mencode` into the key of the generated
/// `JISX0213_MENCODE_TO_CHAR` map; `codepoint` becomes that key's `char` value.
#[derive(Clone, Copy)]
struct SingleEntry {
/// Column 0, parsed as a decimal `u8`. `main` counts entries with plane 1 and plane 2 separately.
plane: u8,
/// Column 1, parsed as a decimal `u8`.
row: u8,
/// Column 2, parsed as a decimal `u8`.
cell: u8,
/// Column 3, parsed as hexadecimal; emitted as a `'\u{XXXX}'` literal.
codepoint: u32,
}
/// One data row of the combo TSV, as produced by `parse_combo_tsv`.
///
/// `plane`, `row` and `cell` are combined by `mencode` into the key of the generated
/// `JISX0213_MENCODE_TO_STR` map; `codepoints` becomes that key's string value.
struct ComboEntry {
/// Column 0, parsed as a decimal `u8`.
plane: u8,
/// Column 1, parsed as a decimal `u8`.
row: u8,
/// Column 2, parsed as a decimal `u8`.
cell: u8,
/// Column 3: comma-separated hexadecimal values, kept in file order and emitted as
/// consecutive `\u{XXXX}` escapes inside one string literal.
codepoints: Vec<u32>,
}
/// One data row of a description TSV, as produced by `parse_description_tsv`.
///
/// `description` is the key of the generated `DESCRIPTION_TO_CHAR` map and
/// `codepoint` is its `char` value.
#[derive(Clone)]
struct DescriptionEntry {
/// Column 0, stored verbatim; also used to de-duplicate and sort the merged table in `main`.
description: String,
/// Column 1, parsed as hexadecimal; emitted as a `'\u{XXXX}'` literal.
codepoint: u32,
}
/// Parses the single-codepoint table at `path`.
///
/// Each line is trimmed; blank lines and lines starting with `#` are skipped. Every
/// remaining line must have exactly four tab-separated columns: decimal `plane`,
/// `row` and `cell` (each a `u8`) followed by one hexadecimal codepoint.
///
/// Returns the entries in file order.
///
/// # Panics
///
/// Panics if the file cannot be read, if a data line does not have four columns, if
/// a column fails to parse, or if the codepoint is not a Unicode scalar value (it is
/// later emitted as a `char` literal, which would not compile). Every data-line
/// panic message starts with `<path>:<line>` so the offending row can be found.
fn parse_single_tsv(path: &std::path::Path) -> Vec<SingleEntry> {
    let text =
        fs::read_to_string(path).unwrap_or_else(|err| panic!("read {}: {err}", path.display()));
    let mut out = Vec::new();
    for (index, line) in text.lines().enumerate() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        // Report 1-based line numbers, as editors do.
        let lineno = index + 1;
        let cols: Vec<&str> = trimmed.split('\t').collect();
        let &[plane, row, cell, hex] = cols.as_slice() else {
            panic!(
                "{}:{} expected 4 cols, got {}",
                path.display(),
                lineno,
                cols.len(),
            );
        };
        // Shared parser for the three decimal coordinate columns.
        let coord = |name: &str, raw: &str| -> u8 {
            raw.parse::<u8>().unwrap_or_else(|err| {
                panic!("{}:{lineno} {name} {raw:?}: {err}", path.display())
            })
        };
        let codepoint = u32::from_str_radix(hex, 16).unwrap_or_else(|err| {
            panic!("{}:{lineno} codepoint hex {hex:?}: {err}", path.display())
        });
        assert!(
            char::from_u32(codepoint).is_some(),
            "{}:{lineno} U+{codepoint:04X} is not a Unicode scalar value",
            path.display(),
        );
        out.push(SingleEntry {
            plane: coord("plane", plane),
            row: coord("row", row),
            cell: coord("cell", cell),
            codepoint,
        });
    }
    out
}
/// Parses the multi-codepoint (combo) table at `path`.
///
/// Each line is trimmed; blank lines and lines starting with `#` are skipped. Every
/// remaining line must have exactly four tab-separated columns: decimal `plane`,
/// `row` and `cell` (each a `u8`) followed by a comma-separated list of hexadecimal
/// codepoints.
///
/// Returns the entries in file order, each with its codepoints in list order.
///
/// # Panics
///
/// Panics if the file cannot be read, if a data line does not have four columns, if
/// a column or list item fails to parse, or if a codepoint is not a Unicode scalar
/// value (it is later emitted as a `\u{..}` escape, which would not compile). Every
/// data-line panic message starts with `<path>:<line>`, matching `parse_single_tsv`.
fn parse_combo_tsv(path: &std::path::Path) -> Vec<ComboEntry> {
    let text =
        fs::read_to_string(path).unwrap_or_else(|err| panic!("read {}: {err}", path.display()));
    let mut out = Vec::new();
    for (index, line) in text.lines().enumerate() {
        let trimmed = line.trim();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        // Report 1-based line numbers, as editors do.
        let lineno = index + 1;
        let cols: Vec<&str> = trimmed.split('\t').collect();
        let &[plane, row, cell, hex_list] = cols.as_slice() else {
            panic!(
                "{}:{} expected 4 cols, got {}",
                path.display(),
                lineno,
                cols.len(),
            );
        };
        // Shared parser for the three decimal coordinate columns.
        let coord = |name: &str, raw: &str| -> u8 {
            raw.parse::<u8>().unwrap_or_else(|err| {
                panic!("{}:{lineno} {name} {raw:?}: {err}", path.display())
            })
        };
        let codepoints: Vec<u32> = hex_list
            .split(',')
            .map(|hex| {
                let cp = u32::from_str_radix(hex, 16).unwrap_or_else(|err| {
                    panic!("{}:{lineno} codepoint hex {hex:?}: {err}", path.display())
                });
                assert!(
                    char::from_u32(cp).is_some(),
                    "{}:{lineno} U+{cp:04X} is not a Unicode scalar value",
                    path.display(),
                );
                cp
            })
            .collect();
        out.push(ComboEntry {
            plane: coord("plane", plane),
            row: coord("row", row),
            cell: coord("cell", cell),
            codepoints,
        });
    }
    out
}
/// Parses a description table at `path`.
///
/// Only trailing whitespace is trimmed from each line, so leading whitespace in a
/// description is kept. Blank lines and lines starting with `#` are skipped. Every
/// remaining line must have exactly two tab-separated columns: the description text
/// and one hexadecimal codepoint.
///
/// Returns the entries in file order.
///
/// # Panics
///
/// Panics if the file cannot be read, if a data line does not have two columns, if
/// the codepoint fails to parse, or if it is not a Unicode scalar value (it is later
/// emitted as a `char` literal, which would not compile). Every data-line panic
/// message starts with `<path>:<line>`, matching `parse_single_tsv`.
fn parse_description_tsv(path: &std::path::Path) -> Vec<DescriptionEntry> {
    let text =
        fs::read_to_string(path).unwrap_or_else(|err| panic!("read {}: {err}", path.display()));
    let mut out = Vec::new();
    for (index, line) in text.lines().enumerate() {
        let trimmed = line.trim_end();
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }
        // Report 1-based line numbers, as editors do.
        let lineno = index + 1;
        let cols: Vec<&str> = trimmed.split('\t').collect();
        let &[description, hex] = cols.as_slice() else {
            panic!(
                "{}:{} expected 2 cols, got {}",
                path.display(),
                lineno,
                cols.len(),
            );
        };
        let codepoint = u32::from_str_radix(hex, 16).unwrap_or_else(|err| {
            panic!("{}:{lineno} codepoint hex {hex:?}: {err}", path.display())
        });
        assert!(
            char::from_u32(codepoint).is_some(),
            "{}:{lineno} U+{codepoint:04X} is not a Unicode scalar value",
            path.display(),
        );
        out.push(DescriptionEntry {
            description: description.to_owned(),
            codepoint,
        });
    }
    out
}
/// Builds the lookup key used by the generated mencode maps.
///
/// The key has the form `第{level}水準{plane}-{row}-{cell}`, where `level` is `3`
/// when `plane` is `1` and `4` for every other plane value.
fn mencode(plane: u8, row: u8, cell: u8) -> String {
    let level = match plane {
        1 => 3,
        _ => 4,
    };
    format!("第{level}水準{plane}-{row}-{cell}")
}