use std::env;
use std::io;
use std::io::BufRead;
use std::process;
/// Numeric value of a code point.
type CodePoint = u32;

/// A single fold mapping: the code point `orig` folds to `folded`.
#[derive(Debug, Copy, Clone)]
struct FoldPair {
    /// The code point being folded.
    orig: CodePoint,
    /// The code point that `orig` folds to.
    folded: CodePoint,
}

impl FoldPair {
    /// Returns the signed amount that, added to `orig`, yields `folded`.
    fn delta(self) -> i32 {
        let FoldPair { orig, folded } = self;
        folded as i32 - orig as i32
    }

    /// Returns how far `rhs.orig` lies past this pair's `orig`.
    ///
    /// This is a plain unsigned subtraction, so `rhs.orig` is expected to be
    /// no smaller than `self.orig`.
    fn stride_to(self, rhs: FoldPair) -> u32 {
        let (here, there) = (self.orig, rhs.orig);
        there - here
    }
}
/// A run of fold pairs that can be emitted as a single table entry: every pair
/// has the same delta, and consecutive pairs are the same distance apart.
struct DeltaBlock {
    /// The pairs of this block, in the order they were appended.
    folds: Vec<FoldPair>,
}

impl DeltaBlock {
    /// Largest value `length()` may reach.
    ///
    /// The generated `FoldRange::length` field is a `u8`, so a block must not
    /// span more than 255 code points or the emitted table would not compile.
    const MAX_LENGTH: u32 = 255;

    /// Creates a block holding only `fp`.
    fn create(fp: FoldPair) -> DeltaBlock {
        DeltaBlock { folds: vec![fp] }
    }

    /// Returns the distance between the first two pairs, or `None` while the
    /// block holds fewer than two pairs.
    fn stride(&self) -> Option<u32> {
        if self.folds.len() >= 2 {
            Some(self.folds[0].stride_to(self.folds[1]))
        } else {
            None
        }
    }

    /// Returns the first pair. Panics if the block is empty.
    fn first(&self) -> FoldPair {
        *self.folds.first().unwrap()
    }

    /// Returns the last pair. Panics if the block is empty.
    fn last(&self) -> FoldPair {
        *self.folds.last().unwrap()
    }

    /// Returns the number of code points spanned from the first `orig` to the
    /// last `orig`, inclusive. Panics if the block is empty.
    fn length(&self) -> usize {
        (self.last().orig as usize) - (self.first().orig as usize) + 1
    }

    /// Returns the delta shared by the pairs of this block (taken from the
    /// first pair). Panics if the block is empty.
    fn delta(&self) -> i32 {
        self.first().delta()
    }

    /// Returns whether `fp` may be added to this block.
    ///
    /// A non-empty block accepts `fp` only if all of the following hold:
    /// * `fp.orig` is strictly greater than the last `orig` in the block;
    /// * the block's length after the append stays within `MAX_LENGTH`;
    /// * `fp` has the same delta as the block;
    /// * once a stride is established, `fp` continues it.
    fn can_append(&self, fp: FoldPair) -> bool {
        if self.folds.is_empty() {
            return true;
        }
        let (first, last) = (self.first(), self.last());
        // Out-of-order or duplicate code points never extend a block. This
        // also keeps the unsigned subtractions below from underflowing and
        // rules out a stride of zero.
        if fp.orig <= last.orig {
            return false;
        }
        // After the append the length would be `fp.orig - first.orig + 1`.
        if fp.orig - first.orig >= Self::MAX_LENGTH {
            return false;
        }
        if self.delta() != fp.delta() {
            return false;
        }
        match self.stride() {
            Some(stride) => stride == last.stride_to(fp),
            None => true,
        }
    }

    /// Adds `fp` to the end of the block.
    ///
    /// The caller is expected to have checked `can_append(fp)`; this is only
    /// verified in debug builds.
    fn append(&mut self, fp: FoldPair) {
        debug_assert!(self.can_append(fp));
        self.folds.push(fp)
    }
}
fn create_delta_blocks(fps: &[FoldPair]) -> Vec<DeltaBlock> {
let mut blocks: Vec<DeltaBlock> = Vec::new();
for &fp in fps {
match blocks.last_mut() {
Some(ref mut db) if db.can_append(fp) => db.append(fp),
_ => blocks.push(DeltaBlock::create(fp)),
}
}
blocks
}
/// Renders `dbs` as Rust source text for the fold table.
///
/// The result consists of the `FoldRange` struct and `fr` constructor
/// definitions followed by a `FOLDS` static array holding one `fr(...)` call
/// per block, laid out four calls per line.
fn format_delta_blocks(dbs: &[DeltaBlock]) -> String {
    /// Renders one block as an `fr(start, length, delta, modulo)` call.
    /// A block without an established stride is emitted with modulo 1.
    fn render_entry(block: &DeltaBlock) -> String {
        format!(
            "fr({start:#04X}, {length}, {delta}, {modulo})",
            start = block.first().orig,
            length = block.length(),
            delta = block.delta(),
            modulo = block.stride().unwrap_or(1),
        )
    }

    // Definitions emitted ahead of the table itself.
    const HEADER: &str = r#"
/// This file is autogenerated from gen_folds.rs and CaseFolding.txt.
pub struct FoldRange {
/// The first codepoint in the range.
pub start:u32,
/// The length of the range, in code points.
pub length:u8,
/// The (signed) delta amount.
/// Folds are performed by adding this (signed) value to a code point.
pub delta:i32,
/// The modulo amount.
/// Folds are only performed if the code point is a multiple of this value.
pub modulo:u8,
}
const fn fr(start: u32, length: u8, delta: i32, modulo: u8) -> FoldRange {
FoldRange {start, length, delta, modulo}
}
"#;

    // Four entries per row; every row ends with a trailing comma.
    let rows: Vec<String> = dbs
        .chunks(4)
        .map(|group| {
            let cells: Vec<String> = group.iter().map(render_entry).collect();
            let mut row = cells.join(", ");
            row.push(',');
            row
        })
        .collect();

    format!(
        r#"
{prefix}
pub static FOLDS: [FoldRange; {count}] = [
{lines}
];
"#,
        prefix = HEADER.trim(),
        count = dbs.len(),
        lines = rows.join("\n ")
    )
}
/// Parses one line of `CaseFolding.txt` into a fold pair.
///
/// Everything from the first `#` onwards is ignored. What remains must split
/// on `;` into exactly four fields (code, status, mapping, and the piece
/// after the final `;`), and the status must be `C` or `S`. Any other line
/// yields `None`.
///
/// # Panics
///
/// Panics if the code or mapping field of a `C`/`S` line is not a
/// hexadecimal `u32`. Such a line means the input is corrupt, and silently
/// skipping it would produce an incomplete table.
fn process_simple_fold(s: &str) -> Option<FoldPair> {
    // `split` always yields at least one piece: the text before any `#`.
    let data = s.trim().split('#').next().unwrap_or("");
    let fields: Vec<&str> = data.split(';').map(str::trim).collect();
    let (orig, status, folded) = match fields.as_slice() {
        [orig, status, folded, _] => (*orig, *status, *folded),
        _ => return None,
    };
    if status != "C" && status != "S" {
        return None;
    }
    let from_hex = |field: &str| -> u32 {
        u32::from_str_radix(field, 16)
            .unwrap_or_else(|err| panic!("invalid hex code point {:?}: {}", field, err))
    };
    Some(FoldPair {
        orig: from_hex(orig),
        folded: from_hex(folded),
    })
}
/// Prints the usage line to standard error and terminates the process with
/// exit status 1.
fn print_usage_and_exit() {
    // Standard error, not standard output: the documented invocation
    // redirects standard output into the generated file, so a usage message
    // printed there would be hidden from the user and written into that file.
    eprintln!("Usage: curl -L ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt | gen_folds > foldtable.rs");
    process::exit(1)
}
fn main() {
let argv: Vec<String> = env::args().collect();
if argv.len() >= 2 {
print_usage_and_exit();
}
let stdin = io::stdin();
let mut foldpairs = Vec::new();
for line in stdin.lock().lines() {
let s: String = line.unwrap();
if let Some(s) = s.as_str().trim().split('#').next() {
if let Some(fp) = process_simple_fold(s) {
foldpairs.push(fp);
}
}
}
let dblocks = create_delta_blocks(&foldpairs);
println!("{}", format_delta_blocks(&dblocks).trim());
}