#[macro_use] extern crate lazy_static;
extern crate regex;
use std::char;
use std::collections::HashMap;
use regex::Regex;
// Voiced (dakuten) and semi-voiced (handakuten) sound marks exist in three
// encodings; each is named here as a `char` for comparisons in the scanners.
// U+3099 / U+309A: combining marks that attach to the preceding character.
const CH_VOICED_COMBI: char = '\u{3099}';
const CH_SEMIVOICED_COMBI: char = '\u{309A}';
// U+309B / U+309C: stand-alone (spacing) full-width marks.
const CH_VOICED_FULL: char = '\u{309B}';
const CH_SEMIVOICED_FULL: char = '\u{309C}';
// U+FF9E / U+FF9F: stand-alone half-width marks.
const CH_VOICED_HALF: char = '\u{FF9E}';
const CH_SEMIVOICED_HALF: char = '\u{FF9F}';
// ASCII space, emitted in front of a mark that could not be merged.
const CH_SPACE: char = '\u{20}';
// String forms of the combining marks, with and without a leading ASCII
// space; `despace` and `enspace` convert between the two spellings.
const VOICED_COMBI: &'static str = "\u{3099}";
const SEMIVOICED_COMBI: &'static str = "\u{309A}";
const VOICED_WITH_SPACE: &'static str = "\u{20}\u{3099}";
const SEMIVOICED_WITH_SPACE: &'static str = "\u{20}\u{309A}";
// Regex matching any spelling of the voiced mark: the combining mark with an
// optional preceding space, the full-width mark, or the half-width mark.
const RE_VOICED_MARKS: &'static str
= r"(?:\x20??\x{3099}|\x{309B}|\x{FF9E})";
// Same as above for the semi-voiced mark.
const RE_SEMIVOICED_MARKS: &'static str
= r"(?:\x20??\x{309A}|\x{309C}|\x{FF9F})";
lazy_static! {
    // Half-width katakana that merge with a following half-width semi-voiced
    // mark (U+FF9F), keyed to the precomposed full-width katakana (pa..po).
    static ref SEMIVOICED_HALVES: HashMap<char, char> = [
        ('\u{FF8A}', '\u{30D1}'),
        ('\u{FF8B}', '\u{30D4}'),
        ('\u{FF8C}', '\u{30D7}'),
        ('\u{FF8D}', '\u{30DA}'),
        ('\u{FF8E}', '\u{30DD}'),
    ].iter().copied().collect();

    // Half-width katakana that merge with a following half-width voiced mark
    // (U+FF9E), keyed to the precomposed full-width katakana.
    static ref VOICED_HALVES: HashMap<char, char> = [
        ('\u{FF66}', '\u{30FA}'),
        ('\u{FF73}', '\u{30F4}'),
        ('\u{FF76}', '\u{30AC}'),
        ('\u{FF77}', '\u{30AE}'),
        ('\u{FF78}', '\u{30B0}'),
        ('\u{FF79}', '\u{30B2}'),
        ('\u{FF7A}', '\u{30B4}'),
        ('\u{FF7B}', '\u{30B6}'),
        ('\u{FF7C}', '\u{30B8}'),
        ('\u{FF7D}', '\u{30BA}'),
        ('\u{FF7E}', '\u{30BC}'),
        ('\u{FF7F}', '\u{30BE}'),
        ('\u{FF80}', '\u{30C0}'),
        ('\u{FF81}', '\u{30C2}'),
        ('\u{FF82}', '\u{30C5}'),
        ('\u{FF83}', '\u{30C7}'),
        ('\u{FF84}', '\u{30C9}'),
        ('\u{FF8A}', '\u{30D0}'),
        ('\u{FF8B}', '\u{30D3}'),
        ('\u{FF8C}', '\u{30D6}'),
        ('\u{FF8D}', '\u{30D9}'),
        ('\u{FF8E}', '\u{30DC}'),
        ('\u{FF9C}', '\u{30F7}'),
    ].iter().copied().collect();

    // Full-width base characters and their semi-voiced forms.
    static ref SEMIVOICES: HashMap<char, char> = [
        // Katakana
        ('\u{30CF}', '\u{30D1}'),
        ('\u{30D2}', '\u{30D4}'),
        ('\u{30D5}', '\u{30D7}'),
        ('\u{30D8}', '\u{30DA}'),
        ('\u{30DB}', '\u{30DD}'),
        // Hiragana
        ('\u{306F}', '\u{3071}'),
        ('\u{3072}', '\u{3074}'),
        ('\u{3075}', '\u{3077}'),
        ('\u{3078}', '\u{307A}'),
        ('\u{307B}', '\u{307D}'),
    ].iter().copied().collect();

    // Full-width base characters and their voiced forms.
    static ref VOICES: HashMap<char, char> = [
        // Katakana
        ('\u{30A6}', '\u{30F4}'),
        ('\u{30AB}', '\u{30AC}'),
        ('\u{30AD}', '\u{30AE}'),
        ('\u{30AF}', '\u{30B0}'),
        ('\u{30B1}', '\u{30B2}'),
        ('\u{30B3}', '\u{30B4}'),
        ('\u{30B5}', '\u{30B6}'),
        ('\u{30B7}', '\u{30B8}'),
        ('\u{30B9}', '\u{30BA}'),
        ('\u{30BB}', '\u{30BC}'),
        ('\u{30BD}', '\u{30BE}'),
        ('\u{30BF}', '\u{30C0}'),
        ('\u{30C1}', '\u{30C2}'),
        ('\u{30C4}', '\u{30C5}'),
        ('\u{30C6}', '\u{30C7}'),
        ('\u{30C8}', '\u{30C9}'),
        ('\u{30CF}', '\u{30D0}'),
        ('\u{30D2}', '\u{30D3}'),
        ('\u{30D5}', '\u{30D6}'),
        ('\u{30D8}', '\u{30D9}'),
        ('\u{30DB}', '\u{30DC}'),
        ('\u{30EF}', '\u{30F7}'),
        ('\u{30F0}', '\u{30F8}'),
        ('\u{30F1}', '\u{30F9}'),
        ('\u{30F2}', '\u{30FA}'),
        // Katakana iteration mark -> voiced iteration mark, mirroring the
        // hiragana pair U+309D -> U+309E at the end of this table.
        ('\u{30FD}', '\u{30FE}'),
        // Hiragana
        ('\u{3046}', '\u{3094}'),
        ('\u{304B}', '\u{304C}'),
        ('\u{304D}', '\u{304E}'),
        ('\u{304F}', '\u{3050}'),
        ('\u{3051}', '\u{3052}'),
        ('\u{3053}', '\u{3054}'),
        ('\u{3055}', '\u{3056}'),
        ('\u{3057}', '\u{3058}'),
        ('\u{3059}', '\u{305A}'),
        ('\u{305B}', '\u{305C}'),
        ('\u{305D}', '\u{305E}'),
        ('\u{305F}', '\u{3060}'),
        ('\u{3061}', '\u{3062}'),
        ('\u{3064}', '\u{3065}'),
        ('\u{3066}', '\u{3067}'),
        ('\u{3068}', '\u{3069}'),
        ('\u{306F}', '\u{3070}'),
        ('\u{3072}', '\u{3073}'),
        ('\u{3075}', '\u{3076}'),
        ('\u{3078}', '\u{3079}'),
        ('\u{307B}', '\u{307C}'),
        ('\u{309D}', '\u{309E}'),
    ].iter().copied().collect();

    // Every half-width katakana / punctuation code point (U+FF61..=U+FF9F)
    // and its full-width counterpart. The two half-width sound marks map to
    // the combining marks U+3099 / U+309A.
    static ref HALVES: HashMap<char, char> = [
        ('\u{FF61}', '\u{3002}'),
        ('\u{FF62}', '\u{300C}'),
        ('\u{FF63}', '\u{300D}'),
        ('\u{FF64}', '\u{3001}'),
        ('\u{FF65}', '\u{30FB}'),
        ('\u{FF66}', '\u{30F2}'),
        ('\u{FF67}', '\u{30A1}'),
        ('\u{FF68}', '\u{30A3}'),
        ('\u{FF69}', '\u{30A5}'),
        ('\u{FF6A}', '\u{30A7}'),
        ('\u{FF6B}', '\u{30A9}'),
        ('\u{FF6C}', '\u{30E3}'),
        ('\u{FF6D}', '\u{30E5}'),
        ('\u{FF6E}', '\u{30E7}'),
        ('\u{FF6F}', '\u{30C3}'),
        ('\u{FF70}', '\u{30FC}'),
        ('\u{FF71}', '\u{30A2}'),
        ('\u{FF72}', '\u{30A4}'),
        ('\u{FF73}', '\u{30A6}'),
        ('\u{FF74}', '\u{30A8}'),
        ('\u{FF75}', '\u{30AA}'),
        ('\u{FF76}', '\u{30AB}'),
        ('\u{FF77}', '\u{30AD}'),
        ('\u{FF78}', '\u{30AF}'),
        ('\u{FF79}', '\u{30B1}'),
        ('\u{FF7A}', '\u{30B3}'),
        ('\u{FF7B}', '\u{30B5}'),
        ('\u{FF7C}', '\u{30B7}'),
        ('\u{FF7D}', '\u{30B9}'),
        ('\u{FF7E}', '\u{30BB}'),
        ('\u{FF7F}', '\u{30BD}'),
        ('\u{FF80}', '\u{30BF}'),
        ('\u{FF81}', '\u{30C1}'),
        ('\u{FF82}', '\u{30C4}'),
        ('\u{FF83}', '\u{30C6}'),
        ('\u{FF84}', '\u{30C8}'),
        ('\u{FF85}', '\u{30CA}'),
        ('\u{FF86}', '\u{30CB}'),
        ('\u{FF87}', '\u{30CC}'),
        ('\u{FF88}', '\u{30CD}'),
        ('\u{FF89}', '\u{30CE}'),
        ('\u{FF8A}', '\u{30CF}'),
        ('\u{FF8B}', '\u{30D2}'),
        ('\u{FF8C}', '\u{30D5}'),
        ('\u{FF8D}', '\u{30D8}'),
        ('\u{FF8E}', '\u{30DB}'),
        ('\u{FF8F}', '\u{30DE}'),
        ('\u{FF90}', '\u{30DF}'),
        ('\u{FF91}', '\u{30E0}'),
        ('\u{FF92}', '\u{30E1}'),
        ('\u{FF93}', '\u{30E2}'),
        ('\u{FF94}', '\u{30E4}'),
        ('\u{FF95}', '\u{30E6}'),
        ('\u{FF96}', '\u{30E8}'),
        ('\u{FF97}', '\u{30E9}'),
        ('\u{FF98}', '\u{30EA}'),
        ('\u{FF99}', '\u{30EB}'),
        ('\u{FF9A}', '\u{30EC}'),
        ('\u{FF9B}', '\u{30ED}'),
        ('\u{FF9C}', '\u{30EF}'),
        ('\u{FF9D}', '\u{30F3}'),
        ('\u{FF9E}', '\u{3099}'),
        ('\u{FF9F}', '\u{309A}'),
    ].iter().copied().collect();
}
/// Rewrites `src` one character at a time through a code-point mapping.
///
/// A character whose scalar value satisfies `judge` is replaced by the
/// character at `convert(value)`; every other character is copied as is.
///
/// # Panics
///
/// Panics if `convert` returns a value that is not a valid Unicode scalar.
fn shift_code<F, G>(judge: F, convert: G, src: &str) -> String
where
    F: Fn(u32) -> bool,
    G: Fn(u32) -> u32,
{
    let mut shifted = String::new();
    for ch in src.chars() {
        let code = ch as u32;
        let mapped = if judge(code) {
            char::from_u32(convert(code)).unwrap()
        } else {
            ch
        };
        shifted.push(mapped);
    }
    shifted
}
/// Converts full-width ASCII variants (U+FF01..=U+FF5E) in `s` to the
/// corresponding ASCII characters (U+0021..=U+007E); other characters are
/// left unchanged.
pub fn wide2ascii(s: &str) -> String {
    let is_wide_ascii = |code: u32| (0xff01..=0xff5e).contains(&code);
    let narrow = |code: u32| code - 0xfee0;
    shift_code(is_wide_ascii, narrow, s)
}
/// Converts printable, non-space ASCII characters (U+0021..=U+007E) in `s`
/// to their full-width variants (U+FF01..=U+FF5E); other characters,
/// including the ASCII space, are left unchanged.
pub fn ascii2wide(s: &str) -> String {
    let is_printable_ascii = |code: u32| (0x0021..=0x007e).contains(&code);
    let widen = |code: u32| code + 0xfee0;
    shift_code(is_printable_ascii, widen, s)
}
/// Converts hiragana (U+3041..=U+3096) in `s` to katakana by shifting each
/// code point up by 0x60; other characters are left unchanged.
pub fn hira2kata(s: &str) -> String {
    let is_hiragana = |code: u32| (0x3041..=0x3096).contains(&code);
    let to_katakana = |code: u32| code + 0x0060;
    shift_code(is_hiragana, to_katakana, s)
}
/// Converts katakana (U+30A1..=U+30F6) in `s` to hiragana by shifting each
/// code point down by 0x60; other characters are left unchanged.
pub fn kata2hira(s: &str) -> String {
    let is_katakana = |code: u32| (0x30A1..=0x30F6).contains(&code);
    let to_hiragana = |code: u32| code - 0x0060;
    shift_code(is_katakana, to_hiragana, s)
}
// Tries to merge a base character `$a` with the sound mark `$b` that
// follows it.
//
// When `$judge($b)` is true and `$table` has an entry for `$a`, the mapped
// character is pushed onto `$res` and the macro executes `return None`.
// That `return` leaves whatever function or closure the macro is expanded
// in; in this file it is used inside the `fold` closures of `half2kana` and
// `combine`, where returning `None` resets the accumulator so the mark is
// not emitted again. When either test fails the macro does nothing and
// control falls through to the statements after the invocation.
macro_rules! push_content {
($judge:expr, $table:expr, $res:expr, $a:expr, $b:expr) => {
if $judge($b) {
if let Some(v) = $table.get(&$a) {
$res.push(*v);
// Early exit from the enclosing closure: the pair has been consumed.
return None;
}
}
};
}
/// Replaces each half-width katakana or punctuation character in `s` with
/// its `HALVES` counterpart, one character at a time. Sound marks are not
/// merged with the preceding character; characters without a table entry are
/// copied unchanged.
pub fn half2full(s: &str) -> String {
    let mut widened = String::new();
    for half in s.chars() {
        widened.push(consult(&HALVES, &half));
    }
    widened
}
/// Converts half-width katakana in `s` to full-width katakana, merging a
/// base character with a following half-width voiced (U+FF9E) or semi-voiced
/// (U+FF9F) mark into one character when the tables define such a pair.
///
/// A mark that was not merged is emitted as an ASCII space followed by its
/// `HALVES` counterpart. Characters without a table entry pass through.
pub fn half2kana(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // The scan runs one character behind: each step emits the held character
    // once its successor is known. The trailing sentinel space flushes the
    // last real character and is never emitted itself.
    s.chars().chain(std::iter::once(' ')).fold(None, |held, next| {
        if let Some(cur) = held {
            // On a successful merge the macro pushes the combined character
            // and returns `None` from this closure, consuming the mark.
            push_content!(|m| m == CH_VOICED_HALF, VOICED_HALVES, out, cur, next);
            push_content!(|m| m == CH_SEMIVOICED_HALF, SEMIVOICED_HALVES, out, cur, next);
            let is_stray_mark = cur == CH_VOICED_HALF || cur == CH_SEMIVOICED_HALF;
            if is_stray_mark {
                out.push(CH_SPACE);
            }
            out.push(consult(&HALVES, &cur));
        }
        Some(next)
    });
    out
}
/// Merges each base kana in `s` with a directly following voiced or
/// semi-voiced sound mark (half-width, full-width or combining form) into
/// the single character listed in `VOICES` / `SEMIVOICES`.
///
/// The input is first passed through `despace`, and the result through
/// `enspace`, so combining marks that remain unmerged come out preceded by
/// an ASCII space.
pub fn combine(s: &str) -> String {
    let compact = despace(s);
    let mut out = String::with_capacity(compact.len());
    // One-behind scan with a sentinel space, so the final character is
    // flushed; the sentinel itself is never pushed.
    compact.chars().chain(std::iter::once(' ')).fold(None, |held, next| {
        if let Some(cur) = held {
            // Each macro call returns `None` from this closure after pushing
            // a merged character, which drops the consumed mark.
            push_content!(
                |m| m == CH_VOICED_HALF || m == CH_VOICED_FULL || m == CH_VOICED_COMBI,
                VOICES, out, cur, next
            );
            push_content!(
                |m| m == CH_SEMIVOICED_HALF
                    || m == CH_SEMIVOICED_FULL
                    || m == CH_SEMIVOICED_COMBI,
                SEMIVOICES, out, cur, next
            );
            out.push(cur);
        }
        Some(next)
    });
    enspace(&out)
}
/// Looks `c` up in `table`, returning the mapped character, or `c` itself
/// when the table has no entry for it.
fn consult(table: &HashMap<char, char>, c: &char) -> char {
    table.get(c).copied().unwrap_or(*c)
}
/// Removes the ASCII space in front of each combining sound mark: every
/// "space + U+3099" becomes U+3099, then every "space + U+309A" becomes
/// U+309A.
fn despace(s: &str) -> String {
    s.replace(VOICED_WITH_SPACE, VOICED_COMBI)
        .replace(SEMIVOICED_WITH_SPACE, SEMIVOICED_COMBI)
}
/// Inserts an ASCII space in front of each combining sound mark: every
/// U+3099 becomes "space + U+3099", then every U+309A becomes
/// "space + U+309A".
fn enspace(s: &str) -> String {
    s.replace(VOICED_COMBI, VOICED_WITH_SPACE)
        .replace(SEMIVOICED_COMBI, SEMIVOICED_WITH_SPACE)
}
/// Rewrites every sound mark in `src` to one chosen spelling.
///
/// Each match of `RE_VOICED_MARKS` is replaced by `vmark`, then each match
/// of `RE_SEMIVOICED_MARKS` in that result is replaced by `svmark`.
///
/// # Panics
///
/// Panics on first use if either pattern constant fails to compile.
fn replace_marks(vmark: &str, svmark: &str, src: &str) -> String {
    lazy_static! {
        // Compiled once on first use and shared by all later calls.
        static ref RE1: Regex = Regex::new(RE_VOICED_MARKS)
            .expect("RE_VOICED_MARKS must be a valid regex");
        static ref RE2: Regex = Regex::new(RE_SEMIVOICED_MARKS)
            .expect("RE_SEMIVOICED_MARKS must be a valid regex");
    }
    let voiced_done = RE1.replace_all(src, vmark);
    // `replace_all` returns `String` in regex 0.1 and `Cow<str>` in later
    // releases; `.into()` produces the owned `String` in either case.
    RE2.replace_all(&voiced_done, svmark).into()
}
/// Rewrites every voiced / semi-voiced sound mark in `s` to its half-width
/// form (U+FF9E / U+FF9F).
pub fn vsmark2half(s: &str) -> String {
    let voiced = CH_VOICED_HALF.to_string();
    let semivoiced = CH_SEMIVOICED_HALF.to_string();
    replace_marks(&voiced, &semivoiced, s)
}
/// Rewrites every voiced / semi-voiced sound mark in `s` to its stand-alone
/// full-width form (U+309B / U+309C).
pub fn vsmark2full(s: &str) -> String {
    let voiced = CH_VOICED_FULL.to_string();
    let semivoiced = CH_SEMIVOICED_FULL.to_string();
    replace_marks(&voiced, &semivoiced, s)
}
/// Rewrites every voiced / semi-voiced sound mark in `s` to an ASCII space
/// followed by the combining mark (U+3099 / U+309A).
pub fn vsmark2combi(s: &str) -> String {
    let (voiced, semivoiced): (&str, &str) = (VOICED_WITH_SPACE, SEMIVOICED_WITH_SPACE);
    replace_marks(voiced, semivoiced, s)
}
/// Replaces every ideographic (full-width) space U+3000 in `s` with an ASCII
/// space U+0020.
pub fn nowidespace(s: &str) -> String {
    s.chars()
        .map(|c| if c == '\u{3000}' { '\u{20}' } else { c })
        .collect()
}
/// Replaces every ASCII space U+0020 in `s` with an ideographic (full-width)
/// space U+3000.
pub fn space2wide(s: &str) -> String {
    s.chars()
        .map(|c| if c == '\u{20}' { '\u{3000}' } else { c })
        .collect()
}
/// Replaces every full-width yen sign U+FFE5 in `s` with the Latin-1 yen
/// sign U+00A5.
pub fn nowideyen(s: &str) -> String {
    const WIDE_YEN: char = '\u{ffe5}';
    const NARROW_YEN: &str = "\u{a5}";
    s.replace(WIDE_YEN, NARROW_YEN)
}
/// Replaces every Latin-1 yen sign U+00A5 in `s` with the full-width yen
/// sign U+FFE5.
pub fn yen2wide(s: &str) -> String {
    const NARROW_YEN: char = '\u{a5}';
    const WIDE_YEN: &str = "\u{ffe5}";
    s.replace(NARROW_YEN, WIDE_YEN)
}
#[cfg(test)]
mod tests {
    use super::*;

    // All width-sensitive literals are written as `\u{...}` escapes so the
    // tests keep exercising the intended code points even if the file passes
    // through a tool that applies Unicode compatibility normalisation.

    /// ASCII <-> full-width ASCII and hiragana <-> katakana shifts.
    #[test]
    fn pub_fn_t1() {
        // Full-width spelling of "!rust-0;".
        let wide = "\u{FF01}\u{FF52}\u{FF55}\u{FF53}\u{FF54}\u{FF0D}\u{FF10}\u{FF1B}";
        assert_eq!("!rust-0;", wide2ascii(wide));
        assert_eq!(wide, ascii2wide("!rust-0;"));
        // Hiragana "ka na" <-> katakana "ka na".
        assert_eq!("\u{30AB}\u{30CA}", hira2kata("\u{304B}\u{306A}"));
        assert_eq!("\u{304B}\u{306A}", kata2hira("\u{30AB}\u{30CA}"));
    }

    /// Space and yen-sign width conversions.
    #[test]
    fn pub_fn_t2() {
        assert_eq!("\u{20}", nowidespace("\u{3000}"));
        assert_eq!("\u{3000}", space2wide("\u{20}"));
        assert_eq!("\u{A5}", nowideyen("\u{FFE5}"));
        assert_eq!("\u{FFE5}", yen2wide("\u{A5}"));
    }

    /// Half-width kana widening and sound-mark merging.
    #[test]
    fn kana_t1() {
        assert_eq!(Some(&'\u{30A2}'), HALVES.get(&'\u{FF71}'));
        // Half-width "ka", voiced mark, "na".
        let half = "\u{FF76}\u{FF9E}\u{FF85}";
        // half2full maps character by character: the mark becomes U+3099.
        assert_eq!("\u{30AB}\u{3099}\u{30CA}", half2full(half));
        // half2kana merges base + mark into precomposed "ga".
        assert_eq!("\u{30AC}\u{30CA}", half2kana(half));
        // Hiragana "ka" + full-width voiced mark + "na" -> "ga na".
        assert_eq!("\u{304C}\u{306A}", combine("\u{304B}\u{309B}\u{306A}"));
    }
}