#![doc(html_root_url = "https://docs.rs/kanji/2.0.0")]
use std::char;
use std::collections::HashMap;
use std::fmt;
#[cfg(feature = "serde")]
use serde::de::{Error, Unexpected, Visitor};
#[cfg(feature = "serde")]
use serde::{Deserialize, Deserializer, Serialize, Serializer};
pub mod exam_lists;
/// A single `char` that has passed the [`is_kanji`] range check.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`Kanji::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct Kanji(char);

impl Kanji {
    /// Wraps `c` when [`is_kanji`] accepts it; returns `None` otherwise.
    pub fn new(c: char) -> Option<Kanji> {
        Some(c).filter(|&ch| is_kanji(ch)).map(Kanji)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let Kanji(inner) = *self;
        inner
    }
}

impl fmt::Display for Kanji {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for Kanji {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Kanji(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Kanji {
    /// Asks the deserializer for a `char` and lets `KanjiVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<Kanji, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<Kanji, D::Error>`, so it is
        // returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(KanjiVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `Kanji::new`.
#[cfg(feature = "serde")]
struct KanjiVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for KanjiVisitor {
    type Value = Kanji;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single kanji character")
    }

    /// Accepts `v` only if `Kanji::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<Kanji, E> {
        // Lazy construction: the error is only built when validation fails.
        Kanji::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<Kanji, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A single `char` that has passed the [`is_hiragana`] range check.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`Hiragana::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct Hiragana(char);

impl Hiragana {
    /// Wraps `c` when [`is_hiragana`] accepts it; returns `None` otherwise.
    pub fn new(c: char) -> Option<Hiragana> {
        Some(c).filter(|&ch| is_hiragana(ch)).map(Hiragana)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let Hiragana(inner) = *self;
        inner
    }
}

impl fmt::Display for Hiragana {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for Hiragana {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Hiragana(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Hiragana {
    /// Asks the deserializer for a `char` and lets `HiraganaVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<Hiragana, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<Hiragana, D::Error>`, so it
        // is returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(HiraganaVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `Hiragana::new`.
#[cfg(feature = "serde")]
struct HiraganaVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for HiraganaVisitor {
    type Value = Hiragana;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single hiragana character")
    }

    /// Accepts `v` only if `Hiragana::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<Hiragana, E> {
        // Lazy construction: the error is only built when validation fails.
        Hiragana::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<Hiragana, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A single `char` that has passed the [`is_katakana`] range check.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`Katakana::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct Katakana(char);

impl Katakana {
    /// Wraps `c` when [`is_katakana`] accepts it; returns `None` otherwise.
    pub fn new(c: char) -> Option<Katakana> {
        Some(c).filter(|&ch| is_katakana(ch)).map(Katakana)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let Katakana(inner) = *self;
        inner
    }
}

impl fmt::Display for Katakana {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for Katakana {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Katakana(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Katakana {
    /// Asks the deserializer for a `char` and lets `KatakanaVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<Katakana, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<Katakana, D::Error>`, so it
        // is returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(KatakanaVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `Katakana::new`.
#[cfg(feature = "serde")]
struct KatakanaVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for KatakanaVisitor {
    type Value = Katakana;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single katakana character")
    }

    /// Accepts `v` only if `Katakana::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<Katakana, E> {
        // Lazy construction: the error is only built when validation fails.
        Katakana::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<Katakana, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A single `char` that has passed the [`is_japanese_punct`] range check.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`Punctuation::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct Punctuation(char);

impl Punctuation {
    /// Wraps `c` when [`is_japanese_punct`] accepts it; returns `None` otherwise.
    pub fn new(c: char) -> Option<Punctuation> {
        Some(c).filter(|&ch| is_japanese_punct(ch)).map(Punctuation)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let Punctuation(inner) = *self;
        inner
    }
}

impl fmt::Display for Punctuation {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for Punctuation {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Punctuation(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Punctuation {
    /// Asks the deserializer for a `char` and lets `PunctuationVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<Punctuation, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<Punctuation, D::Error>`, so it
        // is returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(PunctuationVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `Punctuation::new`.
#[cfg(feature = "serde")]
struct PunctuationVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for PunctuationVisitor {
    type Value = Punctuation;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single Japanese punctuation character")
    }

    /// Accepts `v` only if `Punctuation::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<Punctuation, E> {
        // Lazy construction: the error is only built when validation fails.
        Punctuation::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<Punctuation, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A single `char` that has passed the [`is_alphanum`] range check.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`AlphaNum::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct AlphaNum(char);

impl AlphaNum {
    /// Wraps `c` when [`is_alphanum`] accepts it; returns `None` otherwise.
    pub fn new(c: char) -> Option<AlphaNum> {
        Some(c).filter(|&ch| is_alphanum(ch)).map(AlphaNum)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let AlphaNum(inner) = *self;
        inner
    }
}

impl fmt::Display for AlphaNum {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for AlphaNum {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let AlphaNum(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for AlphaNum {
    /// Asks the deserializer for a `char` and lets `AlphaNumVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<AlphaNum, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<AlphaNum, D::Error>`, so it
        // is returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(AlphaNumVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `AlphaNum::new`.
#[cfg(feature = "serde")]
struct AlphaNumVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for AlphaNumVisitor {
    type Value = AlphaNum;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single full-width character in the range U+FF01..=U+FF5E")
    }

    /// Accepts `v` only if `AlphaNum::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<AlphaNum, E> {
        // Lazy construction: the error is only built when validation fails.
        AlphaNum::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<AlphaNum, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A single `char` for which `char::is_ascii` returns `true`.
///
/// The wrapped field is private, so code outside this module cannot build a
/// value directly and has to go through [`ASCII::new`].
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub struct ASCII(char);

impl ASCII {
    /// Wraps `c` when it is an ASCII character; returns `None` otherwise.
    pub fn new(c: char) -> Option<ASCII> {
        Some(c).filter(|ch| ch.is_ascii()).map(ASCII)
    }

    /// Returns the wrapped `char`.
    pub fn get(&self) -> char {
        let ASCII(inner) = *self;
        inner
    }
}

impl fmt::Display for ASCII {
    /// Writes the wrapped character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for ASCII {
    /// Serializes the value as a bare `char`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let ASCII(inner) = *self;
        serializer.serialize_char(inner)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for ASCII {
    /// Asks the deserializer for a `char` and lets `ASCIIVisitor` validate it.
    fn deserialize<D>(deserializer: D) -> Result<ASCII, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<ASCII, D::Error>`, so it is
        // returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(ASCIIVisitor)
    }
}
/// Serde visitor that accepts exactly one character and validates it with `ASCII::new`.
#[cfg(feature = "serde")]
struct ASCIIVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for ASCIIVisitor {
    type Value = ASCII;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a single ASCII character")
    }

    /// Accepts `v` only if `ASCII::new` does.
    fn visit_char<E: Error>(self, v: char) -> Result<ASCII, E> {
        // Lazy construction: the error is only built when validation fails.
        ASCII::new(v).ok_or_else(|| Error::invalid_value(Unexpected::Char(v), &self))
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<ASCII, E> {
        let mut chars = v.chars();
        // A first char with nothing after it is the only accepted shape;
        // empty and multi-char strings are rejected.
        match (chars.next(), chars.next()) {
            (Some(only), None) => self.visit_char(only),
            _ => Err(Error::invalid_value(Unexpected::Str(v), &self)),
        }
    }
}
/// A `char` tagged with the first character category that accepts it.
///
/// See [`Character::new`] for the order in which categories are tried.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub enum Character {
    /// Accepted by [`Kanji::new`].
    Kanji(Kanji),
    /// Accepted by [`Hiragana::new`].
    Hiragana(Hiragana),
    /// Accepted by [`Katakana::new`].
    Katakana(Katakana),
    /// Accepted by [`Punctuation::new`].
    Punctuation(Punctuation),
    /// Accepted by [`AlphaNum::new`].
    AlphaNum(AlphaNum),
    /// Accepted by [`ASCII::new`].
    ASCII(ASCII),
    /// Rejected by every wrapper above.
    Other(char),
}

impl Character {
    /// Classifies `c`.
    ///
    /// The wrappers are tried in the order Kanji, Hiragana, Katakana,
    /// Punctuation, AlphaNum, ASCII; the first one that accepts `c` wins and
    /// `Other` is the fallback.
    pub fn new(c: char) -> Character {
        if let Some(k) = Kanji::new(c) {
            Character::Kanji(k)
        } else if let Some(h) = Hiragana::new(c) {
            Character::Hiragana(h)
        } else if let Some(k) = Katakana::new(c) {
            Character::Katakana(k)
        } else if let Some(p) = Punctuation::new(c) {
            Character::Punctuation(p)
        } else if let Some(a) = AlphaNum::new(c) {
            Character::AlphaNum(a)
        } else if let Some(a) = ASCII::new(c) {
            Character::ASCII(a)
        } else {
            Character::Other(c)
        }
    }

    /// Returns the inner [`Kanji`] for the `Kanji` variant and `None` for all others.
    pub fn kanji(&self) -> Option<Kanji> {
        if let Character::Kanji(k) = *self {
            Some(k)
        } else {
            None
        }
    }

    /// Returns the underlying `char`, whatever the variant.
    pub fn get(&self) -> char {
        match *self {
            Character::Kanji(inner) => inner.get(),
            Character::Hiragana(inner) => inner.get(),
            Character::Katakana(inner) => inner.get(),
            Character::Punctuation(inner) => inner.get(),
            Character::AlphaNum(inner) => inner.get(),
            Character::ASCII(inner) => inner.get(),
            Character::Other(raw) => raw,
        }
    }
}

impl fmt::Display for Character {
    /// Writes the underlying character as-is; width/fill flags on `f` are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Write::write_char(f, self.get())
    }
}
#[cfg(feature = "serde")]
impl Serialize for Character {
    /// Serializes the value as a bare `char`; the variant tag is not written.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let raw = self.get();
        serializer.serialize_char(raw)
    }
}
#[cfg(feature = "serde")]
impl<'de> Deserialize<'de> for Character {
    /// Asks the deserializer for a `char` and lets `CharacterVisitor` classify it.
    fn deserialize<D>(deserializer: D) -> Result<Character, D::Error>
    where
        D: Deserializer<'de>,
    {
        // `deserialize_char` already yields `Result<Character, D::Error>`, so it
        // is returned directly instead of being unwrapped with `?` and re-wrapped.
        deserializer.deserialize_char(CharacterVisitor)
    }
}
/// Serde visitor that accepts exactly one character and classifies it with `Character::new`.
#[cfg(feature = "serde")]
struct CharacterVisitor;

#[cfg(feature = "serde")]
impl<'de> Visitor<'de> for CharacterVisitor {
    type Value = Character;

    /// Describes the accepted input; serde appends this to its "expected ..." errors.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("a character in the legal UTF8 range")
    }

    /// Never fails: every `char` maps to some `Character` variant.
    fn visit_char<E: Error>(self, v: char) -> Result<Character, E> {
        let classified = Character::new(v);
        Ok(classified)
    }

    /// Accepts a string holding exactly one `char` and defers to `visit_char`.
    fn visit_str<E: Error>(self, v: &str) -> Result<Character, E> {
        let mut chars = v.chars();
        let first = chars.next();
        let second = chars.next();
        // Only "one char, then nothing" is accepted; empty and multi-char
        // strings are rejected.
        if let (Some(only), None) = (first, second) {
            self.visit_char(only)
        } else {
            Err(Error::invalid_value(Unexpected::Str(v), &self))
        }
    }
}
/// A level that a kanji can be assigned to by [`level_table`].
///
/// NOTE(review): presumably these are the levels of the Japan Kanji Aptitude
/// Test (the lists live in `exam_lists`) — confirm against that module.
///
/// The derived ordering follows declaration order, so `Ten` is the smallest
/// value and `One` the largest.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)]
pub enum Level {
    Ten,
    Nine,
    Eight,
    Seven,
    Six,
    Five,
    Four,
    Three,
    PreTwo,
    Two,
    PreOne,
    One,
}

impl fmt::Display for Level {
    /// Writes the level as a kanji numeral; the `Pre*` variants get a `準` prefix.
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        let numeral = match *self {
            Level::Ten => "十",
            Level::Nine => "九",
            Level::Eight => "八",
            Level::Seven => "七",
            Level::Six => "六",
            Level::Five => "五",
            Level::Four => "四",
            Level::Three => "三",
            Level::PreTwo => "準二",
            Level::Two => "二",
            Level::PreOne => "準一",
            Level::One => "一",
        };
        f.write_str(numeral)
    }
}
/// Reports whether `c` lies in one of the code point ranges treated as kanji.
///
/// The labels below are the Unicode block names for each range.
/// NOTE(review): several upper bounds stop short of the end of their block and
/// look like the last code points assigned as of Unicode 13 — confirm before
/// widening them.
pub fn is_kanji(c: char) -> bool {
    matches!(
        c,
        '\u{4e00}'..='\u{9ffc}' // CJK Unified Ideographs
            | '\u{f900}'..='\u{faff}' // CJK Compatibility Ideographs
            | '\u{3400}'..='\u{4dbf}' // Extension A
            | '\u{20000}'..='\u{2a6dd}' // Extension B
            | '\u{2a700}'..='\u{2b734}' // Extension C
            | '\u{2b740}'..='\u{2b81d}' // Extension D
            | '\u{2b820}'..='\u{2cea1}' // Extension E
            | '\u{2ceb0}'..='\u{2ebe0}' // Extension F
            | '\u{30000}'..='\u{3134a}' // Extension G
    )
}
/// Reports whether `c` lies in `U+3041..=U+309F`, the range treated as hiragana.
pub fn is_hiragana(c: char) -> bool {
    matches!(c, '\u{3041}'..='\u{309f}')
}
/// Reports whether `c` lies in `U+30A0..=U+30FF`, the range treated as katakana.
pub fn is_katakana(c: char) -> bool {
    matches!(c, '\u{30a0}'..='\u{30ff}')
}
/// Reports whether `c` lies in `U+3000..=U+303F`, the range treated as Japanese punctuation.
pub fn is_japanese_punct(c: char) -> bool {
    matches!(c, '\u{3000}'..='\u{303f}')
}
/// Reports whether `c` lies in `U+FF01..=U+FF5E`, the full-width range this
/// crate treats as "alphanumeric".
pub fn is_alphanum(c: char) -> bool {
    matches!(c, '\u{ff01}'..='\u{ff5e}')
}
/// Builds a `String` holding every code point from `U+4E00` through `U+9FFC`
/// in ascending order.
///
/// Only this one range is emitted; the other ranges accepted by `is_kanji`
/// are not included.
pub fn all_kanji() -> String {
    // 62967 is the UTF-8 byte length of the result, so the buffer is sized once.
    let mut kanji = String::with_capacity(62967);
    kanji.extend((0x4e00..=0x9ffc).filter_map(char::from_u32));
    kanji
}
/// Builds a lookup table from each kanji in the `exam_lists` constants to its [`Level`].
///
/// Characters in a list that `Kanji::new` rejects are skipped. The table is
/// rebuilt on every call, so callers that need it repeatedly should keep the
/// returned map around.
pub fn level_table() -> HashMap<Kanji, Level> {
    // A fixed-size array is enough here; nothing needs a heap-allocated `Vec`.
    //
    // Order matters: `HashMap::insert` overwrites, so a kanji that appears in
    // more than one list ends up with the level of the LAST list containing it.
    // `LEVEL_01_PRE` comes after `LEVEL_01`, so `PreOne` wins over `One` for
    // any kanji present in both lists.
    let pairs = [
        (exam_lists::LEVEL_10, Level::Ten),
        (exam_lists::LEVEL_09, Level::Nine),
        (exam_lists::LEVEL_08, Level::Eight),
        (exam_lists::LEVEL_07, Level::Seven),
        (exam_lists::LEVEL_06, Level::Six),
        (exam_lists::LEVEL_05, Level::Five),
        (exam_lists::LEVEL_04, Level::Four),
        (exam_lists::LEVEL_03, Level::Three),
        (exam_lists::LEVEL_02_PRE, Level::PreTwo),
        (exam_lists::LEVEL_02, Level::Two),
        (exam_lists::LEVEL_01, Level::One),
        (exam_lists::LEVEL_01_PRE, Level::PreOne),
    ];

    let mut table = HashMap::new();
    for (list, level) in pairs.iter() {
        for kanji in list.chars().filter_map(Kanji::new) {
            table.insert(kanji, *level);
        }
    }
    table
}
/// Counts how many characters of `s` fall into each [`Level`].
///
/// Characters that `Kanji::new` rejects, and kanji with no entry in `levels`,
/// are ignored. Every occurrence is counted, so a repeated kanji contributes
/// once per appearance. Levels with no hits have no entry in the result.
pub fn kanji_counts(s: &str, levels: &HashMap<Kanji, Level>) -> HashMap<Level, u32> {
    let mut tally: HashMap<Level, u32> = HashMap::new();
    for c in s.chars() {
        let found = Kanji::new(c).and_then(|k| levels.get(&k));
        if let Some(&level) = found {
            *tally.entry(level).or_insert(0) += 1;
        }
    }
    tally
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `all_kanji` yields the expected number of chars and UTF-8 bytes.
    #[test]
    fn unicode_ranges() {
        let ks = all_kanji();
        let char_count = ks.chars().count();
        let byte_len = ks.len();
        assert_eq!(20989, char_count);
        assert_eq!(62967, byte_len);
    }

    /// Every character in every exam list is accepted by `is_kanji`.
    #[test]
    fn all_kanjiable() {
        let lists = [
            exam_lists::LEVEL_10,
            exam_lists::LEVEL_09,
            exam_lists::LEVEL_08,
            exam_lists::LEVEL_07,
            exam_lists::LEVEL_06,
            exam_lists::LEVEL_05,
            exam_lists::LEVEL_04,
            exam_lists::LEVEL_03,
            exam_lists::LEVEL_02_PRE,
            exam_lists::LEVEL_02,
            exam_lists::LEVEL_01_PRE,
            exam_lists::LEVEL_01,
        ];
        for list in lists.iter() {
            assert!(list.chars().all(is_kanji));
        }
    }

    /// `氣` must resolve to `PreOne` in the finished table.
    #[test]
    fn sane_overwrite() {
        let table = level_table();
        let ki = Kanji::new('氣').unwrap();
        let expected = Level::PreOne;
        assert_eq!(Some(&expected), table.get(&ki))
    }

    /// The table holds the expected number of distinct kanji.
    #[test]
    fn lookup_map_length() {
        let size = level_table().len();
        assert_eq!(5906, size);
    }
}