use core::fmt;
use crate::jisx0213_table::{
DESCRIPTION_TO_CHAR, JISX0213_MENCODE_TO_CHAR, JISX0213_MENCODE_TO_STR,
};
/// Outcome of resolving a gaiji reference.
///
/// A reference resolves either to a single Unicode scalar value or to a
/// static string holding more than one code point (for example a base
/// character followed by a combining mark).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Resolved {
    /// A single Unicode scalar value.
    Char(char),
    /// A static string, used when one `char` cannot hold the result.
    Multi(&'static str),
}

impl Resolved {
    /// Writes the resolved text into `w`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying writer reports.
    pub fn write_to<W>(self, w: &mut W) -> fmt::Result
    where
        W: fmt::Write,
    {
        match self {
            Resolved::Multi(text) => w.write_str(text),
            Resolved::Char(ch) => w.write_char(ch),
        }
    }

    /// Returns the character for a [`Resolved::Char`], or `None` for a
    /// [`Resolved::Multi`].
    #[must_use]
    pub fn as_char(self) -> Option<char> {
        if let Resolved::Char(ch) = self {
            Some(ch)
        } else {
            None
        }
    }

    /// Returns the number of bytes the resolved text occupies in UTF-8.
    #[must_use]
    pub fn utf8_len(self) -> usize {
        let text = match self {
            Resolved::Char(ch) => return ch.len_utf8(),
            Resolved::Multi(text) => text,
        };
        text.len()
    }
}
/// Resolves a gaiji reference to its Unicode representation.
///
/// Sources are consulted in a fixed order and the first hit wins:
///
/// 1. `existing` — an already-known character is returned unchanged.
/// 2. `mencode` looked up in `JISX0213_MENCODE_TO_STR` (yields `Resolved::Multi`).
/// 3. `mencode` looked up in `JISX0213_MENCODE_TO_CHAR`.
/// 4. `mencode` parsed as a `U+XXXX` reference.
/// 5. `description` looked up in `DESCRIPTION_TO_CHAR`.
/// 6. `description` itself, when it consists of exactly one `char`.
///
/// Returns `None` when every source misses.
#[must_use]
pub fn lookup(
    existing: Option<char>,
    mencode: Option<&str>,
    description: &str,
) -> Option<Resolved> {
    existing
        .map(Resolved::Char)
        .or_else(|| {
            // A mencode that misses all three sub-steps falls through to the
            // description-based sources below.
            mencode.and_then(|code| {
                JISX0213_MENCODE_TO_STR
                    .get(code)
                    .map(|&text| Resolved::Multi(text))
                    .or_else(|| {
                        JISX0213_MENCODE_TO_CHAR
                            .get(code)
                            .map(|&ch| Resolved::Char(ch))
                    })
                    .or_else(|| parse_u_plus(code).map(Resolved::Char))
            })
        })
        .or_else(|| {
            DESCRIPTION_TO_CHAR
                .get(description)
                .map(|&ch| Resolved::Char(ch))
        })
        .or_else(|| {
            // Last resort: a description that is exactly one character long
            // stands for itself.
            let mut rest = description.chars();
            match (rest.next(), rest.next()) {
                (Some(only), None) => Some(Resolved::Char(only)),
                _ => None,
            }
        })
}
/// Parses a `U+XXXX` code-point reference into a `char`.
///
/// The input must be the literal prefix `U+` followed by one to six ASCII
/// hexadecimal digits and nothing else. Returns `None` when the prefix is
/// missing, the digit run is empty, too long or contains a non-hex
/// character, or the value is not a Unicode scalar value (a surrogate or
/// anything above `U+10FFFF`).
#[must_use]
fn parse_u_plus(mencode: &str) -> Option<char> {
    let hex = mencode.strip_prefix("U+")?;
    // `u32::from_str_radix` tolerates a leading `+` sign, which would let
    // malformed input such as "U++41" through; require pure hex digits
    // before handing the text to the parser.
    let well_formed =
        (1..=6).contains(&hex.len()) && hex.bytes().all(|b| b.is_ascii_hexdigit());
    if !well_formed {
        return None;
    }
    u32::from_str_radix(hex, 16).ok().and_then(char::from_u32)
}
/// Reports how many entries each lookup table holds.
///
/// The tuple is ordered as `(JISX0213_MENCODE_TO_CHAR, JISX0213_MENCODE_TO_STR,
/// DESCRIPTION_TO_CHAR)`.
#[must_use]
pub fn table_sizes() -> (usize, usize, usize) {
    let single = JISX0213_MENCODE_TO_CHAR.len();
    let combo = JISX0213_MENCODE_TO_STR.len();
    let description = DESCRIPTION_TO_CHAR.len();
    (single, combo, description)
}
// Unit tests for `lookup`, `Resolved` and the generated lookup tables.
#[cfg(test)]
mod tests {
use super::*;
// An already-known character wins even when a mencode and a description
// are also supplied.
#[test]
fn lookup_prefers_existing_ucs_when_already_set() {
assert_eq!(
lookup(Some('\u{1234}'), Some("第3水準1-85-54"), "木+吶のつくり"),
Some(Resolved::Char('\u{1234}'))
);
}
// Without an existing character the mencode key is what gets resolved.
#[test]
fn lookup_via_mencode_table_when_ucs_missing() {
assert_eq!(
lookup(None, Some("第3水準1-85-54"), "木+吶のつくり"),
Some(Resolved::Char('\u{6798}'))
);
}
// A mencode found in the string table yields `Resolved::Multi`.
#[test]
fn lookup_via_combo_table_returns_multi() {
assert_eq!(
lookup(None, Some("第3水準1-4-87"), ""),
Some(Resolved::Multi("\u{304B}\u{309A}"))
);
}
// `write_to` must emit every code point of a multi-code-point result.
#[test]
fn combo_resolution_writes_both_codepoints() {
let resolved = lookup(None, Some("第3水準1-4-87"), "").expect("combo resolves");
let mut s = String::new();
resolved
.write_to(&mut s)
.expect("write to String never fails");
assert_eq!(s, "\u{304B}\u{309A}");
assert_eq!(s.chars().count(), 2);
}
// A `U+XXXX` mencode is parsed directly; the multi-character description
// cannot trigger the single-character fallback.
#[test]
fn lookup_via_u_plus_form() {
assert_eq!(
lookup(None, Some("U+01F5"), "Latin Small Letter G With Acute"),
Some(Resolved::Char('\u{01F5}'))
);
}
// Six hex digits is the longest accepted form.
#[test]
fn lookup_via_u_plus_max_six_hex_digits() {
assert_eq!(
lookup(None, Some("U+10FFFF"), ""),
Some(Resolved::Char('\u{10FFFF}'))
);
}
// NOTE(review): the name says "beyond seven", but the input has exactly
// seven hex digits; `parse_u_plus` rejects anything longer than six.
#[test]
fn lookup_rejects_u_plus_beyond_seven_hex_digits() {
assert_eq!(lookup(None, Some("U+1234567"), ""), None);
}
// Surrogate code points are not valid `char` values.
#[test]
fn lookup_rejects_u_plus_surrogate() {
assert_eq!(lookup(None, Some("U+D800"), ""), None);
}
// Non-hexadecimal digits after the prefix are rejected.
#[test]
fn lookup_rejects_u_plus_non_hex() {
assert_eq!(lookup(None, Some("U+GG12"), ""), None);
}
// The bare prefix with no digits is rejected.
#[test]
fn lookup_rejects_u_plus_without_digits() {
assert_eq!(lookup(None, Some("U+"), ""), None);
}
// NOTE(review): "〓" is itself U+3013, so this assertion holds whether the
// hit comes from the description table or from the single-character
// fallback in `lookup`; it does not distinguish the two paths.
#[test]
fn lookup_via_description_fallback_when_mencode_absent() {
assert_eq!(lookup(None, None, "〓"), Some(Resolved::Char('\u{3013}')));
}
// Every source misses: unknown mencode and a multi-character description.
#[test]
fn lookup_returns_none_when_all_paths_miss() {
assert_eq!(
lookup(None, Some("not-in-any-table"), "unresolved gaiji"),
None
);
}
// Each expected value is the description character itself.
// NOTE(review): presumably these inputs are absent from the tables so the
// single-character fallback is what fires — confirm against the table data.
#[test]
fn lookup_falls_back_to_description_self_when_single_char() {
assert_eq!(
lookup(None, Some("第4水準2-16-1"), "丂"),
Some(Resolved::Char('\u{4E02}'))
);
assert_eq!(lookup(None, None, "畺"), Some(Resolved::Char('\u{757A}')));
assert_eq!(lookup(None, None, "龔"), Some(Resolved::Char('\u{9F94}')));
}
// NOTE(review): same input and expectation as
// `lookup_via_description_fallback_when_mencode_absent`; because "〓" maps
// to itself, a table hit and the fallback give the same answer here.
#[test]
fn single_char_fallback_does_not_override_dictionary_hit() {
assert_eq!(lookup(None, None, "〓"), Some(Resolved::Char('\u{3013}')));
}
// Descriptions longer than one character must not be returned as-is.
#[test]
fn single_char_fallback_does_not_fire_for_multi_char_descriptions() {
assert_eq!(lookup(None, None, "未知の字形"), None);
assert_eq!(lookup(None, None, "ab"), None);
}
// Direct table probe for the mencode key used by the tests above.
#[test]
fn mencode_table_covers_the_fixture_gaiji() {
assert_eq!(
JISX0213_MENCODE_TO_CHAR.get("第3水準1-85-54"),
Some(&'\u{6798}')
);
}
// Cross-checks `table_sizes()` against the count constants exported by the
// table module, then pins those constants to fixed expected values.
#[test]
fn table_sizes_match_jisx0213_2004_spec() {
use crate::jisx0213_table::{
DESCRIPTION_COUNT, JISX0213_COMBO_COUNT, JISX0213_PLANE1_COUNT, JISX0213_PLANE2_COUNT,
};
let (single, combo, description) = table_sizes();
// The single-character table is expected to hold both planes.
assert_eq!(single, JISX0213_PLANE1_COUNT + JISX0213_PLANE2_COUNT);
assert_eq!(combo, JISX0213_COMBO_COUNT);
assert_eq!(description, DESCRIPTION_COUNT);
assert_eq!(
JISX0213_PLANE1_COUNT, 1893,
"第3水準 must equal the spec count",
);
assert_eq!(
JISX0213_PLANE2_COUNT, 2436,
"第4水準 must equal the spec count",
);
assert_eq!(
JISX0213_COMBO_COUNT, 25,
"combining-sequence cells must equal spec",
);
// Lower bound only: guards against the description table shrinking.
assert!(
description >= 8_000,
"description-fallback table looks too small ({description}) — \
did the gaiji-chuki extraction drop entries?",
);
}
// A multi-character description can only resolve through the description
// table, so this exercises that path specifically.
#[test]
fn description_table_resolves_a_known_dictionary_entry() {
assert_eq!(
lookup(None, None, "木+吶のつくり"),
Some(Resolved::Char('\u{6798}')),
);
}
// NOTE(review): both inputs are single characters that map to themselves,
// so these assertions would also pass via the single-character fallback.
#[test]
fn description_table_preserves_special_placeholders() {
assert_eq!(lookup(None, None, "〓"), Some(Resolved::Char('\u{3013}')));
assert_eq!(lookup(None, None, "〻"), Some(Resolved::Char('\u{303B}')));
}
// Direct probe of a 第3水準 key in the single-character table.
#[test]
fn full_jisx0213_table_covers_a_known_plane1_third_tier_kanji() {
assert_eq!(
JISX0213_MENCODE_TO_CHAR.get("第3水準1-85-9"),
Some(&'\u{6567}')
);
}
// Direct probe of a 第4水準 key; the expected value lies outside the BMP.
#[test]
fn full_jisx0213_table_covers_a_known_plane2_fourth_tier_entry() {
assert_eq!(
JISX0213_MENCODE_TO_CHAR.get("第4水準2-1-1"),
Some(&'\u{20089}')
);
}
// `utf8_len` is checked for 1-, 3- and 4-byte characters and for a
// two-code-point string (3 + 3 bytes).
#[test]
fn resolved_utf8_len_matches_actual_encoding() {
assert_eq!(Resolved::Char('A').utf8_len(), 1);
assert_eq!(Resolved::Char('あ').utf8_len(), 3);
assert_eq!(Resolved::Char('𠂉').utf8_len(), 4);
assert_eq!(Resolved::Multi("\u{304B}\u{309A}").utf8_len(), 6);
}
// `as_char` only succeeds for the single-character variant.
#[test]
fn resolved_as_char_returns_none_for_combos() {
assert_eq!(Resolved::Char('A').as_char(), Some('A'));
assert_eq!(Resolved::Multi("か゚").as_char(), None);
}
// With `existing` set, the other two arguments are ignored.
#[test]
fn lookup_is_identity_on_the_ucs_input_when_set() {
assert_eq!(
lookup(Some('あ'), Some("anything"), "anything"),
Some(Resolved::Char('あ'))
);
}
}