chinese_telegraph/
lib.rs

1#![no_std]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![deny(missing_docs)]
4#![warn(clippy::missing_docs_in_private_items)]
5
6//! # Chinese Telegraph Code
7//!
8//! This crate provides utilities for converting Unicode Chinese characters to Chinese telegraph codes.
9//!
10//! Chinese telegraph codes are numerical codes used historically for transmitting Chinese text over telegraph systems.
11//! This crate supports both Traditional Chinese (Taiwan) and Simplified Chinese character sets.
12//!
13//! ## Features
14//!
15//! - Convert Chinese characters to telegraph codes
16//! - Support for both Traditional and Simplified Chinese
17//! - `no_std` compatible (with optional `std` feature for string formatting)
18//! - Fast lookups using perfect hash functions
19//!
20//! ## Usage
21//!
//! ```rust
//! use chinese_telegraph::{to_telegraph, Table};
//!
//! // Look up a Traditional Chinese character
//! let code = to_telegraph("這", Table::TW);
//! assert_eq!(code, Some(6638));
//!
//! // Look up a Simplified Chinese character
//! let code = to_telegraph("这", Table::CN);
//! assert_eq!(code, Some(6638));
//!
//! // Search both tables
//! let code = to_telegraph("一", Table::Both);
//! assert_eq!(code, Some(1));
//!
//! // Format as a 4-digit string (requires the `std` feature)
//! # #[cfg(feature = "std")]
//! # {
//! use chinese_telegraph::to_telegraph_string;
//!
//! let formatted = to_telegraph_string("一", Table::Both);
//! assert_eq!(formatted, Some("0001".to_string()));
//! # }
//! ```
43
44/// Simplified Chinese character lookup table.
45mod cn;
46/// Traditional Chinese character lookup table.
47mod tw;
48
/// Selects which character table(s) a telegraph code lookup consults.
///
/// The type is a plain field-less enum, so it is `Copy` and can be compared
/// and hashed (e.g. used as a key in a map or set).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Table {
    /// Consult both the Traditional Chinese (TW) and Simplified Chinese (CN) tables.
    /// The TW table is tried first; the CN table is only consulted when TW has no match.
    Both,
    /// Consult only the Traditional Chinese (Taiwan) character table.
    TW,
    /// Consult only the Simplified Chinese character table.
    CN,
}
60
61/// Converts a Chinese character to its telegraph code.
62///
63/// # Arguments
64///
65/// * `character` - A string slice containing exactly one Chinese character
66/// * `table` - Which character table(s) to search
67///
68/// # Returns
69///
70/// Returns `Some(code)` if the character is found in the specified table(s),
71/// or `None` if the character is not found or if the input contains more than one character.
72///
73/// # Examples
74///
75/// ```rust
76/// use chinese_telegraph::{to_telegraph, Table};
77///
78/// // Traditional Chinese character
79/// assert_eq!(to_telegraph("這", Table::TW), Some(6638));
80///
81/// // Simplified Chinese character
82/// assert_eq!(to_telegraph("这", Table::CN), Some(6638));
83///
84/// // Character found in both tables
85/// assert_eq!(to_telegraph("一", Table::Both), Some(1));
86///
87/// // Unknown character
88/// assert_eq!(to_telegraph("🦀", Table::Both), None);
89///
90/// // Multiple characters
91/// assert_eq!(to_telegraph("這是", Table::Both), None);
92/// ```
93pub fn to_telegraph(character: &str, table: Table) -> Option<usize> {
94    match table {
95        Table::Both => tw::TW_TABLE
96            .get(character)
97            .or_else(|| cn::CN_TABLE.get(character))
98            .copied(),
99        Table::TW => tw::TW_TABLE.get(character).copied(),
100        Table::CN => cn::CN_TABLE.get(character).copied(),
101    }
102}
103#[cfg(feature = "std")]
104extern crate std;
105
/// Looks up the telegraph code for a Chinese character and renders it as text.
///
/// The code is zero-padded on the left to a minimum width of four digits.
///
/// This function is only available when the `std` feature is enabled.
///
/// # Arguments
///
/// * `character` - A string slice holding exactly one Chinese character
/// * `table` - The character table(s) to consult
///
/// # Returns
///
/// `Some(formatted_code)` when the character is found, or `None` when
/// [`to_telegraph`] finds no match (unknown character, or input with more
/// than one character).
///
/// # Examples
///
/// ```rust
/// use chinese_telegraph::{to_telegraph_string, Table};
///
/// assert_eq!(to_telegraph_string("一", Table::Both), Some("0001".to_string()));
/// assert_eq!(to_telegraph_string("這", Table::TW), Some("6638".to_string()));
/// assert_eq!(to_telegraph_string("🦀", Table::Both), None);
/// ```
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub fn to_telegraph_string(character: &str, table: Table) -> Option<std::string::String> {
    // Bail out with `None` as soon as the lookup misses.
    let code = to_telegraph(character, table)?;
    Some(std::format!("{:04}", code))
}
135
/// Unit tests for the lookup and formatting helpers.
///
/// Tests that touch `to_telegraph_string` are gated on the `std` feature,
/// because that function (and the `std` crate name itself) only exists when
/// the feature is enabled. Without the gate the whole module fails to compile
/// in a `no_std` build.
#[cfg(test)]
mod tests {
    #[cfg(feature = "std")]
    use crate::to_telegraph_string;
    use crate::{to_telegraph, Table};

    #[test]
    fn it_can_look_up_a_tw_character() {
        let result = to_telegraph("這", Table::TW);
        assert_eq!(result, Some(6638));
    }

    #[test]
    fn it_can_look_up_a_cn_character() {
        let result = to_telegraph("这", Table::CN);
        assert_eq!(result, Some(6638));
    }

    #[test]
    fn it_can_look_up_a_character_in_both_tables() {
        let result = to_telegraph("一", Table::Both);
        assert_eq!(result, Some(1));
    }

    #[test]
    fn it_returns_none_for_unknown_characters() {
        let result = to_telegraph("🦀", Table::Both);
        assert_eq!(result, None);
    }

    #[test]
    fn it_returns_none_for_more_than_one_character() {
        let result = to_telegraph("這是", Table::Both);
        assert_eq!(result, None);
    }

    #[cfg(feature = "std")]
    #[test]
    fn it_formats_the_number_with_leading_zeros() {
        let result = to_telegraph_string("一", Table::Both);
        // Compare as `Option<&str>` so no `std::` path is needed here.
        assert_eq!(result.as_deref(), Some("0001"));
    }

    #[cfg(feature = "std")]
    #[test]
    fn it_formats_nothing_for_unknown_characters() {
        let result = to_telegraph_string("🦀", Table::Both);
        assert_eq!(result, None);
    }
}