golia_pinyin/lib.rs
1#![deny(missing_docs)]
2#![deny(rustdoc::broken_intra_doc_links)]
3
4//! `golia-pinyin` — self-developed Mandarin Pinyin input method engine.
5//!
6//! Status: the engine surface is in place (segmenter, fuzzy, FST dict,
7//! encode, session), backed by a 919k-entry corpus-derived dict (Unihan +
8//! jieba + Leipzig + SUBTLEX) and L0 user-learning ranking (3-pick
9//! auto-pin). The published crate version stays at `0.1.0` per the publish
10//! strategy in lab8-ime ROADMAP item 35; internal milestone names
11//! (v0.2-data, v0.3-l0) refer to data and feature readiness. See the [workspace ROADMAP](https://github.com/goliajp/pinyin/blob/main/ROADMAP.md).
12//!
13//! Sibling library: [`wubi`](https://crates.io/crates/wubi) — same
14//! architectural pattern (PHF static tables, FST main dict, zero-alloc hot
15//! path).
16//!
17//! # Quickstart
18//!
19//! ```no_run
20//! use golia_pinyin::{PinyinEngine, Session};
21//! let engine = PinyinEngine::new();
22//! let mut session = Session::new(&engine);
23//! for c in "zhongguo".chars() {
24//!     session.input_char(c);
25//! }
26//! let cands = session.candidates();
27//! assert_eq!(cands.first().map(String::as_str), Some("中国"));
28//! ```
29//!
30//! # Module map
31//! - [`syllable`] — inventory of the 403 valid Mandarin syllables (PHF set)
32//! - [`fuzzy`] — toggleable fuzzy-pair expansion (`z↔zh` etc.)
33//! - [`segmenter`] — DP segmentation of continuous pinyin strings
34//! - [`dict`] — FST-backed `pinyin → words` lookup with L0 user-learning
35//! - [`encode`] — `char → readings` reverse lookup
36//! - [`engine`] — immutable [`PinyinEngine`] (dict + fuzzy)
37//! - [`session`] — mutable [`Session`] holding the user's input buffer
38//! - [`ranking`] — L0 snapshot type for host-side persistence
39
40pub mod dict;
41pub mod encode;
42pub mod engine;
43pub mod fuzzy;
44pub mod ranking;
45pub mod segmenter;
46pub mod session;
47pub mod syllable;
48
49pub use dict::PinyinDict;
50pub use encode::{char_to_pinyin, covered_char_count};
51pub use engine::PinyinEngine;
52pub use fuzzy::FuzzyConfig;
53pub use ranking::{L0Snapshot, PROMOTE_THRESHOLD};
54pub use segmenter::{Segmentation, segment};
55pub use session::Session;
56pub use syllable::{VALID_SYLLABLES, count as syllable_count, is_valid as is_valid_syllable};