Skip to main content

vibrato_rkyv/
lib.rs

1//! # Vibrato-rkyv
2//!
3//! Vibrato is a fast implementation of tokenization (or morphological analysis)
4//! based on the Viterbi algorithm.
5//!
6//! ## Examples
7//!
8//! ```
9//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
10//! use vibrato_rkyv::{Dictionary, SystemDictionaryBuilder, Tokenizer};
11//!
12//! let lexicon_csv = "京都,4,4,5,京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5
13//! 東京都,5,5,9,東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*";
14//! let matrix_def = "10 10\n0 4 -5\n0 5 -9";
15//! let char_def = "DEFAULT 0 1 0";
16//! let unk_def = "DEFAULT,0,0,100,DEFAULT,名詞,普通名詞,*,*,*,*,*,*,*,*,*,*,*,*";
17//!
18//!
19//! let dict = SystemDictionaryBuilder::from_readers(
20//!     lexicon_csv.as_bytes(),
21//!     matrix_def.as_bytes(),
22//!     char_def.as_bytes(),
23//!     unk_def.as_bytes(),
24//! )?;
25//!
26//! let tokenizer = Tokenizer::from_inner(dict);
27//! let mut worker = tokenizer.new_worker();
28//!
29//! worker.reset_sentence("京都東京都");
30//! worker.tokenize();
31//! assert_eq!(worker.num_tokens(), 2);
32//!
33//! let t0 = worker.token(0);
34//! assert_eq!(t0.surface(), "京都");
35//! assert_eq!(t0.range_char(), 0..2);
36//! assert_eq!(t0.range_byte(), 0..6);
37//! assert_eq!(t0.feature(), "京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5");
38//!
39//! let t1 = worker.token(1);
40//! assert_eq!(t1.surface(), "東京都");
41//! assert_eq!(t1.range_char(), 2..5);
42//! assert_eq!(t1.range_byte(), 6..15);
43//! assert_eq!(t1.feature(), "東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*");
44//! # Ok(())
45//! # }
46//! ```
47#![cfg_attr(docsrs, feature(doc_cfg))] // When the `docsrs` cfg is set, enable `doc_cfg` so the `doc(cfg(...))` attributes used in this file are accepted.
48
49#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))] // Applies only when the target pointer width is neither 32 nor 64 bits.
50compile_error!("`target_pointer_width` must be 32 or 64"); // Fail the build on such targets with an explicit message.
51
52pub mod common; // The `pub` modules here (`common`, `dictionary`, `errors`, `token`, `tokenizer`) are visible to users of the crate.
53pub mod dictionary;
54pub mod errors;
55mod num; // `num`, `sentence` and `utils` are declared without `pub`, so they are private to this crate.
56mod sentence;
57pub mod token;
58pub mod tokenizer;
59mod utils;
60
61#[cfg(feature = "legacy")] // Compiled only when the `legacy` Cargo feature is enabled.
62mod legacy; // Private module; nothing from it is re-exported in this file.
63
64#[cfg(feature = "train")] // `mecab` exists only when the `train` feature is enabled.
65#[cfg_attr(docsrs, doc(cfg(feature = "train")))] // Under the `docsrs` cfg, mark the item in the docs as requiring `train`.
66pub mod mecab;
67
68#[cfg(feature = "train")] // `trainer` is gated on the `train` feature in the same way as `mecab`.
69#[cfg_attr(docsrs, doc(cfg(feature = "train")))] // Under the `docsrs` cfg, mark the item in the docs as requiring `train`.
70pub mod trainer;
71
72#[cfg(all(test, feature = "train"))] // Built only for test builds that also enable `train`.
73mod test_utils; // NOTE(review): presumably shared test helpers; contents not visible here, confirm.
74#[cfg(test)] // Built only for test builds.
75mod tests;
76
77pub use dictionary::{CacheStrategy, Dictionary, LoadMode, SystemDictionaryBuilder}; // Re-exported so callers can import these directly from the crate root.
78pub use tokenizer::Tokenizer; // Likewise re-exported at the crate root.
79
80/// Version number of this library, taken from `CARGO_PKG_VERSION` at compile time.
81pub const VERSION: &str = env!("CARGO_PKG_VERSION");