1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79
//! # Vibrato
//!
//! Vibrato is a fast implementation of tokenization (or morphological analysis)
//! based on the viterbi algorithm.
//!
//! ## Examples
//!
//! ```
//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
//! use std::fs::File;
//! use std::io::{BufRead, BufReader};
//!
//! use vibrato::{SystemDictionaryBuilder, Tokenizer};
//!
//! // Loads a set of raw dictionary files
//! let dict = SystemDictionaryBuilder::from_readers(
//! File::open("src/tests/resources/lex.csv")?,
//! File::open("src/tests/resources/matrix.def")?,
//! File::open("src/tests/resources/char.def")?,
//! File::open("src/tests/resources/unk.def")?,
//! )?;
//! // or loads a compiled dictionary
//! // let reader = File::open("path/to/system.dic")?;
//! // let dict = Dictionary::read(reader)?;
//!
//! let tokenizer = vibrato::Tokenizer::new(dict);
//! let mut worker = tokenizer.new_worker();
//!
//! worker.reset_sentence("京都東京都");
//! worker.tokenize();
//! assert_eq!(worker.num_tokens(), 2);
//!
//! let t0 = worker.token(0);
//! assert_eq!(t0.surface(), "京都");
//! assert_eq!(t0.range_char(), 0..2);
//! assert_eq!(t0.range_byte(), 0..6);
//! assert_eq!(t0.feature(), "京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5");
//!
//! let t1 = worker.token(1);
//! assert_eq!(t1.surface(), "東京都");
//! assert_eq!(t1.range_char(), 2..5);
//! assert_eq!(t1.range_byte(), 6..15);
//! assert_eq!(t1.feature(), "東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*");
//! # Ok(())
//! # }
//! ```
#![deny(missing_docs)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(not(any(target_pointer_width = "32", target_pointer_width = "64")))]
compile_error!("`target_pointer_width` must be 32 or 64");
pub mod common;
pub mod dictionary;
pub mod errors;
mod num;
mod sentence;
pub mod token;
pub mod tokenizer;
mod utils;
#[cfg(feature = "train")]
#[cfg_attr(docsrs, doc(cfg(feature = "train")))]
pub mod mecab;
#[cfg(feature = "train")]
#[cfg_attr(docsrs, doc(cfg(feature = "train")))]
pub mod trainer;
#[cfg(all(test, feature = "train"))]
mod test_utils;
#[cfg(test)]
mod tests;
pub use dictionary::{Dictionary, SystemDictionaryBuilder};
pub use tokenizer::Tokenizer;
/// Version number of this library.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");