[−][src]Crate sudachiclone
ⓘThis code runs with edition 2018
// Crate-level usage example: build a tokenizer from a dictionary, then
// tokenize a few sample strings and print morpheme information.
use sudachiclone::prelude::*;

// `unwrap()` is used throughout for brevity; any failure aborts the example.
let dictionary = Dictionary::new(None, None).unwrap();
let tokenizer = dictionary.create();

// Multi-granular tokenization: the same text is tokenized once per split
// mode (C, B, A) and every resulting surface form is printed.
//
// Output shown for `system_core.dic` or `system_full.dic` version 20190781.
// NOTE(review): "20190781" is not a valid date; possibly a typo for
// 20190718 -- confirm against the dictionary release actually used.
// You may not be able to reproduce this exact output, depending on the
// dictionary you use.
for m in tokenizer.tokenize("国家公務員", &Some(SplitMode::C), None).unwrap() {
    println!("{}", m.surface());
}

for m in tokenizer.tokenize("国家公務員", &Some(SplitMode::B), None).unwrap() {
    println!("{}", m.surface());
}

for m in tokenizer.tokenize("国家公務員", &Some(SplitMode::A), None).unwrap() {
    println!("{}", m.surface());
}

// Morpheme information: inspect the first morpheme of the result.
let m = tokenizer.tokenize("食べ", &Some(SplitMode::A), None).unwrap().get(0).unwrap();
println!("{}", m.surface());
println!("{}", m.dictionary_form());
println!("{}", m.reading_form());
println!("{:?}", m.part_of_speech());

// Normalization: print the normalized form of the first morpheme of each input.
println!("{}", tokenizer.tokenize("附属", &Some(SplitMode::A), None).unwrap().get(0).unwrap().normalized_form());
println!("{}", tokenizer.tokenize("SUMMER", &Some(SplitMode::A), None).unwrap().get(0).unwrap().normalized_form());
println!("{}", tokenizer.tokenize("シュミレーション", &Some(SplitMode::A), None).unwrap().get(0).unwrap().normalized_form());
Modules
config | |
darts | |
dictionary | |
dictionary_lib | |
lattice | |
lattice_node | |
morpheme | |
morpheme_list | |
plugin | |
prelude | |
tokenizer | |
utf8_input_text | |
utf8_input_text_builder |