JPreprocess

Struct JPreprocess 

Source
pub struct JPreprocess<T: Tokenizer> { /* private fields */ }

Implementations§

Source§

impl JPreprocess<DefaultTokenizer>

Source

pub fn from_config(config: JPreprocessConfig) -> JPreprocessResult<Self>

👎Deprecated since 0.13.0: Use with_dictionaries instead
  1. This function is deprecated and will be removed in a future version. Use with_dictionaries instead.
  2. Meanwhile, this function will continue to work, but it cannot load CSV-type user dictionaries.

Loads the dictionary from JPreprocessConfig.

This supports loading a dictionary from files, or using the built-in dictionary (which requires a feature to be enabled). If you need to load a dictionary from data, please use with_dictionaries instead.

Source

pub fn with_dictionaries( dictionary: Dictionary, user_dictionary: Option<UserDictionary>, ) -> Self

Creates JPreprocess with provided dictionary data.

§Example 1: Load from file
use jpreprocess::*;

let system = SystemDictionaryConfig::File(path).load()?;
let jpreprocess = JPreprocess::with_dictionaries(system, None);
§Example 2: Load bundled dictionary (This requires a feature to be enabled)
use jpreprocess::{*, kind::*};

let system = SystemDictionaryConfig::Bundled(JPreprocessDictionaryKind::NaistJdic).load()?;
let jpreprocess = JPreprocess::with_dictionaries(system, None);
Source§

impl<T: Tokenizer> JPreprocess<T>

Source

pub fn from_tokenizer(tokenizer: T) -> Self

Creates JPreprocess from provided tokenizer.

Source

pub fn text_to_njd(&self, text: &str) -> JPreprocessResult<NJD>

Tokenize input text and return NJD.

Useful for customizing text processing.

use jpreprocess::*;
use jpreprocess_jpcommon::*;

let system = SystemDictionaryConfig::File(path).load()?;
let jpreprocess = JPreprocess::with_dictionaries(system, None);

let mut njd = jpreprocess.text_to_njd("日本語文を解析し、音声合成エンジンに渡せる形式に変換します.")?;
njd.preprocess();

// Do something with njd

// jpcommon utterance
let utterance = Utterance::from(njd.nodes.as_slice());

// Vec<([phoneme string], [context labels])>
let phoneme_vec = utterance_to_phoneme_vec(&utterance);

assert_eq!(&phoneme_vec[2].0, "i");

// fullcontext label
let fullcontext = overwrapping_phonemes(phoneme_vec);

assert!(fullcontext[2].to_string().starts_with("sil^n-i+h=o"));
Source

pub fn run_frontend(&self, text: &str) -> JPreprocessResult<Vec<String>>

Tokenize a text, preprocess, and return NJD converted to string.

The returned string does not match that of openjtalk. JPreprocess drops the orig string and some of the CForm information, which is unnecessary for preprocessing.

If you need this information, please open an issue with a feature request.

Source

pub fn make_label(&self, njd_features: Vec<String>) -> Vec<Label>

Generate jpcommon features from NJD features (as returned by run_frontend).

Source

pub fn extract_fullcontext(&self, text: &str) -> JPreprocessResult<Vec<Label>>

Generate jpcommon features from a text.

This is not guaranteed to be the same as calling run_frontend and then make_label.

Auto Trait Implementations§

§

impl<T> Freeze for JPreprocess<T>
where T: Freeze,

§

impl<T> RefUnwindSafe for JPreprocess<T>
where T: RefUnwindSafe,

§

impl<T> Send for JPreprocess<T>
where T: Send,

§

impl<T> Sync for JPreprocess<T>
where T: Sync,

§

impl<T> Unpin for JPreprocess<T>
where T: Unpin,

§

impl<T> UnwindSafe for JPreprocess<T>
where T: UnwindSafe,

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<T> ErasedDestructor for T
where T: 'static,