sbv2_core 0.1.2

Style-Bert-VITSの推論ライブラリ
Documentation
use std::{fs, time::Instant};

use sbv2_core::tts;
use std::env;

fn main() -> anyhow::Result<()> {
    dotenvy::dotenv_override().ok();
    env_logger::init();
    let text = "眠たい";
    let ident = "aaa";
    let mut tts_holder = tts::TTSModelHolder::new(
        &fs::read(env::var("BERT_MODEL_PATH")?)?,
        &fs::read(env::var("TOKENIZER_PATH")?)?,
    )?;
    tts_holder.load_sbv2file(ident, fs::read(env::var("MODEL_PATH")?)?)?;

    let (bert_ori, phones, tones, lang_ids) = tts_holder.parse_text(text)?;

    let style_vector = tts_holder.get_style_vector(ident, 0, 1.0)?;
    let data = tts_holder.synthesize(
        ident,
        bert_ori.to_owned(),
        phones.clone(),
        tones.clone(),
        lang_ids.clone(),
        style_vector.clone(),
        0.0,
        0.5,
    )?;
    std::fs::write("output.wav", data)?;
    let now = Instant::now();
    for _ in 0..10 {
        tts_holder.parse_text(text)?;
    }
    println!(
        "Time taken(parse_text): {}ms/it",
        now.elapsed().as_millis() / 10
    );
    let now = Instant::now();
    for _ in 0..10 {
        tts_holder.synthesize(
            ident,
            bert_ori.to_owned(),
            phones.clone(),
            tones.clone(),
            lang_ids.clone(),
            style_vector.clone(),
            0.0,
            1.0,
        )?;
    }
    println!(
        "Time taken(synthesize): {}ms/it",
        now.elapsed().as_millis() / 10
    );
    Ok(())
}