use crate::errors::HaqumeiError;
use crate::ffi;
use crate::phoneme::Phoneme;
use crate::prosody::{PitchAccent, ProsodicPhoneme};
use crate::utils::has_odori_chars;
use crate::word_phoneme::WordPhonemeProsody;
use crate::{MecabMorph, OpenJTalk};
use crate::{NjdFeature, WordPhonemeDetail, WordPhonemeMap, WordPhonemePair};
use std::collections::HashMap;
/// Minimal interface over a per-word mapping entry: access to its phoneme
/// list and the ability to fold a following entry into it.
///
/// `OpenJTalk::assign_and_merge_phonemes` is generic over this trait.
pub(crate) trait WordPhonemeEntry {
/// Returns the entry's phoneme list so phonemes can be appended in place.
fn phonemes_mut(&mut self) -> &mut Vec<Phoneme>;
/// Returns the entry's phonemes as a read-only slice.
fn phonemes(&self) -> &[Phoneme];
/// Folds `other` into `self`. Which fields are transferred (and whether
/// `other` is emptied) is decided by each implementor.
fn merge_from(&mut self, other: &mut Self);
}
/// [`WordPhonemeEntry`] for the plain word/phoneme pair.
impl WordPhonemeEntry for WordPhonemePair {
    /// Mutable access to the phoneme list.
    fn phonemes_mut(&mut self) -> &mut Vec<Phoneme> {
        &mut self.phonemes
    }

    /// Read-only view of the phoneme list.
    fn phonemes(&self) -> &[Phoneme] {
        self.phonemes.as_slice()
    }

    /// Appends `other`'s surface text to this entry and leaves `other.word`
    /// empty. Phonemes of `other` are not touched.
    fn merge_from(&mut self, other: &mut Self) {
        let absorbed = std::mem::take(&mut other.word);
        self.word.push_str(absorbed.as_str());
    }
}
/// [`WordPhonemeEntry`] for the detailed per-word record.
impl WordPhonemeEntry for WordPhonemeDetail {
    /// Mutable access to the phoneme list.
    fn phonemes_mut(&mut self) -> &mut Vec<Phoneme> {
        &mut self.phonemes
    }

    /// Read-only view of the phoneme list.
    fn phonemes(&self) -> &[Phoneme] {
        self.phonemes.as_slice()
    }

    /// Folds `other` into this entry.
    ///
    /// The surface text, reading and pronunciation are moved over and
    /// appended, and the mora counts are summed. `other.orig` is appended only
    /// when it is non-empty and made up solely of `ー`; otherwise it is left
    /// where it is. In debug builds this asserts that `other` has no phonemes.
    fn merge_from(&mut self, other: &mut Self) {
        debug_assert!(
            other.phonemes.is_empty(),
            "phonemes should be empty when merging"
        );

        let Self {
            word,
            orig,
            read,
            pron,
            mora_count,
            ..
        } = other;

        self.word.push_str(std::mem::take(word).as_str());
        self.mora_count += *mora_count;

        let orig_is_only_long_marks = !orig.is_empty() && !orig.chars().any(|c| c != 'ー');
        if orig_is_only_long_marks {
            self.orig.push_str(std::mem::take(orig).as_str());
        }

        self.read.push_str(std::mem::take(read).as_str());
        self.pron.push_str(std::mem::take(pron).as_str());
    }
}
impl WordPhonemeProsody {
    /// Folds `other` into this entry.
    ///
    /// The surface text, reading and pronunciation are moved over and
    /// appended, and the mora counts are summed. `other.orig` is appended only
    /// when it is non-empty and made up solely of `ー`. In debug builds this
    /// asserts that `other` carries no phonemes.
    pub(crate) fn merge_from(&mut self, other: &mut Self) {
        debug_assert!(
            other.phonemes.is_empty(),
            "phonemes should be empty when merging"
        );

        let Self {
            word,
            orig,
            read,
            pron,
            mora_count,
            ..
        } = other;

        self.word.push_str(std::mem::take(word).as_str());
        self.mora_count += *mora_count;

        let orig_is_only_long_marks = !orig.is_empty() && !orig.chars().any(|c| c != 'ー');
        if orig_is_only_long_marks {
            self.orig.push_str(std::mem::take(orig).as_str());
        }

        self.read.push_str(std::mem::take(read).as_str());
        self.pron.push_str(std::mem::take(pron).as_str());
    }
}
/// Conversion of a mapping entry into the item emitted by
/// `OpenJTalk::make_phoneme_mapping`, with one constructor per alignment
/// outcome between the entry's word and the MeCab morph stream.
pub(crate) trait IntoPhonemeMapItem: Sized {
/// Item type pushed into the result vector.
type Output;
/// Surface text of this entry; it is compared against morph surfaces.
fn word(&self) -> &str;
/// Builds the item emitted for a morph whose `is_ignored` flag is set.
/// `surface` and `is_unknown` are taken from that morph.
fn new_ignored(surface: String, is_unknown: bool) -> Self::Output;
/// Used when no morphs are left to align this entry with.
fn into_unmatched_remainder(self) -> Self::Output;
/// Used when the entry's word equals the surface of `morph`.
fn into_exact_match(self, morph: &MecabMorph) -> Self::Output;
/// Used when the entry's word starts with the current morph's surface.
/// `is_unknown_word` is true if any morph matched against the word was unknown.
fn into_prefix_match(self, is_unknown_word: bool) -> Self::Output;
/// Used when the word neither equals nor starts with the current morph's surface.
fn into_mismatch(self) -> Self::Output;
}
impl IntoPhonemeMapItem for WordPhonemePair {
type Output = WordPhonemeMap;
#[inline]
fn word(&self) -> &str {
&self.word
}
#[inline]
fn new_ignored(surface: String, is_unknown: bool) -> Self::Output {
WordPhonemeMap {
word: surface,
phonemes: vec![Phoneme::Sp],
is_unknown,
is_ignored: true,
}
}
#[inline]
fn into_unmatched_remainder(self) -> Self::Output {
let is_ignored = self.phonemes.is_empty();
WordPhonemeMap {
word: self.word,
phonemes: self.phonemes,
is_unknown: false,
is_ignored,
}
}
#[inline]
fn into_exact_match(self, morph: &MecabMorph) -> Self::Output {
let is_ignored = self.phonemes.is_empty();
let mut phonemes = self.phonemes;
if morph.is_unknown && (phonemes.is_empty() || phonemes == [Phoneme::Pau]) {
phonemes = vec![Phoneme::Unk];
}
WordPhonemeMap {
word: self.word,
phonemes,
is_unknown: morph.is_unknown,
is_ignored,
}
}
#[inline]
fn into_prefix_match(self, is_unknown_word: bool) -> Self::Output {
let mut phonemes = self.phonemes;
let is_ignored = phonemes.is_empty();
if is_unknown_word && (phonemes.is_empty() || phonemes == [Phoneme::Pau]) {
phonemes = vec![Phoneme::Unk];
}
WordPhonemeMap {
word: self.word,
phonemes,
is_unknown: is_unknown_word,
is_ignored,
}
}
#[inline]
fn into_mismatch(self) -> Self::Output {
let is_ignored = self.phonemes.is_empty();
WordPhonemeMap {
word: self.word,
phonemes: self.phonemes,
is_unknown: false,
is_ignored,
}
}
}
/// Alignment outcomes for [`WordPhonemeDetail`]; the entry is updated in
/// place and returned as its own output type.
impl IntoPhonemeMapItem for WordPhonemeDetail {
    type Output = WordPhonemeDetail;

    #[inline]
    fn word(&self) -> &str {
        self.word.as_str()
    }

    /// Placeholder for an ignored morph: a single `Sp` phoneme, the surface
    /// copied into `word`/`orig`/`read`/`pron`, and fixed part-of-speech fields.
    #[inline]
    fn new_ignored(surface: String, is_unknown: bool) -> Self::Output {
        let wildcard = || String::from("*");
        let word = surface.clone();
        let orig = surface.clone();
        let read = surface.clone();

        WordPhonemeDetail {
            word,
            phonemes: vec![Phoneme::Sp],
            features: Vec::new(),
            pos: String::from("記号"),
            pos_group1: String::from("空白"),
            pos_group2: wildcard(),
            pos_group3: wildcard(),
            ctype: wildcard(),
            cform: wildcard(),
            orig,
            read,
            pron: surface,
            accent_nucleus: 0,
            mora_count: 0,
            chain_rule: wildcard(),
            chain_flag: -1,
            is_unknown,
            is_ignored: true,
        }
    }

    /// Only refreshes `is_ignored` (true when there are no phonemes).
    #[inline]
    fn into_unmatched_remainder(self) -> Self::Output {
        let is_ignored = self.phonemes.is_empty();
        WordPhonemeDetail { is_ignored, ..self }
    }

    /// Same as a prefix match driven by `morph.is_unknown`, except that
    /// `features` is filled with the comma-separated fields of `morph.feature`.
    #[inline]
    fn into_exact_match(self, morph: &MecabMorph) -> Self::Output {
        let mut item = self.into_prefix_match(morph.is_unknown);
        item.features = morph.feature.split(',').map(String::from).collect();
        item
    }

    /// For an unknown word with no phonemes (or only a pause) the phonemes
    /// become a single `Unk`. `is_ignored` is computed after that
    /// substitution, and `features` is reset to empty.
    #[inline]
    fn into_prefix_match(mut self, is_unknown_word: bool) -> Self::Output {
        let only_pause = self.phonemes.len() == 1 && self.phonemes[0] == Phoneme::Pau;
        if is_unknown_word && (self.phonemes.is_empty() || only_pause) {
            self.phonemes = vec![Phoneme::Unk];
        }

        let is_ignored = self.phonemes.is_empty();
        WordPhonemeDetail {
            is_unknown: is_unknown_word,
            is_ignored,
            features: Vec::new(),
            ..self
        }
    }

    /// Clears the unknown flag and `features`; `is_ignored` is true when
    /// there are no phonemes.
    #[inline]
    fn into_mismatch(self) -> Self::Output {
        let is_ignored = self.phonemes.is_empty();
        WordPhonemeDetail {
            is_unknown: false,
            is_ignored,
            features: Vec::new(),
            ..self
        }
    }
}
/// Alignment outcomes for [`WordPhonemeProsody`]; the entry is updated in
/// place and returned as its own output type.
impl IntoPhonemeMapItem for WordPhonemeProsody {
    type Output = WordPhonemeProsody;

    #[inline]
    fn word(&self) -> &str {
        self.word.as_str()
    }

    /// Placeholder for an ignored morph: a single `ProsodicPhoneme::sp()`,
    /// the surface copied into `word`/`orig`/`read`/`pron`, and fixed
    /// part-of-speech fields.
    #[inline]
    fn new_ignored(surface: String, is_unknown: bool) -> Self::Output {
        let wildcard = || String::from("*");
        let word = surface.clone();
        let orig = surface.clone();
        let read = surface.clone();

        WordPhonemeProsody {
            word,
            phonemes: vec![ProsodicPhoneme::sp()],
            pos: String::from("記号"),
            pos_group1: String::from("空白"),
            pos_group2: wildcard(),
            pos_group3: wildcard(),
            ctype: wildcard(),
            cform: wildcard(),
            orig,
            read,
            pron: surface,
            accent_nucleus: 0,
            mora_count: 0,
            chain_rule: wildcard(),
            chain_flag: -1,
            is_unknown,
            is_ignored: true,
        }
    }

    /// Only refreshes `is_ignored` (true when there are no phonemes).
    #[inline]
    fn into_unmatched_remainder(self) -> Self::Output {
        let is_ignored = self.phonemes.is_empty();
        WordPhonemeProsody { is_ignored, ..self }
    }

    /// An exact match is handled exactly like a prefix match whose unknown
    /// flag comes from the single matched morph.
    #[inline]
    fn into_exact_match(self, morph: &MecabMorph) -> Self::Output {
        self.into_prefix_match(morph.is_unknown)
    }

    /// For an unknown word with no phonemes (or only a pause) the phonemes
    /// become a single `ProsodicPhoneme::unk()`. `is_ignored` is computed
    /// after that substitution.
    #[inline]
    fn into_prefix_match(mut self, is_unknown_word: bool) -> Self::Output {
        if is_unknown_word {
            let lacks_real_phonemes =
                self.phonemes.is_empty() || self.phonemes == [ProsodicPhoneme::pau()];
            if lacks_real_phonemes {
                self.phonemes = vec![ProsodicPhoneme::unk()];
            }
        }

        let is_ignored = self.phonemes.is_empty();
        WordPhonemeProsody {
            is_unknown: is_unknown_word,
            is_ignored,
            ..self
        }
    }

    /// Clears the unknown flag; `is_ignored` is true when there are no phonemes.
    #[inline]
    fn into_mismatch(self) -> Self::Output {
        let is_ignored = self.phonemes.is_empty();
        WordPhonemeProsody {
            is_unknown: false,
            is_ignored,
            ..self
        }
    }
}
/// Number of morphs to consume for a mismatching morph that contains
/// iteration marks.
///
/// The current morph always counts. The morph right after it is consumed as
/// well when it exists, is not ignored, and `map_word` ends with its surface.
/// The result is therefore either 1 or 2.
#[inline(always)]
pub(super) fn consume_odori_morphs(
    morphs: &[MecabMorph],
    morph_idx: usize,
    map_word: &str,
) -> usize {
    let absorbs_next = morphs
        .get(morph_idx + 1)
        .is_some_and(|ahead| !ahead.is_ignored && map_word.ends_with(ahead.surface.as_str()));

    if absorbs_next { 2 } else { 1 }
}
/// Number of morphs to consume when the current morph's surface does not
/// line up with `current_map_word`.
///
/// Anything other than a single-digit morph consumes exactly one morph.
///
/// For a digit morph, the run of digit morphs starting at `morph_idx`
/// (ignored morphs in between are skipped) is balanced against the run of
/// kanji-numeral map words made of `current_map_word` plus the leading
/// `Some` entries of `remaining_mapping`. One digit morph is reserved for
/// every numeral map word after the current one; the current word consumes
/// the rest. Ignored morphs lying between consumed digits are consumed too.
/// Returns 0 when every digit morph has to be left for later map words.
///
/// Fixes relative to the previous revision:
/// * The numeral count used to be an `i32`, so `saturating_sub(1)` produced
///   `-1` (not `0`) when no numeral word was present. That inflated the quota
///   by one and made the loop swallow ignored morphs *after* the last digit.
///   Counts are now `usize`, so the subtraction clamps at zero.
/// * The digit table was repeated four times with its second row duplicating
///   the first; a single helper now accepts ASCII and full-width digits.
#[inline(always)]
pub(super) fn consume_mismatched_morphs<T: IntoPhonemeMapItem>(
    morphs: &[MecabMorph],
    morph_idx: usize,
    current_map_word: &str,
    remaining_mapping: &[Option<T>],
) -> usize {
    // True when `surface` is exactly one Arabic digit, either ASCII `0-9`
    // or full-width U+FF10..=U+FF19.
    fn is_digit_surface(surface: &str) -> bool {
        let mut chars = surface.chars();
        matches!(
            (chars.next(), chars.next()),
            (Some('0'..='9' | '\u{FF10}'..='\u{FF19}'), None)
        )
    }

    // True when `word` is a single kanji numeral or magnitude character.
    fn is_kanji_numeral(word: &str) -> bool {
        matches!(
            word,
            "一" | "二" | "三" | "四" | "五" | "六" | "七" | "八" | "九"
                | "十" | "百" | "千" | "万" | "億" | "兆" | "〇" | "零"
        )
    }

    if !is_digit_surface(&morphs[morph_idx].surface) {
        return 1;
    }

    // Consecutive digit morphs from here on, looking through ignored morphs.
    let digit_morphs_count = morphs[morph_idx..]
        .iter()
        .filter(|m| !m.is_ignored)
        .take_while(|m| is_digit_surface(&m.surface))
        .count();

    // Numeral map words: the current one (if it is a numeral) plus the
    // uninterrupted run of numerals at the front of the remaining mapping.
    let digit_maps_count = usize::from(is_kanji_numeral(current_map_word))
        + remaining_mapping
            .iter()
            .flatten()
            .take_while(|map| is_kanji_numeral(map.word()))
            .count();

    // Leave one digit morph for each numeral word that still follows.
    let target_remaining_morphs = digit_maps_count.saturating_sub(1);
    if digit_morphs_count <= target_remaining_morphs {
        return 0;
    }
    let needed_non_ignored = digit_morphs_count - target_remaining_morphs;

    let mut consumed = 0;
    let mut counted_non_ignored = 0;
    while let Some(m) = morphs.get(morph_idx + consumed) {
        if !m.is_ignored {
            if !is_digit_surface(&m.surface) {
                break;
            }
            counted_non_ignored += 1;
        }
        consumed += 1;
        if counted_non_ignored >= needed_non_ignored {
            break;
        }
    }
    consumed
}
impl OpenJTalk {
/// Builds one [`WordPhonemePair`] per NJD feature (word taken from
/// `string`, phonemes initially empty) and lets
/// `assign_and_merge_phonemes` fill in and merge the phonemes.
///
/// # Errors
/// Propagates any error returned by `assign_and_merge_phonemes`.
pub(crate) fn g2p_pairs_inner(
    &mut self,
    njd_features: &[NjdFeature],
    is_non_pause_symbol: fn(&str) -> bool,
) -> Result<Vec<WordPhonemePair>, HaqumeiError> {
    let mut pairs: Vec<WordPhonemePair> = Vec::with_capacity(njd_features.len());
    for feature in njd_features {
        pairs.push(WordPhonemePair {
            word: feature.string.clone(),
            phonemes: Vec::new(),
        });
    }

    self.assign_and_merge_phonemes(njd_features, &mut pairs, is_non_pause_symbol)?;
    Ok(pairs)
}
/// Builds one [`WordPhonemeDetail`] per NJD feature, copying the feature's
/// linguistic fields, and lets `assign_and_merge_phonemes` fill in and merge
/// the phonemes.
///
/// Entries start with empty `phonemes`/`features` and with `is_unknown` and
/// `is_ignored` cleared.
///
/// # Errors
/// Propagates any error returned by `assign_and_merge_phonemes`.
pub(crate) fn g2p_mapping_inner(
    &mut self,
    njd_features: &[NjdFeature],
    is_non_pause_symbol: fn(&str) -> bool,
) -> Result<Vec<WordPhonemeDetail>, HaqumeiError> {
    let mut details: Vec<WordPhonemeDetail> = Vec::with_capacity(njd_features.len());
    for feature in njd_features {
        details.push(WordPhonemeDetail {
            word: feature.string.clone(),
            phonemes: Vec::new(),
            features: Vec::new(),
            pos: feature.pos.clone(),
            pos_group1: feature.pos_group1.clone(),
            pos_group2: feature.pos_group2.clone(),
            pos_group3: feature.pos_group3.clone(),
            ctype: feature.ctype.clone(),
            cform: feature.cform.clone(),
            orig: feature.orig.clone(),
            read: feature.read.clone(),
            pron: feature.pron.clone(),
            accent_nucleus: feature.acc,
            mora_count: feature.mora_size,
            chain_rule: feature.chain_rule.clone(),
            chain_flag: feature.chain_flag,
            is_unknown: false,
            is_ignored: false,
        });
    }

    self.assign_and_merge_phonemes(njd_features, &mut details, is_non_pause_symbol)?;
    Ok(details)
}
/// Builds one [`WordPhonemeProsody`] per NJD feature, copying the feature's
/// linguistic fields, and lets `assign_and_merge_prosodic_phonemes` fill in
/// the prosodic phonemes and merge entries.
///
/// Entries start with empty `phonemes` and with `is_unknown` and
/// `is_ignored` cleared.
///
/// # Errors
/// Propagates any error returned by `assign_and_merge_prosodic_phonemes`.
pub(crate) fn g2p_mapping_prosody_inner(
    &mut self,
    njd_features: &[NjdFeature],
    is_non_pause_symbol: fn(&str) -> bool,
) -> Result<Vec<WordPhonemeProsody>, HaqumeiError> {
    let mut entries: Vec<WordPhonemeProsody> = Vec::with_capacity(njd_features.len());
    for feature in njd_features {
        entries.push(WordPhonemeProsody {
            word: feature.string.clone(),
            phonemes: Vec::new(),
            pos: feature.pos.clone(),
            pos_group1: feature.pos_group1.clone(),
            pos_group2: feature.pos_group2.clone(),
            pos_group3: feature.pos_group3.clone(),
            ctype: feature.ctype.clone(),
            cform: feature.cform.clone(),
            orig: feature.orig.clone(),
            read: feature.read.clone(),
            pron: feature.pron.clone(),
            accent_nucleus: feature.acc,
            mora_count: feature.mora_size,
            chain_rule: feature.chain_rule.clone(),
            chain_flag: feature.chain_flag,
            is_unknown: false,
            is_ignored: false,
        });
    }

    self.assign_and_merge_prosodic_phonemes(njd_features, &mut entries, is_non_pause_symbol)?;
    Ok(entries)
}
/// Distributes the phonemes of the JPCommon label onto `mapping` and then
/// folds absorbed long-vowel entries into the entry before them.
///
/// Steps:
/// 1. Build the JPCommon label for `njd_features` and obtain the map from
///    word-node address to feature index.
/// 2. Give every feature whose pronunciation is a pause symbol (`、`, `?`,
///    `!`, ASCII or full-width) a single `Pau`, unless
///    `is_non_pause_symbol` returns true for its surface string.
/// 3. Walk the label's phoneme list and push each non-pause phoneme onto the
///    entry of the word it belongs to; phonemes whose word cannot be
///    resolved are dropped.
/// 4. Refresh the JPCommon and NJD state.
/// 5. Compact `mapping`: an entry (other than the first) that received no
///    phonemes and whose pronunciation consists only of `ー` is merged into
///    the previously kept entry, provided that entry has phonemes and is not
///    a lone pause.
///
/// `mapping` is expected to hold one entry per element of `njd_features`,
/// in the same order (it is indexed by feature index).
///
/// # Errors
/// Propagates the error of `prepare_jpcommon_label_internal`; in that case
/// `mapping` is left untouched.
pub(crate) fn assign_and_merge_phonemes<T: WordPhonemeEntry>(
    &mut self,
    njd_features: &[NjdFeature],
    mapping: &mut Vec<T>,
    is_non_pause_symbol: fn(&str) -> bool,
) -> Result<(), HaqumeiError> {
    // SAFETY: NOTE(review) — relies on `self.njd` / `self.jp_common` wrapping
    // live, initialised Open JTalk handles; confirm at their construction site.
    let ptr_to_idx = unsafe { self.prepare_jpcommon_label_internal(njd_features)? };

    for (f_idx, f) in njd_features.iter().enumerate() {
        // The full-width forms are written as escapes so they cannot be
        // folded into their ASCII look-alikes by text normalisation.
        let is_pause_pron = matches!(
            f.pron.as_str(),
            "、" | "?" | "!" | "\u{FF1F}" | "\u{FF01}"
        );
        if is_pause_pron && !is_non_pause_symbol(&f.string) {
            mapping[f_idx].phonemes_mut().push(Phoneme::Pau);
        }
    }

    // SAFETY: NOTE(review) — dereferences raw nodes of the label that was
    // just built above; assumes the phoneme -> mora -> word `up` links and
    // the `next` chain are either null or valid until the refresh calls.
    unsafe {
        let jp = self.jp_common.inner.as_mut();
        let mut p = (*jp.label).phoneme_head;
        while !p.is_null() {
            let s_ptr = (*p).phoneme;
            if !s_ptr.is_null() {
                // Debug builds validate the phoneme string; release builds
                // use the infallible conversion.
                let s = if cfg!(debug_assertions) {
                    Phoneme::try_from_ptr(s_ptr).unwrap()
                } else {
                    Phoneme::from(s_ptr)
                };
                // Pauses were already placed from the pronunciation above.
                if s != Phoneme::Pau {
                    let mora = (*p).up;
                    if !mora.is_null() {
                        let word = (*mora).up;
                        if !word.is_null()
                            && let Some(&idx) = ptr_to_idx.get(&(word as usize))
                            && let Some(target) = mapping.get_mut(idx)
                        {
                            target.phonemes_mut().push(s);
                        }
                    }
                }
            }
            p = (*p).next;
        }
        ffi::JPCommon_refresh(jp);
        ffi::NJD_refresh(self.njd.inner.as_mut());
    }

    // In-place compaction: `write_idx` is the next slot to keep, so
    // `write_idx - 1` is always the most recently kept entry. The first
    // entry is never merged, hence `write_idx >= 1` whenever `read_idx > 0`.
    let mut write_idx = 0;
    for read_idx in 0..mapping.len() {
        let should_merge = read_idx > 0
            && mapping[read_idx].phonemes().is_empty()
            && {
                let pron = &njd_features[read_idx].pron;
                !pron.is_empty() && pron.chars().all(|c| c == 'ー')
            }
            && {
                let prev_phonemes = mapping[write_idx - 1].phonemes();
                let prev_is_pause =
                    prev_phonemes.len() == 1 && prev_phonemes[0] == Phoneme::Pau;
                !prev_is_pause && !prev_phonemes.is_empty()
            };

        if should_merge {
            let (kept, rest) = mapping.split_at_mut(read_idx);
            kept[write_idx - 1].merge_from(&mut rest[0]);
            continue;
        }
        if write_idx != read_idx {
            mapping.swap(write_idx, read_idx);
        }
        write_idx += 1;
    }
    mapping.truncate(write_idx);
    Ok(())
}
/// Distributes phonemes with pitch and prosody markers onto `mapping` and
/// then folds absorbed long-vowel entries into the entry before them.
///
/// The full-context labels and the JPCommon phoneme list are walked in
/// lock-step: `sil` labels have no phoneme node, every other label is paired
/// with the next node of the list.
///
/// * Features whose pronunciation is a pause symbol (`、`, `?`, `!`, ASCII
///   or full-width) get one marker per character of their surface string
///   (`Interrogative`, `Exclamatory`, otherwise `Pause`), unless
///   `is_non_pause_symbol` returns true for that string.
/// * At a pause phoneme, and at the final `sil` label, `Exclamatory` /
///   `Interrogative` markers are appended when the previous accent phrase is
///   flagged accordingly and none of the next three entries (starting at the
///   target) already carries such a marker.
/// * Every other phoneme is pushed with a `High`/`Low` pitch derived from the
///   accent position of the current accent phrase and the forward mora
///   position, followed by `AccentPhraseBoundary` when the mora is the last
///   of its phrase, the next label starts a phrase, and the phoneme is a
///   vowel, `N` or `cl`.
/// * Finally an entry (other than the first) with no phonemes whose
///   pronunciation consists only of `ー` is merged into the previously kept
///   entry, provided that entry has phonemes and is not a lone pause.
///
/// Changes relative to the previous revision: the question/exclamation arms
/// listed the same ASCII character twice; they now name the ASCII and the
/// full-width character (as escapes), and the pronunciation check accepts
/// both widths as well.
///
/// # Errors
/// Propagates errors from `extract_fullcontext_labels` and
/// `prepare_jpcommon_label_internal`.
pub(crate) fn assign_and_merge_prosodic_phonemes(
    &mut self,
    njd_features: &[NjdFeature],
    mapping: &mut Vec<WordPhonemeProsody>,
    is_non_pause_symbol: fn(&str) -> bool,
) -> Result<(), HaqumeiError> {
    // True when any of the (up to) three entries starting at `target_idx`
    // already carries an interrogative or exclamatory marker.
    fn has_sentence_marker(mapping: &[WordPhonemeProsody], target_idx: usize) -> bool {
        let end_idx = (target_idx + 3).min(mapping.len());
        mapping[target_idx..end_idx].iter().any(|m| {
            m.phonemes.iter().any(|p| {
                matches!(
                    p,
                    ProsodicPhoneme::Interrogative | ProsodicPhoneme::Exclamatory
                )
            })
        })
    }

    let labels = self.extract_fullcontext_labels(njd_features)?;

    // SAFETY: NOTE(review) — relies on `self.njd` / `self.jp_common` wrapping
    // live, initialised Open JTalk handles; confirm at their construction site.
    let ptr_to_idx = unsafe { self.prepare_jpcommon_label_internal(njd_features)? };

    for (f_idx, f) in njd_features.iter().enumerate() {
        // Full-width forms are written as escapes so they cannot be folded
        // into their ASCII look-alikes by text normalisation.
        let is_pause_pron = matches!(
            f.pron.as_str(),
            "、" | "?" | "!" | "\u{FF1F}" | "\u{FF01}"
        );
        if is_pause_pron && !is_non_pause_symbol(&f.string) {
            for c in f.string.chars() {
                let marker = match c {
                    '?' | '\u{FF1F}' => ProsodicPhoneme::Interrogative,
                    '!' | '\u{FF01}' => ProsodicPhoneme::Exclamatory,
                    _ => ProsodicPhoneme::Pause,
                };
                mapping[f_idx].phonemes.push(marker);
            }
        }
    }

    // SAFETY: NOTE(review) — dereferences raw nodes of the label that was
    // just built above; assumes the phoneme -> mora -> word `up` links and
    // the `next` chain are either null or valid until the refresh calls.
    unsafe {
        let jp = self.jp_common.inner.as_mut();
        let num_labels = labels.len();
        let mut last_target_idx: Option<usize> = None;
        let mut p = (*jp.label).phoneme_head;

        for (label_idx, label) in labels.iter().enumerate() {
            let p3 = label.phoneme.c.as_deref().unwrap_or("");
            let (is_inter, is_excl) = label
                .accent_phrase_prev
                .as_ref()
                .map(|a| (a.is_interrogative, a.is_exclamatory))
                .unwrap_or((false, false));

            // `sil` labels have no phoneme node, so `p` is not advanced.
            if p3 == "sil" {
                if label_idx + 1 == num_labels
                    && (is_inter || is_excl)
                    && let Some(target_idx) = last_target_idx
                    && !has_sentence_marker(mapping.as_slice(), target_idx)
                {
                    if is_excl {
                        mapping[target_idx]
                            .phonemes
                            .push(ProsodicPhoneme::Exclamatory);
                    }
                    if is_inter {
                        mapping[target_idx]
                            .phonemes
                            .push(ProsodicPhoneme::Interrogative);
                    }
                }
                continue;
            }

            // More labels than phoneme nodes: nothing left to attach.
            if p.is_null() {
                continue;
            }

            // Resolve the owning word; fall back to the last resolved word
            // when this phoneme's word is not in the index.
            let mora = (*p).up;
            if !mora.is_null() {
                let word = (*mora).up;
                if !word.is_null()
                    && let Some(&idx) = ptr_to_idx.get(&(word as usize))
                {
                    last_target_idx = Some(idx);
                }
            }
            let Some(target_idx) = last_target_idx else {
                p = (*p).next;
                continue;
            };

            let already_marked = has_sentence_marker(mapping.as_slice(), target_idx);
            let target = &mut mapping[target_idx];

            let s_ptr = (*p).phoneme;
            // Debug builds validate the phoneme string; release builds use
            // the infallible conversion.
            let s = if cfg!(debug_assertions) {
                Phoneme::try_from_ptr(s_ptr).unwrap()
            } else {
                Phoneme::from(s_ptr)
            };

            if s == Phoneme::Pau {
                if (is_inter || is_excl) && !already_marked {
                    if is_excl {
                        target.phonemes.push(ProsodicPhoneme::Exclamatory);
                    }
                    if is_inter {
                        target.phonemes.push(ProsodicPhoneme::Interrogative);
                    }
                }
                p = (*p).next;
                continue;
            }

            // f2: accent position of the current accent phrase;
            // a2: forward position of the mora. Both default to 0.
            let f2 = label
                .accent_phrase_curr
                .as_ref()
                .map(|a| a.accent_position as i32)
                .unwrap_or(0);
            let a2 = label
                .mora
                .as_ref()
                .map(|m| m.position_forward as i32)
                .unwrap_or(0);
            let is_high = match f2 {
                0 => a2 >= 2,
                1 => a2 == 1,
                _ => (2..=f2).contains(&a2),
            };
            let pitch = if is_high {
                PitchAccent::High
            } else {
                PitchAccent::Low
            };
            target.phonemes.push(ProsodicPhoneme::Phoneme {
                phoneme: s,
                pitch: Some(pitch),
            });

            // a3: backward position of the mora; a2_next: forward position
            // of the next label's mora. -50 stands for "not available".
            let a3 = label
                .mora
                .as_ref()
                .map(|m| m.position_backward as i32)
                .unwrap_or(-50);
            let a2_next = labels
                .get(label_idx + 1)
                .and_then(|next| next.mora.as_ref())
                .map(|m| m.position_forward as i32)
                .unwrap_or(-50);
            if a3 == 1
                && a2_next == 1
                && matches!(
                    p3,
                    "a" | "e" | "i" | "o" | "u" | "A" | "E" | "I" | "O" | "U" | "N" | "cl"
                )
            {
                target.phonemes.push(ProsodicPhoneme::AccentPhraseBoundary);
            }

            p = (*p).next;
        }

        ffi::JPCommon_refresh(jp);
        ffi::NJD_refresh(self.njd.inner.as_mut());
    }

    // In-place compaction: `write_idx` is the next slot to keep, so
    // `write_idx - 1` is always the most recently kept entry. The first
    // entry is never merged, hence `write_idx >= 1` whenever `read_idx > 0`.
    let mut write_idx = 0;
    for read_idx in 0..mapping.len() {
        let should_merge = read_idx > 0
            && mapping[read_idx].phonemes.is_empty()
            && {
                let pron = &njd_features[read_idx].pron;
                !pron.is_empty() && pron.chars().all(|c| c == 'ー')
            }
            && {
                let prev_phonemes = &mapping[write_idx - 1].phonemes;
                let prev_is_pause =
                    prev_phonemes.len() == 1 && prev_phonemes[0] == ProsodicPhoneme::pau();
                !prev_is_pause && !prev_phonemes.is_empty()
            };

        if should_merge {
            let (kept, rest) = mapping.split_at_mut(read_idx);
            kept[write_idx - 1].merge_from(&mut rest[0]);
            continue;
        }
        if write_idx != read_idx {
            mapping.swap(write_idx, read_idx);
        }
        write_idx += 1;
    }
    mapping.truncate(write_idx);
    Ok(())
}
/// Aligns the phoneme `mapping` with the MeCab `morphs` and produces one
/// output item per mapping entry, interleaved with placeholder items for
/// ignored morphs.
///
/// For every mapping entry, ignored morphs in front of the cursor are emitted
/// first. The entry is then classified against the current morph:
/// * no morph left — `into_unmatched_remainder`;
/// * word equals the surface — `into_exact_match`, one morph consumed;
/// * word starts with the surface — following morphs are consumed while they
///   continue to spell out the word; ignored morphs met on the way are
///   emitted before the entry (if nothing had been matched yet) or after it;
/// * otherwise — `into_mismatch`, with the number of morphs to skip decided
///   by `consume_odori_morphs` or `consume_mismatched_morphs`.
///
/// After the last entry, remaining ignored morphs are emitted and remaining
/// non-ignored morphs are dropped. The function currently always returns `Ok`.
#[inline(always)]
pub(crate) fn make_phoneme_mapping<T: IntoPhonemeMapItem>(
    &self,
    morphs: Vec<MecabMorph>,
    mapping: Vec<T>,
) -> Result<Vec<T::Output>, HaqumeiError> {
    let mut aligned = Vec::with_capacity(morphs.len());
    // Entries are taken out one by one; the tail is still inspected by
    // `consume_mismatched_morphs`, hence the `Option` slots.
    let mut slots: Vec<Option<T>> = mapping.into_iter().map(Some).collect();
    let mut cursor = 0;

    for slot_idx in 0..slots.len() {
        // Emit ignored morphs sitting in front of the next real morph.
        while let Some(skipped) = morphs.get(cursor).filter(|m| m.is_ignored) {
            aligned.push(T::new_ignored(skipped.surface.clone(), skipped.is_unknown));
            cursor += 1;
        }

        let entry = slots[slot_idx].take().unwrap();
        let Some(morph) = morphs.get(cursor) else {
            aligned.push(entry.into_unmatched_remainder());
            continue;
        };

        if entry.word() == morph.surface {
            aligned.push(entry.into_exact_match(morph));
            cursor += 1;
            continue;
        }

        if !entry.word().starts_with(&morph.surface) {
            let skipped = if has_odori_chars(&morph.surface) {
                consume_odori_morphs(&morphs, cursor, entry.word())
            } else {
                consume_mismatched_morphs(&morphs, cursor, entry.word(), &slots[slot_idx + 1..])
            };
            cursor += skipped;
            aligned.push(entry.into_mismatch());
            continue;
        }

        // Prefix match: keep consuming morphs while they spell out the word.
        let mut any_unknown = false;
        let mut matched_len = 0;
        let mut leading_ignored = Vec::new();
        let mut inner_ignored = Vec::new();
        while let Some(piece) = morphs.get(cursor) {
            if piece.is_ignored {
                let placeholder = T::new_ignored(piece.surface.clone(), piece.is_unknown);
                if matched_len == 0 {
                    leading_ignored.push(placeholder);
                } else {
                    inner_ignored.push(placeholder);
                }
                cursor += 1;
                continue;
            }

            let unmatched_tail = &entry.word()[matched_len..];
            if !unmatched_tail.starts_with(&piece.surface) {
                break;
            }
            any_unknown |= piece.is_unknown;
            matched_len += piece.surface.len();
            cursor += 1;
            if matched_len == entry.word().len() {
                break;
            }
        }
        aligned.extend(leading_ignored);
        aligned.push(entry.into_prefix_match(any_unknown));
        aligned.extend(inner_ignored);
    }

    // Trailing morphs: ignored ones are still emitted, the rest are dropped.
    for leftover in morphs.iter().skip(cursor).filter(|m| m.is_ignored) {
        aligned.push(T::new_ignored(leftover.surface.clone(), leftover.is_unknown));
    }
    Ok(aligned)
}
/// Loads `features` into the NJD, converts it to JPCommon nodes and rebuilds
/// the JPCommon label word by word.
///
/// Returns a map from the address of each label word node to the index of
/// the JPCommon node that produced it. A node only gets an entry when
/// pushing it changed the label's word tail to a non-null node.
///
/// The label is cleared if it already exists, otherwise it is allocated
/// zero-initialised with `calloc`; it is initialised afterwards either way.
///
/// # Errors
/// Propagates errors from `features_to_njd` and returns
/// `HaqumeiError::AllocationError` when the label cannot be allocated.
///
/// # Safety
/// NOTE(review): presumably requires `self.njd` and `self.jp_common` to wrap
/// valid, initialised Open JTalk structures; the returned addresses are only
/// meaningful until the label is cleared or refreshed — confirm with callers.
unsafe fn prepare_jpcommon_label_internal(
    &mut self,
    features: &[NjdFeature],
) -> Result<HashMap<usize, usize>, HaqumeiError> {
    Self::features_to_njd(features, &mut self.njd)?;

    let mut word_index_by_addr = HashMap::with_capacity(features.len());
    unsafe {
        let jp = self.jp_common.inner.as_mut();
        let njd = self.njd.inner.as_mut();
        ffi::njd2jpcommon(jp, njd);

        // Reuse an existing label, or allocate a fresh zeroed one.
        if jp.label.is_null() {
            let raw = libc::calloc(1, std::mem::size_of::<ffi::JPCommonLabel>());
            if raw.is_null() {
                return Err(HaqumeiError::AllocationError("ffi::JPCommonLabel"));
            }
            jp.label = raw as *mut ffi::JPCommonLabel;
        } else {
            ffi::JPCommonLabel_clear(jp.label);
        }
        ffi::JPCommonLabel_initialize(jp.label);

        let mut cursor = jp.head;
        let mut node_index = 0;
        while !cursor.is_null() {
            let tail_before = (*jp.label).word_tail;
            ffi::JPCommonLabel_push_word(
                jp.label,
                ffi::JPCommonNode_get_pron(cursor),
                ffi::JPCommonNode_get_pos(cursor),
                ffi::JPCommonNode_get_ctype(cursor),
                ffi::JPCommonNode_get_cform(cursor),
                ffi::JPCommonNode_get_acc(cursor),
                ffi::JPCommonNode_get_chain_flag(cursor),
            );
            let tail_after = (*jp.label).word_tail;

            // A changed tail means this node produced a new word in the label.
            if tail_before != tail_after && !tail_after.is_null() {
                word_index_by_addr.insert(tail_after as usize, node_index);
            }

            cursor = (*cursor).next;
            node_index += 1;
        }
    }
    Ok(word_index_by_addr)
}
}