1use crate::native::{KiwiGlobalConfigRaw, KiwiMorphemeRaw, KiwiTokenInfoRaw};
2
/// A caller-supplied token made of a form and a tag, optionally tied to a span.
///
/// When built only through [`PreAnalyzedToken::new`] and
/// [`PreAnalyzedToken::with_span`], `begin` and `end` are either both `None` or
/// both `Some`. The fields are public, so that pairing is a convention rather
/// than an enforced invariant.
#[derive(Debug, Clone)]
pub struct PreAnalyzedToken {
    /// Text of the token.
    pub form: String,
    /// Tag of the token, kept as text.
    /// NOTE(review): presumably a part-of-speech tag name — confirm with callers.
    pub tag: String,
    /// Start of the span, or `None` when no span was given.
    /// NOTE(review): the unit (bytes vs. characters) is not visible here.
    pub begin: Option<usize>,
    /// End of the span, or `None` when no span was given.
    /// NOTE(review): whether this bound is exclusive is not visible here.
    pub end: Option<usize>,
}

impl PreAnalyzedToken {
    /// Creates a token from `form` and `tag` with no span attached.
    ///
    /// Both arguments accept anything convertible into `String`, so `&str` and
    /// `String` can be passed directly.
    #[must_use]
    pub fn new(form: impl Into<String>, tag: impl Into<String>) -> Self {
        Self {
            form: form.into(),
            tag: tag.into(),
            begin: None,
            end: None,
        }
    }

    /// Consumes the token and returns it with its span set to `begin..end`.
    ///
    /// The values are stored exactly as given; no check that `begin <= end`
    /// is performed.
    #[must_use]
    pub fn with_span(mut self, begin: usize, end: usize) -> Self {
        self.begin = Some(begin);
        self.end = Some(end);
        self
    }
}
38
/// A `begin`/`end` pair marking where a sentence lies in some text.
///
/// NOTE(review): the unit of the offsets and whether `end` is exclusive are not
/// visible in this file — confirm against the code that produces these values.
#[derive(Debug, Clone, Copy)]
pub struct SentenceBoundary {
    /// Offset at which the sentence starts.
    pub begin: usize,
    /// Offset at which the sentence ends.
    pub end: usize,
}
49
/// An identifier paired with a similarity score.
///
/// NOTE(review): what `id` indexes and the range of `score` are not visible in
/// this file — confirm against the code that produces these values.
#[derive(Debug, Clone, Copy)]
pub struct SimilarityPair {
    /// Identifier of the item the score refers to.
    pub id: u32,
    /// Score associated with `id`.
    pub score: f32,
}
58
/// Plain-data (`Copy`) description of one analysed token.
///
/// Every field is filled verbatim from the same-named field of
/// `KiwiTokenInfoRaw` by the `From` impl in this file.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only; confirm them against the native library's documentation.
#[derive(Debug, Clone, Copy)]
pub struct TokenInfo {
    /// Presumably the position of the token's first character.
    pub chr_position: u32,
    /// Presumably the index of the word the token belongs to.
    pub word_position: u32,
    /// Presumably the index of the sentence the token belongs to.
    pub sent_position: u32,
    /// Presumably the line on which the token occurs.
    pub line_number: u32,
    /// Presumably the length of the token.
    pub length: u16,
    /// Tag as a raw numeric code; it is not decoded here.
    pub tag: u8,
    /// Either a sense id or a script code, per the name; which one applies is
    /// not visible here.
    pub sense_or_script: u8,
    /// Score reported for the token.
    pub score: f32,
    /// Presumably the cost attributed to typo correction.
    pub typo_cost: f32,
    /// Presumably an identifier of the typo form.
    pub typo_form_id: u32,
    /// Presumably the position of a token paired with this one; any "no pair"
    /// sentinel is not visible here.
    pub paired_token: u32,
    /// Presumably the index of the enclosing sub-sentence.
    pub sub_sent_position: u32,
    /// Dialect as a raw numeric code; it is not decoded here.
    pub dialect: u16,
}
92
93impl From<KiwiTokenInfoRaw> for TokenInfo {
94 fn from(value: KiwiTokenInfoRaw) -> Self {
95 Self {
96 chr_position: value.chr_position,
97 word_position: value.word_position,
98 sent_position: value.sent_position,
99 line_number: value.line_number,
100 length: value.length,
101 tag: value.tag,
102 sense_or_script: value.sense_or_script,
103 score: value.score,
104 typo_cost: value.typo_cost,
105 typo_form_id: value.typo_form_id,
106 paired_token: value.paired_token,
107 sub_sent_position: value.sub_sent_position,
108 dialect: value.dialect,
109 }
110 }
111}
112
/// A word candidate together with the statistics reported for it.
///
/// NOTE(review): the scale and meaning of the scores are not visible in this
/// file — confirm against the code that produces these values.
#[derive(Debug, Clone)]
pub struct ExtractedWord {
    /// Text of the word.
    pub form: String,
    /// Score assigned to the word.
    pub score: f32,
    /// Frequency reported for the word, presumably an occurrence count.
    pub frequency: i32,
    /// Presumably a part-of-speech related score, per the name.
    pub pos_score: f32,
}
125
/// Plain-data (`Copy`) description of one morpheme.
///
/// Every field is filled verbatim from the same-named field of
/// `KiwiMorphemeRaw` by the `From` impl in this file.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only; confirm them against the native library's documentation.
#[derive(Debug, Clone, Copy)]
pub struct MorphemeInfo {
    /// Tag as a raw numeric code; it is not decoded here.
    pub tag: u8,
    /// Presumably the sense number of the morpheme.
    pub sense_id: u8,
    /// Presumably a user-assigned score.
    pub user_score: f32,
    /// Presumably the morpheme's id in the language model.
    pub lm_morpheme_id: u32,
    /// Presumably the id of the morpheme this one derives from.
    pub orig_morpheme_id: u32,
    /// Dialect as a raw numeric code; it is not decoded here.
    pub dialect: u16,
}
142
143impl From<KiwiMorphemeRaw> for MorphemeInfo {
144 fn from(value: KiwiMorphemeRaw) -> Self {
145 Self {
146 tag: value.tag,
147 sense_id: value.sense_id,
148 user_score: value.user_score,
149 lm_morpheme_id: value.lm_morpheme_id,
150 orig_morpheme_id: value.orig_morpheme_id,
151 dialect: value.dialect,
152 }
153 }
154}
155
/// One sense entry of a morpheme, with its form and tag held as owned text.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only; confirm them against the code that builds this type.
#[derive(Debug, Clone)]
pub struct MorphemeSense {
    /// Presumably the id of the morpheme this sense belongs to.
    pub morph_id: u32,
    /// Text of the morpheme.
    pub form: String,
    /// Tag of the morpheme, kept as text.
    pub tag: String,
    /// Presumably the sense number.
    pub sense_id: u8,
    /// Dialect as a raw numeric code; it is not decoded here.
    pub dialect: u16,
}
170
/// Analyzer-wide settings, mirroring `KiwiGlobalConfigRaw` field for field.
///
/// The only representational difference from the raw struct is
/// `integrate_allomorph`, which is a `bool` here and an integer flag there;
/// the `From` impls in this file convert in both directions. `Default` yields
/// the converted value of `KiwiGlobalConfigRaw::default()`.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names only; confirm them against the native library's documentation.
#[derive(Debug, Clone, Copy)]
pub struct GlobalConfig {
    /// Whether allomorphs are integrated, per the name.
    pub integrate_allomorph: bool,
    /// Presumably the score threshold below which candidates are cut off.
    pub cut_off_threshold: f32,
    /// Presumably the scale applied to scores of unknown forms.
    pub unk_form_score_scale: f32,
    /// Presumably the bias applied to scores of unknown forms.
    pub unk_form_score_bias: f32,
    /// Presumably the penalty applied for spaces.
    pub space_penalty: f32,
    /// Presumably the weight applied to typo costs.
    pub typo_cost_weight: f32,
    /// Presumably the maximum size of an unknown form.
    pub max_unk_form_size: u32,
    /// Presumably how many spaces are tolerated.
    pub space_tolerance: u32,
}
191
192impl Default for GlobalConfig {
193 fn default() -> Self {
194 KiwiGlobalConfigRaw::default().into()
195 }
196}
197
198impl From<KiwiGlobalConfigRaw> for GlobalConfig {
199 fn from(value: KiwiGlobalConfigRaw) -> Self {
200 Self {
201 integrate_allomorph: value.integrate_allomorph != 0,
202 cut_off_threshold: value.cut_off_threshold,
203 unk_form_score_scale: value.unk_form_score_scale,
204 unk_form_score_bias: value.unk_form_score_bias,
205 space_penalty: value.space_penalty,
206 typo_cost_weight: value.typo_cost_weight,
207 max_unk_form_size: value.max_unk_form_size,
208 space_tolerance: value.space_tolerance,
209 }
210 }
211}
212
213impl From<GlobalConfig> for KiwiGlobalConfigRaw {
214 fn from(value: GlobalConfig) -> Self {
215 Self {
216 integrate_allomorph: if value.integrate_allomorph { 1 } else { 0 },
217 cut_off_threshold: value.cut_off_threshold,
218 unk_form_score_scale: value.unk_form_score_scale,
219 unk_form_score_bias: value.unk_form_score_bias,
220 space_penalty: value.space_penalty,
221 typo_cost_weight: value.typo_cost_weight,
222 max_unk_form_size: value.max_unk_form_size,
223 space_tolerance: value.space_tolerance,
224 }
225 }
226}