1use std::env;
7use std::os::raw::c_int;
8use std::path::{Path, PathBuf};
9
10use crate::constants::{
11 KIWI_BUILD_DEFAULT, KIWI_DIALECT_STANDARD, KIWI_MATCH_ALL_WITH_NORMALIZING,
12};
13use crate::discovery::discover_default_model_path;
14use crate::error::{KiwiError, Result};
15
/// A custom dictionary entry made of a surface form, a tag, and a score.
///
/// NOTE(review): the tag is presumably a part-of-speech label and the score a
/// weight understood by the analyzer; nothing in this file validates either.
#[derive(Debug, Clone, PartialEq)]
pub struct UserWord {
    /// Surface form of the word.
    pub word: String,
    /// Tag attached to the word (format is not checked here).
    pub tag: String,
    /// Score attached to the word (range is not checked here).
    pub score: f32,
}
26
27impl UserWord {
28 pub fn new(word: impl Into<String>, tag: impl Into<String>, score: f32) -> Self {
30 Self {
31 word: word.into(),
32 tag: tag.into(),
33 score,
34 }
35 }
36}
37
/// Per-call analysis settings.
///
/// NOTE(review): apart from `top_n`, the values are stored as-is and this file
/// does not interpret them; the meanings below are inferred from field names
/// and the defaults, and should be confirmed against the code that consumes
/// these options.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AnalyzeOptions {
    /// Number of results requested. `validated_top_n` rejects `0` and values
    /// larger than `c_int::MAX`.
    pub top_n: usize,
    /// Matching options (presumably a bit set of `KIWI_MATCH_*` flags).
    pub match_options: i32,
    /// Open-ending switch; off by default.
    pub open_ending: bool,
    /// Dialects allowed during analysis (presumably `KIWI_DIALECT_*` flags).
    pub allowed_dialects: i32,
    /// Cost associated with dialects; `3.0` by default.
    pub dialect_cost: f32,
}
55
56impl Default for AnalyzeOptions {
57 fn default() -> Self {
58 Self {
59 top_n: 1,
60 match_options: KIWI_MATCH_ALL_WITH_NORMALIZING,
61 open_ending: false,
62 allowed_dialects: KIWI_DIALECT_STANDARD,
63 dialect_cost: 3.0,
64 }
65 }
66}
67
68impl AnalyzeOptions {
69 pub fn with_top_n(mut self, top_n: usize) -> Self {
71 self.top_n = top_n;
72 self
73 }
74
75 pub fn with_match_options(mut self, match_options: i32) -> Self {
77 self.match_options = match_options;
78 self
79 }
80
81 pub fn with_open_ending(mut self, open_ending: bool) -> Self {
83 self.open_ending = open_ending;
84 self
85 }
86
87 pub fn with_allowed_dialects(mut self, allowed_dialects: i32) -> Self {
89 self.allowed_dialects = allowed_dialects;
90 self
91 }
92
93 pub fn with_dialect_cost(mut self, dialect_cost: f32) -> Self {
95 self.dialect_cost = dialect_cost;
96 self
97 }
98
99 pub(crate) fn validated_top_n(&self) -> Result<c_int> {
100 if self.top_n == 0 {
101 return Err(KiwiError::InvalidArgument(
102 "AnalyzeOptions.top_n must be >= 1".to_string(),
103 ));
104 }
105 if self.top_n > c_int::MAX as usize {
106 return Err(KiwiError::InvalidArgument(format!(
107 "AnalyzeOptions.top_n must be <= {}",
108 c_int::MAX
109 )));
110 }
111 Ok(self.top_n as c_int)
112 }
113}
114
/// Settings used when building an analyzer instance.
///
/// NOTE(review): this file only stores these values; the meanings below are
/// inferred from field names and the defaults, and should be confirmed
/// against the code that consumes this config.
#[derive(Debug, Clone, PartialEq)]
pub struct BuilderConfig {
    /// Location of the model; `Default` fills it from
    /// `discover_default_model_path()`, which may yield `None`.
    pub model_path: Option<PathBuf>,
    /// Thread count; defaults to `-1` (presumably "let the library decide"
    /// — TODO confirm).
    pub num_threads: i32,
    /// Build options (presumably `KIWI_BUILD_*` flags).
    pub build_options: i32,
    /// Dialects enabled at build time (presumably `KIWI_DIALECT_*` flags).
    pub enabled_dialects: i32,
    /// Typo cost threshold; `0.0` by default.
    pub typo_cost_threshold: f32,
}
129
130impl Default for BuilderConfig {
131 fn default() -> Self {
132 Self {
133 model_path: discover_default_model_path(),
134 num_threads: -1,
135 build_options: KIWI_BUILD_DEFAULT,
136 enabled_dialects: KIWI_DIALECT_STANDARD,
137 typo_cost_threshold: 0.0,
138 }
139 }
140}
141
142impl BuilderConfig {
143 pub fn with_model_path(mut self, model_path: impl AsRef<Path>) -> Self {
145 self.model_path = Some(model_path.as_ref().to_path_buf());
146 self
147 }
148
149 pub fn with_num_threads(mut self, num_threads: i32) -> Self {
151 self.num_threads = num_threads;
152 self
153 }
154
155 pub fn with_build_options(mut self, build_options: i32) -> Self {
157 self.build_options = build_options;
158 self
159 }
160
161 pub fn with_enabled_dialects(mut self, enabled_dialects: i32) -> Self {
163 self.enabled_dialects = enabled_dialects;
164 self
165 }
166
167 pub fn with_typo_cost_threshold(mut self, typo_cost_threshold: f32) -> Self {
169 self.typo_cost_threshold = typo_cost_threshold;
170 self
171 }
172}
173
/// Top-level configuration: library location, builder settings, default
/// analysis options, and user dictionary entries.
#[derive(Debug, Clone)]
pub struct KiwiConfig {
    /// Path to the library to load (presumably the native Kiwi shared
    /// library — confirm). `Default` seeds it from the `KIWI_LIBRARY_PATH`
    /// environment variable when that variable is set.
    pub library_path: Option<PathBuf>,
    /// Settings used when building the analyzer.
    pub builder: BuilderConfig,
    /// Analysis options stored as the default (presumably used when a call
    /// supplies none — confirm against the caller).
    pub default_analyze_options: AnalyzeOptions,
    /// User dictionary entries, in insertion order.
    pub user_words: Vec<UserWord>,
}
186
187impl Default for KiwiConfig {
188 fn default() -> Self {
189 Self {
190 library_path: env::var_os("KIWI_LIBRARY_PATH").map(PathBuf::from),
191 builder: BuilderConfig::default(),
192 default_analyze_options: AnalyzeOptions::default(),
193 user_words: Vec::new(),
194 }
195 }
196}
197
198impl KiwiConfig {
199 pub fn with_library_path(mut self, library_path: impl AsRef<Path>) -> Self {
201 self.library_path = Some(library_path.as_ref().to_path_buf());
202 self
203 }
204
205 pub fn with_model_path(mut self, model_path: impl AsRef<Path>) -> Self {
207 self.builder = self.builder.with_model_path(model_path);
208 self
209 }
210
211 pub fn with_builder(mut self, builder: BuilderConfig) -> Self {
213 self.builder = builder;
214 self
215 }
216
217 pub fn with_default_analyze_options(mut self, options: AnalyzeOptions) -> Self {
219 self.default_analyze_options = options;
220 self
221 }
222
223 pub fn add_user_word(
225 mut self,
226 word: impl Into<String>,
227 tag: impl Into<String>,
228 score: f32,
229 ) -> Self {
230 self.user_words.push(UserWord::new(word, tag, score));
231 self
232 }
233}
234
/// One token of an analysis result.
///
/// NOTE(review): this file only declares the fields. The descriptions below
/// are inferred from the field names and must be confirmed against the code
/// that fills them in, in particular the unit of `position`/`length` and the
/// conditions under which the optional fields are `None`.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// Token text.
    pub form: String,
    /// Tag label of the token.
    pub tag: String,
    /// Start offset of the token in the input (unit not specified here).
    pub position: usize,
    /// Length of the token (unit not specified here).
    pub length: usize,
    /// Index of the word the token belongs to (presumably).
    pub word_position: usize,
    /// Index of the sentence the token belongs to (presumably).
    pub sent_position: usize,
    /// Line number the token appears on (presumably).
    pub line_number: usize,
    /// Index of the sub-sentence the token belongs to (presumably).
    pub sub_sent_position: usize,
    /// Score assigned to the token.
    pub score: f32,
    /// Typo-correction cost associated with the token.
    pub typo_cost: f32,
    /// Identifier of the typo form associated with the token.
    pub typo_form_id: u32,
    /// Index of a paired token, when there is one.
    pub paired_token: Option<usize>,
    /// Morpheme identifier, when available.
    pub morpheme_id: Option<u32>,
    /// Numeric tag identifier, when available.
    pub tag_id: Option<u8>,
    /// Sense or script identifier, when available.
    pub sense_or_script: Option<u8>,
    /// Dialect identifier, when available.
    pub dialect: Option<u16>,
}
271
/// One candidate analysis: a sequence of tokens together with a probability.
#[derive(Debug, Clone)]
pub struct AnalysisCandidate {
    /// Probability value attached to this candidate.
    /// NOTE(review): scale (linear vs. log) is not visible here — confirm.
    pub probability: f32,
    /// Tokens making up this candidate, in order.
    pub tokens: Vec<Token>,
}
280
/// Alias for [`AnalysisCandidate`]; both names refer to the same type.
pub type Analysis = AnalysisCandidate;
283
/// A sentence span, optionally carrying its tokens and nested sub-sentences.
///
/// NOTE(review): the unit of `start`/`end` and whether `end` is exclusive are
/// not visible in this file — confirm against the code that builds this type.
#[derive(Debug, Clone)]
pub struct Sentence {
    /// Sentence text.
    pub text: String,
    /// Start offset of the sentence in the input.
    pub start: usize,
    /// End offset of the sentence in the input.
    pub end: usize,
    /// Tokens of the sentence, when they were produced.
    pub tokens: Option<Vec<Token>>,
    /// Nested sentences, when they were produced.
    pub subs: Option<Vec<Sentence>>,
}