mecab_sys/tagger.rs
1use super::ffi;
2use super::{Error, Model, Node, NodeCursor};
3
4use bitflags::bitflags;
5
6use std::ffi::CStr;
7use std::ffi::c_char;
8use std::marker::PhantomData;
9use std::ptr::NonNull;
10
11/// A MeCab tagger for analyzing text. It wraps C `mecab_t`.
12///
13/// The lifetime parameter is bound to [`Model`](`Model::new_tagger()`).
14pub struct Tagger<'a> {
15 inner: NonNull<ffi::mecab_t>,
16 _marker: PhantomData<&'a Model>,
17}
18
19impl<'a> Tagger<'a> {
20 /// Returns the raw pointer to the underlying [`mecab_t`](ffi::mecab_t).
21 ///
22 /// The pointer is guaranteed to be [`NonNull`].
23 pub fn as_ptr(&self) -> *mut ffi::mecab_t {
24 self.inner.as_ptr()
25 }
26
27 /// Creates a new [`Tagger`] from a [`Model`].
28 ///
29 /// It wraps `mecab_model_new_tagger()`, the wrapper of `MeCab::Model::createTagger()`.
30 pub fn new(model: &'a Model) -> Result<Self, Error> {
31 unsafe {
32 let inner = ffi::mecab_model_new_tagger(model.inner.as_ptr());
33
34 NonNull::new(inner)
35 .map(|inner| Self {
36 inner,
37 _marker: PhantomData,
38 })
39 .ok_or_else(Error::global)
40 }
41 }
42
43 /// Parses the sentence set in the [`Lattice`].
44 ///
45 /// It wraps `mecab_parse_lattice()`, the wrapper of `MeCab::Tagger::parse(MeCab::Lattice *lattice)`.
46 pub fn parse(&self, lattice: &mut LatticeGuard<'_, '_, '_>) -> Result<(), Error> {
47 unsafe {
48 let result = ffi::mecab_parse_lattice(self.as_ptr(), lattice.lattice.as_ptr());
49 if result == 0 {
50 Err(Error::with_lattice(lattice.as_ref()))
51 } else {
52 Ok(())
53 }
54 }
55 }
56}
57
58impl Drop for Tagger<'_> {
59 fn drop(&mut self) {
60 unsafe {
61 ffi::mecab_destroy(self.as_ptr());
62 }
63 }
64}
65
66unsafe impl Send for Tagger<'_> {}
67
68/// A MeCab lattice representing a search space for morphological analysis.
69/// It wraps C `mecab_lattice_t`.
70///
71/// The lifetime parameter is bound to [`Model`](`Model::new_lattice()`).
72pub struct Lattice<'a> {
73 inner: NonNull<ffi::mecab_lattice_t>,
74 _marker: PhantomData<&'a Model>,
75}
76
77impl<'a> Lattice<'a> {
78 /// Returns the raw pointer to the underlying [`mecab_lattice_t`](ffi::mecab_lattice_t).
79 ///
80 /// The pointer is guaranteed to be [`NonNull`].
81 pub fn as_ptr(&self) -> *mut ffi::mecab_lattice_t {
82 self.inner.as_ptr()
83 }
84
85 /// Creates a new [`Lattice`] from a [`Model`].
86 ///
87 /// It wraps `mecab_model_new_lattice()`, the wrapper of `MeCab::Model::createLattice()`.
88 pub fn new(model: &'a Model) -> Result<Self, Error> {
89 unsafe {
90 let inner = ffi::mecab_model_new_lattice(model.inner.as_ptr());
91 NonNull::new(inner)
92 .map(|inner| Self {
93 inner,
94 _marker: PhantomData,
95 })
96 .ok_or_else(Error::global)
97 }
98 }
99
100 /// Sets the sentence to be analyzed and returns a [`LatticeGuard`].
101 ///
102 /// It wraps `mecab_lattice_set_sentence2()`, the wrapper of `MeCab::Lattice::set_sentence(sentence, len)`.
103 ///
104 /// Since the analysis result is borrowed from the input `s`, the input should be captured in a
105 /// [`LatticeGuard`] during analysis.
106 pub fn set_sentence<'l, 's>(&'l mut self, s: &'s str) -> LatticeGuard<'a, 'l, 's> {
107 unsafe {
108 ffi::mecab_lattice_set_sentence2(self.as_ptr(), s.as_ptr() as *const c_char, s.len());
109 LatticeGuard {
110 lattice: self,
111 _marker: PhantomData,
112 }
113 }
114 }
115
116 /// Clears the lattice.
117 ///
118 /// It wraps `mecab_lattice_clear()`, the wrapper of `MeCab::Lattice::clear()`.
119 pub fn clear(&mut self) {
120 unsafe { ffi::mecab_lattice_clear(self.as_ptr()) }
121 }
122
123 /// Returns `true` if the lattice is available.
124 ///
125 /// It wraps `mecab_lattice_is_available()`, the wrapper of `MeCab::Lattice::is_available()`.
126 pub fn is_available(&self) -> bool {
127 unsafe {
128 let res = ffi::mecab_lattice_is_available(self.as_ptr());
129 res != 0
130 }
131 }
132
133 /// Returns the CRF normalization factor.
134 ///
135 /// It wraps `mecab_lattice_get_z()`, the wrapper of `MeCab::Lattice::Z()`.
136 pub fn crf_norm_factor(&self) -> f64 {
137 unsafe { ffi::mecab_lattice_get_z(self.as_ptr()) }
138 }
139
140 /// Sets the CRF normalization factor.
141 ///
142 /// It wraps `mecab_lattice_set_z()`, the wrapper of `MeCab::Lattice::set_Z()`.
143 pub fn set_crf_norm_factor(&mut self, z: f64) {
144 unsafe { ffi::mecab_lattice_set_z(self.as_ptr(), z) }
145 }
146
147 /// Returns the temperature parameter.
148 ///
149 /// It wraps `mecab_lattice_get_theta()`, the wrapper of `MeCab::Lattice::theta()`.
150 pub fn temparature(&self) -> f64 {
151 unsafe { ffi::mecab_lattice_get_theta(self.as_ptr()) }
152 }
153
154 /// Sets the temperature parameter.
155 ///
156 /// It wraps `mecab_lattice_set_theta()`, the wrapper of `MeCab::Lattice::set_theta()`.
157 pub fn set_temparature(&mut self, theta: f64) {
158 unsafe { ffi::mecab_lattice_set_theta(self.as_ptr(), theta) }
159 }
160
161 /// Returns the request type flags.
162 ///
163 /// It wraps `mecab_lattice_get_request_type()`, the wrapper of `MeCab::Lattice::request_type()`.
164 pub fn request_type(&self) -> RequestType {
165 unsafe { RequestType::from_raw(ffi::mecab_lattice_get_request_type(self.as_ptr())) }
166 }
167
168 /// Sets the request type flags.
169 ///
170 /// It wraps `mecab_lattice_set_request_type()`, the wrapper of `MeCab::Lattice::set_request_type()`.
171 pub fn set_request_type(&mut self, request_type: RequestType) {
172 unsafe { ffi::mecab_lattice_set_request_type(self.as_ptr(), request_type.as_raw()) }
173 }
174
175 /// Returns the boundary constraint at the given position.
176 ///
177 /// It wraps `mecab_lattice_get_boundary_constraint()`, the wrapper of `MeCab::Lattice::boundary_constraint(pos)`.
178 pub fn boundary_constraint(&self, pos: usize) -> BoundaryConstraintType {
179 unsafe {
180 BoundaryConstraintType::from_raw(ffi::mecab_lattice_get_boundary_constraint(
181 self.as_ptr(),
182 pos,
183 ))
184 }
185 }
186
187 /// Sets the boundary constraint at the given position.
188 ///
189 /// It wraps `mecab_lattice_set_boundary_constraint()`, the wrapper of `MeCab::Lattice::boundary_constraint(pos, type)`.
190 pub fn set_boundary_constraint(&mut self, pos: usize, constraint: BoundaryConstraintType) {
191 unsafe {
192 ffi::mecab_lattice_set_boundary_constraint(self.as_ptr(), pos, constraint.as_raw());
193 }
194 }
195}
196
197impl Drop for Lattice<'_> {
198 fn drop(&mut self) {
199 unsafe {
200 ffi::mecab_lattice_destroy(self.as_ptr());
201 }
202 }
203}
204
205unsafe impl Send for Lattice<'_> {}
206
207/// A guard for a lattice that holds a reference to the sentence string.
208///
209/// It is returned by [`Lattice::set_sentence()`].
210pub struct LatticeGuard<'a, 'l, 's> {
211 lattice: &'l mut Lattice<'a>,
212 _marker: PhantomData<&'s c_char>,
213}
214
215impl<'a> AsRef<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
216 fn as_ref(&self) -> &Lattice<'a> {
217 self.lattice
218 }
219}
220impl<'a> AsMut<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
221 fn as_mut(&mut self) -> &mut Lattice<'a> {
222 self.lattice
223 }
224}
225
226impl<'a, 'l> LatticeGuard<'a, 'l, '_> {
227 /// Returns the underlying [`Lattice`].
228 pub fn into_inner(self) -> &'l mut Lattice<'a> {
229 self.lattice
230 }
231}
232
233impl LatticeGuard<'_, '_, '_> {
234 /// Returns the analysis result as a string.
235 ///
236 /// It wraps `mecab_lattice_tostr()`, the wrapper of `MeCab::Lattice::toString()`.
237 pub fn to_str(&mut self) -> &str {
238 unsafe {
239 let s = ffi::mecab_lattice_tostr(self.lattice.as_ptr());
240 let s = CStr::from_ptr(s);
241 std::str::from_utf8_unchecked(s.to_bytes())
242 }
243 }
244
245 /// Returns a cursor to the BoS node.
246 ///
247 /// It wraps `mecab_lattice_get_bos_node()`, the wrapper of `MeCab::Lattice::bos_node()`.
248 pub fn bos_node(&self) -> NodeCursor<'_> {
249 unsafe {
250 let node = ffi::mecab_lattice_get_bos_node(self.lattice.as_ptr());
251 let curr = Node::from_ptr(node);
252 NodeCursor { curr }
253 }
254 }
255
256 /// Returns a cursor to the EoS node.
257 ///
258 /// It wraps `mecab_lattice_get_eos_node()`, the wrapper of `MeCab::Lattice::eos_node()`.
259 pub fn eos_node(&self) -> NodeCursor<'_> {
260 unsafe {
261 let node = ffi::mecab_lattice_get_eos_node(self.lattice.as_ptr());
262 let curr = Node::from_ptr(node);
263 NodeCursor { curr }
264 }
265 }
266}
267
268impl<'s> LatticeGuard<'_, '_, 's> {
269 /// Returns the sentence string.
270 ///
271 /// It wraps `mecab_lattice_get_sentence()`, the wrapper of `MeCab::Lattice::sentence()`.
272 ///
273 /// The returned value should be equal to the input sentence of [`Lattice::set_sentence()`].
274 pub fn sentence(&self) -> &str {
275 unsafe {
276 let ptr = ffi::mecab_lattice_get_sentence(self.lattice.as_ptr());
277 if ptr.is_null() {
278 ""
279 } else {
280 let slice = std::slice::from_raw_parts(ptr as *const u8, self.sentence_len());
281 std::str::from_utf8_unchecked(slice)
282 }
283 }
284 }
285
286 /// Returns the length of the sentence.
287 ///
288 /// It wraps `mecab_lattice_get_size()`, the wrapper of `MeCab::Lattice::size()`.
289 pub fn sentence_len(&self) -> usize {
290 unsafe { ffi::mecab_lattice_get_size(self.lattice.as_ptr()) }
291 }
292}
293
294bitflags! {
295 /// Request type flags for MeCab analysis.
296 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
297 pub struct RequestType: u8 {
298 /// One best result is obtained (default mode).
299 const ONE_BEST = 1;
300 /// Set this flag if you want to obtain N best results.
301 const NBEST = 1 << 1;
302 /// Set this flag if you want to enable a partial parsing mode.
303 /// When this flag is set, the input sentence needs to be written
304 /// in partial parsing format.
305 const PARTIAL = 1 << 2;
306 /// Set this flag if you want to obtain marginal probabilities.
307 /// Marginal probability is set in `Node::prob()`.
308 /// The parsing speed will get 3-5 times slower than the default mode.
309 const MARGINAL_PROB = 1 << 3;
310 /// Set this flag if you want to obtain alternative results.
311 /// Not implemented.
312 const ALTERNATIVE = 1 << 4;
313 /// When this flag is set, the result linked-list (`Node::next/prev`)
314 /// traverses all nodes in the lattice.
315 const ALL_MORPHS = 1 << 5;
316 /// When this flag is set, tagger internally copies the body of passed
317 /// sentence into internal buffer.
318 const ALLOCATE_SENTENCE = 1 << 6;
319 }
320}
321
322impl Default for RequestType {
323 fn default() -> Self {
324 Self::ONE_BEST
325 }
326}
327
328impl RequestType {
329 /// Returns the raw bits as [`i32`].
330 pub fn as_raw(self) -> i32 {
331 self.bits() as _
332 }
333
334 /// Creates a [`RequestType`] from raw bits.
335 pub fn from_raw(request_type: i32) -> Self {
336 Self::from_bits_truncate(request_type as u8)
337 }
338}
339
/// Kind of token-boundary constraint that can be attached to a position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BoundaryConstraintType {
    /// The token boundary is not specified.
    Any = 0,
    /// The position is a strong token boundary.
    Token = 1,
    /// The position is not a token boundary.
    InsideToken = 2,
}
350
351impl BoundaryConstraintType {
352 /// Returns the raw ID.
353 pub fn as_raw(self) -> i32 {
354 match self {
355 Self::Any => 0,
356 Self::Token => 1,
357 Self::InsideToken => 2,
358 }
359 }
360
361 /// Creates a [`BoundaryConstraintType`] from a raw ID.
362 pub fn from_raw(constraint: i32) -> Self {
363 match constraint {
364 1 => Self::Token,
365 2 => Self::InsideToken,
366 _ => Self::Any,
367 }
368 }
369}