Skip to main content

mecab_sys/
tagger.rs

1use super::ffi;
2use super::{Error, Model, Node, NodeCursor};
3
4use bitflags::bitflags;
5
6use std::ffi::CStr;
7use std::ffi::c_char;
8use std::marker::PhantomData;
9use std::ptr::NonNull;
10
11/// A MeCab tagger for analyzing text. It wraps C `mecab_t`.
12///
13/// The lifetime parameter is bound to [`Model`](`Model::new_tagger()`).
14pub struct Tagger<'a> {
15    inner: NonNull<ffi::mecab_t>,
16    _marker: PhantomData<&'a Model>,
17}
18
19impl<'a> Tagger<'a> {
20    /// Returns the raw pointer to the underlying [`mecab_t`](ffi::mecab_t).
21    ///
22    /// The pointer is guaranteed to be [`NonNull`].
23    pub fn as_ptr(&self) -> *mut ffi::mecab_t {
24        self.inner.as_ptr()
25    }
26
27    /// Creates a new [`Tagger`] from a [`Model`].
28    ///
29    /// It wraps `mecab_model_new_tagger()`, the wrapper of `MeCab::Model::createTagger()`.
30    pub fn new(model: &'a Model) -> Result<Self, Error> {
31        unsafe {
32            let inner = ffi::mecab_model_new_tagger(model.inner.as_ptr());
33
34            NonNull::new(inner)
35                .map(|inner| Self {
36                    inner,
37                    _marker: PhantomData,
38                })
39                .ok_or_else(Error::global)
40        }
41    }
42
43    /// Parses the sentence set in the [`Lattice`].
44    ///
45    /// It wraps `mecab_parse_lattice()`, the wrapper of `MeCab::Tagger::parse(MeCab::Lattice *lattice)`.
46    pub fn parse(&self, lattice: &mut LatticeGuard<'_, '_, '_>) -> Result<(), Error> {
47        unsafe {
48            let result = ffi::mecab_parse_lattice(self.as_ptr(), lattice.lattice.as_ptr());
49            if result == 0 {
50                Err(Error::with_lattice(lattice.as_ref()))
51            } else {
52                Ok(())
53            }
54        }
55    }
56}
57
58impl Drop for Tagger<'_> {
59    fn drop(&mut self) {
60        unsafe {
61            ffi::mecab_destroy(self.as_ptr());
62        }
63    }
64}
65
66unsafe impl Send for Tagger<'_> {}
67
68/// A MeCab lattice representing a search space for morphological analysis.
69/// It wraps C `mecab_lattice_t`.
70///
71/// The lifetime parameter is bound to [`Model`](`Model::new_lattice()`).
72pub struct Lattice<'a> {
73    inner: NonNull<ffi::mecab_lattice_t>,
74    _marker: PhantomData<&'a Model>,
75}
76
77impl<'a> Lattice<'a> {
78    /// Returns the raw pointer to the underlying [`mecab_lattice_t`](ffi::mecab_lattice_t).
79    ///
80    /// The pointer is guaranteed to be [`NonNull`].
81    pub fn as_ptr(&self) -> *mut ffi::mecab_lattice_t {
82        self.inner.as_ptr()
83    }
84
85    /// Creates a new [`Lattice`] from a [`Model`].
86    ///
87    /// It wraps `mecab_model_new_lattice()`, the wrapper of `MeCab::Model::createLattice()`.
88    pub fn new(model: &'a Model) -> Result<Self, Error> {
89        unsafe {
90            let inner = ffi::mecab_model_new_lattice(model.inner.as_ptr());
91            NonNull::new(inner)
92                .map(|inner| Self {
93                    inner,
94                    _marker: PhantomData,
95                })
96                .ok_or_else(Error::global)
97        }
98    }
99
100    /// Sets the sentence to be analyzed and returns a [`LatticeGuard`].
101    ///
102    /// It wraps `mecab_lattice_set_sentence2()`, the wrapper of `MeCab::Lattice::set_sentence(sentence, len)`.
103    ///
104    /// Since the analysis result is borrowed from the input `s`, the input should be captured in a
105    /// [`LatticeGuard`] during analysis.
106    pub fn set_sentence<'l, 's>(&'l mut self, s: &'s str) -> LatticeGuard<'a, 'l, 's> {
107        unsafe {
108            ffi::mecab_lattice_set_sentence2(self.as_ptr(), s.as_ptr() as *const c_char, s.len());
109            LatticeGuard {
110                lattice: self,
111                _marker: PhantomData,
112            }
113        }
114    }
115
116    /// Clears the lattice.
117    ///
118    /// It wraps `mecab_lattice_clear()`, the wrapper of `MeCab::Lattice::clear()`.
119    pub fn clear(&mut self) {
120        unsafe { ffi::mecab_lattice_clear(self.as_ptr()) }
121    }
122
123    /// Returns `true` if the lattice is available.
124    ///
125    /// It wraps `mecab_lattice_is_available()`, the wrapper of `MeCab::Lattice::is_available()`.
126    pub fn is_available(&self) -> bool {
127        unsafe {
128            let res = ffi::mecab_lattice_is_available(self.as_ptr());
129            res != 0
130        }
131    }
132
133    /// Returns the CRF normalization factor.
134    ///
135    /// It wraps `mecab_lattice_get_z()`, the wrapper of `MeCab::Lattice::Z()`.
136    pub fn crf_norm_factor(&self) -> f64 {
137        unsafe { ffi::mecab_lattice_get_z(self.as_ptr()) }
138    }
139
140    /// Sets the CRF normalization factor.
141    ///
142    /// It wraps `mecab_lattice_set_z()`, the wrapper of `MeCab::Lattice::set_Z()`.
143    pub fn set_crf_norm_factor(&mut self, z: f64) {
144        unsafe { ffi::mecab_lattice_set_z(self.as_ptr(), z) }
145    }
146
147    /// Returns the temperature parameter.
148    ///
149    /// It wraps `mecab_lattice_get_theta()`, the wrapper of `MeCab::Lattice::theta()`.
150    pub fn temparature(&self) -> f64 {
151        unsafe { ffi::mecab_lattice_get_theta(self.as_ptr()) }
152    }
153
154    /// Sets the temperature parameter.
155    ///
156    /// It wraps `mecab_lattice_set_theta()`, the wrapper of `MeCab::Lattice::set_theta()`.
157    pub fn set_temparature(&mut self, theta: f64) {
158        unsafe { ffi::mecab_lattice_set_theta(self.as_ptr(), theta) }
159    }
160
161    /// Returns the request type flags.
162    ///
163    /// It wraps `mecab_lattice_get_request_type()`, the wrapper of `MeCab::Lattice::request_type()`.
164    pub fn request_type(&self) -> RequestType {
165        unsafe { RequestType::from_raw(ffi::mecab_lattice_get_request_type(self.as_ptr())) }
166    }
167
168    /// Sets the request type flags.
169    ///
170    /// It wraps `mecab_lattice_set_request_type()`, the wrapper of `MeCab::Lattice::set_request_type()`.
171    pub fn set_request_type(&mut self, request_type: RequestType) {
172        unsafe { ffi::mecab_lattice_set_request_type(self.as_ptr(), request_type.as_raw()) }
173    }
174
175    /// Returns the boundary constraint at the given position.
176    ///
177    /// It wraps `mecab_lattice_get_boundary_constraint()`, the wrapper of `MeCab::Lattice::boundary_constraint(pos)`.
178    pub fn boundary_constraint(&self, pos: usize) -> BoundaryConstraintType {
179        unsafe {
180            BoundaryConstraintType::from_raw(ffi::mecab_lattice_get_boundary_constraint(
181                self.as_ptr(),
182                pos,
183            ))
184        }
185    }
186
187    /// Sets the boundary constraint at the given position.
188    ///
189    /// It wraps `mecab_lattice_set_boundary_constraint()`, the wrapper of `MeCab::Lattice::boundary_constraint(pos, type)`.
190    pub fn set_boundary_constraint(&mut self, pos: usize, constraint: BoundaryConstraintType) {
191        unsafe {
192            ffi::mecab_lattice_set_boundary_constraint(self.as_ptr(), pos, constraint.as_raw());
193        }
194    }
195}
196
197impl Drop for Lattice<'_> {
198    fn drop(&mut self) {
199        unsafe {
200            ffi::mecab_lattice_destroy(self.as_ptr());
201        }
202    }
203}
204
205unsafe impl Send for Lattice<'_> {}
206
207/// A guard for a lattice that holds a reference to the sentence string.
208///
209/// It is returned by [`Lattice::set_sentence()`].
210pub struct LatticeGuard<'a, 'l, 's> {
211    lattice: &'l mut Lattice<'a>,
212    _marker: PhantomData<&'s c_char>,
213}
214
215impl<'a> AsRef<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
216    fn as_ref(&self) -> &Lattice<'a> {
217        self.lattice
218    }
219}
220impl<'a> AsMut<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
221    fn as_mut(&mut self) -> &mut Lattice<'a> {
222        self.lattice
223    }
224}
225
226impl<'a, 'l> LatticeGuard<'a, 'l, '_> {
227    /// Returns the underlying [`Lattice`].
228    pub fn into_inner(self) -> &'l mut Lattice<'a> {
229        self.lattice
230    }
231}
232
233impl LatticeGuard<'_, '_, '_> {
234    /// Returns the analysis result as a string.
235    ///
236    /// It wraps `mecab_lattice_tostr()`, the wrapper of `MeCab::Lattice::toString()`.
237    pub fn to_str(&mut self) -> &str {
238        unsafe {
239            let s = ffi::mecab_lattice_tostr(self.lattice.as_ptr());
240            let s = CStr::from_ptr(s);
241            std::str::from_utf8_unchecked(s.to_bytes())
242        }
243    }
244
245    /// Returns a cursor to the BoS node.
246    ///
247    /// It wraps `mecab_lattice_get_bos_node()`, the wrapper of `MeCab::Lattice::bos_node()`.
248    pub fn bos_node(&self) -> NodeCursor<'_> {
249        unsafe {
250            let node = ffi::mecab_lattice_get_bos_node(self.lattice.as_ptr());
251            let curr = Node::from_ptr(node);
252            NodeCursor { curr }
253        }
254    }
255
256    /// Returns a cursor to the EoS node.
257    ///
258    /// It wraps `mecab_lattice_get_eos_node()`, the wrapper of `MeCab::Lattice::eos_node()`.
259    pub fn eos_node(&self) -> NodeCursor<'_> {
260        unsafe {
261            let node = ffi::mecab_lattice_get_eos_node(self.lattice.as_ptr());
262            let curr = Node::from_ptr(node);
263            NodeCursor { curr }
264        }
265    }
266}
267
268impl<'s> LatticeGuard<'_, '_, 's> {
269    /// Returns the sentence string.
270    ///
271    /// It wraps `mecab_lattice_get_sentence()`, the wrapper of `MeCab::Lattice::sentence()`.
272    ///
273    /// The returned value should be equal to the input sentence of [`Lattice::set_sentence()`].
274    pub fn sentence(&self) -> &str {
275        unsafe {
276            let ptr = ffi::mecab_lattice_get_sentence(self.lattice.as_ptr());
277            if ptr.is_null() {
278                ""
279            } else {
280                let slice = std::slice::from_raw_parts(ptr as *const u8, self.sentence_len());
281                std::str::from_utf8_unchecked(slice)
282            }
283        }
284    }
285
286    /// Returns the length of the sentence.
287    ///
288    /// It wraps `mecab_lattice_get_size()`, the wrapper of `MeCab::Lattice::size()`.
289    pub fn sentence_len(&self) -> usize {
290        unsafe { ffi::mecab_lattice_get_size(self.lattice.as_ptr()) }
291    }
292}
293
294bitflags! {
295    /// Request type flags for MeCab analysis.
296    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
297    pub struct RequestType: u8 {
298        /// One best result is obtained (default mode).
299        const ONE_BEST = 1;
300        /// Set this flag if you want to obtain N best results.
301        const NBEST = 1 << 1;
302        /// Set this flag if you want to enable a partial parsing mode.
303        /// When this flag is set, the input sentence needs to be written
304        /// in partial parsing format.
305        const PARTIAL = 1 << 2;
306        /// Set this flag if you want to obtain marginal probabilities.
307        /// Marginal probability is set in `Node::prob()`.
308        /// The parsing speed will get 3-5 times slower than the default mode.
309        const MARGINAL_PROB = 1 << 3;
310        /// Set this flag if you want to obtain alternative results.
311        /// Not implemented.
312        const ALTERNATIVE = 1 << 4;
313        /// When this flag is set, the result linked-list (`Node::next/prev`)
314        /// traverses all nodes in the lattice.
315        const ALL_MORPHS = 1 << 5;
316        /// When this flag is set, tagger internally copies the body of passed
317        /// sentence into internal buffer.
318        const ALLOCATE_SENTENCE = 1 << 6;
319    }
320}
321
322impl Default for RequestType {
323    fn default() -> Self {
324        Self::ONE_BEST
325    }
326}
327
328impl RequestType {
329    /// Returns the raw bits as [`i32`].
330    pub fn as_raw(self) -> i32 {
331        self.bits() as _
332    }
333
334    /// Creates a [`RequestType`] from raw bits.
335    pub fn from_raw(request_type: i32) -> Self {
336        Self::from_bits_truncate(request_type as u8)
337    }
338}
339
/// Kind of token-boundary constraint that can be attached to a position in a lattice.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BoundaryConstraintType {
    /// No constraint: the position may or may not be a token boundary.
    Any = 0,
    /// The position is a strong token boundary.
    Token = 1,
    /// The position is not a token boundary.
    InsideToken = 2,
}

impl BoundaryConstraintType {
    /// Converts the constraint to its raw ID (`0`, `1` or `2`).
    pub fn as_raw(self) -> i32 {
        self as i32
    }

    /// Converts a raw ID back into a [`BoundaryConstraintType`].
    ///
    /// Any value other than the IDs of [`Token`](Self::Token) and
    /// [`InsideToken`](Self::InsideToken) maps to [`Any`](Self::Any).
    pub fn from_raw(constraint: i32) -> Self {
        if constraint == Self::Token as i32 {
            Self::Token
        } else if constraint == Self::InsideToken as i32 {
            Self::InsideToken
        } else {
            Self::Any
        }
    }
}