Skip to main content

mecab_sys/
node.rs

1use super::ffi;
2
3use std::ffi::CStr;
4use std::marker::PhantomData;
5use std::ptr::NonNull;
6
7/// A single node (morpheme) in the MeCab lattice. It wraps C `mecab_node_t`.
8///
9/// The lifetime parameter is bound to
10/// [`Lattice` and the input sentence](`crate::LatticeGuard::bos_node()`).
11#[derive(Clone, Copy)]
12pub struct Node<'a> {
13    inner: NonNull<ffi::mecab_node_t>,
14    _marker: PhantomData<&'a ffi::mecab_node_t>,
15}
16
17impl<'a> Node<'a> {
18    /// Returns the raw pointer to the underlying [`mecab_node_t`](ffi::mecab_node_t).
19    pub fn as_ptr(&self) -> *mut ffi::mecab_node_t {
20        self.inner.as_ptr()
21    }
22
23    pub(crate) fn from_ptr(inner: *mut ffi::mecab_node_t) -> Option<Self> {
24        NonNull::new(inner).map(|inner| Node {
25            inner,
26            _marker: PhantomData,
27        })
28    }
29
30    /// Returns the next node in the same lattice.
31    ///
32    /// It wraps `mecab_node_t::next`.
33    ///
34    /// `None` is likely to imply that `self` is [EoS](`NodeKind`).
35    pub fn next(self) -> Option<Self> {
36        unsafe {
37            let node = (*self.as_ptr()).next;
38            Node::from_ptr(node)
39        }
40    }
41
42    /// Returns the previous node in the same lattice.
43    ///
44    /// It wraps `mecab_node_t::prev`.
45    ///
46    /// `None` is likely to imply that `self` is [BoS](`NodeKind`).
47    pub fn prev(self) -> Option<Self> {
48        unsafe {
49            let node = (*self.as_ptr()).prev;
50            Node::from_ptr(node)
51        }
52    }
53
54    /// Returns the unique node ID.
55    ///
56    /// It wraps `mecab_node_t::id`.
57    pub fn id(self) -> u32 {
58        unsafe { (*self.as_ptr()).id }
59    }
60
61    /// Returns the feature string of this node.
62    ///
63    /// It wraps `mecab_node_t::feature`.
64    pub fn feature(self) -> &'a str {
65        unsafe {
66            let ptr = (*self.as_ptr()).feature;
67            let s = CStr::from_ptr(ptr);
68            std::str::from_utf8_unchecked(s.to_bytes())
69        }
70    }
71
72    /// Returns the surface string of this node.
73    ///
74    /// It wraps `mecab_node_t::surface` and `mecab_node_t::length`.
75    pub fn surface(self) -> &'a str {
76        unsafe {
77            let ptr = (*self.as_ptr()).surface;
78            let len = (*self.as_ptr()).length;
79
80            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
81                ptr as *const u8,
82                len as usize,
83            ))
84        }
85    }
86
87    /// Returns the status of this node.
88    ///
89    /// It wraps `mecab_node_t::stat`.
90    pub fn kind(self) -> NodeKind {
91        use ffi::{MECAB_BOS_NODE, MECAB_EON_NODE, MECAB_EOS_NODE, MECAB_NOR_NODE, MECAB_UNK_NODE};
92
93        unsafe {
94            let kind = (*self.as_ptr()).stat as u32;
95            if kind == MECAB_BOS_NODE {
96                NodeKind::Bos
97            } else if kind == MECAB_EOS_NODE {
98                NodeKind::Eos
99            } else if kind == MECAB_UNK_NODE {
100                NodeKind::Unk
101            } else if kind == MECAB_EON_NODE {
102                NodeKind::EoNbest
103            } else {
104                debug_assert_eq!(kind, MECAB_NOR_NODE);
105                NodeKind::Normal
106            }
107        }
108    }
109
110    /// Returns the left attribute ID.
111    ///
112    /// It wraps `mecab_node_t::lcAttr`.
113    pub fn lc_attr(self) -> LcAttr {
114        unsafe { LcAttr((*self.as_ptr()).lcAttr) }
115    }
116
117    /// Returns the right attribute ID.
118    ///
119    /// It wraps `mecab_node_t::rcAttr`.
120    pub fn rc_attr(self) -> RcAttr {
121        unsafe { RcAttr((*self.as_ptr()).rcAttr) }
122    }
123
124    /// Returns the unique part of speech ID.
125    ///
126    /// It wraps `mecab_node_t::posid`.
127    pub fn pos_id(self) -> u16 {
128        unsafe { (*self.as_ptr()).posid }
129    }
130
131    /// Returns the character type ID.
132    ///
133    /// It wraps `mecab_node_t::char_type`.
134    pub fn char_type(self) -> u8 {
135        unsafe { (*self.as_ptr()).char_type }
136    }
137
138    /// Returns `true` if this node is part of the best path.
139    ///
140    /// It wraps `mecab_node_t::isbest`.
141    pub fn is_best(self) -> bool {
142        unsafe { (*self.as_ptr()).isbest == 1 }
143    }
144
145    /// Returns the forward accumulative log summation.
146    ///
147    /// It wraps `mecab_node_t::alpha`.
148    pub fn alpha(self) -> f32 {
149        unsafe { (*self.as_ptr()).alpha }
150    }
151
152    /// Returns the backward accumulative log summation.
153    ///
154    /// It wraps `mecab_node_t::beta`.
155    pub fn beta(self) -> f32 {
156        unsafe { (*self.as_ptr()).beta }
157    }
158
159    /// Returns the marginal probability.
160    ///
161    /// It wraps `mecab_node_t::prob`.
162    pub fn prob(self) -> f32 {
163        unsafe { (*self.as_ptr()).prob }
164    }
165
166    /// Returns the word cost.
167    ///
168    /// It wraps `mecab_node_t::wcost`.
169    pub fn wcost(self) -> i16 {
170        unsafe { (*self.as_ptr()).wcost }
171    }
172
173    /// Returns the best accumulative cost from BoS to this node.
174    ///
175    /// It wraps `mecab_node_t::cost`.
176    pub fn cost(self) -> i64 {
177        unsafe { (*self.as_ptr()).cost }
178    }
179}
180
/// Status of a MeCab node.
///
/// This is the value returned by [`Node::kind()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NodeKind {
    /// Beginning of sentence.
    Bos,
    /// End of sentence.
    Eos,
    /// Unknown word.
    Unk,
    /// End of N-best.
    EoNbest,
    /// Normal word.
    Normal,
}

impl NodeKind {
    /// Returns `true` iff `self` is [`NodeKind::Bos`].
    ///
    /// This is a `const fn`, so it can also be evaluated in constant contexts.
    pub const fn is_bos(self) -> bool {
        matches!(self, Self::Bos)
    }

    /// Returns `true` iff `self` is [`NodeKind::Eos`].
    ///
    /// This is a `const fn`, so it can also be evaluated in constant contexts.
    pub const fn is_eos(self) -> bool {
        matches!(self, Self::Eos)
    }
}
207
208/// A cursor for iterating over nodes in a lattice.
209///
210/// The lifetime parameter is bound to
211/// [`Lattice` and the input sentence](`crate::LatticeGuard::bos_node()`).
212///
213/// The cursor can be used either as an iterator and as a cursor of [`Node`]s.
214///
215/// As an iterator:
216///
217/// ```
218/// # use mecab_sys::LatticeGuard;
219/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
220/// let cursor = lattice.bos_node();
221/// for node in cursor {
222///     let surface = node.surface();
223///     let feat = node.feature();
224///
225///     println!("{surface}: {feat}");
226/// }
227/// # }
228/// ```
229///
230/// As a cursor:
231///
232/// ```
233/// # use mecab_sys::LatticeGuard;
234/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
235/// let mut cursor = lattice.bos_node();
236/// while let Some(node) = cursor.curr() {
237///     let surface = node.surface();
238///     let feat = node.feature();
239///
240///     println!("{surface}: {feat}");
241///
242///     cursor.move_next();
243/// }
244/// # }
245/// ```
246///
247/// The cursor can move back and forth:
248///
249/// ```
250/// # use mecab_sys::LatticeGuard;
251/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
252/// let mut cursor = lattice.bos_node();
253/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
254///
255/// cursor.move_next();
256/// cursor.move_next();
257/// if let Some(node) = cursor.curr() {
258///     let surface = node.surface();
259///     let feat = node.feature();
260///
261///     println!("{surface}: {feat}");
262/// }
263///
264/// cursor.move_prev();
265/// if let Some(node) = cursor.curr() {
266///     let surface = node.surface();
267///     let feat = node.feature();
268///
269///     println!("{surface}: {feat}");
270/// }
271///
272/// cursor.move_prev();
273/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
274/// # }
275/// ```
276///
277///
278/// After you call `move_next()` on the EoS node, call `move_prev()` on the BoS node, or consume as
279/// an iterator, then the cursor shifted to the "dead" state, never being back to the alive state.
280///
281/// ```
282/// # use mecab_sys::LatticeGuard;
283/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
284/// let mut cursor = lattice.bos_node();
285/// while cursor.curr().is_some_and(|node| !node.kind().is_eos()) {
286///     cursor.move_next();
287/// }
288/// assert!(cursor.curr().is_some_and(|node| node.kind().is_eos()));
289///
290/// // Call `move_next()` on the EoS node
291/// cursor.move_next();
292/// assert!(cursor.curr().is_none());
293///
294/// // Never back to the original node
295/// cursor.move_prev();
296/// assert!(cursor.curr().is_none());
297///
298/// let mut cursor = lattice.bos_node();
299/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
300///
301/// // Call `move_prev()` on the BoS node
302/// cursor.move_prev();
303/// assert!(cursor.curr().is_none());
304///
305/// // Never back to the original node
306/// cursor.move_next();
307/// assert!(cursor.curr().is_none());
308///
309/// let mut cursor = lattice.bos_node();
310/// // Consume the iterator, reaching at the EoS node
311/// for _ in &mut cursor {}
312/// assert!(cursor.curr().is_none());
313///
314/// // Never back to alive nodes
315/// cursor.move_prev();
316/// assert!(cursor.curr().is_none());
317/// # }
318/// ```
319pub struct NodeCursor<'a> {
320    pub(crate) curr: Option<Node<'a>>,
321}
322
323impl<'a> NodeCursor<'a> {
324    /// Moves the cursor to the next node.
325    ///
326    /// It is equivalent to calling [`Node::next()`] on the [current node](`Self::curr()`).
327    pub fn move_next(&mut self) {
328        if let Some(curr) = self.curr {
329            self.curr = curr.next();
330        }
331    }
332
333    /// Moves the cursor to the previous node.
334    ///
335    /// It is equivalent to calling [`Node::prev()`] on the [current node](`Self::curr()`).
336    pub fn move_prev(&mut self) {
337        if let Some(curr) = self.curr {
338            self.curr = curr.prev();
339        }
340    }
341
342    /// Returns the current node pointed to by the cursor.
343    pub fn curr(&self) -> Option<Node<'a>> {
344        self.curr
345    }
346}
347
348impl<'a> Iterator for NodeCursor<'a> {
349    type Item = Node<'a>;
350
351    fn next(&mut self) -> Option<Self::Item> {
352        let res = self.curr?;
353        self.move_next();
354        Some(res)
355    }
356}
357
/// Left attribute ID.
///
/// A newtype over the raw `u16` ID so that left and right attribute IDs cannot be mixed up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LcAttr(u16);

/// Right attribute ID.
///
/// A newtype over the raw `u16` ID so that left and right attribute IDs cannot be mixed up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RcAttr(u16);

impl LcAttr {
    /// Creates a new [`LcAttr`] from a raw ID.
    ///
    /// This is a `const fn`, so IDs can be built in constant contexts.
    pub const fn from_raw(attr: u16) -> Self {
        Self(attr)
    }

    /// Returns the raw ID.
    pub const fn to_raw(self) -> u16 {
        self.0
    }
}

impl RcAttr {
    /// Creates a new [`RcAttr`] from a raw ID.
    ///
    /// This is a `const fn`, so IDs can be built in constant contexts.
    pub const fn from_raw(attr: u16) -> Self {
        Self(attr)
    }

    /// Returns the raw ID.
    pub const fn to_raw(self) -> u16 {
        self.0
    }
}