mecab_sys/node.rs
1use super::ffi;
2
3use std::ffi::CStr;
4use std::marker::PhantomData;
5use std::ptr::NonNull;
6
7/// A single node (morpheme) in the MeCab lattice. It wraps C `mecab_node_t`.
8///
9/// The lifetime parameter is bound to
10/// [`Lattice` and the input sentence](`crate::LatticeGuard::bos_node()`).
11#[derive(Clone, Copy)]
12pub struct Node<'a> {
13 inner: NonNull<ffi::mecab_node_t>,
14 _marker: PhantomData<&'a ffi::mecab_node_t>,
15}
16
17impl<'a> Node<'a> {
18 /// Returns the raw pointer to the underlying [`mecab_node_t`](ffi::mecab_node_t).
19 pub fn as_ptr(&self) -> *mut ffi::mecab_node_t {
20 self.inner.as_ptr()
21 }
22
23 pub(crate) fn from_ptr(inner: *mut ffi::mecab_node_t) -> Option<Self> {
24 NonNull::new(inner).map(|inner| Node {
25 inner,
26 _marker: PhantomData,
27 })
28 }
29
30 /// Returns the next node in the same lattice.
31 ///
32 /// It wraps `mecab_node_t::next`.
33 ///
34 /// `None` is likely to imply that `self` is [EoS](`NodeKind`).
35 pub fn next(self) -> Option<Self> {
36 unsafe {
37 let node = (*self.as_ptr()).next;
38 Node::from_ptr(node)
39 }
40 }
41
42 /// Returns the previous node in the same lattice.
43 ///
44 /// It wraps `mecab_node_t::prev`.
45 ///
46 /// `None` is likely to imply that `self` is [BoS](`NodeKind`).
47 pub fn prev(self) -> Option<Self> {
48 unsafe {
49 let node = (*self.as_ptr()).prev;
50 Node::from_ptr(node)
51 }
52 }
53
54 /// Returns the unique node ID.
55 ///
56 /// It wraps `mecab_node_t::id`.
57 pub fn id(self) -> u32 {
58 unsafe { (*self.as_ptr()).id }
59 }
60
61 /// Returns the feature string of this node.
62 ///
63 /// It wraps `mecab_node_t::feature`.
64 pub fn feature(self) -> &'a str {
65 unsafe {
66 let ptr = (*self.as_ptr()).feature;
67 let s = CStr::from_ptr(ptr);
68 std::str::from_utf8_unchecked(s.to_bytes())
69 }
70 }
71
72 /// Returns the surface string of this node.
73 ///
74 /// It wraps `mecab_node_t::surface` and `mecab_node_t::length`.
75 pub fn surface(self) -> &'a str {
76 unsafe {
77 let ptr = (*self.as_ptr()).surface;
78 let len = (*self.as_ptr()).length;
79
80 std::str::from_utf8_unchecked(std::slice::from_raw_parts(
81 ptr as *const u8,
82 len as usize,
83 ))
84 }
85 }
86
87 /// Returns the status of this node.
88 ///
89 /// It wraps `mecab_node_t::stat`.
90 pub fn kind(self) -> NodeKind {
91 use ffi::{MECAB_BOS_NODE, MECAB_EON_NODE, MECAB_EOS_NODE, MECAB_NOR_NODE, MECAB_UNK_NODE};
92
93 unsafe {
94 let kind = (*self.as_ptr()).stat as u32;
95 if kind == MECAB_BOS_NODE {
96 NodeKind::Bos
97 } else if kind == MECAB_EOS_NODE {
98 NodeKind::Eos
99 } else if kind == MECAB_UNK_NODE {
100 NodeKind::Unk
101 } else if kind == MECAB_EON_NODE {
102 NodeKind::EoNbest
103 } else {
104 debug_assert_eq!(kind, MECAB_NOR_NODE);
105 NodeKind::Normal
106 }
107 }
108 }
109
110 /// Returns the left attribute ID.
111 ///
112 /// It wraps `mecab_node_t::lcAttr`.
113 pub fn lc_attr(self) -> LcAttr {
114 unsafe { LcAttr((*self.as_ptr()).lcAttr) }
115 }
116
117 /// Returns the right attribute ID.
118 ///
119 /// It wraps `mecab_node_t::rcAttr`.
120 pub fn rc_attr(self) -> RcAttr {
121 unsafe { RcAttr((*self.as_ptr()).rcAttr) }
122 }
123
124 /// Returns the unique part of speech ID.
125 ///
126 /// It wraps `mecab_node_t::posid`.
127 pub fn pos_id(self) -> u16 {
128 unsafe { (*self.as_ptr()).posid }
129 }
130
131 /// Returns the character type ID.
132 ///
133 /// It wraps `mecab_node_t::char_type`.
134 pub fn char_type(self) -> u8 {
135 unsafe { (*self.as_ptr()).char_type }
136 }
137
138 /// Returns `true` if this node is part of the best path.
139 ///
140 /// It wraps `mecab_node_t::isbest`.
141 pub fn is_best(self) -> bool {
142 unsafe { (*self.as_ptr()).isbest == 1 }
143 }
144
145 /// Returns the forward accumulative log summation.
146 ///
147 /// It wraps `mecab_node_t::alpha`.
148 pub fn alpha(self) -> f32 {
149 unsafe { (*self.as_ptr()).alpha }
150 }
151
152 /// Returns the backward accumulative log summation.
153 ///
154 /// It wraps `mecab_node_t::beta`.
155 pub fn beta(self) -> f32 {
156 unsafe { (*self.as_ptr()).beta }
157 }
158
159 /// Returns the marginal probability.
160 ///
161 /// It wraps `mecab_node_t::prob`.
162 pub fn prob(self) -> f32 {
163 unsafe { (*self.as_ptr()).prob }
164 }
165
166 /// Returns the word cost.
167 ///
168 /// It wraps `mecab_node_t::wcost`.
169 pub fn wcost(self) -> i16 {
170 unsafe { (*self.as_ptr()).wcost }
171 }
172
173 /// Returns the best accumulative cost from BoS to this node.
174 ///
175 /// It wraps `mecab_node_t::cost`.
176 pub fn cost(self) -> i64 {
177 unsafe { (*self.as_ptr()).cost }
178 }
179}
180
/// Status of a MeCab node.
///
/// This is the value returned by [`Node::kind()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum NodeKind {
    /// Beginning of sentence.
    Bos,
    /// End of sentence.
    Eos,
    /// Unknown word.
    Unk,
    /// End of N-best.
    EoNbest,
    /// Normal word.
    Normal,
}

impl NodeKind {
    /// Returns true iff `self` is `Bos`.
    ///
    /// This is a `const fn`, so it can also be evaluated in constant contexts.
    pub const fn is_bos(self) -> bool {
        matches!(self, Self::Bos)
    }

    /// Returns true iff `self` is `Eos`.
    ///
    /// This is a `const fn`, so it can also be evaluated in constant contexts.
    pub const fn is_eos(self) -> bool {
        matches!(self, Self::Eos)
    }
}
207
208/// A cursor for iterating over nodes in a lattice.
209///
210/// The lifetime parameter is bound to
211/// [`Lattice` and the input sentence](`crate::LatticeGuard::bos_node()`).
212///
/// The cursor can be used either as an iterator or as a cursor of [`Node`]s.
214///
215/// As an iterator:
216///
217/// ```
218/// # use mecab_sys::LatticeGuard;
219/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
220/// let cursor = lattice.bos_node();
221/// for node in cursor {
222/// let surface = node.surface();
223/// let feat = node.feature();
224///
225/// println!("{surface}: {feat}");
226/// }
227/// # }
228/// ```
229///
230/// As a cursor:
231///
232/// ```
233/// # use mecab_sys::LatticeGuard;
234/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
235/// let mut cursor = lattice.bos_node();
236/// while let Some(node) = cursor.curr() {
237/// let surface = node.surface();
238/// let feat = node.feature();
239///
240/// println!("{surface}: {feat}");
241///
242/// cursor.move_next();
243/// }
244/// # }
245/// ```
246///
247/// The cursor can move back and forth:
248///
249/// ```
250/// # use mecab_sys::LatticeGuard;
251/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
252/// let mut cursor = lattice.bos_node();
253/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
254///
255/// cursor.move_next();
256/// cursor.move_next();
257/// if let Some(node) = cursor.curr() {
258/// let surface = node.surface();
259/// let feat = node.feature();
260///
261/// println!("{surface}: {feat}");
262/// }
263///
264/// cursor.move_prev();
265/// if let Some(node) = cursor.curr() {
266/// let surface = node.surface();
267/// let feat = node.feature();
268///
269/// println!("{surface}: {feat}");
270/// }
271///
272/// cursor.move_prev();
273/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
274/// # }
275/// ```
276///
277///
/// Once you call `move_next()` on the EoS node, call `move_prev()` on the BoS node, or exhaust the
/// cursor as an iterator, the cursor enters the "dead" state and never returns to the alive state.
280///
281/// ```
282/// # use mecab_sys::LatticeGuard;
283/// # fn analyze(lattice: &LatticeGuard<'_, '_, '_>) {
284/// let mut cursor = lattice.bos_node();
285/// while cursor.curr().is_some_and(|node| !node.kind().is_eos()) {
286/// cursor.move_next();
287/// }
288/// assert!(cursor.curr().is_some_and(|node| node.kind().is_eos()));
289///
290/// // Call `move_next()` on the EoS node
291/// cursor.move_next();
292/// assert!(cursor.curr().is_none());
293///
294/// // Never back to the original node
295/// cursor.move_prev();
296/// assert!(cursor.curr().is_none());
297///
298/// let mut cursor = lattice.bos_node();
299/// assert!(cursor.curr().is_some_and(|node| node.kind().is_bos()));
300///
301/// // Call `move_prev()` on the BoS node
302/// cursor.move_prev();
303/// assert!(cursor.curr().is_none());
304///
305/// // Never back to the original node
306/// cursor.move_next();
307/// assert!(cursor.curr().is_none());
308///
309/// let mut cursor = lattice.bos_node();
/// // Consume the iterator until it is exhausted (past the EoS node)
311/// for _ in &mut cursor {}
312/// assert!(cursor.curr().is_none());
313///
314/// // Never back to alive nodes
315/// cursor.move_prev();
316/// assert!(cursor.curr().is_none());
317/// # }
318/// ```
319pub struct NodeCursor<'a> {
320 pub(crate) curr: Option<Node<'a>>,
321}
322
323impl<'a> NodeCursor<'a> {
324 /// Moves the cursor to the next node.
325 ///
326 /// It is equivalent to calling [`Node::next()`] on the [current node](`Self::curr()`).
327 pub fn move_next(&mut self) {
328 if let Some(curr) = self.curr {
329 self.curr = curr.next();
330 }
331 }
332
333 /// Moves the cursor to the previous node.
334 ///
335 /// It is equivalent to calling [`Node::prev()`] on the [current node](`Self::curr()`).
336 pub fn move_prev(&mut self) {
337 if let Some(curr) = self.curr {
338 self.curr = curr.prev();
339 }
340 }
341
342 /// Returns the current node pointed to by the cursor.
343 pub fn curr(&self) -> Option<Node<'a>> {
344 self.curr
345 }
346}
347
348impl<'a> Iterator for NodeCursor<'a> {
349 type Item = Node<'a>;
350
351 fn next(&mut self) -> Option<Self::Item> {
352 let res = self.curr?;
353 self.move_next();
354 Some(res)
355 }
356}
357
/// Left attribute ID.
///
/// A newtype around the raw `u16` ID so that left and right attribute IDs cannot be mixed up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LcAttr(u16);

/// Right attribute ID.
///
/// A newtype around the raw `u16` ID so that left and right attribute IDs cannot be mixed up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RcAttr(u16);

impl LcAttr {
    /// Creates a new [`LcAttr`] from a raw ID.
    ///
    /// This is a `const fn`, so it can also be used to define constants.
    pub const fn from_raw(attr: u16) -> Self {
        Self(attr)
    }

    /// Returns the raw ID.
    pub const fn to_raw(self) -> u16 {
        self.0
    }
}

impl RcAttr {
    /// Creates a new [`RcAttr`] from a raw ID.
    ///
    /// This is a `const fn`, so it can also be used to define constants.
    pub const fn from_raw(attr: u16) -> Self {
        Self(attr)
    }

    /// Returns the raw ID.
    pub const fn to_raw(self) -> u16 {
        self.0
    }
}