lindera_nodejs/token.rs
1//! Token representation for morphological analysis results.
2//!
3//! This module provides the Token class that wraps morphological analysis results
4//! and exposes token properties to JavaScript.
5
6use lindera::token::Token;
7
8/// A morphological token.
9///
10/// Represents a single token from morphological analysis with its surface form,
11/// position information, and morphological details.
12#[napi(js_name = "Token")]
13#[derive(Clone)]
14pub struct JsToken {
15 /// Surface form of the token.
16 surface: String,
17 /// Start byte position in the original text.
18 byte_start: u32,
19 /// End byte position in the original text.
20 byte_end: u32,
21 /// Position index of the token.
22 position: u32,
23 /// Word ID in the dictionary.
24 word_id: u32,
25 /// Whether this token is an unknown word.
26 is_unknown: bool,
27 /// Morphological details of the token.
28 details: Option<Vec<String>>,
29}
30
31#[napi]
32impl JsToken {
33 /// Surface form of the token.
34 #[napi(getter)]
35 pub fn surface(&self) -> String {
36 self.surface.clone()
37 }
38
39 /// Start byte position in the original text.
40 #[napi(getter)]
41 pub fn byte_start(&self) -> u32 {
42 self.byte_start
43 }
44
45 /// End byte position in the original text.
46 #[napi(getter)]
47 pub fn byte_end(&self) -> u32 {
48 self.byte_end
49 }
50
51 /// Position index of the token.
52 #[napi(getter)]
53 pub fn position(&self) -> u32 {
54 self.position
55 }
56
57 /// Word ID in the dictionary.
58 #[napi(getter)]
59 pub fn word_id(&self) -> u32 {
60 self.word_id
61 }
62
63 /// Whether this token is an unknown word (not found in the dictionary).
64 #[napi(getter)]
65 pub fn is_unknown(&self) -> bool {
66 self.is_unknown
67 }
68
69 /// Morphological details of the token (part of speech, reading, etc.).
70 #[napi(getter)]
71 pub fn details(&self) -> Option<Vec<String>> {
72 self.details.clone()
73 }
74
75 /// Returns the detail string at the specified index.
76 ///
77 /// # Arguments
78 ///
79 /// * `index` - Zero-based index into the details array.
80 ///
81 /// # Returns
82 ///
83 /// The detail string if found, or `null` if the index is out of range.
84 #[napi]
85 pub fn get_detail(&self, index: u32) -> Option<String> {
86 self.details
87 .as_ref()
88 .and_then(|d| d.get(index as usize).cloned())
89 }
90}
91
92impl JsToken {
93 /// Creates a JsToken from a lindera Token.
94 ///
95 /// # Arguments
96 ///
97 /// * `token` - The lindera Token to convert.
98 ///
99 /// # Returns
100 ///
101 /// A new JsToken instance.
102 pub fn from_token(mut token: Token) -> Self {
103 let details = token.details().iter().map(|s| s.to_string()).collect();
104
105 Self {
106 surface: token.surface.to_string(),
107 byte_start: token.byte_start as u32,
108 byte_end: token.byte_end as u32,
109 position: token.position as u32,
110 word_id: token.word_id.id,
111 is_unknown: token.word_id.is_unknown(),
112 details: Some(details),
113 }
114 }
115}
116
117/// N-best tokenization result.
118///
119/// Contains a list of tokens and their total path cost.
120#[napi(js_name = "NbestResult")]
121pub struct JsNbestResult {
122 /// Tokens in this result.
123 tokens: Vec<JsToken>,
124 /// Total path cost of this tokenization.
125 cost: i64,
126}
127
128#[napi]
129impl JsNbestResult {
130 /// Tokens in this result.
131 #[napi(getter)]
132 pub fn tokens(&self) -> Vec<JsToken> {
133 self.tokens.clone()
134 }
135
136 /// Total path cost of this tokenization.
137 #[napi(getter)]
138 pub fn cost(&self) -> i64 {
139 self.cost
140 }
141}
142
143impl JsNbestResult {
144 /// Creates a new JsNbestResult.
145 ///
146 /// # Arguments
147 ///
148 /// * `tokens` - The tokens in this result.
149 /// * `cost` - The total path cost.
150 ///
151 /// # Returns
152 ///
153 /// A new JsNbestResult instance.
154 pub fn new(tokens: Vec<JsToken>, cost: i64) -> Self {
155 Self { tokens, cost }
156 }
157}