Skip to main content

lindera_nodejs/
token.rs

1//! Token representation for morphological analysis results.
2//!
3//! This module provides the Token class that wraps morphological analysis results
4//! and exposes token properties to JavaScript.
5
6use lindera::token::Token;
7
8/// A morphological token.
9///
10/// Represents a single token from morphological analysis with its surface form,
11/// position information, and morphological details.
12#[napi(js_name = "Token")]
13#[derive(Clone)]
14pub struct JsToken {
15    /// Surface form of the token.
16    surface: String,
17    /// Start byte position in the original text.
18    byte_start: u32,
19    /// End byte position in the original text.
20    byte_end: u32,
21    /// Position index of the token.
22    position: u32,
23    /// Word ID in the dictionary.
24    word_id: u32,
25    /// Whether this token is an unknown word.
26    is_unknown: bool,
27    /// Morphological details of the token.
28    details: Option<Vec<String>>,
29}
30
31#[napi]
32impl JsToken {
33    /// Surface form of the token.
34    #[napi(getter)]
35    pub fn surface(&self) -> String {
36        self.surface.clone()
37    }
38
39    /// Start byte position in the original text.
40    #[napi(getter)]
41    pub fn byte_start(&self) -> u32 {
42        self.byte_start
43    }
44
45    /// End byte position in the original text.
46    #[napi(getter)]
47    pub fn byte_end(&self) -> u32 {
48        self.byte_end
49    }
50
51    /// Position index of the token.
52    #[napi(getter)]
53    pub fn position(&self) -> u32 {
54        self.position
55    }
56
57    /// Word ID in the dictionary.
58    #[napi(getter)]
59    pub fn word_id(&self) -> u32 {
60        self.word_id
61    }
62
63    /// Whether this token is an unknown word (not found in the dictionary).
64    #[napi(getter)]
65    pub fn is_unknown(&self) -> bool {
66        self.is_unknown
67    }
68
69    /// Morphological details of the token (part of speech, reading, etc.).
70    #[napi(getter)]
71    pub fn details(&self) -> Option<Vec<String>> {
72        self.details.clone()
73    }
74
75    /// Returns the detail string at the specified index.
76    ///
77    /// # Arguments
78    ///
79    /// * `index` - Zero-based index into the details array.
80    ///
81    /// # Returns
82    ///
83    /// The detail string if found, or `null` if the index is out of range.
84    #[napi]
85    pub fn get_detail(&self, index: u32) -> Option<String> {
86        self.details
87            .as_ref()
88            .and_then(|d| d.get(index as usize).cloned())
89    }
90}
91
92impl JsToken {
93    /// Creates a JsToken from a lindera Token.
94    ///
95    /// # Arguments
96    ///
97    /// * `token` - The lindera Token to convert.
98    ///
99    /// # Returns
100    ///
101    /// A new JsToken instance.
102    pub fn from_token(mut token: Token) -> Self {
103        let details = token.details().iter().map(|s| s.to_string()).collect();
104
105        Self {
106            surface: token.surface.to_string(),
107            byte_start: token.byte_start as u32,
108            byte_end: token.byte_end as u32,
109            position: token.position as u32,
110            word_id: token.word_id.id,
111            is_unknown: token.word_id.is_unknown(),
112            details: Some(details),
113        }
114    }
115}
116
117/// N-best tokenization result.
118///
119/// Contains a list of tokens and their total path cost.
120#[napi(js_name = "NbestResult")]
121pub struct JsNbestResult {
122    /// Tokens in this result.
123    tokens: Vec<JsToken>,
124    /// Total path cost of this tokenization.
125    cost: i64,
126}
127
128#[napi]
129impl JsNbestResult {
130    /// Tokens in this result.
131    #[napi(getter)]
132    pub fn tokens(&self) -> Vec<JsToken> {
133        self.tokens.clone()
134    }
135
136    /// Total path cost of this tokenization.
137    #[napi(getter)]
138    pub fn cost(&self) -> i64 {
139        self.cost
140    }
141}
142
143impl JsNbestResult {
144    /// Creates a new JsNbestResult.
145    ///
146    /// # Arguments
147    ///
148    /// * `tokens` - The tokens in this result.
149    /// * `cost` - The total path cost.
150    ///
151    /// # Returns
152    ///
153    /// A new JsNbestResult instance.
154    pub fn new(tokens: Vec<JsToken>, cost: i64) -> Self {
155        Self { tokens, cost }
156    }
157}