Skip to main content

lindera_ruby/
token.rs

//! Token representation for morphological analysis results.
//!
//! This module wraps Lindera tokens for use in Ruby.
4
5use magnus::prelude::*;
6use magnus::{Error, Ruby, method};
7
8use lindera::token::Token;
9
10/// Token object wrapping the Rust Token data.
11///
12/// This class provides access to token fields and details.
13#[magnus::wrap(class = "Lindera::Token", free_immediately, size)]
14pub struct RbToken {
15    /// Surface form of the token.
16    surface: String,
17    /// Start byte position in the original text.
18    byte_start: usize,
19    /// End byte position in the original text.
20    byte_end: usize,
21    /// Position index of the token.
22    position: usize,
23    /// Word ID in the dictionary.
24    word_id: u32,
25    /// Whether this token is an unknown word.
26    is_unknown: bool,
27    /// Morphological details of the token.
28    details: Option<Vec<String>>,
29}
30
31impl RbToken {
32    /// Creates a new `RbToken` from a Lindera `Token`.
33    ///
34    /// # Arguments
35    ///
36    /// * `token` - Lindera token to convert.
37    ///
38    /// # Returns
39    ///
40    /// A new `RbToken` instance.
41    pub fn from_token(mut token: Token) -> Self {
42        let details = token.details().iter().map(|s| s.to_string()).collect();
43
44        Self {
45            surface: token.surface.to_string(),
46            byte_start: token.byte_start,
47            byte_end: token.byte_end,
48            position: token.position,
49            word_id: token.word_id.id,
50            is_unknown: token.word_id.is_unknown(),
51            details: Some(details),
52        }
53    }
54
55    /// Returns the surface form of the token.
56    fn surface(&self) -> String {
57        self.surface.clone()
58    }
59
60    /// Returns the start byte position.
61    fn byte_start(&self) -> usize {
62        self.byte_start
63    }
64
65    /// Returns the end byte position.
66    fn byte_end(&self) -> usize {
67        self.byte_end
68    }
69
70    /// Returns the position index.
71    fn position(&self) -> usize {
72        self.position
73    }
74
75    /// Returns the word ID.
76    fn word_id(&self) -> u32 {
77        self.word_id
78    }
79
80    /// Returns whether this token is an unknown word.
81    fn is_unknown(&self) -> bool {
82        self.is_unknown
83    }
84
85    /// Returns the morphological details of the token.
86    fn details(&self) -> Option<Vec<String>> {
87        self.details.clone()
88    }
89
90    /// Returns the detail at the specified index.
91    ///
92    /// # Arguments
93    ///
94    /// * `index` - Index of the detail to retrieve.
95    ///
96    /// # Returns
97    ///
98    /// The detail string if found, otherwise nil.
99    fn get_detail(&self, index: usize) -> Option<String> {
100        self.details.as_ref().and_then(|d| d.get(index).cloned())
101    }
102
103    /// Returns the string representation of the token.
104    fn to_s(&self) -> String {
105        self.surface.clone()
106    }
107
108    /// Returns the inspect representation of the token.
109    fn inspect(&self) -> String {
110        format!(
111            "#<Lindera::Token surface='{}', start={}, end={}, position={}, word_id={}, unknown={}>",
112            self.surface,
113            self.byte_start,
114            self.byte_end,
115            self.position,
116            self.word_id,
117            self.is_unknown
118        )
119    }
120}
121
122/// Defines the Token class in the given Ruby module.
123///
124/// # Arguments
125///
126/// * `ruby` - Ruby runtime handle.
127/// * `module` - Parent Ruby module.
128///
129/// # Returns
130///
131/// `Ok(())` on success, or a Magnus `Error` on failure.
132pub fn define(ruby: &Ruby, module: &magnus::RModule) -> Result<(), Error> {
133    let token_class = module.define_class("Token", ruby.class_object())?;
134    token_class.define_method("surface", method!(RbToken::surface, 0))?;
135    token_class.define_method("byte_start", method!(RbToken::byte_start, 0))?;
136    token_class.define_method("byte_end", method!(RbToken::byte_end, 0))?;
137    token_class.define_method("position", method!(RbToken::position, 0))?;
138    token_class.define_method("word_id", method!(RbToken::word_id, 0))?;
139    token_class.define_method("unknown?", method!(RbToken::is_unknown, 0))?;
140    token_class.define_method("details", method!(RbToken::details, 0))?;
141    token_class.define_method("get_detail", method!(RbToken::get_detail, 1))?;
142    token_class.define_method("to_s", method!(RbToken::to_s, 0))?;
143    token_class.define_method("inspect", method!(RbToken::inspect, 0))?;
144
145    Ok(())
146}