Skip to main content

lindera_ruby/
dictionary.rs

//! Dictionary management for morphological analysis.
//!
//! This module provides functionality for building, loading, and managing dictionaries
//! used in morphological analysis.
5
6use std::path::Path;
7
8use magnus::prelude::*;
9use magnus::{Error, Ruby, function, method};
10
11use lindera::dictionary::{
12    Dictionary, DictionaryBuilder, Metadata, UserDictionary,
13    load_dictionary as lindera_load_dictionary,
14    load_user_dictionary as lindera_load_user_dictionary,
15};
16
17use crate::error::to_magnus_error;
18use crate::metadata::RbMetadata;
19
20/// A morphological analysis dictionary.
21///
22/// Contains the data structures needed for tokenization and morphological analysis.
23#[magnus::wrap(class = "Lindera::Dictionary", free_immediately, size)]
24#[derive(Clone)]
25pub struct RbDictionary {
26    /// Inner Lindera dictionary.
27    pub inner: Dictionary,
28}
29
30impl RbDictionary {
31    /// Returns the name of the dictionary metadata.
32    ///
33    /// # Returns
34    ///
35    /// The dictionary metadata name.
36    fn metadata_name(&self) -> String {
37        self.inner.metadata.name.clone()
38    }
39
40    /// Returns the character encoding of the dictionary.
41    ///
42    /// # Returns
43    ///
44    /// The dictionary encoding string.
45    fn metadata_encoding(&self) -> String {
46        self.inner.metadata.encoding.clone()
47    }
48
49    /// Returns the full metadata object.
50    ///
51    /// # Returns
52    ///
53    /// The dictionary metadata.
54    fn metadata(&self) -> RbMetadata {
55        RbMetadata::from(self.inner.metadata.clone())
56    }
57
58    /// Returns the string representation.
59    fn to_s(&self) -> String {
60        "Dictionary".to_string()
61    }
62
63    /// Returns the inspect representation.
64    fn inspect(&self) -> String {
65        format!(
66            "#<Lindera::Dictionary: name='{}'>",
67            self.inner.metadata.name
68        )
69    }
70}
71
72/// A user-defined dictionary for custom words.
73///
74/// User dictionaries allow you to add custom words and their morphological features
75/// that are not present in the main dictionary.
76#[magnus::wrap(class = "Lindera::UserDictionary", free_immediately, size)]
77#[derive(Clone)]
78pub struct RbUserDictionary {
79    /// Inner Lindera user dictionary.
80    pub inner: UserDictionary,
81}
82
83impl RbUserDictionary {
84    /// Returns the string representation.
85    fn to_s(&self) -> String {
86        "UserDictionary".to_string()
87    }
88
89    /// Returns the inspect representation.
90    fn inspect(&self) -> String {
91        "UserDictionary()".to_string()
92    }
93}
94
95/// Loads a dictionary from the specified URI.
96///
97/// # Arguments
98///
99/// * `uri` - URI to the dictionary. Can be a file path or embedded dictionary name.
100///
101/// # Returns
102///
103/// A loaded `RbDictionary` object.
104fn load_dictionary(uri: String) -> Result<RbDictionary, Error> {
105    let ruby = Ruby::get().expect("Ruby runtime not initialized");
106    lindera_load_dictionary(&uri)
107        .map_err(|e| {
108            to_magnus_error(
109                &ruby,
110                format!("Failed to load dictionary from '{uri}': {e}"),
111            )
112        })
113        .map(|d| RbDictionary { inner: d })
114}
115
116/// Loads a user dictionary from the specified URI.
117///
118/// # Arguments
119///
120/// * `uri` - URI to the user dictionary directory.
121/// * `metadata` - Metadata configuration for the user dictionary.
122///
123/// # Returns
124///
125/// A loaded `RbUserDictionary` object.
126fn load_user_dictionary(uri: String, metadata: &RbMetadata) -> Result<RbUserDictionary, Error> {
127    let ruby = Ruby::get().expect("Ruby runtime not initialized");
128    let meta: Metadata = metadata.clone().into();
129    lindera_load_user_dictionary(&uri, &meta)
130        .map_err(|e| {
131            to_magnus_error(
132                &ruby,
133                format!("Failed to load user dictionary from '{uri}': {e}"),
134            )
135        })
136        .map(|d| RbUserDictionary { inner: d })
137}
138
139/// Builds a dictionary from source files.
140///
141/// # Arguments
142///
143/// * `input_dir` - Directory containing dictionary source files.
144/// * `output_dir` - Directory where the built dictionary will be saved.
145/// * `metadata` - Metadata configuration for the dictionary.
146fn build_dictionary(
147    input_dir: String,
148    output_dir: String,
149    metadata: &RbMetadata,
150) -> Result<(), Error> {
151    let ruby = Ruby::get().expect("Ruby runtime not initialized");
152    let input_path = Path::new(&input_dir);
153    let output_path = Path::new(&output_dir);
154
155    if !input_path.exists() {
156        return Err(Error::new(
157            ruby.exception_arg_error(),
158            format!("Input directory does not exist: {input_dir}"),
159        ));
160    }
161
162    let builder = DictionaryBuilder::new(metadata.clone().into());
163    builder
164        .build_dictionary(input_path, output_path)
165        .map_err(|e| to_magnus_error(&ruby, format!("Failed to build dictionary: {e}")))?;
166
167    Ok(())
168}
169
170/// Builds a user dictionary from a CSV file.
171///
172/// # Arguments
173///
174/// * `_kind` - Dictionary kind (reserved for future use).
175/// * `input_file` - Path to the CSV file.
176/// * `output_dir` - Directory where the built user dictionary will be saved.
177/// * `metadata` - Optional metadata configuration.
178fn build_user_dictionary(
179    _kind: String,
180    input_file: String,
181    output_dir: String,
182    metadata: Option<&RbMetadata>,
183) -> Result<(), Error> {
184    let ruby = Ruby::get().expect("Ruby runtime not initialized");
185    let input_path = Path::new(&input_file);
186    let output_path = Path::new(&output_dir);
187
188    if !input_path.exists() {
189        return Err(Error::new(
190            ruby.exception_arg_error(),
191            format!("Input file does not exist: {input_file}"),
192        ));
193    }
194
195    let meta = match metadata {
196        Some(m) => m.clone().into(),
197        None => Metadata::default(),
198    };
199
200    let builder = DictionaryBuilder::new(meta);
201    builder
202        .build_user_dictionary(input_path, output_path)
203        .map_err(|e| to_magnus_error(&ruby, format!("Failed to build user dictionary: {e}")))?;
204
205    Ok(())
206}
207
208/// Defines Dictionary, UserDictionary classes and module functions in the given Ruby module.
209///
210/// # Arguments
211///
212/// * `ruby` - Ruby runtime handle.
213/// * `module` - Parent Ruby module.
214///
215/// # Returns
216///
217/// `Ok(())` on success, or a Magnus `Error` on failure.
218pub fn define(ruby: &Ruby, module: &magnus::RModule) -> Result<(), Error> {
219    let dict_class = module.define_class("Dictionary", ruby.class_object())?;
220    dict_class.define_method("metadata_name", method!(RbDictionary::metadata_name, 0))?;
221    dict_class.define_method(
222        "metadata_encoding",
223        method!(RbDictionary::metadata_encoding, 0),
224    )?;
225    dict_class.define_method("metadata", method!(RbDictionary::metadata, 0))?;
226    dict_class.define_method("to_s", method!(RbDictionary::to_s, 0))?;
227    dict_class.define_method("inspect", method!(RbDictionary::inspect, 0))?;
228
229    let user_dict_class = module.define_class("UserDictionary", ruby.class_object())?;
230    user_dict_class.define_method("to_s", method!(RbUserDictionary::to_s, 0))?;
231    user_dict_class.define_method("inspect", method!(RbUserDictionary::inspect, 0))?;
232
233    module.define_module_function("load_dictionary", function!(load_dictionary, 1))?;
234    module.define_module_function("load_user_dictionary", function!(load_user_dictionary, 2))?;
235    module.define_module_function("build_dictionary", function!(build_dictionary, 3))?;
236    module.define_module_function("build_user_dictionary", function!(build_user_dictionary, 4))?;
237
238    Ok(())
239}