Skip to main content

lindera_ruby/
lib.rs

1//! # Lindera Ruby Bindings
2//!
3//! Ruby bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library for CJK text.
4//!
5//! Lindera provides high-performance tokenization and morphological analysis for:
6//! - Japanese (IPADIC, IPADIC NEologd, UniDic)
7//! - Korean (ko-dic)
8//! - Chinese (CC-CEDICT, Jieba)
9//!
10//! ## Features
11//!
12//! - **Dictionary management**: Build, load, and use custom dictionaries
13//! - **Tokenization**: Multiple tokenization modes (normal, decompose)
14//! - **Filters**: Character and token filtering pipeline
15//! - **Training**: Train custom morphological models (with `train` feature)
16//! - **User dictionaries**: Support for custom user dictionaries
17//!
18//! ## Examples
19//!
20//! ```ruby
21//! require "lindera"
22//!
23//! # Create a tokenizer
24//! builder = Lindera::TokenizerBuilder.new
25//! tokenizer = builder.build
26//!
27//! # Tokenize text
28//! tokens = tokenizer.tokenize("関西国際空港")
29//! tokens.each { |token| puts "#{token.surface}: #{token.details}" }
30//! ```
31
32pub mod character_filter;
33pub mod dictionary;
34pub mod error;
35pub mod metadata;
36pub mod mode;
37pub mod schema;
38pub mod segmenter;
39pub mod token;
40pub mod token_filter;
41pub mod tokenizer;
42pub mod util;
43
44#[cfg(feature = "train")]
45pub mod trainer;
46
47use magnus::{Error, Ruby, function};
48
49/// Returns the version of the lindera-ruby package.
50///
51/// # Returns
52///
53/// A string with the package version.
54fn version() -> String {
55    env!("CARGO_PKG_VERSION").to_string()
56}
57
58/// Ruby extension initialization entry point.
59///
60/// Defines the `Lindera` module and all its classes and functions.
61#[magnus::init]
62fn init(ruby: &Ruby) -> Result<(), Error> {
63    let module = ruby.define_module("Lindera")?;
64
65    // Version
66    module.define_module_function("version", function!(version, 0))?;
67
68    // Schema / FieldDefinition / FieldType
69    schema::define(ruby, &module)?;
70
71    // Metadata
72    metadata::define(ruby, &module)?;
73
74    // Mode / Penalty
75    mode::define(ruby, &module)?;
76
77    // Token
78    token::define(ruby, &module)?;
79
80    // Dictionary / UserDictionary + load/build functions
81    dictionary::define(ruby, &module)?;
82
83    // TokenizerBuilder / Tokenizer
84    tokenizer::define(ruby, &module)?;
85
86    // Trainer (feature = "train")
87    #[cfg(feature = "train")]
88    trainer::define(ruby, &module)?;
89
90    Ok(())
91}