lindera_ruby/lib.rs
1//! # Lindera Ruby Bindings
2//!
3//! Ruby bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library for CJK text.
4//!
5//! Lindera provides high-performance tokenization and morphological analysis for:
6//! - Japanese (IPADIC, IPADIC NEologd, UniDic)
7//! - Korean (ko-dic)
8//! - Chinese (CC-CEDICT, Jieba)
9//!
10//! ## Features
11//!
12//! - **Dictionary management**: Build, load, and use custom dictionaries
13//! - **Tokenization**: Multiple tokenization modes (normal, decompose)
14//! - **Filters**: Character and token filtering pipeline
15//! - **Training**: Train custom morphological models (with `train` feature)
16//! - **User dictionaries**: Support for custom user dictionaries
17//!
18//! ## Examples
19//!
20//! ```ruby
21//! require "lindera"
22//!
23//! # Create a tokenizer
24//! builder = Lindera::TokenizerBuilder.new
25//! tokenizer = builder.build
26//!
27//! # Tokenize text
28//! tokens = tokenizer.tokenize("関西国際空港")
29//! tokens.each { |token| puts "#{token.surface}: #{token.details}" }
30//! ```
31
32pub mod character_filter;
33pub mod dictionary;
34pub mod error;
35pub mod metadata;
36pub mod mode;
37pub mod schema;
38pub mod segmenter;
39pub mod token;
40pub mod token_filter;
41pub mod tokenizer;
42pub mod util;
43
44#[cfg(feature = "train")]
45pub mod trainer;
46
47use magnus::{Error, Ruby, function};
48
49/// Returns the version of the lindera-ruby package.
50///
51/// # Returns
52///
53/// A string with the package version.
54fn version() -> String {
55 env!("CARGO_PKG_VERSION").to_string()
56}
57
58/// Ruby extension initialization entry point.
59///
60/// Defines the `Lindera` module and all its classes and functions.
61#[magnus::init]
62fn init(ruby: &Ruby) -> Result<(), Error> {
63 let module = ruby.define_module("Lindera")?;
64
65 // Version
66 module.define_module_function("version", function!(version, 0))?;
67
68 // Schema / FieldDefinition / FieldType
69 schema::define(ruby, &module)?;
70
71 // Metadata
72 metadata::define(ruby, &module)?;
73
74 // Mode / Penalty
75 mode::define(ruby, &module)?;
76
77 // Token
78 token::define(ruby, &module)?;
79
80 // Dictionary / UserDictionary + load/build functions
81 dictionary::define(ruby, &module)?;
82
83 // TokenizerBuilder / Tokenizer
84 tokenizer::define(ruby, &module)?;
85
86 // Trainer (feature = "train")
87 #[cfg(feature = "train")]
88 trainer::define(ruby, &module)?;
89
90 Ok(())
91}