Skip to main content

lindera/
lib.rs

1//! # Lindera Python Bindings
2//!
3//! Python bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library for CJK text.
4//!
5//! Lindera provides high-performance tokenization and morphological analysis for:
6//! - Japanese (IPADIC, IPADIC NEologd, UniDic)
7//! - Korean (ko-dic)
8//! - Chinese (CC-CEDICT)
9//!
10//! ## Features
11//!
12//! - **Dictionary management**: Build, load, and use custom dictionaries
13//! - **Tokenization**: Multiple tokenization modes (normal, decompose)
14//! - **Filters**: Character and token filtering pipeline
15//! - **Training**: Train custom morphological models (with `train` feature)
16//! - **User dictionaries**: Support for custom user dictionaries
17//!
18//! ## Examples
19//!
20//! ```python
21//! import lindera
22//!
23//! # Create a tokenizer
24//! tokenizer = lindera.TokenizerBuilder().build()
25//!
26//! # Tokenize text
27//! tokens = tokenizer.tokenize("関西国際空港")
28//! for token in tokens:
29//!     print(token["text"], token["detail"])
30//! ```
31
32pub mod character_filter;
33pub mod dictionary;
34pub mod error;
35pub mod metadata;
36pub mod mode;
37pub mod schema;
38pub mod segmenter;
39pub mod token;
40pub mod token_filter;
41pub mod tokenizer;
42pub mod util;
43
44#[cfg(feature = "train")]
45pub mod trainer;
46
47use pyo3::prelude::*;
48
49/// Returns the version of the lindera-python package.
50#[pyfunction]
51pub fn version() -> String {
52    env!("CARGO_PKG_VERSION").to_string()
53}
54
55/// Python module definition for lindera.
56#[pymodule]
57fn lindera(m: &Bound<'_, PyModule>) -> PyResult<()> {
58    // Register submodules
59    tokenizer::register(m)?;
60    dictionary::register(m)?;
61    token::register(m)?;
62    mode::register(m)?;
63    metadata::register(m)?;
64    schema::register(m)?;
65    segmenter::register(m)?;
66    character_filter::register(m)?;
67    token_filter::register(m)?;
68    error::register(m)?;
69
70    #[cfg(feature = "train")]
71    {
72        // For trainer, we can implement register similarly or just keeping it flat for now if complex
73        // Let's assume we want lindera.trainer.train() etc.
74        let py = m.py();
75        let trainer_mod = PyModule::new(py, "trainer")?;
76        trainer_mod.add_function(wrap_pyfunction!(crate::trainer::train, &trainer_mod)?)?;
77        trainer_mod.add_function(wrap_pyfunction!(crate::trainer::export, &trainer_mod)?)?;
78        m.add_submodule(&trainer_mod)?;
79    }
80
81    m.add_function(wrap_pyfunction!(version, m)?)?;
82
83    // --- Backward compatibility aliases (top-level) ---
84    // Classes
85    m.add("Tokenizer", m.getattr("tokenizer")?.getattr("Tokenizer")?)?;
86    m.add(
87        "TokenizerBuilder",
88        m.getattr("tokenizer")?.getattr("TokenizerBuilder")?,
89    )?;
90    m.add(
91        "Dictionary",
92        m.getattr("dictionary")?.getattr("Dictionary")?,
93    )?;
94    m.add(
95        "UserDictionary",
96        m.getattr("dictionary")?.getattr("UserDictionary")?,
97    )?;
98    m.add("Token", m.getattr("token")?.getattr("Token")?)?;
99    m.add("Mode", m.getattr("mode")?.getattr("Mode")?)?;
100    m.add("Penalty", m.getattr("mode")?.getattr("Penalty")?)?;
101    m.add("Metadata", m.getattr("metadata")?.getattr("Metadata")?)?;
102    m.add(
103        "CompressionAlgorithm",
104        m.getattr("metadata")?.getattr("CompressionAlgorithm")?,
105    )?;
106    m.add("Schema", m.getattr("schema")?.getattr("Schema")?)?;
107    m.add(
108        "FieldDefinition",
109        m.getattr("schema")?.getattr("FieldDefinition")?,
110    )?;
111    m.add("FieldType", m.getattr("schema")?.getattr("FieldType")?)?;
112    m.add("LinderaError", m.getattr("error")?.getattr("LinderaError")?)?;
113
114    // Functions
115    m.add(
116        "load_dictionary",
117        m.getattr("dictionary")?.getattr("load_dictionary")?,
118    )?;
119    m.add(
120        "load_user_dictionary",
121        m.getattr("dictionary")?.getattr("load_user_dictionary")?,
122    )?;
123    m.add(
124        "build_dictionary",
125        m.getattr("dictionary")?.getattr("build_dictionary")?,
126    )?;
127    m.add(
128        "build_user_dictionary",
129        m.getattr("dictionary")?.getattr("build_user_dictionary")?,
130    )?;
131
132    #[cfg(feature = "train")]
133    {
134        m.add("train", m.getattr("trainer")?.getattr("train")?)?;
135        m.add("export", m.getattr("trainer")?.getattr("export")?)?;
136    }
137
138    Ok(())
139}