lindera/
lib.rs

1//! # Lindera Python Bindings
2//!
3//! Python bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library for CJK text.
4//!
5//! Lindera provides high-performance tokenization and morphological analysis for:
6//! - Japanese (IPADIC, IPADIC NEologd, UniDic)
7//! - Korean (ko-dic)
8//! - Chinese (CC-CEDICT)
9//!
10//! ## Features
11//!
12//! - **Dictionary management**: Build, load, and use custom dictionaries
13//! - **Tokenization**: Multiple tokenization modes (normal, decompose)
14//! - **Filters**: Character and token filtering pipeline
15//! - **Training**: Train custom morphological models (with `train` feature)
16//! - **User dictionaries**: Support for custom user dictionaries
17//!
18//! ## Examples
19//!
20//! ```python
21//! import lindera
22//!
23//! # Create a tokenizer
24//! tokenizer = lindera.TokenizerBuilder().build()
25//!
26//! # Tokenize text
27//! tokens = tokenizer.tokenize("関西国際空港")
28//! for token in tokens:
29//!     print(token["text"], token["detail"])
30//! ```
31
32pub mod dictionary;
33pub mod error;
34pub mod metadata;
35pub mod mode;
36pub mod schema;
37pub mod tokenizer;
38#[cfg(feature = "train")]
39pub mod trainer;
40pub mod util;
41
42use pyo3::prelude::*;
43
44use crate::dictionary::{PyDictionary, PyUserDictionary};
45use crate::error::PyLinderaError;
46use crate::metadata::{PyCompressionAlgorithm, PyMetadata};
47use crate::mode::{PyMode, PyPenalty};
48use crate::schema::{PyFieldDefinition, PyFieldType, PySchema};
49use crate::tokenizer::{PyTokenizer, PyTokenizerBuilder};
50
51/// Returns the version of the lindera-python package.
52///
53/// # Returns
54///
55/// Version string in the format "major.minor.patch"
56#[pyfunction]
57pub fn version() -> String {
58    env!("CARGO_PKG_VERSION").to_string()
59}
60
61/// Python module definition for lindera.
62///
63/// This module exports all classes and functions available to Python code.
64#[pymodule]
65fn lindera(module: &Bound<'_, PyModule>) -> PyResult<()> {
66    module.add_class::<PyDictionary>()?;
67    module.add_class::<PyUserDictionary>()?;
68    module.add_class::<PyTokenizerBuilder>()?;
69    module.add_class::<PyTokenizer>()?;
70    module.add_class::<PyLinderaError>()?;
71    module.add_class::<PyMode>()?;
72    module.add_class::<PyPenalty>()?;
73    module.add_class::<PyMetadata>()?;
74    module.add_class::<PySchema>()?;
75    module.add_class::<PyFieldDefinition>()?;
76    module.add_class::<PyFieldType>()?;
77    module.add_class::<PyCompressionAlgorithm>()?;
78
79    // Dictionary functions
80    module.add_function(wrap_pyfunction!(
81        crate::dictionary::build_dictionary,
82        module
83    )?)?;
84    module.add_function(wrap_pyfunction!(
85        crate::dictionary::build_user_dictionary,
86        module
87    )?)?;
88    module.add_function(wrap_pyfunction!(
89        crate::dictionary::load_dictionary,
90        module
91    )?)?;
92    module.add_function(wrap_pyfunction!(
93        crate::dictionary::load_user_dictionary,
94        module
95    )?)?;
96
97    // Trainer functions
98    #[cfg(feature = "train")]
99    module.add_function(wrap_pyfunction!(crate::trainer::train, module)?)?;
100    #[cfg(feature = "train")]
101    module.add_function(wrap_pyfunction!(crate::trainer::export, module)?)?;
102
103    module.add_function(wrap_pyfunction!(version, module)?)?;
104    Ok(())
105}