Skip to main content

lindera_wasm/
lib.rs

1//! # lindera-wasm
2//!
3//! WebAssembly bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library.
4//!
5//! This crate provides WASM bindings that enable Japanese, Korean, and Chinese text tokenization
6//! in web browsers and Node.js environments.
7//!
8//! ## Features
9//!
10//! - **Multiple dictionaries**: IPADIC, UniDic (Japanese), ko-dic (Korean), CC-CEDICT (Chinese)
11//! - **Flexible tokenization modes**: Normal and decompose modes
12//! - **Character filters**: Unicode normalization and more
13//! - **Token filters**: Lowercase, compound word handling, number normalization
14//! - **Custom user dictionaries**: Support for user-defined dictionaries
15//!
16//! ## Usage
17//!
18//! ### Web (Browser)
19//!
20//! ```javascript
21//! import __wbg_init, { TokenizerBuilder } from 'lindera-wasm-web-ipadic';
22//!
23//! __wbg_init().then(() => {
24//!     const builder = new TokenizerBuilder();
25//!     builder.set_dictionary("embedded://ipadic");
26//!     builder.set_mode("normal");
27//!
28//!     const tokenizer = builder.build();
29//!     const tokens = tokenizer.tokenize("関西国際空港");
30//!     console.log(tokens);
31//! });
32//! ```
33//!
34//! ### Node.js
35//!
36//! ```javascript
37//! const { TokenizerBuilder } = require('lindera-wasm-nodejs-ipadic');
38//!
39//! const builder = new TokenizerBuilder();
40//! builder.set_dictionary("embedded://ipadic");
41//! builder.set_mode("normal");
42//!
43//! const tokenizer = builder.build();
44//! const tokens = tokenizer.tokenize("関西国際空港");
45//! console.log(tokens);
46//! ```
47
48pub mod character_filter;
49pub mod dictionary;
50pub mod error;
51pub mod metadata;
52pub mod mode;
53pub mod schema;
54pub mod segmenter;
55pub mod token;
56pub mod token_filter;
57pub mod tokenizer;
58
59use wasm_bindgen::prelude::*;
60
61pub use crate::dictionary::{JsDictionary as Dictionary, JsUserDictionary as UserDictionary};
62pub use crate::error::JsLinderaError as LinderaError;
63pub use crate::metadata::{JsCompressionAlgorithm as CompressionAlgorithm, JsMetadata as Metadata};
64pub use crate::mode::{JsMode as Mode, JsPenalty as Penalty};
65pub use crate::schema::{
66    JsFieldDefinition as FieldDefinition, JsFieldType as FieldType, JsSchema as Schema,
67};
68pub use crate::segmenter::JsSegmenter as Segmenter;
69pub use crate::token::JsToken as Token;
70pub use crate::tokenizer::{Tokenizer, TokenizerBuilder};
71
// Top-level function aliases for consistency with the Python API.
73#[wasm_bindgen(js_name = "load_dictionary")]
74pub fn py_load_dictionary(uri: &str) -> Result<Dictionary, JsValue> {
75    crate::dictionary::load_dictionary(uri)
76}
77
78#[wasm_bindgen(js_name = "load_user_dictionary")]
79pub fn py_load_user_dictionary(uri: &str, metadata: Metadata) -> Result<UserDictionary, JsValue> {
80    crate::dictionary::load_user_dictionary(uri, metadata)
81}
82
83#[wasm_bindgen(js_name = "build_dictionary")]
84pub fn py_build_dictionary(
85    input_dir: &str,
86    output_dir: &str,
87    metadata: Metadata,
88) -> Result<(), JsValue> {
89    crate::dictionary::build_dictionary(input_dir, output_dir, metadata)
90}
91
92#[wasm_bindgen(js_name = "build_user_dictionary")]
93pub fn py_build_user_dictionary(
94    input_file: &str,
95    output_dir: &str,
96    metadata: Option<Metadata>,
97) -> Result<(), JsValue> {
98    crate::dictionary::build_user_dictionary(input_file, output_dir, metadata)
99}
100
101const VERSION: &str = env!("CARGO_PKG_VERSION");
102
103/// Returns the version of the lindera-wasm package.
104#[wasm_bindgen]
105pub fn version() -> String {
106    VERSION.to_string()
107}
108
109/// Gets the version of the lindera-wasm library.
110/// Backward compatibility alias for version().
111#[wasm_bindgen(js_name = "getVersion")]
112pub fn get_version() -> String {
113    version()
114}