Skip to main content

mecab_ko_dict/
lib.rs

//! # mecab-ko-dict
//!
//! Korean morpheme dictionary management library.
//!
//! ## Key features
//!
//! - Binary dictionary format (v3.0)
//! - FST-based morpheme lookup
//! - Connection cost matrix
//! - Dictionary builder/compiler
//!
//! ## Examples
//!
//! ```rust,ignore
//! use mecab_ko_dict::dictionary::SystemDictionary;
//!
//! let dict = SystemDictionary::load("path/to/dict").unwrap();
//! let entries = dict.lookup("안녕");
//! ```
20
21#![warn(missing_docs)]
22#![deny(unsafe_code)]
23#![cfg_attr(feature = "simd", feature(portable_simd))]
24#![allow(
25    clippy::inline_always,
26    clippy::similar_names,
27    clippy::cast_precision_loss,
28    clippy::cast_possible_truncation,
29    clippy::option_if_let_else,
30    clippy::map_unwrap_or,
31    clippy::zero_sized_map_values,
32    clippy::missing_panics_doc,
33    clippy::unwrap_used
34)]
35
36pub mod dictionary;
37pub mod domain;
38pub mod entry_store;
39pub mod file_watcher;
40pub mod hot_reload;
41pub mod lazy_entries;
42pub mod loader;
43pub mod matrix;
44pub mod string_pool;
45pub mod trie;
46pub mod user_dict;
47
48#[cfg(feature = "hot-reload-v2")]
49pub mod hot_reload_v2;
50
51pub use dictionary::{DictEntry, DictionaryLoader, LoadOptions, SystemDictionary};
52pub use entry_store::{EagerStore, EntryStore, LazyStore};
53pub use error::{DictError, Result};
54pub use file_watcher::{FileEvent, FileWatcher, WatchConfig};
55pub use hot_reload::{
56    DeltaUpdate, DeltaUpdateBuilder, EntryChange, HotReloadDictionary, Version, VersionInfo,
57};
58pub use lazy_entries::LazyEntries;
59pub use loader::{LazyDictionary, LoaderConfig, MmapDictionary};
60pub use matrix::{ConnectionMatrix, DenseMatrix, Matrix, MatrixLoader, MmapMatrix, SparseMatrix};
61pub use string_pool::{ConcurrentStringPool, StringPool, StringPoolStats};
62pub use trie::{DictionarySearcher, EntryIndex, PrefixMatch, Trie, TrieBuilder};
63pub use user_dict::{UserDictionary, UserDictionaryBuilder, UserEntry};
64
/// A single dictionary entry.
///
/// Derives `Hash` alongside `Eq` so entries can be deduplicated in a
/// `HashSet` or used as `HashMap` keys; every field is itself hashable.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Entry {
    /// Surface form.
    pub surface: String,
    /// Left-context ID.
    pub left_id: u16,
    /// Right-context ID.
    pub right_id: u16,
    /// Cost.
    pub cost: i16,
    /// Part-of-speech information.
    pub feature: String,
}
79
80/// 사전 인터페이스
81pub trait Dictionary {
82    /// 형태소 검색
83    fn lookup(&self, surface: &str) -> Vec<Entry>;
84
85    /// 연접 비용 조회
86    fn get_connection_cost(&self, left_id: u16, right_id: u16) -> i16;
87}
88
89/// 에러 모듈
90pub mod error {
91    use thiserror::Error;
92
93    /// 사전 에러 타입
94    #[derive(Error, Debug)]
95    pub enum DictError {
96        /// IO 에러
97        #[error("IO error: {0}")]
98        Io(#[from] std::io::Error),
99
100        /// 포맷 에러
101        #[error("Invalid dictionary format: {0}")]
102        Format(String),
103
104        /// 버전 불일치
105        #[error("Version mismatch: expected {expected}, found {found}")]
106        Version {
107            /// 예상 버전
108            expected: u32,
109            /// 실제 버전
110            found: u32,
111        },
112    }
113
114    /// Result 타입 별칭
115    pub type Result<T> = std::result::Result<T, DictError>;
116}
117
/// Dictionary format module (stub).
///
/// Defines the binary dictionary format.
pub mod format {

    /// Dictionary header.
    ///
    /// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a header can be
    /// debug-printed, duplicated and compared (for example in `assert_eq!`).
    #[derive(Debug, Clone, PartialEq, Eq)]
    pub struct Header {
        /// Magic number.
        pub magic: [u8; 4],
        /// Version.
        pub version: u32,
        /// Number of entries.
        pub entry_count: u32,
    }
}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// An `Entry` built from a struct literal keeps the surface form it was given.
    #[test]
    fn test_entry_creation() {
        let surface = String::from("안녕");
        let feature = String::from("NNG,*,T,안녕,*,*,*,*");

        let sample = Entry {
            surface,
            left_id: 1,
            right_id: 1,
            cost: 100,
            feature,
        };

        assert_eq!(sample.surface, "안녕");
    }
}
150}