1use std::io::{BufRead, BufReader, Read as _, Write as _};
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33
34use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
35
36use crate::error::{DictError, Result};
37use crate::matrix::{ConnectionMatrix, Matrix};
38use crate::trie::Trie;
39use crate::user_dict::UserDictionary;
40use crate::{Dictionary, Entry};
41
/// Directories probed, in order, by `DictionaryLoader::find_dicdir` when the
/// `MECAB_DICDIR` environment variable does not name a directory.
const DEFAULT_DICDIR_PATHS: &[&str] = &[
    "/usr/local/lib/mecab/dic/mecab-ko-dic",
    "/usr/lib/mecab/dic/mecab-ko-dic",
    "/opt/mecab/dic/mecab-ko-dic",
    "./dic/mecab-ko-dic",
];

/// File name of the trie inside a dictionary directory; the loaders also
/// accept the same name with a `.zst` suffix.
const TRIE_FILE: &str = "sys.dic";
/// File name of the binary connection matrix.
const MATRIX_FILE: &str = "matrix.bin";
/// File name of the binary entry table.
const ENTRIES_BIN_FILE: &str = "entries.bin";
/// File name of the CSV entry table, used when the binary table is absent.
const ENTRIES_CSV_FILE: &str = "entries.csv";

/// Magic bytes that open a binary entry table.
const ENTRIES_MAGIC: &[u8; 4] = b"MKED";
/// The only binary entry-table version the loader accepts.
const ENTRIES_VERSION: u32 = 1;
60
/// A dictionary loaded from one directory: trie, connection matrix and entry
/// table, plus an optional user dictionary.
pub struct SystemDictionary {
    /// Directory the dictionary was loaded from.
    dicdir: PathBuf,
    /// Trie searched by surface; the values it yields are used as starting
    /// indices into `entries`.
    trie: Trie<'static>,
    /// Connection-cost matrix.
    matrix: ConnectionMatrix,
    /// Entry table. `get_entries_at` scans forward from a trie index while the
    /// surface matches, so entries sharing a surface must be adjacent.
    entries: Vec<DictEntry>,
    /// User dictionary consulted by `lookup_combined`, if one was attached.
    user_dict: Option<Arc<UserDictionary>>,
}
77
/// One record of the entry table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DictEntry {
    /// Surface form; compared against the matched text during lookups.
    pub surface: String,
    /// Left context id.
    pub left_id: u16,
    /// Right context id.
    pub right_id: u16,
    /// Cost of the entry.
    pub cost: i16,
    /// Feature string, stored verbatim.
    pub feature: String,
}
94
95impl DictEntry {
96 pub fn new(
98 surface: impl Into<String>,
99 left_id: u16,
100 right_id: u16,
101 cost: i16,
102 feature: impl Into<String>,
103 ) -> Self {
104 Self {
105 surface: surface.into(),
106 left_id,
107 right_id,
108 cost,
109 feature: feature.into(),
110 }
111 }
112
113 #[must_use]
115 pub fn to_entry(&self) -> Entry {
116 Entry {
117 surface: self.surface.clone(),
118 left_id: self.left_id,
119 right_id: self.right_id,
120 cost: self.cost,
121 feature: self.feature.clone(),
122 }
123 }
124}
125
126impl From<Entry> for DictEntry {
127 fn from(entry: Entry) -> Self {
128 Self {
129 surface: entry.surface,
130 left_id: entry.left_id,
131 right_id: entry.right_id,
132 cost: entry.cost,
133 feature: entry.feature,
134 }
135 }
136}
137
/// Switches controlling how a dictionary is loaded.
///
/// The derived `Default` leaves both flags `false` and the cache size `None`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LoadOptions {
    /// Memory-map the binary connection matrix instead of reading it.
    pub use_mmap_matrix: bool,
    /// Request lazily loaded entries.
    pub use_lazy_entries: bool,
    /// Cache size for lazily loaded entries, if any.
    pub lazy_cache_size: Option<usize>,
}

impl LoadOptions {
    /// Preset with the memory-mapped matrix, lazy entries and a cache of
    /// 10000 entries enabled.
    #[must_use]
    pub const fn memory_optimized() -> Self {
        let lazy_cache_size = Some(10_000);
        Self {
            lazy_cache_size,
            use_lazy_entries: true,
            use_mmap_matrix: true,
        }
    }

    /// Preset with every option switched off and no cache.
    #[must_use]
    pub const fn speed_optimized() -> Self {
        Self {
            lazy_cache_size: None,
            use_lazy_entries: false,
            use_mmap_matrix: false,
        }
    }
}
170
171impl SystemDictionary {
172 pub fn load_default() -> Result<Self> {
183 let dicdir = DictionaryLoader::find_dicdir()?;
184 Self::load(dicdir)
185 }
186
187 pub fn load_memory_optimized() -> Result<Self> {
196 let dicdir = DictionaryLoader::find_dicdir()?;
197 Self::load_with_options(dicdir, LoadOptions::memory_optimized())
198 }
199
200 pub fn load_with_options<P: AsRef<Path>>(dicdir: P, options: LoadOptions) -> Result<Self> {
207 let dicdir = dicdir.as_ref().to_path_buf();
208
209 let trie_path = dicdir.join(TRIE_FILE);
211 let trie = if trie_path.exists() {
212 Trie::from_file(&trie_path)?
213 } else {
214 let compressed_path = dicdir.join(format!("{TRIE_FILE}.zst"));
216 if compressed_path.exists() {
217 Trie::from_compressed_file(&compressed_path)?
218 } else {
219 return Err(DictError::Format(format!(
220 "Trie file not found: {}",
221 trie_path.display()
222 )));
223 }
224 };
225
226 let matrix_path = dicdir.join(MATRIX_FILE);
228 let matrix = if matrix_path.exists() {
229 if options.use_mmap_matrix {
230 ConnectionMatrix::from_mmap_file(&matrix_path)?
231 } else {
232 ConnectionMatrix::from_bin_file(&matrix_path)?
233 }
234 } else {
235 let def_path = dicdir.join("matrix.def");
237 if def_path.exists() {
238 ConnectionMatrix::from_def_file(&def_path)?
239 } else {
240 return Err(DictError::Format(format!(
241 "Matrix file not found: {}",
242 matrix_path.display()
243 )));
244 }
245 };
246
247 let entries = Self::load_entries(&dicdir)?;
249
250 Ok(Self {
251 dicdir,
252 trie,
253 matrix,
254 entries,
255 user_dict: None,
256 })
257 }
258
259 pub fn load<P: AsRef<Path>>(dicdir: P) -> Result<Self> {
270 let dicdir = dicdir.as_ref().to_path_buf();
271
272 let trie_path = dicdir.join(TRIE_FILE);
274 let trie = if trie_path.exists() {
275 Trie::from_file(&trie_path)?
276 } else {
277 let compressed_path = dicdir.join(format!("{TRIE_FILE}.zst"));
279 if compressed_path.exists() {
280 Trie::from_compressed_file(&compressed_path)?
281 } else {
282 return Err(DictError::Format(format!(
283 "Trie file not found: {}",
284 trie_path.display()
285 )));
286 }
287 };
288
289 let matrix_path = dicdir.join(MATRIX_FILE);
291 let matrix = if matrix_path.exists() {
292 ConnectionMatrix::from_bin_file(&matrix_path)?
293 } else {
294 let def_path = dicdir.join("matrix.def");
296 if def_path.exists() {
297 ConnectionMatrix::from_def_file(&def_path)?
298 } else {
299 return Err(DictError::Format(format!(
300 "Matrix file not found: {}",
301 matrix_path.display()
302 )));
303 }
304 };
305
306 let entries = Self::load_entries(&dicdir)?;
308
309 Ok(Self {
310 dicdir,
311 trie,
312 matrix,
313 entries,
314 user_dict: None,
315 })
316 }
317
318 fn load_entries(dicdir: &Path) -> Result<Vec<DictEntry>> {
324 let bin_path = dicdir.join(ENTRIES_BIN_FILE);
326 if bin_path.exists() {
327 return Self::load_entries_bin(&bin_path);
328 }
329
330 let csv_path = dicdir.join(ENTRIES_CSV_FILE);
332 if csv_path.exists() {
333 return Self::load_entries_csv(&csv_path);
334 }
335
336 Ok(Vec::new())
338 }
339
340 fn load_entries_csv(path: &Path) -> Result<Vec<DictEntry>> {
344 let file = std::fs::File::open(path).map_err(DictError::Io)?;
345 let reader = BufReader::new(file);
346 let mut entries = Vec::new();
347
348 for (line_num, line_result) in reader.lines().enumerate() {
349 let line = line_result.map_err(DictError::Io)?;
350 let line = line.trim();
351 if line.is_empty() || line.starts_with('#') {
352 continue;
353 }
354
355 let mut fields = line.splitn(5, ',');
357 let surface = fields
358 .next()
359 .ok_or_else(|| {
360 DictError::Format(format!("line {}: missing surface", line_num + 1))
361 })?
362 .to_string();
363 let left_id: u16 = fields
364 .next()
365 .ok_or_else(|| {
366 DictError::Format(format!("line {}: missing left_id", line_num + 1))
367 })?
368 .parse()
369 .map_err(|_| {
370 DictError::Format(format!("line {}: invalid left_id", line_num + 1))
371 })?;
372 let right_id: u16 = fields
373 .next()
374 .ok_or_else(|| {
375 DictError::Format(format!("line {}: missing right_id", line_num + 1))
376 })?
377 .parse()
378 .map_err(|_| {
379 DictError::Format(format!("line {}: invalid right_id", line_num + 1))
380 })?;
381 let cost: i16 = fields
382 .next()
383 .ok_or_else(|| DictError::Format(format!("line {}: missing cost", line_num + 1)))?
384 .parse()
385 .map_err(|_| DictError::Format(format!("line {}: invalid cost", line_num + 1)))?;
386 let feature = fields.next().unwrap_or("").to_string();
387
388 entries.push(DictEntry {
389 surface,
390 left_id,
391 right_id,
392 cost,
393 feature,
394 });
395 }
396
397 Ok(entries)
398 }
399
400 fn load_entries_bin(path: &Path) -> Result<Vec<DictEntry>> {
404 let data = std::fs::read(path).map_err(DictError::Io)?;
405 let mut cursor = std::io::Cursor::new(&data);
406
407 let mut magic = [0u8; 4];
409 cursor
410 .read_exact(&mut magic)
411 .map_err(|e| DictError::Format(format!("entries.bin magic: {e}")))?;
412 if &magic != ENTRIES_MAGIC {
413 return Err(DictError::Format(
414 "entries.bin: invalid magic number".into(),
415 ));
416 }
417
418 let version = cursor
420 .read_u32::<LittleEndian>()
421 .map_err(|e| DictError::Format(format!("entries.bin version: {e}")))?;
422 if version != ENTRIES_VERSION {
423 return Err(DictError::Format(format!(
424 "entries.bin: unsupported version {version}"
425 )));
426 }
427
428 let count = cursor
430 .read_u32::<LittleEndian>()
431 .map_err(|e| DictError::Format(format!("entries.bin count: {e}")))?;
432
433 let mut entries = Vec::with_capacity(count as usize);
434 for i in 0..count {
435 let left_id = cursor
436 .read_u16::<LittleEndian>()
437 .map_err(|e| DictError::Format(format!("entries.bin entry {i} left_id: {e}")))?;
438 let right_id = cursor
439 .read_u16::<LittleEndian>()
440 .map_err(|e| DictError::Format(format!("entries.bin entry {i} right_id: {e}")))?;
441 let cost = cursor
442 .read_i16::<LittleEndian>()
443 .map_err(|e| DictError::Format(format!("entries.bin entry {i} cost: {e}")))?;
444 let surface_len = cursor
445 .read_u16::<LittleEndian>()
446 .map_err(|e| DictError::Format(format!("entries.bin entry {i} surface_len: {e}")))?
447 as usize;
448 let feature_len = cursor
449 .read_u16::<LittleEndian>()
450 .map_err(|e| DictError::Format(format!("entries.bin entry {i} feature_len: {e}")))?
451 as usize;
452
453 let mut surface_bytes = vec![0u8; surface_len];
454 cursor
455 .read_exact(&mut surface_bytes)
456 .map_err(|e| DictError::Format(format!("entries.bin entry {i} surface: {e}")))?;
457 let surface = String::from_utf8(surface_bytes).map_err(|e| {
458 DictError::Format(format!("entries.bin entry {i} surface utf8: {e}"))
459 })?;
460
461 let mut feature_bytes = vec![0u8; feature_len];
462 cursor
463 .read_exact(&mut feature_bytes)
464 .map_err(|e| DictError::Format(format!("entries.bin entry {i} feature: {e}")))?;
465 let feature = String::from_utf8(feature_bytes).map_err(|e| {
466 DictError::Format(format!("entries.bin entry {i} feature utf8: {e}"))
467 })?;
468
469 entries.push(DictEntry {
470 surface,
471 left_id,
472 right_id,
473 cost,
474 feature,
475 });
476 }
477
478 Ok(entries)
479 }
480
481 pub fn save_entries_bin(entries: &[DictEntry], path: &Path) -> Result<()> {
487 let mut file = std::fs::File::create(path).map_err(DictError::Io)?;
488
489 file.write_all(ENTRIES_MAGIC).map_err(DictError::Io)?;
490 file.write_u32::<LittleEndian>(ENTRIES_VERSION)
491 .map_err(DictError::Io)?;
492
493 let count = u32::try_from(entries.len())
494 .map_err(|_| DictError::Format("too many entries".into()))?;
495 file.write_u32::<LittleEndian>(count)
496 .map_err(DictError::Io)?;
497
498 for entry in entries {
499 file.write_u16::<LittleEndian>(entry.left_id)
500 .map_err(DictError::Io)?;
501 file.write_u16::<LittleEndian>(entry.right_id)
502 .map_err(DictError::Io)?;
503 file.write_i16::<LittleEndian>(entry.cost)
504 .map_err(DictError::Io)?;
505
506 let surface_bytes = entry.surface.as_bytes();
507 let surface_len = u16::try_from(surface_bytes.len())
508 .map_err(|_| DictError::Format("surface too long".into()))?;
509 file.write_u16::<LittleEndian>(surface_len)
510 .map_err(DictError::Io)?;
511
512 let feature_bytes = entry.feature.as_bytes();
513 let feature_len = u16::try_from(feature_bytes.len())
514 .map_err(|_| DictError::Format("feature too long".into()))?;
515 file.write_u16::<LittleEndian>(feature_len)
516 .map_err(DictError::Io)?;
517
518 file.write_all(surface_bytes).map_err(DictError::Io)?;
519 file.write_all(feature_bytes).map_err(DictError::Io)?;
520 }
521
522 Ok(())
523 }
524
525 pub fn save_entries_csv(entries: &[DictEntry], path: &Path) -> Result<()> {
531 let mut file = std::fs::File::create(path).map_err(DictError::Io)?;
532
533 for entry in entries {
534 writeln!(
535 file,
536 "{},{},{},{},{}",
537 entry.surface, entry.left_id, entry.right_id, entry.cost, entry.feature
538 )
539 .map_err(DictError::Io)?;
540 }
541
542 Ok(())
543 }
544
545 fn get_entries_at(&self, first_index: u32, surface: &str) -> Vec<&DictEntry> {
550 let start = first_index as usize;
551 let mut results = Vec::new();
552 for entry in self.entries.get(start..).unwrap_or(&[]) {
553 if entry.surface == surface {
554 results.push(entry);
555 } else {
556 break;
557 }
558 }
559 results
560 }
561
562 #[must_use]
568 pub fn with_user_dictionary(mut self, user_dict: UserDictionary) -> Self {
569 self.user_dict = Some(Arc::new(user_dict));
570 self
571 }
572
573 pub fn set_user_dictionary(&mut self, user_dict: UserDictionary) {
575 self.user_dict = Some(Arc::new(user_dict));
576 }
577
578 #[must_use]
580 pub fn dicdir(&self) -> &Path {
581 &self.dicdir
582 }
583
/// Borrows the trie.
#[must_use]
pub const fn trie(&self) -> &Trie<'static> {
    &self.trie
}
589
/// Borrows the connection matrix.
#[must_use]
pub const fn matrix(&self) -> &ConnectionMatrix {
    &self.matrix
}
595
596 #[must_use]
598 pub fn entries(&self) -> &[DictEntry] {
599 &self.entries
600 }
601
602 #[must_use]
604 pub fn user_dictionary(&self) -> Option<&UserDictionary> {
605 self.user_dict.as_deref()
606 }
607
608 #[must_use]
614 pub fn get_entry(&self, index: u32) -> Option<&DictEntry> {
615 self.entries.get(index as usize)
616 }
617
618 #[must_use]
631 pub fn common_prefix_search(&self, text: &str) -> Vec<(&DictEntry, usize)> {
632 let mut results = Vec::new();
633 for (index, byte_len) in self.trie.common_prefix_search(text) {
634 let surface = &text[..byte_len];
635 let entries = self.get_entries_at(index, surface);
636 for entry in entries {
637 results.push((entry, byte_len));
638 }
639 }
640 results
641 }
642
643 #[must_use]
650 pub fn common_prefix_search_at(
651 &self,
652 text: &str,
653 start_byte: usize,
654 ) -> Vec<(&DictEntry, usize)> {
655 let mut results = Vec::new();
656 for (index, end_byte) in self.trie.common_prefix_search_at(text, start_byte) {
657 let byte_len = end_byte - start_byte;
658 let surface = &text[start_byte..end_byte];
659 let entries = self.get_entries_at(index, surface);
660 for entry in entries {
661 results.push((entry, byte_len));
662 }
663 }
664 results
665 }
666
667 #[must_use]
673 pub fn lookup_combined(&self, surface: &str) -> Vec<Entry> {
674 let mut results = self.lookup(surface);
675
676 if let Some(user_dict) = &self.user_dict {
678 let user_entries = user_dict.lookup(surface);
679 results.extend(user_entries.iter().map(|e| e.to_entry()));
680 }
681
682 results
683 }
684
/// Test-only helper: appends `entry` to the end of the entry table. The trie
/// is not touched.
#[cfg(test)]
pub fn add_entry(&mut self, entry: DictEntry) {
    self.entries.push(entry);
}
692
/// Assembles a dictionary directly from already-built parts, with no user
/// dictionary. Hidden from the generated docs.
#[doc(hidden)]
#[must_use]
pub const fn new_test(
    dicdir: PathBuf,
    trie: Trie<'static>,
    matrix: ConnectionMatrix,
    entries: Vec<DictEntry>,
) -> Self {
    Self {
        dicdir,
        trie,
        matrix,
        entries,
        user_dict: None,
    }
}
710}
711
712impl Dictionary for SystemDictionary {
713 fn lookup(&self, surface: &str) -> Vec<Entry> {
714 if let Some(index) = self.trie.exact_match(surface) {
716 let entries = self.get_entries_at(index, surface);
717 if !entries.is_empty() {
718 return entries.iter().map(|e| e.to_entry()).collect();
719 }
720 }
721
722 Vec::new()
723 }
724
725 fn get_connection_cost(&self, left_id: u16, right_id: u16) -> i16 {
726 i16::try_from(self.matrix.get(right_id, left_id)).unwrap_or(i16::MAX)
727 }
728}
729
/// Namespace for functions that locate, validate and load dictionary
/// directories; it carries no state.
pub struct DictionaryLoader;
734
735impl DictionaryLoader {
736 pub fn find_dicdir() -> Result<PathBuf> {
746 if let Ok(dicdir) = std::env::var("MECAB_DICDIR") {
748 let path = PathBuf::from(dicdir);
749 if path.is_dir() {
750 return Ok(path);
751 }
752 }
753
754 for &path_str in DEFAULT_DICDIR_PATHS {
756 let path = PathBuf::from(path_str);
757 if path.is_dir() {
758 return Ok(path);
759 }
760 }
761
762 {
764 let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
765 let test_dict = manifest_dir.join("../../test-fixtures/mini-dict");
766 if test_dict.is_dir() {
767 return Ok(test_dict);
768 }
769 }
770
771 Err(DictError::Format(
772 "Dictionary directory not found. Set MECAB_DICDIR environment variable or install mecab-ko-dic to default location".to_string(),
773 ))
774 }
775
/// Loads the dictionary stored in `dicdir`; forwards to
/// `SystemDictionary::load`.
///
/// # Errors
///
/// Returns whatever error `SystemDictionary::load` reports.
pub fn load_system<P: AsRef<Path>>(dicdir: P) -> Result<SystemDictionary> {
    SystemDictionary::load(dicdir)
}
784
/// Loads the dictionary from the default location; forwards to
/// `SystemDictionary::load_default`.
///
/// # Errors
///
/// Returns whatever error `SystemDictionary::load_default` reports.
pub fn load_default() -> Result<SystemDictionary> {
    SystemDictionary::load_default()
}
793
794 pub fn validate_dicdir<P: AsRef<Path>>(dicdir: P) -> Result<()> {
804 let dicdir = dicdir.as_ref();
805
806 if !dicdir.is_dir() {
807 return Err(DictError::Format(format!(
808 "Dictionary directory does not exist: {}",
809 dicdir.display()
810 )));
811 }
812
813 let has_trie =
815 dicdir.join(TRIE_FILE).exists() || dicdir.join(format!("{TRIE_FILE}.zst")).exists();
816
817 let has_matrix = dicdir.join(MATRIX_FILE).exists() || dicdir.join("matrix.def").exists();
818
819 if !has_trie {
820 return Err(DictError::Format(format!(
821 "Trie file not found in {}",
822 dicdir.display()
823 )));
824 }
825
826 if !has_matrix {
827 return Err(DictError::Format(format!(
828 "Matrix file not found in {}",
829 dicdir.display()
830 )));
831 }
832
833 Ok(())
834 }
835}
836
// Unit tests for entry conversion, lookups on an in-memory dictionary, and
// the binary/CSV entry-table round trips.
#[cfg(test)]
#[allow(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::items_after_statements
)]
mod tests {
    use super::*;
    use crate::matrix::DenseMatrix;
    use crate::trie::TrieBuilder;

    /// Builds an in-memory dictionary with five single-entry surfaces, where
    /// each trie value is the index of the matching entry.
    fn create_test_dictionary() -> SystemDictionary {
        // (surface, index into `dict_entries`)
        let entries = vec![
            ("가", 0u32),
            ("가다", 1),
            ("가방", 2),
            ("나", 3),
            ("나다", 4),
        ];
        let trie_bytes = TrieBuilder::build(&entries).expect("should build trie");
        let trie = Trie::from_vec(trie_bytes);

        let matrix = DenseMatrix::new(10, 10, 100);
        let matrix = ConnectionMatrix::Dense(matrix);

        let dict_entries = vec![
            DictEntry::new("가", 1, 1, 100, "NNG,*,T,가,*,*,*,*"),
            DictEntry::new("가다", 2, 2, 200, "VV,*,F,가다,*,*,*,*"),
            DictEntry::new("가방", 3, 3, 300, "NNG,*,T,가방,*,*,*,*"),
            DictEntry::new("나", 4, 4, 400, "NP,*,F,나,*,*,*,*"),
            DictEntry::new("나다", 5, 5, 500, "VV,*,F,나다,*,*,*,*"),
        ];

        SystemDictionary {
            dicdir: PathBuf::from("./test_dic"),
            trie,
            matrix,
            entries: dict_entries,
            user_dict: None,
        }
    }

    /// `DictEntry::new` stores the values it is given.
    #[test]
    fn test_dict_entry_creation() {
        let entry = DictEntry::new("안녕", 1, 1, 100, "NNG,*,T,안녕,*,*,*,*");
        assert_eq!(entry.surface, "안녕");
        assert_eq!(entry.left_id, 1);
        assert_eq!(entry.right_id, 1);
        assert_eq!(entry.cost, 100);
    }

    /// `to_entry` copies the fields into an `Entry`.
    #[test]
    fn test_dict_entry_to_entry() {
        let dict_entry = DictEntry::new("테스트", 5, 5, 200, "NNG,*,T,테스트,*,*,*,*");
        let entry = dict_entry.to_entry();

        assert_eq!(entry.surface, "테스트");
        assert_eq!(entry.left_id, 5);
        assert_eq!(entry.cost, 200);
    }

    /// Exact lookups return the matching entry, or nothing for unknown text.
    #[test]
    fn test_system_dictionary_lookup() {
        let dict = create_test_dictionary();

        let entries = dict.lookup("가");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "가");

        let entries = dict.lookup("가다");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "가다");

        let entries = dict.lookup("없음");
        assert!(entries.is_empty());
    }

    /// The test matrix is built with the value 100, which is the cost read back.
    #[test]
    fn test_system_dictionary_get_connection_cost() {
        let dict = create_test_dictionary();
        let cost = dict.get_connection_cost(1, 2);
        assert_eq!(cost, 100);
    }

    /// Both "가" and "가방" are prefixes of "가방에".
    #[test]
    fn test_common_prefix_search() {
        let dict = create_test_dictionary();

        let results = dict.common_prefix_search("가방에");
        assert_eq!(results.len(), 2);

        let surfaces: Vec<_> = results.iter().map(|(e, _)| e.surface.as_str()).collect();
        assert!(surfaces.contains(&"가"));
        assert!(surfaces.contains(&"가방"));
    }

    /// Searching from the byte offset after "나" finds "가" and "가다".
    #[test]
    fn test_common_prefix_search_at() {
        let dict = create_test_dictionary();

        let text = "나가다";
        let start = "나".len();
        let results = dict.common_prefix_search_at(text, start);
        assert_eq!(results.len(), 2);
        let surfaces: Vec<_> = results.iter().map(|(e, _)| e.surface.as_str()).collect();
        assert!(surfaces.contains(&"가"));
        assert!(surfaces.contains(&"가다"));
    }

    /// A surface known only to the user dictionary is found by `lookup_combined`.
    #[test]
    fn test_with_user_dictionary() {
        let mut dict = create_test_dictionary();

        let mut user_dict = UserDictionary::new();
        user_dict.add_entry("딥러닝", "NNG", Some(-1000), None);
        user_dict.add_entry("머신러닝", "NNG", Some(-1000), None);

        dict.set_user_dictionary(user_dict);

        let entries = dict.lookup_combined("딥러닝");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "딥러닝");
    }

    /// A surface present in both dictionaries yields one result from each.
    #[test]
    fn test_lookup_combined_system_and_user() {
        let mut dict = create_test_dictionary();

        let mut user_dict = UserDictionary::new();
        user_dict.add_entry("가", "JKS", Some(-500), None);
        dict.set_user_dictionary(user_dict);

        let entries = dict.lookup_combined("가");
        assert_eq!(entries.len(), 2);
    }

    /// `get_entry` is a bounds-checked index into the entry table.
    #[test]
    fn test_get_entry() {
        let dict = create_test_dictionary();

        let entry = dict.get_entry(0);
        assert!(entry.is_some());
        assert_eq!(entry.unwrap().surface, "가");

        let entry = dict.get_entry(100);
        assert!(entry.is_none());
    }

    /// `dicdir` returns the path the dictionary was built with.
    #[test]
    fn test_dicdir() {
        let dict = create_test_dictionary();
        assert_eq!(dict.dicdir(), Path::new("./test_dic"));
    }

    /// The trie returned by `trie()` is usable for lookups.
    #[test]
    fn test_trie_reference() {
        let dict = create_test_dictionary();
        let trie = dict.trie();
        assert!(trie.exact_match("가").is_some());
    }

    /// The matrix returned by `matrix()` reports the dimensions it was built with.
    #[test]
    fn test_matrix_reference() {
        let dict = create_test_dictionary();
        let matrix = dict.matrix();
        assert_eq!(matrix.left_size(), 10);
        assert_eq!(matrix.right_size(), 10);
    }

    /// `entries()` exposes the whole table.
    #[test]
    fn test_entries_reference() {
        let dict = create_test_dictionary();
        let entries = dict.entries();
        assert_eq!(entries.len(), 5);
    }

    /// The outcome depends on the machine, so both are accepted: a found
    /// path must be a directory, and a failure must carry the expected message.
    #[test]
    fn test_dictionary_loader_find_dicdir() {
        let result = DictionaryLoader::find_dicdir();

        match result {
            Ok(path) => {
                assert!(path.is_dir());
            }
            Err(e) => {
                assert!(e.to_string().contains("Dictionary directory not found"));
            }
        }
    }

    /// `From<Entry>` moves every field across.
    #[test]
    fn test_dict_entry_from_entry() {
        let entry = Entry {
            surface: "테스트".to_string(),
            left_id: 10,
            right_id: 20,
            cost: 300,
            feature: "NNG,*,T,테스트,*,*,*,*".to_string(),
        };

        let dict_entry: DictEntry = entry.into();
        assert_eq!(dict_entry.surface, "테스트");
        assert_eq!(dict_entry.left_id, 10);
        assert_eq!(dict_entry.right_id, 20);
        assert_eq!(dict_entry.cost, 300);
    }

    /// Entries written in the binary format read back unchanged.
    #[test]
    fn test_entries_bin_roundtrip() {
        let entries = vec![
            DictEntry::new("안녕", 1, 1, 100, "NNG,*,T,안녕,*,*,*,*"),
            DictEntry::new("하세요", 2, 2, 50, "VV,*,F,하세요,*,*,*,*"),
            DictEntry::new("감사", 3, 3, 80, "NNG,*,F,감사,*,*,*,*"),
        ];

        let temp = tempfile::NamedTempFile::new().expect("create temp file");
        let path = temp.path();

        SystemDictionary::save_entries_bin(&entries, path).expect("save should work");
        let loaded = SystemDictionary::load_entries_bin(path).expect("load should work");

        assert_eq!(loaded.len(), 3);
        assert_eq!(loaded[0].surface, "안녕");
        assert_eq!(loaded[0].left_id, 1);
        assert_eq!(loaded[0].cost, 100);
        assert_eq!(loaded[0].feature, "NNG,*,T,안녕,*,*,*,*");
        assert_eq!(loaded[1].surface, "하세요");
        assert_eq!(loaded[2].surface, "감사");
    }

    /// Entries written as CSV read back with the same leading fields.
    #[test]
    fn test_entries_csv_roundtrip() {
        let entries = vec![
            DictEntry::new("형태소", 10, 20, 150, "NNG,*,F,형태소,*,*,*,*"),
            DictEntry::new("분석", 11, 21, 200, "NNG,*,T,분석,*,*,*,*"),
        ];

        let temp = tempfile::NamedTempFile::new().expect("create temp file");
        let path = temp.path();

        SystemDictionary::save_entries_csv(&entries, path).expect("save should work");
        let loaded = SystemDictionary::load_entries_csv(path).expect("load should work");

        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].surface, "형태소");
        assert_eq!(loaded[0].left_id, 10);
        assert_eq!(loaded[0].right_id, 20);
        assert_eq!(loaded[0].cost, 150);
        assert_eq!(loaded[1].surface, "분석");
    }

    /// Two adjacent entries sharing a surface are both returned, in table order.
    #[test]
    fn test_get_entries_at_multi() {
        // "가" starts at index 0 and occupies two slots, so "나" starts at 2.
        let trie_input = vec![("가", 0u32), ("나", 2u32)];
        let trie_bytes = TrieBuilder::build(&trie_input).expect("build trie");
        let trie = Trie::from_vec(trie_bytes);
        let matrix = ConnectionMatrix::Dense(DenseMatrix::new(5, 5, 100));

        let dict_entries = vec![
            DictEntry::new("가", 1, 1, 100, "VV,*,F,가,*,*,*,*"),
            DictEntry::new("가", 2, 2, 50, "JKS,*,F,가,*,*,*,*"),
            DictEntry::new("나", 3, 3, 200, "NP,*,F,나,*,*,*,*"),
        ];

        let dict = SystemDictionary {
            dicdir: PathBuf::from("./test"),
            trie,
            matrix,
            entries: dict_entries,
            user_dict: None,
        };

        let results = dict.get_entries_at(0, "가");
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].feature, "VV,*,F,가,*,*,*,*");
        assert_eq!(results[1].feature, "JKS,*,F,가,*,*,*,*");

        use crate::Dictionary;
        let entries = dict.lookup("가");
        assert_eq!(entries.len(), 2);
    }
}