1use std::io::{BufRead, BufReader, Read as _, Write as _};
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33
34use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
35
36use crate::error::{DictError, Result};
37use crate::matrix::{ConnectionMatrix, Matrix};
38use crate::trie::Trie;
39use crate::user_dict::UserDictionary;
40use crate::{Dictionary, Entry};
41
/// Directories probed, in order, by `DictionaryLoader::find_dicdir` when the
/// `MECAB_DICDIR` environment variable is unset or does not name a directory.
const DEFAULT_DICDIR_PATHS: &[&str] = &[
    "/usr/local/lib/mecab/dic/mecab-ko-dic",
    "/usr/lib/mecab/dic/mecab-ko-dic",
    "/opt/mecab/dic/mecab-ko-dic",
    // Relative to the current working directory of the process.
    "./dic/mecab-ko-dic",
];
49
/// File name of the trie inside a dictionary directory
/// (a `.zst`-suffixed variant is also accepted by the loaders).
const TRIE_FILE: &str = "sys.dic";
/// File name of the binary connection matrix inside a dictionary directory.
const MATRIX_FILE: &str = "matrix.bin";
/// File name of the binary entry table; preferred over the CSV variant.
const ENTRIES_BIN_FILE: &str = "entries.bin";
/// File name of the CSV entry table, used when the binary one is absent.
const ENTRIES_CSV_FILE: &str = "entries.csv";

/// First four bytes of a valid `entries.bin` file.
const ENTRIES_MAGIC: &[u8; 4] = b"MKED";
/// The only `entries.bin` format version this module reads and writes.
const ENTRIES_VERSION: u32 = 1;
60
/// A system dictionary loaded from a dictionary directory: a trie, a
/// connection matrix, an entry table, and an optional user dictionary.
pub struct SystemDictionary {
    /// Directory the dictionary was loaded from.
    dicdir: PathBuf,
    /// Trie whose values are used as starting indices into `entries`.
    trie: Trie<'static>,
    /// Connection matrix consulted by `get_connection_cost`.
    matrix: ConnectionMatrix,
    /// Entry table; entries sharing a surface are expected to be adjacent
    /// (see `get_entries_at`, which scans forward while the surface matches).
    entries: Vec<DictEntry>,
    /// Optional user dictionary consulted by `lookup_combined`.
    user_dict: Option<Arc<UserDictionary>>,
}
77
/// One record of the entry table, as stored in `entries.bin` / `entries.csv`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DictEntry {
    /// Surface string (first CSV column).
    pub surface: String,
    /// Left ID (second CSV column).
    pub left_id: u16,
    /// Right ID (third CSV column).
    pub right_id: u16,
    /// Cost (fourth CSV column).
    pub cost: i16,
    /// Feature string (everything after the fourth comma in the CSV form).
    pub feature: String,
}
94
95impl DictEntry {
96 pub fn new(
98 surface: impl Into<String>,
99 left_id: u16,
100 right_id: u16,
101 cost: i16,
102 feature: impl Into<String>,
103 ) -> Self {
104 Self {
105 surface: surface.into(),
106 left_id,
107 right_id,
108 cost,
109 feature: feature.into(),
110 }
111 }
112
113 #[must_use]
115 pub fn to_entry(&self) -> Entry {
116 Entry {
117 surface: self.surface.clone(),
118 left_id: self.left_id,
119 right_id: self.right_id,
120 cost: self.cost,
121 feature: self.feature.clone(),
122 }
123 }
124}
125
126impl From<Entry> for DictEntry {
127 fn from(entry: Entry) -> Self {
128 Self {
129 surface: entry.surface,
130 left_id: entry.left_id,
131 right_id: entry.right_id,
132 cost: entry.cost,
133 feature: entry.feature,
134 }
135 }
136}
137
/// Switches that tune how a dictionary is loaded.
///
/// The derived `Default` has every flag `false` and no cache size.
#[derive(Debug, Clone, Copy, Default)]
pub struct LoadOptions {
    /// When `true`, `SystemDictionary::load_with_options` opens `matrix.bin`
    /// through `ConnectionMatrix::from_mmap_file` instead of `from_bin_file`.
    pub use_mmap_matrix: bool,
    /// Requests lazy entry loading.
    /// NOTE(review): no loader visible in this file reads this flag — confirm
    /// where (or whether) it is honoured.
    pub use_lazy_entries: bool,
    /// Requested size of the lazy-entry cache.
    /// NOTE(review): not read by any loader visible in this file.
    pub lazy_cache_size: Option<usize>,
}

impl LoadOptions {
    /// Preset with the mmap matrix and lazy entries switched on and a cache
    /// size of 10 000.
    #[must_use]
    pub const fn memory_optimized() -> Self {
        Self {
            lazy_cache_size: Some(10_000),
            use_lazy_entries: true,
            use_mmap_matrix: true,
        }
    }

    /// Preset with every option switched off and no cache size.
    #[must_use]
    pub const fn speed_optimized() -> Self {
        Self {
            lazy_cache_size: None,
            use_lazy_entries: false,
            use_mmap_matrix: false,
        }
    }
}
170
171impl SystemDictionary {
172 pub fn load_default() -> Result<Self> {
183 let dicdir = DictionaryLoader::find_dicdir()?;
184 Self::load(dicdir)
185 }
186
187 pub fn load_memory_optimized() -> Result<Self> {
196 let dicdir = DictionaryLoader::find_dicdir()?;
197 Self::load_with_options(dicdir, LoadOptions::memory_optimized())
198 }
199
200 pub fn load_with_options<P: AsRef<Path>>(dicdir: P, options: LoadOptions) -> Result<Self> {
207 let dicdir = dicdir.as_ref().to_path_buf();
208
209 let trie_path = dicdir.join(TRIE_FILE);
211 let trie = if trie_path.exists() {
212 Trie::from_file(&trie_path)?
213 } else {
214 let compressed_path = dicdir.join(format!("{TRIE_FILE}.zst"));
216 if compressed_path.exists() {
217 Trie::from_compressed_file(&compressed_path)?
218 } else {
219 return Err(DictError::Format(format!(
220 "Trie file not found: {}",
221 trie_path.display()
222 )));
223 }
224 };
225
226 let matrix_path = dicdir.join(MATRIX_FILE);
228 let matrix = if matrix_path.exists() {
229 if options.use_mmap_matrix {
230 ConnectionMatrix::from_mmap_file(&matrix_path)?
231 } else {
232 ConnectionMatrix::from_bin_file(&matrix_path)?
233 }
234 } else {
235 let compressed_path = dicdir.join(format!("{MATRIX_FILE}.zst"));
237 if compressed_path.exists() {
238 ConnectionMatrix::from_compressed_file(&compressed_path)?
239 } else {
240 let def_path = dicdir.join("matrix.def");
242 if def_path.exists() {
243 ConnectionMatrix::from_def_file(&def_path)?
244 } else {
245 return Err(DictError::Format(format!(
246 "Matrix file not found: {}",
247 matrix_path.display()
248 )));
249 }
250 }
251 };
252
253 let entries = Self::load_entries(&dicdir)?;
255
256 Ok(Self {
257 dicdir,
258 trie,
259 matrix,
260 entries,
261 user_dict: None,
262 })
263 }
264
265 pub fn load<P: AsRef<Path>>(dicdir: P) -> Result<Self> {
276 let dicdir = dicdir.as_ref().to_path_buf();
277
278 let trie_path = dicdir.join(TRIE_FILE);
280 let trie = if trie_path.exists() {
281 Trie::from_file(&trie_path)?
282 } else {
283 let compressed_path = dicdir.join(format!("{TRIE_FILE}.zst"));
285 if compressed_path.exists() {
286 Trie::from_compressed_file(&compressed_path)?
287 } else {
288 return Err(DictError::Format(format!(
289 "Trie file not found: {}",
290 trie_path.display()
291 )));
292 }
293 };
294
295 let matrix_path = dicdir.join(MATRIX_FILE);
297 let matrix = if matrix_path.exists() {
298 ConnectionMatrix::from_bin_file(&matrix_path)?
299 } else {
300 let compressed_path = dicdir.join(format!("{MATRIX_FILE}.zst"));
302 if compressed_path.exists() {
303 ConnectionMatrix::from_compressed_file(&compressed_path)?
304 } else {
305 let def_path = dicdir.join("matrix.def");
307 if def_path.exists() {
308 ConnectionMatrix::from_def_file(&def_path)?
309 } else {
310 return Err(DictError::Format(format!(
311 "Matrix file not found: {}",
312 matrix_path.display()
313 )));
314 }
315 }
316 };
317
318 let entries = Self::load_entries(&dicdir)?;
320
321 Ok(Self {
322 dicdir,
323 trie,
324 matrix,
325 entries,
326 user_dict: None,
327 })
328 }
329
330 fn load_entries(dicdir: &Path) -> Result<Vec<DictEntry>> {
336 let bin_path = dicdir.join(ENTRIES_BIN_FILE);
338 if bin_path.exists() {
339 return Self::load_entries_bin(&bin_path);
340 }
341
342 let csv_path = dicdir.join(ENTRIES_CSV_FILE);
344 if csv_path.exists() {
345 return Self::load_entries_csv(&csv_path);
346 }
347
348 Ok(Vec::new())
350 }
351
352 fn load_entries_csv(path: &Path) -> Result<Vec<DictEntry>> {
356 let file = std::fs::File::open(path).map_err(DictError::Io)?;
357 let reader = BufReader::new(file);
358 let mut entries = Vec::new();
359
360 for (line_num, line_result) in reader.lines().enumerate() {
361 let line = line_result.map_err(DictError::Io)?;
362 let line = line.trim();
363 if line.is_empty() || line.starts_with('#') {
364 continue;
365 }
366
367 let mut fields = line.splitn(5, ',');
369 let surface = fields
370 .next()
371 .ok_or_else(|| {
372 DictError::Format(format!("line {}: missing surface", line_num + 1))
373 })?
374 .to_string();
375 let left_id: u16 = fields
376 .next()
377 .ok_or_else(|| {
378 DictError::Format(format!("line {}: missing left_id", line_num + 1))
379 })?
380 .parse()
381 .map_err(|_| {
382 DictError::Format(format!("line {}: invalid left_id", line_num + 1))
383 })?;
384 let right_id: u16 = fields
385 .next()
386 .ok_or_else(|| {
387 DictError::Format(format!("line {}: missing right_id", line_num + 1))
388 })?
389 .parse()
390 .map_err(|_| {
391 DictError::Format(format!("line {}: invalid right_id", line_num + 1))
392 })?;
393 let cost: i16 = fields
394 .next()
395 .ok_or_else(|| DictError::Format(format!("line {}: missing cost", line_num + 1)))?
396 .parse()
397 .map_err(|_| DictError::Format(format!("line {}: invalid cost", line_num + 1)))?;
398 let feature = fields.next().unwrap_or("").to_string();
399
400 entries.push(DictEntry {
401 surface,
402 left_id,
403 right_id,
404 cost,
405 feature,
406 });
407 }
408
409 Ok(entries)
410 }
411
412 fn load_entries_bin(path: &Path) -> Result<Vec<DictEntry>> {
416 let data = std::fs::read(path).map_err(DictError::Io)?;
417 let mut cursor = std::io::Cursor::new(&data);
418
419 let mut magic = [0u8; 4];
421 cursor
422 .read_exact(&mut magic)
423 .map_err(|e| DictError::Format(format!("entries.bin magic: {e}")))?;
424 if &magic != ENTRIES_MAGIC {
425 return Err(DictError::Format(
426 "entries.bin: invalid magic number".into(),
427 ));
428 }
429
430 let version = cursor
432 .read_u32::<LittleEndian>()
433 .map_err(|e| DictError::Format(format!("entries.bin version: {e}")))?;
434 if version != ENTRIES_VERSION {
435 return Err(DictError::Format(format!(
436 "entries.bin: unsupported version {version}"
437 )));
438 }
439
440 let count = cursor
442 .read_u32::<LittleEndian>()
443 .map_err(|e| DictError::Format(format!("entries.bin count: {e}")))?;
444
445 let mut entries = Vec::with_capacity(count as usize);
446 for i in 0..count {
447 let left_id = cursor
448 .read_u16::<LittleEndian>()
449 .map_err(|e| DictError::Format(format!("entries.bin entry {i} left_id: {e}")))?;
450 let right_id = cursor
451 .read_u16::<LittleEndian>()
452 .map_err(|e| DictError::Format(format!("entries.bin entry {i} right_id: {e}")))?;
453 let cost = cursor
454 .read_i16::<LittleEndian>()
455 .map_err(|e| DictError::Format(format!("entries.bin entry {i} cost: {e}")))?;
456 let surface_len = cursor
457 .read_u16::<LittleEndian>()
458 .map_err(|e| DictError::Format(format!("entries.bin entry {i} surface_len: {e}")))?
459 as usize;
460 let feature_len = cursor
461 .read_u16::<LittleEndian>()
462 .map_err(|e| DictError::Format(format!("entries.bin entry {i} feature_len: {e}")))?
463 as usize;
464
465 let mut surface_bytes = vec![0u8; surface_len];
466 cursor
467 .read_exact(&mut surface_bytes)
468 .map_err(|e| DictError::Format(format!("entries.bin entry {i} surface: {e}")))?;
469 let surface = String::from_utf8(surface_bytes).map_err(|e| {
470 DictError::Format(format!("entries.bin entry {i} surface utf8: {e}"))
471 })?;
472
473 let mut feature_bytes = vec![0u8; feature_len];
474 cursor
475 .read_exact(&mut feature_bytes)
476 .map_err(|e| DictError::Format(format!("entries.bin entry {i} feature: {e}")))?;
477 let feature = String::from_utf8(feature_bytes).map_err(|e| {
478 DictError::Format(format!("entries.bin entry {i} feature utf8: {e}"))
479 })?;
480
481 entries.push(DictEntry {
482 surface,
483 left_id,
484 right_id,
485 cost,
486 feature,
487 });
488 }
489
490 Ok(entries)
491 }
492
493 pub fn save_entries_bin(entries: &[DictEntry], path: &Path) -> Result<()> {
499 let mut file = std::fs::File::create(path).map_err(DictError::Io)?;
500
501 file.write_all(ENTRIES_MAGIC).map_err(DictError::Io)?;
502 file.write_u32::<LittleEndian>(ENTRIES_VERSION)
503 .map_err(DictError::Io)?;
504
505 let count = u32::try_from(entries.len())
506 .map_err(|_| DictError::Format("too many entries".into()))?;
507 file.write_u32::<LittleEndian>(count)
508 .map_err(DictError::Io)?;
509
510 for entry in entries {
511 file.write_u16::<LittleEndian>(entry.left_id)
512 .map_err(DictError::Io)?;
513 file.write_u16::<LittleEndian>(entry.right_id)
514 .map_err(DictError::Io)?;
515 file.write_i16::<LittleEndian>(entry.cost)
516 .map_err(DictError::Io)?;
517
518 let surface_bytes = entry.surface.as_bytes();
519 let surface_len = u16::try_from(surface_bytes.len())
520 .map_err(|_| DictError::Format("surface too long".into()))?;
521 file.write_u16::<LittleEndian>(surface_len)
522 .map_err(DictError::Io)?;
523
524 let feature_bytes = entry.feature.as_bytes();
525 let feature_len = u16::try_from(feature_bytes.len())
526 .map_err(|_| DictError::Format("feature too long".into()))?;
527 file.write_u16::<LittleEndian>(feature_len)
528 .map_err(DictError::Io)?;
529
530 file.write_all(surface_bytes).map_err(DictError::Io)?;
531 file.write_all(feature_bytes).map_err(DictError::Io)?;
532 }
533
534 Ok(())
535 }
536
537 pub fn save_entries_csv(entries: &[DictEntry], path: &Path) -> Result<()> {
543 let mut file = std::fs::File::create(path).map_err(DictError::Io)?;
544
545 for entry in entries {
546 writeln!(
547 file,
548 "{},{},{},{},{}",
549 entry.surface, entry.left_id, entry.right_id, entry.cost, entry.feature
550 )
551 .map_err(DictError::Io)?;
552 }
553
554 Ok(())
555 }
556
557 fn get_entries_at(&self, first_index: u32, surface: &str) -> Vec<&DictEntry> {
562 let start = first_index as usize;
563 let mut results = Vec::new();
564 for entry in self.entries.get(start..).unwrap_or(&[]) {
565 if entry.surface == surface {
566 results.push(entry);
567 } else {
568 break;
569 }
570 }
571 results
572 }
573
574 #[must_use]
580 pub fn with_user_dictionary(mut self, user_dict: UserDictionary) -> Self {
581 self.user_dict = Some(Arc::new(user_dict));
582 self
583 }
584
585 pub fn set_user_dictionary(&mut self, user_dict: UserDictionary) {
587 self.user_dict = Some(Arc::new(user_dict));
588 }
589
590 #[must_use]
592 pub fn dicdir(&self) -> &Path {
593 &self.dicdir
594 }
595
    /// Borrows the trie backing this dictionary.
    #[must_use]
    pub const fn trie(&self) -> &Trie<'static> {
        &self.trie
    }
601
    /// Borrows the connection matrix backing this dictionary.
    #[must_use]
    pub const fn matrix(&self) -> &ConnectionMatrix {
        &self.matrix
    }
607
608 #[must_use]
610 pub fn entries(&self) -> &[DictEntry] {
611 &self.entries
612 }
613
    /// Borrows the attached user dictionary, or `None` when none is set.
    #[must_use]
    pub fn user_dictionary(&self) -> Option<&UserDictionary> {
        // `as_deref` turns `&Option<Arc<T>>` into `Option<&T>`.
        self.user_dict.as_deref()
    }
619
620 #[must_use]
626 pub fn get_entry(&self, index: u32) -> Option<&DictEntry> {
627 self.entries.get(index as usize)
628 }
629
630 #[must_use]
643 pub fn common_prefix_search(&self, text: &str) -> Vec<(&DictEntry, usize)> {
644 let mut results = Vec::new();
645 for (index, byte_len) in self.trie.common_prefix_search(text) {
646 let surface = &text[..byte_len];
647 let entries = self.get_entries_at(index, surface);
648 for entry in entries {
649 results.push((entry, byte_len));
650 }
651 }
652 results
653 }
654
655 #[must_use]
662 pub fn common_prefix_search_at(
663 &self,
664 text: &str,
665 start_byte: usize,
666 ) -> Vec<(&DictEntry, usize)> {
667 let mut results = Vec::new();
668 for (index, end_byte) in self.trie.common_prefix_search_at(text, start_byte) {
669 let byte_len = end_byte - start_byte;
670 let surface = &text[start_byte..end_byte];
671 let entries = self.get_entries_at(index, surface);
672 for entry in entries {
673 results.push((entry, byte_len));
674 }
675 }
676 results
677 }
678
679 #[must_use]
685 pub fn lookup_combined(&self, surface: &str) -> Vec<Entry> {
686 let mut results = self.lookup(surface);
687
688 if let Some(user_dict) = &self.user_dict {
690 let user_entries = user_dict.lookup(surface);
691 results.extend(user_entries.iter().map(|e| e.to_entry()));
692 }
693
694 results
695 }
696
    /// Test-only helper: appends `entry` to the end of the entry table.
    /// The trie is not updated.
    #[cfg(test)]
    pub fn add_entry(&mut self, entry: DictEntry) {
        self.entries.push(entry);
    }
704
    /// Assembles a dictionary directly from in-memory parts, without touching
    /// the file system. No user dictionary is attached.
    #[doc(hidden)]
    #[must_use]
    pub const fn new_test(
        dicdir: PathBuf,
        trie: Trie<'static>,
        matrix: ConnectionMatrix,
        entries: Vec<DictEntry>,
    ) -> Self {
        Self {
            dicdir,
            trie,
            matrix,
            entries,
            user_dict: None,
        }
    }
722}
723
724impl Dictionary for SystemDictionary {
725 fn lookup(&self, surface: &str) -> Vec<Entry> {
726 if let Some(index) = self.trie.exact_match(surface) {
728 let entries = self.get_entries_at(index, surface);
729 if !entries.is_empty() {
730 return entries.iter().map(|e| e.to_entry()).collect();
731 }
732 }
733
734 Vec::new()
735 }
736
737 fn get_connection_cost(&self, left_id: u16, right_id: u16) -> i16 {
738 i16::try_from(self.matrix.get(right_id, left_id)).unwrap_or(i16::MAX)
739 }
740}
741
/// Namespace for the associated functions that locate, validate, and load
/// dictionary directories; it carries no state.
pub struct DictionaryLoader;
746
747impl DictionaryLoader {
748 pub fn find_dicdir() -> Result<PathBuf> {
758 if let Ok(dicdir) = std::env::var("MECAB_DICDIR") {
760 let path = PathBuf::from(dicdir);
761 if path.is_dir() {
762 return Ok(path);
763 }
764 }
765
766 for &path_str in DEFAULT_DICDIR_PATHS {
768 let path = PathBuf::from(path_str);
769 if path.is_dir() {
770 return Ok(path);
771 }
772 }
773
774 {
776 let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
777 let test_dict = manifest_dir.join("../../test-fixtures/mini-dict");
778 if test_dict.is_dir() {
779 return Ok(test_dict);
780 }
781 }
782
783 Err(DictError::Format(
784 "Dictionary directory not found. Set MECAB_DICDIR environment variable or install mecab-ko-dic to default location".to_string(),
785 ))
786 }
787
    /// Loads the system dictionary stored in `dicdir`.
    ///
    /// Thin wrapper around `SystemDictionary::load`; errors are passed
    /// through unchanged.
    pub fn load_system<P: AsRef<Path>>(dicdir: P) -> Result<SystemDictionary> {
        SystemDictionary::load(dicdir)
    }
796
    /// Loads the system dictionary from the directory found by `find_dicdir`.
    ///
    /// Thin wrapper around `SystemDictionary::load_default`; errors are
    /// passed through unchanged.
    pub fn load_default() -> Result<SystemDictionary> {
        SystemDictionary::load_default()
    }
805
806 pub fn validate_dicdir<P: AsRef<Path>>(dicdir: P) -> Result<()> {
816 let dicdir = dicdir.as_ref();
817
818 if !dicdir.is_dir() {
819 return Err(DictError::Format(format!(
820 "Dictionary directory does not exist: {}",
821 dicdir.display()
822 )));
823 }
824
825 let has_trie =
827 dicdir.join(TRIE_FILE).exists() || dicdir.join(format!("{TRIE_FILE}.zst")).exists();
828
829 let has_matrix = dicdir.join(MATRIX_FILE).exists() || dicdir.join("matrix.def").exists();
830
831 if !has_trie {
832 return Err(DictError::Format(format!(
833 "Trie file not found in {}",
834 dicdir.display()
835 )));
836 }
837
838 if !has_matrix {
839 return Err(DictError::Format(format!(
840 "Matrix file not found in {}",
841 dicdir.display()
842 )));
843 }
844
845 Ok(())
846 }
847}
848
// Unit tests for the system dictionary: entry conversions, lookups against a
// small in-memory dictionary, user-dictionary merging, and round-trips of the
// binary and CSV entry formats.
#[cfg(test)]
#[allow(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::items_after_statements
)]
mod tests {
    use super::*;
    use crate::matrix::DenseMatrix;
    use crate::trie::TrieBuilder;

    /// Builds a five-entry in-memory dictionary with no user dictionary.
    fn create_test_dictionary() -> SystemDictionary {
        // Trie values are the positions of the matching rows in `dict_entries`.
        let entries = vec![
            ("가", 0u32),
            ("가다", 1),
            ("가방", 2),
            ("나", 3),
            ("나다", 4),
        ];
        let trie_bytes = TrieBuilder::build(&entries).expect("should build trie");
        let trie = Trie::from_vec(trie_bytes);

        // The tests below expect sizes of 10 x 10 and a cost of 100 from this matrix.
        let matrix = DenseMatrix::new(10, 10, 100);
        let matrix = ConnectionMatrix::Dense(matrix);

        let dict_entries = vec![
            DictEntry::new("가", 1, 1, 100, "NNG,*,T,가,*,*,*,*"),
            DictEntry::new("가다", 2, 2, 200, "VV,*,F,가다,*,*,*,*"),
            DictEntry::new("가방", 3, 3, 300, "NNG,*,T,가방,*,*,*,*"),
            DictEntry::new("나", 4, 4, 400, "NP,*,F,나,*,*,*,*"),
            DictEntry::new("나다", 5, 5, 500, "VV,*,F,나다,*,*,*,*"),
        ];

        SystemDictionary {
            dicdir: PathBuf::from("./test_dic"),
            trie,
            matrix,
            entries: dict_entries,
            user_dict: None,
        }
    }

    // `DictEntry::new` stores its arguments unchanged.
    #[test]
    fn test_dict_entry_creation() {
        let entry = DictEntry::new("안녕", 1, 1, 100, "NNG,*,T,안녕,*,*,*,*");
        assert_eq!(entry.surface, "안녕");
        assert_eq!(entry.left_id, 1);
        assert_eq!(entry.right_id, 1);
        assert_eq!(entry.cost, 100);
    }

    // `to_entry` copies the fields into an `Entry`.
    #[test]
    fn test_dict_entry_to_entry() {
        let dict_entry = DictEntry::new("테스트", 5, 5, 200, "NNG,*,T,테스트,*,*,*,*");
        let entry = dict_entry.to_entry();

        assert_eq!(entry.surface, "테스트");
        assert_eq!(entry.left_id, 5);
        assert_eq!(entry.cost, 200);
    }

    // Exact lookups hit known surfaces and miss unknown ones.
    #[test]
    fn test_system_dictionary_lookup() {
        let dict = create_test_dictionary();

        let entries = dict.lookup("가");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "가");

        let entries = dict.lookup("가다");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "가다");

        let entries = dict.lookup("없음");
        assert!(entries.is_empty());
    }

    // The connection cost for the test matrix is expected to be 100.
    #[test]
    fn test_system_dictionary_get_connection_cost() {
        let dict = create_test_dictionary();
        let cost = dict.get_connection_cost(1, 2);
        assert_eq!(cost, 100);
    }

    // Both "가" and "가방" are prefixes of the query.
    #[test]
    fn test_common_prefix_search() {
        let dict = create_test_dictionary();

        let results = dict.common_prefix_search("가방에");
        assert_eq!(results.len(), 2);

        let surfaces: Vec<_> = results.iter().map(|(e, _)| e.surface.as_str()).collect();
        assert!(surfaces.contains(&"가"));
        assert!(surfaces.contains(&"가방"));
    }

    // Searching from the byte offset just past "나" finds "가" and "가다".
    #[test]
    fn test_common_prefix_search_at() {
        let dict = create_test_dictionary();

        let text = "나가다";
        let start = "나".len();
        let results = dict.common_prefix_search_at(text, start);
        assert_eq!(results.len(), 2);
        let surfaces: Vec<_> = results.iter().map(|(e, _)| e.surface.as_str()).collect();
        assert!(surfaces.contains(&"가"));
        assert!(surfaces.contains(&"가다"));
    }

    // A surface known only to the user dictionary is found by `lookup_combined`.
    #[test]
    fn test_with_user_dictionary() {
        let mut dict = create_test_dictionary();

        let mut user_dict = UserDictionary::new();
        user_dict.add_entry("딥러닝", "NNG", Some(-1000), None);
        user_dict.add_entry("머신러닝", "NNG", Some(-1000), None);

        dict.set_user_dictionary(user_dict);

        let entries = dict.lookup_combined("딥러닝");
        assert_eq!(entries.len(), 1);
        assert_eq!(entries[0].surface, "딥러닝");
    }

    // A surface present in both dictionaries yields one entry from each.
    #[test]
    fn test_lookup_combined_system_and_user() {
        let mut dict = create_test_dictionary();

        let mut user_dict = UserDictionary::new();
        user_dict.add_entry("가", "JKS", Some(-500), None);
        dict.set_user_dictionary(user_dict);

        let entries = dict.lookup_combined("가");
        assert_eq!(entries.len(), 2);
    }

    // `get_entry` is a bounds-checked index into the entry table.
    #[test]
    fn test_get_entry() {
        let dict = create_test_dictionary();

        let entry = dict.get_entry(0);
        assert!(entry.is_some());
        assert_eq!(entry.unwrap().surface, "가");

        let entry = dict.get_entry(100);
        assert!(entry.is_none());
    }

    #[test]
    fn test_dicdir() {
        let dict = create_test_dictionary();
        assert_eq!(dict.dicdir(), Path::new("./test_dic"));
    }

    #[test]
    fn test_trie_reference() {
        let dict = create_test_dictionary();
        let trie = dict.trie();
        assert!(trie.exact_match("가").is_some());
    }

    #[test]
    fn test_matrix_reference() {
        let dict = create_test_dictionary();
        let matrix = dict.matrix();
        assert_eq!(matrix.left_size(), 10);
        assert_eq!(matrix.right_size(), 10);
    }

    #[test]
    fn test_entries_reference() {
        let dict = create_test_dictionary();
        let entries = dict.entries();
        assert_eq!(entries.len(), 5);
    }

    // The outcome depends on the machine: either a real directory is found or
    // the "not found" error is returned. Both are accepted.
    #[test]
    fn test_dictionary_loader_find_dicdir() {
        let result = DictionaryLoader::find_dicdir();

        match result {
            Ok(path) => {
                assert!(path.is_dir());
            }
            Err(e) => {
                assert!(e.to_string().contains("Dictionary directory not found"));
            }
        }
    }

    // `From<Entry>` moves every field across.
    #[test]
    fn test_dict_entry_from_entry() {
        let entry = Entry {
            surface: "테스트".to_string(),
            left_id: 10,
            right_id: 20,
            cost: 300,
            feature: "NNG,*,T,테스트,*,*,*,*".to_string(),
        };

        let dict_entry: DictEntry = entry.into();
        assert_eq!(dict_entry.surface, "테스트");
        assert_eq!(dict_entry.left_id, 10);
        assert_eq!(dict_entry.right_id, 20);
        assert_eq!(dict_entry.cost, 300);
    }

    // Entries written with `save_entries_bin` read back with `load_entries_bin`.
    #[test]
    fn test_entries_bin_roundtrip() {
        let entries = vec![
            DictEntry::new("안녕", 1, 1, 100, "NNG,*,T,안녕,*,*,*,*"),
            DictEntry::new("하세요", 2, 2, 50, "VV,*,F,하세요,*,*,*,*"),
            DictEntry::new("감사", 3, 3, 80, "NNG,*,F,감사,*,*,*,*"),
        ];

        let temp = tempfile::NamedTempFile::new().expect("create temp file");
        let path = temp.path();

        SystemDictionary::save_entries_bin(&entries, path).expect("save should work");
        let loaded = SystemDictionary::load_entries_bin(path).expect("load should work");

        assert_eq!(loaded.len(), 3);
        assert_eq!(loaded[0].surface, "안녕");
        assert_eq!(loaded[0].left_id, 1);
        assert_eq!(loaded[0].cost, 100);
        assert_eq!(loaded[0].feature, "NNG,*,T,안녕,*,*,*,*");
        assert_eq!(loaded[1].surface, "하세요");
        assert_eq!(loaded[2].surface, "감사");
    }

    // Entries written with `save_entries_csv` read back with `load_entries_csv`.
    #[test]
    fn test_entries_csv_roundtrip() {
        let entries = vec![
            DictEntry::new("형태소", 10, 20, 150, "NNG,*,F,형태소,*,*,*,*"),
            DictEntry::new("분석", 11, 21, 200, "NNG,*,T,분석,*,*,*,*"),
        ];

        let temp = tempfile::NamedTempFile::new().expect("create temp file");
        let path = temp.path();

        SystemDictionary::save_entries_csv(&entries, path).expect("save should work");
        let loaded = SystemDictionary::load_entries_csv(path).expect("load should work");

        assert_eq!(loaded.len(), 2);
        assert_eq!(loaded[0].surface, "형태소");
        assert_eq!(loaded[0].left_id, 10);
        assert_eq!(loaded[0].right_id, 20);
        assert_eq!(loaded[0].cost, 150);
        assert_eq!(loaded[1].surface, "분석");
    }

    // Two adjacent entries share the surface "가"; the trie points at the
    // first of them and both must be returned.
    #[test]
    fn test_get_entries_at_multi() {
        let trie_input = vec![("가", 0u32), ("나", 2u32)];
        let trie_bytes = TrieBuilder::build(&trie_input).expect("build trie");
        let trie = Trie::from_vec(trie_bytes);
        let matrix = ConnectionMatrix::Dense(DenseMatrix::new(5, 5, 100));

        let dict_entries = vec![
            DictEntry::new("가", 1, 1, 100, "VV,*,F,가,*,*,*,*"),
            DictEntry::new("가", 2, 2, 50, "JKS,*,F,가,*,*,*,*"),
            DictEntry::new("나", 3, 3, 200, "NP,*,F,나,*,*,*,*"),
        ];

        let dict = SystemDictionary {
            dicdir: PathBuf::from("./test"),
            trie,
            matrix,
            entries: dict_entries,
            user_dict: None,
        };

        let results = dict.get_entries_at(0, "가");
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].feature, "VV,*,F,가,*,*,*,*");
        assert_eq!(results[1].feature, "JKS,*,F,가,*,*,*,*");

        use crate::Dictionary;
        let entries = dict.lookup("가");
        assert_eq!(entries.len(), 2);
    }
}