Skip to main content

oar_ocr_core/utils/
dict.rs

1//! Dictionary and tokenizer loading utilities.
2//!
3//! This module provides helper functions for loading character dictionaries
4//! and tokenizer files used throughout the OCR pipeline.
5
6use crate::core::OCRError;
7use std::path::Path;
8
9/// Reads a character dictionary file and returns a vector of strings.
10///
11/// Each line in the file becomes one entry in the resulting vector.
12/// Empty lines are preserved.
13///
14/// # Arguments
15///
16/// * `path` - Path to the dictionary file
17///
18/// # Returns
19///
20/// A vector of strings, one per line in the file.
21///
22/// # Errors
23///
24/// Returns an `OCRError::InvalidInput` if the file cannot be read.
25///
26/// # Example
27///
28/// ```rust,no_run
29/// use oar_ocr_core::utils::read_character_dict;
30/// use std::path::Path;
31///
32/// let dict = read_character_dict(Path::new("path/to/dict.txt"))?;
33/// # Ok::<(), oar_ocr_core::core::OCRError>(())
34/// ```
35pub fn read_character_dict(path: &Path) -> Result<Vec<String>, OCRError> {
36    let content = std::fs::read_to_string(path).map_err(|e| OCRError::InvalidInput {
37        message: format!(
38            "Failed to read character dictionary from '{}': {}",
39            path.display(),
40            e
41        ),
42    })?;
43    Ok(content.lines().map(|s| s.to_string()).collect())
44}
45
46/// Reads a character dictionary file and returns the raw content string.
47///
48/// This is useful when you need the raw content before processing.
49///
50/// # Arguments
51///
52/// * `path` - Path to the dictionary file
53///
54/// # Returns
55///
56/// The raw file content as a string.
57///
58/// # Errors
59///
60/// Returns an `OCRError::InvalidInput` if the file cannot be read.
61pub fn read_dict_content(path: &Path) -> Result<String, OCRError> {
62    std::fs::read_to_string(path).map_err(|e| OCRError::InvalidInput {
63        message: format!("Failed to read dictionary from '{}': {}", path.display(), e),
64    })
65}
66
67/// Validates that a required path option is present and returns the path.
68///
69/// This is a helper for builder patterns where a path is required but stored
70/// as an `Option<PathBuf>`.
71///
72/// # Arguments
73///
74/// * `path` - Optional path to validate
75/// * `component` - Component name for error message (e.g., "text_recognition")
76/// * `description` - Human-readable description of what the path is for
77///
78/// # Returns
79///
80/// The path if present.
81///
82/// # Errors
83///
84/// Returns an `OCRError::ConfigError` if the path is None.
85///
86/// # Example
87///
88/// ```rust,no_run
89/// use oar_ocr_core::utils::require_path;
90/// use std::path::PathBuf;
91///
92/// let path: Option<PathBuf> = Some(PathBuf::from("/path/to/dict.txt"));
93/// let validated = require_path(path, "text_recognition", "character dictionary path")?;
94/// # Ok::<(), oar_ocr_core::core::OCRError>(())
95/// ```
96pub fn require_path<P: AsRef<Path> + Clone>(
97    path: Option<P>,
98    component: &str,
99    description: &str,
100) -> Result<P, OCRError> {
101    path.ok_or_else(|| {
102        OCRError::config_error_detailed(
103            component,
104            format!("{} is required for {}", description, component),
105        )
106    })
107}
108
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Newline-terminated entries come back as one dictionary item each, in order.
    #[test]
    fn test_read_character_dict() -> Result<(), Box<dyn std::error::Error>> {
        let mut tmp = NamedTempFile::new()?;
        for entry in ["a", "b", "c"] {
            writeln!(tmp, "{}", entry)?;
        }

        let entries = read_character_dict(tmp.path())?;
        assert_eq!(entries, vec!["a", "b", "c"]);
        Ok(())
    }

    /// The raw reader returns the file text untouched, embedded newline included.
    #[test]
    fn test_read_dict_content() -> Result<(), Box<dyn std::error::Error>> {
        let mut tmp = NamedTempFile::new()?;
        tmp.write_all(b"hello\nworld")?;

        let raw = read_dict_content(tmp.path())?;
        assert_eq!(raw, "hello\nworld");
        Ok(())
    }

    /// Reading a path that does not exist surfaces an error instead of panicking.
    #[test]
    fn test_read_nonexistent_file() {
        let missing = Path::new("/nonexistent/path/dict.txt");
        assert!(read_character_dict(missing).is_err());
    }

    /// A present path passes validation.
    #[test]
    fn test_require_path_some() {
        let present = Some(std::path::PathBuf::from("/some/path"));
        assert!(require_path(present, "test", "test path").is_ok());
    }

    /// A missing path is rejected and the error text names the component.
    #[test]
    fn test_require_path_none() {
        let missing: Option<std::path::PathBuf> = None;
        let outcome = require_path(missing, "test_component", "test path");
        assert!(outcome.is_err());
        let rendered = outcome.unwrap_err().to_string();
        assert!(rendered.contains("test_component"));
    }
}