#![allow(
dead_code,
clippy::unnecessary_debug_formatting,
clippy::format_push_string,
clippy::cast_possible_truncation,
clippy::expect_used,
clippy::unwrap_used
)]
use std::sync::OnceLock;
pub mod fixtures;
pub mod mini_dict;
use serde::{Deserialize, Serialize};
/// Process-wide cache for the dictionary probe performed by
/// [`system_dict_available`].
static SYSTEM_DICT_AVAILABLE: OnceLock<bool> = OnceLock::new();

/// Reports whether a system `mecab-ko-dic` dictionary can be found.
///
/// A directory counts as a dictionary when it contains a `sys.dic` entry.
/// The well-known install locations are probed first; if none matches, the
/// directory named by the `MECAB_DIC_DIR` environment variable is tried
/// (ignored when unset or not valid Unicode). The answer is computed on the
/// first call and served from the cache afterwards.
#[must_use]
pub fn system_dict_available() -> bool {
    const KNOWN_DIC_DIRS: [&str; 3] = [
        "/usr/local/lib/mecab/dic/mecab-ko-dic",
        "/usr/lib/mecab/dic/mecab-ko-dic",
        "/opt/homebrew/lib/mecab/dic/mecab-ko-dic",
    ];

    /// True when `dir/sys.dic` exists.
    fn holds_sys_dic(dir: &str) -> bool {
        std::path::Path::new(dir).join("sys.dic").exists()
    }

    *SYSTEM_DICT_AVAILABLE.get_or_init(|| {
        // `||` short-circuits, so the environment is only consulted when no
        // well-known location matched.
        KNOWN_DIC_DIRS.iter().any(|dir| holds_sys_dic(dir))
            || std::env::var("MECAB_DIC_DIR").is_ok_and(|dir| holds_sys_dic(&dir))
    })
}
/// Leaves the enclosing function early when no system dictionary is present.
///
/// Expands to a call of `$crate::common::system_dict_available()`. When that
/// returns `false`, a notice is printed to stderr and a bare `return;` is
/// executed, so the macro must be used inside a function returning `()`.
#[macro_export]
macro_rules! skip_without_system_dict {
    () => {{
        let dictionary_present = $crate::common::system_dict_available();
        if !dictionary_present {
            eprintln!("Skipping test: system dictionary not available");
            return;
        }
    }};
}
use std::path::PathBuf;
/// One morphological-analysis test case, as deserialized from the JSON files
/// read by `load_fixtures` and `load_golden_tests`.
///
/// Only `input` is mandatory in the serialized form; every other field is
/// marked `#[serde(default)]`, so an absent key yields an empty vector or `None`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct MorphTestCase {
/// Input text of the case (presumably the sentence handed to the analyzer;
/// confirm against the tests that consume it).
pub input: String,
/// Expected morpheme strings, in order; empty when the key is absent.
#[serde(default)]
pub expected_morphs: Vec<String>,
/// Expected string pairs, in order; empty when the key is absent.
/// NOTE(review): presumably `(morpheme, POS tag)`, matching how
/// `generate_pos_diff` renders each pair as `m/p` — confirm.
#[serde(default)]
pub expected_pos: Vec<(String, String)>,
/// Optional free-text description; `assert_test_result!` includes it in the
/// failure message when present.
#[serde(default)]
pub description: Option<String>,
/// Optional category label; not read anywhere in this file.
#[serde(default)]
pub category: Option<String>,
}
/// Outcome of comparing an expected sequence against an actual one.
///
/// The field descriptions below reflect how `compare_morphs` and
/// `compare_pos_tags` populate the struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TestResult {
/// `true` when the expected and actual slices compared equal.
pub passed: bool,
/// `Debug` rendering of the expected slice.
pub expected: String,
/// `Debug` rendering of the actual slice.
pub actual: String,
/// Per-position mismatch report; `None` when the comparison passed.
pub diff: Option<String>,
}
/// Loads the test cases stored in `filename` inside the fixtures directory.
///
/// The file is read as UTF-8 text and parsed as a JSON array of
/// [`MorphTestCase`] values.
///
/// # Errors
///
/// Returns a message naming the offending path when the file cannot be read
/// or when its contents fail to deserialize.
#[allow(dead_code)]
pub fn load_fixtures(filename: &str) -> Result<Vec<MorphTestCase>, Box<dyn std::error::Error>> {
    let file_path = get_fixtures_path().join(filename);

    let raw_json = std::fs::read_to_string(&file_path)
        .map_err(|io_err| format!("Failed to read fixture file {file_path:?}: {io_err}"))?;

    serde_json::from_str::<Vec<MorphTestCase>>(&raw_json).map_err(|parse_err| {
        format!("Failed to parse fixture file {file_path:?}: {parse_err}").into()
    })
}
/// Loads the test cases stored in `filename` inside the golden-test directory.
///
/// The file is read as UTF-8 text and parsed as a JSON array of
/// [`MorphTestCase`] values.
///
/// # Errors
///
/// Returns a message naming the offending path when the file cannot be read
/// or when its contents fail to deserialize.
#[allow(dead_code)]
pub fn load_golden_tests(filename: &str) -> Result<Vec<MorphTestCase>, Box<dyn std::error::Error>> {
    let file_path = get_golden_path().join(filename);

    let raw_json = std::fs::read_to_string(&file_path)
        .map_err(|io_err| format!("Failed to read golden test file {file_path:?}: {io_err}"))?;

    serde_json::from_str::<Vec<MorphTestCase>>(&raw_json).map_err(|parse_err| {
        format!("Failed to parse golden test file {file_path:?}: {parse_err}").into()
    })
}
/// Returns `<manifest dir>/tests/fixtures`, where the manifest directory is the
/// value of `CARGO_MANIFEST_DIR` captured at compile time.
pub fn get_fixtures_path() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests");
    dir.push("fixtures");
    dir
}
/// Returns `<manifest dir>/tests/golden`, where the manifest directory is the
/// value of `CARGO_MANIFEST_DIR` captured at compile time.
pub fn get_golden_path() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("tests");
    dir.push("golden");
    dir
}
/// Compares two morpheme sequences element by element.
///
/// The returned [`TestResult`] passes when the slices are equal. Both slices
/// are recorded in their `Debug` form, and a per-position report from
/// `generate_diff` is attached only when they differ.
pub fn compare_morphs(expected: &[String], actual: &[String]) -> TestResult {
    // A diff exists exactly when the comparison fails, so `passed` is derived from it.
    let diff = (expected != actual).then(|| generate_diff(expected, actual));

    TestResult {
        passed: diff.is_none(),
        expected: format!("{expected:?}"),
        actual: format!("{actual:?}"),
        diff,
    }
}
/// Compares two sequences of string pairs element by element.
///
/// The returned [`TestResult`] passes when the slices are equal. Both slices
/// are recorded in their `Debug` form, and a per-position report from
/// `generate_pos_diff` is attached only when they differ.
pub fn compare_pos_tags(expected: &[(String, String)], actual: &[(String, String)]) -> TestResult {
    // A diff exists exactly when the comparison fails, so `passed` is derived from it.
    let diff = (expected != actual).then(|| generate_pos_diff(expected, actual));

    TestResult {
        passed: diff.is_none(),
        expected: format!("{expected:?}"),
        actual: format!("{actual:?}"),
        diff,
    }
}
/// Builds a human-readable report of the positions at which two morpheme
/// sequences differ.
///
/// The report starts with a header line and then contains one line per
/// mismatching index, up to the length of the longer slice. An index that
/// exists in only one slice is rendered as `<missing>` on the other side.
///
/// Entries are compared as `Option`s rather than as rendered text, so an
/// element whose text happens to be `<missing>` is still reported when the
/// other side has no element at that index.
fn generate_diff(expected: &[String], actual: &[String]) -> String {
    const MISSING: &str = "<missing>";

    let mut diff = String::new();
    diff.push_str("Morpheme differences:\n");

    let max_len = expected.len().max(actual.len());
    for i in 0..max_len {
        let (exp, act) = (expected.get(i), actual.get(i));
        // Compare presence and content together; the placeholder is display-only.
        if exp != act {
            let exp = exp.map_or(MISSING, String::as_str);
            let act = act.map_or(MISSING, String::as_str);
            diff.push_str(&format!(" Position {i}: expected '{exp}', got '{act}'\n"));
        }
    }
    diff
}
/// Builds a human-readable report of the positions at which two sequences of
/// string pairs differ.
///
/// The report starts with a header line and then contains one line per
/// mismatching index, up to the length of the longer slice. Each pair is
/// rendered as `first/second`; an index that exists in only one slice is
/// rendered as `<missing>` on the other side.
///
/// Entries are compared as tuples rather than as rendered text, so pairs that
/// differ but render identically (for example `("a/b", "c")` and
/// `("a", "b/c")`) are still reported.
fn generate_pos_diff(expected: &[(String, String)], actual: &[(String, String)]) -> String {
    /// Display form of one optional entry.
    fn render(entry: Option<&(String, String)>) -> String {
        entry.map_or_else(|| "<missing>".to_string(), |(m, p)| format!("{m}/{p}"))
    }

    let mut diff = String::new();
    diff.push_str("POS tag differences:\n");

    let max_len = expected.len().max(actual.len());
    for i in 0..max_len {
        let (exp, act) = (expected.get(i), actual.get(i));
        // Compare the underlying tuples; rendering is lossy and display-only.
        if exp != act {
            let exp = render(exp);
            let act = render(act);
            diff.push_str(&format!(" Position {i}: expected '{exp}', got '{act}'\n"));
        }
    }
    diff
}
/// Panics with a detailed report when a comparison result did not pass.
///
/// `$result` must expose `passed`, `expected`, `actual` and `diff` fields (as
/// `TestResult` does); `$test_case` must expose `input` and `description`
/// (as `MorphTestCase` does). The panic message lists the input, the optional
/// description, the expected and actual renderings, and the diff if present.
///
/// Each argument expression is evaluated exactly once, so passing a call such
/// as `compare_morphs(..)` directly does not repeat the comparison.
///
/// # Panics
///
/// Panics when `$result.passed` is `false`.
#[macro_export]
macro_rules! assert_test_result {
    ($result:expr, $test_case:expr) => {
        // Bind through `match` so each argument is evaluated once and any
        // temporaries live for the whole check.
        match (&$result, &$test_case) {
            (result, test_case) => {
                if !result.passed {
                    let mut msg = format!("Test failed for input: '{}'", test_case.input);
                    if let Some(desc) = &test_case.description {
                        msg.push_str(&format!("\nDescription: {desc}"));
                    }
                    msg.push_str(&format!("\nExpected: {}", result.expected));
                    msg.push_str(&format!("\nActual: {}", result.actual));
                    if let Some(diff) = &result.diff {
                        msg.push_str(&format!("\n{diff}"));
                    }
                    // Explicit `{}` keeps the message formatted on every edition.
                    panic!("{}", msg);
                }
            }
        }
    };
}
/// Returns `<manifest dir>/tests/fixtures/test_dict`, where the manifest
/// directory is the value of `CARGO_MANIFEST_DIR` captured at compile time.
///
/// Despite the name, this function only builds the path; it does not create
/// or touch anything on disk.
#[allow(dead_code)]
pub fn create_test_dict() -> PathBuf {
    let mut dict_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dict_dir.push("tests");
    dict_dir.push("fixtures");
    dict_dir.push("test_dict");
    dict_dir
}
/// Minimal wall-clock timing helpers for tests.
pub mod perf {
    use std::time::{Duration, Instant};

    /// Timing data collected by [`measure`].
    #[derive(Debug, Clone)]
    #[allow(dead_code)]
    pub struct PerfResult {
        /// Label supplied by the caller.
        pub name: String,
        /// Total elapsed time across all iterations.
        pub duration: Duration,
        /// Number of times the closure was invoked.
        pub iterations: usize,
        /// `duration` divided by `iterations` (zero when `iterations` is zero).
        pub avg_per_iter: Duration,
    }

    impl PerfResult {
        /// Renders the result as a one-line summary: total time in
        /// milliseconds, average in microseconds, and the iteration count.
        #[allow(dead_code)]
        pub fn format(&self) -> String {
            format!(
                "{}: {:.2}ms total, {:.2}μs per iteration ({} iterations)",
                self.name,
                self.duration.as_secs_f64() * 1000.0,
                self.avg_per_iter.as_secs_f64() * 1_000_000.0,
                self.iterations
            )
        }
    }

    /// Runs `f` `iterations` times and records how long the whole loop took.
    ///
    /// With `iterations == 0` the closure is never called and the reported
    /// average is [`Duration::ZERO`] instead of a division-by-zero panic.
    #[allow(dead_code)]
    pub fn measure<F>(name: &str, iterations: usize, mut f: F) -> PerfResult
    where
        F: FnMut(),
    {
        let start = Instant::now();
        for _ in 0..iterations {
            f();
        }
        let duration = start.elapsed();
        PerfResult {
            name: name.to_string(),
            duration,
            iterations,
            avg_per_iter: average(duration, iterations),
        }
    }

    /// Divides `total` by `iterations` without panicking or truncating the count.
    fn average(total: Duration, iterations: usize) -> Duration {
        if iterations == 0 {
            return Duration::ZERO;
        }
        let wide = iterations as u128;
        if wide <= u128::from(u32::MAX) {
            // Guarded above, so the narrowing cast is lossless.
            total / iterations as u32
        } else {
            // `Duration` only divides by `u32`; for larger counts divide the
            // nanosecond total instead. The divisor exceeds 2^32, so the
            // quotient always fits in a `u64`.
            Duration::from_nanos((total.as_nanos() / wide) as u64)
        }
    }

    /// Asserts that the average time per iteration does not exceed
    /// `max_avg_micros` microseconds.
    ///
    /// # Panics
    ///
    /// Panics with a "Performance regression detected" message when the
    /// measured average is above the limit.
    #[allow(dead_code)]
    pub fn assert_performance(result: &PerfResult, max_avg_micros: f64) {
        let actual_micros = result.avg_per_iter.as_secs_f64() * 1_000_000.0;
        assert!(
            actual_micros <= max_avg_micros,
            "Performance regression detected for '{}': expected ≤{:.2}μs, got {:.2}μs",
            result.name,
            max_avg_micros,
            actual_micros
        );
    }
}
/// Self-tests for the comparison helpers and path builders in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Identical morpheme sequences pass and carry no diff.
    #[test]
    fn test_compare_morphs_equal() {
        let expected = vec!["안녕".to_string(), "하".to_string(), "세요".to_string()];
        let actual = vec!["안녕".to_string(), "하".to_string(), "세요".to_string()];
        let result = compare_morphs(&expected, &actual);
        assert!(result.passed);
        assert!(result.diff.is_none());
    }

    /// Differing morpheme sequences fail and carry a diff.
    #[test]
    fn test_compare_morphs_different() {
        let expected = vec!["안녕".to_string(), "하".to_string(), "세요".to_string()];
        let actual = vec!["안녕".to_string(), "하세요".to_string()];
        let result = compare_morphs(&expected, &actual);
        assert!(!result.passed);
        assert!(result.diff.is_some());
    }

    /// Identical pair sequences pass.
    #[test]
    fn test_compare_pos_tags_equal() {
        let expected = vec![
            ("안녕".to_string(), "NNG".to_string()),
            ("하".to_string(), "XSV".to_string()),
        ];
        let actual = vec![
            ("안녕".to_string(), "NNG".to_string()),
            ("하".to_string(), "XSV".to_string()),
        ];
        let result = compare_pos_tags(&expected, &actual);
        assert!(result.passed);
    }

    /// Pair sequences that differ in one tag fail and carry a diff.
    #[test]
    fn test_compare_pos_tags_different() {
        let expected = vec![
            ("안녕".to_string(), "NNG".to_string()),
            ("하".to_string(), "XSV".to_string()),
        ];
        let actual = vec![
            ("안녕".to_string(), "NNG".to_string()),
            ("하".to_string(), "VV".to_string()),
        ];
        let result = compare_pos_tags(&expected, &actual);
        assert!(!result.passed);
        assert!(result.diff.is_some());
    }

    /// The fixtures directory ends with the `tests/fixtures` components.
    #[test]
    fn test_fixtures_path() {
        let path = get_fixtures_path();
        assert!(path.ends_with("tests/fixtures"));
    }

    /// The golden directory ends with the `tests/golden` components.
    #[test]
    fn test_golden_path() {
        let path = get_golden_path();
        // Component-wise comparison, so the check is independent of the
        // platform's path separator (a substring match on "tests/golden"
        // fails when the path is rendered with `\`).
        assert!(path.ends_with("tests/golden"));
    }
}