#![allow(
clippy::expect_used,
clippy::unwrap_used,
clippy::similar_names,
clippy::useless_vec
)]
mod common;
use common::fixtures::SampleTextGenerator;
use common::{load_fixtures, MorphTestCase};
/// Checks that the `sample_texts.json` fixtures include at least one case
/// whose category is `"basic"`, then reports how many were found.
///
/// # Panics
///
/// Panics if the fixtures cannot be loaded or if no `"basic"` case exists.
#[test]
fn test_basic_greetings() {
    let fixtures = load_fixtures("sample_texts.json").expect("Failed to load sample texts");

    // Gather references to the cases tagged as "basic"; uncategorized cases are skipped.
    let mut basic_cases: Vec<&MorphTestCase> = Vec::new();
    for case in fixtures.iter() {
        if matches!(case.category.as_deref(), Some("basic")) {
            basic_cases.push(case);
        }
    }

    assert!(
        !basic_cases.is_empty(),
        "Should have basic test cases loaded"
    );
    println!("Loaded {} basic test cases", basic_cases.len());
}
/// Placeholder for empty and whitespace-only input handling.
///
/// Only the number of prepared inputs is printed; nothing is tokenized or
/// asserted yet.
#[test]
fn test_empty_input() {
    // The empty string followed by whitespace-only strings (spaces, tab, newline, mixed).
    const BLANK_INPUTS: [&str; 6] = ["", " ", " ", "\t", "\n", " \t\n "];

    println!(
        "Empty input test cases prepared: {} cases",
        BLANK_INPUTS.len()
    );
}
/// Placeholder for single-character input handling.
///
/// The samples cover a Hangul syllable, standalone jamo, a digit, Latin
/// letters, punctuation and a space. Only the sample count is printed;
/// nothing is tokenized or asserted yet.
#[test]
fn test_single_character_input() {
    let samples: &[&str] = &["가", "ㄱ", "ㅏ", "1", "a", "A", "!", "?", ".", ",", " "];

    println!(
        "Single character test cases prepared: {} cases",
        samples.len()
    );
}
/// Checks that `SampleTextGenerator::basic_sentences` yields at least one
/// sentence and reports how many it produced.
///
/// # Panics
///
/// Panics if the generator returns no sentences.
#[test]
fn test_common_sentences() {
    let samples = SampleTextGenerator::basic_sentences();

    // An empty sample set would make any later sentence-level check vacuous.
    assert!(!samples.is_empty(), "Should have sample sentences");

    println!(
        "Common sentence test cases prepared: {} cases",
        samples.len()
    );
}
/// Placeholder for morpheme boundary checks.
///
/// Each row pairs an input word with a list of segments (presumably the
/// expected morpheme split — nothing consumes the rows yet). Only the row
/// count is printed.
#[test]
fn test_morpheme_boundaries() {
    const BOUNDARY_CASES: [(&str, &[&str]); 3] = [
        ("안녕하세요", &["안녕", "하", "세요"]),
        ("감사합니다", &["감사", "하", "ㅂ니다"]),
        ("좋은", &["좋", "은"]),
    ];

    println!(
        "Morpheme boundary test cases prepared: {} cases",
        BOUNDARY_CASES.len()
    );
}
/// Placeholder for part-of-speech tagging checks.
///
/// Each row pairs an input word with a list of `(morpheme, tag)` tuples
/// (presumably the expected analysis — nothing consumes the rows yet).
/// Only the row count is printed.
#[test]
fn test_pos_tagging_accuracy() {
    // A fixed-size array is enough for the outer table; only the per-word rows
    // need `Vec`, because they hold a different number of morphemes.
    let test_cases = [
        ("사람", vec![("사람", "NNG")]),
        ("서울", vec![("서울", "NNP")]),
        ("나", vec![("나", "NP")]),
        ("가다", vec![("가", "VV"), ("다", "EF")]),
        ("예쁘다", vec![("예쁘", "VA"), ("다", "EF")]),
    ];

    println!(
        "POS tagging test cases prepared: {} cases",
        test_cases.len()
    );
}
/// Placeholder for particle handling checks.
///
/// Each row pairs an input word with two `(morpheme, tag)` tuples
/// (presumably a stem followed by its particle — nothing consumes the rows
/// yet). Only the row count is printed.
#[test]
fn test_particle_handling() {
    let particle_cases: &[(&str, &[(&str, &str)])] = &[
        ("나는", &[("나", "NP"), ("는", "JX")]),
        ("책을", &[("책", "NNG"), ("을", "JKO")]),
        ("학교에서", &[("학교", "NNG"), ("에서", "JKB")]),
        ("친구와", &[("친구", "NNG"), ("와", "JC")]),
    ];

    println!(
        "Particle handling test cases prepared: {} cases",
        particle_cases.len()
    );
}
/// Placeholder for verb conjugation checks.
///
/// Each row pairs a conjugated form with a list of `(morpheme, tag)` tuples
/// (presumably the expected analysis — nothing consumes the rows yet).
/// Only the row count is printed.
#[test]
fn test_verb_conjugations() {
    /// A `(morpheme, tag)` pair.
    type Morph = (&'static str, &'static str);

    const CONJUGATIONS: [(&str, &[Morph]); 4] = [
        ("먹었다", &[("먹", "VV"), ("었", "EP"), ("다", "EF")]),
        // The surface form is contracted; the row lists "가" + "았" separately.
        ("갔습니다", &[("가", "VV"), ("았", "EP"), ("습니다", "EF")]),
        ("하고", &[("하", "VV"), ("고", "EC")]),
        ("먹어요", &[("먹", "VV"), ("어요", "EF")]),
    ];

    println!(
        "Verb conjugation test cases prepared: {} cases",
        CONJUGATIONS.len()
    );
}
/// Placeholder for token position checks.
///
/// Only echoes the two-word sample sentence; no positions are computed or
/// asserted yet.
#[test]
fn test_token_positions() {
    // Two words separated by a single space.
    let input: &str = "안녕하세요 반갑습니다";

    println!("Token position test prepared for: '{input}'");
}
/// Placeholder for tokenization consistency checks.
///
/// Only the number of prepared inputs is printed; nothing is tokenized or
/// compared yet.
#[test]
fn test_tokenization_consistency() {
    const INPUTS: [&str; 3] = ["안녕하세요", "대한민국", "인공지능"];

    println!(
        "Consistency test cases prepared: {} cases",
        INPUTS.len()
    );
}
/// Integration checks against the `mecab_ko_hangul` helper functions.
#[cfg(test)]
mod hangul_tests {
    use mecab_ko_hangul::{compose, decompose, has_jongseong, is_hangul};

    /// Round-trips '한' through `decompose` / `compose` and spot-checks
    /// `is_hangul` and `has_jongseong` on Hangul and non-Hangul characters.
    #[test]
    fn test_hangul_decomposition_integration() {
        // '한' splits into initial 'ㅎ', medial 'ㅏ' and final 'ㄴ'.
        let (initial, medial, final_consonant) = decompose('한').expect("Should decompose");
        assert_eq!(initial, 'ㅎ');
        assert_eq!(medial, 'ㅏ');
        assert_eq!(final_consonant, Some('ㄴ'));

        // Putting the same three jamo back together must give the original syllable.
        assert_eq!(
            compose('ㅎ', 'ㅏ', Some('ㄴ')).expect("Should compose"),
            '한'
        );

        // Syllables are accepted; an ASCII letter and digit are rejected.
        assert!(is_hangul('가'));
        assert!(is_hangul('힣'));
        assert!(!is_hangul('a'));
        assert!(!is_hangul('1'));

        // `None` is expected for a character that is not a Hangul syllable.
        assert_eq!(has_jongseong('한'), Some(true));
        assert_eq!(has_jongseong('하'), Some(false));
        assert_eq!(has_jongseong('a'), None);
    }

    /// Checks `has_jongseong` against eight syllables that end in a final
    /// consonant and eight that do not.
    #[test]
    fn test_jongseong_detection_comprehensive() {
        // Every syllable in this string has a final consonant.
        for ch in "각간갈감갑강한국".chars() {
            assert_eq!(
                has_jongseong(ch),
                Some(true),
                "'{ch}' should have jongseong"
            );
        }

        // Every syllable in this string is open (no final consonant).
        for ch in "가나다라마바사아".chars() {
            assert_eq!(
                has_jongseong(ch),
                Some(false),
                "'{ch}' should not have jongseong"
            );
        }
    }
}