use super::apply_context_corrections;
use super::compound_and_irregular::apply_compound_and_irregular_corrections;
use super::conjugation::apply_conjugation_corrections;
use super::sentence_final::apply_sentence_final_corrections;
use super::sentence_final_endings::apply_sentence_final_endings_corrections;
use super::suffix_and_dependency::apply_suffix_and_dependency_corrections;
use super::verb_and_morpheme::apply_verb_and_morpheme_corrections;
use super::verb_splitting::apply_verb_splitting_corrections;
use super::xsv_and_ec_ef::apply_xsv_and_ec_ef_corrections;
use super::xsv_morpheme_split::apply_xsv_morpheme_split_corrections;
use crate::sejong::types::SejongToken;
/// Build a test token whose span starts at 0 and ends at the surface's
/// char count. NOTE(review): several tests build multi-token inputs with
/// `tok`, so their spans all start at 0 — the passes under test evidently
/// key on vector order, not `start_pos`; confirm if that ever changes.
fn tok(surface: &str, pos: &str) -> SejongToken {
    SejongToken::new(surface, pos, 0, surface.chars().count())
}

/// Build a test token with an explicit [start, end) character span.
fn tok_at(surface: &str, pos: &str, start: usize, end: usize) -> SejongToken {
    SejongToken::new(surface, pos, start, end)
}
#[test]
fn test_correction_185_ha_xsv_to_vv_at_start() {
    // Correction 185: 하/XSV opening the sequence is promoted to a main verb.
    let mut toks = vec![tok("하", "XSV"), tok("니까", "EC")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].pos, "VV", "하/XSV at position 0 should become VV");
}

#[test]
fn test_correction_185_ha_xsv_not_changed_if_not_first() {
    // Correction 185 must not fire when 하/XSV is not the first token.
    let mut toks = vec![tok("먹", "VV"), tok("하", "XSV")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].pos, "XSV", "하/XSV not at position 0 must stay XSV");
}

#[test]
fn test_correction_188_geurae_vv_normalized_to_geuro() {
    // Correction 188: contracted stem 그래 is normalized back to 그러, POS kept.
    let mut toks = vec![tok("그래", "VV")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].surface, "그러");
    assert_eq!(toks[0].pos, "VV");
}

#[test]
fn test_correction_193_etn_jamo_normalization() {
    // Correction 193: a conjoining jamo (U+1106) is rewritten as compat ㅁ.
    let mut toks = vec![tok("\u{1106}", "ETN")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].surface, "ㅁ");
}
#[test]
fn test_correction_194_tara_merge_to_tararso() {
    // Correction 194: 따라 + 서 + 어 collapses into the single adverb 따라서/MAG.
    let mut toks = vec![
        tok_at("따라", "NNB", 0, 2),
        tok_at("서", "VV", 2, 3),
        tok_at("어", "EC", 3, 4),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1);
    assert_eq!(toks[0].surface, "따라서");
    assert_eq!(toks[0].pos, "MAG");
}

#[test]
fn test_correction_196_xpn_compound_split() {
    // Correction 196: the prefix 맨 is peeled off 맨손 as XPN + NNG.
    let mut toks = vec![tok_at("맨손", "NNG", 0, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 2);
    assert_eq!(toks[0].surface, "맨");
    assert_eq!(toks[0].pos, "XPN");
    assert_eq!(toks[1].surface, "손");
    assert_eq!(toks[1].pos, "NNG");
}

#[test]
fn test_correction_255_eo_yo_merge() {
    // Correction 255: 어/EF + 요/JX fuse into the polite ending 어요/EF.
    let mut toks = vec![tok_at("어", "EF", 0, 1), tok_at("요", "JX", 1, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1);
    assert_eq!(toks[0].surface, "어요");
    assert_eq!(toks[0].pos, "EF");
}
#[test]
fn test_protection_empty_input_unchanged() {
    // Guard: the correction pipeline must be a no-op on an empty token list.
    let mut toks: Vec<SejongToken> = Vec::new();
    apply_context_corrections(&mut toks);
    assert!(toks.is_empty(), "empty token list must remain empty");
}

#[test]
fn test_protection_200_bamnak_split() {
    // Pass 200: the compound 밤낮 is split into its two noun parts.
    let mut toks = vec![tok_at("밤낮", "NNG", 0, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 2, "밤낮 should be split into two tokens");
    assert_eq!(toks[0].surface, "밤");
    assert_eq!(toks[0].pos, "NNG");
    assert_eq!(toks[1].surface, "낮");
    assert_eq!(toks[1].pos, "NNG");
}

#[test]
fn test_protection_202_compound_noun_merge() {
    // Pass 202: adjacent nouns 여론 + 조사 merge, and the span is widened to cover both.
    let mut toks = vec![tok_at("여론", "NNG", 0, 2), tok_at("조사", "NNG", 2, 4)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "여론 + 조사 should merge into one token");
    assert_eq!(toks[0].surface, "여론조사");
    assert_eq!(toks[0].pos, "NNG");
    assert_eq!(toks[0].start_pos, 0);
    assert_eq!(toks[0].end_pos, 4);
}

#[test]
fn test_protection_207_jinja_mag_to_nng() {
    // Pass 207: 진짜 tagged as adverb is retagged as a common noun.
    let mut toks = vec![tok("진짜", "MAG")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].pos, "NNG", "진짜/MAG must become NNG");
    assert_eq!(toks[0].surface, "진짜");
}

#[test]
fn test_protection_248_foreign_word_nnp_to_nng() {
    // Pass 248: a loanword mis-tagged as a proper noun is demoted to NNG.
    let mut toks = vec![tok("알고리즘", "NNP")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].pos, "NNG", "알고리즘/NNP must become NNG");
}
#[test]
fn test_protection_251_geudongan_merge() {
    // Pass 251: 그 + 동안 merge into the fused noun 그동안, span covering both.
    let mut toks = vec![tok_at("그", "NP", 0, 1), tok_at("동안", "NNG", 1, 3)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "그 + 동안 should merge into 그동안");
    assert_eq!(toks[0].surface, "그동안");
    assert_eq!(toks[0].pos, "NNG");
    assert_eq!(toks[0].start_pos, 0);
    assert_eq!(toks[0].end_pos, 3);
}

#[test]
fn test_protection_253_onomatopoeia_yaong_to_ic() {
    // Pass 253: the onomatopoeia 야옹 is retagged as an interjection.
    let mut toks = vec![tok("야옹", "NNG")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].pos, "IC", "야옹/NNG must become IC");
    assert_eq!(toks[0].surface, "야옹");
}

#[test]
fn test_protection_254_jamo_eo_yo_normalization() {
    // Pass 254: the stray vowel jamo in ㅓ요 is normalized to the full syllable 어.
    let mut toks = vec![tok("ㅓ요", "EF")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].surface, "어요", "ㅓ요/EF surface must normalize to 어요");
    assert_eq!(toks[0].pos, "EF");
}

#[test]
fn test_protection_256_jollri_vv_to_va() {
    // Pass 256: 졸리 is an adjective stem, not a verb stem.
    let mut toks = vec![tok("졸리", "VV")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].pos, "VA", "졸리/VV must become VA");
    assert_eq!(toks[0].surface, "졸리");
}
#[test]
fn test_protection_187_seoul_teukbyeolsi_split() {
    // Pass 187: 서울특별시 splits into proper noun 서울 + common noun 특별시.
    let mut toks = vec![tok_at("서울특별시", "NNP", 0, 5)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 2, "서울특별시 must be split into two tokens");
    assert_eq!(toks[0].surface, "서울");
    assert_eq!(toks[0].pos, "NNP");
    assert_eq!(toks[1].surface, "특별시");
    assert_eq!(toks[1].pos, "NNG");
}

#[test]
fn test_protection_247_ha_yeo_xsn_to_ec() {
    // Pass 247: 여/XSN after 하/XSV is really the connective ending 어/EC.
    let mut toks = vec![
        tok("공부", "NNG"),
        tok("하", "XSV"),
        tok("여", "XSN"),
        tok("주", "VX"),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[2].surface, "어", "여/XSN surface must change to 어");
    assert_eq!(toks[2].pos, "EC", "여/XSN pos must change to EC");
}

#[test]
fn test_protection_228_halmeonym_merge() {
    // Pass 228: the mis-segmented 하+ㄹ+머+님 is reassembled into 할머님/NNG.
    let mut toks = vec![
        tok_at("하", "XSV", 0, 1),
        tok_at("ㄹ", "ETM", 1, 2),
        tok_at("머", "NP", 2, 3),
        tok_at("님", "XSN", 3, 4),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "하+ㄹ+머+님 must merge into 할머님");
    assert_eq!(toks[0].surface, "할머님");
    assert_eq!(toks[0].pos, "NNG");
    assert_eq!(toks[0].start_pos, 0);
    assert_eq!(toks[0].end_pos, 4);
}
#[test]
fn test_protection_230_sigan_merge() {
    // Pass 230: 시 + 가 + ㄴ recombines into the noun 시간.
    let mut toks = vec![
        tok_at("시", "NNG", 0, 1),
        tok_at("가", "VV", 1, 2),
        tok_at("ㄴ", "ETM", 2, 3),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "시+가+ㄴ must merge into 시간");
    assert_eq!(toks[0].surface, "시간");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_231_jumal_merge() {
    // Pass 231: 주 + 말 recombines into the noun 주말.
    let mut toks = vec![tok_at("주", "VX", 0, 1), tok_at("말", "NNG", 1, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "주+말 must merge into 주말");
    assert_eq!(toks[0].surface, "주말");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_232_galdeung_merge() {
    // Pass 232: 가 + ㄹ + 등 recombines into the noun 갈등.
    let mut toks = vec![
        tok_at("가", "VV", 0, 1),
        tok_at("ㄹ", "ETM", 1, 2),
        tok_at("등", "NNG", 2, 3),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "가+ㄹ+등 must merge into 갈등");
    assert_eq!(toks[0].surface, "갈등");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_234_sl_ga_vv_to_jks() {
    // Pass 234: after a foreign-word token, 가/VV is really the subject
    // particle, and the spurious 어/EC following it is dropped.
    let mut toks = vec![
        tok("MBTI", "SL"),
        tok("가", "VV"),
        tok("어", "EC"),
        tok("뭐", "NP"),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].surface, "가");
    assert_eq!(toks[1].pos, "JKS", "가/VV after SL must become JKS");
    assert_eq!(toks.len(), 3, "어/EC must be removed");
}

#[test]
fn test_protection_236_jinheng_merge() {
    // Pass 236: 지 + ㄴ + 행 recombines into the noun 진행.
    let mut toks = vec![
        tok_at("지", "VX", 0, 1),
        tok_at("ㄴ", "ETM", 1, 2),
        tok_at("행", "NNG", 2, 3),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "지+ㄴ+행 must merge into 진행");
    assert_eq!(toks[0].surface, "진행");
    assert_eq!(toks[0].pos, "NNG");
}
#[test]
fn test_protection_238_ha_a_to_ha_eo() {
    // Pass 238: after a stem ending in 하, the ending 아 is corrected to 어.
    let mut toks = vec![tok("사랑하", "VV"), tok("아", "EC"), tok("주", "VX")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].surface, "어", "아 after 하-ending VV must change to 어");
    assert_eq!(toks[1].pos, "EC");
}

#[test]
fn test_protection_239_jup_irregular_weo_nng() {
    // Pass 239: ㅂ-irregular repair — 주 + 워 is restored to stem 줍/VV + ending 어.
    let mut toks = vec![tok_at("주", "VX", 0, 1), tok_at("워", "NNG", 1, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].surface, "줍", "주/VX before 워/NNG must become 줍");
    assert_eq!(toks[0].pos, "VV");
    assert_eq!(toks[1].surface, "어");
}

#[test]
fn test_protection_241_mugeop_irregular() {
    // Pass 241: ㅂ-irregular repair — 무거 + 우면 is restored to 무겁/VA + 으면/EC.
    let mut toks = vec![tok_at("무거", "NNG", 0, 2), tok_at("우면", "NNG", 2, 4)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[0].surface, "무겁", "무거/NNG must become 무겁/VA");
    assert_eq!(toks[0].pos, "VA");
    assert_eq!(toks[1].surface, "으면");
    assert_eq!(toks[1].pos, "EC");
}

#[test]
fn test_protection_242_ireumyeon_maj_to_vv_ec() {
    // Pass 242: 이르면/MAJ is decomposed into stem 이르/VV + ending 면/EC.
    let mut toks = vec![tok("이르", "VV"), tok("어", "EF"), tok("이르면", "MAJ")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[2].surface, "이르", "이르면/MAJ must split: stem=이르");
    assert_eq!(toks[2].pos, "VV");
    assert_eq!(toks[3].surface, "면");
    assert_eq!(toks[3].pos, "EC");
}

#[test]
fn test_protection_244_an_mag_removal() {
    // Pass 244: a spurious 안/MAG wedged between VX and 으며/EC is deleted.
    let mut toks = vec![
        tok("가", "VV"),
        tok("고", "EC"),
        tok("있", "VX"),
        tok("안", "MAG"),
        tok("으며", "EC"),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 4, "안/MAG between VX and 으며/EC must be removed");
    assert_eq!(toks[2].surface, "있");
    assert_eq!(toks[3].surface, "으며");
}
#[test]
fn test_protection_167_jeok_merge() {
    // Pass 167: the suffix 적/XSN is absorbed into the preceding noun.
    let mut toks = vec![tok_at("성공", "NNG", 0, 2), tok_at("적", "XSN", 2, 3)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "성공 + 적 must merge into 성공적");
    assert_eq!(toks[0].surface, "성공적");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_168_ui_jkb_to_jkg() {
    // Pass 168: 의 after a noun is the genitive marker, not an adverbial one.
    let mut toks = vec![tok("나라", "NNG"), tok("의", "JKB")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].surface, "의");
    assert_eq!(toks[1].pos, "JKG", "의/JKB after NNG must become JKG");
}

#[test]
fn test_protection_86_nda_etm_ef_merge() {
    // Pass 86: ㄴ/ETM + 다/EF fuse into the declarative ending ㄴ다/EF.
    let mut toks = vec![
        tok_at("가", "VV", 0, 1),
        tok_at("ㄴ", "ETM", 1, 2),
        tok_at("다", "EF", 2, 3),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 2, "ㄴ + 다 must merge into ㄴ다");
    assert_eq!(toks[1].surface, "ㄴ다");
    assert_eq!(toks[1].pos, "EF");
}

#[test]
fn test_protection_87_aux_vv_to_vx_after_ec() {
    // Pass 87: a verb directly after a connective ending is an auxiliary.
    let mut toks = vec![
        tok("먹", "VV"),
        tok("어", "EC"),
        tok("버리", "VV"),
        tok("었", "EP"),
        tok("다", "EF"),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[2].surface, "버리");
    assert_eq!(toks[2].pos, "VX", "버리/VV after EC must become VX");
}
#[test]
fn test_protection_88_doe_vv_to_xsv_after_nng() {
    // Pass 88: 되 after a noun is the verbalizing suffix, not a full verb.
    let mut toks = vec![
        tok("공개", "NNG"),
        tok("되", "VV"),
        tok("었", "EP"),
        tok("다", "EF"),
    ];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].surface, "되");
    assert_eq!(toks[1].pos, "XSV", "되/VV after NNG must become XSV");
}

#[test]
fn test_protection_265_ne_ic_to_ef_after_vv() {
    // Pass 265: sentence-final 네 after a verb stem is an ending, not an interjection.
    let mut toks = vec![tok("킹받", "VV"), tok("네", "IC")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].surface, "네");
    assert_eq!(toks[1].pos, "EF", "네/IC at sentence end after VV must become EF");
}

#[test]
fn test_protection_259_chae_vv_to_nnb() {
    // Pass 259: 채/VV + 아/EF collapses to the dependent noun 채/NNB.
    let mut toks = vec![tok_at("채", "VV", 0, 1), tok_at("아", "EF", 1, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "채/VV + 아/EF must reduce to single 채/NNB");
    assert_eq!(toks[0].surface, "채");
    assert_eq!(toks[0].pos, "NNB");
}

#[test]
fn test_protection_209_empty_pos_ascii_to_sl() {
    // Pass 209: an untagged ASCII token is classified as a foreign word.
    let mut toks = vec![tok("HELLO", "")];
    apply_context_corrections(&mut toks);
    assert_eq!(
        toks[0].pos, "SL",
        "empty POS for ASCII surface 'HELLO' must become SL (Pass 209)"
    );
}

#[test]
fn test_protection_209_empty_pos_korean_to_nng() {
    // Pass 209: an untagged Korean token defaults to a common noun.
    let mut toks = vec![tok("사랑", "")];
    apply_context_corrections(&mut toks);
    assert_eq!(
        toks[0].pos, "NNG",
        "empty POS for Korean surface '사랑' must become NNG (Pass 209)"
    );
}

#[test]
fn test_protection_223_xr_to_nng() {
    // Pass 223: a bare root (XR) token is retagged as a common noun.
    let mut toks = vec![tok("아름답", "XR")];
    apply_context_corrections(&mut toks);
    assert_eq!(
        toks[0].pos, "NNG",
        "XR root token '아름답' must be converted to NNG (Pass 223)"
    );
}
#[test]
fn test_protection_conjugation_174_xsv_to_xsa() {
    // Pass 174: 하 after an adjectival root is the adjectivizing suffix XSA.
    // Located by surface rather than index, since other passes may resegment.
    let mut toks = vec![tok("행복", "NNG"), tok("하", "XSV"), tok("어요", "EF")];
    apply_context_corrections(&mut toks);
    let ha = toks.iter().find(|t| t.surface == "하");
    assert!(ha.is_some(), "하 token must exist");
    assert_eq!(
        ha.unwrap().pos,
        "XSA",
        "하/XSV after adj root must become XSA (Pass 174)"
    );
}

#[test]
fn test_protection_post_conjugation_86_nda_merge() {
    // Pass 86 must still hold after the conjugation passes have run.
    let mut toks = vec![
        tok_at("가", "VV", 0, 1),
        tok_at("ㄴ", "ETM", 1, 2),
        tok_at("다", "EF", 2, 3),
    ];
    apply_context_corrections(&mut toks);
    let nda = toks.iter().find(|t| t.surface == "ㄴ다");
    assert!(nda.is_some(), "ㄴ/ETM + 다/EF must merge to ㄴ다/EF (Pass 86)");
    assert_eq!(nda.unwrap().pos, "EF");
}

#[test]
fn test_protection_post_conjugation_88_nng_doe_xsv() {
    // Pass 88 must still hold after the conjugation passes have run.
    let mut toks = vec![tok("공개", "NNG"), tok("되", "VV"), tok("었", "EP")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].pos, "XSV", "되/VV after NNG must become XSV (Pass 88)");
}

#[test]
fn test_protection_particle_21_vcp_insertion() {
    // Pass 21: 이 between a noun and 다/EF is the copula.
    let mut toks = vec![tok("학생", "NNG"), tok("이", "EP"), tok("다", "EF")];
    apply_context_corrections(&mut toks);
    assert_eq!(toks[1].pos, "VCP", "이/EP after NNG must become VCP (Pass 21)");
}

#[test]
fn test_protection_compound_196_xpn_maeson() {
    // Pass 196 through the full pipeline: 맨손 splits into prefix + noun.
    let mut toks = vec![tok_at("맨손", "NNG", 0, 2)];
    apply_context_corrections(&mut toks);
    assert_eq!(toks.len(), 2, "맨손 must split into 맨/XPN + 손/NNG");
    assert_eq!(toks[0].surface, "맨");
    assert_eq!(toks[0].pos, "XPN");
    assert_eq!(toks[1].surface, "손");
    assert_eq!(toks[1].pos, "NNG");
}
#[test]
fn test_protection_sentence_final_89_eoyo_ec_to_ef() {
    // Pass 89 (sentence-final module direct call): trailing 어요/EC becomes EF.
    let mut toks = vec![tok("먹", "VV"), tok("어요", "EC")];
    apply_sentence_final_corrections(&mut toks);
    assert_eq!(
        toks.last().unwrap().pos,
        "EF",
        "어요/EC at sentence end must become EF (Pass 89)"
    );
}

#[test]
fn test_protection_sentence_final_91_nda_ec_to_ef() {
    // Pass 91 (sentence-final module direct call): trailing ㄴ다/EC becomes EF.
    let mut toks = vec![tok("가", "VV"), tok("ㄴ다", "EC")];
    apply_sentence_final_corrections(&mut toks);
    assert_eq!(toks.last().unwrap().pos, "EF", "ㄴ다/EC must become EF (Pass 91)");
}

#[test]
fn test_protection_sentence_final_154_da_nng_to_ef() {
    // Pass 154 (sentence-final module direct call): trailing 다/NNG after a
    // verb is really the declarative ending.
    let mut toks = vec![tok("하", "VV"), tok("다", "NNG")];
    apply_sentence_final_corrections(&mut toks);
    assert_eq!(
        toks.last().unwrap().pos,
        "EF",
        "다/NNG at sentence end after VV must become EF (Pass 154)"
    );
}

#[test]
fn test_protection_conjugation_174_direct_call() {
    // Pass 174 via the conjugation module alone (no full pipeline).
    let mut toks = vec![tok("행복", "NNG"), tok("하", "XSV"), tok("어요", "EF")];
    apply_conjugation_corrections(&mut toks);
    assert_eq!(
        toks[1].pos, "XSA",
        "하/XSV after adj root 행복 must become XSA (Pass 174 direct call)"
    );
}

#[test]
fn test_protection_conjugation_217_va_eumyeon_ef_to_ec() {
    // Pass 217 via the conjugation module: 으면 after an adjective is connective.
    let mut toks = vec![tok("예쁘", "VA"), tok("으면", "EF")];
    apply_conjugation_corrections(&mut toks);
    assert_eq!(toks[0].pos, "VA", "예쁘/VA must remain VA");
    assert_eq!(
        toks[1].pos, "EC",
        "으면/EF after VA must become EC (Pass 217 direct call)"
    );
}
#[test]
fn test_protection_verb_morpheme_24_verb_gi_splitting() {
    // Pass 24 via the verb/morpheme module: 가기/NNG splits into 가/VV + 기/ETN.
    let mut toks = vec![tok("가기", "NNG")];
    apply_verb_and_morpheme_corrections(&mut toks);
    assert_eq!(toks.len(), 2);
    assert_eq!(toks[0].surface, "가");
    assert_eq!(toks[0].pos, "VV");
    assert_eq!(toks[1].surface, "기");
    assert_eq!(toks[1].pos, "ETN");
}

#[test]
fn test_protection_compound_irregular_228_halmeonim() {
    // Pass 228 via the compound/irregular module: 하+ㄹ+머+님 → 할머님/NNG.
    let mut toks = vec![
        tok("하", "XSV"),
        tok("ㄹ", "ETM"),
        tok("머", "NP"),
        tok("님", "XSN"),
    ];
    apply_compound_and_irregular_corrections(&mut toks);
    assert_eq!(toks.len(), 1);
    assert_eq!(toks[0].surface, "할머님");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_suffix_dep_167_jeok_xsn_merge() {
    // Pass 167 via the suffix/dependency module: noun + 적/XSN merge.
    let mut toks = vec![tok("역사", "NNG"), tok("적", "XSN")];
    apply_suffix_and_dependency_corrections(&mut toks);
    assert_eq!(toks.len(), 1);
    assert_eq!(toks[0].surface, "역사적");
    assert_eq!(toks[0].pos, "NNG");
}

#[test]
fn test_protection_xsv_morpheme_split_113_gonaseo_merge() {
    // Pass 113 via the XSV-split module: 고 + 나 + 서 fuse into 고나서/EC.
    let mut toks = vec![
        tok_at("고", "EC", 0, 1),
        tok_at("나", "NP", 1, 2),
        tok_at("서", "JKB", 2, 3),
    ];
    apply_xsv_morpheme_split_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "고+나+서 must merge into 고나서");
    assert_eq!(toks[0].surface, "고나서");
    assert_eq!(toks[0].pos, "EC");
    assert_eq!(toks[0].start_pos, 0);
    assert_eq!(toks[0].end_pos, 3);
}

#[test]
fn test_protection_verb_splitting_24_gagi_nng_to_vv_etn() {
    // Pass 24 via the verb-splitting module directly.
    let mut toks = vec![tok("가기", "NNG")];
    apply_verb_splitting_corrections(&mut toks);
    assert_eq!(toks.len(), 2, "가기/NNG must split into 가/VV + 기/ETN");
    assert_eq!(toks[0].surface, "가");
    assert_eq!(toks[0].pos, "VV");
    assert_eq!(toks[1].surface, "기");
    assert_eq!(toks[1].pos, "ETN");
}

#[test]
fn test_protection_sentence_final_endings_164_nr_numeral_merge() {
    // Pass 164 via the sentence-final-endings module: numerals 삼 + 십 merge.
    let mut toks = vec![tok_at("삼", "NR", 0, 1), tok_at("십", "NR", 1, 2)];
    apply_sentence_final_endings_corrections(&mut toks);
    assert_eq!(toks.len(), 1, "삼 + 십 must merge into 삼십");
    assert_eq!(toks[0].surface, "삼십");
    assert_eq!(toks[0].pos, "NR");
}

#[test]
fn test_protection_xsv_and_ec_ef_91_neunda_ec_to_ef() {
    // Pass 91 via the XSV/EC-EF module: trailing 는다/EC becomes EF.
    let mut toks = vec![tok("먹", "VV"), tok("는다", "EC")];
    apply_xsv_and_ec_ef_corrections(&mut toks);
    assert_eq!(toks[1].surface, "는다");
    assert_eq!(
        toks[1].pos, "EF",
        "는다/EC must become EF (Pass 91 direct call)"
    );
}