pub(crate) use super::*;
// ctl_01: a well-formed tokenizer_config JSON (template + special tokens)
// must parse without error.
#[test]
fn ctl_01_tokenizer_config_parsed() {
    let raw = r#"{
"chat_template": "{% for message in messages %}{{ message.content }}{% endfor %}",
"bos_token": "<s>",
"eos_token": "</s>"
}"#;
    let parsed = HuggingFaceTemplate::from_json(raw);
    assert!(parsed.is_ok(), "Failed to parse valid tokenizer config");
}
// ctl_02: malformed JSON must surface as an Err, never a panic.
#[test]
fn ctl_02_invalid_json_rejected() {
    let malformed = "{ invalid json }";
    let outcome = HuggingFaceTemplate::from_json(malformed);
    assert!(outcome.is_err(), "Should reject invalid JSON");
}
// ctl_03: the chat_template field is extracted into a non-empty template_str.
#[test]
fn ctl_03_chat_template_extracted() {
    let raw = r#"{
"chat_template": "test template",
"bos_token": "<s>"
}"#;
    let parsed = HuggingFaceTemplate::from_json(raw).expect("Parse failed");
    assert!(!parsed.template_str.is_empty());
}
// ctl_04: a config without chat_template is unusable and must be rejected.
#[test]
fn ctl_04_missing_template_error() {
    let raw = r#"{"bos_token": "<s>"}"#;
    let outcome = HuggingFaceTemplate::from_json(raw);
    assert!(outcome.is_err(), "Should error on missing chat_template");
}
// ctl_05: bos/eos special tokens are carried through from the config into
// the parsed template's special_tokens.
#[test]
fn ctl_05_special_tokens_extracted() {
    let raw = r#"{
"chat_template": "test",
"bos_token": "<s>",
"eos_token": "</s>",
"unk_token": "<unk>"
}"#;
    let parsed = HuggingFaceTemplate::from_json(raw).expect("Parse failed");
    // as_deref lets us compare against &str literals without allocating.
    assert_eq!(parsed.special_tokens.bos_token.as_deref(), Some("<s>"));
    assert_eq!(parsed.special_tokens.eos_token.as_deref(), Some("</s>"));
}
// cts_01: the ChatML preset defines a BOS token.
#[test]
fn cts_01_bos_token_chatml() {
    let tmpl = ChatMLTemplate::new();
    let toks = tmpl.special_tokens();
    assert!(toks.bos_token.is_some());
}
// cts_02: the ChatML preset defines an EOS token.
#[test]
fn cts_02_eos_token_chatml() {
    let tmpl = ChatMLTemplate::new();
    let toks = tmpl.special_tokens();
    assert!(toks.eos_token.is_some());
}
// cts_03: ChatML turn delimiters are the canonical <|im_start|>/<|im_end|> pair.
#[test]
fn cts_03_chatml_tokens() {
    let tmpl = ChatMLTemplate::new();
    let toks = tmpl.special_tokens();
    assert_eq!(toks.im_start_token.as_deref(), Some("<|im_start|>"));
    assert_eq!(toks.im_end_token.as_deref(), Some("<|im_end|>"));
}
// cts_04: Llama2 instruction markers are [INST] / [/INST].
#[test]
fn cts_04_inst_tokens_llama2() {
    let tmpl = Llama2Template::new();
    let toks = tmpl.special_tokens();
    assert_eq!(toks.inst_start.as_deref(), Some("[INST]"));
    assert_eq!(toks.inst_end.as_deref(), Some("[/INST]"));
}
// cts_05: Llama2 system-prompt markers are <<SYS>> / <</SYS>>.
#[test]
fn cts_05_system_tokens_llama2() {
    let tmpl = Llama2Template::new();
    let toks = tmpl.special_tokens();
    assert_eq!(toks.sys_start.as_deref(), Some("<<SYS>>"));
    assert_eq!(toks.sys_end.as_deref(), Some("<</SYS>>"));
}
// cta_01: Qwen / OpenHermes / Yi model names all map to the ChatML format.
#[test]
fn cta_01_chatml_detected() {
    for name in ["Qwen2-0.5B-Instruct", "OpenHermes-2.5", "Yi-6B-Chat"] {
        assert_eq!(detect_format_from_name(name), TemplateFormat::ChatML);
    }
}
// cta_02: TinyLlama, Vicuna and Llama-2 names all map to the Llama2 format.
#[test]
fn cta_02_llama2_detected() {
    for name in ["TinyLlama-1.1B-Chat", "vicuna-7b-v1.5", "Llama-2-7B-Chat"] {
        assert_eq!(detect_format_from_name(name), TemplateFormat::Llama2);
    }
}
// cta_03: Mistral and Mixtral names map to the Mistral format.
#[test]
fn cta_03_mistral_detected() {
    for name in ["Mistral-7B-Instruct", "Mixtral-8x7B"] {
        assert_eq!(detect_format_from_name(name), TemplateFormat::Mistral);
    }
}
// cta_04: phi-2 and phi-3 names map to the Phi format.
#[test]
fn cta_04_phi_detected() {
    for name in ["phi-2", "phi-3-mini"] {
        assert_eq!(detect_format_from_name(name), TemplateFormat::Phi);
    }
}
// cta_05: an alpaca-style name maps to the Alpaca format.
#[test]
fn cta_05_alpaca_detected() {
    let detected = detect_format_from_name("alpaca-7b");
    assert_eq!(detected, TemplateFormat::Alpaca);
}
// cta_07: an unrecognized model name falls back to the Raw format.
#[test]
fn cta_07_raw_fallback() {
    let detected = detect_format_from_name("unknown-model");
    assert_eq!(detected, TemplateFormat::Raw);
}
// cta_08: detection is a pure function of the name — two calls with the
// same input must agree.
#[test]
fn cta_08_detection_deterministic() {
    let model = "TinyLlama-1.1B-Chat";
    let first = detect_format_from_name(model);
    let second = detect_format_from_name(model);
    assert_eq!(first, second);
}
// ctm_01: in the rendered ChatML output, the system turn precedes the user turn.
#[test]
fn ctm_01_system_first_chatml() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![
        ChatMessage::system("You are helpful."),
        ChatMessage::user("Hello!"),
    ];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    let sys_idx = rendered.find("system").expect("system not found");
    let user_idx = rendered.find("user").expect("user not found");
    assert!(sys_idx < user_idx, "System should come before user");
}
// ctm_02: message contents appear in the output in conversation order.
#[test]
fn ctm_02_alternation_preserved() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![
        ChatMessage::user("Hi"),
        ChatMessage::assistant("Hello!"),
        ChatMessage::user("How are you?"),
    ];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    let first = rendered.find("Hi").expect("Hi not found");
    let second = rendered.find("Hello!").expect("Hello not found");
    let third = rendered.find("How are you?").expect("How are you not found");
    assert!(first < second && second < third, "Messages out of order");
}
// ctm_04: the output ends with an open assistant turn so generation can
// continue directly from the prompt.
#[test]
fn ctm_04_generation_prompt_appended() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user("Hello!")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    let ends_with_prompt = rendered.ends_with("<|im_start|>assistant\n");
    assert!(ends_with_prompt, "Should end with assistant prompt");
}
// ctm_05: a conversation with no system message still formats successfully.
#[test]
fn ctm_05_no_system_handled() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user("Hello!")];
    let outcome = tmpl.format_conversation(&convo);
    assert!(outcome.is_ok(), "Should handle conversation without system");
}
// ctm_07: an empty message body is not an error.
#[test]
fn ctm_07_empty_message_handled() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user("")];
    let outcome = tmpl.format_conversation(&convo);
    assert!(outcome.is_ok(), "Should handle empty content");
}
// ctx_01: ChatML output carries the user turn marker, the content, and the
// end-of-turn marker.
#[test]
fn ctx_01_qwen2_chatml_format() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user("What is 2+2?")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    for needle in ["<|im_start|>user", "What is 2+2?", "<|im_end|>"] {
        assert!(rendered.contains(needle));
    }
}
// ctx_02: Llama2 output wraps the user content in <s>[INST] ... [/INST].
#[test]
fn ctx_02_tinyllama_llama2_format() {
    let tmpl = Llama2Template::new();
    let convo = vec![ChatMessage::user("Hello!")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    for needle in ["<s>", "[INST]", "Hello!", "[/INST]"] {
        assert!(rendered.contains(needle));
    }
}
// ctx_03: Mistral reports no system-prompt support, and a supplied system
// message is dropped from the rendered output.
#[test]
fn ctx_03_mistral_no_system() {
    let tmpl = MistralTemplate::new();
    assert!(!tmpl.supports_system_prompt());
    let convo = vec![
        ChatMessage::system("System prompt"),
        ChatMessage::user("Hello!"),
    ];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    assert!(!rendered.contains("System prompt"));
}
// ctx_04: Phi output uses the Instruct:/Output: prompt style.
#[test]
fn ctx_04_phi_format() {
    let tmpl = PhiTemplate::new();
    let convo = vec![ChatMessage::user("Hello!")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    for needle in ["Instruct: Hello!", "Output:"] {
        assert!(rendered.contains(needle));
    }
}
// ctx_05: Alpaca output uses the ### Instruction: / ### Response: headings.
#[test]
fn ctx_05_alpaca_format() {
    let tmpl = AlpacaTemplate::new();
    let convo = vec![ChatMessage::user("Hello!")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    for needle in ["### Instruction:", "Hello!", "### Response:"] {
        assert!(rendered.contains(needle));
    }
}
// cte_01: zero messages is a valid (if degenerate) conversation.
#[test]
fn cte_01_empty_conversation() {
    let tmpl = ChatMLTemplate::new();
    let convo: Vec<ChatMessage> = Vec::new();
    let outcome = tmpl.format_conversation(&convo);
    assert!(outcome.is_ok(), "Should handle empty conversation");
}
// cte_02: CJK, Arabic, and emoji content pass through the formatter intact.
#[test]
fn cte_02_unicode_preserved() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user("Hello! 你好 مرحبا 🎉")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    for needle in ["你好", "مرحبا", "🎉"] {
        assert!(rendered.contains(needle));
    }
}
// cte_03: a 10 KB message formats without error.
#[test]
fn cte_03_long_content() {
    let tmpl = ChatMLTemplate::new();
    let big = "x".repeat(10_000);
    let convo = vec![ChatMessage::user(&big)];
    let outcome = tmpl.format_conversation(&convo);
    assert!(outcome.is_ok(), "Should handle long content");
}
// cte_07: leading/trailing spaces in message content are not trimmed.
#[test]
fn cte_07_whitespace_preserved() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user(" content with spaces ")];
    let rendered = tmpl.format_conversation(&convo).expect("Format failed");
    assert!(rendered.contains(" content with spaces "));
}
// cte_09: embedded double quotes in content do not break formatting.
#[test]
fn cte_09_nested_quotes() {
    let tmpl = ChatMLTemplate::new();
    let convo = vec![ChatMessage::user(r#"He said "hello""#)];
    let outcome = tmpl.format_conversation(&convo);
    assert!(outcome.is_ok(), "Should handle nested quotes");
}
#[test]
// ctp_01: smoke-level performance guard — 1000 single-message formats must
// finish within one second of wall-clock time.
// NOTE(review): wall-clock assertions can flake on heavily loaded CI hosts;
// the 1s budget for 1000 iterations leaves generous headroom, but consider
// #[ignore] or a criterion benchmark if this ever fails spuriously.
fn ctp_01_format_performance() {
let template = ChatMLTemplate::new();
let messages = vec![ChatMessage::user("Hello!")];
let start = std::time::Instant::now();
for _ in 0..1000 {
// Result deliberately discarded — only elapsed time is under test.
let _ = template.format_conversation(&messages);
}
let elapsed = start.elapsed();
assert!(
elapsed.as_millis() < 1000,
"Formatting too slow: {:?}",
elapsed
);
}
// Falsification probe: every Qwen-family name variant (case, version suffix)
// must resolve to ChatML.
#[test]
fn test_falsify_chatml_models_detected_from_name() {
    let qwen_variants = [
        "Qwen2-0.5B-Instruct",
        "Qwen2.5-Coder-0.5B-Instruct",
        "Qwen2-7B-Chat",
        "qwen3-1.5B",
    ];
    for model in qwen_variants {
        let format = detect_format_from_name(model);
        assert_eq!(format, TemplateFormat::ChatML,
"FALSIFY: '{model}' should detect as ChatML (has im_start/im_end tokens)");
    }
}
// Falsification probe: detection must ignore case — upper/lower spellings of
// the same family land on the same format.
#[test]
fn test_falsify_format_detection_case_insensitive() {
    let cases: &[(&str, TemplateFormat)] = &[
        ("qwen2-0.5b-instruct", TemplateFormat::ChatML),
        ("QWEN2-0.5B-INSTRUCT", TemplateFormat::ChatML),
        ("Mistral-7B-Instruct", TemplateFormat::Mistral),
        ("MISTRAL-7B", TemplateFormat::Mistral),
        ("llama-3-8b", TemplateFormat::Llama2),
        ("LLAMA-3-8B", TemplateFormat::Llama2),
        ("PHI-3-mini", TemplateFormat::Phi),
        ("phi-2", TemplateFormat::Phi),
    ];
    for (name, expected) in cases.iter().copied() {
        let detected = detect_format_from_name(name);
        assert_eq!(detected, expected,
"FALSIFY: detect_format_from_name('{name}') returned {detected:?}, expected {expected:?}");
    }
}
// Sibling test modules, wired in via explicit #[path] attributes so the
// files can live next to this one without a mod.rs directory hierarchy.
#[path = "tests_toyota_compliance.rs"]
mod tests_toyota_compliance;
#[path = "tests_long_conversation.rs"]
mod tests_long_conversation;
#[path = "tests_phi_template.rs"]
mod tests_phi_template;
#[path = "tests_huggingface.rs"]
mod tests_huggingface;
#[path = "tests_sanitize.rs"]
mod tests_sanitize;
#[path = "tests_ct_proptest.rs"]
mod tests_ct_proptest;