1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
//! Proptest falsification for chat template semantics contract
//!
//! Contract: contracts/chat-template-semantics-v1.yaml
//! Tests: FALSIFY-CT-001-prop through FALSIFY-CT-UTF8-prop
//!
//! Property-based testing for invariants that hold for ALL valid inputs.
use super::*;
use proptest::prelude::*;
proptest! {
/// FALSIFY-CT-001-prop: ChatML delimiters are balanced up to the open assistant turn.
///
/// A single user message with arbitrary content is formatted with
/// `ChatMLTemplate`. The assertion encodes the expectation that the
/// generation prompt leaves exactly one `<|im_start|>` without a matching
/// `<|im_end|>` (the assistant turn), hence:
/// count(`<|im_start|>`) == count(`<|im_end|>`) + 1
#[test]
fn falsify_ct_001_prop_balanced_delimiters(content in ".*") {
    let chatml = ChatMLTemplate::new();
    let conversation = vec![ChatMessage::user(content)];
    let rendered = chatml
        .format_conversation(&conversation)
        .expect("format_conversation must not fail");

    // Count non-overlapping occurrences of a delimiter in the rendered prompt.
    let occurrences = |marker: &str| rendered.matches(marker).count();
    let opens = occurrences("<|im_start|>");
    let closes = occurrences("<|im_end|>");

    prop_assert_eq!(
        opens,
        closes + 1,
        "im_start={} must equal im_end={} + 1 in output: {:?}",
        opens,
        closes,
        rendered
    );
}
/// FALSIFY-CT-002-prop: sanitization leaves no injection patterns behind.
///
/// For any generated string, the result of `sanitize_user_content` must make
/// `contains_injection_patterns` return `false`.
#[test]
fn falsify_ct_002_prop_sanitize_removes_injections(input in ".*") {
    let cleaned = sanitize_user_content(&input);
    let still_injectable = contains_injection_patterns(&cleaned);
    prop_assert!(
        !still_injectable,
        "Sanitized output still contains injection patterns: {:?}",
        cleaned
    );
}
/// FALSIFY-CT-003-prop: system content precedes user content (ChatML).
///
/// A system message followed by a user message is formatted with
/// `ChatMLTemplate`; the first occurrence of the system text must come
/// before the first occurrence of the user text in the output.
///
/// The generated suffixes are wrapped with fixed `SYS_` / `USR_` prefixes so
/// that a short random string cannot accidentally be located inside a
/// template role label (e.g. "r" inside "user").
#[test]
fn falsify_ct_003_prop_system_before_user_chatml(
    sys_suffix in "[a-zA-Z0-9]{3,30}",
    user_suffix in "[a-zA-Z0-9]{3,30}"
) {
    // Distinctive needles: the prefix makes each one unambiguous to search for.
    let system_text = format!("SYS_{sys_suffix}");
    let user_text = format!("USR_{user_suffix}");

    let chatml = ChatMLTemplate::new();
    let conversation = vec![
        ChatMessage::system(system_text.clone()),
        ChatMessage::user(user_text.clone()),
    ];
    let rendered = chatml
        .format_conversation(&conversation)
        .expect("format_conversation must not fail");

    let sys_pos = rendered.find(&system_text);
    let user_pos = rendered.find(&user_text);
    prop_assert!(sys_pos.is_some(), "System content not found in output");
    prop_assert!(user_pos.is_some(), "User content not found in output");

    // Both options are `Some` at this point, so comparing the options
    // compares the byte offsets they carry.
    prop_assert!(
        sys_pos < user_pos,
        "System content must appear before user content. sys_pos={:?}, user_pos={:?}",
        sys_pos,
        user_pos
    );
}
/// FALSIFY-CT-005-prop: Multi-turn message ordering preserved.
///
/// A user / assistant / user conversation is formatted with `ChatMLTemplate`,
/// and the first occurrence of each message's content must appear in the
/// output in the same order in which the messages were supplied.
///
/// Each generated body is prefixed with its own marker (`MSG1_`, `MSG2_`,
/// `MSG3_`). The three strategies are independent, so without the markers two
/// messages could be equal, one could be a substring of another, or one could
/// match text inside a template role label; `find` would then report a
/// position belonging to the wrong place and the property would fail
/// spuriously. The generated bodies contain only ASCII letters, so a marker
/// can never occur inside a body.
#[test]
fn falsify_ct_005_prop_multi_turn_ordering(
    msg1 in "[a-zA-Z]{5,15}",
    msg2 in "[a-zA-Z]{5,15}",
    msg3 in "[a-zA-Z]{5,15}"
) {
    // Shadow the raw bodies with their uniquely-marked forms.
    let msg1 = format!("MSG1_{msg1}");
    let msg2 = format!("MSG2_{msg2}");
    let msg3 = format!("MSG3_{msg3}");

    let template = ChatMLTemplate::new();
    let messages = vec![
        ChatMessage::user(msg1.clone()),
        ChatMessage::assistant(msg2.clone()),
        ChatMessage::user(msg3.clone()),
    ];
    let output = template
        .format_conversation(&messages)
        .expect("format_conversation must not fail");

    let pos1 = output.find(&msg1);
    let pos2 = output.find(&msg2);
    let pos3 = output.find(&msg3);
    prop_assert!(pos1.is_some(), "msg1 not found in output");
    prop_assert!(pos2.is_some(), "msg2 not found in output");
    prop_assert!(pos3.is_some(), "msg3 not found in output");

    // All three are `Some` here, so comparing the options compares offsets.
    prop_assert!(pos1 < pos2, "msg1 must appear before msg2");
    prop_assert!(pos2 < pos3, "msg2 must appear before msg3");
}
/// FALSIFY-CT-006-prop: LLaMA2 output carries both system delimiters.
///
/// A system message followed by a user message is formatted with
/// `Llama2Template`; the output must contain the opening `<<SYS>>` marker
/// as well as the closing `<</SYS>>` marker.
#[test]
fn falsify_ct_006_prop_llama2_system_wrapped(
    sys_content in "[a-zA-Z0-9 ]{1,100}",
    user_content in "[a-zA-Z0-9 ]{1,100}"
) {
    let llama2 = Llama2Template::new();
    let conversation = vec![
        ChatMessage::system(sys_content),
        ChatMessage::user(user_content),
    ];
    let rendered = llama2
        .format_conversation(&conversation)
        .expect("format_conversation must not fail");

    let has_open_marker = rendered.contains("<<SYS>>");
    prop_assert!(
        has_open_marker,
        "LLaMA2 output missing <<SYS>> delimiter: {:?}",
        rendered
    );

    let has_close_marker = rendered.contains("<</SYS>>");
    prop_assert!(
        has_close_marker,
        "LLaMA2 output missing <</SYS>> delimiter: {:?}",
        rendered
    );
}
/// FALSIFY-CT-008-prop: `detect_format_from_name` tolerates any input.
///
/// The function is called with arbitrary generated strings and its result is
/// discarded; the property holds as long as the call returns without
/// panicking.
#[test]
fn falsify_ct_008_prop_detect_format_never_panics(name in ".*") {
    // Only completion of the call matters, not the detected format.
    let _detected = detect_format_from_name(&name);
}
/// FALSIFY-CT-011-prop: names without a known keyword map to `Raw`.
///
/// A name containing none of the known model keywords (qwen, openhermes,
/// yi-, mistral, mixtral, llama, vicuna, tinyllama, phi-, phi2, phi3,
/// alpaca) must be detected as `TemplateFormat::Raw`.
#[test]
fn falsify_ct_011_prop_unknown_names_produce_raw(name in "[0-9]{1,50}") {
    // Every keyword listed above contains at least one letter, so a
    // digits-only name cannot contain any of them.
    let detected = detect_format_from_name(&name);
    prop_assert_eq!(
        detected,
        TemplateFormat::Raw,
        "Numeric-only name {:?} should produce Raw, got {:?}",
        name,
        detected
    );
}
/// FALSIFY-CT-012-prop: sanitizing twice equals sanitizing once.
///
/// For any generated string `x`:
/// `sanitize_user_content(sanitize_user_content(x)) == sanitize_user_content(x)`
#[test]
fn falsify_ct_012_prop_sanitization_idempotent(input in ".*") {
    let first_pass = sanitize_user_content(&input);
    let second_pass = sanitize_user_content(&first_pass);
    // Compare through references so both results stay available for the
    // failure message below.
    prop_assert_eq!(
        &first_pass,
        &second_pass,
        "Sanitization is not idempotent: once={:?}, twice={:?}",
        first_pass,
        second_pass
    );
}
/// FALSIFY-CT-UTF8-prop: Valid UTF-8 preservation.
///
/// A single user message with non-ASCII content is formatted with
/// `ChatMLTemplate`, and the original content must appear verbatim in the
/// output.
///
/// The strategy generates 1 to 50 characters drawn from: ASCII lowercase
/// letters, the space character, a CJK range (U+4E00..=U+4E10), an emoji
/// range (U+1F600..=U+1F610) and a Cyrillic range (U+0410..=U+042F).
/// It does not generate ZWJ (U+200D) sequences.
#[test]
fn falsify_ct_utf8_prop_unicode_preservation(
    content in prop::string::string_regex("[a-z\u{4e00}-\u{4e10}\u{1f600}-\u{1f610}\u{0410}-\u{042f} ]{1,50}")
        .expect("unicode strategy regex must be valid")
) {
    let template = ChatMLTemplate::new();
    let messages = vec![ChatMessage::user(content.clone())];
    let output = template
        .format_conversation(&messages)
        .expect("format_conversation must not fail");
    // UTF-8 validity itself is guaranteed by Rust's string types, so the
    // meaningful check is that the content survives formatting unchanged.
    prop_assert!(
        output.contains(&content),
        "Unicode content not preserved in output.\nContent: {:?}\nOutput: {:?}",
        content,
        output
    );
}
}