1use std::error::Error;
2use std::fmt;
3
/// Literal two-character sequences (a backslash followed by `n`, `r` or `t`)
/// that the payload validator reports when they appear in markdown prose.
///
/// These are the *escaped spellings*, not the control characters themselves;
/// violations are reported in this order.
const LITERAL_ESCAPED_CONTROLS: [&str; 3] = [r"\n", r"\r", r"\t"];
5
/// One kind of literal escaped-control sequence found in a markdown payload,
/// together with how often it occurred.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownPayloadViolation {
    /// The offending literal sequence, e.g. a backslash followed by `n`.
    pub sequence: &'static str,
    /// Number of non-overlapping occurrences of `sequence` that were counted.
    pub count: usize,
}
11
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct MarkdownPayloadError {
14 violations: Vec<MarkdownPayloadViolation>,
15}
16
17impl MarkdownPayloadError {
18 pub fn violations(&self) -> &[MarkdownPayloadViolation] {
19 &self.violations
20 }
21}
22
23impl fmt::Display for MarkdownPayloadError {
24 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25 let details = self
26 .violations
27 .iter()
28 .map(|entry| format!("{} ({})", entry.sequence, entry.count))
29 .collect::<Vec<_>>()
30 .join(", ");
31 write!(
32 f,
33 "markdown payload contains literal escaped-control artifacts: {details}"
34 )
35 }
36}
37
38impl Error for MarkdownPayloadError {}
39
40pub fn markdown_payload_violations(markdown: &str) -> Vec<MarkdownPayloadViolation> {
41 let scannable = strip_code_segments(markdown);
46 let mut violations = Vec::new();
47
48 for sequence in LITERAL_ESCAPED_CONTROLS {
49 let count = scannable.match_indices(sequence).count();
50 if count > 0 {
51 violations.push(MarkdownPayloadViolation { sequence, count });
52 }
53 }
54
55 violations
56}
57
58fn strip_code_segments(markdown: &str) -> String {
64 let mut out = String::with_capacity(markdown.len());
65 let mut open_fence: Option<(char, usize)> = None;
66
67 for line in markdown.split_inclusive('\n') {
68 if let Some((fence_char, fence_len)) = fence_marker(line) {
69 match open_fence {
70 None => {
71 open_fence = Some((fence_char, fence_len));
73 out.push('\n');
74 continue;
75 }
76 Some((open_char, open_len)) if fence_char == open_char && fence_len >= open_len => {
77 open_fence = None;
79 out.push('\n');
80 continue;
81 }
82 Some(_) => {
84 out.push('\n');
85 continue;
86 }
87 }
88 }
89 if open_fence.is_some() {
90 out.push('\n');
92 continue;
93 }
94 out.push_str(&strip_inline_code(line));
95 }
96
97 out
98}
99
/// Recognises a line that can open a fenced code block.
///
/// After leading whitespace is skipped, the line must start with a run of at
/// least three identical fence characters (`` ` `` or `~`). Returns the fence
/// character and the length of that run, or `None` when the line is not a
/// fence.
///
/// For backtick fences the rest of the line must not contain another
/// backtick: CommonMark forbids backticks in a backtick-fence info string, so
/// a line such as "```code``` text" is an inline code span, not a fence.
/// Tilde fences have no such restriction.
///
/// Any amount of leading whitespace is accepted, which is more lenient than
/// CommonMark's three-space limit.
fn fence_marker(line: &str) -> Option<(char, usize)> {
    let trimmed = line.trim_start();
    let fence_char = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'))?;

    let run = trimmed.chars().take_while(|&c| c == fence_char).count();
    if run < 3 {
        return None;
    }

    // Treating "```x``` y" as an opener would swallow the rest of the
    // document as code, so reject backtick fences whose tail has a backtick.
    if fence_char == '`' && trimmed.chars().skip(run).any(|c| c == '`') {
        return None;
    }

    Some((fence_char, run))
}
116
/// Removes inline code spans from a single line.
///
/// A span starts at a run of N backticks and ends at the next run of exactly
/// N backticks on the same line; the whole span, delimiters included, is
/// replaced by one space. An opening run with no matching closer is copied
/// through unchanged and scanning resumes right after it.
fn strip_inline_code(line: &str) -> String {
    let mut kept = String::with_capacity(line.len());
    let mut rest = line;

    // Backticks are ASCII, so every byte offset used below is a char boundary.
    while let Some(tick) = rest.find('`') {
        // Everything before the backtick run is ordinary prose.
        kept.push_str(&rest[..tick]);

        let from_tick = &rest[tick..];
        let open_len = from_tick.bytes().take_while(|&b| b == b'`').count();
        let (opener, after_opener) = from_tick.split_at(open_len);

        // Visit each later backtick run until one has the opener's length.
        let mut scanned = 0;
        let mut span_end = None;
        while let Some(offset) = after_opener[scanned..].find('`') {
            let run_start = scanned + offset;
            let run_len = after_opener[run_start..]
                .bytes()
                .take_while(|&b| b == b'`')
                .count();
            scanned = run_start + run_len;
            if run_len == open_len {
                span_end = Some(scanned);
                break;
            }
        }

        match span_end {
            // Closed span: collapse it to one space and continue after it.
            Some(end) => {
                kept.push(' ');
                rest = &after_opener[end..];
            }
            // Unclosed opener: the backticks are literal text.
            None => {
                kept.push_str(opener);
                rest = after_opener;
            }
        }
    }

    kept.push_str(rest);
    kept
}
171
172pub fn validate_markdown_payload(markdown: &str) -> Result<(), MarkdownPayloadError> {
173 let violations = markdown_payload_violations(markdown);
174 if violations.is_empty() {
175 Ok(())
176 } else {
177 Err(MarkdownPayloadError { violations })
178 }
179}
180
/// Makes `value` safe to place inside a markdown table cell.
///
/// * Each run of consecutive `'\n'` / `'\r'` characters becomes a single space.
/// * Each `'|'` becomes `'/'`.
/// * Every other character is copied unchanged.
///
/// The output contains none of the replaced characters, so applying the
/// function to its own output returns the same string.
pub fn canonicalize_table_cell(value: &str) -> String {
    let mut cell = String::with_capacity(value.len());
    let mut previous_was_break = false;

    for ch in value.chars() {
        let is_break = matches!(ch, '\n' | '\r');
        if is_break {
            // Only the first character of a line-break run emits the space.
            if !previous_was_break {
                cell.push(' ');
            }
        } else {
            cell.push(if ch == '|' { '/' } else { ch });
        }
        previous_was_break = is_break;
    }

    cell
}
206
207fn sort_json(value: &serde_json::Value) -> serde_json::Value {
208 match value {
209 serde_json::Value::Object(map) => {
210 let mut keys: Vec<&String> = map.keys().collect();
211 keys.sort();
212 let mut out = serde_json::Map::new();
213 for k in keys {
214 let v = map.get(k).expect("key exists");
215 out.insert(k.clone(), sort_json(v));
216 }
217 serde_json::Value::Object(out)
218 }
219 serde_json::Value::Array(values) => {
220 serde_json::Value::Array(values.iter().map(sort_json).collect())
221 }
222 other => other.clone(),
223 }
224}
225
226pub fn format_json_pretty_sorted(value: &serde_json::Value) -> Result<String, serde_json::Error> {
228 let sorted = sort_json(value);
229 serde_json::to_string_pretty(&sorted)
230}
231
/// Builds a markdown ATX heading line, terminated by a newline.
///
/// `level` is clamped to the range 1 to 6 and `text` is trimmed of
/// surrounding whitespace before being placed after the `#` markers.
pub fn heading(level: u8, text: &str) -> String {
    let depth = usize::from(level.max(1).min(6));

    let mut line = "#".repeat(depth);
    line.push(' ');
    line.push_str(text.trim());
    line.push('\n');
    line
}
236
/// Wraps `body` in a backtick-fenced markdown code block.
///
/// The output is the opening fence followed by the trimmed `lang` as info
/// string, then `body` (a trailing newline is added when missing), then the
/// closing fence and a newline.
///
/// The fence is three backticks unless `body` itself contains a run of three
/// or more backticks; in that case the fence is one backtick longer than the
/// longest run, so a "```" line inside `body` cannot close the block early.
///
/// `lang` is not validated; callers should pass a plain language tag without
/// backticks or line breaks.
pub fn code_block(lang: &str, body: &str) -> String {
    const MIN_FENCE_LEN: usize = 3;

    // Splitting on every non-backtick char leaves only the backtick runs
    // (and empty pieces), so the longest piece is the longest run.
    let longest_run = body
        .split(|c: char| c != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    let fence = "`".repeat((longest_run + 1).max(MIN_FENCE_LEN));

    let lang = lang.trim();
    let mut out = String::with_capacity(2 * fence.len() + lang.len() + body.len() + 3);
    out.push_str(&fence);
    out.push_str(lang);
    out.push('\n');
    out.push_str(body);
    if !body.ends_with('\n') {
        out.push('\n');
    }
    out.push_str(&fence);
    out.push('\n');
    out
}
249
#[cfg(test)]
mod tests {
    use super::{
        canonicalize_table_cell, code_block, format_json_pretty_sorted, heading,
        markdown_payload_violations, validate_markdown_payload,
    };

    // Real control characters are fine; only their escaped spellings are not.
    #[test]
    fn markdown_payload_validator_accepts_real_control_chars() {
        let result = validate_markdown_payload("line one\nline two\tvalue\r\n");
        assert!(
            result.is_ok(),
            "unexpected markdown payload error: {result:?}"
        );
    }

    // All three escaped spellings are reported and named in the message.
    #[test]
    fn markdown_payload_validator_rejects_literal_escaped_controls() {
        let err = validate_markdown_payload(r"line one\nline two\rline three\tvalue")
            .expect_err("expected markdown payload error");
        assert_eq!(err.violations().len(), 3);

        let rendered = err.to_string();
        for (sequence, label) in [
            (r"\n", "escaped-newline"),
            (r"\r", "escaped-return"),
            (r"\t", "escaped-tab"),
        ] {
            assert!(
                rendered.contains(sequence),
                "expected {label} mention in {err:?}"
            );
        }
    }

    // Counts are per sequence, and absent sequences are not listed.
    #[test]
    fn markdown_payload_violations_reports_counts_per_sequence() {
        let found: Vec<(&str, usize)> = markdown_payload_violations(r"one\n two\n three\t")
            .iter()
            .map(|violation| (violation.sequence, violation.count))
            .collect();

        assert_eq!(found, [(r"\n", 2), (r"\t", 1)]);
    }

    #[test]
    fn markdown_payload_validator_ignores_escaped_controls_in_fenced_code() {
        let outcome = validate_markdown_payload(
            "Prose before.\n\n```sh\nprintf 'a\\nb'\n```\n\nProse after.\n",
        );
        assert!(
            outcome.is_ok(),
            "escaped controls inside a fenced code block must not be flagged"
        );
    }

    #[test]
    fn markdown_payload_validator_ignores_escaped_controls_in_inline_code() {
        let outcome = validate_markdown_payload(r"Run `printf 'a\nb'` to print two lines.");
        assert!(
            outcome.is_ok(),
            "escaped controls inside an inline code span must not be flagged"
        );
    }

    #[test]
    fn markdown_payload_validator_still_flags_escaped_controls_in_prose() {
        let found: Vec<(&str, usize)> =
            markdown_payload_violations(r"Status: done.\nNext: ship it.")
                .iter()
                .map(|violation| (violation.sequence, violation.count))
                .collect();

        assert_eq!(found, [(r"\n", 1)]);
    }

    // Prose, a fenced block and an inline span each contain one escaped
    // newline; only the prose one may be counted.
    #[test]
    fn markdown_payload_validator_flags_prose_but_not_code_in_mixed_payload() {
        let violations = markdown_payload_violations(
            "Bad prose: a\\nb\n\n```\nprintf 'x\\ny'\n```\n\nUse `echo 'p\\nq'` here.\n",
        );
        assert_eq!(
            violations.len(),
            1,
            "only the prose occurrence counts: {violations:?}"
        );

        let only = &violations[0];
        assert_eq!(only.sequence, r"\n");
        assert_eq!(only.count, 1);
    }

    #[test]
    fn canonicalize_table_cell_normalizes_markdown_unsafe_chars() {
        assert_eq!(canonicalize_table_cell("A|B\r\nC\nD\rE"), "A/B C D E");
    }

    #[test]
    fn canonicalize_table_cell_is_idempotent() {
        let once = canonicalize_table_cell("x|y\r\nz");
        let twice = canonicalize_table_cell(&once);
        assert_eq!(once, twice);
    }

    // The same block is produced with or without a trailing newline in the body.
    #[test]
    fn markdown_code_block_is_newline_stable() {
        for body in ["{ }", "{ }\n"] {
            assert_eq!(code_block("json", body), "```json\n{ }\n```\n");
        }
    }

    #[test]
    fn markdown_heading_trims_and_clamps_level() {
        assert_eq!(heading(1, " Title "), "# Title\n");
        assert_eq!(heading(9, "Title"), "###### Title\n");
    }

    #[test]
    fn json_format_sorts_keys_recursively() {
        let unsorted = serde_json::json!({"b": 1, "a": {"d": 4, "c": 3}});
        let rendered = format_json_pretty_sorted(&unsorted).expect("sorted json");
        assert_eq!(
            rendered,
            "{\n \"a\": {\n \"c\": 3,\n \"d\": 4\n },\n \"b\": 1\n}"
        );
    }
}