1use std::error::Error;
2use std::fmt;
3
/// Two-character sequences (a backslash followed by `n`, `r`, or `t`) that
/// spell out an escaped control character literally instead of containing the
/// control character itself.
const LITERAL_ESCAPED_CONTROLS: [&str; 3] = [
    r"\n", // backslash + 'n'
    r"\r", // backslash + 'r'
    r"\t", // backslash + 't'
];
5
/// One kind of literal escaped-control sequence together with the number of
/// times it was found.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MarkdownPayloadViolation {
    /// The literal sequence that was matched (for example a backslash
    /// followed by `n`).
    pub sequence: &'static str,
    /// How many times `sequence` was found.
    pub count: usize,
}
11
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct MarkdownPayloadError {
14 violations: Vec<MarkdownPayloadViolation>,
15}
16
17impl MarkdownPayloadError {
18 pub fn violations(&self) -> &[MarkdownPayloadViolation] {
19 &self.violations
20 }
21}
22
23impl fmt::Display for MarkdownPayloadError {
24 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25 let details = self
26 .violations
27 .iter()
28 .map(|entry| format!("{} ({})", entry.sequence, entry.count))
29 .collect::<Vec<_>>()
30 .join(", ");
31 write!(
32 f,
33 "markdown payload contains literal escaped-control artifacts: {details}"
34 )
35 }
36}
37
38impl Error for MarkdownPayloadError {}
39
40pub fn markdown_payload_violations(markdown: &str) -> Vec<MarkdownPayloadViolation> {
41 let mut violations = Vec::new();
42
43 for sequence in LITERAL_ESCAPED_CONTROLS {
44 let count = markdown.match_indices(sequence).count();
45 if count > 0 {
46 violations.push(MarkdownPayloadViolation { sequence, count });
47 }
48 }
49
50 violations
51}
52
53pub fn validate_markdown_payload(markdown: &str) -> Result<(), MarkdownPayloadError> {
54 let violations = markdown_payload_violations(markdown);
55 if violations.is_empty() {
56 Ok(())
57 } else {
58 Err(MarkdownPayloadError { violations })
59 }
60}
61
/// Rewrites `value` so it contains no line breaks and no `|` characters.
///
/// * Every run of consecutive `'\n'` / `'\r'` characters becomes a single
///   space (so `"\r\n"` yields one space, not two).
/// * Every `'|'` becomes `'/'`.
/// * All other characters are copied unchanged.
pub fn canonicalize_table_cell(value: &str) -> String {
    let mut cell = String::with_capacity(value.len());
    // True when the previously visited character was '\n' or '\r'.
    let mut prev_was_break = false;

    for ch in value.chars() {
        let is_break = matches!(ch, '\n' | '\r');

        if is_break {
            // Only the first character of a break run emits the space.
            if !prev_was_break {
                cell.push(' ');
            }
        } else if ch == '|' {
            cell.push('/');
        } else {
            cell.push(ch);
        }

        prev_was_break = is_break;
    }

    cell
}
87
#[cfg(test)]
mod tests {
    use super::{canonicalize_table_cell, markdown_payload_violations, validate_markdown_payload};

    /// Real newline, tab, and carriage-return characters must pass validation.
    #[test]
    fn markdown_payload_validator_accepts_real_control_chars() {
        let result = validate_markdown_payload("line one\nline two\tvalue\r\n");
        assert!(
            result.is_ok(),
            "unexpected markdown payload error: {result:?}"
        );
    }

    /// Backslash-letter sequences written out literally must be rejected, and
    /// each of them must show up in the rendered error message.
    #[test]
    fn markdown_payload_validator_rejects_literal_escaped_controls() {
        let err = validate_markdown_payload(r"line one\nline two\rline three\tvalue")
            .expect_err("expected markdown payload error");
        let rendered = err.to_string();

        assert_eq!(err.violations().len(), 3);
        assert!(
            rendered.contains(r"\n"),
            "expected escaped-newline mention in {:?}",
            err
        );
        assert!(
            rendered.contains(r"\r"),
            "expected escaped-return mention in {:?}",
            err
        );
        assert!(
            rendered.contains(r"\t"),
            "expected escaped-tab mention in {:?}",
            err
        );
    }

    /// Counts are reported per sequence, and absent sequences are omitted.
    #[test]
    fn markdown_payload_violations_reports_counts_per_sequence() {
        let observed: Vec<(&str, usize)> = markdown_payload_violations(r"one\n two\n three\t")
            .iter()
            .map(|violation| (violation.sequence, violation.count))
            .collect();

        assert_eq!(observed, vec![(r"\n", 2), (r"\t", 1)]);
    }

    /// Pipes become slashes and every line-break run becomes one space.
    #[test]
    fn canonicalize_table_cell_normalizes_markdown_unsafe_chars() {
        let canonical = canonicalize_table_cell("A|B\r\nC\nD\rE");
        assert_eq!(canonical, "A/B C D E");
    }

    /// Canonicalizing an already canonical value must not change it.
    #[test]
    fn canonicalize_table_cell_is_idempotent() {
        let once = canonicalize_table_cell("x|y\r\nz");
        let twice = canonicalize_table_cell(&once);
        assert_eq!(once, twice);
    }
}