Skip to main content

nils_common/
markdown.rs

1use std::error::Error;
2use std::fmt;
3
/// Two-character literal spellings of control characters (a backslash followed by
/// `n`, `r`, or `t`) that are treated as escaped-control artifacts in a payload.
///
/// These are raw strings: each entry is the backslash plus a letter, not the real
/// control character it would normally denote.
const LITERAL_ESCAPED_CONTROLS: [&str; 3] = [
    r"\n", // backslash + `n`
    r"\r", // backslash + `r`
    r"\t", // backslash + `t`
];
5
/// One kind of literal escaped-control sequence found in a markdown payload,
/// together with how often it occurred.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MarkdownPayloadViolation {
    /// The literal sequence that was matched (for example a backslash followed by `n`).
    pub sequence: &'static str,
    /// How many times `sequence` occurred in the payload.
    pub count: usize,
}
11
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct MarkdownPayloadError {
14    violations: Vec<MarkdownPayloadViolation>,
15}
16
17impl MarkdownPayloadError {
18    pub fn violations(&self) -> &[MarkdownPayloadViolation] {
19        &self.violations
20    }
21}
22
23impl fmt::Display for MarkdownPayloadError {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        let details = self
26            .violations
27            .iter()
28            .map(|entry| format!("{} ({})", entry.sequence, entry.count))
29            .collect::<Vec<_>>()
30            .join(", ");
31        write!(
32            f,
33            "markdown payload contains literal escaped-control artifacts: {details}"
34        )
35    }
36}
37
38impl Error for MarkdownPayloadError {}
39
40pub fn markdown_payload_violations(markdown: &str) -> Vec<MarkdownPayloadViolation> {
41    let mut violations = Vec::new();
42
43    for sequence in LITERAL_ESCAPED_CONTROLS {
44        let count = markdown.match_indices(sequence).count();
45        if count > 0 {
46            violations.push(MarkdownPayloadViolation { sequence, count });
47        }
48    }
49
50    violations
51}
52
53pub fn validate_markdown_payload(markdown: &str) -> Result<(), MarkdownPayloadError> {
54    let violations = markdown_payload_violations(markdown);
55    if violations.is_empty() {
56        Ok(())
57    } else {
58        Err(MarkdownPayloadError { violations })
59    }
60}
61
/// Rewrites `value` so it can sit inside a single markdown table cell.
///
/// - Every run of consecutive `\n` / `\r` characters becomes one space.
/// - Every `|` becomes `/`.
/// - All other characters are copied through unchanged.
pub fn canonicalize_table_cell(value: &str) -> String {
    let mut cell = String::with_capacity(value.len());
    let mut previous_was_break = false;

    for ch in value.chars() {
        let is_break = matches!(ch, '\n' | '\r');

        if !is_break {
            cell.push(if ch == '|' { '/' } else { ch });
        } else if !previous_was_break {
            // Only the first character of a line-break run emits the space.
            cell.push(' ');
        }

        previous_was_break = is_break;
    }

    cell
}
87
#[cfg(test)]
mod tests {
    use super::{canonicalize_table_cell, markdown_payload_violations, validate_markdown_payload};

    /// Real control characters (actual newline, tab, carriage return) must pass validation.
    #[test]
    fn markdown_payload_validator_accepts_real_control_chars() {
        let result = validate_markdown_payload("line one\nline two\tvalue\r\n");
        assert!(
            result.is_ok(),
            "unexpected markdown payload error: {result:?}"
        );
    }

    /// Literal backslash escapes are rejected and each one is named in the message.
    #[test]
    fn markdown_payload_validator_rejects_literal_escaped_controls() {
        let err = validate_markdown_payload(r"line one\nline two\rline three\tvalue")
            .expect_err("expected markdown payload error");
        let rendered = err.to_string();

        assert_eq!(err.violations().len(), 3);
        assert!(
            rendered.contains(r"\n"),
            "expected escaped-newline mention in {:?}",
            err
        );
        assert!(
            rendered.contains(r"\r"),
            "expected escaped-return mention in {:?}",
            err
        );
        assert!(
            rendered.contains(r"\t"),
            "expected escaped-tab mention in {:?}",
            err
        );
    }

    /// Only sequences that occur are reported, each with its own occurrence count.
    #[test]
    fn markdown_payload_violations_reports_counts_per_sequence() {
        let violations = markdown_payload_violations(r"one\n two\n three\t");
        let summary: Vec<(&str, usize)> = violations
            .iter()
            .map(|violation| (violation.sequence, violation.count))
            .collect();

        assert_eq!(summary, [(r"\n", 2), (r"\t", 1)]);
    }

    /// Pipes become slashes and every line-break run collapses to one space.
    #[test]
    fn canonicalize_table_cell_normalizes_markdown_unsafe_chars() {
        let canonical = canonicalize_table_cell("A|B\r\nC\nD\rE");
        assert_eq!(canonical, "A/B C D E");
    }

    /// Canonicalizing an already-canonical cell leaves it unchanged.
    #[test]
    fn canonicalize_table_cell_is_idempotent() {
        let once = canonicalize_table_cell("x|y\r\nz");
        let twice = canonicalize_table_cell(&once);
        assert_eq!(once, twice);
    }
}