Skip to main content

nils_common/
markdown.rs

1use std::error::Error;
2use std::fmt;
3
/// Two-character escape spellings (a backslash followed by `n`, `r`, or `t`) that
/// `markdown_payload_violations` searches for in a payload.
///
/// These are raw strings, so each entry is the literal backslash-letter pair, not
/// the control character it would normally denote.
const LITERAL_ESCAPED_CONTROLS: [&str; 3] = [r"\n", r"\r", r"\t"];
5
/// One kind of literal escaped-control sequence found in a markdown payload,
/// together with how often it occurred.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownPayloadViolation {
    /// The literal escape sequence that was matched (for example a backslash
    /// followed by `n`).
    pub sequence: &'static str,
    /// Number of matches of `sequence` in the payload, as counted by
    /// `markdown_payload_violations`.
    pub count: usize,
}
11
/// Error produced by `validate_markdown_payload` when a payload contains literal
/// escaped-control sequences.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MarkdownPayloadError {
    // Violations that caused the error; exposed read-only through `violations()`.
    violations: Vec<MarkdownPayloadViolation>,
}
16
17impl MarkdownPayloadError {
18    pub fn violations(&self) -> &[MarkdownPayloadViolation] {
19        &self.violations
20    }
21}
22
23impl fmt::Display for MarkdownPayloadError {
24    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
25        let details = self
26            .violations
27            .iter()
28            .map(|entry| format!("{} ({})", entry.sequence, entry.count))
29            .collect::<Vec<_>>()
30            .join(", ");
31        write!(
32            f,
33            "markdown payload contains literal escaped-control artifacts: {details}"
34        )
35    }
36}
37
// Marks `MarkdownPayloadError` as a standard error type; every `Error` method
// keeps its default implementation.
impl Error for MarkdownPayloadError {}
39
40pub fn markdown_payload_violations(markdown: &str) -> Vec<MarkdownPayloadViolation> {
41    let mut violations = Vec::new();
42
43    for sequence in LITERAL_ESCAPED_CONTROLS {
44        let count = markdown.match_indices(sequence).count();
45        if count > 0 {
46            violations.push(MarkdownPayloadViolation { sequence, count });
47        }
48    }
49
50    violations
51}
52
53pub fn validate_markdown_payload(markdown: &str) -> Result<(), MarkdownPayloadError> {
54    let violations = markdown_payload_violations(markdown);
55    if violations.is_empty() {
56        Ok(())
57    } else {
58        Err(MarkdownPayloadError { violations })
59    }
60}
61
/// Rewrites `value` so it can sit inside a single markdown table cell.
///
/// Every run of consecutive `\n` / `\r` characters becomes one space, and every
/// `|` becomes `/`. All other characters are copied through unchanged.
pub fn canonicalize_table_cell(value: &str) -> String {
    let mut cell = String::with_capacity(value.len());
    // True while the previous character was part of a line-break run, so the run
    // contributes exactly one space no matter how long it is.
    let mut prev_was_break = false;

    for ch in value.chars() {
        let is_break = ch == '\n' || ch == '\r';
        if !is_break {
            cell.push(if ch == '|' { '/' } else { ch });
        } else if !prev_was_break {
            cell.push(' ');
        }
        prev_was_break = is_break;
    }

    cell
}
87
88fn sort_json(value: &serde_json::Value) -> serde_json::Value {
89    match value {
90        serde_json::Value::Object(map) => {
91            let mut keys: Vec<&String> = map.keys().collect();
92            keys.sort();
93            let mut out = serde_json::Map::new();
94            for k in keys {
95                let v = map.get(k).expect("key exists");
96                out.insert(k.clone(), sort_json(v));
97            }
98            serde_json::Value::Object(out)
99        }
100        serde_json::Value::Array(values) => {
101            serde_json::Value::Array(values.iter().map(sort_json).collect())
102        }
103        other => other.clone(),
104    }
105}
106
107/// Format JSON similar to `jq -S .` (stable key order, pretty printed).
108pub fn format_json_pretty_sorted(value: &serde_json::Value) -> Result<String, serde_json::Error> {
109    let sorted = sort_json(value);
110    serde_json::to_string_pretty(&sorted)
111}
112
/// Builds an ATX-style markdown heading line terminated by a newline.
///
/// `level` is clamped to the range 1 to 6, and `text` is trimmed of leading and
/// trailing whitespace before being placed after the `#` markers.
pub fn heading(level: u8, text: &str) -> String {
    let depth = usize::from(level.clamp(1, 6));

    let mut line = "#".repeat(depth);
    line.push(' ');
    line.push_str(text.trim());
    line.push('\n');
    line
}
117
/// Wraps `body` in a markdown fenced code block tagged with `lang`.
///
/// `lang` is trimmed and used as the info string. A trailing newline is appended
/// to `body` when it does not already end with one, so the closing fence always
/// starts on its own line. The returned string ends with a newline.
///
/// The fence is three backticks unless `body` itself contains a run of three or
/// more backticks; in that case the fence is made one backtick longer than the
/// longest run so the body cannot terminate the block early.
pub fn code_block(lang: &str, body: &str) -> String {
    // Splitting on every non-backtick character leaves only the backtick runs
    // (plus empty pieces), so the longest piece is the longest run.
    let longest_run = body
        .split(|ch: char| ch != '`')
        .map(str::len)
        .max()
        .unwrap_or(0);
    let fence = "`".repeat(longest_run.saturating_add(1).max(3));
    let lang = lang.trim();

    let mut out = String::with_capacity(fence.len() * 2 + lang.len() + body.len() + 3);
    out.push_str(&fence);
    out.push_str(lang);
    out.push('\n');
    out.push_str(body);
    if !body.ends_with('\n') {
        out.push('\n');
    }
    out.push_str(&fence);
    out.push('\n');
    out
}
130
// Unit tests for the payload validator and the markdown/JSON formatting helpers.
#[cfg(test)]
mod tests {
    use super::{
        canonicalize_table_cell, code_block, format_json_pretty_sorted, heading,
        markdown_payload_violations, validate_markdown_payload,
    };

    // Real control characters (actual newline, tab, CR) must not be flagged; only
    // their backslash-letter spellings are violations.
    #[test]
    fn markdown_payload_validator_accepts_real_control_chars() {
        let payload = "line one\nline two\tvalue\r\n";
        let result = validate_markdown_payload(payload);
        assert!(
            result.is_ok(),
            "unexpected markdown payload error: {result:?}"
        );
    }

    // A raw string keeps the backslash-letter pairs literal, so all three
    // sequences should be reported and named in the error message.
    #[test]
    fn markdown_payload_validator_rejects_literal_escaped_controls() {
        let payload = r"line one\nline two\rline three\tvalue";
        let err = validate_markdown_payload(payload).expect_err("expected markdown payload error");

        assert_eq!(err.violations().len(), 3);
        assert!(
            err.to_string().contains(r"\n"),
            "expected escaped-newline mention in {:?}",
            err
        );
        assert!(
            err.to_string().contains(r"\r"),
            "expected escaped-return mention in {:?}",
            err
        );
        assert!(
            err.to_string().contains(r"\t"),
            "expected escaped-tab mention in {:?}",
            err
        );
    }

    // Counts are tracked per sequence, and sequences with no match are omitted.
    #[test]
    fn markdown_payload_violations_reports_counts_per_sequence() {
        let payload = r"one\n two\n three\t";
        let violations = markdown_payload_violations(payload);

        assert_eq!(violations.len(), 2);
        assert_eq!(violations[0].sequence, r"\n");
        assert_eq!(violations[0].count, 2);
        assert_eq!(violations[1].sequence, r"\t");
        assert_eq!(violations[1].count, 1);
    }

    // Pipes become slashes and each line-break run (including CRLF) becomes a
    // single space.
    #[test]
    fn canonicalize_table_cell_normalizes_markdown_unsafe_chars() {
        let value = "A|B\r\nC\nD\rE";
        assert_eq!(canonicalize_table_cell(value), "A/B C D E");
    }

    // Canonicalizing already-canonical output must leave it unchanged.
    #[test]
    fn canonicalize_table_cell_is_idempotent() {
        let first = canonicalize_table_cell("x|y\r\nz");
        let second = canonicalize_table_cell(&first);
        assert_eq!(first, second);
    }

    // The output is the same whether or not the body already ends with a newline.
    #[test]
    fn markdown_code_block_is_newline_stable() {
        assert_eq!(code_block("json", "{ }"), "```json\n{ }\n```\n");
        assert_eq!(code_block("json", "{ }\n"), "```json\n{ }\n```\n");
    }

    // Heading text is trimmed and out-of-range levels are clamped to 6.
    #[test]
    fn markdown_heading_trims_and_clamps_level() {
        assert_eq!(heading(1, " Title "), "# Title\n");
        assert_eq!(heading(9, "Title"), "###### Title\n");
    }

    // Keys are sorted at every nesting level, not only at the top.
    #[test]
    fn json_format_sorts_keys_recursively() {
        let v = serde_json::json!({"b": 1, "a": {"d": 4, "c": 3}});
        let s = format_json_pretty_sorted(&v).expect("sorted json");
        assert_eq!(
            s,
            "{\n  \"a\": {\n    \"c\": 3,\n    \"d\": 4\n  },\n  \"b\": 1\n}"
        );
    }
}