Skip to main content

apcore_toolkit/formatting/
markdown.rs

1// Generic dict-to-Markdown conversion with depth control and table heuristics.
2//
3// Provides `to_markdown()` — a best-effort converter for arbitrary JSON values.
4
5use std::collections::HashSet;
6
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use thiserror::Error;
10
/// Absolute ceiling on recursion depth, regardless of the caller-supplied
/// `MarkdownOptions::max_depth`. Guards against stack overflow when a
/// caller passes `usize::MAX` (or a similarly large value) together with
/// a deeply nested `Value`.
///
/// The cap is applied in two places: `to_markdown` clamps `max_depth` to it
/// before rendering starts, and `compact_repr_inner` stops descending once
/// its own depth counter reaches it. Each of those recursions is therefore
/// bounded at roughly 32 frames per `to_markdown` invocation, plus constant
/// overhead.
const MAX_DEPTH_HARD_CAP: usize = 32;
18
/// Options for Markdown conversion.
///
/// `Default` yields no key filtering, no title, `max_depth = 3` and
/// `table_threshold = 5`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownOptions {
    /// If provided, only include these top-level keys (order preserved).
    /// Names that are not present in the input object are skipped.
    pub fields: Option<Vec<String>>,
    /// Keys to exclude at every nesting level.
    pub exclude: Option<Vec<String>>,
    /// Maximum nesting depth to render. Beyond this, values are shown inline
    /// in a compact single-line form. Values above `MAX_DEPTH_HARD_CAP` are
    /// clamped to that cap by `to_markdown`.
    pub max_depth: usize,
    /// When a dict has at least this many keys and all values are scalars,
    /// render as a two-column `Field` / `Value` Markdown table.
    pub table_threshold: usize,
    /// Optional heading prepended to output as a level-1 (`#`) heading
    /// followed by a blank line.
    pub title: Option<String>,
}
34
35impl Default for MarkdownOptions {
36    fn default() -> Self {
37        Self {
38            fields: None,
39            exclude: None,
40            max_depth: 3,
41            table_threshold: 5,
42            title: None,
43        }
44    }
45}
46
/// Error returned by [`to_markdown`].
#[derive(Debug, Error)]
pub enum MarkdownError {
    /// The top-level input was not a JSON object. The payload is the name of
    /// the JSON type that was received instead (for example `"array"` or
    /// `"string"`), as produced by `value_type`.
    #[error("to_markdown() expects a JSON object, got {0}")]
    NotAnObject(String),
}
53
54/// Convert a JSON object to a Markdown string.
55///
56/// Returns an error if the input is not a JSON object.
57pub fn to_markdown(data: &Value, options: &MarkdownOptions) -> Result<String, MarkdownError> {
58    let obj = data
59        .as_object()
60        .ok_or_else(|| MarkdownError::NotAnObject(value_type(data).to_string()))?;
61
62    let filtered = filter_keys(obj, &options.fields, &options.exclude);
63    let mut lines: Vec<String> = Vec::new();
64
65    if let Some(title) = &options.title {
66        lines.push(format!("# {title}"));
67        lines.push(String::new());
68    }
69
70    let exclude_set: HashSet<String> = options
71        .exclude
72        .as_ref()
73        .map(|v| v.iter().cloned().collect())
74        .unwrap_or_default();
75
76    // Clamp the caller's max_depth to MAX_DEPTH_HARD_CAP to prevent
77    // stack overflow on adversarial input (e.g. max_depth = usize::MAX
78    // combined with a pathologically nested Value). Renderers further
79    // downstream trust this bound and do not re-clamp.
80    let effective_max_depth = options.max_depth.min(MAX_DEPTH_HARD_CAP);
81
82    render_dict(
83        &filtered,
84        &mut lines,
85        0,
86        0,
87        effective_max_depth,
88        options.table_threshold,
89        &exclude_set,
90    );
91
92    let mut result = lines.join("\n");
93    result = result.trim_end_matches('\n').to_string();
94    result.push('\n');
95    Ok(result)
96}
97
98fn value_type(v: &Value) -> &'static str {
99    match v {
100        Value::Null => "null",
101        Value::Bool(_) => "bool",
102        Value::Number(_) => "number",
103        Value::String(_) => "string",
104        Value::Array(_) => "array",
105        Value::Object(_) => "object",
106    }
107}
108
109fn filter_keys(
110    obj: &serde_json::Map<String, Value>,
111    fields: &Option<Vec<String>>,
112    exclude: &Option<Vec<String>>,
113) -> Vec<(String, Value)> {
114    let mut items: Vec<(String, Value)> = if let Some(f) = fields {
115        f.iter()
116            .filter_map(|k| obj.get(k).map(|v| (k.clone(), v.clone())))
117            .collect()
118    } else {
119        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
120    };
121
122    if let Some(ex) = exclude {
123        let ex_set: HashSet<&str> = ex.iter().map(|s| s.as_str()).collect();
124        items.retain(|(k, _)| !ex_set.contains(k.as_str()));
125    }
126
127    items
128}
129
130fn is_scalar(v: &Value) -> bool {
131    matches!(
132        v,
133        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_)
134    )
135}
136
137fn format_scalar(v: &Value) -> String {
138    match v {
139        Value::Null => "*N/A*".into(),
140        Value::Bool(b) => {
141            if *b {
142                "Yes".into()
143            } else {
144                "No".into()
145            }
146        }
147        Value::Number(n) => {
148            if let Some(f) = n.as_f64() {
149                if f == f.trunc() && f.abs() < 1e15 {
150                    format!("{}", f as i64)
151                } else {
152                    format!("{:.4}", f)
153                }
154            } else {
155                n.to_string()
156            }
157        }
158        Value::String(s) => s.clone(),
159        _ => compact_repr(v, 80),
160    }
161}
162
/// Backslash-escape every `|` in `text` so it can sit inside a Markdown
/// table cell without being read as a column separator.
fn escape_pipe(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '|' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
166
167fn render_dict(
168    items: &[(String, Value)],
169    lines: &mut Vec<String>,
170    depth: usize,
171    abs_depth: usize,
172    max_depth: usize,
173    table_threshold: usize,
174    exclude: &HashSet<String>,
175) {
176    if items.is_empty() {
177        return;
178    }
179
180    let filtered: Vec<&(String, Value)> =
181        items.iter().filter(|(k, _)| !exclude.contains(k)).collect();
182
183    let all_scalar = filtered.iter().all(|(_, v)| is_scalar(v));
184
185    if all_scalar && filtered.len() >= table_threshold {
186        render_table(&filtered, lines);
187        return;
188    }
189
190    let indent = "  ".repeat(depth);
191
192    for (key, value) in &filtered {
193        if is_scalar(value) {
194            lines.push(format!("{indent}- **{key}**: {}", format_scalar(value)));
195        } else if value.is_object() {
196            if abs_depth + 1 >= max_depth {
197                lines.push(format!("{indent}- **{key}**: {}", compact_repr(value, 80)));
198            } else if depth == 0 {
199                let heading_level = (abs_depth + 2).min(6);
200                lines.push(String::new());
201                lines.push(format!("{} {key}", "#".repeat(heading_level)));
202                lines.push(String::new());
203                if let Some(obj) = value.as_object() {
204                    let sub_items: Vec<(String, Value)> =
205                        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
206                    render_dict(
207                        &sub_items,
208                        lines,
209                        0,
210                        abs_depth + 1,
211                        max_depth,
212                        table_threshold,
213                        exclude,
214                    );
215                }
216            } else {
217                lines.push(format!("{indent}- **{key}**:"));
218                if let Some(obj) = value.as_object() {
219                    let sub_items: Vec<(String, Value)> =
220                        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
221                    render_dict(
222                        &sub_items,
223                        lines,
224                        depth + 1,
225                        abs_depth + 1,
226                        max_depth,
227                        table_threshold,
228                        exclude,
229                    );
230                }
231            }
232        } else if value.is_array() {
233            if abs_depth + 1 >= max_depth {
234                lines.push(format!("{indent}- **{key}**: {}", compact_repr(value, 80)));
235            } else {
236                lines.push(format!("{indent}- **{key}**:"));
237                if let Some(arr) = value.as_array() {
238                    render_list(arr, lines, depth + 1, abs_depth + 1, max_depth, exclude);
239                }
240            }
241        } else {
242            lines.push(format!("{indent}- **{key}**: {}", format_scalar(value)));
243        }
244    }
245}
246
247fn render_list(
248    items: &[Value],
249    lines: &mut Vec<String>,
250    depth: usize,
251    abs_depth: usize,
252    max_depth: usize,
253    exclude: &HashSet<String>,
254) {
255    let indent = "  ".repeat(depth);
256
257    if items.is_empty() {
258        lines.push(format!("{indent}- *(empty)*"));
259        return;
260    }
261
262    // Homogeneous list of scalar-only dicts with uniform keys -> render as table
263    if items.len() >= 2
264        && items.iter().all(|v| v.is_object())
265        && uniform_keys(items)
266        && items.iter().all(|v| {
267            v.as_object()
268                .map(|o| o.values().all(is_scalar))
269                .unwrap_or(false)
270        })
271    {
272        render_list_table(items, lines, exclude);
273        return;
274    }
275
276    for item in items {
277        if is_scalar(item) {
278            lines.push(format!("{indent}- {}", format_scalar(item)));
279        } else if let Some(obj) = item.as_object() {
280            if abs_depth >= max_depth {
281                lines.push(format!("{indent}- {}", compact_repr(item, 80)));
282            } else {
283                // Render each dict item inline under a bullet
284                let mut first = true;
285                for (k, v) in obj {
286                    if exclude.contains(k) {
287                        continue;
288                    }
289                    let prefix = if first {
290                        first = false;
291                        format!("{indent}- ")
292                    } else {
293                        "  ".repeat(depth + 1)
294                    };
295                    if is_scalar(v) {
296                        lines.push(format!("{prefix}**{k}**: {}", format_scalar(v)));
297                    } else {
298                        lines.push(format!("{prefix}**{k}**: {}", compact_repr(v, 80)));
299                    }
300                }
301            }
302        } else if item.is_array() {
303            lines.push(format!("{indent}- {}", compact_repr(item, 80)));
304        } else {
305            lines.push(format!("{indent}- {}", format_scalar(item)));
306        }
307    }
308}
309
310/// Check if all objects in a list share the same set of keys.
311fn uniform_keys(items: &[Value]) -> bool {
312    if items.is_empty() {
313        return true;
314    }
315    let first_keys: HashSet<&str> = match items[0].as_object() {
316        Some(obj) => obj.keys().map(|k| k.as_str()).collect(),
317        None => return false,
318    };
319    items[1..].iter().all(|v| {
320        v.as_object()
321            .map(|o| {
322                let keys: HashSet<&str> = o.keys().map(|k| k.as_str()).collect();
323                keys == first_keys
324            })
325            .unwrap_or(false)
326    })
327}
328
329/// Render a list of uniform dicts as a Markdown table.
330fn render_list_table(items: &[Value], lines: &mut Vec<String>, exclude: &HashSet<String>) {
331    if items.is_empty() {
332        return;
333    }
334    let first_obj = match items[0].as_object() {
335        Some(o) => o,
336        None => return,
337    };
338    let keys: Vec<&str> = first_obj
339        .keys()
340        .map(|k| k.as_str())
341        .filter(|k| !exclude.contains(*k))
342        .collect();
343
344    lines.push(format!(
345        "| {} |",
346        keys.iter()
347            .map(|k| escape_pipe(k))
348            .collect::<Vec<_>>()
349            .join(" | ")
350    ));
351    lines.push(format!(
352        "| {} |",
353        keys.iter().map(|_| "---").collect::<Vec<_>>().join(" | ")
354    ));
355    for item in items {
356        if let Some(obj) = item.as_object() {
357            let row: Vec<String> = keys
358                .iter()
359                .map(|k| {
360                    obj.get(*k)
361                        .map(|v| escape_pipe(&format_scalar(v)))
362                        .unwrap_or_default()
363                })
364                .collect();
365            lines.push(format!("| {} |", row.join(" | ")));
366        }
367    }
368    lines.push(String::new());
369}
370
371fn render_table(items: &[&(String, Value)], lines: &mut Vec<String>) {
372    lines.push("| Field | Value |".into());
373    lines.push("|-------|-------|".into());
374    for (key, value) in items {
375        lines.push(format!(
376            "| {} | {} |",
377            escape_pipe(key),
378            escape_pipe(&format_scalar(value))
379        ));
380    }
381    lines.push(String::new());
382}
383
384fn compact_repr(value: &Value, max_len: usize) -> String {
385    compact_repr_inner(value, max_len, 0)
386}
387
388fn compact_repr_inner(value: &Value, max_len: usize, depth: usize) -> String {
389    // Guard against deeply-nested values reaching compact_repr after the
390    // primary render_dict/render_list cap fires.  Without this, adversarial
391    // input can still stack-overflow inside compact_repr even though
392    // MAX_DEPTH_HARD_CAP is enforced at the top level.
393    if depth >= MAX_DEPTH_HARD_CAP {
394        return match value {
395            Value::Object(_) => "{...}".into(),
396            Value::Array(_) => "[...]".into(),
397            _ => format_scalar(value),
398        };
399    }
400
401    let text = match value {
402        Value::Object(obj) => {
403            let parts: Vec<String> = obj
404                .iter()
405                .map(|(k, v)| format!("{k}: {}", compact_repr_inner(v, 30, depth + 1)))
406                .collect();
407            format!("{{{}}}", parts.join(", "))
408        }
409        Value::Array(arr) => {
410            let parts: Vec<String> = arr
411                .iter()
412                .map(|v| compact_repr_inner(v, 30, depth + 1))
413                .collect();
414            format!("[{}]", parts.join(", "))
415        }
416        _ => format_scalar(value),
417    };
418
419    if text.len() > max_len {
420        let truncated: String = text.chars().take(max_len - 3).collect();
421        format!("{truncated}...")
422    } else {
423        text
424    }
425}
426
// Unit tests for the Markdown renderer: top-level option handling, scalar
// formatting, table heuristics, depth limits, and recursion bounds.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // A small all-scalar object below the table threshold renders as bullets.
    #[test]
    fn test_to_markdown_basic() {
        let data = json!({"name": "Alice", "age": 30});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("**name**"));
        assert!(result.contains("Alice"));
        assert!(result.contains("**age**"));
    }

    // The title is emitted first, as a level-1 heading.
    #[test]
    fn test_to_markdown_with_title() {
        let data = json!({"key": "value"});
        let opts = MarkdownOptions {
            title: Some("My Title".into()),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.starts_with("# My Title"));
    }

    // Anything other than a JSON object at the top level is rejected.
    #[test]
    fn test_to_markdown_non_object() {
        let data = json!("not an object");
        let result = to_markdown(&data, &MarkdownOptions::default());
        assert!(result.is_err());
    }

    // `fields` restricts output to the listed top-level keys.
    #[test]
    fn test_to_markdown_fields_filter() {
        let data = json!({"a": 1, "b": 2, "c": 3});
        let opts = MarkdownOptions {
            fields: Some(vec!["a".into(), "c".into()]),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("**a**"));
        assert!(result.contains("**c**"));
        assert!(!result.contains("**b**"));
    }

    // Excluded keys must not appear anywhere in the output.
    #[test]
    fn test_to_markdown_exclude() {
        let data = json!({"a": 1, "secret": "hidden", "c": 3});
        let opts = MarkdownOptions {
            exclude: Some(vec!["secret".into()]),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(!result.contains("secret"));
    }

    // Five scalar keys with threshold=5 switch to table rendering.
    #[test]
    fn test_to_markdown_table_rendering() {
        let data = json!({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("| Field | Value |"));
    }

    // A nested object shows up either as a heading or as a bold bullet key.
    #[test]
    fn test_to_markdown_nested_object() {
        let data = json!({"user": {"name": "Alice", "age": 30}});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("## user") || result.contains("**user**"));
    }

    #[test]
    fn test_format_scalar_null() {
        assert_eq!(format_scalar(&Value::Null), "*N/A*");
    }

    #[test]
    fn test_format_scalar_bool() {
        assert_eq!(format_scalar(&json!(true)), "Yes");
        assert_eq!(format_scalar(&json!(false)), "No");
    }

    // An empty object with no title yields just the trailing newline.
    #[test]
    fn test_to_markdown_empty_dict() {
        let data = json!({});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert_eq!(result, "\n");
    }

    #[test]
    fn test_to_markdown_below_table_threshold() {
        // 3 keys with threshold=5 should render as bullets, not a table
        let data = json!({"a": 1, "b": 2, "c": 3});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("- **a**"));
        assert!(!result.contains("| Field | Value |"));
    }

    // Each scalar array element gets its own bullet.
    #[test]
    fn test_to_markdown_scalar_list() {
        let data = json!({"items": ["alpha", "beta", "gamma"]});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("- alpha"));
        assert!(result.contains("- beta"));
        assert!(result.contains("- gamma"));
    }

    // An empty array is rendered with an explicit placeholder.
    #[test]
    fn test_to_markdown_empty_list() {
        let data = json!({"items": []});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("*(empty)*"));
    }

    // JSON null is rendered with the `*N/A*` placeholder.
    #[test]
    fn test_to_markdown_none_renders_na() {
        let data = json!({"value": null});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("*N/A*"));
    }

    #[test]
    fn test_to_markdown_float_precision() {
        // Whole float renders as integer
        let data = json!({"count": 42.0});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("42"));
        assert!(!result.contains("42.0"));

        // Non-whole float renders with 4 decimal places
        let data = json!({"ratio": 1.23456});
        let result = to_markdown(&data, &MarkdownOptions::default()).unwrap();
        assert!(result.contains("1.2346"));
    }

    #[test]
    fn test_to_markdown_pipe_escaped() {
        let data = json!({"a": "x|y", "b": "1", "c": "2", "d": "3", "e": "4"});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // In the table, pipe characters in values must be escaped
        assert!(result.contains("x\\|y"));
    }

    #[test]
    fn test_to_markdown_max_depth_1() {
        let data = json!({"outer": {"inner": "value"}});
        let opts = MarkdownOptions {
            max_depth: 1,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // At max_depth=1 the nested object should be compacted inline
        assert!(result.contains("inner: value"));
        // Should NOT get a sub-heading for 'outer'
        assert!(!result.contains("## outer"));
    }

    #[test]
    fn test_to_markdown_deeply_nested() {
        let data = json!({"l1": {"l2": {"l3": {"l4": "deep"}}}});
        let opts = MarkdownOptions {
            max_depth: 2,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // l1 is expanded as a heading at abs_depth=0. l2 is met at abs_depth=1,
        // where abs_depth + 1 equals max_depth, so l2 is compacted inline and
        // l3 only appears inside that compact form.
        assert!(result.contains("l3:"));
        // The deeply nested structure should not be fully expanded
        assert!(!result.contains("## l3"));
    }

    // ASCII input: the truncated text fits the limit and ends with "...".
    #[test]
    fn test_compact_repr_truncation() {
        let long_value = json!({"key": "a]".repeat(50)});
        let result = compact_repr(&long_value, 20);
        assert!(result.len() <= 20);
        assert!(result.ends_with("..."));
    }

    /// Regression guard: even with `max_depth = usize::MAX` and a deeply
    /// nested Value, `to_markdown` must never stack-overflow.  The guard
    /// covers both the primary render_dict/render_list recursion (bounded
    /// by MAX_DEPTH_HARD_CAP) AND compact_repr (which is the terminal
    /// renderer once the primary cap fires — also now bounded).
    /// Uses 40 levels — enough to exercise the MAX_DEPTH_HARD_CAP=32 cap
    /// without building a stack-overflowing Value on the test thread.
    #[test]
    fn test_to_markdown_deep_recursion_bounded() {
        // Build a 40-level-deep nested object (well above MAX_DEPTH_HARD_CAP=32).
        let mut data = json!({"leaf": "bottom"});
        for i in 0..40 {
            let key = format!("lvl_{i}");
            data = json!({ key: data });
        }

        let opts = MarkdownOptions {
            max_depth: usize::MAX,
            ..Default::default()
        };

        // Must return Ok without stack-overflow.
        let result = to_markdown(&data, &opts);
        assert!(
            result.is_ok(),
            "to_markdown must not panic on deeply-nested input; got: {result:?}",
        );
    }

    #[test]
    fn test_max_depth_clamp_at_hard_cap() {
        // Caller-supplied max_depth beyond MAX_DEPTH_HARD_CAP must produce
        // the same output as max_depth = MAX_DEPTH_HARD_CAP for the same
        // input. Build a 40-level nest; rendering with max_depth=1000 and
        // max_depth=32 should match.
        let mut data = json!({"leaf": "v"});
        for i in 0..40 {
            let key = format!("k{i}");
            data = json!({ key: data });
        }

        let a = to_markdown(
            &data,
            &MarkdownOptions {
                max_depth: MAX_DEPTH_HARD_CAP,
                ..Default::default()
            },
        )
        .unwrap();
        let b = to_markdown(
            &data,
            &MarkdownOptions {
                max_depth: 1000,
                ..Default::default()
            },
        )
        .unwrap();
        assert_eq!(a, b);
    }
}