Skip to main content

apcore_toolkit/formatting/
markdown.rs

1// Generic dict-to-Markdown conversion with depth control and table heuristics.
2//
3// Provides `to_markdown()` — a best-effort converter for arbitrary JSON values.
4
5use std::collections::HashSet;
6
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9
10/// Options for Markdown conversion.
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct MarkdownOptions {
13    /// If provided, only include these top-level keys (order preserved).
14    pub fields: Option<Vec<String>>,
15    /// Keys to exclude at every nesting level.
16    pub exclude: Option<Vec<String>>,
17    /// Maximum nesting depth to render. Beyond this, values are shown inline.
18    pub max_depth: usize,
19    /// When a dict has at least this many keys and all values are scalars,
20    /// render as a Markdown table.
21    pub table_threshold: usize,
22    /// Optional heading prepended to output.
23    pub title: Option<String>,
24}
25
26impl Default for MarkdownOptions {
27    fn default() -> Self {
28        Self {
29            fields: None,
30            exclude: None,
31            max_depth: 3,
32            table_threshold: 5,
33            title: None,
34        }
35    }
36}
37
38/// Convert a JSON object to a Markdown string.
39///
40/// Returns an error if the input is not a JSON object.
41pub fn to_markdown(data: &Value, options: &MarkdownOptions) -> Result<String, String> {
42    let obj = data.as_object().ok_or_else(|| {
43        format!(
44            "to_markdown() expects a JSON object, got {}",
45            value_type(data)
46        )
47    })?;
48
49    let filtered = filter_keys(obj, &options.fields, &options.exclude);
50    let mut lines: Vec<String> = Vec::new();
51
52    if let Some(title) = &options.title {
53        lines.push(format!("# {title}"));
54        lines.push(String::new());
55    }
56
57    let exclude_set: HashSet<String> = options
58        .exclude
59        .as_ref()
60        .map(|v| v.iter().cloned().collect())
61        .unwrap_or_default();
62
63    render_dict(
64        &filtered,
65        &mut lines,
66        0,
67        0,
68        options.max_depth,
69        options.table_threshold,
70        &exclude_set,
71    );
72
73    let mut result = lines.join("\n");
74    result = result.trim_end_matches('\n').to_string();
75    result.push('\n');
76    Ok(result)
77}
78
79fn value_type(v: &Value) -> &'static str {
80    match v {
81        Value::Null => "null",
82        Value::Bool(_) => "bool",
83        Value::Number(_) => "number",
84        Value::String(_) => "string",
85        Value::Array(_) => "array",
86        Value::Object(_) => "object",
87    }
88}
89
90fn filter_keys(
91    obj: &serde_json::Map<String, Value>,
92    fields: &Option<Vec<String>>,
93    exclude: &Option<Vec<String>>,
94) -> Vec<(String, Value)> {
95    let mut items: Vec<(String, Value)> = if let Some(f) = fields {
96        f.iter()
97            .filter_map(|k| obj.get(k).map(|v| (k.clone(), v.clone())))
98            .collect()
99    } else {
100        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
101    };
102
103    if let Some(ex) = exclude {
104        let ex_set: HashSet<&str> = ex.iter().map(|s| s.as_str()).collect();
105        items.retain(|(k, _)| !ex_set.contains(k.as_str()));
106    }
107
108    items
109}
110
111fn is_scalar(v: &Value) -> bool {
112    matches!(
113        v,
114        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_)
115    )
116}
117
118fn format_scalar(v: &Value) -> String {
119    match v {
120        Value::Null => "*N/A*".into(),
121        Value::Bool(b) => {
122            if *b {
123                "Yes".into()
124            } else {
125                "No".into()
126            }
127        }
128        Value::Number(n) => {
129            if let Some(f) = n.as_f64() {
130                if f == f.trunc() && f.abs() < 1e15 {
131                    format!("{}", f as i64)
132                } else {
133                    format!("{:.4}", f)
134                }
135            } else {
136                n.to_string()
137            }
138        }
139        Value::String(s) => s.clone(),
140        _ => compact_repr(v, 80),
141    }
142}
143
/// Backslash-escape every `|` in `text` so it can sit inside a Markdown
/// table cell without being read as a column separator.
fn escape_pipe(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if ch == '|' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
147
148fn render_dict(
149    items: &[(String, Value)],
150    lines: &mut Vec<String>,
151    depth: usize,
152    abs_depth: usize,
153    max_depth: usize,
154    table_threshold: usize,
155    exclude: &HashSet<String>,
156) {
157    if items.is_empty() {
158        return;
159    }
160
161    let filtered: Vec<&(String, Value)> =
162        items.iter().filter(|(k, _)| !exclude.contains(k)).collect();
163
164    let all_scalar = filtered.iter().all(|(_, v)| is_scalar(v));
165
166    if all_scalar && filtered.len() >= table_threshold {
167        render_table(&filtered, lines);
168        return;
169    }
170
171    let indent = "  ".repeat(depth);
172
173    for (key, value) in &filtered {
174        if is_scalar(value) {
175            lines.push(format!("{indent}- **{key}**: {}", format_scalar(value)));
176        } else if value.is_object() {
177            if abs_depth + 1 >= max_depth {
178                lines.push(format!("{indent}- **{key}**: {}", compact_repr(value, 80)));
179            } else if depth == 0 {
180                let heading_level = (abs_depth + 2).min(6);
181                lines.push(String::new());
182                lines.push(format!("{} {key}", "#".repeat(heading_level)));
183                lines.push(String::new());
184                if let Some(obj) = value.as_object() {
185                    let sub_items: Vec<(String, Value)> =
186                        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
187                    render_dict(
188                        &sub_items,
189                        lines,
190                        0,
191                        abs_depth + 1,
192                        max_depth,
193                        table_threshold,
194                        exclude,
195                    );
196                }
197            } else {
198                lines.push(format!("{indent}- **{key}**:"));
199                if let Some(obj) = value.as_object() {
200                    let sub_items: Vec<(String, Value)> =
201                        obj.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
202                    render_dict(
203                        &sub_items,
204                        lines,
205                        depth + 1,
206                        abs_depth + 1,
207                        max_depth,
208                        table_threshold,
209                        exclude,
210                    );
211                }
212            }
213        } else if value.is_array() {
214            if abs_depth + 1 >= max_depth {
215                lines.push(format!("{indent}- **{key}**: {}", compact_repr(value, 80)));
216            } else {
217                lines.push(format!("{indent}- **{key}**:"));
218                if let Some(arr) = value.as_array() {
219                    render_list(arr, lines, depth + 1, abs_depth + 1, max_depth, exclude);
220                }
221            }
222        } else {
223            lines.push(format!("{indent}- **{key}**: {}", format_scalar(value)));
224        }
225    }
226}
227
228fn render_list(
229    items: &[Value],
230    lines: &mut Vec<String>,
231    depth: usize,
232    abs_depth: usize,
233    max_depth: usize,
234    exclude: &HashSet<String>,
235) {
236    let indent = "  ".repeat(depth);
237
238    if items.is_empty() {
239        lines.push(format!("{indent}- *(empty)*"));
240        return;
241    }
242
243    // Homogeneous list of scalar-only dicts with uniform keys -> render as table
244    if items.len() >= 2
245        && items.iter().all(|v| v.is_object())
246        && uniform_keys(items)
247        && items.iter().all(|v| {
248            v.as_object()
249                .map(|o| o.values().all(is_scalar))
250                .unwrap_or(false)
251        })
252    {
253        render_list_table(items, lines, exclude);
254        return;
255    }
256
257    for item in items {
258        if is_scalar(item) {
259            lines.push(format!("{indent}- {}", format_scalar(item)));
260        } else if let Some(obj) = item.as_object() {
261            if abs_depth >= max_depth {
262                lines.push(format!("{indent}- {}", compact_repr(item, 80)));
263            } else {
264                // Render each dict item inline under a bullet
265                let mut first = true;
266                for (k, v) in obj {
267                    if exclude.contains(k) {
268                        continue;
269                    }
270                    let prefix = if first {
271                        first = false;
272                        format!("{indent}- ")
273                    } else {
274                        "  ".repeat(depth + 1)
275                    };
276                    if is_scalar(v) {
277                        lines.push(format!("{prefix}**{k}**: {}", format_scalar(v)));
278                    } else {
279                        lines.push(format!("{prefix}**{k}**: {}", compact_repr(v, 80)));
280                    }
281                }
282            }
283        } else if item.is_array() {
284            lines.push(format!("{indent}- {}", compact_repr(item, 80)));
285        } else {
286            lines.push(format!("{indent}- {}", format_scalar(item)));
287        }
288    }
289}
290
291/// Check if all objects in a list share the same set of keys.
292fn uniform_keys(items: &[Value]) -> bool {
293    if items.is_empty() {
294        return true;
295    }
296    let first_keys: HashSet<&str> = match items[0].as_object() {
297        Some(obj) => obj.keys().map(|k| k.as_str()).collect(),
298        None => return false,
299    };
300    items[1..].iter().all(|v| {
301        v.as_object()
302            .map(|o| {
303                let keys: HashSet<&str> = o.keys().map(|k| k.as_str()).collect();
304                keys == first_keys
305            })
306            .unwrap_or(false)
307    })
308}
309
310/// Render a list of uniform dicts as a Markdown table.
311fn render_list_table(items: &[Value], lines: &mut Vec<String>, exclude: &HashSet<String>) {
312    if items.is_empty() {
313        return;
314    }
315    let first_obj = match items[0].as_object() {
316        Some(o) => o,
317        None => return,
318    };
319    let keys: Vec<&str> = first_obj
320        .keys()
321        .map(|k| k.as_str())
322        .filter(|k| !exclude.contains(*k))
323        .collect();
324
325    lines.push(format!(
326        "| {} |",
327        keys.iter()
328            .map(|k| escape_pipe(k))
329            .collect::<Vec<_>>()
330            .join(" | ")
331    ));
332    lines.push(format!(
333        "| {} |",
334        keys.iter().map(|_| "---").collect::<Vec<_>>().join(" | ")
335    ));
336    for item in items {
337        if let Some(obj) = item.as_object() {
338            let row: Vec<String> = keys
339                .iter()
340                .map(|k| {
341                    obj.get(*k)
342                        .map(|v| escape_pipe(&format_scalar(v)))
343                        .unwrap_or_default()
344                })
345                .collect();
346            lines.push(format!("| {} |", row.join(" | ")));
347        }
348    }
349    lines.push(String::new());
350}
351
352fn render_table(items: &[&(String, Value)], lines: &mut Vec<String>) {
353    lines.push("| Field | Value |".into());
354    lines.push("|-------|-------|".into());
355    for (key, value) in items {
356        lines.push(format!(
357            "| {} | {} |",
358            escape_pipe(key),
359            escape_pipe(&format_scalar(value))
360        ));
361    }
362    lines.push(String::new());
363}
364
365fn compact_repr(value: &Value, max_len: usize) -> String {
366    let text = match value {
367        Value::Object(obj) => {
368            let parts: Vec<String> = obj
369                .iter()
370                .map(|(k, v)| format!("{k}: {}", compact_repr(v, 30)))
371                .collect();
372            format!("{{{}}}", parts.join(", "))
373        }
374        Value::Array(arr) => {
375            let parts: Vec<String> = arr.iter().map(|v| compact_repr(v, 30)).collect();
376            format!("[{}]", parts.join(", "))
377        }
378        _ => format_scalar(value),
379    };
380
381    if text.len() > max_len {
382        let truncated: String = text.chars().take(max_len - 3).collect();
383        format!("{truncated}...")
384    } else {
385        text
386    }
387}
388
#[cfg(test)]
mod tests {
    //! Unit tests for the Markdown renderer: top-level filtering, table
    //! heuristics, depth limiting and scalar formatting.

    use super::*;
    use serde_json::json;

    /// Render `data` with default options, panicking on error.
    fn render_default(data: &Value) -> String {
        to_markdown(data, &MarkdownOptions::default()).unwrap()
    }

    #[test]
    fn test_to_markdown_basic() {
        let result = render_default(&json!({"name": "Alice", "age": 30}));
        assert!(result.contains("**name**"));
        assert!(result.contains("Alice"));
        assert!(result.contains("**age**"));
    }

    #[test]
    fn test_to_markdown_with_title() {
        let data = json!({"key": "value"});
        let opts = MarkdownOptions {
            title: Some("My Title".into()),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.starts_with("# My Title"));
    }

    #[test]
    fn test_to_markdown_non_object() {
        let data = json!("not an object");
        let result = to_markdown(&data, &MarkdownOptions::default());
        assert!(result.is_err());
    }

    #[test]
    fn test_to_markdown_fields_filter() {
        let data = json!({"a": 1, "b": 2, "c": 3});
        let opts = MarkdownOptions {
            fields: Some(vec!["a".into(), "c".into()]),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("**a**"));
        assert!(result.contains("**c**"));
        assert!(!result.contains("**b**"));
    }

    #[test]
    fn test_to_markdown_exclude() {
        let data = json!({"a": 1, "secret": "hidden", "c": 3});
        let opts = MarkdownOptions {
            exclude: Some(vec!["secret".into()]),
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(!result.contains("secret"));
    }

    #[test]
    fn test_to_markdown_table_rendering() {
        let data = json!({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("| Field | Value |"));
    }

    #[test]
    fn test_to_markdown_nested_object() {
        let result = render_default(&json!({"user": {"name": "Alice", "age": 30}}));
        // A top-level nested object opens a level-2 heading and its scalar
        // members are listed as bullets beneath it.
        assert!(result.contains("## user"));
        assert!(result.contains("- **name**: Alice"));
        assert!(result.contains("- **age**: 30"));
    }

    #[test]
    fn test_format_scalar_null() {
        assert_eq!(format_scalar(&Value::Null), "*N/A*");
    }

    #[test]
    fn test_format_scalar_bool() {
        assert_eq!(format_scalar(&json!(true)), "Yes");
        assert_eq!(format_scalar(&json!(false)), "No");
    }

    #[test]
    fn test_to_markdown_empty_dict() {
        let result = render_default(&json!({}));
        assert_eq!(result, "\n");
    }

    #[test]
    fn test_to_markdown_below_table_threshold() {
        // 3 keys with threshold=5 should render as bullets, not a table
        let data = json!({"a": 1, "b": 2, "c": 3});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        assert!(result.contains("- **a**"));
        assert!(!result.contains("| Field | Value |"));
    }

    #[test]
    fn test_to_markdown_scalar_list() {
        let result = render_default(&json!({"items": ["alpha", "beta", "gamma"]}));
        assert!(result.contains("- alpha"));
        assert!(result.contains("- beta"));
        assert!(result.contains("- gamma"));
    }

    #[test]
    fn test_to_markdown_empty_list() {
        let result = render_default(&json!({"items": []}));
        assert!(result.contains("*(empty)*"));
    }

    #[test]
    fn test_to_markdown_none_renders_na() {
        let result = render_default(&json!({"value": null}));
        assert!(result.contains("*N/A*"));
    }

    #[test]
    fn test_to_markdown_float_precision() {
        // Whole float renders as integer
        let result = render_default(&json!({"count": 42.0}));
        assert!(result.contains("42"));
        assert!(!result.contains("42.0"));

        // Non-whole float renders with 4 decimal places
        let result = render_default(&json!({"ratio": 1.23456}));
        assert!(result.contains("1.2346"));
    }

    #[test]
    fn test_to_markdown_pipe_escaped() {
        let data = json!({"a": "x|y", "b": "1", "c": "2", "d": "3", "e": "4"});
        let opts = MarkdownOptions {
            table_threshold: 5,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // In the table, pipe characters in values must be escaped
        assert!(result.contains("x\\|y"));
    }

    #[test]
    fn test_to_markdown_max_depth_1() {
        let data = json!({"outer": {"inner": "value"}});
        let opts = MarkdownOptions {
            max_depth: 1,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // At max_depth=1 the nested object should be compacted inline
        assert!(result.contains("inner: value"));
        assert!(result.contains("- **outer**: {inner: value}"));
        // Should NOT get a sub-heading for 'outer'
        assert!(!result.contains("## outer"));
    }

    #[test]
    fn test_to_markdown_deeply_nested() {
        let data = json!({"l1": {"l2": {"l3": {"l4": "deep"}}}});
        let opts = MarkdownOptions {
            max_depth: 2,
            ..Default::default()
        };
        let result = to_markdown(&data, &opts).unwrap();
        // l1 (abs_depth 0) is expanded under a heading. Its child l2 is
        // rendered at abs_depth 1, where abs_depth + 1 reaches max_depth, so
        // l2's whole value is collapsed onto one line.
        assert!(result.contains("## l1"));
        assert!(result.contains("- **l2**: {l3: {l4: deep}}"));
        assert!(result.contains("l3:"));
        // The deeply nested structure should not be fully expanded
        assert!(!result.contains("## l3"));
    }

    #[test]
    fn test_compact_repr_truncation() {
        let long_value = json!({"key": "a]".repeat(50)});
        let result = compact_repr(&long_value, 20);
        assert!(result.len() <= 20);
        assert!(result.ends_with("..."));
    }
}