json2arkdown/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
use fancy_regex::Regex;
use inflections::Inflect;
use once_cell::sync::Lazy;
use serde_json::{Map, Value};
use std::borrow::Cow;

/// Cached regex that locates sentence-ending periods.
///
/// A match is a single `.` that is directly followed by whitespace, unless that run of
/// whitespace is in turn followed by an uppercase letter and a dot (an initial, as in
/// `J. K.`). Only the period itself is consumed by the match; everything after it is
/// inspected through zero-width lookaheads.
static SPLIT_REGEX: Lazy<Regex> = Lazy::new(|| {
    // Two independent lookaheads (supported by fancy_regex). Nesting the negative lookahead
    // after `\s+` would let `\s+` backtrack to fewer whitespace characters and slip past the
    // "initial" check whenever two or more whitespace characters precede the initial.
    Regex::new(r"\.(?=\s)(?!\s+[A-Z]\.)")
        .unwrap_or_else(|e| panic!("regex failed to build, error: {e}"))
});

/// Rendering context that decides how object keys, array items and scalars are laid out.
#[derive(Debug, Clone, Copy)]
enum RenderStyle {
    /// Context of the top-level value handed to the renderer.
    Root,
    /// Context for keys of a root-level object; such keys are written as `## ` headings.
    Section,
    /// Context for keys that are written as `### ` headings.
    Subsection,
    /// Context for keys that are written as `- **Key**` bullets.
    ListItem,
    /// Context for content nested under a key or inside an array; array items rendered in
    /// this style use the `  - ` marker.
    NestedItem,
}

/// Configuration for rendering JSON values as Markdown.
///
/// Nested output is indented by `depth * indent_spaces` spaces, where the depth grows by
/// `depth_increment` each time rendering descends into a nested structure.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct MarkdownRenderer {
    /// Number of spaces emitted per unit of depth.
    indent_spaces: usize,
    /// Depth units added for each level of nesting.
    depth_increment: usize,
}

impl Default for MarkdownRenderer {
    fn default() -> Self {
        Self {
            indent_spaces: 1,
            depth_increment: 2,
        }
    }
}

impl MarkdownRenderer {
    /// Creates a new `MarkdownRenderer`.
    ///
    /// # Arguments
    ///
    /// * `indent_spaces` - Number of spaces to use for indentation.
    /// * `depth_increment` - Increment to apply for nested structures.
    ///
    /// # Examples
    ///
    /// ```
    /// let renderer = MarkdownRenderer::new(1, 2);
    /// ```
    #[must_use]
    pub const fn new(indent_spaces: usize, depth_increment: usize) -> Self {
        MarkdownRenderer {
            indent_spaces,
            depth_increment,
        }
    }

    /// Renders a JSON value into a Markdown string.
    ///
    /// # Arguments
    ///
    /// * `json` - The JSON value to render.
    ///
    /// # Errors
    ///
    ///
    /// # Examples
    ///
    /// ```
    /// let renderer = MarkdownRenderer::new(1, 2);
    /// let json = serde_json::json!({"title": "My Document"});
    /// let markdown = renderer.render(&json);
    /// ```
    #[must_use]
    pub fn render(&self, json: &Value) -> String {
        let mut output = String::with_capacity(4096); // Pre-allocate memory for large JSON
        self.render_value(json, 0, RenderStyle::Root, &mut output, false);
        output
    }

    /// Handles rendering of different JSON values based on their type.
    ///
    /// # Arguments
    ///
    /// * `value` - The JSON value to render.
    /// * `depth` - Current depth level in the hierarchy.
    /// * `style` - Current rendering style.
    /// * `output` - The output buffer to write the rendered Markdown.
    fn render_value(
        &self,
        value: &Value,
        depth: usize,
        style: RenderStyle,
        output: &mut String,
        written_before: bool,
    ) {
        match value {
            Value::Object(obj) => self.render_object(obj, depth, style, output),
            Value::Array(arr) => self.render_array(arr, depth, style, output),
            Value::String(s) => format_value(s, style, output, written_before),
            Value::Number(n) => format_value(&n.to_string(), style, output, written_before),
            Value::Bool(b) => format_value(&b.to_string(), style, output, written_before),
            Value::Null => format_value("N/A", style, output, written_before),
        }
    }

    fn render_object(
        &self,
        obj: &Map<String, Value>,
        depth: usize,
        style: RenderStyle,
        output: &mut String,
    ) {
        let indent = self.get_indent(depth);

        for (key, value) in obj {
            let (new_style, header_marker, depth_increment) = match (depth, style) {
                (0, RenderStyle::Root) => (RenderStyle::Section, "## ", 0),
                (1, RenderStyle::Section) => (RenderStyle::Subsection, "### ", 0),
                _ => (RenderStyle::ListItem, "", self.depth_increment),
            };

            let formatted_key = match new_style {
                RenderStyle::Section | RenderStyle::Subsection => {
                    format!("{indent}{header_marker}{}\n\n", key.to_title_case())
                }
                RenderStyle::ListItem => format!("{indent}- **{}**", key.to_title_case()),
                _ => key.to_title_case(),
            };

            output.push_str(&formatted_key);

            match value {
                Value::Object(inner_obj) if !inner_obj.is_empty() => {
                    output.push_str("\n\n");
                    self.render_object(inner_obj, depth + depth_increment, new_style, output);
                }
                Value::Array(arr) if !arr.is_empty() => {
                    output.push_str("\n\n");
                    self.render_array(
                        arr,
                        depth + depth_increment,
                        RenderStyle::NestedItem,
                        output,
                    );
                    output.push_str("\n\n");
                }
                Value::String(value) => {
                    output.push_str("\n\n");

                    // we don't touch it if it's a url
                    let s = if value.starts_with("http") {
                        value.to_string()
                    } else {
                        let is_in_root = depth_increment == 0;
                        let adjusted_depth = if is_in_root { 0 } else { depth + 2 };

                        let formatted = self.split_at_period(value, adjusted_depth);

                        match formatted {
                            Cow::Owned(owned_s) => owned_s,
                            // we don't add the indent if we are in the root
                            Cow::Borrowed(s) if is_in_root => s.to_string(),
                            Cow::Borrowed(s) => format!("{indent}{s}"),
                        }
                    };

                    output.push_str(&s);
                    output.push('\n');
                }
                _ => {
                    self.render_value(
                        value,
                        depth + depth_increment,
                        RenderStyle::NestedItem,
                        output,
                        true,
                    );
                }
            }
        }
    }

    fn render_array(&self, arr: &[Value], depth: usize, style: RenderStyle, output: &mut String) {
        let indent = self.get_indent(depth);

        for item in arr {
            let marker = match style {
                RenderStyle::NestedItem => "  - ",
                _ => "- ",
            };

            // we only want to do a '-' if it's not an object or an array
            let mut do_hyphen = || output.push_str(&format!("{indent}{marker}"));

            match item {
                Value::Object(obj) if !obj.is_empty() => {
                    self.render_object(
                        obj,
                        depth + self.depth_increment,
                        RenderStyle::NestedItem,
                        output,
                    );
                }
                Value::Array(inner_arr) if !inner_arr.is_empty() => {
                    self.render_array(
                        inner_arr,
                        depth + self.depth_increment,
                        RenderStyle::NestedItem,
                        output,
                    );
                }
                Value::String(s) => {
                    do_hyphen();
                    output.push_str(&format!("{s}\n"));
                }
                _ => {
                    do_hyphen();
                    self.render_value(
                        item,
                        depth + self.depth_increment,
                        RenderStyle::NestedItem,
                        output,
                        false,
                    );
                }
            }
        }
    }

    fn get_indent(&self, depth: usize) -> String {
        " ".repeat(depth * self.indent_spaces)
    }

    /// splits the strings at '.' and adds 2 new lines for readability, we return the given if
    /// there is no '.'
    fn split_at_period<'a>(&self, text: &'a str, depth: usize) -> Cow<'a, str> {
        let indent = self.get_indent(depth);

        if !SPLIT_REGEX.is_match(text).is_ok_and(|b| b) {
            return Cow::Borrowed(text);
        }

        // Split using the regex.
        let splitted = SPLIT_REGEX
            .split(text)
            .collect::<Result<Vec<_>, _>>()
            .unwrap_or_else(|e| panic!("regex failed to split, error: {e}"));

        let capacity = (splitted.len() * indent.len() + 4) + text.len();

        Cow::Owned(
            splitted
                .into_iter()
                .fold(String::with_capacity(capacity), |mut acc, part| {
                    acc.push_str(&indent);
                    acc.push_str(part.trim());
                    acc.push_str("\n\n");
                    acc
                }),
        )
    }
}

/// Appends a scalar `value` to `output`.
///
/// When `written_before` is set the value is prefixed with `": "` so it reads as the
/// continuation of a key already on the line. List-like styles end the value with a single
/// newline; heading-like styles end it with a blank line.
fn format_value(value: &str, style: RenderStyle, output: &mut String, written_before: bool) {
    // we don't want to do ": " if there is nothing before
    if written_before {
        output.push_str(": ");
    }
    output.push_str(value);

    let terminator = match style {
        RenderStyle::ListItem | RenderStyle::NestedItem => "\n",
        RenderStyle::Root | RenderStyle::Section | RenderStyle::Subsection => "\n\n",
    };
    output.push_str(terminator);
}