Skip to main content

nu_command/formats/from/
md.rs

1use mq_markdown::{AttrValue, Markdown, Node};
2use nu_engine::command_prelude::*;
3
4#[derive(Clone)]
5pub struct FromMd;
6
7impl Command for FromMd {
8    fn name(&self) -> &str {
9        "from md"
10    }
11
12    fn description(&self) -> &str {
13        "Convert markdown text into human-friendly structured rows. Use --verbose for the full AST."
14    }
15
16    fn signature(&self) -> Signature {
17        Signature::build("from md")
18            .input_output_types(vec![(Type::String, Type::table())])
19            .switch(
20                "verbose",
21                "Return the full AST with type, position, attrs, and children fields.",
22                Some('v'),
23            )
24            .category(Category::Formats)
25    }
26
27    fn examples(&self) -> Vec<Example<'_>> {
28        vec![
29            Example {
30                example: "'# Title' | from md | get 0.element",
31                description: "Reduced mode promotes child rows; heading text is represented as a text element.",
32                result: Some(Value::test_string("text")),
33            },
34            Example {
35                example: "'# Title' | from md | get 0.content",
36                description: "Get the text content of the first element.",
37                result: Some(Value::test_string("Title")),
38            },
39            Example {
40                example: "'---
41title: Demo
42---
43# A' | from md | get 0.element",
44                description: "Parse markdown frontmatter as a dedicated yaml element.",
45                result: Some(Value::test_string("yaml")),
46            },
47            Example {
48                example: "'# Title' | from md --verbose | get 0.type",
49                description: "Use --verbose to get the full AST; the first node type is h1.",
50                result: Some(Value::test_string("h1")),
51            },
52        ]
53    }
54
55    fn run(
56        &self,
57        engine_state: &EngineState,
58        stack: &mut Stack,
59        call: &Call,
60        input: PipelineData,
61    ) -> Result<PipelineData, ShellError> {
62        let verbose = call.has_flag(engine_state, stack, "verbose")?;
63        from_md(input, call.head, verbose)
64    }
65}
66
67fn from_md(input: PipelineData, head: Span, verbose: bool) -> Result<PipelineData, ShellError> {
68    let (string_input, span, metadata) = input.collect_string_strict(head)?;
69
70    let markdown =
71        Markdown::from_markdown_str(&string_input).map_err(|err| ShellError::CantConvert {
72            to_type: "structured markdown data".into(),
73            from_type: "string".into(),
74            span,
75            help: Some(err.to_string()),
76        })?;
77
78    let value = if verbose {
79        markdown_to_ast_value(&markdown, span)
80    } else {
81        markdown_to_reduced_value(&markdown, span)
82    };
83
84    Ok(value.into_pipeline_data_with_metadata(metadata.map(|md| md.with_content_type(None))))
85}
86
87fn markdown_to_ast_value(markdown: &Markdown, span: Span) -> Value {
88    let nodes = markdown
89        .nodes
90        .iter()
91        .map(|node| node_to_ast_value(node, span))
92        .collect();
93
94    Value::list(nodes, span)
95}
96
97fn node_to_ast_value(node: &Node, span: Span) -> Value {
98    let children = node.children();
99
100    let mut record = Record::new();
101    record.push("type", Value::string(node_type_name(node), span));
102
103    if let Some(position) = node.position() {
104        record.push("position", position_to_value(position, span));
105    } else {
106        record.push("position", Value::nothing(span));
107    }
108
109    let attrs = node_attrs_to_value(node, children.is_empty(), span);
110    record.push("attrs", attrs);
111
112    let children = children
113        .iter()
114        .map(|child| node_to_ast_value(child, span))
115        .collect();
116    record.push("children", Value::list(children, span));
117
118    Value::record(record, span)
119}
120
121fn node_type_name(node: &Node) -> String {
122    if node.is_empty() {
123        return "empty".to_string();
124    }
125
126    if node.is_fragment() {
127        return "fragment".to_string();
128    }
129
130    node.name().to_string()
131}
132
133fn position_to_value(position: mq_markdown::Position, span: Span) -> Value {
134    Value::record(
135        record! {
136            "start" => Value::record(
137                record! {
138                    "line" => Value::int(position.start.line as i64, span),
139                    "column" => Value::int(position.start.column as i64, span),
140                },
141                span,
142            ),
143            "end" => Value::record(
144                record! {
145                    "line" => Value::int(position.end.line as i64, span),
146                    "column" => Value::int(position.end.column as i64, span),
147                },
148                span,
149            ),
150        },
151        span,
152    )
153}
154
155fn node_attrs_to_value(node: &Node, is_leaf: bool, span: Span) -> Value {
156    const ATTRIBUTE_KEYS: &[&str] = &[
157        "depth", "level", "index", "ordered", "checked", "lang", "meta", "fence", "url", "title",
158        "alt", "ident", "label", "row", "column", "align", "name",
159    ];
160
161    let mut attrs = Record::new();
162
163    // Emit `value` only for leaves to avoid parent/child text duplication.
164    if is_leaf {
165        if let Some(value) = node.attr("value") {
166            attrs.push("value", attr_value_to_nu_value(value, span));
167        } else if node.is_text() {
168            // Some text-like nodes can carry content without exposing a `value` attribute.
169            attrs.push("value", Value::string(node.value(), span));
170        }
171    }
172
173    for key in ATTRIBUTE_KEYS {
174        if let Some(value) = node.attr(key) {
175            attrs.push(*key, attr_value_to_nu_value(value, span));
176        }
177    }
178
179    Value::record(attrs, span)
180}
181
182fn attr_value_to_nu_value(value: AttrValue, span: Span) -> Value {
183    match value {
184        AttrValue::String(value) => Value::string(value, span),
185        AttrValue::Integer(value) => Value::int(value, span),
186        AttrValue::Number(value) => Value::float(value, span),
187        AttrValue::Boolean(value) => Value::bool(value, span),
188        AttrValue::Null => Value::nothing(span),
189        AttrValue::Array(value) => Value::list(
190            value
191                .iter()
192                .map(|node| node_to_ast_value(node, span))
193                .collect(),
194            span,
195        ),
196    }
197}
198
199/// Builds reduced rows by promoting each top-level node's immediate children to row level.
200///
201/// If a top-level node has no children, the node itself is emitted as a row.
202/// Parent attributes are inherited by promoted child rows when child attributes are absent.
203fn markdown_to_reduced_value(markdown: &Markdown, span: Span) -> Value {
204    let mut nodes = Vec::new();
205    for node in &markdown.nodes {
206        let parent_attrs = node_reduced_attrs(node, span);
207        let children = node.children();
208
209        if children.is_empty() {
210            nodes.push(node_to_reduced_value(
211                node,
212                span,
213                parent_attrs.clone(),
214                None,
215            ));
216        } else {
217            for child in children {
218                let child_attrs = node_reduced_attrs(&child, span);
219                nodes.push(node_to_reduced_value(
220                    &child,
221                    span,
222                    child_attrs,
223                    parent_attrs.clone(),
224                ));
225            }
226        }
227    }
228
229    Value::list(nodes, span)
230}
231
232fn node_to_reduced_value(
233    node: &Node,
234    span: Span,
235    own_attrs: Option<Value>,
236    inherited_attrs: Option<Value>,
237) -> Value {
238    let mut record = Record::new();
239    record.push("element", Value::string(node_type_name(node), span));
240    record.push("content", Value::string(extract_text(node), span));
241
242    if let Some(position) = node.position() {
243        record.push("content_span", position_to_value(position, span));
244    }
245
246    // Merge parent and child attributes: parent forms the base, child keys take precedence
247    // on collision so the most specific information wins.
248    if let Some(attrs) = merge_attrs(inherited_attrs, own_attrs, span) {
249        record.push("attributes", attrs);
250    }
251
252    Value::record(record, span)
253}
254
255/// Merges two optional attribute records into one, with `child` keys overriding `parent` keys.
256/// Returns `None` when both inputs are absent or produce an empty result.
257fn merge_attrs(parent: Option<Value>, child: Option<Value>, span: Span) -> Option<Value> {
258    match (parent, child) {
259        (None, child) => child,
260        (parent, None) => parent,
261        (
262            Some(Value::Record {
263                val: parent_rec, ..
264            }),
265            Some(Value::Record { val: child_rec, .. }),
266        ) => {
267            let mut merged = (*parent_rec).clone();
268            for (key, val) in child_rec.iter() {
269                if let Some(existing) = merged.get_mut(key) {
270                    *existing = val.clone();
271                } else {
272                    merged.push(key.clone(), val.clone());
273                }
274            }
275            if merged.is_empty() {
276                None
277            } else {
278                Some(Value::record(merged, span))
279            }
280        }
281        // Fallback: child wins if types are unexpected
282        (_, child) => child,
283    }
284}
285
286/// Recursively extracts the plain-text content of a node by joining all leaf values.
287fn extract_text(node: &Node) -> String {
288    let children = node.children();
289    if children.is_empty() {
290        if let Some(AttrValue::String(s)) = node.attr("value") {
291            s
292        } else if node.is_text() {
293            node.value().to_string()
294        } else {
295            String::new()
296        }
297    } else {
298        children
299            .iter()
300            .map(extract_text)
301            .filter(|s| !s.is_empty())
302            .collect::<Vec<_>>()
303            .join(" ")
304    }
305}
306
307/// Returns a reduced attributes record containing only meaningful metadata fields,
308/// or `None` when no such fields are present on the node.
309fn node_reduced_attrs(node: &Node, span: Span) -> Option<Value> {
310    const REDUCED_ATTRIBUTE_KEYS: &[&str] = &[
311        "depth", "level", "ordered", "checked", "lang", "url", "title", "alt", "align",
312    ];
313
314    let mut attrs = Record::new();
315    for key in REDUCED_ATTRIBUTE_KEYS {
316        if let Some(value) = node.attr(key) {
317            attrs.push(*key, attr_value_to_nu_value(value, span));
318        }
319    }
320
321    if attrs.is_empty() {
322        None
323    } else {
324        Some(Value::record(attrs, span))
325    }
326}
327
#[cfg(test)]
mod test {
    /// Hands the `FromMd` command to the `nu_test_support` example checker.
    #[test]
    fn test_examples() -> nu_test_support::Result {
        let command = super::FromMd;
        nu_test_support::test().examples(command)
    }
}
336}