use markdown::mdast::Node;
/// Flattens a run of inline mdast nodes into their plain-text content.
///
/// The pieces are concatenated in document order with no separator:
///
/// - `Text`, `InlineCode` and `InlineMath` contribute their `value`;
/// - `Emphasis`, `Strong`, `Delete` and `Link` contribute the text of their
///   children, extracted recursively;
/// - `Image` contributes its `alt` text;
/// - every other node kind (including MDX text expressions and MDX JSX text
///   elements) contributes nothing.
///
/// Returns an empty string when `nodes` is empty.
pub fn extract_text_from_nodes(nodes: &[Node]) -> String {
    /// Appends the plain text of `nodes` to `out`, recursing into containers.
    ///
    /// Writing into one shared buffer avoids allocating and re-copying an
    /// intermediate `String` at every level of nested formatting.
    fn append_text(nodes: &[Node], out: &mut String) {
        for node in nodes {
            match node {
                // Leaf nodes that carry their text directly.
                Node::Text(text) => out.push_str(&text.value),
                Node::InlineCode(code) => out.push_str(&code.value),
                Node::InlineMath(math) => out.push_str(&math.value),
                // Images have no children; their alt text stands in for them.
                Node::Image(image) => out.push_str(&image.alt),
                // Formatting containers: only the wrapped text matters.
                Node::Emphasis(emphasis) => append_text(&emphasis.children, out),
                Node::Strong(strong) => append_text(&strong.children, out),
                Node::Delete(delete) => append_text(&delete.children, out),
                Node::Link(link) => append_text(&link.children, out),
                // MDX text expressions, MDX JSX text elements and every other
                // node kind are deliberately skipped.
                _ => {}
            }
        }
    }

    let mut text = String::new();
    append_text(nodes, &mut text);
    text
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::markdown_config;
    use markdown::mdast::{Emphasis, Image, InlineCode, Link, Strong, Text};
    use markdown::to_mdast;

    /// Builds a position-less `Text` node holding `value`.
    fn text(value: &str) -> Node {
        Node::Text(Text {
            value: value.to_string(),
            position: None,
        })
    }

    /// Builds a position-less `Emphasis` node wrapping `children`.
    fn emphasis(children: Vec<Node>) -> Node {
        Node::Emphasis(Emphasis {
            children,
            position: None,
        })
    }

    /// Builds a position-less `Strong` node wrapping `children`.
    fn strong(children: Vec<Node>) -> Node {
        Node::Strong(Strong {
            children,
            position: None,
        })
    }

    /// Adjacent text nodes are concatenated without a separator.
    #[test]
    fn test_extract_text_simple() {
        let nodes = vec![text("Hello "), text("world")];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Hello world");
    }

    /// Emphasis contributes the text of its children.
    #[test]
    fn test_extract_text_with_emphasis() {
        let nodes = vec![
            text("Hello "),
            emphasis(vec![text("emphasized")]),
            text(" world"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Hello emphasized world");
    }

    /// Strong contributes the text of its children.
    #[test]
    fn test_extract_text_with_strong() {
        let nodes = vec![text("Hello "), strong(vec![text("bold")]), text(" world")];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Hello bold world");
    }

    /// Inline code contributes its raw value.
    #[test]
    fn test_extract_text_with_inline_code() {
        let nodes = vec![
            text("Use "),
            Node::InlineCode(InlineCode {
                value: "printf()".to_string(),
                position: None,
            }),
            text(" function"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Use printf() function");
    }

    /// A link contributes its label text; the URL is not included.
    #[test]
    fn test_extract_text_with_link() {
        let nodes = vec![
            text("Visit "),
            Node::Link(Link {
                children: vec![text("this link")],
                url: "https://example.com".to_string(),
                title: None,
                position: None,
            }),
            text(" for more"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Visit this link for more");
    }

    /// An image contributes its alt text; the URL is not included.
    #[test]
    fn test_extract_text_with_image() {
        let nodes = vec![
            text("See "),
            Node::Image(Image {
                alt: "Alt text".to_string(),
                url: "/image.jpg".to_string(),
                title: None,
                position: None,
            }),
            text(" here"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "See Alt text here");
    }

    /// End-to-end check: parse Markdown with the options returned by
    /// `markdown_config::default()` and extract the first paragraph's text.
    #[test]
    fn test_extract_text_complex_formatting() {
        let content = "This has **bold**, *italic*, `code`, and [links](https://example.com)";
        let options = markdown_config::default();
        let root = to_mdast(content, &options).unwrap();
        if let Node::Root(root_node) = root {
            for child in &root_node.children {
                if let Node::Paragraph(para) = child {
                    let extracted = extract_text_from_nodes(&para.children);
                    assert_eq!(extracted, "This has bold, italic, code, and links");
                    return;
                }
            }
        }
        // Reaching this point means the assertion above never ran.
        panic!("Should have found paragraph in AST");
    }

    /// Nested containers (emphasis inside strong) are flattened in order.
    #[test]
    fn test_extract_text_nested_formatting() {
        let nodes = vec![
            text("Start "),
            strong(vec![
                text("bold with "),
                emphasis(vec![text("italic")]),
                text(" inside"),
            ]),
            text(" end"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Start bold with italic inside end");
    }

    /// An empty slice yields an empty string.
    #[test]
    fn test_extract_text_empty_nodes() {
        let nodes: Vec<Node> = Vec::new();
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "");
    }

    /// MDX text expressions contribute nothing to the output.
    #[test]
    fn test_extract_text_ignored_nodes() {
        let nodes = vec![
            text("Before "),
            Node::MdxTextExpression(markdown::mdast::MdxTextExpression {
                value: "ignored".to_string(),
                position: None,
                stops: vec![],
            }),
            text(" after"),
        ];
        let result = extract_text_from_nodes(&nodes);
        assert_eq!(result, "Before  after");
    }
}