use crate::error::MdxError;
use crate::models::{
ComponentDefinition, OutputFormat, RenderSettings, RenderedMdx, TsxTransformConfig,
};
use crate::renderer::JsRenderer;
use crate::transform::{transform_tsx_to_js_for_output, transform_tsx_to_js_with_config};
use gray_matter::{engine::YAML, Matter};
use markdown::{to_html_with_options, CompileOptions, Constructs, Options, ParseOptions};
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::json;
use std::collections::{HashMap, HashSet};
/// Upper bound on the number of JSX placeholders created for one document.
/// Also reused as the iteration cap of the children-protection loop.
const MAX_JSX_PLACEHOLDERS: usize = 1000;
/// Maximum same-tag nesting depth tolerated while searching for a closing tag.
const MAX_JSX_NESTING_DEPTH: usize = 100;
/// Matches a self-closing tag whose name starts with an uppercase ASCII letter
/// and whose attribute list contains at least one `{...}` expression.
/// Capture group 1 is the tag name.
static SELF_CLOSING_JSX_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"<([A-Z][a-zA-Z0-9]*)\s+[^>]*\{[^}]*\}[^>]*/\s*>")
.expect("Invalid self-closing JSX regex pattern")
});
/// Matches an opening tag whose name starts with an uppercase ASCII letter and
/// whose attribute list contains at least one `{...}` expression.
/// Capture group 1 is the tag name. The pattern does not exclude a trailing
/// `/`, so it also matches the self-closing form.
static OPENING_JSX_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"<([A-Z][a-zA-Z0-9]*)\s+[^>]*\{[^}]*\}[^>]*>")
.expect("Invalid opening JSX regex pattern")
});
/// Matches the start of any tag whose name begins with an uppercase ASCII
/// letter; capture group 1 is the tag name.
static COMPONENT_NAME_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"<([A-Z][a-zA-Z0-9]*)").expect("Invalid component name regex pattern")
});
/// Borrowed inputs shared by the rendering helpers for a single render call.
struct RenderContext<'a> {
/// JavaScript renderer used to evaluate the transformed component.
renderer: &'a JsRenderer,
/// Optional component definitions, keyed by name, forwarded to the renderer.
components: Option<&'a HashMap<String, ComponentDefinition>>,
/// Frontmatter serialized as a JSON string; passed to the renderer as props.
props_json: &'a str,
/// Render settings (output format, minification, utils, directive prefixes).
settings: &'a RenderSettings,
}
/// Builds the Markdown options used by this module.
///
/// Starting from the library defaults, raw HTML is enabled both as flow
/// (block) and text (inline) constructs, and the compiler is allowed to emit
/// that HTML verbatim (`allow_dangerous_html`).
fn markdown_options() -> Options {
    let constructs = Constructs {
        html_flow: true,
        html_text: true,
        ..Constructs::default()
    };
    let parse = ParseOptions {
        constructs,
        ..ParseOptions::default()
    };
    let compile = CompileOptions {
        allow_dangerous_html: true,
        ..CompileOptions::default()
    };
    Options { parse, compile }
}
/// Replaces JSX components that carry `{...}` expression attributes with
/// HTML-comment placeholders of the form `<!--JSX:N-->`.
///
/// Self-closing components are replaced first, walking the matches from the
/// end of the document towards the start so that earlier byte offsets stay
/// valid. Components with children are then handled by
/// `protect_jsx_with_children`.
///
/// Returns the rewritten text together with a map from placeholder to the
/// original JSX source it stands for. At most `MAX_JSX_PLACEHOLDERS`
/// placeholders are created; a warning is printed to stderr when that limit
/// stops self-closing replacement early.
fn protect_jsx_components(content: &str) -> (String, HashMap<String, String>) {
    if content.is_empty() {
        return (String::new(), HashMap::new());
    }

    // Rough sizing heuristic: a quarter of the (capped) number of `<`, at least 8.
    let capacity_hint = (content.matches('<').count().min(MAX_JSX_PLACEHOLDERS) / 4).max(8);
    let mut placeholders: HashMap<String, String> = HashMap::with_capacity(capacity_hint);
    let mut protected = content.to_owned();
    let mut next_id: usize = 0;

    let self_closing: Vec<_> = SELF_CLOSING_JSX_PATTERN.find_iter(content).collect();
    for found in self_closing.iter().rev() {
        if next_id >= MAX_JSX_PLACEHOLDERS {
            eprintln!(
                "Warning: JSX placeholder limit ({}) reached, some JSX may not be protected",
                MAX_JSX_PLACEHOLDERS
            );
            break;
        }

        let (from, to) = (found.start(), found.end());
        let original_jsx = found.as_str();

        // Defensive: only replace when the range is in bounds and still holds
        // exactly the matched text.
        if from >= protected.len() || to > protected.len() {
            continue;
        }
        if protected.get(from..to) != Some(original_jsx) {
            continue;
        }

        let token = format!("<!--JSX:{}-->", next_id);
        protected.replace_range(from..to, &token);
        placeholders.insert(token, original_jsx.to_string());
        next_id += 1;
    }

    protect_jsx_with_children(&mut protected, &mut placeholders, &mut next_id);
    (protected, placeholders)
}
/// Replaces components that have children and at least one `{...}` expression
/// attribute with `<!--JSX:N-->` placeholders, in place.
///
/// The document is scanned left to right. For every opening tag matched by
/// `OPENING_JSX_PATTERN` the matching closing tag is located with
/// `find_matching_close_tag`; the whole element (opening tag through closing
/// tag) is then swapped for a placeholder and recorded in `placeholders`.
///
/// * `content` - document text, rewritten in place.
/// * `placeholders` - receives `placeholder -> original JSX` entries.
/// * `counter` - next placeholder index; incremented per replacement.
///
/// An opening tag without a usable closing tag is skipped and scanning
/// continues after it, so one malformed element does not leave every later
/// component unprotected. The loop stops once `MAX_JSX_PLACEHOLDERS`
/// placeholders exist or after `MAX_JSX_PLACEHOLDERS` iterations.
fn protect_jsx_with_children(
    content: &mut String,
    placeholders: &mut HashMap<String, String>,
    counter: &mut usize,
) {
    let mut depth = 0;
    // Byte offset from which the next search starts; everything before it has
    // already been scanned (and either replaced or deliberately skipped).
    let mut search_from = 0;

    for _ in 0..MAX_JSX_PLACEHOLDERS {
        if *counter >= MAX_JSX_PLACEHOLDERS {
            break;
        }

        // Copy the offsets and tag name out of the match so the borrow of
        // `content` ends before the string is mutated below. This avoids
        // cloning the whole document on every iteration.
        let (open_pos, open_end, tag_name) = {
            let captures = match OPENING_JSX_PATTERN.captures_at(content.as_str(), search_from) {
                Some(captures) => captures,
                None => break,
            };
            match (captures.get(0), captures.get(1)) {
                (Some(whole), Some(name)) => {
                    (whole.start(), whole.end(), name.as_str().to_string())
                }
                _ => break,
            }
        };

        let closing_tag = format!("</{}>", tag_name);
        match find_matching_close_tag(content, open_end, &tag_name, &closing_tag, &mut depth) {
            Some(close_pos) => {
                if depth > MAX_JSX_NESTING_DEPTH {
                    eprintln!(
                        "Warning: JSX nesting depth ({}) exceeded limit ({})",
                        depth, MAX_JSX_NESTING_DEPTH
                    );
                    break;
                }
                let full_end = close_pos + closing_tag.len();
                if full_end > content.len() {
                    break;
                }
                let full_jsx = content[open_pos..full_end].to_string();
                let placeholder = format!("<!--JSX:{}-->", counter);
                content.replace_range(open_pos..full_end, &placeholder);
                // Resume right after the inserted placeholder.
                search_from = open_pos + placeholder.len();
                placeholders.insert(placeholder, full_jsx);
                *counter += 1;
            }
            None => {
                // No balanced closing tag for this element: leave it as is and
                // keep looking at the text that follows its opening tag.
                search_from = open_end;
            }
        }
    }
}
/// Finds the first occurrence of `open_prefix` (for example `<Card`) in
/// `haystack` that is a complete tag name, i.e. is not followed by another
/// name character. This keeps `<Card` from matching `<CardHeader` or
/// `<Card.Title`. `open_prefix` is expected to be non-empty.
fn find_tag_open(haystack: &str, open_prefix: &str) -> Option<usize> {
    let mut from = 0;
    while let Some(rel) = haystack[from..].find(open_prefix) {
        let idx = from + rel;
        let after = idx + open_prefix.len();
        let continues_name = haystack[after..].chars().next().map_or(false, |c| {
            c.is_alphanumeric() || matches!(c, '_' | '$' | '.' | '-' | ':')
        });
        if !continues_name {
            return Some(idx);
        }
        from = after;
    }
    None
}

/// Locates the closing tag that balances an already-consumed opening tag.
///
/// * `content` - full document text.
/// * `start` - byte offset just past the opening tag.
/// * `tag_name` - name of the element being balanced.
/// * `closing_tag` - literal closing tag text, e.g. `</Card>`.
/// * `depth` - raised to the deepest same-tag nesting level seen so far
///   (never lowered).
///
/// Returns the byte offset in `content` at which the matching closing tag
/// starts, or `None` when `start` is not a valid offset, no balanced closing
/// tag exists, or nesting exceeds `MAX_JSX_NESTING_DEPTH`.
///
/// Nested openings are recognised by the text `<tag_name` followed by a
/// non-name character. A nested self-closing element of the same name is
/// still counted as an opening.
fn find_matching_close_tag(
    content: &str,
    start: usize,
    tag_name: &str,
    closing_tag: &str,
    depth: &mut usize,
) -> Option<usize> {
    // `get` instead of slicing: an out-of-range or mid-character `start`
    // yields `None` rather than a panic.
    let search_region = content.get(start..)?;
    let open_prefix = format!("<{}", tag_name);
    let mut nesting: usize = 1;
    let mut pos = 0;

    while pos < search_region.len() {
        let rest = &search_region[pos..];
        // Without any closing tag ahead the element can never balance.
        let close_offset = rest.find(closing_tag)?;

        match find_tag_open(rest, &open_prefix) {
            Some(open_offset) if open_offset < close_offset => {
                nesting += 1;
                *depth = (*depth).max(nesting);
                if nesting > MAX_JSX_NESTING_DEPTH {
                    return None;
                }
                pos += open_offset + open_prefix.len();
            }
            _ => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(start + pos + close_offset);
                }
                pos += close_offset + closing_tag.len();
            }
        }
    }
    None
}
/// Substitutes every `<!--JSX:N-->` placeholder in `content` with the JSX
/// source recorded for it in `placeholders`.
///
/// Placeholders are restored from the highest index to the lowest. A
/// placeholder's JSX may itself contain placeholders with lower indices
/// (for example a self-closing component that was protected before its
/// parent element was), so the outer text has to be put back first for the
/// inner placeholders to be found. Keys that do not parse as
/// `<!--JSX:N-->` are treated as index 0.
///
/// Returns the restored text. Warnings are printed to stderr when not every
/// placeholder was found, or when placeholder markers remain in the output.
fn restore_jsx_components(content: &str, placeholders: &HashMap<String, String>) -> String {
    if placeholders.is_empty() {
        return content.to_string();
    }

    let mut ordered: Vec<_> = placeholders.iter().collect();
    ordered.sort_by_key(|(key, _)| {
        let index = key
            .strip_prefix("<!--JSX:")
            .and_then(|s| s.strip_suffix("-->"))
            .and_then(|s| s.parse::<usize>().ok())
            .unwrap_or(0);
        // Highest index first: outer elements before the ones nested in them.
        std::cmp::Reverse(index)
    });

    let mut result = content.to_string();
    let mut restored_count = 0;
    for (placeholder, jsx) in ordered {
        if result.contains(placeholder.as_str()) {
            result = result.replace(placeholder.as_str(), jsx);
            restored_count += 1;
        }
    }

    if restored_count != placeholders.len() {
        eprintln!(
            "Warning: JSX restoration incomplete - {} of {} placeholders restored",
            restored_count,
            placeholders.len()
        );
    }
    if result.contains("<!--JSX:") {
        eprintln!("Warning: Unreplaced JSX placeholders found in output");
    }
    result
}
/// Strips a `<Fragment ...>` / `</Fragment>` wrapper (either capitalisation)
/// from rendered HTML and returns the trimmed inner markup.
///
/// The opening tag is the first `<Fragment` in the trimmed input, or the
/// first `<fragment` when the capitalised form is absent. The closing tag is
/// the last `</Fragment>` after it, falling back to the last `</fragment>`.
/// When either tag is missing, the input is returned unchanged (untrimmed).
///
/// NOTE(review): text before the opening tag and after the closing tag is
/// discarded, and the opening match is a plain prefix match — confirm that
/// the renderer always emits the fragment as the outermost element.
fn unwrap_fragment(html: &str) -> String {
    let body = html.trim();

    let inner = ["<Fragment", "<fragment"]
        .iter()
        .find_map(|open| body.find(*open))
        .and_then(|open_at| {
            // Skip to just past the `>` that ends the opening tag.
            let from_open = &body[open_at..];
            from_open.find('>').map(|gt| &from_open[gt + 1..])
        })
        .and_then(|rest| {
            ["</Fragment>", "</fragment>"]
                .iter()
                .find_map(|close| rest.rfind(*close))
                .map(|close_at| rest[..close_at].trim())
        });

    match inner {
        Some(unwrapped) => unwrapped.to_string(),
        None => html.to_string(),
    }
}
/// Renders Markdown to HTML while keeping expression-bearing JSX intact.
///
/// JSX components are swapped for placeholders before the Markdown compiler
/// runs and swapped back into the resulting HTML afterwards.
///
/// # Errors
/// Returns `MdxError::MarkdownRender` when the Markdown compiler fails.
fn render_markdown(content: &str) -> Result<String, MdxError> {
    let (guarded, jsx_by_placeholder) = protect_jsx_components(content);
    match to_html_with_options(&guarded, &markdown_options()) {
        Ok(html) => Ok(restore_jsx_components(&html, &jsx_by_placeholder)),
        Err(e) => Err(MdxError::MarkdownRender(e.to_string())),
    }
}
/// Writes a render failure to stderr: the error formatted with `{:#}`,
/// prefixed by `context`, followed by the JavaScript that was being rendered.
fn log_render_error(e: &anyhow::Error, js_output: &str, context: &str) {
eprintln!("{context} render error details: {:#}", e);
eprintln!("JavaScript output: {js_output}");
}
/// Renders an MDX document, returning its frontmatter and rendered output.
///
/// The YAML frontmatter is parsed into a JSON value (an empty object when
/// absent), the remaining body is rendered from Markdown to HTML, and the
/// result is run through the engine pipeline with the frontmatter serialized
/// as the props JSON.
///
/// # Errors
/// Returns `MdxError::FrontmatterParse` when the frontmatter cannot be parsed
/// or serialized, and propagates errors from Markdown rendering and from the
/// engine pipeline.
pub fn mdx_to_html_with_frontmatter(
    mdx_content: &str,
    renderer: &JsRenderer,
    components: Option<&HashMap<String, ComponentDefinition>>,
    settings: &RenderSettings,
) -> Result<RenderedMdx, MdxError> {
    let parsed = Matter::<YAML>::new()
        .parse::<serde_json::Value>(mdx_content)
        .map_err(|e| MdxError::FrontmatterParse(e.to_string()))?;

    // Documents without frontmatter get an empty JSON object as metadata.
    let frontmatter = match parsed.data {
        Some(data) => data,
        None => serde_json::Value::Object(serde_json::Map::new()),
    };

    let html_output = render_markdown(&parsed.content)?;

    let props_json = match serde_json::to_string(&frontmatter) {
        Ok(serialized) => serialized,
        Err(e) => {
            return Err(MdxError::FrontmatterParse(format!(
                "Failed to serialize frontmatter: {e}"
            )))
        }
    };

    let output = render_with_engine_pipeline(
        &RenderContext {
            renderer,
            components,
            props_json: &props_json,
            settings,
        },
        &html_output,
    )?;

    Ok(RenderedMdx {
        metadata: frontmatter,
        output: Some(output),
    })
}
/// Builds a `RenderedMdx` describing a rendering failure.
///
/// The error chain (formatted with `{:#}`) is logged to stderr. The returned
/// value carries the raw message and chain in `metadata` under the keys
/// `error` and `error_chain`, and a one-paragraph HTML message in `output`.
///
/// The message is HTML-escaped before being placed in the paragraph: error
/// text may quote markup from the document (for example a component tag),
/// which would otherwise be interpreted as HTML instead of being shown.
pub fn create_error_response(error: &anyhow::Error) -> RenderedMdx {
    /// Escapes the characters that are significant in HTML text and attributes.
    fn escape_html(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                '\'' => escaped.push_str("&#39;"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    let error_chain = format!("{:#}", error);
    let error_message = error.to_string();
    eprintln!("MDX rendering error: {error_chain}");
    let error_html = format!(
        "<p>Error rendering MDX: {}</p>",
        escape_html(&error_message)
    );
    RenderedMdx {
        // Metadata keeps the unescaped text; it is data, not markup.
        metadata: json!({
            "error": error_message,
            "error_chain": error_chain
        }),
        output: Some(error_html),
    }
}
/// Serializable schema summary produced by `extract_schema_from_json`.
#[derive(serde::Serialize, Default)]
struct SchemaResult {
/// Sorted names of the components found in the rendered tree.
components: Vec<String>,
/// Directive attributes found in the rendered tree.
directives: DirectivesResult,
}
/// Directive attributes collected from a rendered tree, each list sorted.
#[derive(serde::Serialize, Default)]
struct DirectivesResult {
/// Attribute names that start with one of the configured directive prefixes.
keys: Vec<String>,
/// Wildcard patterns derived from those names (`ns:*` or `prefix*`).
patterns: Vec<String>,
/// Distinct JSON values carried by those attributes.
values: Vec<serde_json::Value>,
}
/// Summarises a rendered JSON tree as a serialized `SchemaResult`.
///
/// The tree is parsed, walked with `traverse_json_tree`, and the collected
/// component names, directive keys, directive patterns and directive values
/// are de-duplicated, sorted and serialized to a JSON string. Directive
/// values are ordered by their JSON text.
///
/// * `json_tree` - JSON text of the rendered tree.
/// * `directive_prefixes` - attribute-name prefixes treated as directives;
///   `None` means no attribute is treated as a directive.
///
/// # Errors
/// Returns `MdxError::FrontmatterParse` when `json_tree` is not valid JSON or
/// the result cannot be serialized.
fn extract_schema_from_json(
    json_tree: &str,
    directive_prefixes: Option<&Vec<String>>,
) -> Result<String, MdxError> {
    /// Drains a set into an ascending vector.
    fn into_sorted(set: HashSet<String>) -> Vec<String> {
        let mut items: Vec<String> = set.into_iter().collect();
        items.sort();
        items
    }

    let tree: serde_json::Value = match serde_json::from_str(json_tree) {
        Ok(value) => value,
        Err(e) => {
            return Err(MdxError::FrontmatterParse(format!(
                "Failed to parse JSON tree: {e}"
            )))
        }
    };

    let prefixes: Vec<&str> = match directive_prefixes {
        Some(list) => list.iter().map(String::as_str).collect(),
        None => Vec::new(),
    };

    let mut component_set: HashSet<String> = HashSet::new();
    let mut key_set: HashSet<String> = HashSet::new();
    let mut pattern_set: HashSet<String> = HashSet::new();
    // Values are collected as JSON text so they can live in a hash set.
    let mut raw_value_set: HashSet<String> = HashSet::new();
    traverse_json_tree(
        &tree,
        &prefixes,
        &mut component_set,
        &mut key_set,
        &mut pattern_set,
        &mut raw_value_set,
    );

    let mut values: Vec<serde_json::Value> = raw_value_set
        .into_iter()
        .filter_map(|raw| serde_json::from_str(&raw).ok())
        .collect();
    values.sort_by_cached_key(|value| value.to_string());

    let schema = SchemaResult {
        components: into_sorted(component_set),
        directives: DirectivesResult {
            keys: into_sorted(key_set),
            patterns: into_sorted(pattern_set),
            values,
        },
    };

    match serde_json::to_string(&schema) {
        Ok(serialized) => Ok(serialized),
        Err(e) => Err(MdxError::FrontmatterParse(format!(
            "Failed to serialize schema: {e}"
        ))),
    }
}
/// Recursively walks a JSON tree, collecting component names and directive
/// attributes into the supplied sets.
///
/// For every JSON object:
/// * a string `type` that starts with an uppercase character and is not
///   `"Fragment"` is recorded in `components`;
/// * each key of an `attributes` object that starts with one of `prefixes`
///   is recorded in `directive_keys`, a wildcard pattern for it is recorded
///   in `directive_patterns` (`<text before ':'>:*` when the key contains a
///   colon, otherwise `<prefix>*`), and its value, serialized as JSON text,
///   is recorded in `directive_values`;
/// * every value of the object is then visited.
///
/// Arrays are visited element by element; other values are ignored.
///
/// Each value is visited exactly once. `children` is reached through the
/// generic walk over the object's values, so it needs no separate visit;
/// visiting it twice would double the work at every nesting level without
/// changing the collected sets.
fn traverse_json_tree(
    node: &serde_json::Value,
    prefixes: &[&str],
    components: &mut HashSet<String>,
    directive_keys: &mut HashSet<String>,
    directive_patterns: &mut HashSet<String>,
    directive_values: &mut HashSet<String>,
) {
    match node {
        serde_json::Value::Object(obj) => {
            if let Some(serde_json::Value::String(tag)) = obj.get("type") {
                let starts_uppercase = tag.chars().next().map_or(false, char::is_uppercase);
                if starts_uppercase && tag != "Fragment" {
                    components.insert(tag.clone());
                }
            }

            if let Some(serde_json::Value::Object(attrs)) = obj.get("attributes") {
                for (key, value) in attrs {
                    // Only the first matching prefix is used for a given key.
                    let matched = prefixes.iter().find(|prefix| key.starts_with(**prefix));
                    if let Some(prefix) = matched {
                        directive_keys.insert(key.clone());
                        let pattern = match key.split_once(':') {
                            Some((namespace, _)) => format!("{}:*", namespace),
                            None => format!("{}*", prefix),
                        };
                        directive_patterns.insert(pattern);
                        if let Ok(value_str) = serde_json::to_string(value) {
                            directive_values.insert(value_str);
                        }
                    }
                }
            }

            for value in obj.values() {
                traverse_json_tree(
                    value,
                    prefixes,
                    components,
                    directive_keys,
                    directive_patterns,
                    directive_values,
                );
            }
        }
        serde_json::Value::Array(arr) => {
            for item in arr {
                traverse_json_tree(
                    item,
                    prefixes,
                    components,
                    directive_keys,
                    directive_patterns,
                    directive_values,
                );
            }
        }
        _ => {}
    }
}
/// Collects the component names relevant to a render: every capitalised tag
/// name found in `html_output`, plus the name of each registered component
/// (its explicit `name`, or its map key when no name is set).
///
/// Returns `None` when no name was found.
fn collect_component_names(
    context: &RenderContext<'_>,
    html_output: &str,
) -> Option<HashSet<String>> {
    let mut names: HashSet<String> = COMPONENT_NAME_PATTERN
        .captures_iter(html_output)
        .filter_map(|cap| cap.get(1).map(|name| name.as_str().to_string()))
        .collect();

    if let Some(components) = context.components {
        names.extend(
            components
                .iter()
                .map(|(key, comp_def)| comp_def.name.clone().unwrap_or_else(|| key.clone())),
        );
    }

    if names.is_empty() {
        None
    } else {
        Some(names)
    }
}

/// Turns rendered Markdown HTML into the output format requested in
/// `context.settings.output`.
///
/// The HTML is first transformed from TSX to JavaScript. For the `Schema` and
/// `Json` formats the transform is told which component names are in play.
/// The JavaScript is then:
/// * `Schema` - rendered to a JSON tree and summarised by
///   `extract_schema_from_json`;
/// * `Html` - rendered and stripped of its fragment wrapper;
/// * `Javascript` - rendered and transformed again for output, honouring
///   `settings.minify`;
/// * `Json` - rendered to a JSON tree.
///
/// # Errors
/// Returns `MdxError::TsxTransform` when a transform or render step fails,
/// and propagates errors from schema extraction.
fn render_with_engine_pipeline(
    context: &RenderContext<'_>,
    html_output: &str,
) -> Result<String, MdxError> {
    let mut transform_config = TsxTransformConfig::for_engine(false);
    if matches!(
        context.settings.output,
        OutputFormat::Schema | OutputFormat::Json
    ) {
        // Leave the config's default untouched when no names were found.
        if let Some(names) = collect_component_names(context, html_output) {
            transform_config.component_names = Some(names);
        }
    }

    let javascript_output = transform_tsx_to_js_with_config(html_output, transform_config)
        .map_err(|e| {
            MdxError::TsxTransform(format!("Failed to transform TSX to JavaScript: {e}"))
        })?;

    match context.settings.output {
        OutputFormat::Schema => {
            let json_tree = render_template_to_schema(context, &javascript_output)?;
            extract_schema_from_json(&json_tree, context.settings.directives.as_ref())
        }
        OutputFormat::Html => {
            let template_output = render_template(context, &javascript_output)?;
            Ok(unwrap_fragment(&template_output))
        }
        OutputFormat::Javascript => {
            let template_output = render_template(context, &javascript_output)?;
            transform_tsx_to_js_for_output(&template_output, context.settings.minify).map_err(
                |e| {
                    MdxError::TsxTransform(format!(
                        "Failed to transform template to JavaScript: {e}"
                    ))
                },
            )
        }
        OutputFormat::Json => {
            // NOTE(review): the template render result is discarded here; it is
            // kept so that a template render failure still fails JSON output.
            // Confirm whether this extra render is required.
            render_template(context, &javascript_output)?;
            render_template_to_schema(context, &javascript_output)
        }
    }
}
/// Renders the transformed component through the JavaScript renderer,
/// passing the props JSON, the registered components and the optional utils
/// from the settings.
///
/// # Errors
/// On failure the error and the JavaScript source are logged to stderr and
/// `MdxError::TsxTransform` is returned.
fn render_template(
    context: &RenderContext<'_>,
    javascript_output: &str,
) -> Result<String, MdxError> {
    let rendered = context.renderer.render_transformed_component(
        javascript_output,
        Some(context.props_json),
        context.components,
        context.settings.utils.as_deref(),
    );

    match rendered {
        Ok(output) => Ok(output),
        Err(err) => {
            log_render_error(&err, javascript_output, "Component");
            Err(MdxError::TsxTransform(format!(
                "Failed to render component template: {:#}",
                err
            )))
        }
    }
}
/// Renders the transformed component to its schema (JSON tree) form through
/// the JavaScript renderer, passing the props JSON, the registered components
/// and the optional utils from the settings.
///
/// # Errors
/// On failure the error and the JavaScript source are logged to stderr and
/// `MdxError::TsxTransform` is returned.
fn render_template_to_schema(
    context: &RenderContext<'_>,
    javascript_output: &str,
) -> Result<String, MdxError> {
    let rendered = context.renderer.render_transformed_component_to_schema(
        javascript_output,
        Some(context.props_json),
        context.components,
        context.settings.utils.as_deref(),
    );

    match rendered {
        Ok(tree) => Ok(tree),
        Err(err) => {
            log_render_error(&err, javascript_output, "Schema");
            Err(MdxError::TsxTransform(format!(
                "Failed to render component to schema: {:#}",
                err
            )))
        }
    }
}
// Unit tests for the JSX placeholder protection / restoration helpers.
#[cfg(test)]
mod tests {
use super::*;
// Plain Markdown contains nothing to protect and must pass through untouched.
#[test]
fn test_protect_jsx_no_jsx() {
let content = "# Hello World\n\nThis is plain markdown.";
let (result, placeholders) = protect_jsx_components(content);
assert_eq!(result, content);
assert!(placeholders.is_empty());
}
// Components whose attributes are all plain strings are left in place.
#[test]
fn test_protect_jsx_without_expression_attributes() {
let content = r#"<Card title="Test">Content</Card>"#;
let (result, placeholders) = protect_jsx_components(content);
assert_eq!(result, content);
assert!(placeholders.is_empty());
}
// A self-closing component with a `{...}` attribute becomes one placeholder.
#[test]
fn test_protect_self_closing_jsx_with_expression() {
let content = r#"<Hero title={context("title")} />"#;
let (result, placeholders) = protect_jsx_components(content);
assert!(result.contains("<!--JSX:"));
assert_eq!(placeholders.len(), 1);
assert!(placeholders.values().any(|v| v.contains("Hero")));
}
// Each expression-bearing component gets its own placeholder.
#[test]
fn test_protect_multiple_jsx_components() {
let content = r#"
<Hero title={context("title")} />
<Card data={props.data} />
"#;
let (result, placeholders) = protect_jsx_components(content);
assert_eq!(placeholders.len(), 2);
assert!(!result.contains("<Hero"));
assert!(!result.contains("<Card"));
}
// A component with children is replaced as a whole (opening tag to closing tag).
#[test]
fn test_protect_jsx_with_children_and_expression() {
let content = r#"<Container theme={props.theme}>Child content</Container>"#;
let (result, placeholders) = protect_jsx_components(content);
assert!(result.contains("<!--JSX:"));
assert_eq!(placeholders.len(), 1);
}
// Protect followed by restore brings every component back.
#[test]
fn test_restore_multiple_jsx_components() {
let content = r#"
# Title
<Hero title={context("title")} />
Some text
<Card data={props.data} />
"#;
let (protected, placeholders) = protect_jsx_components(content);
let restored = restore_jsx_components(&protected, &placeholders);
assert!(restored.contains("<Hero"));
assert!(restored.contains("<Card"));
assert!(restored.contains("context(\"title\")"));
}
// Text around a protected component is untouched and the round trip is exact.
#[test]
fn test_protect_jsx_preserves_surrounding_content() {
let content = "# Title\n\n<Hero title={data} />\n\nMore content";
let (result, placeholders) = protect_jsx_components(content);
assert!(result.contains("# Title"));
assert!(result.contains("More content"));
assert_eq!(placeholders.len(), 1);
let restored = restore_jsx_components(&result, &placeholders);
assert_eq!(restored, content);
}
// Searching from just past the outer `<div>` (offset 5) must skip the inner
// pair and report the outer closing tag at offset 26.
#[test]
fn test_find_matching_close_tag_nested() {
let content = "<div><div>inner</div>outer</div>";
let mut depth = 0;
let result = find_matching_close_tag(content, 5, "div", "</div>", &mut depth);
assert_eq!(result, Some(26));
assert!(depth >= 2); }
// Lowercase (plain HTML) tags are never turned into placeholders.
#[test]
fn test_protect_jsx_lowercase_tags_ignored() {
let content = r#"<div class={style}>Content</div>"#;
let (_result, placeholders) = protect_jsx_components(content);
assert!(placeholders.is_empty() || !placeholders.values().any(|v| v.starts_with("<div")));
}
}