use std::{path::Path, sync::Arc};
use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
use dmc_diagnostic::{
Code,
metadata::{Origin, SourceMeta},
};
use dmc_lexer::Lexer;
use dmc_parser::{Parser, ast::Document};
use dmc_transform::{CopyLinkedFilesOptions, MathEngine, MermaidOptions, PipelineConfig, PrettyCodeOptions};
use duck_diagnostic::DiagnosticEngine;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::engine::accumulator::Accumulator;
/// User-facing options for a single compile run.
///
/// `#[serde(default)]` is set at container level, so any field missing from the
/// serialized input takes its value from this type's `Default` implementation.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(default)]
pub struct CompileConfig {
/// GFM toggle forwarded to `PipelineConfig::markdown_gfm`; `pipeline_config`
/// forces it to `false` when `remark-gfm` is routed to the sidecar.
pub markdown_gfm: bool,
/// When `true`, `Compiler::finalize` runs an `HtmlEmitter`; otherwise the HTML
/// passed on to the output is an empty string. Overwritten by `for_render`.
pub emit_html: bool,
/// When `true`, `Compiler::finalize` runs an `MdxBodyEmitter`; otherwise the
/// body passed on to the output is an empty string.
pub emit_body: bool,
/// Not read anywhere in this file.
/// NOTE(review): presumably consumed by a later MDX stage — confirm.
pub mdx_minify: bool,
/// Not read anywhere in this file.
/// NOTE(review): presumably consumed by a later MDX stage — confirm.
pub mdx_output_format: Option<String>,
/// Remark plugin specs. Each entry is either a string name or an array whose
/// first element is the name; other shapes have no name and are always kept.
/// NOTE(review): presumably applied to plain Markdown sources — confirm.
pub markdown_remark_plugins: Vec<Value>,
/// Rehype plugin specs, same entry shapes as `markdown_remark_plugins`.
pub markdown_rehype_plugins: Vec<Value>,
/// Remark plugin specs, same entry shapes as `markdown_remark_plugins`.
/// NOTE(review): presumably applied to MDX sources — confirm.
pub mdx_remark_plugins: Vec<Value>,
/// Rehype plugin specs, same entry shapes as `markdown_remark_plugins`.
pub mdx_rehype_plugins: Vec<Value>,
/// Requests `CopyLinkedFilesOptions` in `pipeline_config`; only takes effect
/// when both `output_assets` and `output_base` are set.
pub copy_linked_files: bool,
/// Becomes `CopyLinkedFilesOptions::assets_dir`.
pub output_assets: Option<String>,
/// Becomes `CopyLinkedFilesOptions::public_base`.
pub output_base: Option<String>,
/// Passed through to `PipelineConfig::pretty_code` unless pretty-code is
/// routed to the sidecar.
pub pretty_code: Option<PrettyCodeOptions>,
/// Passed through to `PipelineConfig::mermaid` unless mermaid is routed to
/// the sidecar.
pub mermaid: Option<MermaidOptions>,
/// Passed through to `PipelineConfig::math_engine` unless math is routed to
/// the sidecar.
pub math_engine: Option<MathEngine>,
/// When `true`, every named plugin is kept in the `effective_*` lists and
/// `pipeline_config` switches off every native stage it controls.
pub force_sidecar: bool,
/// Copied into `RenderOptions::allow_dangerous_html` for both emitters.
pub allow_dangerous_html: bool,
/// Plugin names the user wants kept in the `effective_*` lists even when a
/// native implementation exists; also consulted by `pipeline_config`.
pub prefer_sidecar: Vec<String>,
}
impl Default for CompileConfig {
    /// Baseline configuration: GFM on, both HTML and MDX body emitted, no plugins,
    /// no asset copying, no optional transformers, and raw HTML disallowed.
    ///
    /// Fields are listed in the same order as the struct declaration.
    fn default() -> Self {
        Self {
            markdown_gfm: true,
            emit_html: true,
            emit_body: true,
            mdx_minify: false,
            mdx_output_format: None,
            markdown_remark_plugins: Vec::new(),
            markdown_rehype_plugins: Vec::new(),
            mdx_remark_plugins: Vec::new(),
            mdx_rehype_plugins: Vec::new(),
            copy_linked_files: false,
            output_assets: None,
            output_base: None,
            pretty_code: None,
            mermaid: None,
            math_engine: None,
            force_sidecar: false,
            allow_dangerous_html: false,
            prefer_sidecar: Vec::new(),
        }
    }
}
impl CompileConfig {
pub fn new() -> Self {
Self::default()
}
pub fn has_js_plugins(&self) -> bool {
!self.effective_markdown_remark_plugins().is_empty()
|| !self.effective_mdx_remark_plugins().is_empty()
|| !self.effective_markdown_rehype_plugins().is_empty()
|| !self.effective_mdx_rehype_plugins().is_empty()
}
pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
self.filter_native_owned_remark(&self.markdown_remark_plugins)
}
pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
self.filter_native_owned_remark(&self.mdx_remark_plugins)
}
pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
self.filter_native_owned_rehype(&self.markdown_rehype_plugins)
}
pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
self.filter_native_owned_rehype(&self.mdx_rehype_plugins)
}
fn user_forces_sidecar(&self, name: &str) -> bool {
self.force_sidecar || self.prefer_sidecar.iter().any(|n| n == name)
}
fn filter_native_owned_remark(&self, plugins: &[Value]) -> Vec<Value> {
plugins
.iter()
.filter(|p| {
let Some(name) = plugin_name(p) else { return true };
if self.user_forces_sidecar(name) {
return true;
}
!is_native_owned_remark(p)
})
.cloned()
.collect()
}
fn filter_native_owned_rehype(&self, plugins: &[Value]) -> Vec<Value> {
plugins
.iter()
.filter(|p| {
let Some(name) = plugin_name(p) else { return true };
if self.user_forces_sidecar(name) {
return true;
}
!is_native_owned_rehype(p)
})
.cloned()
.collect()
}
pub fn for_render(&self) -> Self {
let mut c = self.clone();
c.emit_html = !self.has_js_plugins();
c
}
pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
let copy_linked_files = if self.copy_linked_files
&& let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
{
Some(CopyLinkedFilesOptions {
source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
assets_dir: assets.into(),
public_base: public.clone(),
})
} else {
None
};
let prefers = |needles: &[&str]| -> bool {
self.force_sidecar || self.prefer_sidecar.iter().any(|n| needles.contains(&n.as_str()))
};
let drop_pretty_code = prefers(&["rehype-pretty-code", "shiki"]);
let drop_math = prefers(&["remark-math", "rehype-katex", "rehype-mathjax"]);
let drop_emoji = prefers(&["remark-emoji"]);
let drop_autolink_headings = prefers(&["rehype-slug", "rehype-autolink-headings"]);
let drop_gfm = prefers(&["remark-gfm"]);
let drop_mermaid = prefers(&["mermaid", "rehype-mermaid", "remark-mermaid"]);
PipelineConfig {
markdown_gfm: Some(if drop_gfm { false } else { self.markdown_gfm }),
pretty_code: if drop_pretty_code { None } else { self.pretty_code.clone() },
math_engine: if drop_math { None } else { self.math_engine },
copy_linked_files,
emoji: if drop_emoji { Some(false) } else { None },
autolink_headings: if drop_autolink_headings { Some(false) } else { None },
math: if drop_math { Some(false) } else { None },
pretty_code_enabled: if drop_pretty_code { Some(false) } else { None },
mermaid: if drop_mermaid { None } else { self.mermaid.clone() },
mermaid_enabled: if drop_mermaid { Some(false) } else { None },
}
}
}
/// Extracts the plugin name from a plugin spec.
///
/// A spec is either a bare string (`"remark-gfm"`) or an array whose first element is
/// the name (`["rehype-katex", { ... }]`). Any other shape, an empty array, or an array
/// whose first element is not a string yields `None`.
fn plugin_name(plugin: &Value) -> Option<&str> {
    if let Value::Array(entries) = plugin {
        return entries.first()?.as_str();
    }
    // `as_str` is `Some` only for `Value::String`, so every remaining shape maps to `None`.
    plugin.as_str()
}
/// Reports whether a remark plugin spec is covered by a native implementation in this
/// build: `remark-gfm` always, `remark-math` and `remark-emoji` only when the `math` /
/// `emoji` cargo feature is enabled. Unnamed or unknown specs are never native-owned.
// `cfg!` evaluates to a boolean literal, so after expansion every arm is a plain
// `true`/`false`. NOTE(review): presumably that is what trips the lint; the explicit
// match keeps the per-feature mapping readable.
#[allow(clippy::match_like_matches_macro)]
fn is_native_owned_remark(plugin: &Value) -> bool {
    match plugin_name(plugin) {
        Some("remark-gfm") => true,
        Some("remark-math") => cfg!(feature = "math"),
        Some("remark-emoji") => cfg!(feature = "emoji"),
        _ => false,
    }
}
/// Reports whether a rehype plugin spec is covered by a native implementation in this
/// build: `rehype-slug` / `rehype-autolink-headings` always, `rehype-pretty-code` /
/// `shiki` with the `pretty-code` feature, `rehype-katex` / `rehype-mathjax` with the
/// `math` feature. Unnamed or unknown specs are never native-owned.
// `cfg!` evaluates to a boolean literal, so after expansion every arm is a plain
// `true`/`false`. NOTE(review): presumably that is what trips the lint; the explicit
// match keeps the per-feature mapping readable.
#[allow(clippy::match_like_matches_macro)]
fn is_native_owned_rehype(plugin: &Value) -> bool {
    match plugin_name(plugin) {
        Some("rehype-pretty-code" | "shiki") => cfg!(feature = "pretty-code"),
        Some("rehype-katex" | "rehype-mathjax") => cfg!(feature = "math"),
        Some("rehype-slug" | "rehype-autolink-headings") => true,
        _ => false,
    }
}
/// Stateless entry point that turns source text into a [`CompileOutput`].
pub struct Compiler;

impl Compiler {
    /// Compiles `source` with a default [`CompileConfig`], using `"."` as the source path.
    ///
    /// Diagnostics raised along the way are reported through `diag_engine`.
    pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
        let default_cfg = CompileConfig::new();
        Self::compile_with_pipeline(source, Path::new("."), &default_cfg, diag_engine)
    }

    /// Runs the full chain for one document: optional math preprocessing, lexing,
    /// parsing, the transform pipeline derived from `compile_cfg`, then code generation.
    ///
    /// `path` labels the source in diagnostics metadata and is handed to
    /// `CompileConfig::pipeline_config`.
    pub fn compile_with_pipeline(
        source: &str,
        path: &Path,
        compile_cfg: &CompileConfig,
        diag_engine: &mut DiagnosticEngine<Code>,
    ) -> CompileOutput {
        let display_path = path.display().to_string();
        let meta = Arc::from(SourceMeta { path: Arc::from(display_path), origin: Origin::File(path.into()) });

        // With the `math` feature the rest of the function (including `finalize`) sees
        // the preprocessed text, because `source` is shadowed here.
        #[cfg(feature = "math")]
        let preprocessed = dmc_transform::Math::preprocess_source(source);
        #[cfg(feature = "math")]
        let source: &str = &preprocessed;

        let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
        // The return value is deliberately discarded; the tokens are read from `lexer.tokens`.
        let _ = lexer.scan_tokens();

        let mut document = Parser::new(lexer.tokens, meta.clone(), diag_engine).parse();

        let transform_cfg = compile_cfg.pipeline_config(path);
        let transforms = dmc_transform::Pipeline::with_defaults_for(&transform_cfg);
        transforms.run(&mut document, &meta, diag_engine);

        Self::finalize(source, document, compile_cfg, diag_engine)
    }

    /// Walks the transformed document once, feeding the accumulator plus whichever
    /// emitters `compile_cfg` enables, and assembles the final output.
    ///
    /// A disabled emitter contributes an empty string. Emitter diagnostics are appended
    /// to `diag_engine`, HTML first and MDX body second.
    fn finalize(
        source: &str,
        doc: Document,
        compile_cfg: &CompileConfig,
        diag_engine: &mut DiagnosticEngine<Code>,
    ) -> CompileOutput {
        let mut acc = Accumulator::new();
        let render_opts =
            dmc_codegen::RenderOptions { allow_dangerous_html: compile_cfg.allow_dangerous_html, ..Default::default() };

        let mut html_sink = compile_cfg.emit_html.then(|| HtmlEmitter::new_with_options(render_opts));
        let mut body_sink = compile_cfg.emit_body.then(|| MdxBodyEmitter::new_with_options(render_opts));

        // Scoped so the mutable borrows held by the sink list end before the sinks are
        // consumed below.
        {
            let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
            sinks.push(&mut acc);
            if let Some(html_emitter) = html_sink.as_mut() {
                sinks.push(html_emitter);
            }
            if let Some(body_emitter) = body_sink.as_mut() {
                sinks.push(body_emitter);
            }
            Walker::new(&doc).walk(sinks.as_mut_slice());
        }

        let html = match html_sink {
            Some(emitter) => {
                let (markup, diagnostics) = emitter.into_parts();
                diag_engine.extend(diagnostics);
                markup
            },
            None => String::new(),
        };
        let body = match body_sink {
            Some(emitter) => {
                let (mdx, diagnostics) = emitter.into_parts();
                diag_engine.extend(diagnostics);
                mdx
            },
            None => String::new(),
        };

        acc.into_compile_output(source, html, body, compile_cfg)
    }
}
/// Summary statistics attached to a compiled document.
///
/// Serialized with camelCase keys (`readingTime`, `wordCount`). The values are not
/// computed in this file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
/// NOTE(review): presumably an estimated reading time in minutes — confirm unit at the producer.
pub reading_time: u32,
/// NOTE(review): presumably the number of words in the document — confirm at the producer.
pub word_count: u32,
}
/// One table-of-contents entry; entries nest recursively through `items`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocItem {
/// NOTE(review): presumably the heading text — confirm at the producer.
pub title: String,
/// NOTE(review): presumably the anchor link for the heading — confirm at the producer.
pub url: String,
/// Child entries of this entry.
pub items: Vec<TocItem>,
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// With no plugins configured there is nothing for the sidecar to do.
    #[test]
    fn empty_plugin_lists_no_sidecar() {
        assert!(!CompileConfig::default().has_js_plugins());
    }

    /// A plugin without a native replacement must be routed to the sidecar.
    #[test]
    fn arbitrary_remark_plugin_triggers_sidecar() {
        let cfg =
            CompileConfig { markdown_remark_plugins: vec![json!("remark-frontmatter")], ..CompileConfig::default() };
        assert!(cfg.has_js_plugins());
    }

    /// `remark-gfm` is always native-owned.
    #[test]
    fn remark_gfm_alone_skips_sidecar() {
        let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-gfm")], ..CompileConfig::default() };
        assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
    }

    /// Both the string form and the `[name, options]` form are recognised.
    #[test]
    fn rehype_slug_and_autolink_alone_skip_sidecar() {
        let cfg = CompileConfig {
            markdown_rehype_plugins: vec![
                json!("rehype-slug"),
                json!(["rehype-autolink-headings", { "behavior": "wrap" }]),
            ],
            ..CompileConfig::default()
        };
        assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
    }

    #[cfg(feature = "math")]
    #[test]
    fn remark_math_alone_with_native_skips_sidecar() {
        let cfg = CompileConfig {
            markdown_remark_plugins: vec![json!("remark-math")],
            markdown_rehype_plugins: vec![json!(["rehype-katex", { "errorColor": "red" }])],
            ..CompileConfig::default()
        };
        assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
    }

    #[cfg(feature = "emoji")]
    #[test]
    fn remark_emoji_alone_with_native_skips_sidecar() {
        let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-emoji")], ..CompileConfig::default() };
        assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
    }

    #[cfg(feature = "pretty-code")]
    #[test]
    fn rehype_pretty_code_alone_with_native_skips_sidecar() {
        let cfg = CompileConfig {
            markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
            mdx_rehype_plugins: vec![json!(["rehype-pretty-code", { "theme": "github-dark" }]), json!("shiki")],
            ..CompileConfig::default()
        };
        assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
    }

    /// One native-owned plugin does not hide a sibling that still needs the sidecar.
    #[cfg(feature = "pretty-code")]
    #[test]
    fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
        let cfg = CompileConfig {
            markdown_rehype_plugins: vec![json!("rehype-pretty-code"), json!("rehype-external-links")],
            ..CompileConfig::default()
        };
        assert!(cfg.has_js_plugins());
    }

    #[cfg(not(feature = "pretty-code"))]
    #[test]
    fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
        let cfg =
            CompileConfig { markdown_rehype_plugins: vec![json!("rehype-pretty-code")], ..CompileConfig::default() };
        assert!(cfg.has_js_plugins());
    }

    /// By default raw HTML must not reach either output.
    #[test]
    fn compile_does_not_ship_raw_script_in_mdx_body() {
        let mut diagnostics = DiagnosticEngine::<Code>::new();
        let output = Compiler::compile("<script>alert(1)</script>\n", &mut diagnostics);
        assert!(
            !output.body.contains("dangerouslySetInnerHTML"),
            "raw HTML leaked as live dangerouslySetInnerHTML in MDX body:\n{}",
            output.body
        );
        assert!(!output.body.contains("<script>"), "raw <script> leaked into MDX body:\n{}", output.body);
        assert!(!output.html.contains("<script>"), "raw <script> leaked into HTML output:\n{}", output.html);
    }

    /// Opting in through `allow_dangerous_html` lets raw HTML through to the MDX body.
    #[test]
    fn compile_with_allow_dangerous_html_emits_raw_html() {
        let mut diagnostics = DiagnosticEngine::<Code>::new();
        let cfg = CompileConfig { allow_dangerous_html: true, ..CompileConfig::default() };
        let output = Compiler::compile_with_pipeline("<div>raw</div>\n", Path::new("."), &cfg, &mut diagnostics);
        assert!(
            output.body.contains("dangerouslySetInnerHTML"),
            "opt-in raw HTML not emitted in MDX body:\n{}",
            output.body
        );
    }
}
/// Result of compiling one document, as returned by `Compiler`.
///
/// Serialized with camelCase keys (for example `frontmatter_raw` becomes
/// `frontmatterRaw`). The value is assembled by `Accumulator::into_compile_output`,
/// which is not part of this file, so most field descriptions below are hedged.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CompileOutput {
/// NOTE(review): presumably the parsed frontmatter — confirm at the producer.
pub frontmatter: serde_json::Value,
/// NOTE(review): presumably the unparsed frontmatter text — confirm at the producer.
pub frontmatter_raw: String,
/// NOTE(review): presumably the source text handed to the accumulator — confirm.
pub content: String,
/// HTML output. `Compiler::finalize` supplies the `HtmlEmitter` result, or an
/// empty string when `emit_html` is off.
pub html: String,
/// MDX body output. `Compiler::finalize` supplies the `MdxBodyEmitter` result,
/// or an empty string when `emit_body` is off.
pub body: String,
/// NOTE(review): presumably a short plain-text summary — confirm at the producer.
pub excerpt: String,
/// Document statistics; see `Metadata`.
pub metadata: Metadata,
/// Top-level table-of-contents entries; see `TocItem`.
pub toc: Vec<TocItem>,
/// NOTE(review): presumably the document's import statements — confirm at the producer.
pub imports: Vec<String>,
/// NOTE(review): presumably the document's export statements — confirm at the producer.
pub exports: Vec<String>,
}