1use std::{path::Path, sync::Arc};
2
3use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
4use dmc_diagnostic::{
5 Code,
6 metadata::{Origin, SourceMeta},
7};
8use dmc_lexer::Lexer;
9use dmc_parser::{Parser, ast::Document};
10use dmc_transform::{CopyLinkedFilesOptions, MathEngine, MermaidOptions, PipelineConfig, PrettyCodeOptions};
11use duck_diagnostic::DiagnosticEngine;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::engine::accumulator::Accumulator;
16
/// User-facing options controlling a single compile.
///
/// The container-level `#[serde(default)]` means any field missing from the
/// deserialized input takes its value from the `Default` impl of this type.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(default)]
pub struct CompileConfig {
    /// Forwarded to `PipelineConfig::markdown_gfm` by `pipeline_config`; forced to
    /// `false` there when `remark-gfm` is routed to the sidecar.
    pub markdown_gfm: bool,
    /// When `true`, `Compiler::finalize` attaches an `HtmlEmitter` sink; when `false`
    /// the `html` output is an empty string. Note that `for_render` overwrites it.
    pub emit_html: bool,
    /// When `true`, `Compiler::finalize` attaches an `MdxBodyEmitter` sink; when
    /// `false` the `body` output is an empty string.
    pub emit_body: bool,
    /// Not read in this part of the file; presumably a minification switch for the
    /// MDX output — TODO confirm against the consumer.
    pub mdx_minify: bool,
    /// Not read in this part of the file; presumably selects the MDX output
    /// format — TODO confirm accepted values.
    pub mdx_output_format: Option<String>,
    /// Remark plugins configured for Markdown. Each entry is a JSON value whose name
    /// is read from a string entry or from the first element of an array entry.
    pub markdown_remark_plugins: Vec<Value>,
    /// Rehype plugins configured for Markdown (same entry shape as above).
    pub markdown_rehype_plugins: Vec<Value>,
    /// Remark plugins configured for MDX (same entry shape as above).
    pub mdx_remark_plugins: Vec<Value>,
    /// Rehype plugins configured for MDX (same entry shape as above).
    pub mdx_rehype_plugins: Vec<Value>,
    /// Enables `CopyLinkedFilesOptions` in `pipeline_config`, but only when both
    /// `output_assets` and `output_base` are also set.
    pub copy_linked_files: bool,
    /// Becomes `CopyLinkedFilesOptions::assets_dir` when linked-file copying is active.
    pub output_assets: Option<String>,
    /// Becomes `CopyLinkedFilesOptions::public_base` when linked-file copying is active.
    pub output_base: Option<String>,
    /// Forwarded to `PipelineConfig::pretty_code` unless pretty-code is routed to the sidecar.
    pub pretty_code: Option<PrettyCodeOptions>,
    /// Forwarded to `PipelineConfig::mermaid` unless mermaid is routed to the sidecar.
    pub mermaid: Option<MermaidOptions>,
    /// Forwarded to `PipelineConfig::math_engine` unless math is routed to the sidecar.
    pub math_engine: Option<MathEngine>,
    /// When `true`, every named plugin is treated as sidecar-routed and every
    /// native feature toggled by `pipeline_config` is switched off.
    pub force_sidecar: bool,
    /// Forwarded to `dmc_codegen::RenderOptions::allow_dangerous_html` in `Compiler::finalize`.
    pub allow_dangerous_html: bool,
    /// Plugin names that are routed to the sidecar even when a native
    /// implementation would otherwise own them (exact string match).
    pub prefer_sidecar: Vec<String>,
}
52
53impl Default for CompileConfig {
54 fn default() -> Self {
55 Self {
56 markdown_gfm: true,
57 emit_html: true,
58 emit_body: true,
59 mdx_output_format: None,
60 mdx_minify: false,
61 markdown_remark_plugins: vec![],
62 markdown_rehype_plugins: vec![],
63 mdx_remark_plugins: vec![],
64 mdx_rehype_plugins: vec![],
65 copy_linked_files: false,
66 output_assets: None,
67 output_base: None,
68 pretty_code: None,
69 mermaid: None,
70 math_engine: None,
71 force_sidecar: false,
72 prefer_sidecar: vec![],
73 allow_dangerous_html: false,
74 }
75 }
76}
77
78impl CompileConfig {
79 pub fn new() -> Self {
80 Self::default()
81 }
82
83 pub fn has_js_plugins(&self) -> bool {
84 !self.effective_markdown_remark_plugins().is_empty()
85 || !self.effective_mdx_remark_plugins().is_empty()
86 || !self.effective_markdown_rehype_plugins().is_empty()
87 || !self.effective_mdx_rehype_plugins().is_empty()
88 }
89
90 pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
94 self.filter_native_owned_remark(&self.markdown_remark_plugins)
95 }
96
97 pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
98 self.filter_native_owned_remark(&self.mdx_remark_plugins)
99 }
100
101 pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
102 self.filter_native_owned_rehype(&self.markdown_rehype_plugins)
103 }
104
105 pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
106 self.filter_native_owned_rehype(&self.mdx_rehype_plugins)
107 }
108
109 fn user_forces_sidecar(&self, name: &str) -> bool {
110 self.force_sidecar || self.prefer_sidecar.iter().any(|n| n == name)
111 }
112
113 fn filter_native_owned_remark(&self, plugins: &[Value]) -> Vec<Value> {
114 plugins
115 .iter()
116 .filter(|p| {
117 let Some(name) = plugin_name(p) else { return true };
118 if self.user_forces_sidecar(name) {
119 return true;
120 }
121 !is_native_owned_remark(p)
122 })
123 .cloned()
124 .collect()
125 }
126
127 fn filter_native_owned_rehype(&self, plugins: &[Value]) -> Vec<Value> {
128 plugins
129 .iter()
130 .filter(|p| {
131 let Some(name) = plugin_name(p) else { return true };
132 if self.user_forces_sidecar(name) {
133 return true;
134 }
135 !is_native_owned_rehype(p)
136 })
137 .cloned()
138 .collect()
139 }
140
141 pub fn for_render(&self) -> Self {
143 let mut c = self.clone();
144 c.emit_html = !self.has_js_plugins();
145 c
146 }
147
148 pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
150 let copy_linked_files = if self.copy_linked_files
151 && let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
152 {
153 Some(CopyLinkedFilesOptions {
154 source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
155 assets_dir: assets.into(),
156 public_base: public.clone(),
157 })
158 } else {
159 None
160 };
161 let prefers = |needles: &[&str]| -> bool {
163 self.force_sidecar || self.prefer_sidecar.iter().any(|n| needles.contains(&n.as_str()))
164 };
165 let drop_pretty_code = prefers(&["rehype-pretty-code", "shiki"]);
166 let drop_math = prefers(&["remark-math", "rehype-katex", "rehype-mathjax"]);
167 let drop_emoji = prefers(&["remark-emoji"]);
168 let drop_autolink_headings = prefers(&["rehype-slug", "rehype-autolink-headings"]);
169 let drop_gfm = prefers(&["remark-gfm"]);
170 let drop_mermaid = prefers(&["mermaid", "rehype-mermaid", "remark-mermaid"]);
171
172 PipelineConfig {
173 markdown_gfm: Some(if drop_gfm { false } else { self.markdown_gfm }),
174 pretty_code: if drop_pretty_code { None } else { self.pretty_code.clone() },
175 math_engine: if drop_math { None } else { self.math_engine },
176 copy_linked_files,
177 emoji: if drop_emoji { Some(false) } else { None },
178 autolink_headings: if drop_autolink_headings { Some(false) } else { None },
179 math: if drop_math { Some(false) } else { None },
180 pretty_code_enabled: if drop_pretty_code { Some(false) } else { None },
181 mermaid: if drop_mermaid { None } else { self.mermaid.clone() },
182 mermaid_enabled: if drop_mermaid { Some(false) } else { None },
183 }
184 }
185}
186
187fn plugin_name(plugin: &Value) -> Option<&str> {
189 match plugin {
190 Value::String(s) => Some(s.as_str()),
191 Value::Array(a) => a.first().and_then(Value::as_str),
192 _ => None,
193 }
194}
195
196#[allow(clippy::match_like_matches_macro)]
197fn is_native_owned_remark(plugin: &Value) -> bool {
198 let Some(name) = plugin_name(plugin) else { return false };
199 match name {
200 "remark-gfm" => true,
201 "remark-math" => cfg!(feature = "math"),
202 "remark-emoji" => cfg!(feature = "emoji"),
203 _ => false,
204 }
205}
206
207#[allow(clippy::match_like_matches_macro)]
208fn is_native_owned_rehype(plugin: &Value) -> bool {
209 let Some(name) = plugin_name(plugin) else { return false };
210 match name {
211 "rehype-pretty-code" | "shiki" => cfg!(feature = "pretty-code"),
212 "rehype-katex" | "rehype-mathjax" => cfg!(feature = "math"),
213 "rehype-slug" | "rehype-autolink-headings" => true,
214 _ => false,
215 }
216}
217
218pub struct Compiler;
219
220impl Compiler {
221 pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
224 Self::compile_with_pipeline(source, Path::new("."), &CompileConfig::new(), diag_engine)
225 }
226
227 pub fn compile_with_pipeline(
228 source: &str,
229 path: &Path,
230 compile_cfg: &CompileConfig,
231 diag_engine: &mut DiagnosticEngine<Code>,
232 ) -> CompileOutput {
233 let meta = Arc::from(SourceMeta { path: Arc::from(path.display().to_string()), origin: Origin::File(path.into()) });
234 #[cfg(feature = "math")]
237 let preprocessed = dmc_transform::Math::preprocess_source(source);
238 #[cfg(feature = "math")]
239 let source: &str = &preprocessed;
240 let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
241 let _ = lexer.scan_tokens();
242
243 let mut doc = {
244 let mut parser = Parser::new(lexer.tokens, meta.clone(), diag_engine);
245 parser.parse()
246 };
247
248 let pipeline_cfg = compile_cfg.pipeline_config(path);
249 let pipeline = dmc_transform::Pipeline::with_defaults_for(&pipeline_cfg);
250
251 pipeline.run(&mut doc, &meta, diag_engine);
252
253 Self::finalize(source, doc, compile_cfg, diag_engine)
254 }
255
256 fn finalize(
259 source: &str,
260 doc: Document,
261 compile_cfg: &CompileConfig,
262 diag_engine: &mut DiagnosticEngine<Code>,
263 ) -> CompileOutput {
264 let mut acc = Accumulator::new();
265 let render_opts =
268 dmc_codegen::RenderOptions { allow_dangerous_html: compile_cfg.allow_dangerous_html, ..Default::default() };
269 let mut html_sink = if compile_cfg.emit_html { Some(HtmlEmitter::new_with_options(render_opts)) } else { None };
270 let mut body_sink = if compile_cfg.emit_body { Some(MdxBodyEmitter::new_with_options(render_opts)) } else { None };
271
272 let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
273 sinks.push(&mut acc);
274 if let Some(ref mut h) = html_sink {
275 sinks.push(h);
276 }
277 if let Some(ref mut b) = body_sink {
278 sinks.push(b);
279 }
280
281 Walker::new(&doc).walk(sinks.as_mut_slice());
282
283 let (html, body) = match (html_sink, body_sink) {
284 (Some(h), Some(b)) => {
285 let (s, hd) = h.into_parts();
286 let (m, bd) = b.into_parts();
287 diag_engine.extend(hd);
288 diag_engine.extend(bd);
289 (s, m)
290 },
291 (Some(h), None) => {
292 let (s, hd) = h.into_parts();
293 diag_engine.extend(hd);
294 (s, String::new())
295 },
296 (None, Some(b)) => {
297 let (m, bd) = b.into_parts();
298 diag_engine.extend(bd);
299 (String::new(), m)
300 },
301 (None, None) => (String::new(), String::new()),
302 };
303
304 acc.into_compile_output(source, html, body, compile_cfg)
305 }
306}
307
/// Document statistics attached to a compile result.
///
/// Serialized with camelCase keys (`readingTime`, `wordCount`); `Default`
/// yields zero for both fields.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    /// Estimated reading time — presumably in minutes; TODO confirm the unit
    /// against the code that fills it in.
    pub reading_time: u32,
    /// Number of words counted in the document.
    pub word_count: u32,
}
315
/// One entry of a table of contents; entries nest recursively through `items`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocItem {
    /// Display text of the entry.
    pub title: String,
    /// Link target of the entry — presumably a `#slug` anchor; TODO confirm.
    pub url: String,
    /// Child entries nested under this one (empty when there are none).
    pub items: Vec<TocItem>,
}
323
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Default config carrying only the given Markdown remark plugins.
    fn with_markdown_remark(plugins: Vec<Value>) -> CompileConfig {
        CompileConfig { markdown_remark_plugins: plugins, ..CompileConfig::default() }
    }

    /// Default config carrying only the given Markdown rehype plugins.
    fn with_markdown_rehype(plugins: Vec<Value>) -> CompileConfig {
        CompileConfig { markdown_rehype_plugins: plugins, ..CompileConfig::default() }
    }

    #[test]
    fn empty_plugin_lists_no_sidecar() {
        assert!(!CompileConfig::default().has_js_plugins());
    }

    #[test]
    fn arbitrary_remark_plugin_triggers_sidecar() {
        let cfg = with_markdown_remark(vec![json!("remark-frontmatter")]);
        assert!(cfg.has_js_plugins());
    }

    #[test]
    fn remark_gfm_alone_skips_sidecar() {
        let cfg = with_markdown_remark(vec![json!("remark-gfm")]);
        assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
    }

    #[test]
    fn rehype_slug_and_autolink_alone_skip_sidecar() {
        let cfg = with_markdown_rehype(vec![
            json!("rehype-slug"),
            json!(["rehype-autolink-headings", { "behavior": "wrap" }]),
        ]);
        assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
    }

    #[cfg(feature = "math")]
    #[test]
    fn remark_math_alone_with_native_skips_sidecar() {
        let cfg = CompileConfig {
            markdown_remark_plugins: vec![json!("remark-math")],
            markdown_rehype_plugins: vec![json!(["rehype-katex", { "errorColor": "red" }])],
            ..CompileConfig::default()
        };
        assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
    }

    #[cfg(feature = "emoji")]
    #[test]
    fn remark_emoji_alone_with_native_skips_sidecar() {
        let cfg = with_markdown_remark(vec![json!("remark-emoji")]);
        assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
    }

    #[cfg(feature = "pretty-code")]
    #[test]
    fn rehype_pretty_code_alone_with_native_skips_sidecar() {
        let cfg = CompileConfig {
            markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
            mdx_rehype_plugins: vec![json!(["rehype-pretty-code", { "theme": "github-dark" }]), json!("shiki")],
            ..CompileConfig::default()
        };
        assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
    }

    #[cfg(feature = "pretty-code")]
    #[test]
    fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
        let cfg = with_markdown_rehype(vec![json!("rehype-pretty-code"), json!("rehype-external-links")]);
        assert!(cfg.has_js_plugins());
    }

    #[cfg(not(feature = "pretty-code"))]
    #[test]
    fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
        let cfg = with_markdown_rehype(vec![json!("rehype-pretty-code")]);
        assert!(cfg.has_js_plugins());
    }

    /// With default options, raw HTML must not reach either output verbatim.
    #[test]
    fn compile_does_not_ship_raw_script_in_mdx_body() {
        let mut diag = DiagnosticEngine::<Code>::new();
        let out = Compiler::compile("<script>alert(1)</script>\n", &mut diag);
        let (body, html) = (&out.body, &out.html);
        assert!(
            !body.contains("dangerouslySetInnerHTML"),
            "raw HTML leaked as live dangerouslySetInnerHTML in MDX body:\n{}",
            body
        );
        assert!(!body.contains("<script>"), "raw <script> leaked into MDX body:\n{}", body);
        assert!(!html.contains("<script>"), "raw <script> leaked into HTML output:\n{}", html);
    }

    /// Opting in through `allow_dangerous_html` lets raw HTML through to the MDX body.
    #[test]
    fn compile_with_allow_dangerous_html_emits_raw_html() {
        let cfg = CompileConfig { allow_dangerous_html: true, ..CompileConfig::default() };
        let mut diag = DiagnosticEngine::<Code>::new();
        let out = Compiler::compile_with_pipeline("<div>raw</div>\n", Path::new("."), &cfg, &mut diag);
        assert!(out.body.contains("dangerouslySetInnerHTML"), "opt-in raw HTML not emitted in MDX body:\n{}", out.body);
    }
}
426
/// Result of one compile, serialized with camelCase keys (e.g. `frontmatterRaw`).
///
/// `html` and `body` come from the code-generation sinks in `Compiler::finalize`;
/// the remaining fields are filled in by the accumulator's `into_compile_output`,
/// which is not visible in this file — their descriptions below are hedged accordingly.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CompileOutput {
    /// Frontmatter as a JSON value — presumably the parsed form of `frontmatter_raw`; TODO confirm.
    pub frontmatter: serde_json::Value,
    /// Frontmatter text — presumably as written in the source; TODO confirm.
    pub frontmatter_raw: String,
    /// Document content — presumably the source without frontmatter; TODO confirm.
    pub content: String,
    /// Rendered HTML; an empty string when the HTML sink is disabled (`emit_html = false`).
    pub html: String,
    /// Rendered MDX body; an empty string when the body sink is disabled (`emit_body = false`).
    pub body: String,
    /// Short excerpt of the document — derivation not visible here.
    pub excerpt: String,
    /// Reading-time and word-count statistics.
    pub metadata: Metadata,
    /// Top-level table-of-contents entries; nesting lives in `TocItem::items`.
    pub toc: Vec<TocItem>,
    /// Import statements collected from the document — presumably MDX `import` lines; TODO confirm.
    pub imports: Vec<String>,
    /// Export statements collected from the document — presumably MDX `export` lines; TODO confirm.
    pub exports: Vec<String>,
}