Skip to main content

dmc/engine/
compile.rs

1use std::{path::Path, sync::Arc};
2
3use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
4use dmc_diagnostic::{
5  Code,
6  metadata::{Origin, SourceMeta},
7};
8use dmc_lexer::Lexer;
9use dmc_parser::{Parser, ast::Document};
10use dmc_transform::{CopyLinkedFilesOptions, MathEngine, MermaidOptions, PipelineConfig, PrettyCodeOptions};
11use duck_diagnostic::DiagnosticEngine;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::engine::accumulator::Accumulator;
16
/// Caller-facing compile options.
///
/// `#[serde(default)]` means any key missing from the serialised form falls
/// back to the value produced by the `Default` impl.
#[derive(Debug, Deserialize, Serialize, Clone)]
#[serde(default)]
pub struct CompileConfig {
  /// Forwarded to the pipeline as `markdown_gfm`; `pipeline_config` forces it
  /// to `false` when `remark-gfm` is routed to the sidecar.
  pub markdown_gfm: bool,
  /// Run the `HtmlEmitter` sink. When `false`, `CompileOutput::html` is an
  /// empty string. `for_render` overwrites this flag per file.
  pub emit_html: bool,
  /// Run the `MdxBodyEmitter` sink. When `false`, `CompileOutput::body` is an
  /// empty string.
  pub emit_body: bool,
  /// Not read in this file — NOTE(review): presumably consumed by the MDX
  /// bundling step; confirm against the caller.
  pub mdx_minify: bool,
  /// Not read in this file — NOTE(review): presumably consumed by the MDX
  /// bundling step; confirm against the caller.
  pub mdx_output_format: Option<String>,
  /// JS remark plugins for markdown. Each entry is a bare name string or a
  /// unified `[name, options]` array.
  pub markdown_remark_plugins: Vec<Value>,
  /// JS rehype plugins for markdown; same entry shapes as the remark list.
  pub markdown_rehype_plugins: Vec<Value>,
  /// JS remark plugins for MDX; same entry shapes as the markdown lists.
  pub mdx_remark_plugins: Vec<Value>,
  /// JS rehype plugins for MDX; same entry shapes as the markdown lists.
  pub mdx_rehype_plugins: Vec<Value>,
  /// Requests `copy-linked-files`. `pipeline_config` only builds its options
  /// when `output_assets` and `output_base` are both set as well.
  pub copy_linked_files: bool,
  /// Becomes `CopyLinkedFilesOptions::assets_dir`.
  pub output_assets: Option<String>,
  /// Becomes `CopyLinkedFilesOptions::public_base`.
  pub output_base: Option<String>,
  /// `None` = bundled defaults (Catppuccin Latte/Mocha, CSS-vars output).
  pub pretty_code: Option<PrettyCodeOptions>,
  /// `None` = bundled defaults (light+dark, `htmlLabels:false`, responsive SVG).
  pub mermaid: Option<MermaidOptions>,
  /// `None` = KaTeX (rehype-katex parity); `Some(Mathml)` = pulldown-latex (fast).
  pub math_engine: Option<MathEngine>,
  /// Force every listed JS plugin to the sidecar; drop every native transformer.
  pub force_sidecar: bool,
  /// SEC-010: raw embedded HTML passthrough (CommonMark "unsafe" mode).
  /// When `false` (the default), attacker-supplied `<script>` / `<iframe>`
  /// / `on*=` markup is NOT emitted verbatim into the HTML or MDX output —
  /// block-level raw HTML is dropped and inline raw HTML is escaped to
  /// visible text. Set `true` only when the caller fully trusts the input.
  pub allow_dangerous_html: bool,
  /// Per-plugin sidecar override. Recognised entries: "remark-gfm",
  /// "remark-math", "remark-emoji", "rehype-pretty-code", "shiki",
  /// "rehype-katex", "rehype-mathjax", "rehype-slug",
  /// "rehype-autolink-headings". `pipeline_config` additionally recognises
  /// "mermaid", "rehype-mermaid" and "remark-mermaid".
  pub prefer_sidecar: Vec<String>,
}
52
53impl Default for CompileConfig {
54  fn default() -> Self {
55    Self {
56      markdown_gfm: true,
57      emit_html: true,
58      emit_body: true,
59      mdx_output_format: None,
60      mdx_minify: false,
61      markdown_remark_plugins: vec![],
62      markdown_rehype_plugins: vec![],
63      mdx_remark_plugins: vec![],
64      mdx_rehype_plugins: vec![],
65      copy_linked_files: false,
66      output_assets: None,
67      output_base: None,
68      pretty_code: None,
69      mermaid: None,
70      math_engine: None,
71      force_sidecar: false,
72      prefer_sidecar: vec![],
73      allow_dangerous_html: false,
74    }
75  }
76}
77
78impl CompileConfig {
79  pub fn new() -> Self {
80    Self::default()
81  }
82
83  pub fn has_js_plugins(&self) -> bool {
84    !self.effective_markdown_remark_plugins().is_empty()
85      || !self.effective_mdx_remark_plugins().is_empty()
86      || !self.effective_markdown_rehype_plugins().is_empty()
87      || !self.effective_mdx_rehype_plugins().is_empty()
88  }
89
90  /// Drops JS plugins owned by an in-process transformer. When the
91  /// matching feature is off, the plugin stays in the list and the
92  /// sidecar runs it.
93  pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
94    self.filter_native_owned_remark(&self.markdown_remark_plugins)
95  }
96
97  pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
98    self.filter_native_owned_remark(&self.mdx_remark_plugins)
99  }
100
101  pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
102    self.filter_native_owned_rehype(&self.markdown_rehype_plugins)
103  }
104
105  pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
106    self.filter_native_owned_rehype(&self.mdx_rehype_plugins)
107  }
108
109  fn user_forces_sidecar(&self, name: &str) -> bool {
110    self.force_sidecar || self.prefer_sidecar.iter().any(|n| n == name)
111  }
112
113  fn filter_native_owned_remark(&self, plugins: &[Value]) -> Vec<Value> {
114    plugins
115      .iter()
116      .filter(|p| {
117        let Some(name) = plugin_name(p) else { return true };
118        if self.user_forces_sidecar(name) {
119          return true;
120        }
121        !is_native_owned_remark(p)
122      })
123      .cloned()
124      .collect()
125  }
126
127  fn filter_native_owned_rehype(&self, plugins: &[Value]) -> Vec<Value> {
128    plugins
129      .iter()
130      .filter(|p| {
131        let Some(name) = plugin_name(p) else { return true };
132        if self.user_forces_sidecar(name) {
133          return true;
134        }
135        !is_native_owned_rehype(p)
136      })
137      .cloned()
138      .collect()
139  }
140
141  /// Per-file config: native HTML off when sidecar will run.
142  pub fn for_render(&self) -> Self {
143    let mut c = self.clone();
144    c.emit_html = !self.has_js_plugins();
145    c
146  }
147
148  /// `path` resolves relative assets for `copy-linked-files`.
149  pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
150    let copy_linked_files = if self.copy_linked_files
151      && let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
152    {
153      Some(CopyLinkedFilesOptions {
154        source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
155        assets_dir: assets.into(),
156        public_base: public.clone(),
157      })
158    } else {
159      None
160    };
161    // Drop the native transformer when the user prefers the JS plugin.
162    let prefers = |needles: &[&str]| -> bool {
163      self.force_sidecar || self.prefer_sidecar.iter().any(|n| needles.contains(&n.as_str()))
164    };
165    let drop_pretty_code = prefers(&["rehype-pretty-code", "shiki"]);
166    let drop_math = prefers(&["remark-math", "rehype-katex", "rehype-mathjax"]);
167    let drop_emoji = prefers(&["remark-emoji"]);
168    let drop_autolink_headings = prefers(&["rehype-slug", "rehype-autolink-headings"]);
169    let drop_gfm = prefers(&["remark-gfm"]);
170    let drop_mermaid = prefers(&["mermaid", "rehype-mermaid", "remark-mermaid"]);
171
172    PipelineConfig {
173      markdown_gfm: Some(if drop_gfm { false } else { self.markdown_gfm }),
174      pretty_code: if drop_pretty_code { None } else { self.pretty_code.clone() },
175      math_engine: if drop_math { None } else { self.math_engine },
176      copy_linked_files,
177      emoji: if drop_emoji { Some(false) } else { None },
178      autolink_headings: if drop_autolink_headings { Some(false) } else { None },
179      math: if drop_math { Some(false) } else { None },
180      pretty_code_enabled: if drop_pretty_code { Some(false) } else { None },
181      mermaid: if drop_mermaid { None } else { self.mermaid.clone() },
182      mermaid_enabled: if drop_mermaid { Some(false) } else { None },
183    }
184  }
185}
186
187/// Bare string or unified `[name, options]` array.
188fn plugin_name(plugin: &Value) -> Option<&str> {
189  match plugin {
190    Value::String(s) => Some(s.as_str()),
191    Value::Array(a) => a.first().and_then(Value::as_str),
192    _ => None,
193  }
194}
195
196#[allow(clippy::match_like_matches_macro)]
197fn is_native_owned_remark(plugin: &Value) -> bool {
198  let Some(name) = plugin_name(plugin) else { return false };
199  match name {
200    "remark-gfm" => true,
201    "remark-math" => cfg!(feature = "math"),
202    "remark-emoji" => cfg!(feature = "emoji"),
203    _ => false,
204  }
205}
206
207#[allow(clippy::match_like_matches_macro)]
208fn is_native_owned_rehype(plugin: &Value) -> bool {
209  let Some(name) = plugin_name(plugin) else { return false };
210  match name {
211    "rehype-pretty-code" | "shiki" => cfg!(feature = "pretty-code"),
212    "rehype-katex" | "rehype-mathjax" => cfg!(feature = "math"),
213    "rehype-slug" | "rehype-autolink-headings" => true,
214    _ => false,
215  }
216}
217
/// Stateless entry point for compilation: a unit struct whose associated
/// functions run lexing, parsing, the transform pipeline and code generation.
pub struct Compiler;
219
220impl Compiler {
221  /// One-shot compile with default pipeline. Use
222  /// [`Self::compile_with_pipeline`] for real spans against a path.
223  pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
224    Self::compile_with_pipeline(source, Path::new("."), &CompileConfig::new(), diag_engine)
225  }
226
227  pub fn compile_with_pipeline(
228    source: &str,
229    path: &Path,
230    compile_cfg: &CompileConfig,
231    diag_engine: &mut DiagnosticEngine<Code>,
232  ) -> CompileOutput {
233    let meta = Arc::from(SourceMeta { path: Arc::from(path.display().to_string()), origin: Origin::File(path.into()) });
234    // Rewrite `$...$` / `$$...$$` to `<MathMl/>` so the parser does not
235    // treat `_` / `^` inside math as emphasis markers.
236    #[cfg(feature = "math")]
237    let preprocessed = dmc_transform::Math::preprocess_source(source);
238    #[cfg(feature = "math")]
239    let source: &str = &preprocessed;
240    let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
241    let _ = lexer.scan_tokens();
242
243    let mut doc = {
244      let mut parser = Parser::new(lexer.tokens, meta.clone(), diag_engine);
245      parser.parse()
246    };
247
248    let pipeline_cfg = compile_cfg.pipeline_config(path);
249    let pipeline = dmc_transform::Pipeline::with_defaults_for(&pipeline_cfg);
250
251    pipeline.run(&mut doc, &meta, diag_engine);
252
253    Self::finalize(source, doc, compile_cfg, diag_engine)
254  }
255
256  /// Per-sink `DiagnosticEngine`s merge into the caller's engine after
257  /// the walk (avoids `RefCell` overhead on every sink emit).
258  fn finalize(
259    source: &str,
260    doc: Document,
261    compile_cfg: &CompileConfig,
262    diag_engine: &mut DiagnosticEngine<Code>,
263  ) -> CompileOutput {
264    let mut acc = Accumulator::new();
265    // SEC-010: propagate the caller's raw-HTML policy into both emitters
266    // so `compile()` / napi callers are safe-by-default.
267    let render_opts =
268      dmc_codegen::RenderOptions { allow_dangerous_html: compile_cfg.allow_dangerous_html, ..Default::default() };
269    let mut html_sink = if compile_cfg.emit_html { Some(HtmlEmitter::new_with_options(render_opts)) } else { None };
270    let mut body_sink = if compile_cfg.emit_body { Some(MdxBodyEmitter::new_with_options(render_opts)) } else { None };
271
272    let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
273    sinks.push(&mut acc);
274    if let Some(ref mut h) = html_sink {
275      sinks.push(h);
276    }
277    if let Some(ref mut b) = body_sink {
278      sinks.push(b);
279    }
280
281    Walker::new(&doc).walk(sinks.as_mut_slice());
282
283    let (html, body) = match (html_sink, body_sink) {
284      (Some(h), Some(b)) => {
285        let (s, hd) = h.into_parts();
286        let (m, bd) = b.into_parts();
287        diag_engine.extend(hd);
288        diag_engine.extend(bd);
289        (s, m)
290      },
291      (Some(h), None) => {
292        let (s, hd) = h.into_parts();
293        diag_engine.extend(hd);
294        (s, String::new())
295      },
296      (None, Some(b)) => {
297        let (m, bd) = b.into_parts();
298        diag_engine.extend(bd);
299        (String::new(), m)
300      },
301      (None, None) => (String::new(), String::new()),
302    };
303
304    acc.into_compile_output(source, html, body, compile_cfg)
305  }
306}
307
/// Reading statistics attached to a [`CompileOutput`]; serialised with
/// camelCase keys (`readingTime`, `wordCount`).
///
/// `reading_time` in minutes (min 1).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
  /// Reading time in minutes (min 1).
  pub reading_time: u32,
  /// Word count. NOTE(review): the counting rules live outside this file.
  pub word_count: u32,
}
315
/// One table-of-contents entry; entries nest through `items`.
///
/// `url` is `#<heading-slug>`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocItem {
  /// Entry label — presumably the heading text; confirm against the producer.
  pub title: String,
  /// In-page anchor of the form `#<heading-slug>`.
  pub url: String,
  /// Child entries nested under this one.
  pub items: Vec<TocItem>,
}
323
#[cfg(test)]
mod tests {
  use super::*;
  use serde_json::json;

  /// A default config lists no plugins at all.
  #[test]
  fn empty_plugin_lists_no_sidecar() {
    assert!(!CompileConfig::default().has_js_plugins());
  }

  /// A plugin with no native counterpart must reach the sidecar.
  #[test]
  fn arbitrary_remark_plugin_triggers_sidecar() {
    let cfg =
      CompileConfig { markdown_remark_plugins: vec![json!("remark-frontmatter")], ..CompileConfig::default() };
    assert!(cfg.has_js_plugins());
  }

  /// `remark-gfm` is native-owned regardless of cargo features.
  #[test]
  fn remark_gfm_alone_skips_sidecar() {
    let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-gfm")], ..CompileConfig::default() };
    assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
  }

  /// Both the bare-string and the `[name, options]` forms are recognised.
  #[test]
  fn rehype_slug_and_autolink_alone_skip_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![
        json!("rehype-slug"),
        json!(["rehype-autolink-headings", { "behavior": "wrap" }]),
      ],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
  }

  #[cfg(feature = "math")]
  #[test]
  fn remark_math_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-math")],
      markdown_rehype_plugins: vec![json!(["rehype-katex", { "errorColor": "red" }])],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
  }

  #[cfg(feature = "emoji")]
  #[test]
  fn remark_emoji_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-emoji")], ..CompileConfig::default() };
    assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn rehype_pretty_code_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
      mdx_rehype_plugins: vec![json!(["rehype-pretty-code", { "theme": "github-dark" }]), json!("shiki")],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code"), json!("rehype-external-links")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }

  #[cfg(not(feature = "pretty-code"))]
  #[test]
  fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
    let cfg =
      CompileConfig { markdown_rehype_plugins: vec![json!("rehype-pretty-code")], ..CompileConfig::default() };
    assert!(cfg.has_js_plugins());
  }

  /// SEC-010: `compile()` (and therefore napi `compile`/`build`) must be
  /// safe-by-default — attacker `<script>` must not survive into the MDX
  /// body as a live `dangerouslySetInnerHTML`, nor as a raw `<script>`.
  #[test]
  fn compile_does_not_ship_raw_script_in_mdx_body() {
    let mut engine = DiagnosticEngine::<Code>::new();
    let output = Compiler::compile("<script>alert(1)</script>\n", &mut engine);

    let body_has_live_html = output.body.contains("dangerouslySetInnerHTML");
    assert!(
      !body_has_live_html,
      "raw HTML leaked as live dangerouslySetInnerHTML in MDX body:\n{}",
      output.body
    );

    let body_has_script = output.body.contains("<script>");
    assert!(!body_has_script, "raw <script> leaked into MDX body:\n{}", output.body);

    let html_has_script = output.html.contains("<script>");
    assert!(!html_has_script, "raw <script> leaked into HTML output:\n{}", output.html);
  }

  /// SEC-010: explicit opt-in restores raw-HTML passthrough.
  #[test]
  fn compile_with_allow_dangerous_html_emits_raw_html() {
    let mut engine = DiagnosticEngine::<Code>::new();
    let mut cfg = CompileConfig::default();
    cfg.allow_dangerous_html = true;

    let output = Compiler::compile_with_pipeline("<div>raw</div>\n", Path::new("."), &cfg, &mut engine);
    assert!(
      output.body.contains("dangerouslySetInnerHTML"),
      "opt-in raw HTML not emitted in MDX body:\n{}",
      output.body
    );
  }
}
426
/// Compiled `.mdx` output. All fields always populated; serialised camelCase.
///
/// "Populated" includes empty values: `html` / `body` are empty strings when
/// the matching emitter is disabled via `CompileConfig::emit_html` /
/// `emit_body`. Values are assembled by `Accumulator::into_compile_output`
/// (defined outside this file).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct CompileOutput {
  /// Frontmatter as a JSON value — presumably parsed from `frontmatter_raw`;
  /// confirm in the accumulator.
  pub frontmatter: serde_json::Value,
  /// Serialised as `frontmatterRaw`.
  pub frontmatter_raw: String,
  /// NOTE(review): filled by the accumulator; exact contents not visible here.
  pub content: String,
  /// Output of the `HtmlEmitter` sink; empty when `emit_html` is `false`.
  pub html: String,
  /// Output of the `MdxBodyEmitter` sink; empty when `emit_body` is `false`.
  pub body: String,
  /// NOTE(review): filled by the accumulator; derivation not visible here.
  pub excerpt: String,
  /// Reading time and word count.
  pub metadata: Metadata,
  /// Top-level table-of-contents entries; children nest via `TocItem::items`.
  pub toc: Vec<TocItem>,
  /// NOTE(review): presumably the MDX `import` statements found in the source.
  pub imports: Vec<String>,
  /// NOTE(review): presumably the MDX `export` statements found in the source.
  pub exports: Vec<String>,
}