// dmc/engine/compile.rs

1use std::{path::Path, sync::Arc};
2
3use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
4use dmc_diagnostic::{
5  Code,
6  metadata::{Origin, SourceMeta},
7};
8use dmc_lexer::Lexer;
9use dmc_parser::{Parser, ast::Document};
10use dmc_transform::{CopyLinkedFilesOptions, MathEngine, MermaidOptions, PipelineConfig, PrettyCodeOptions};
11use duck_diagnostic::DiagnosticEngine;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::engine::accumulator::Accumulator;
16
17#[derive(Debug, Deserialize, Serialize, Clone)]
18#[serde(default)]
19pub struct CompileConfig {
20  pub markdown_gfm: bool,
21  pub emit_html: bool,
22  pub emit_body: bool,
23  pub mdx_minify: bool,
24  pub mdx_output_format: Option<String>,
25  pub markdown_remark_plugins: Vec<Value>,
26  pub markdown_rehype_plugins: Vec<Value>,
27  pub mdx_remark_plugins: Vec<Value>,
28  pub mdx_rehype_plugins: Vec<Value>,
29  pub copy_linked_files: bool,
30  pub output_assets: Option<String>,
31  pub output_base: Option<String>,
32  /// Pretty-code highlighter config. `None` = bundled defaults
33  /// (Catppuccin Latte/Mocha pair, dark primary, multi-mode CSS-vars
34  /// output). `Some` = explicit theme spec.
35  pub pretty_code: Option<PrettyCodeOptions>,
36  /// Mermaid render config. `None` = bundled defaults (light + dark
37  /// theme pair, `htmlLabels:false`, responsive SVG, centered labels).
38  /// `Some(MermaidOptions)` overrides theme set, mermaid initialize
39  /// config, post-process flags, etc.
40  pub mermaid: Option<MermaidOptions>,
41  /// LaTeX engine for `$...$` / `$$...$$`. `None` = KaTeX (slow, exact
42  /// rehype-katex parity). `Some(MathEngine::Mathml)` = pulldown-latex
43  /// MathML (fast, plainer visuals).
44  pub math_engine: Option<MathEngine>,
45  /// Global override: when `true`, no plugin is treated as native-owned,
46  /// every listed JS plugin runs in the sidecar, every native transformer
47  /// is dropped from the pipeline.
48  pub force_sidecar: bool,
49  /// Per-plugin override: names in this list are *not* stripped from the
50  /// sidecar payload, and the matching native transformer is dropped from
51  /// the pipeline. Use to keep one specific JS plugin and let dmc handle
52  /// everything else natively.
53  ///
54  /// Recognised entries:
55  ///   "remark-gfm", "remark-math", "remark-emoji",
56  ///   "rehype-pretty-code", "shiki",
57  ///   "rehype-katex", "rehype-mathjax",
58  ///   "rehype-slug", "rehype-autolink-headings"
59  pub prefer_sidecar: Vec<String>,
60}
61
62impl Default for CompileConfig {
63  fn default() -> Self {
64    Self {
65      markdown_gfm: true,
66      emit_html: true,
67      emit_body: true,
68      mdx_output_format: None,
69      mdx_minify: false,
70      markdown_remark_plugins: vec![],
71      markdown_rehype_plugins: vec![],
72      mdx_remark_plugins: vec![],
73      mdx_rehype_plugins: vec![],
74      copy_linked_files: false,
75      output_assets: None,
76      output_base: None,
77      pretty_code: None,
78      mermaid: None,
79      math_engine: None,
80      force_sidecar: false,
81      prefer_sidecar: vec![],
82    }
83  }
84}
85
86impl CompileConfig {
87  pub fn new() -> Self {
88    Self::default()
89  }
90
91  pub fn has_js_plugins(&self) -> bool {
92    !self.effective_markdown_remark_plugins().is_empty()
93      || !self.effective_mdx_remark_plugins().is_empty()
94      || !self.effective_markdown_rehype_plugins().is_empty()
95      || !self.effective_mdx_rehype_plugins().is_empty()
96  }
97
98  /// Plugin lists after stripping every JS plugin whose work is now done
99  /// by an in-process transformer (pretty-code/shiki, math, emoji). Used
100  /// both for the sidecar gate and for the request payload so the sidecar
101  /// never duplicates work. When the matching feature is off, that
102  /// plugin's name is left in the list and the sidecar runs it.
103  pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
104    self.filter_native_owned_remark(&self.markdown_remark_plugins)
105  }
106
107  pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
108    self.filter_native_owned_remark(&self.mdx_remark_plugins)
109  }
110
111  pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
112    self.filter_native_owned_rehype(&self.markdown_rehype_plugins)
113  }
114
115  pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
116    self.filter_native_owned_rehype(&self.mdx_rehype_plugins)
117  }
118
119  /// `true` when the user wants the sidecar to handle this specific
120  /// plugin (either via `prefer_sidecar` per-plugin list or via the
121  /// global `force_sidecar` flag).
122  fn user_forces_sidecar(&self, name: &str) -> bool {
123    self.force_sidecar || self.prefer_sidecar.iter().any(|n| n == name)
124  }
125
126  fn filter_native_owned_remark(&self, plugins: &[Value]) -> Vec<Value> {
127    plugins
128      .iter()
129      .filter(|p| {
130        let Some(name) = plugin_name(p) else { return true };
131        if self.user_forces_sidecar(name) {
132          return true;
133        }
134        !is_native_owned_remark(p)
135      })
136      .cloned()
137      .collect()
138  }
139
140  fn filter_native_owned_rehype(&self, plugins: &[Value]) -> Vec<Value> {
141    plugins
142      .iter()
143      .filter(|p| {
144        let Some(name) = plugin_name(p) else { return true };
145        if self.user_forces_sidecar(name) {
146          return true;
147        }
148        !is_native_owned_rehype(p)
149      })
150      .cloned()
151      .collect()
152  }
153
154  /// Per-file compile config: turns off native HTML when sidecar will run.
155  pub fn for_render(&self) -> Self {
156    let mut c = self.clone();
157    c.emit_html = !self.has_js_plugins();
158    c
159  }
160
161  /// Build the [`PipelineConfig`] consumed by
162  /// [`dmc_transform::Pipeline::with_defaults_for`]. `path` is the compiled
163  /// file's path,
164  /// used to resolve relative asset paths in the `copy-linked-files`
165  /// transformer.
166  pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
167    let copy_linked_files = if self.copy_linked_files
168      && let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
169    {
170      Some(CopyLinkedFilesOptions {
171        source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
172        assets_dir: assets.into(),
173        public_base: public.clone(),
174      })
175    } else {
176      None
177    };
178    // Drop the native transformer when the user prefers the JS plugin
179    // for that role. Each `drop_*` flag flips the matching opt-out
180    // field in `PipelineConfig` so the transformer is not pushed.
181    let prefers = |needles: &[&str]| -> bool {
182      self.force_sidecar || self.prefer_sidecar.iter().any(|n| needles.contains(&n.as_str()))
183    };
184    let drop_pretty_code = prefers(&["rehype-pretty-code", "shiki"]);
185    let drop_math = prefers(&["remark-math", "rehype-katex", "rehype-mathjax"]);
186    let drop_emoji = prefers(&["remark-emoji"]);
187    let drop_autolink_headings = prefers(&["rehype-slug", "rehype-autolink-headings"]);
188    let drop_gfm = prefers(&["remark-gfm"]);
189    let drop_mermaid = prefers(&["mermaid", "rehype-mermaid", "remark-mermaid"]);
190
191    PipelineConfig {
192      markdown_gfm: Some(if drop_gfm { false } else { self.markdown_gfm }),
193      pretty_code: if drop_pretty_code { None } else { self.pretty_code.clone() },
194      math_engine: if drop_math { None } else { self.math_engine },
195      copy_linked_files,
196      emoji: if drop_emoji { Some(false) } else { None },
197      autolink_headings: if drop_autolink_headings { Some(false) } else { None },
198      math: if drop_math { Some(false) } else { None },
199      pretty_code_enabled: if drop_pretty_code { Some(false) } else { None },
200      mermaid: if drop_mermaid { None } else { self.mermaid.clone() },
201      mermaid_enabled: if drop_mermaid { Some(false) } else { None },
202    }
203  }
204}
205
206/// Extract the plugin name from either the bare string form
207/// (`"rehype-pretty-code"`) or the `[name, options]` array form used by
208/// unified-style plugin configs.
209fn plugin_name(plugin: &Value) -> Option<&str> {
210  match plugin {
211    Value::String(s) => Some(s.as_str()),
212    Value::Array(a) => a.first().and_then(Value::as_str),
213    _ => None,
214  }
215}
216
217/// `true` when `plugin` is a remark-side plugin whose work an in-process
218/// transformer now does. Stripped from the sidecar payload so the JS
219/// plugin chain does not redo native work.
220fn is_native_owned_remark(plugin: &Value) -> bool {
221  let Some(name) = plugin_name(plugin) else { return false };
222  match name {
223    // GFM tables, strikethrough, autolinks, task lists are handled by
224    // the dmc parser; remark-gfm in the sidecar is redundant.
225    "remark-gfm" => true,
226    "remark-math" => cfg!(feature = "math"),
227    "remark-emoji" => cfg!(feature = "emoji"),
228    _ => false,
229  }
230}
231
232/// Same for rehype-side plugins.
233fn is_native_owned_rehype(plugin: &Value) -> bool {
234  let Some(name) = plugin_name(plugin) else { return false };
235  match name {
236    "rehype-pretty-code" | "shiki" => cfg!(feature = "pretty-code"),
237    "rehype-katex" | "rehype-mathjax" => cfg!(feature = "math"),
238    // Heading slugs + anchor links handled by the AutolinkHeadings
239    // transformer in `Pipeline::with_defaults`.
240    "rehype-slug" | "rehype-autolink-headings" => true,
241    _ => false,
242  }
243}
244
245pub struct Compiler;
246
247impl Compiler {
248  /// One-shot compile of `source` with the default pipeline. Use
249  /// [`Self::compile_with_pipeline`] for file-aware compilation with real
250  /// spans.
251  pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
252    // FIX:
253    Self::compile_with_pipeline(source, Path::new("."), &CompileConfig::new(), diag_engine)
254  }
255
256  /// Like [`Self::compile`] with a caller-supplied pipeline + path for spans.
257  pub fn compile_with_pipeline(
258    source: &str,
259    path: &Path,
260    compile_cfg: &CompileConfig,
261    diag_engine: &mut DiagnosticEngine<Code>,
262  ) -> CompileOutput {
263    // Each layer holds its own DiagnosticEngine, mirroring the Lexer pattern.
264    let meta = Arc::from(SourceMeta { path: Arc::from(path.display().to_string()), origin: Origin::File(path.into()) });
265    // Source-level math: rewrite `$...$` / `$$...$$` to `<MathMl/>` JSX
266    // so the parser does not interpret `_` or `^` inside math as Markdown
267    // emphasis markers.
268    #[cfg(feature = "math")]
269    let preprocessed = dmc_transform::Math::preprocess_source(source);
270    #[cfg(feature = "math")]
271    let source: &str = &preprocessed;
272    let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
273    let _ = lexer.scan_tokens();
274
275    let mut doc = {
276      let mut parser = Parser::new(lexer.tokens, meta.clone(), diag_engine);
277      parser.parse()
278    };
279
280    let pipeline_cfg = compile_cfg.pipeline_config(path);
281    let pipeline = dmc_transform::Pipeline::with_defaults_for(&pipeline_cfg);
282
283    pipeline.run(&mut doc, &meta, diag_engine);
284
285    Self::finalize(source, doc, compile_cfg, diag_engine)
286  }
287
288  /// Pull frontmatter + imports/exports, render HTML + MDX body, derive
289  /// excerpt / metadata / TOC, pack into a `CompileOutput`. Each sink
290  /// owns a private `DiagnosticEngine` during the walk; we merge them
291  /// into the caller's `diag_engine` after the walk completes (avoids
292  /// `RefCell` overhead on every sink emit).
293  fn finalize(
294    source: &str,
295    doc: Document,
296    compile_cfg: &CompileConfig,
297    diag_engine: &mut DiagnosticEngine<Code>,
298  ) -> CompileOutput {
299    let mut acc = Accumulator::new();
300    let mut html_sink = if compile_cfg.emit_html { Some(HtmlEmitter::new()) } else { None };
301    let mut body_sink = if compile_cfg.emit_body { Some(MdxBodyEmitter::new()) } else { None };
302
303    let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
304    sinks.push(&mut acc);
305    if let Some(ref mut h) = html_sink {
306      sinks.push(h);
307    }
308    if let Some(ref mut b) = body_sink {
309      sinks.push(b);
310    }
311
312    Walker::new(&doc).walk(sinks.as_mut_slice());
313
314    let (html, body) = match (html_sink, body_sink) {
315      (Some(h), Some(b)) => {
316        let (s, hd) = h.into_parts();
317        let (m, bd) = b.into_parts();
318        diag_engine.extend(hd);
319        diag_engine.extend(bd);
320        (s, m)
321      },
322      (Some(h), None) => {
323        let (s, hd) = h.into_parts();
324        diag_engine.extend(hd);
325        (s, String::new())
326      },
327      (None, Some(b)) => {
328        let (m, bd) = b.into_parts();
329        diag_engine.extend(bd);
330        (String::new(), m)
331      },
332      (None, None) => (String::new(), String::new()),
333    };
334
335    acc.into_compile_output(source, html, body, compile_cfg)
336  }
337}
338
339/// Reading-time + word-count from plain text. `reading_time` in minutes,
340/// ceil, min 1.
341#[derive(Debug, Clone, Serialize, Deserialize, Default)]
342#[serde(rename_all = "camelCase")]
343pub struct Metadata {
344  pub reading_time: u32,
345  pub word_count: u32,
346}
347
348/// One TOC node. `url` is `#<heading-slug>`.
349#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct TocItem {
351  pub title: String,
352  pub url: String,
353  pub items: Vec<TocItem>,
354}
355
356#[cfg(test)]
357mod tests {
358  use super::*;
359  use serde_json::json;
360
361  #[test]
362  fn empty_plugin_lists_no_sidecar() {
363    let cfg = CompileConfig::default();
364    assert!(!cfg.has_js_plugins());
365  }
366
367  #[test]
368  fn arbitrary_remark_plugin_triggers_sidecar() {
369    let mut cfg = CompileConfig::default();
370    // Pick a plugin not covered by any native transformer.
371    cfg.markdown_remark_plugins.push(json!("remark-frontmatter"));
372    assert!(cfg.has_js_plugins());
373  }
374
375  #[test]
376  fn remark_gfm_alone_skips_sidecar() {
377    let mut cfg = CompileConfig::default();
378    cfg.markdown_remark_plugins.push(json!("remark-gfm"));
379    assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
380  }
381
382  #[test]
383  fn rehype_slug_and_autolink_alone_skip_sidecar() {
384    let mut cfg = CompileConfig::default();
385    cfg.markdown_rehype_plugins.push(json!("rehype-slug"));
386    cfg.markdown_rehype_plugins.push(json!(["rehype-autolink-headings", { "behavior": "wrap" }]));
387    assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
388  }
389
390  #[cfg(feature = "math")]
391  #[test]
392  fn remark_math_alone_with_native_skips_sidecar() {
393    let mut cfg = CompileConfig::default();
394    cfg.markdown_remark_plugins.push(json!("remark-math"));
395    cfg.markdown_rehype_plugins.push(json!(["rehype-katex", { "errorColor": "red" }]));
396    assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
397  }
398
399  #[cfg(feature = "emoji")]
400  #[test]
401  fn remark_emoji_alone_with_native_skips_sidecar() {
402    let mut cfg = CompileConfig::default();
403    cfg.markdown_remark_plugins.push(json!("remark-emoji"));
404    assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
405  }
406
407  #[cfg(feature = "pretty-code")]
408  #[test]
409  fn rehype_pretty_code_alone_with_native_skips_sidecar() {
410    let mut cfg = CompileConfig::default();
411    cfg.markdown_rehype_plugins.push(json!("rehype-pretty-code"));
412    cfg.mdx_rehype_plugins.push(json!(["rehype-pretty-code", { "theme": "github-dark" }]));
413    cfg.mdx_rehype_plugins.push(json!("shiki"));
414    assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
415  }
416
417  #[cfg(feature = "pretty-code")]
418  #[test]
419  fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
420    let mut cfg = CompileConfig::default();
421    cfg.markdown_rehype_plugins.push(json!("rehype-pretty-code"));
422    // Any rehype plugin not absorbed by a native transformer keeps the
423    // sidecar alive. Pick something that no current native pass owns.
424    cfg.markdown_rehype_plugins.push(json!("rehype-external-links"));
425    assert!(cfg.has_js_plugins());
426  }
427
428  #[cfg(not(feature = "pretty-code"))]
429  #[test]
430  fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
431    let mut cfg = CompileConfig::default();
432    cfg.markdown_rehype_plugins.push(json!("rehype-pretty-code"));
433    assert!(cfg.has_js_plugins());
434  }
435}
436
437/// Compiled `.mdx` output. Every field is always populated; serialised
438/// camelCase for JS parity.
439#[derive(Debug, Clone, Serialize, Deserialize)]
440#[serde(rename_all = "camelCase")]
441pub struct CompileOutput {
442  pub frontmatter: serde_json::Value,
443  pub frontmatter_raw: String,
444  pub content: String,
445  pub html: String,
446  pub body: String,
447  pub excerpt: String,
448  pub metadata: Metadata,
449  pub toc: Vec<TocItem>,
450  pub imports: Vec<String>,
451  pub exports: Vec<String>,
452}