Skip to main content

dmc/engine/
compile.rs

1use std::{path::Path, sync::Arc};
2
3use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
4use dmc_diagnostic::{
5  Code,
6  metadata::{Origin, SourceMeta},
7};
8use dmc_lexer::Lexer;
9use dmc_parser::{Parser, ast::Document};
10use dmc_transform::{CopyLinkedFilesOptions, MathEngine, PipelineConfig, PrettyCodeOptions};
11use duck_diagnostic::DiagnosticEngine;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::engine::accumulator::Accumulator;
16
17#[derive(Debug, Deserialize, Serialize, Clone)]
18#[serde(default)]
19pub struct CompileConfig {
20  pub markdown_gfm: bool,
21  pub emit_html: bool,
22  pub emit_body: bool,
23  pub mdx_minify: bool,
24  pub mdx_output_format: Option<String>,
25  pub markdown_remark_plugins: Vec<Value>,
26  pub markdown_rehype_plugins: Vec<Value>,
27  pub mdx_remark_plugins: Vec<Value>,
28  pub mdx_rehype_plugins: Vec<Value>,
29  pub copy_linked_files: bool,
30  pub output_assets: Option<String>,
31  pub output_base: Option<String>,
32  /// Pretty-code highlighter config. `None` = bundled defaults
33  /// (Catppuccin Latte/Mocha pair, dark primary, multi-mode CSS-vars
34  /// output). `Some` = explicit theme spec.
35  pub pretty_code: Option<PrettyCodeOptions>,
36  /// LaTeX engine for `$...$` / `$$...$$`. `None` = KaTeX (slow, exact
37  /// rehype-katex parity). `Some(MathEngine::Mathml)` = pulldown-latex
38  /// MathML (fast, plainer visuals).
39  pub math_engine: Option<MathEngine>,
40  /// Global override: when `true`, no plugin is treated as native-owned,
41  /// every listed JS plugin runs in the sidecar, every native transformer
42  /// is dropped from the pipeline.
43  pub force_sidecar: bool,
44  /// Per-plugin override: names in this list are *not* stripped from the
45  /// sidecar payload, and the matching native transformer is dropped from
46  /// the pipeline. Use to keep one specific JS plugin and let dmc handle
47  /// everything else natively.
48  ///
49  /// Recognised entries:
50  ///   "remark-gfm", "remark-math", "remark-emoji",
51  ///   "rehype-pretty-code", "shiki",
52  ///   "rehype-katex", "rehype-mathjax",
53  ///   "rehype-slug", "rehype-autolink-headings"
54  pub prefer_sidecar: Vec<String>,
55}
56
57impl Default for CompileConfig {
58  fn default() -> Self {
59    Self {
60      markdown_gfm: true,
61      emit_html: true,
62      emit_body: true,
63      mdx_output_format: None,
64      mdx_minify: false,
65      markdown_remark_plugins: vec![],
66      markdown_rehype_plugins: vec![],
67      mdx_remark_plugins: vec![],
68      mdx_rehype_plugins: vec![],
69      copy_linked_files: false,
70      output_assets: None,
71      output_base: None,
72      pretty_code: None,
73      math_engine: None,
74      force_sidecar: false,
75      prefer_sidecar: vec![],
76    }
77  }
78}
79
80impl CompileConfig {
81  pub fn new() -> Self {
82    Self::default()
83  }
84
85  pub fn has_js_plugins(&self) -> bool {
86    !self.effective_markdown_remark_plugins().is_empty()
87      || !self.effective_mdx_remark_plugins().is_empty()
88      || !self.effective_markdown_rehype_plugins().is_empty()
89      || !self.effective_mdx_rehype_plugins().is_empty()
90  }
91
92  /// Plugin lists after stripping every JS plugin whose work is now done
93  /// by an in-process transformer (pretty-code/shiki, math, emoji). Used
94  /// both for the sidecar gate and for the request payload so the sidecar
95  /// never duplicates work. When the matching feature is off, that
96  /// plugin's name is left in the list and the sidecar runs it.
97  pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
98    self.filter_native_owned_remark(&self.markdown_remark_plugins)
99  }
100
101  pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
102    self.filter_native_owned_remark(&self.mdx_remark_plugins)
103  }
104
105  pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
106    self.filter_native_owned_rehype(&self.markdown_rehype_plugins)
107  }
108
109  pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
110    self.filter_native_owned_rehype(&self.mdx_rehype_plugins)
111  }
112
113  /// `true` when the user wants the sidecar to handle this specific
114  /// plugin (either via `prefer_sidecar` per-plugin list or via the
115  /// global `force_sidecar` flag).
116  fn user_forces_sidecar(&self, name: &str) -> bool {
117    self.force_sidecar || self.prefer_sidecar.iter().any(|n| n == name)
118  }
119
120  fn filter_native_owned_remark(&self, plugins: &[Value]) -> Vec<Value> {
121    plugins
122      .iter()
123      .filter(|p| {
124        let Some(name) = plugin_name(p) else { return true };
125        if self.user_forces_sidecar(name) {
126          return true;
127        }
128        !is_native_owned_remark(p)
129      })
130      .cloned()
131      .collect()
132  }
133
134  fn filter_native_owned_rehype(&self, plugins: &[Value]) -> Vec<Value> {
135    plugins
136      .iter()
137      .filter(|p| {
138        let Some(name) = plugin_name(p) else { return true };
139        if self.user_forces_sidecar(name) {
140          return true;
141        }
142        !is_native_owned_rehype(p)
143      })
144      .cloned()
145      .collect()
146  }
147
148  /// Per-file compile config: turns off native HTML when sidecar will run.
149  pub fn for_render(&self) -> Self {
150    let mut c = self.clone();
151    c.emit_html = !self.has_js_plugins();
152    c
153  }
154
155  /// Build the [`PipelineConfig`] consumed by
156  /// [`Pipeline::with_defaults_for`]. `path` is the compiled file's path,
157  /// used to resolve relative asset paths in the `copy-linked-files`
158  /// transformer.
159  pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
160    let copy_linked_files = if self.copy_linked_files
161      && let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
162    {
163      Some(CopyLinkedFilesOptions {
164        source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
165        assets_dir: assets.into(),
166        public_base: public.clone(),
167      })
168    } else {
169      None
170    };
171    // Drop the native transformer when the user prefers the JS plugin
172    // for that role. Each `drop_*` flag flips the matching opt-out
173    // field in `PipelineConfig` so the transformer is not pushed.
174    let prefers = |needles: &[&str]| -> bool {
175      self.force_sidecar || self.prefer_sidecar.iter().any(|n| needles.contains(&n.as_str()))
176    };
177    let drop_pretty_code = prefers(&["rehype-pretty-code", "shiki"]);
178    let drop_math = prefers(&["remark-math", "rehype-katex", "rehype-mathjax"]);
179    let drop_emoji = prefers(&["remark-emoji"]);
180    let drop_autolink_headings = prefers(&["rehype-slug", "rehype-autolink-headings"]);
181    let drop_gfm = prefers(&["remark-gfm"]);
182
183    PipelineConfig {
184      markdown_gfm: Some(if drop_gfm { false } else { self.markdown_gfm }),
185      pretty_code: if drop_pretty_code { None } else { self.pretty_code.clone() },
186      math_engine: if drop_math { None } else { self.math_engine },
187      copy_linked_files,
188      emoji: if drop_emoji { Some(false) } else { None },
189      autolink_headings: if drop_autolink_headings { Some(false) } else { None },
190      math: if drop_math { Some(false) } else { None },
191      pretty_code_enabled: if drop_pretty_code { Some(false) } else { None },
192    }
193  }
194}
195
196/// Extract the plugin name from either the bare string form
197/// (`"rehype-pretty-code"`) or the `[name, options]` array form used by
198/// unified-style plugin configs.
199fn plugin_name(plugin: &Value) -> Option<&str> {
200  match plugin {
201    Value::String(s) => Some(s.as_str()),
202    Value::Array(a) => a.first().and_then(Value::as_str),
203    _ => None,
204  }
205}
206
207/// `true` when `plugin` is a remark-side plugin whose work an in-process
208/// transformer now does. Stripped from the sidecar payload so the JS
209/// plugin chain does not redo native work.
210fn is_native_owned_remark(plugin: &Value) -> bool {
211  let Some(name) = plugin_name(plugin) else { return false };
212  match name {
213    // GFM tables, strikethrough, autolinks, task lists are handled by
214    // the dmc parser; remark-gfm in the sidecar is redundant.
215    "remark-gfm" => true,
216    "remark-math" => cfg!(feature = "math"),
217    "remark-emoji" => cfg!(feature = "emoji"),
218    _ => false,
219  }
220}
221
222/// Same for rehype-side plugins.
223fn is_native_owned_rehype(plugin: &Value) -> bool {
224  let Some(name) = plugin_name(plugin) else { return false };
225  match name {
226    "rehype-pretty-code" | "shiki" => cfg!(feature = "pretty-code"),
227    "rehype-katex" | "rehype-mathjax" => cfg!(feature = "math"),
228    // Heading slugs + anchor links handled by the AutolinkHeadings
229    // transformer in `Pipeline::with_defaults`.
230    "rehype-slug" | "rehype-autolink-headings" => true,
231    _ => false,
232  }
233}
234
235pub struct Compiler;
236
237impl Compiler {
238  /// One-shot compile of `source` with the default pipeline. Use
239  /// `compile_with_pipeline` for file-aware compilation with real spans.
240  pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
241    // FIX:
242    Self::compile_with_pipeline(source, Path::new("."), &CompileConfig::new(), diag_engine)
243  }
244
245  /// Like [`compile`] with a caller-supplied pipeline + path for spans.
246  pub fn compile_with_pipeline(
247    source: &str,
248    path: &Path,
249    compile_cfg: &CompileConfig,
250    diag_engine: &mut DiagnosticEngine<Code>,
251  ) -> CompileOutput {
252    // Each layer holds its own DiagnosticEngine, mirroring the Lexer pattern.
253    let meta = Arc::from(SourceMeta { path: Arc::from(path.display().to_string()), origin: Origin::File(path.into()) });
254    // Source-level math: rewrite `$...$` / `$$...$$` to `<MathMl/>` JSX
255    // so the parser does not interpret `_` or `^` inside math as Markdown
256    // emphasis markers.
257    #[cfg(feature = "math")]
258    let preprocessed = dmc_transform::Math::preprocess_source(source);
259    #[cfg(feature = "math")]
260    let source: &str = &preprocessed;
261    let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
262    let _ = lexer.scan_tokens();
263
264    let mut doc = {
265      let mut parser = Parser::new(lexer.tokens, meta.clone(), diag_engine);
266      parser.parse()
267    };
268
269    let pipeline_cfg = compile_cfg.pipeline_config(path);
270    let pipeline = dmc_transform::Pipeline::with_defaults_for(&pipeline_cfg);
271
272    pipeline.run(&mut doc, &meta, diag_engine);
273
274    Self::finalize(source, doc, compile_cfg, diag_engine)
275  }
276
277  /// Pull frontmatter + imports/exports, render HTML + MDX body, derive
278  /// excerpt / metadata / TOC, pack into a `CompileOutput`. Each sink
279  /// owns a private `DiagnosticEngine` during the walk; we merge them
280  /// into the caller's `diag_engine` after the walk completes (avoids
281  /// `RefCell` overhead on every sink emit).
282  fn finalize(
283    source: &str,
284    doc: Document,
285    compile_cfg: &CompileConfig,
286    diag_engine: &mut DiagnosticEngine<Code>,
287  ) -> CompileOutput {
288    let mut acc = Accumulator::new();
289    let mut html_sink = if compile_cfg.emit_html { Some(HtmlEmitter::new()) } else { None };
290    let mut body_sink = if compile_cfg.emit_body { Some(MdxBodyEmitter::new()) } else { None };
291
292    let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
293    sinks.push(&mut acc);
294    if let Some(ref mut h) = html_sink {
295      sinks.push(h);
296    }
297    if let Some(ref mut b) = body_sink {
298      sinks.push(b);
299    }
300
301    Walker::new(&doc).walk(sinks.as_mut_slice());
302
303    let (html, body) = match (html_sink, body_sink) {
304      (Some(h), Some(b)) => {
305        let (s, hd) = h.into_parts();
306        let (m, bd) = b.into_parts();
307        diag_engine.extend(hd);
308        diag_engine.extend(bd);
309        (s, m)
310      },
311      (Some(h), None) => {
312        let (s, hd) = h.into_parts();
313        diag_engine.extend(hd);
314        (s, String::new())
315      },
316      (None, Some(b)) => {
317        let (m, bd) = b.into_parts();
318        diag_engine.extend(bd);
319        (String::new(), m)
320      },
321      (None, None) => (String::new(), String::new()),
322    };
323
324    acc.into_compile_output(source, html, body, compile_cfg)
325  }
326}
327
328/// Reading-time + word-count from plain text. `reading_time` in minutes,
329/// ceil, min 1.
330#[derive(Debug, Clone, Serialize, Deserialize, Default)]
331#[serde(rename_all = "camelCase")]
332pub struct Metadata {
333  pub reading_time: u32,
334  pub word_count: u32,
335}
336
337/// One TOC node. `url` is `#<heading-slug>`.
338#[derive(Debug, Clone, Serialize, Deserialize)]
339pub struct TocItem {
340  pub title: String,
341  pub url: String,
342  pub items: Vec<TocItem>,
343}
344
#[cfg(test)]
mod tests {
  //! Sidecar-gate tests: which plugin lists make `has_js_plugins`
  //! report that the JS sidecar is still needed.

  use super::*;
  use serde_json::json;

  #[test]
  fn empty_plugin_lists_no_sidecar() {
    assert!(!CompileConfig::default().has_js_plugins());
  }

  #[test]
  fn arbitrary_remark_plugin_triggers_sidecar() {
    // `remark-frontmatter` is not covered by any native transformer.
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-frontmatter")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }

  #[test]
  fn remark_gfm_alone_skips_sidecar() {
    let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-gfm")], ..CompileConfig::default() };
    assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
  }

  #[test]
  fn rehype_slug_and_autolink_alone_skip_sidecar() {
    // Covers both the bare-string and the `[name, options]` entry forms.
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![
        json!("rehype-slug"),
        json!(["rehype-autolink-headings", { "behavior": "wrap" }]),
      ],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
  }

  #[cfg(feature = "math")]
  #[test]
  fn remark_math_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-math")],
      markdown_rehype_plugins: vec![json!(["rehype-katex", { "errorColor": "red" }])],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
  }

  #[cfg(feature = "emoji")]
  #[test]
  fn remark_emoji_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig { markdown_remark_plugins: vec![json!("remark-emoji")], ..CompileConfig::default() };
    assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn rehype_pretty_code_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
      mdx_rehype_plugins: vec![json!(["rehype-pretty-code", { "theme": "github-dark" }]), json!("shiki")],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
    // Any rehype plugin not absorbed by a native transformer keeps the
    // sidecar alive; `rehype-external-links` has no native owner.
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code"), json!("rehype-external-links")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }

  #[cfg(not(feature = "pretty-code"))]
  #[test]
  fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }
}
425
426/// Compiled `.mdx` output. Every field is always populated; serialised
427/// camelCase for JS parity.
428#[derive(Debug, Clone, Serialize, Deserialize)]
429#[serde(rename_all = "camelCase")]
430pub struct CompileOutput {
431  pub frontmatter: serde_json::Value,
432  pub frontmatter_raw: String,
433  pub content: String,
434  pub html: String,
435  pub body: String,
436  pub excerpt: String,
437  pub metadata: Metadata,
438  pub toc: Vec<TocItem>,
439  pub imports: Vec<String>,
440  pub exports: Vec<String>,
441}