Skip to main content

dmc/engine/
compile.rs

1use std::{path::Path, sync::Arc};
2
3use dmc_codegen::{HtmlEmitter, MdxBodyEmitter, Walker};
4use dmc_diagnostic::{
5  Code,
6  metadata::{Origin, SourceMeta},
7};
8use dmc_lexer::Lexer;
9use dmc_parser::{Parser, ast::Document};
10use dmc_transform::{CopyLinkedFilesOptions, MathEngine, PipelineConfig, PrettyCodeOptions};
11use duck_diagnostic::DiagnosticEngine;
12use serde::{Deserialize, Serialize};
13use serde_json::Value;
14
15use crate::engine::accumulator::Accumulator;
16
17#[derive(Debug, Deserialize, Serialize, Clone)]
18#[serde(default)]
19pub struct CompileConfig {
20  pub markdown_gfm: bool,
21  pub emit_html: bool,
22  pub emit_body: bool,
23  pub mdx_minify: bool,
24  pub mdx_output_format: Option<String>,
25  pub markdown_remark_plugins: Vec<Value>,
26  pub markdown_rehype_plugins: Vec<Value>,
27  pub mdx_remark_plugins: Vec<Value>,
28  pub mdx_rehype_plugins: Vec<Value>,
29  pub copy_linked_files: bool,
30  pub output_assets: Option<String>,
31  pub output_base: Option<String>,
32  /// Pretty-code highlighter config. `None` = bundled defaults
33  /// (Catppuccin Latte/Mocha pair, dark primary, multi-mode CSS-vars
34  /// output). `Some` = explicit theme spec.
35  pub pretty_code: Option<PrettyCodeOptions>,
36  /// LaTeX engine for `$...$` / `$$...$$`. `None` = KaTeX (slow, exact
37  /// rehype-katex parity). `Some(MathEngine::Mathml)` = pulldown-latex
38  /// MathML (fast, plainer visuals).
39  pub math_engine: Option<MathEngine>,
40}
41
42impl Default for CompileConfig {
43  fn default() -> Self {
44    Self {
45      markdown_gfm: true,
46      emit_html: true,
47      emit_body: true,
48      mdx_output_format: None,
49      mdx_minify: false,
50      markdown_remark_plugins: vec![],
51      markdown_rehype_plugins: vec![],
52      mdx_remark_plugins: vec![],
53      mdx_rehype_plugins: vec![],
54      copy_linked_files: false,
55      output_assets: None,
56      output_base: None,
57      pretty_code: None,
58      math_engine: None,
59    }
60  }
61}
62
63impl CompileConfig {
64  pub fn new() -> Self {
65    Self::default()
66  }
67
68  pub fn has_js_plugins(&self) -> bool {
69    !self.effective_markdown_remark_plugins().is_empty()
70      || !self.effective_mdx_remark_plugins().is_empty()
71      || !self.effective_markdown_rehype_plugins().is_empty()
72      || !self.effective_mdx_rehype_plugins().is_empty()
73  }
74
75  /// Plugin lists after stripping every JS plugin whose work is now done
76  /// by an in-process transformer (pretty-code/shiki, math, emoji). Used
77  /// both for the sidecar gate and for the request payload so the sidecar
78  /// never duplicates work. When the matching feature is off, that
79  /// plugin's name is left in the list and the sidecar runs it.
80  pub fn effective_markdown_remark_plugins(&self) -> Vec<Value> {
81    Self::filter_native_owned_remark(&self.markdown_remark_plugins)
82  }
83
84  pub fn effective_mdx_remark_plugins(&self) -> Vec<Value> {
85    Self::filter_native_owned_remark(&self.mdx_remark_plugins)
86  }
87
88  pub fn effective_markdown_rehype_plugins(&self) -> Vec<Value> {
89    Self::filter_native_owned_rehype(&self.markdown_rehype_plugins)
90  }
91
92  pub fn effective_mdx_rehype_plugins(&self) -> Vec<Value> {
93    Self::filter_native_owned_rehype(&self.mdx_rehype_plugins)
94  }
95
96  fn filter_native_owned_remark(plugins: &[Value]) -> Vec<Value> {
97    plugins.iter().filter(|p| !is_native_owned_remark(p)).cloned().collect()
98  }
99
100  fn filter_native_owned_rehype(plugins: &[Value]) -> Vec<Value> {
101    plugins.iter().filter(|p| !is_native_owned_rehype(p)).cloned().collect()
102  }
103
104  /// Per-file compile config: turns off native HTML when sidecar will run.
105  pub fn for_render(&self) -> Self {
106    let mut c = self.clone();
107    c.emit_html = !self.has_js_plugins();
108    c
109  }
110
111  /// Build the [`PipelineConfig`] consumed by
112  /// [`Pipeline::with_defaults_for`]. `path` is the compiled file's path,
113  /// used to resolve relative asset paths in the `copy-linked-files`
114  /// transformer.
115  pub fn pipeline_config(&self, path: &Path) -> PipelineConfig {
116    let copy_linked_files = if self.copy_linked_files
117      && let (Some(assets), Some(public)) = (self.output_assets.as_ref(), self.output_base.as_ref())
118    {
119      Some(CopyLinkedFilesOptions {
120        source_dir: path.parent().unwrap_or(Path::new(".")).to_path_buf(),
121        assets_dir: assets.into(),
122        public_base: public.clone(),
123      })
124    } else {
125      None
126    };
127    PipelineConfig {
128      markdown_gfm: Some(self.markdown_gfm),
129      pretty_code: self.pretty_code.clone(),
130      math_engine: self.math_engine,
131      copy_linked_files,
132    }
133  }
134}
135
136/// Extract the plugin name from either the bare string form
137/// (`"rehype-pretty-code"`) or the `[name, options]` array form used by
138/// unified-style plugin configs.
139fn plugin_name(plugin: &Value) -> Option<&str> {
140  match plugin {
141    Value::String(s) => Some(s.as_str()),
142    Value::Array(a) => a.first().and_then(Value::as_str),
143    _ => None,
144  }
145}
146
147/// `true` when `plugin` is a remark-side plugin whose work an in-process
148/// transformer now does. Stripped from the sidecar payload so the JS
149/// plugin chain does not redo native work.
150fn is_native_owned_remark(plugin: &Value) -> bool {
151  let Some(name) = plugin_name(plugin) else { return false };
152  match name {
153    // GFM tables, strikethrough, autolinks, task lists are handled by
154    // the dmc parser; remark-gfm in the sidecar is redundant.
155    "remark-gfm" => true,
156    "remark-math" => cfg!(feature = "math"),
157    "remark-emoji" => cfg!(feature = "emoji"),
158    _ => false,
159  }
160}
161
162/// Same for rehype-side plugins.
163fn is_native_owned_rehype(plugin: &Value) -> bool {
164  let Some(name) = plugin_name(plugin) else { return false };
165  match name {
166    "rehype-pretty-code" | "shiki" => cfg!(feature = "pretty-code"),
167    "rehype-katex" | "rehype-mathjax" => cfg!(feature = "math"),
168    // Heading slugs + anchor links handled by the AutolinkHeadings
169    // transformer in `Pipeline::with_defaults`.
170    "rehype-slug" | "rehype-autolink-headings" => true,
171    _ => false,
172  }
173}
174
175pub struct Compiler;
176
177impl Compiler {
178  /// One-shot compile of `source` with the default pipeline. Use
179  /// `compile_with_pipeline` for file-aware compilation with real spans.
180  pub fn compile(source: &str, diag_engine: &mut DiagnosticEngine<Code>) -> CompileOutput {
181    // FIX:
182    Self::compile_with_pipeline(source, Path::new("."), &CompileConfig::new(), diag_engine)
183  }
184
185  /// Like [`compile`] with a caller-supplied pipeline + path for spans.
186  pub fn compile_with_pipeline(
187    source: &str,
188    path: &Path,
189    compile_cfg: &CompileConfig,
190    diag_engine: &mut DiagnosticEngine<Code>,
191  ) -> CompileOutput {
192    // Each layer holds its own DiagnosticEngine, mirroring the Lexer pattern.
193    let meta = Arc::from(SourceMeta { path: Arc::from(path.display().to_string()), origin: Origin::File(path.into()) });
194    // Source-level math: rewrite `$...$` / `$$...$$` to `<MathMl/>` JSX
195    // so the parser does not interpret `_` or `^` inside math as Markdown
196    // emphasis markers.
197    #[cfg(feature = "math")]
198    let preprocessed = dmc_transform::Math::preprocess_source(source);
199    #[cfg(feature = "math")]
200    let source: &str = &preprocessed;
201    let mut lexer = Lexer::new(source, meta.clone(), diag_engine);
202    let _ = lexer.scan_tokens();
203
204    let mut doc = {
205      let mut parser = Parser::new(lexer.tokens, meta.clone(), diag_engine);
206      parser.parse()
207    };
208
209    let pipeline_cfg = compile_cfg.pipeline_config(path);
210    let pipeline = dmc_transform::Pipeline::with_defaults_for(&pipeline_cfg);
211
212    pipeline.run(&mut doc, &meta, diag_engine);
213
214    Self::finalize(source, doc, compile_cfg, diag_engine)
215  }
216
217  /// Pull frontmatter + imports/exports, render HTML + MDX body, derive
218  /// excerpt / metadata / TOC, pack into a `CompileOutput`. Each sink
219  /// owns a private `DiagnosticEngine` during the walk; we merge them
220  /// into the caller's `diag_engine` after the walk completes (avoids
221  /// `RefCell` overhead on every sink emit).
222  fn finalize(
223    source: &str,
224    doc: Document,
225    compile_cfg: &CompileConfig,
226    diag_engine: &mut DiagnosticEngine<Code>,
227  ) -> CompileOutput {
228    let mut acc = Accumulator::new();
229    let mut html_sink = if compile_cfg.emit_html { Some(HtmlEmitter::new()) } else { None };
230    let mut body_sink = if compile_cfg.emit_body { Some(MdxBodyEmitter::new()) } else { None };
231
232    let mut sinks: Vec<&mut dyn dmc_codegen::NodeSink> = Vec::with_capacity(3);
233    sinks.push(&mut acc);
234    if let Some(ref mut h) = html_sink {
235      sinks.push(h);
236    }
237    if let Some(ref mut b) = body_sink {
238      sinks.push(b);
239    }
240
241    Walker::new(&doc).walk(sinks.as_mut_slice());
242
243    let (html, body) = match (html_sink, body_sink) {
244      (Some(h), Some(b)) => {
245        let (s, hd) = h.into_parts();
246        let (m, bd) = b.into_parts();
247        diag_engine.extend(hd);
248        diag_engine.extend(bd);
249        (s, m)
250      },
251      (Some(h), None) => {
252        let (s, hd) = h.into_parts();
253        diag_engine.extend(hd);
254        (s, String::new())
255      },
256      (None, Some(b)) => {
257        let (m, bd) = b.into_parts();
258        diag_engine.extend(bd);
259        (String::new(), m)
260      },
261      (None, None) => (String::new(), String::new()),
262    };
263
264    acc.into_compile_output(source, html, body, compile_cfg)
265  }
266}
267
268/// Reading-time + word-count from plain text. `reading_time` in minutes,
269/// ceil, min 1.
270#[derive(Debug, Clone, Serialize, Deserialize, Default)]
271#[serde(rename_all = "camelCase")]
272pub struct Metadata {
273  pub reading_time: u32,
274  pub word_count: u32,
275}
276
277/// One TOC node. `url` is `#<heading-slug>`.
278#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct TocItem {
280  pub title: String,
281  pub url: String,
282  pub items: Vec<TocItem>,
283}
284
#[cfg(test)]
mod tests {
  use super::*;
  use serde_json::json;

  /// With no plugins configured the sidecar gate stays closed.
  #[test]
  fn empty_plugin_lists_no_sidecar() {
    assert!(!CompileConfig::default().has_js_plugins());
  }

  /// A remark plugin that no native transformer covers opens the gate.
  #[test]
  fn arbitrary_remark_plugin_triggers_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-frontmatter")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }

  #[test]
  fn remark_gfm_alone_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-gfm")],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "dmc parser handles GFM natively");
  }

  /// Covers both the bare-string and the `[name, options]` entry shapes.
  #[test]
  fn rehype_slug_and_autolink_alone_skip_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![
        json!("rehype-slug"),
        json!(["rehype-autolink-headings", { "behavior": "wrap" }]),
      ],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "AutolinkHeadings transformer handles slug + anchor natively");
  }

  #[cfg(feature = "math")]
  #[test]
  fn remark_math_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-math")],
      markdown_rehype_plugins: vec![json!(["rehype-katex", { "errorColor": "red" }])],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native math should absorb remark-math + rehype-katex");
  }

  #[cfg(feature = "emoji")]
  #[test]
  fn remark_emoji_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_remark_plugins: vec![json!("remark-emoji")],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native emoji should absorb remark-emoji");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn rehype_pretty_code_alone_with_native_skips_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
      mdx_rehype_plugins: vec![
        json!(["rehype-pretty-code", { "theme": "github-dark" }]),
        json!("shiki"),
      ],
      ..CompileConfig::default()
    };
    assert!(!cfg.has_js_plugins(), "native should absorb rehype-pretty-code/shiki");
  }

  #[cfg(feature = "pretty-code")]
  #[test]
  fn other_rehype_plugin_still_triggers_sidecar_even_with_native() {
    // One natively-owned entry plus one that no current native pass owns:
    // the second keeps the sidecar alive.
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code"), json!("rehype-external-links")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }

  #[cfg(not(feature = "pretty-code"))]
  #[test]
  fn pretty_code_feature_off_means_rehype_pretty_code_routes_to_sidecar() {
    let cfg = CompileConfig {
      markdown_rehype_plugins: vec![json!("rehype-pretty-code")],
      ..CompileConfig::default()
    };
    assert!(cfg.has_js_plugins());
  }
}
365
366/// Compiled `.mdx` output. Every field is always populated; serialised
367/// camelCase for JS parity.
368#[derive(Debug, Clone, Serialize, Deserialize)]
369#[serde(rename_all = "camelCase")]
370pub struct CompileOutput {
371  pub frontmatter: serde_json::Value,
372  pub frontmatter_raw: String,
373  pub content: String,
374  pub html: String,
375  pub body: String,
376  pub excerpt: String,
377  pub metadata: Metadata,
378  pub toc: Vec<TocItem>,
379  pub imports: Vec<String>,
380  pub exports: Vec<String>,
381}