Skip to main content

dmc/engine/
collection.rs

1use dmc_diagnostic::Code;
2use duck_diagnostic::{DiagnosticEngine, diag};
3use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
4use std::path::PathBuf;
5
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8
9use crate::engine::{
10  cache::{FileCache, fingerprint},
11  compile::Compiler,
12  config::EngineConfig,
13  sidecar::run_sidecar,
14  utils::{CollectionReport, build_schema_ctx, build_velite_record, minify_js, wrap_mdx_module},
15};
16
17#[derive(Debug, Clone, Serialize, Deserialize, Default)]
18#[serde(default)]
19pub struct Collection {
20  pub name: String,
21  pub pattern: String,
22  pub base_dir: PathBuf,
23  #[serde(skip_serializing_if = "Option::is_none")]
24  pub schema: Option<Value>,
25  #[serde(skip_serializing_if = "std::ops::Not::not")]
26  pub single: bool,
27}
28
29impl Collection {
30  /// Compile every file matched by `pattern` in parallel, validate
31  /// frontmatter against `schema`, optionally run JS sidecars + MDX module
32  /// wrap + minify, then write `{name}.json`.
33  pub(crate) fn process(
34    &self,
35    cfg: &EngineConfig,
36    diag_engine: &mut DiagnosticEngine<Code>,
37  ) -> Result<CollectionReport, ()> {
38    let walker = globwalk::GlobWalkerBuilder::from_patterns(&self.base_dir, &[&self.pattern]).build().map_err(|e| {
39      diag_engine.emit(diag!(Code::EmptyFrontMatter, format!("globwalk error: {}", e)));
40    })?;
41
42    let paths = walker.filter_map(|e| e.ok()).map(|e| e.path().to_path_buf()).collect::<Vec<PathBuf>>();
43
44    let collection_schema = self.schema.as_ref().and_then(|d| {
45      dmc_schema::compile_descriptor(d)
46        .map_err(|e| {
47          diag_engine.emit(diag!(Code::EmptyFrontMatter, format!("schema error: {}", e)));
48        })
49        .ok()
50    });
51
52    // Persistent per-file cache. Each record is keyed by
53    // (dmc_version, source_bytes, path, full-cfg-fingerprint) so any
54    // change to source or relevant config invalidates the entry.
55    let cache = if cfg.cache_enabled { FileCache::open(cfg.output_dir.join(".cache").join("dmc")) } else { None };
56    let cfg_fp = fingerprint(&(&cfg.compile, &cfg.include_html, &self.name, &self.schema, &cfg.output_format));
57
58    let outcomes: Vec<(Option<Value>, DiagnosticEngine<Code>)> = paths
59      .par_iter()
60      .map(|path| {
61        let mut local_diag_engine = DiagnosticEngine::<Code>::new();
62
63        let source = match std::fs::read_to_string(path) {
64          Ok(s) => s,
65          Err(e) => {
66            local_diag_engine.emit(diag!(Code::EmptyFrontMatter, format!("error: {}", e)));
67            return (None, local_diag_engine);
68          },
69        };
70
71        // Cache lookup: skip lex/parse/transform/codegen + sidecar when
72        // (source + cfg) is unchanged. Hits the disk and returns the
73        // already-rendered Value directly.
74        let cache_key = cache.as_ref().map(|_| FileCache::key(source.as_bytes(), path, &cfg_fp));
75        if let (Some(c), Some(k)) = (cache.as_ref(), cache_key.as_ref())
76          && let Some(hit) = c.get(k)
77        {
78          return (Some(hit), local_diag_engine);
79        }
80
81        let local_compiler_cfg = cfg.compile.for_render();
82        let use_sidecar = cfg.compile.has_js_plugins();
83
84        let mut compiled = Compiler::compile_with_pipeline(&source, path, &local_compiler_cfg, &mut local_diag_engine);
85
86        if use_sidecar && let Some(html) = run_sidecar(&compiled.content, cfg) {
87          compiled.html = html;
88        }
89
90        if cfg.compile.mdx_output_format.as_deref() == Some("module") {
91          compiled.body = wrap_mdx_module(&compiled.body, &compiled.imports);
92        }
93        if cfg.compile.mdx_minify {
94          compiled.body = minify_js(&compiled.body);
95        }
96
97        let validated_frontmatter = match (&collection_schema, &compiled.frontmatter) {
98          (Some(schema), fm) if !fm.is_null() => {
99            let ctx = build_schema_ctx(path, &cfg.root, &compiled, cfg);
100            match schema.parse(fm, &ctx) {
101              Ok(v) => v,
102              Err(e) => {
103                local_diag_engine.emit(diag!(Code::EmptyFrontMatter, format!("schema error: {}", e)));
104                compiled.frontmatter.clone()
105              },
106            }
107          },
108          _ => compiled.frontmatter.clone(),
109        };
110
111        let include_html = cfg.include_html || use_sidecar;
112        let rec = build_velite_record(compiled, validated_frontmatter, path, &self.base_dir, &self.name, include_html);
113
114        // Persist into the on-disk cache so next build sees a hit.
115        if let (Some(c), Some(k)) = (cache.as_ref(), cache_key.as_ref()) {
116          c.put(k, &rec);
117        }
118        (Some(rec), local_diag_engine)
119      })
120      .collect();
121
122    let mut records: Vec<Value> = Vec::with_capacity(outcomes.len());
123    for (rec, local_diag_engine) in outcomes {
124      diag_engine.extend(local_diag_engine);
125      if let Some(r) = rec {
126        records.push(r);
127      }
128    }
129
130    let out_path = cfg.output_dir.join(format!("{}.json", self.name));
131    let count = if self.single { if records.is_empty() { 0 } else { 1 } } else { records.len() };
132    let json = if self.single {
133      let single = records.into_iter().next().unwrap_or(Value::Null);
134      serde_json::to_string_pretty(&single).unwrap()
135    } else {
136      serde_json::to_string_pretty(&records).unwrap()
137    };
138
139    std::fs::write(&out_path, json)
140      .map_err(|e| diag_engine.emit(diag!(Code::EmptyFrontMatter, format!("error: {}", e))))?;
141
142    Ok(CollectionReport { name: self.name.clone(), records: count, output_path: out_path })
143  }
144}