Skip to main content

dmc/engine/
accumulator.rs

1use dmc_codegen::{NodeSink, WalkCtx};
2use dmc_parser::ast::Node;
3
4use crate::engine::compile::{CompileConfig, CompileOutput, Metadata, TocItem};
5
/// Sink that collects document-level data (frontmatter, imports/exports,
/// plain text and a flat heading list) while the AST is walked; the
/// collected data is later assembled into a `CompileOutput`.
#[derive(Debug)]
pub struct Accumulator {
  // collected during walk
  /// Frontmatter deserialized from the raw YAML text of the frontmatter
  /// node; stays `Null` when no frontmatter node is seen or parsing fails.
  pub frontmatter: serde_json::Value,
  /// Raw text of the frontmatter node, copied verbatim.
  pub frontmatter_raw: String,
  /// Raw text of every import node, in visit order.
  pub imports: Vec<String>,
  /// Raw text of every export node, in visit order.
  pub exports: Vec<String>,
  /// Concatenated text, inline-code, code-block and image-alt content; a
  /// `\n` is appended each time a paragraph is left.
  pub plain: String,                       // text for excerpt + word count
  /// One entry per heading that was entered and left, in document order.
  pub toc_flat: Vec<(u8, String, String)>, // (level, title, slug) pre-nest

  // transient state during heading capture
  /// `Some((level, slug))` between entering and leaving a heading node.
  in_heading: Option<(u8, String)>,
  /// Text gathered for the heading that is currently open.
  heading_text: String,
}
20
21impl NodeSink for Accumulator {
22  fn enter(&mut self, node: &Node, _ctx: &WalkCtx) {
23    match node {
24      Node::Frontmatter(f) => {
25        self.frontmatter_raw = f.raw.clone();
26        self.frontmatter = serde_yaml::from_str(&f.raw).unwrap_or(serde_json::Value::Null);
27      },
28      Node::Import(i) => self.imports.push(i.raw.clone()),
29      Node::Export(x) => self.exports.push(x.raw.clone()),
30      Node::Heading(h) => {
31        self.in_heading = Some((h.level, h.slug()));
32        self.heading_text.clear();
33      },
34      Node::Text(t) => {
35        if self.in_heading.is_some() {
36          self.heading_text.push_str(&t.value);
37        }
38        self.plain.push_str(&t.value)
39      },
40      Node::InlineCode(c) => {
41        if self.in_heading.is_some() {
42          self.heading_text.push_str(&c.value);
43        }
44        self.plain.push_str(&c.value);
45      },
46      Node::CodeBlock(cb) => {
47        if self.in_heading.is_some() {
48          self.heading_text.push_str(&cb.value);
49        }
50        self.plain.push_str(&cb.value);
51      },
52      Node::Image(i) => self.plain.push_str(&i.alt),
53      _ => {},
54    }
55  }
56  fn leave(&mut self, node: &Node, _ctx: &WalkCtx) {
57    match node {
58      Node::Heading(_) => {
59        if let Some((level, slug)) = self.in_heading.take() {
60          self.toc_flat.push((level, std::mem::take(&mut self.heading_text).trim().to_string(), slug));
61        }
62      },
63      Node::Paragraph(_) => self.plain.push('\n'),
64      _ => {},
65    }
66  }
67}
68
/// `Default` delegates to [`Accumulator::new`], so both produce the same
/// empty accumulator.
impl Default for Accumulator {
  fn default() -> Self {
    Self::new()
  }
}
74
75impl Accumulator {
76  pub fn new() -> Self {
77    Self {
78      frontmatter: serde_json::Value::Null,
79      frontmatter_raw: String::new(),
80      imports: Vec::new(),
81      exports: Vec::new(),
82      plain: String::new(),
83      toc_flat: Vec::new(),
84      in_heading: None,
85      heading_text: String::new(),
86    }
87  }
88
89  /// Consume self + the other sinks' rendered outputs; assemble the
90  /// final `CompileOutput`. `cfg` is reserved for future excerpt-length /
91  /// reading-rate tuning; currently unused.
92  pub fn into_compile_output(self, source: &str, html: String, body: String, _cfg: &CompileConfig) -> CompileOutput {
93    let content = Self::frontmatter(source).to_string();
94    let excerpt = Self::excerpt(&self.plain, 260);
95    // velite splits its metadata fields:
96    // - `wordCount` is computed against the markdown body (catches
97    //   structural words + JSX text the AST walker drops),
98    // - `readingTime` is computed against the plain-text extraction
99    //   (the user-facing prose count, no source noise).
100    // Mirror that split so both numbers match velite output instead of
101    // sharing a single source-dependent heuristic.
102    let metadata = Self::metadata(&content, &self.plain);
103    let toc = Self::toc(&self.toc_flat);
104
105    CompileOutput {
106      frontmatter: self.frontmatter,
107      frontmatter_raw: self.frontmatter_raw,
108      content,
109      html,
110      body,
111      excerpt,
112      metadata,
113      toc,
114      imports: self.imports,
115      exports: self.exports,
116    }
117  }
118
119  /// `source` minus a leading `---...---` YAML frontmatter block (BOM tolerant).
120  fn frontmatter(source: &str) -> &str {
121    let s = source.trim_start_matches('\u{feff}');
122    if !s.starts_with("---") {
123      return source;
124    }
125    // find the next "---" on its own line
126    let after = &s[3..];
127    if let Some(end) = after.find("\n---") {
128      let rest_start = 3 + end + 4; // 3 dashes + \n--- = 4 chars after end
129      // skip optional newline after the closing
130      let rest = &s[rest_start..];
131      let rest = rest.trim_start_matches('\n');
132      return rest;
133    }
134    source
135  }
136
137  /// Collapse whitespace, truncate to `max` chars, append `...` if cut.
138  /// Char-aware (multibyte safe).
139  fn excerpt(plain: &str, max: usize) -> String {
140    let s: String = plain.split_whitespace().collect::<Vec<_>>().join(" ");
141    if s.chars().count() <= max {
142      return s;
143    }
144    let truncated: String = s.chars().take(max).collect();
145    format!("{}...", truncated.trim_end())
146  }
147
148  /// Word count + reading time from the markdown body (post-frontmatter).
149  /// Strips fenced code blocks and ATX heading markers, then
150  /// whitespace-tokenizes. 200 wpm, ceil, min 1 min.
151  ///
152  /// The two strips together land within ~0.2% of velite's
153  /// `reading-time` output for representative changelogs (V=908 -> G=906
154  /// for the duck-ui changelog). Without the heading-marker strip the
155  /// `##` / `###` tokens count as words and overshoot by 50 per doc.
156  fn metadata(source: &str, plain: &str) -> Metadata {
157    let mut filtered = String::with_capacity(source.len());
158    let mut in_fence = false;
159    for line in source.lines() {
160      if line.trim_start().starts_with("```") {
161        in_fence = !in_fence;
162        continue;
163      }
164      if in_fence {
165        continue;
166      }
167      // Drop the leading `#`+ run on ATX headings so `## 0.4.3` counts
168      // as one word, not two.
169      let trimmed = line.trim_start();
170      if let Some(rest) = trimmed.strip_prefix(|c: char| c == '#') {
171        let mut after_hashes = rest;
172        while let Some(r) = after_hashes.strip_prefix('#') {
173          after_hashes = r;
174        }
175        if after_hashes.starts_with(' ') || after_hashes.starts_with('\t') || after_hashes.is_empty() {
176          filtered.push_str(after_hashes);
177          filtered.push('\n');
178          continue;
179        }
180      }
181      filtered.push_str(line);
182      filtered.push('\n');
183    }
184    let words = filtered.split_whitespace().count() as u32;
185    let plain_words = plain.split_whitespace().count() as u32;
186    // velite rounds half-up (`Math.round`) instead of ceiling, so a
187    // 3.35-minute read displays as `3` not `4`.
188    let reading = ((plain_words as f32) / 200.0).round() as u32;
189    Metadata { word_count: words, reading_time: reading.max(1) }
190  }
191
192  /// Flat `(level, title, slug)` list -> hierarchical `TocItem` tree.
193  /// Level stack tracks ancestry; new headings nest under the last open
194  /// parent or pop back to an earlier ancestor.
195  fn toc(items: &[(u8, String, String)]) -> Vec<TocItem> {
196    let mut roots: Vec<TocItem> = Vec::new();
197    // index path into the children tree, parallel with the level stack
198    let mut path: Vec<usize> = Vec::new();
199    let mut levels: Vec<u8> = Vec::new();
200    for (level, title, id) in items {
201      let item = TocItem { title: title.clone(), url: format!("#{}", id), items: Vec::new() };
202      // pop until top has lower level
203      while let Some(top) = levels.last() {
204        if *top >= *level {
205          levels.pop();
206          path.pop();
207        } else {
208          break;
209        }
210      }
211      // navigate to insertion list
212      let parent_list: &mut Vec<TocItem> = if path.is_empty() {
213        &mut roots
214      } else {
215        let mut node = &mut roots[path[0]];
216        for idx in &path[1..] {
217          node = &mut node.items[*idx];
218        }
219        &mut node.items
220      };
221      parent_list.push(item);
222      let new_idx = parent_list.len() - 1;
223      path.push(new_idx);
224      levels.push(*level);
225    }
226    roots
227  }
228}