1use crate::Pass;
4use anyhow::Result;
5use typub_core::NodePolicyAction;
6use typub_ir::{Block, BlockAttrs, Document, Inline, UnknownChild};
7
8pub struct ApplyNodePolicyPass {
9 raw: NodePolicyAction,
10 unknown: NodePolicyAction,
11}
12
13impl ApplyNodePolicyPass {
14 pub fn new(raw: NodePolicyAction, unknown: NodePolicyAction) -> Self {
15 Self { raw, unknown }
16 }
17
18 fn transform_blocks(
19 &self,
20 blocks: Vec<Block>,
21 path: &str,
22 parent_action: Option<NodePolicyAction>,
23 ) -> Result<Vec<Block>> {
24 let mut out = Vec::with_capacity(blocks.len());
25 for (idx, block) in blocks.into_iter().enumerate() {
26 let block_path = format!("{path}.blocks[{idx}]");
27 let mut transformed = self.transform_block(block, &block_path, parent_action)?;
28 out.append(&mut transformed);
29 }
30 Ok(out)
31 }
32
33 fn transform_block(
34 &self,
35 block: Block,
36 path: &str,
37 parent_action: Option<NodePolicyAction>,
38 ) -> Result<Vec<Block>> {
39 match block {
40 Block::Heading {
41 level,
42 id,
43 content,
44 attrs,
45 } => Ok(vec![Block::Heading {
46 level,
47 id,
48 content: self.transform_inlines(
49 content,
50 &format!("{path}.inlines"),
51 parent_action,
52 )?,
53 attrs,
54 }]),
55 Block::Paragraph { content, attrs } => Ok(vec![Block::Paragraph {
56 content: self.transform_inlines(
57 content,
58 &format!("{path}.inlines"),
59 parent_action,
60 )?,
61 attrs,
62 }]),
63 Block::Quote {
64 blocks,
65 cite,
66 attrs,
67 } => Ok(vec![Block::Quote {
68 blocks: self.transform_blocks(blocks, path, parent_action)?,
69 cite,
70 attrs,
71 }]),
72 Block::CodeBlock { .. }
73 | Block::Divider { .. }
74 | Block::MathBlock { .. }
75 | Block::SvgBlock { .. } => Ok(vec![block]),
76 Block::List { mut list, attrs } => {
77 match &mut list.kind {
78 typub_ir::ListKind::Bullet { items }
79 | typub_ir::ListKind::Numbered { items, .. } => {
80 for (item_idx, item) in items.iter_mut().enumerate() {
81 let blocks = std::mem::take(&mut item.blocks);
82 item.blocks = self.transform_blocks(
83 blocks,
84 &format!("{path}.items[{item_idx}]"),
85 parent_action,
86 )?;
87 }
88 }
89 typub_ir::ListKind::Task { items } => {
90 for (item_idx, item) in items.iter_mut().enumerate() {
91 let blocks = std::mem::take(&mut item.blocks);
92 item.blocks = self.transform_blocks(
93 blocks,
94 &format!("{path}.items[{item_idx}]"),
95 parent_action,
96 )?;
97 }
98 }
99 typub_ir::ListKind::Custom { items, .. } => {
100 for (item_idx, item) in items.iter_mut().enumerate() {
101 let blocks = std::mem::take(&mut item.blocks);
102 item.blocks = self.transform_blocks(
103 blocks,
104 &format!("{path}.items[{item_idx}]"),
105 parent_action,
106 )?;
107 }
108 }
109 }
110 Ok(vec![Block::List { list, attrs }])
111 }
112 Block::DefinitionList { mut items, attrs } => {
113 for (item_idx, item) in items.iter_mut().enumerate() {
114 for (term_idx, term) in item.terms.iter_mut().enumerate() {
115 let blocks = std::mem::take(term);
116 *term = self.transform_blocks(
117 blocks,
118 &format!("{path}.definition_items[{item_idx}].terms[{term_idx}]"),
119 parent_action,
120 )?;
121 }
122 for (def_idx, defn) in item.definitions.iter_mut().enumerate() {
123 let blocks = std::mem::take(defn);
124 *defn = self.transform_blocks(
125 blocks,
126 &format!("{path}.definition_items[{item_idx}].definitions[{def_idx}]"),
127 parent_action,
128 )?;
129 }
130 }
131 Ok(vec![Block::DefinitionList { items, attrs }])
132 }
133 Block::Table {
134 mut caption,
135 mut sections,
136 attrs,
137 } => {
138 if let Some(caption_blocks) = caption.as_mut() {
139 let blocks = std::mem::take(caption_blocks);
140 *caption_blocks =
141 self.transform_blocks(blocks, &format!("{path}.caption"), parent_action)?;
142 }
143 for (section_idx, section) in sections.iter_mut().enumerate() {
144 for (row_idx, row) in section.rows.iter_mut().enumerate() {
145 for (cell_idx, cell) in row.cells.iter_mut().enumerate() {
146 let blocks = std::mem::take(&mut cell.blocks);
147 cell.blocks = self.transform_blocks(
148 blocks,
149 &format!(
150 "{path}.sections[{section_idx}].rows[{row_idx}].cells[{cell_idx}]"
151 ),
152 parent_action,
153 )?;
154 }
155 }
156 }
157 Ok(vec![Block::Table {
158 caption,
159 sections,
160 attrs,
161 }])
162 }
163 Block::Figure {
164 content,
165 mut caption,
166 attrs,
167 } => {
168 let content =
169 self.transform_blocks(content, &format!("{path}.content"), parent_action)?;
170 if let Some(caption_blocks) = caption.as_mut() {
171 let blocks = std::mem::take(caption_blocks);
172 *caption_blocks =
173 self.transform_blocks(blocks, &format!("{path}.caption"), parent_action)?;
174 }
175 Ok(vec![Block::Figure {
176 content,
177 caption,
178 attrs,
179 }])
180 }
181 Block::Admonition {
182 kind,
183 title,
184 blocks,
185 attrs,
186 } => Ok(vec![Block::Admonition {
187 kind,
188 title: title
189 .map(|inlines| {
190 self.transform_inlines(inlines, &format!("{path}.title"), parent_action)
191 })
192 .transpose()?,
193 blocks: self.transform_blocks(blocks, path, parent_action)?,
194 attrs,
195 }]),
196 Block::Details {
197 summary,
198 blocks,
199 open,
200 attrs,
201 } => Ok(vec![Block::Details {
202 summary: summary
203 .map(|inlines| {
204 self.transform_inlines(inlines, &format!("{path}.summary"), parent_action)
205 })
206 .transpose()?,
207 blocks: self.transform_blocks(blocks, path, parent_action)?,
208 open,
209 attrs,
210 }]),
211 Block::RawBlock {
212 html,
213 origin,
214 trust,
215 attrs,
216 } => {
217 let action = parent_action.unwrap_or(self.raw);
218 match action {
219 NodePolicyAction::Pass => Ok(vec![Block::RawBlock {
220 html,
221 origin,
222 trust,
223 attrs,
224 }]),
225 NodePolicyAction::Sanitize => Ok(if html.trim().is_empty() {
226 Vec::new()
227 } else {
228 vec![Block::Paragraph {
229 content: vec![Inline::Text(html)],
230 attrs: BlockAttrs::default(),
231 }]
232 }),
233 NodePolicyAction::Drop => Ok(Vec::new()),
234 NodePolicyAction::Error => anyhow::bail!(
235 "Raw node encountered at {path}, but adapter policy is 'error'"
236 ),
237 }
238 }
239 Block::UnknownBlock {
240 tag,
241 attrs,
242 children,
243 data,
244 note,
245 source,
246 } => {
247 let action = parent_action.unwrap_or(self.unknown);
248 match action {
249 NodePolicyAction::Pass => Ok(vec![Block::UnknownBlock {
250 tag,
251 attrs,
252 children: self.transform_unknown_children(children, path, Some(action))?,
253 data,
254 note,
255 source,
256 }]),
257 NodePolicyAction::Sanitize => {
258 let children =
259 self.transform_unknown_children(children, path, Some(action))?;
260 Ok(self.unknown_children_to_blocks(children))
261 }
262 NodePolicyAction::Drop => Ok(Vec::new()),
263 NodePolicyAction::Error => anyhow::bail!(
264 "Unknown node encountered at {path}, but adapter policy is 'error'"
265 ),
266 }
267 }
268 }
269 }
270
271 fn transform_inlines(
272 &self,
273 inlines: Vec<Inline>,
274 path: &str,
275 parent_action: Option<NodePolicyAction>,
276 ) -> Result<Vec<Inline>> {
277 let mut out = Vec::with_capacity(inlines.len());
278 for (idx, inline) in inlines.into_iter().enumerate() {
279 let inline_path = format!("{path}.inlines[{idx}]");
280 let mut transformed = self.transform_inline(inline, &inline_path, parent_action)?;
281 out.append(&mut transformed);
282 }
283 Ok(out)
284 }
285
286 fn transform_inline(
287 &self,
288 inline: Inline,
289 path: &str,
290 parent_action: Option<NodePolicyAction>,
291 ) -> Result<Vec<Inline>> {
292 match inline {
293 Inline::Text(_)
294 | Inline::Code(_)
295 | Inline::SoftBreak
296 | Inline::HardBreak
297 | Inline::Image { .. }
298 | Inline::FootnoteRef(_)
299 | Inline::MathInline { .. }
300 | Inline::SvgInline { .. } => Ok(vec![inline]),
301 Inline::Styled {
302 styles,
303 content,
304 attrs,
305 } => Ok(vec![Inline::Styled {
306 styles,
307 content: self.transform_inlines(content, path, parent_action)?,
308 attrs,
309 }]),
310 Inline::Link {
311 content,
312 href,
313 title,
314 attrs,
315 } => Ok(vec![Inline::Link {
316 content: self.transform_inlines(content, path, parent_action)?,
317 href,
318 title,
319 attrs,
320 }]),
321 Inline::RawInline {
322 html,
323 origin,
324 trust,
325 attrs,
326 } => {
327 let action = parent_action.unwrap_or(self.raw);
328 match action {
329 NodePolicyAction::Pass => Ok(vec![Inline::RawInline {
330 html,
331 origin,
332 trust,
333 attrs,
334 }]),
335 NodePolicyAction::Sanitize => Ok(if html.is_empty() {
336 Vec::new()
337 } else {
338 vec![Inline::Text(html)]
339 }),
340 NodePolicyAction::Drop => Ok(Vec::new()),
341 NodePolicyAction::Error => anyhow::bail!(
342 "Raw inline encountered at {path}, but adapter policy is 'error'"
343 ),
344 }
345 }
346 Inline::UnknownInline {
347 tag,
348 attrs,
349 content,
350 data,
351 note,
352 source,
353 } => {
354 let action = parent_action.unwrap_or(self.unknown);
355 match action {
356 NodePolicyAction::Pass => Ok(vec![Inline::UnknownInline {
357 tag,
358 attrs,
359 content: self.transform_inlines(content, path, Some(action))?,
360 data,
361 note,
362 source,
363 }]),
364 NodePolicyAction::Sanitize => {
365 self.transform_inlines(content, path, Some(action))
366 }
367 NodePolicyAction::Drop => Ok(Vec::new()),
368 NodePolicyAction::Error => anyhow::bail!(
369 "Unknown inline encountered at {path}, but adapter policy is 'error'"
370 ),
371 }
372 }
373 }
374 }
375
376 fn transform_unknown_children(
377 &self,
378 children: Vec<UnknownChild>,
379 path: &str,
380 parent_action: Option<NodePolicyAction>,
381 ) -> Result<Vec<UnknownChild>> {
382 let mut out = Vec::with_capacity(children.len());
383 for (idx, child) in children.into_iter().enumerate() {
384 let child_path = format!("{path}.unknown_children[{idx}]");
385 match child {
386 UnknownChild::Block(block) => {
387 for block in self.transform_block(block, &child_path, parent_action)? {
388 out.push(UnknownChild::Block(block));
389 }
390 }
391 UnknownChild::Inline(inline) => {
392 for inline in self.transform_inline(inline, &child_path, parent_action)? {
393 out.push(UnknownChild::Inline(inline));
394 }
395 }
396 }
397 }
398 Ok(out)
399 }
400
401 fn unknown_children_to_blocks(&self, children: Vec<UnknownChild>) -> Vec<Block> {
402 let mut out = Vec::new();
403 let mut pending_inline = Vec::new();
404 for child in children {
405 match child {
406 UnknownChild::Block(block) => {
407 if !pending_inline.is_empty() {
408 out.push(Block::Paragraph {
409 content: std::mem::take(&mut pending_inline),
410 attrs: BlockAttrs::default(),
411 });
412 }
413 out.push(block);
414 }
415 UnknownChild::Inline(inline) => {
416 pending_inline.push(inline);
417 }
418 }
419 }
420 if !pending_inline.is_empty() {
421 out.push(Block::Paragraph {
422 content: pending_inline,
423 attrs: BlockAttrs::default(),
424 });
425 }
426 out
427 }
428}
429
430impl Pass for ApplyNodePolicyPass {
431 fn name(&self) -> &'static str {
432 "apply_node_policy"
433 }
434
435 fn run(&mut self, doc: &mut Document, _ctx: &mut crate::PassCtx) -> Result<()> {
436 let blocks = std::mem::take(&mut doc.blocks);
437 doc.blocks = self.transform_blocks(blocks, "document", None)?;
438
439 for (id, def) in &mut doc.footnotes {
440 let blocks = std::mem::take(&mut def.blocks);
441 def.blocks =
442 self.transform_blocks(blocks, &format!("document.footnotes[{}]", id.0), None)?;
443 }
444
445 Ok(())
446 }
447}
448
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used)]

    use super::*;
    use typub_ir::{DocMeta, InlineAttrs, RawOrigin, RawTrust};

    /// Wraps `blocks` in a document with default footnotes, assets and metadata.
    fn doc_with(blocks: Vec<Block>) -> Document {
        Document {
            blocks,
            footnotes: Default::default(),
            assets: Default::default(),
            meta: DocMeta::default(),
        }
    }

    /// Plain text inline.
    fn text(value: &str) -> Inline {
        Inline::Text(value.to_string())
    }

    /// Paragraph with default attributes.
    fn paragraph(content: Vec<Inline>) -> Block {
        Block::Paragraph {
            content,
            attrs: BlockAttrs::default(),
        }
    }

    /// Markdown-origin raw block with the given trust level.
    fn raw_block(html: &str, trust: RawTrust) -> Block {
        Block::RawBlock {
            html: html.to_string(),
            origin: RawOrigin::Markdown,
            trust,
            attrs: BlockAttrs::default(),
        }
    }

    /// Unknown inline with default attributes and no extra data.
    fn unknown_inline(tag: &str, content: Vec<Inline>) -> Inline {
        Inline::UnknownInline {
            tag: tag.to_string(),
            attrs: InlineAttrs::default(),
            content,
            data: Default::default(),
            note: None,
            source: None,
        }
    }

    /// Unknown block with default attributes and no extra data.
    fn unknown_block(tag: &str, children: Vec<UnknownChild>) -> Block {
        Block::UnknownBlock {
            tag: tag.to_string(),
            attrs: BlockAttrs::default(),
            children,
            data: Default::default(),
            note: None,
            source: None,
        }
    }

    /// Runs a freshly built pass with the given actions over `doc`.
    fn apply(raw: NodePolicyAction, unknown: NodePolicyAction, doc: &mut Document) -> Result<()> {
        let mut pass = ApplyNodePolicyPass::new(raw, unknown);
        pass.run(doc, &mut crate::PassCtx::default())
    }

    #[test]
    fn pass_action_keeps_raw_and_unknown() {
        let mut doc = doc_with(vec![
            raw_block("<x/>", RawTrust::Trusted),
            paragraph(vec![unknown_inline("foo", vec![text("ok")])]),
        ]);

        apply(NodePolicyAction::Pass, NodePolicyAction::Pass, &mut doc).expect("run pass");

        assert!(matches!(doc.blocks[0], Block::RawBlock { .. }));
        if let Block::Paragraph { content, .. } = &doc.blocks[1] {
            assert!(matches!(content[0], Inline::UnknownInline { .. }));
        } else {
            panic!("expected paragraph");
        }
    }

    #[test]
    fn sanitize_action_neutralizes_raw_and_unknown() {
        let mut doc = doc_with(vec![
            raw_block("<script>alert(1)</script>", RawTrust::Untrusted),
            unknown_block(
                "x-card",
                vec![
                    UnknownChild::Inline(text("hello")),
                    UnknownChild::Block(paragraph(vec![text("world")])),
                ],
            ),
        ]);

        apply(
            NodePolicyAction::Sanitize,
            NodePolicyAction::Sanitize,
            &mut doc,
        )
        .expect("run pass");

        // The raw block is demoted to a text paragraph.
        if let Block::Paragraph { content, .. } = &doc.blocks[0] {
            assert!(matches!(content[0], Inline::Text(_)));
        } else {
            panic!("expected paragraph for sanitized raw");
        }

        // The unknown block unwraps into one paragraph for the inline run and the
        // original nested paragraph.
        assert!(matches!(doc.blocks[1], Block::Paragraph { .. }));
        assert!(matches!(doc.blocks[2], Block::Paragraph { .. }));
    }

    #[test]
    fn drop_action_removes_raw_and_unknown() {
        let raw_inline = Inline::RawInline {
            html: "<b>x</b>".to_string(),
            origin: RawOrigin::Markdown,
            trust: RawTrust::Untrusted,
            attrs: InlineAttrs::default(),
        };
        let mut doc = doc_with(vec![
            paragraph(vec![
                text("a"),
                raw_inline,
                unknown_inline("foo", vec![text("b")]),
            ]),
            unknown_block("x", vec![]),
        ]);

        apply(NodePolicyAction::Drop, NodePolicyAction::Drop, &mut doc).expect("run pass");

        assert_eq!(doc.blocks.len(), 1);
        if let Block::Paragraph { content, .. } = &doc.blocks[0] {
            assert_eq!(content.len(), 1);
            assert!(matches!(content[0], Inline::Text(_)));
        } else {
            panic!("expected paragraph");
        }
    }

    #[test]
    fn error_action_fails_on_raw_unknown() {
        let mut doc = doc_with(vec![raw_block("<x/>", RawTrust::Untrusted)]);

        let err = apply(NodePolicyAction::Error, NodePolicyAction::Pass, &mut doc)
            .expect_err("raw should error");

        assert!(err.to_string().contains("Raw node encountered"));
    }
}