Skip to main content

typub_passes/
apply_node_policy.rs

1//! Apply node policy over semantic IR.
2
3use crate::Pass;
4use anyhow::Result;
5use typub_core::NodePolicyAction;
6use typub_ir::{Block, BlockAttrs, Document, Inline, UnknownChild};
7
8pub struct ApplyNodePolicyPass {
9    raw: NodePolicyAction,
10    unknown: NodePolicyAction,
11}
12
13impl ApplyNodePolicyPass {
14    pub fn new(raw: NodePolicyAction, unknown: NodePolicyAction) -> Self {
15        Self { raw, unknown }
16    }
17
18    fn transform_blocks(
19        &self,
20        blocks: Vec<Block>,
21        path: &str,
22        parent_action: Option<NodePolicyAction>,
23    ) -> Result<Vec<Block>> {
24        let mut out = Vec::with_capacity(blocks.len());
25        for (idx, block) in blocks.into_iter().enumerate() {
26            let block_path = format!("{path}.blocks[{idx}]");
27            let mut transformed = self.transform_block(block, &block_path, parent_action)?;
28            out.append(&mut transformed);
29        }
30        Ok(out)
31    }
32
33    fn transform_block(
34        &self,
35        block: Block,
36        path: &str,
37        parent_action: Option<NodePolicyAction>,
38    ) -> Result<Vec<Block>> {
39        match block {
40            Block::Heading {
41                level,
42                id,
43                content,
44                attrs,
45            } => Ok(vec![Block::Heading {
46                level,
47                id,
48                content: self.transform_inlines(
49                    content,
50                    &format!("{path}.inlines"),
51                    parent_action,
52                )?,
53                attrs,
54            }]),
55            Block::Paragraph { content, attrs } => Ok(vec![Block::Paragraph {
56                content: self.transform_inlines(
57                    content,
58                    &format!("{path}.inlines"),
59                    parent_action,
60                )?,
61                attrs,
62            }]),
63            Block::Quote {
64                blocks,
65                cite,
66                attrs,
67            } => Ok(vec![Block::Quote {
68                blocks: self.transform_blocks(blocks, path, parent_action)?,
69                cite,
70                attrs,
71            }]),
72            Block::CodeBlock { .. }
73            | Block::Divider { .. }
74            | Block::MathBlock { .. }
75            | Block::SvgBlock { .. } => Ok(vec![block]),
76            Block::List { mut list, attrs } => {
77                match &mut list.kind {
78                    typub_ir::ListKind::Bullet { items }
79                    | typub_ir::ListKind::Numbered { items, .. } => {
80                        for (item_idx, item) in items.iter_mut().enumerate() {
81                            let blocks = std::mem::take(&mut item.blocks);
82                            item.blocks = self.transform_blocks(
83                                blocks,
84                                &format!("{path}.items[{item_idx}]"),
85                                parent_action,
86                            )?;
87                        }
88                    }
89                    typub_ir::ListKind::Task { items } => {
90                        for (item_idx, item) in items.iter_mut().enumerate() {
91                            let blocks = std::mem::take(&mut item.blocks);
92                            item.blocks = self.transform_blocks(
93                                blocks,
94                                &format!("{path}.items[{item_idx}]"),
95                                parent_action,
96                            )?;
97                        }
98                    }
99                    typub_ir::ListKind::Custom { items, .. } => {
100                        for (item_idx, item) in items.iter_mut().enumerate() {
101                            let blocks = std::mem::take(&mut item.blocks);
102                            item.blocks = self.transform_blocks(
103                                blocks,
104                                &format!("{path}.items[{item_idx}]"),
105                                parent_action,
106                            )?;
107                        }
108                    }
109                }
110                Ok(vec![Block::List { list, attrs }])
111            }
112            Block::DefinitionList { mut items, attrs } => {
113                for (item_idx, item) in items.iter_mut().enumerate() {
114                    for (term_idx, term) in item.terms.iter_mut().enumerate() {
115                        let blocks = std::mem::take(term);
116                        *term = self.transform_blocks(
117                            blocks,
118                            &format!("{path}.definition_items[{item_idx}].terms[{term_idx}]"),
119                            parent_action,
120                        )?;
121                    }
122                    for (def_idx, defn) in item.definitions.iter_mut().enumerate() {
123                        let blocks = std::mem::take(defn);
124                        *defn = self.transform_blocks(
125                            blocks,
126                            &format!("{path}.definition_items[{item_idx}].definitions[{def_idx}]"),
127                            parent_action,
128                        )?;
129                    }
130                }
131                Ok(vec![Block::DefinitionList { items, attrs }])
132            }
133            Block::Table {
134                mut caption,
135                mut sections,
136                attrs,
137            } => {
138                if let Some(caption_blocks) = caption.as_mut() {
139                    let blocks = std::mem::take(caption_blocks);
140                    *caption_blocks =
141                        self.transform_blocks(blocks, &format!("{path}.caption"), parent_action)?;
142                }
143                for (section_idx, section) in sections.iter_mut().enumerate() {
144                    for (row_idx, row) in section.rows.iter_mut().enumerate() {
145                        for (cell_idx, cell) in row.cells.iter_mut().enumerate() {
146                            let blocks = std::mem::take(&mut cell.blocks);
147                            cell.blocks = self.transform_blocks(
148                                blocks,
149                                &format!(
150                                    "{path}.sections[{section_idx}].rows[{row_idx}].cells[{cell_idx}]"
151                                ),
152                                parent_action,
153                            )?;
154                        }
155                    }
156                }
157                Ok(vec![Block::Table {
158                    caption,
159                    sections,
160                    attrs,
161                }])
162            }
163            Block::Figure {
164                content,
165                mut caption,
166                attrs,
167            } => {
168                let content =
169                    self.transform_blocks(content, &format!("{path}.content"), parent_action)?;
170                if let Some(caption_blocks) = caption.as_mut() {
171                    let blocks = std::mem::take(caption_blocks);
172                    *caption_blocks =
173                        self.transform_blocks(blocks, &format!("{path}.caption"), parent_action)?;
174                }
175                Ok(vec![Block::Figure {
176                    content,
177                    caption,
178                    attrs,
179                }])
180            }
181            Block::Admonition {
182                kind,
183                title,
184                blocks,
185                attrs,
186            } => Ok(vec![Block::Admonition {
187                kind,
188                title: title
189                    .map(|inlines| {
190                        self.transform_inlines(inlines, &format!("{path}.title"), parent_action)
191                    })
192                    .transpose()?,
193                blocks: self.transform_blocks(blocks, path, parent_action)?,
194                attrs,
195            }]),
196            Block::Details {
197                summary,
198                blocks,
199                open,
200                attrs,
201            } => Ok(vec![Block::Details {
202                summary: summary
203                    .map(|inlines| {
204                        self.transform_inlines(inlines, &format!("{path}.summary"), parent_action)
205                    })
206                    .transpose()?,
207                blocks: self.transform_blocks(blocks, path, parent_action)?,
208                open,
209                attrs,
210            }]),
211            Block::RawBlock {
212                html,
213                origin,
214                trust,
215                attrs,
216            } => {
217                let action = parent_action.unwrap_or(self.raw);
218                match action {
219                    NodePolicyAction::Pass => Ok(vec![Block::RawBlock {
220                        html,
221                        origin,
222                        trust,
223                        attrs,
224                    }]),
225                    NodePolicyAction::Sanitize => Ok(if html.trim().is_empty() {
226                        Vec::new()
227                    } else {
228                        vec![Block::Paragraph {
229                            content: vec![Inline::Text(html)],
230                            attrs: BlockAttrs::default(),
231                        }]
232                    }),
233                    NodePolicyAction::Drop => Ok(Vec::new()),
234                    NodePolicyAction::Error => anyhow::bail!(
235                        "Raw node encountered at {path}, but adapter policy is 'error'"
236                    ),
237                }
238            }
239            Block::UnknownBlock {
240                tag,
241                attrs,
242                children,
243                data,
244                note,
245                source,
246            } => {
247                let action = parent_action.unwrap_or(self.unknown);
248                match action {
249                    NodePolicyAction::Pass => Ok(vec![Block::UnknownBlock {
250                        tag,
251                        attrs,
252                        children: self.transform_unknown_children(children, path, Some(action))?,
253                        data,
254                        note,
255                        source,
256                    }]),
257                    NodePolicyAction::Sanitize => {
258                        let children =
259                            self.transform_unknown_children(children, path, Some(action))?;
260                        Ok(self.unknown_children_to_blocks(children))
261                    }
262                    NodePolicyAction::Drop => Ok(Vec::new()),
263                    NodePolicyAction::Error => anyhow::bail!(
264                        "Unknown node encountered at {path}, but adapter policy is 'error'"
265                    ),
266                }
267            }
268        }
269    }
270
271    fn transform_inlines(
272        &self,
273        inlines: Vec<Inline>,
274        path: &str,
275        parent_action: Option<NodePolicyAction>,
276    ) -> Result<Vec<Inline>> {
277        let mut out = Vec::with_capacity(inlines.len());
278        for (idx, inline) in inlines.into_iter().enumerate() {
279            let inline_path = format!("{path}.inlines[{idx}]");
280            let mut transformed = self.transform_inline(inline, &inline_path, parent_action)?;
281            out.append(&mut transformed);
282        }
283        Ok(out)
284    }
285
286    fn transform_inline(
287        &self,
288        inline: Inline,
289        path: &str,
290        parent_action: Option<NodePolicyAction>,
291    ) -> Result<Vec<Inline>> {
292        match inline {
293            Inline::Text(_)
294            | Inline::Code(_)
295            | Inline::SoftBreak
296            | Inline::HardBreak
297            | Inline::Image { .. }
298            | Inline::FootnoteRef(_)
299            | Inline::MathInline { .. }
300            | Inline::SvgInline { .. } => Ok(vec![inline]),
301            Inline::Styled {
302                styles,
303                content,
304                attrs,
305            } => Ok(vec![Inline::Styled {
306                styles,
307                content: self.transform_inlines(content, path, parent_action)?,
308                attrs,
309            }]),
310            Inline::Link {
311                content,
312                href,
313                title,
314                attrs,
315            } => Ok(vec![Inline::Link {
316                content: self.transform_inlines(content, path, parent_action)?,
317                href,
318                title,
319                attrs,
320            }]),
321            Inline::RawInline {
322                html,
323                origin,
324                trust,
325                attrs,
326            } => {
327                let action = parent_action.unwrap_or(self.raw);
328                match action {
329                    NodePolicyAction::Pass => Ok(vec![Inline::RawInline {
330                        html,
331                        origin,
332                        trust,
333                        attrs,
334                    }]),
335                    NodePolicyAction::Sanitize => Ok(if html.is_empty() {
336                        Vec::new()
337                    } else {
338                        vec![Inline::Text(html)]
339                    }),
340                    NodePolicyAction::Drop => Ok(Vec::new()),
341                    NodePolicyAction::Error => anyhow::bail!(
342                        "Raw inline encountered at {path}, but adapter policy is 'error'"
343                    ),
344                }
345            }
346            Inline::UnknownInline {
347                tag,
348                attrs,
349                content,
350                data,
351                note,
352                source,
353            } => {
354                let action = parent_action.unwrap_or(self.unknown);
355                match action {
356                    NodePolicyAction::Pass => Ok(vec![Inline::UnknownInline {
357                        tag,
358                        attrs,
359                        content: self.transform_inlines(content, path, Some(action))?,
360                        data,
361                        note,
362                        source,
363                    }]),
364                    NodePolicyAction::Sanitize => {
365                        self.transform_inlines(content, path, Some(action))
366                    }
367                    NodePolicyAction::Drop => Ok(Vec::new()),
368                    NodePolicyAction::Error => anyhow::bail!(
369                        "Unknown inline encountered at {path}, but adapter policy is 'error'"
370                    ),
371                }
372            }
373        }
374    }
375
376    fn transform_unknown_children(
377        &self,
378        children: Vec<UnknownChild>,
379        path: &str,
380        parent_action: Option<NodePolicyAction>,
381    ) -> Result<Vec<UnknownChild>> {
382        let mut out = Vec::with_capacity(children.len());
383        for (idx, child) in children.into_iter().enumerate() {
384            let child_path = format!("{path}.unknown_children[{idx}]");
385            match child {
386                UnknownChild::Block(block) => {
387                    for block in self.transform_block(block, &child_path, parent_action)? {
388                        out.push(UnknownChild::Block(block));
389                    }
390                }
391                UnknownChild::Inline(inline) => {
392                    for inline in self.transform_inline(inline, &child_path, parent_action)? {
393                        out.push(UnknownChild::Inline(inline));
394                    }
395                }
396            }
397        }
398        Ok(out)
399    }
400
401    fn unknown_children_to_blocks(&self, children: Vec<UnknownChild>) -> Vec<Block> {
402        let mut out = Vec::new();
403        let mut pending_inline = Vec::new();
404        for child in children {
405            match child {
406                UnknownChild::Block(block) => {
407                    if !pending_inline.is_empty() {
408                        out.push(Block::Paragraph {
409                            content: std::mem::take(&mut pending_inline),
410                            attrs: BlockAttrs::default(),
411                        });
412                    }
413                    out.push(block);
414                }
415                UnknownChild::Inline(inline) => {
416                    pending_inline.push(inline);
417                }
418            }
419        }
420        if !pending_inline.is_empty() {
421            out.push(Block::Paragraph {
422                content: pending_inline,
423                attrs: BlockAttrs::default(),
424            });
425        }
426        out
427    }
428}
429
430impl Pass for ApplyNodePolicyPass {
431    fn name(&self) -> &'static str {
432        "apply_node_policy"
433    }
434
435    fn run(&mut self, doc: &mut Document, _ctx: &mut crate::PassCtx) -> Result<()> {
436        let blocks = std::mem::take(&mut doc.blocks);
437        doc.blocks = self.transform_blocks(blocks, "document", None)?;
438
439        for (id, def) in &mut doc.footnotes {
440            let blocks = std::mem::take(&mut def.blocks);
441            def.blocks =
442                self.transform_blocks(blocks, &format!("document.footnotes[{}]", id.0), None)?;
443        }
444
445        Ok(())
446    }
447}
448
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used)]

    use super::*;
    use typub_ir::{DocMeta, InlineAttrs, RawOrigin, RawTrust};

    /// Builds a document containing only the given body blocks.
    fn doc_with(blocks: Vec<Block>) -> Document {
        Document {
            blocks,
            footnotes: Default::default(),
            assets: Default::default(),
            meta: DocMeta::default(),
        }
    }

    /// Plain text inline.
    fn text(value: &str) -> Inline {
        Inline::Text(value.to_string())
    }

    /// Paragraph with default attributes.
    fn paragraph(content: Vec<Inline>) -> Block {
        Block::Paragraph {
            content,
            attrs: BlockAttrs::default(),
        }
    }

    /// Markdown-origin raw block with default attributes.
    fn raw_block(html: &str, trust: RawTrust) -> Block {
        Block::RawBlock {
            html: html.to_string(),
            origin: RawOrigin::Markdown,
            trust,
            attrs: BlockAttrs::default(),
        }
    }

    /// Unknown inline with default attributes and no metadata.
    fn unknown_inline(tag: &str, content: Vec<Inline>) -> Inline {
        Inline::UnknownInline {
            tag: tag.to_string(),
            attrs: InlineAttrs::default(),
            content,
            data: Default::default(),
            note: None,
            source: None,
        }
    }

    /// Unknown block with default attributes and no metadata.
    fn unknown_block(tag: &str, children: Vec<UnknownChild>) -> Block {
        Block::UnknownBlock {
            tag: tag.to_string(),
            attrs: BlockAttrs::default(),
            children,
            data: Default::default(),
            note: None,
            source: None,
        }
    }

    /// Runs a freshly constructed pass over `doc`.
    fn run_pass(
        raw: NodePolicyAction,
        unknown: NodePolicyAction,
        doc: &mut Document,
    ) -> Result<()> {
        ApplyNodePolicyPass::new(raw, unknown).run(doc, &mut crate::PassCtx::default())
    }

    #[test]
    fn pass_action_keeps_raw_and_unknown() {
        let mut doc = doc_with(vec![
            raw_block("<x/>", RawTrust::Trusted),
            paragraph(vec![unknown_inline("foo", vec![text("ok")])]),
        ]);

        run_pass(NodePolicyAction::Pass, NodePolicyAction::Pass, &mut doc).expect("run pass");

        assert!(matches!(doc.blocks[0], Block::RawBlock { .. }));
        match &doc.blocks[1] {
            Block::Paragraph { content, .. } => {
                assert!(matches!(content[0], Inline::UnknownInline { .. }));
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn sanitize_action_neutralizes_raw_and_unknown() {
        let mut doc = doc_with(vec![
            raw_block("<script>alert(1)</script>", RawTrust::Untrusted),
            unknown_block(
                "x-card",
                vec![
                    UnknownChild::Inline(text("hello")),
                    UnknownChild::Block(paragraph(vec![text("world")])),
                ],
            ),
        ]);

        run_pass(
            NodePolicyAction::Sanitize,
            NodePolicyAction::Sanitize,
            &mut doc,
        )
        .expect("run pass");

        match &doc.blocks[0] {
            Block::Paragraph { content, .. } => {
                assert!(matches!(content[0], Inline::Text(_)));
            }
            _ => panic!("expected paragraph for sanitized raw"),
        }

        // The unknown wrapper is replaced by one paragraph for the loose inline
        // run and one for the nested paragraph child.
        assert!(matches!(doc.blocks[1], Block::Paragraph { .. }));
        assert!(matches!(doc.blocks[2], Block::Paragraph { .. }));
    }

    #[test]
    fn drop_action_removes_raw_and_unknown() {
        let mut doc = doc_with(vec![
            paragraph(vec![
                text("a"),
                Inline::RawInline {
                    html: "<b>x</b>".to_string(),
                    origin: RawOrigin::Markdown,
                    trust: RawTrust::Untrusted,
                    attrs: InlineAttrs::default(),
                },
                unknown_inline("foo", vec![text("b")]),
            ]),
            unknown_block("x", vec![]),
        ]);

        run_pass(NodePolicyAction::Drop, NodePolicyAction::Drop, &mut doc).expect("run pass");

        assert_eq!(doc.blocks.len(), 1);
        match &doc.blocks[0] {
            Block::Paragraph { content, .. } => {
                assert_eq!(content.len(), 1);
                assert!(matches!(content[0], Inline::Text(_)));
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn error_action_fails_on_raw_unknown() {
        // Raw policy set to `Error`.
        let mut doc = doc_with(vec![raw_block("<x/>", RawTrust::Untrusted)]);
        let err = run_pass(NodePolicyAction::Error, NodePolicyAction::Pass, &mut doc)
            .expect_err("raw should error");
        assert!(err.to_string().contains("Raw node encountered"));

        // Unknown policy set to `Error`, block level.
        let mut doc = doc_with(vec![unknown_block("x", vec![])]);
        let err = run_pass(NodePolicyAction::Pass, NodePolicyAction::Error, &mut doc)
            .expect_err("unknown block should error");
        assert!(err.to_string().contains("Unknown node encountered"));

        // Unknown policy set to `Error`, inline level.
        let mut doc = doc_with(vec![paragraph(vec![unknown_inline(
            "foo",
            vec![text("b")],
        )])]);
        let err = run_pass(NodePolicyAction::Pass, NodePolicyAction::Error, &mut doc)
            .expect_err("unknown inline should error");
        assert!(err.to_string().contains("Unknown inline encountered"));
    }
}