tinymist_task/compute/
text.rs

1//! The computation for text export.
2
3use core::fmt;
4use std::sync::Arc;
5use typst::html::{HtmlNode::*, tag};
6
7use crate::ExportTextTask;
8use tinymist_std::error::prelude::*;
9use tinymist_std::typst::{TypstDocument, TypstPagedDocument};
10use tinymist_world::{CompilerFeat, ExportComputation, WorldComputeGraph};
11
/// The computation for text export.
///
/// This is a stateless marker type: it holds no data and only serves as the
/// carrier for the text export entry points implemented on it.
#[derive(Debug, Clone, Copy, Default)]
pub struct TextExport;
14
15impl TextExport {
16    /// Runs the computation on a document.
17    pub fn run_on_doc(doc: &TypstDocument) -> Result<String> {
18        Ok(format!("{}", FullTextDigest(doc)))
19    }
20}
21
22impl<F: CompilerFeat> ExportComputation<F, TypstPagedDocument> for TextExport {
23    type Output = String;
24    type Config = ExportTextTask;
25
26    fn run(
27        _g: &Arc<WorldComputeGraph<F>>,
28        doc: &Arc<TypstPagedDocument>,
29        _config: &ExportTextTask,
30    ) -> Result<String> {
31        Self::run_on_doc(&TypstDocument::Paged(doc.clone()))
32    }
33}
34
35/// A full text digest of a document.
36struct FullTextDigest<'a>(&'a TypstDocument);
37
38impl FullTextDigest<'_> {
39    fn export_frame(f: &mut fmt::Formatter<'_>, doc: &typst::layout::Frame) -> fmt::Result {
40        for (_, item) in doc.items() {
41            Self::export_item(f, item)?;
42        }
43        #[cfg(not(feature = "no-content-hint"))]
44        {
45            use std::fmt::Write;
46            let c = doc.content_hint();
47            if c != '\0' {
48                f.write_char(c)?;
49            }
50        }
51
52        Ok(())
53    }
54
55    fn export_item(f: &mut fmt::Formatter<'_>, item: &typst::layout::FrameItem) -> fmt::Result {
56        use typst::layout::FrameItem::*;
57        match item {
58            Group(g) => Self::export_frame(f, &g.frame),
59            Text(t) => f.write_str(t.text.as_str()),
60            Link(..) | Tag(..) | Shape(..) | Image(..) => Ok(()),
61        }
62    }
63
64    fn export_element(f: &mut fmt::Formatter<'_>, elem: &typst::html::HtmlElement) -> fmt::Result {
65        for child in elem.children.iter() {
66            Self::export_html_node(f, child)?;
67        }
68        Ok(())
69    }
70
71    fn export_html_node(f: &mut fmt::Formatter<'_>, node: &typst::html::HtmlNode) -> fmt::Result {
72        match node {
73            Tag(_) => Ok(()),
74            Element(elem) => {
75                // Skips certain tags that do not contribute to text content.
76                if matches!(elem.tag, tag::style | tag::script) {
77                    Ok(())
78                } else {
79                    Self::export_element(f, elem)
80                }
81            }
82            Text(t, _) => f.write_str(t.as_str()),
83            Frame(frame) => Self::export_frame(f, frame),
84        }
85    }
86}
87
88impl fmt::Display for FullTextDigest<'_> {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        match &self.0 {
91            TypstDocument::Paged(paged_doc) => {
92                for page in paged_doc.pages.iter() {
93                    Self::export_frame(f, &page.frame)?;
94                }
95                Ok(())
96            }
97            TypstDocument::Html(html_doc) => {
98                Self::export_element(f, &html_doc.root)?;
99                Ok(())
100            }
101        }
102    }
103}