typstify_parser/
typst_parser.rs

1//! Typst parser for converting Typst documents to HTML.
2//!
3//! This module provides Typst document parsing with frontmatter extraction
4//! and TOC generation. The actual Typst compilation requires setting up
5//! a proper TypstWorld which is deferred to the generator phase.
6
7use std::path::Path;
8
9use thiserror::Error;
10use typstify_core::{
11    content::{ParsedContent, TocEntry},
12    frontmatter::parse_typst_frontmatter,
13};
14
/// Errors that can occur while handling a Typst document.
///
/// The `#[error(...)]` attribute on each variant defines its `Display` message.
#[derive(Debug, Error)]
pub enum TypstError {
    /// Frontmatter extraction failed.
    ///
    /// Wraps the underlying `CoreError`. The `#[from]` attribute provides the
    /// conversion that lets `?` turn a `CoreError` into this variant.
    #[error("frontmatter error: {0}")]
    Frontmatter(#[from] typstify_core::error::CoreError),

    /// Typst compilation failed; the payload is the message appended to the
    /// `Display` output.
    // NOTE(review): nothing in this file constructs this variant — presumably
    // it is produced by the later compilation step; confirm against callers.
    #[error("typst compilation failed: {0}")]
    Compilation(String),

    /// SVG rendering failed; the payload is the message appended to the
    /// `Display` output.
    // NOTE(review): not constructed in this file either — confirm where the
    // rendering step lives.
    #[error("SVG rendering failed: {0}")]
    Render(String),
}
30
/// Convenience alias for results whose error type is [`TypstError`].
///
/// Note that inside this module the alias takes precedence over the prelude's
/// two-parameter `Result`; the standard one is still reachable as
/// `std::result::Result`.
pub type Result<T> = std::result::Result<T, TypstError>;
33
/// Typst parser that extracts frontmatter and prepares content for compilation.
///
/// Note: Full Typst compilation requires a proper World implementation with
/// file system access and font loading. This parser focuses on:
/// - Extracting frontmatter from Typst comment syntax
/// - Extracting TOC from heading patterns
/// - Preparing content for later compilation
#[derive(Debug)]
pub struct TypstParser {
    /// Whether to extract TOC from headings.
    ///
    /// When `false`, `parse` returns an empty TOC instead of scanning the
    /// document body for heading lines.
    extract_toc: bool,
}
46
47impl Default for TypstParser {
48    fn default() -> Self {
49        Self::new()
50    }
51}
52
53impl TypstParser {
54    /// Create a new Typst parser.
55    pub fn new() -> Self {
56        Self { extract_toc: true }
57    }
58
59    /// Parse a Typst document with frontmatter.
60    ///
61    /// This extracts frontmatter and TOC but does not perform full compilation.
62    /// The HTML field will contain the raw Typst source wrapped in a code block
63    /// for preview, or can be compiled later with a proper World implementation.
64    pub fn parse(&self, content: &str, path: &Path) -> Result<ParsedContent> {
65        // Parse frontmatter from Typst comments
66        let (frontmatter, body) = parse_typst_frontmatter(content, path)?;
67
68        // Extract TOC from source
69        let toc = if self.extract_toc {
70            self.extract_toc_from_source(&body)
71        } else {
72            Vec::new()
73        };
74
75        // For now, wrap the Typst source in a placeholder
76        // Full compilation will be done in the generator with proper World setup
77        let html = format!(
78            "<div class=\"typst-source\" data-path=\"{}\">\n<pre><code class=\"language-typst\">{}</code></pre>\n</div>",
79            path.display(),
80            html_escape(&body)
81        );
82
83        Ok(ParsedContent {
84            frontmatter,
85            html,
86            raw: body,
87            toc,
88        })
89    }
90
91    /// Extract TOC entries from Typst source (simple heuristic).
92    fn extract_toc_from_source(&self, content: &str) -> Vec<TocEntry> {
93        let mut toc = Vec::new();
94
95        for line in content.lines() {
96            let trimmed = line.trim();
97
98            // Match Typst headings: = Title, == Subtitle, etc.
99            if let Some(heading) = parse_typst_heading(trimmed) {
100                toc.push(heading);
101            }
102        }
103
104        toc
105    }
106}
107
108/// Parse a Typst heading line into a TocEntry.
109fn parse_typst_heading(line: &str) -> Option<TocEntry> {
110    if !line.starts_with('=') {
111        return None;
112    }
113
114    // Count the number of = at the start
115    let level = line.chars().take_while(|c| *c == '=').count();
116    if level == 0 || level > 6 {
117        return None;
118    }
119
120    // Extract the heading text
121    let text = line[level..].trim().to_string();
122    if text.is_empty() {
123        return None;
124    }
125
126    // Generate a slug from the text
127    let id = slugify(&text);
128
129    Some(TocEntry {
130        level: level as u8,
131        text,
132        id,
133    })
134}
135
/// Convert text to a URL-safe slug.
///
/// The text is lowercased, alphanumeric characters are kept, and every run of
/// whitespace, `-` and `_` between kept characters collapses into a single
/// `-`. All other characters are dropped without acting as a separator, and
/// the result never starts or ends with `-`.
fn slugify(text: &str) -> String {
    let mut slug = String::with_capacity(text.len());
    // Set when a separator has been seen since the last kept character; the
    // dash is only written once another kept character actually follows.
    let mut separator_pending = false;

    for ch in text.to_lowercase().chars() {
        if ch.is_alphanumeric() {
            if separator_pending && !slug.is_empty() {
                slug.push('-');
            }
            separator_pending = false;
            slug.push(ch);
        } else if ch.is_whitespace() || ch == '-' || ch == '_' {
            separator_pending = true;
        }
        // Anything else (punctuation, symbols) is simply skipped.
    }

    slug
}
156
/// Escape HTML special characters.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`, `&quot;`
/// and `&#39;` respectively, so the result can be placed in element content
/// as well as in single- or double-quoted attribute values. All other
/// characters are copied unchanged.
fn html_escape(s: &str) -> String {
    // One pass over the input; the capacity is a lower bound because every
    // escaped character expands.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Heading lines yield their level and text; plain text and a bare
    /// marker yield nothing.
    #[test]
    fn test_parse_typst_heading() {
        let top_level = parse_typst_heading("= Introduction").unwrap();
        assert_eq!(top_level.level, 1);
        assert_eq!(top_level.text, "Introduction");

        let nested = parse_typst_heading("== Sub Section").unwrap();
        assert_eq!(nested.level, 2);
        assert_eq!(nested.text, "Sub Section");

        // Neither ordinary text nor an empty heading produces an entry.
        assert!(parse_typst_heading("Not a heading").is_none());
        assert!(parse_typst_heading("=").is_none());
    }

    /// Slugs are lowercase with words joined by dashes.
    #[test]
    fn test_slugify() {
        let cases = [("Hello World", "hello-world"), ("Test 123", "test-123")];
        for (input, expected) in cases {
            assert_eq!(slugify(input), expected);
        }
    }

    /// Every heading line in the source becomes one TOC entry, in order.
    #[test]
    fn test_extract_toc() {
        let source = r#"= Main Title
== Section One
=== Subsection
== Section Two"#;

        let entries = TypstParser::new().extract_toc_from_source(source);

        assert_eq!(entries.len(), 4);
        assert_eq!(entries[0].level, 1);
        assert_eq!(entries[0].text, "Main Title");
        assert_eq!(entries[1].level, 2);
        assert_eq!(entries[2].level, 3);
    }

    /// Parsing a document yields its frontmatter, a TOC and the placeholder markup.
    #[test]
    fn test_parse_with_frontmatter() {
        let document = r#"// typstify:frontmatter
// title: "Test Document"

= Hello Typst

This is a test document."#;

        let parsed = TypstParser::new()
            .parse(document, Path::new("test.typ"))
            .unwrap();

        assert_eq!(parsed.frontmatter.title, "Test Document");
        assert!(!parsed.toc.is_empty());
        assert!(parsed.html.contains("typst-source"));
    }

    /// Angle brackets and ampersands are replaced by entities.
    #[test]
    fn test_html_escape() {
        let cases = [("<script>", "&lt;script&gt;"), ("a & b", "a &amp; b")];
        for (input, expected) in cases {
            assert_eq!(html_escape(input), expected);
        }
    }
}