Skip to main content

ass_editor/extensions/builtin/syntax_highlight/
document.rs

1//! Top-level document tokenization driving the per-line tokenizers.
2
3use super::{HighlightToken, SyntaxHighlightExtension, TokenType};
4use crate::core::{EditorDocument, Position, Range, Result};
5
6#[cfg(not(feature = "std"))]
7use alloc::{string::ToString, vec::Vec};
8
9impl SyntaxHighlightExtension {
10    /// Tokenize a document
11    pub fn tokenize_document(&mut self, document: &EditorDocument) -> Result<Vec<HighlightToken>> {
12        let content = document.text();
13        let doc_id = document.id();
14
15        // Check cache
16        if let Some(cached_tokens) = self.token_cache.get(doc_id) {
17            return Ok(cached_tokens.clone());
18        }
19
20        let mut tokens = Vec::new();
21        let mut current_section = None;
22        let mut line_start = 0;
23
24        for line in content.lines() {
25            let line_range = Range::new(
26                Position::new(line_start),
27                Position::new(line_start + line.len()),
28            );
29
30            // Handle section headers
31            if line.starts_with('[') && line.ends_with(']') {
32                tokens.push(HighlightToken {
33                    range: line_range,
34                    token_type: TokenType::SectionHeader,
35                    semantic_info: Some(line[1..line.len() - 1].to_string()),
36                });
37                current_section = Some(line[1..line.len() - 1].to_string());
38            }
39            // Handle comments (only lines starting with semicolon)
40            else if line.starts_with(';') {
41                tokens.push(HighlightToken {
42                    range: line_range,
43                    token_type: TokenType::Comment,
44                    semantic_info: None,
45                });
46            }
47            // Handle fields based on current section
48            else if let Some(ref section) = current_section {
49                match section.as_str() {
50                    "Script Info" | "Aegisub Project Garbage" => {
51                        self.tokenize_info_line(&mut tokens, line, line_start)?;
52                    }
53                    "V4+ Styles" | "V4 Styles" => {
54                        if line.starts_with("Format:") {
55                            self.tokenize_format_line(&mut tokens, line, line_start)?;
56                        } else if line.starts_with("Style:") {
57                            self.tokenize_style_line(&mut tokens, line, line_start)?;
58                        }
59                    }
60                    "Events" => {
61                        if line.starts_with("Format:") {
62                            self.tokenize_format_line(&mut tokens, line, line_start)?;
63                        } else if line.starts_with("Dialogue:") || line.starts_with("Comment:") {
64                            self.tokenize_event_line(&mut tokens, line, line_start)?;
65                        }
66                    }
67                    _ => {
68                        // Unknown section - highlight as text
69                        tokens.push(HighlightToken {
70                            range: line_range,
71                            token_type: TokenType::Text,
72                            semantic_info: None,
73                        });
74                    }
75                }
76            }
77
78            line_start += line.len() + 1; // +1 for newline
79
80            // Check token limit
81            if self.config.max_tokens > 0 && tokens.len() >= self.config.max_tokens {
82                break;
83            }
84        }
85
86        // Cache the tokens
87        self.token_cache.insert(doc_id.to_string(), tokens.clone());
88
89        Ok(tokens)
90    }
91}