// ass_editor/extensions/builtin/syntax_highlight.rs

1//! Built-in syntax highlighting extension for ASS/SSA files
2//!
3//! Provides syntax highlighting for ASS subtitle format, including:
4//! - Section headers (`[Script Info]`, `[Styles]`, `[Events]`)
5//! - Field names and values
6//! - Override tags and their parameters
7//! - Comments and special formatting
8
9use crate::core::{EditorDocument, Position, Range, Result};
10use crate::extensions::{
11    EditorExtension, ExtensionCapability, ExtensionCommand, ExtensionContext, ExtensionInfo,
12    ExtensionResult, ExtensionState, MessageLevel,
13};
14
15#[cfg(not(feature = "std"))]
16use alloc::{
17    collections::BTreeMap as HashMap,
18    format,
19    string::{String, ToString},
20    vec,
21    vec::Vec,
22};
23#[cfg(feature = "std")]
24use std::collections::HashMap;
25
/// Token types for syntax highlighting
///
/// Each variant maps to a CSS class (for web renderers) and an ANSI escape
/// sequence (for terminal renderers).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// Section headers like [Script Info]
    SectionHeader,
    /// Field names like Title:, PlayResX:
    FieldName,
    /// Field values
    FieldValue,
    /// Event type (Dialogue, Comment)
    EventType,
    /// Style name references
    StyleName,
    /// Time codes
    TimeCode,
    /// Override tags like {\pos(100,200)}
    OverrideTag,
    /// Tag parameters
    TagParameter,
    /// Comments
    Comment,
    /// Plain text
    Text,
    /// Errors or invalid syntax
    Error,
}

impl TokenType {
    /// Get CSS class name for web-based highlighting
    pub fn css_class(&self) -> &'static str {
        match *self {
            TokenType::SectionHeader => "ass-section-header",
            TokenType::FieldName => "ass-field-name",
            TokenType::FieldValue => "ass-field-value",
            TokenType::EventType => "ass-event-type",
            TokenType::StyleName => "ass-style-name",
            TokenType::TimeCode => "ass-timecode",
            TokenType::OverrideTag => "ass-override-tag",
            TokenType::TagParameter => "ass-tag-param",
            TokenType::Comment => "ass-comment",
            TokenType::Text => "ass-text",
            TokenType::Error => "ass-error",
        }
    }

    /// Get ANSI color code for terminal highlighting
    pub fn ansi_color(&self) -> &'static str {
        match *self {
            TokenType::SectionHeader => "\x1b[1;34m", // bright blue
            TokenType::FieldName => "\x1b[36m",       // cyan
            TokenType::FieldValue => "\x1b[37m",      // white
            TokenType::EventType => "\x1b[1;32m",     // bright green
            TokenType::StyleName => "\x1b[35m",       // magenta
            TokenType::TimeCode => "\x1b[33m",        // yellow
            TokenType::OverrideTag => "\x1b[1;31m",   // bright red
            TokenType::TagParameter => "\x1b[31m",    // red
            TokenType::Comment => "\x1b[90m",         // bright black (gray)
            TokenType::Text => "\x1b[0m",             // reset
            TokenType::Error => "\x1b[1;91m",         // bright red (high intensity)
        }
    }
}
88
/// A highlighted token in the document
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HighlightToken {
    /// Range of the token in the document (the tokenizer derives offsets
    /// from `str::len()`, i.e. byte offsets)
    pub range: Range,
    /// Classification of the token
    pub token_type: TokenType,
    /// Optional semantic information (e.g. section name, field name,
    /// style name, or tag name, depending on the token type)
    pub semantic_info: Option<String>,
}
99
/// Syntax highlighting extension
///
/// Tokenizes ASS/SSA documents into [`HighlightToken`]s and caches the
/// result per document id.
pub struct SyntaxHighlightExtension {
    /// Static metadata reported through `info()`
    info: ExtensionInfo,
    /// Lifecycle state (Uninitialized -> Active -> Shutdown)
    state: ExtensionState,
    /// Cached tokens for performance, keyed by document id.
    /// NOTE(review): entries are never invalidated automatically on edits;
    /// callers must use `invalidate_document`/`clear_cache`.
    token_cache: HashMap<String, Vec<HighlightToken>>,
    /// Configuration, loaded from the host context during `initialize`
    config: SyntaxHighlightConfig,
}
109
/// Configuration for syntax highlighting
#[derive(Debug, Clone)]
pub struct SyntaxHighlightConfig {
    /// Enable semantic highlighting (slower but more accurate)
    pub semantic_highlighting: bool,
    /// Highlight override tags
    pub highlight_tags: bool,
    /// Highlight errors
    pub highlight_errors: bool,
    /// Maximum tokens to process (0 = unlimited)
    pub max_tokens: usize,
}

impl Default for SyntaxHighlightConfig {
    /// Defaults: every highlighting feature enabled, capped at 10 000 tokens.
    fn default() -> Self {
        SyntaxHighlightConfig {
            max_tokens: 10_000,
            highlight_errors: true,
            highlight_tags: true,
            semantic_highlighting: true,
        }
    }
}
133
134impl SyntaxHighlightExtension {
135    /// Create a new syntax highlighting extension
136    pub fn new() -> Self {
137        let info = ExtensionInfo::new(
138            "syntax-highlight".to_string(),
139            "1.0.0".to_string(),
140            "ASS-RS Team".to_string(),
141            "Built-in syntax highlighting for ASS/SSA files".to_string(),
142        )
143        .with_capability(ExtensionCapability::SyntaxHighlighting)
144        .with_license("MIT".to_string());
145
146        Self {
147            info,
148            state: ExtensionState::Uninitialized,
149            token_cache: HashMap::new(),
150            config: SyntaxHighlightConfig::default(),
151        }
152    }
153
154    /// Tokenize a document
155    pub fn tokenize_document(&mut self, document: &EditorDocument) -> Result<Vec<HighlightToken>> {
156        let content = document.text();
157        let doc_id = document.id();
158
159        // Check cache
160        if let Some(cached_tokens) = self.token_cache.get(doc_id) {
161            return Ok(cached_tokens.clone());
162        }
163
164        let mut tokens = Vec::new();
165        let mut current_section = None;
166        let mut line_start = 0;
167
168        for line in content.lines() {
169            let line_range = Range::new(
170                Position::new(line_start),
171                Position::new(line_start + line.len()),
172            );
173
174            // Handle section headers
175            if line.starts_with('[') && line.ends_with(']') {
176                tokens.push(HighlightToken {
177                    range: line_range,
178                    token_type: TokenType::SectionHeader,
179                    semantic_info: Some(line[1..line.len() - 1].to_string()),
180                });
181                current_section = Some(line[1..line.len() - 1].to_string());
182            }
183            // Handle comments (only lines starting with semicolon)
184            else if line.starts_with(';') {
185                tokens.push(HighlightToken {
186                    range: line_range,
187                    token_type: TokenType::Comment,
188                    semantic_info: None,
189                });
190            }
191            // Handle fields based on current section
192            else if let Some(ref section) = current_section {
193                match section.as_str() {
194                    "Script Info" | "Aegisub Project Garbage" => {
195                        self.tokenize_info_line(&mut tokens, line, line_start)?;
196                    }
197                    "V4+ Styles" | "V4 Styles" => {
198                        if line.starts_with("Format:") {
199                            self.tokenize_format_line(&mut tokens, line, line_start)?;
200                        } else if line.starts_with("Style:") {
201                            self.tokenize_style_line(&mut tokens, line, line_start)?;
202                        }
203                    }
204                    "Events" => {
205                        if line.starts_with("Format:") {
206                            self.tokenize_format_line(&mut tokens, line, line_start)?;
207                        } else if line.starts_with("Dialogue:") || line.starts_with("Comment:") {
208                            self.tokenize_event_line(&mut tokens, line, line_start)?;
209                        }
210                    }
211                    _ => {
212                        // Unknown section - highlight as text
213                        tokens.push(HighlightToken {
214                            range: line_range,
215                            token_type: TokenType::Text,
216                            semantic_info: None,
217                        });
218                    }
219                }
220            }
221
222            line_start += line.len() + 1; // +1 for newline
223
224            // Check token limit
225            if self.config.max_tokens > 0 && tokens.len() >= self.config.max_tokens {
226                break;
227            }
228        }
229
230        // Cache the tokens
231        self.token_cache.insert(doc_id.to_string(), tokens.clone());
232
233        Ok(tokens)
234    }
235
236    /// Tokenize a Script Info line
237    fn tokenize_info_line(
238        &self,
239        tokens: &mut Vec<HighlightToken>,
240        line: &str,
241        line_start: usize,
242    ) -> Result<()> {
243        if let Some(colon_pos) = line.find(':') {
244            // Field name
245            tokens.push(HighlightToken {
246                range: Range::new(
247                    Position::new(line_start),
248                    Position::new(line_start + colon_pos + 1),
249                ),
250                token_type: TokenType::FieldName,
251                semantic_info: Some(line[..colon_pos].to_string()),
252            });
253
254            // Field value
255            let value_start = line_start + colon_pos + 1;
256            let value = line[colon_pos + 1..].trim_start();
257            if !value.is_empty() {
258                tokens.push(HighlightToken {
259                    range: Range::new(
260                        Position::new(value_start),
261                        Position::new(line_start + line.len()),
262                    ),
263                    token_type: TokenType::FieldValue,
264                    semantic_info: None,
265                });
266            }
267        }
268        Ok(())
269    }
270
271    /// Tokenize a Format line
272    fn tokenize_format_line(
273        &self,
274        tokens: &mut Vec<HighlightToken>,
275        line: &str,
276        line_start: usize,
277    ) -> Result<()> {
278        // "Format:" part
279        tokens.push(HighlightToken {
280            range: Range::new(Position::new(line_start), Position::new(line_start + 7)),
281            token_type: TokenType::FieldName,
282            semantic_info: Some("Format".to_string()),
283        });
284
285        // Rest is field value
286        tokens.push(HighlightToken {
287            range: Range::new(
288                Position::new(line_start + 7),
289                Position::new(line_start + line.len()),
290            ),
291            token_type: TokenType::FieldValue,
292            semantic_info: None,
293        });
294
295        Ok(())
296    }
297
298    /// Tokenize a Style line
299    fn tokenize_style_line(
300        &self,
301        tokens: &mut Vec<HighlightToken>,
302        line: &str,
303        line_start: usize,
304    ) -> Result<()> {
305        // "Style:" part
306        tokens.push(HighlightToken {
307            range: Range::new(Position::new(line_start), Position::new(line_start + 6)),
308            token_type: TokenType::FieldName,
309            semantic_info: Some("Style".to_string()),
310        });
311
312        // Parse style fields
313        let fields = line[6..].trim_start().split(',');
314        let mut field_start = line_start + 6;
315
316        for (i, field) in fields.enumerate() {
317            let field_len = field.len();
318
319            // First field is style name
320            if i == 0 {
321                tokens.push(HighlightToken {
322                    range: Range::new(
323                        Position::new(field_start),
324                        Position::new(field_start + field_len),
325                    ),
326                    token_type: TokenType::StyleName,
327                    semantic_info: Some(field.trim().to_string()),
328                });
329            } else {
330                tokens.push(HighlightToken {
331                    range: Range::new(
332                        Position::new(field_start),
333                        Position::new(field_start + field_len),
334                    ),
335                    token_type: TokenType::FieldValue,
336                    semantic_info: None,
337                });
338            }
339
340            field_start += field_len + 1; // +1 for comma
341        }
342
343        Ok(())
344    }
345
346    /// Tokenize an Event line
347    fn tokenize_event_line(
348        &self,
349        tokens: &mut Vec<HighlightToken>,
350        line: &str,
351        line_start: usize,
352    ) -> Result<()> {
353        let event_type = if line.starts_with("Dialogue:") {
354            "Dialogue"
355        } else {
356            "Comment"
357        };
358
359        // Event type
360        let type_len = event_type.len() + 1; // +1 for colon
361        tokens.push(HighlightToken {
362            range: Range::new(
363                Position::new(line_start),
364                Position::new(line_start + type_len),
365            ),
366            token_type: TokenType::EventType,
367            semantic_info: Some(event_type.to_string()),
368        });
369
370        // Parse event fields
371        let fields_start = line_start + type_len;
372        let fields_text = &line[type_len..];
373
374        // Find the text field (last field after 9 commas)
375        let mut comma_count = 0;
376        let mut text_start = None;
377
378        for (i, ch) in fields_text.char_indices() {
379            if ch == ',' {
380                comma_count += 1;
381                if comma_count == 9 {
382                    text_start = Some(i + 1);
383                    break;
384                }
385            }
386        }
387
388        // Tokenize fields before text
389        if let Some(text_offset) = text_start {
390            let pre_text = &fields_text[..text_offset];
391            let mut field_start = fields_start;
392
393            for (i, field) in pre_text.split(',').enumerate() {
394                let field_len = field.len();
395
396                match i {
397                    1 | 2 => {
398                        // Start and End times
399                        tokens.push(HighlightToken {
400                            range: Range::new(
401                                Position::new(field_start),
402                                Position::new(field_start + field_len),
403                            ),
404                            token_type: TokenType::TimeCode,
405                            semantic_info: None,
406                        });
407                    }
408                    3 => {
409                        // Style name
410                        tokens.push(HighlightToken {
411                            range: Range::new(
412                                Position::new(field_start),
413                                Position::new(field_start + field_len),
414                            ),
415                            token_type: TokenType::StyleName,
416                            semantic_info: Some(field.trim().to_string()),
417                        });
418                    }
419                    _ => {
420                        // Other fields
421                        tokens.push(HighlightToken {
422                            range: Range::new(
423                                Position::new(field_start),
424                                Position::new(field_start + field_len),
425                            ),
426                            token_type: TokenType::FieldValue,
427                            semantic_info: None,
428                        });
429                    }
430                }
431
432                field_start += field_len + 1; // +1 for comma
433            }
434
435            // Tokenize text field with override tags
436            if self.config.highlight_tags {
437                let text_field = &fields_text[text_offset..];
438                self.tokenize_text_with_tags(tokens, text_field, fields_start + text_offset)?;
439            } else {
440                // Just mark as text
441                tokens.push(HighlightToken {
442                    range: Range::new(
443                        Position::new(fields_start + text_offset),
444                        Position::new(line_start + line.len()),
445                    ),
446                    token_type: TokenType::Text,
447                    semantic_info: None,
448                });
449            }
450        }
451
452        Ok(())
453    }
454
455    /// Tokenize text with override tags
456    fn tokenize_text_with_tags(
457        &self,
458        tokens: &mut Vec<HighlightToken>,
459        text: &str,
460        text_start: usize,
461    ) -> Result<()> {
462        let mut pos = 0;
463        let bytes = text.as_bytes();
464
465        while pos < bytes.len() {
466            if bytes[pos] == b'{' {
467                // Find matching }
468                if let Some(end_pos) = text[pos..].find('}') {
469                    let tag_content = &text[pos + 1..pos + end_pos];
470
471                    // Opening brace
472                    tokens.push(HighlightToken {
473                        range: Range::new(
474                            Position::new(text_start + pos),
475                            Position::new(text_start + pos + 1),
476                        ),
477                        token_type: TokenType::OverrideTag,
478                        semantic_info: None,
479                    });
480
481                    // Tag content
482                    self.tokenize_tag_content(tokens, tag_content, text_start + pos + 1)?;
483
484                    // Closing brace
485                    tokens.push(HighlightToken {
486                        range: Range::new(
487                            Position::new(text_start + pos + end_pos),
488                            Position::new(text_start + pos + end_pos + 1),
489                        ),
490                        token_type: TokenType::OverrideTag,
491                        semantic_info: None,
492                    });
493
494                    pos += end_pos + 1;
495                } else {
496                    // Unclosed tag - mark as error
497                    tokens.push(HighlightToken {
498                        range: Range::new(
499                            Position::new(text_start + pos),
500                            Position::new(text_start + text.len()),
501                        ),
502                        token_type: TokenType::Error,
503                        semantic_info: Some("Unclosed override tag".to_string()),
504                    });
505                    break;
506                }
507            } else {
508                // Find next tag or end of text
509                let next_tag = text[pos..].find('{').unwrap_or(text.len() - pos);
510
511                if next_tag > 0 {
512                    tokens.push(HighlightToken {
513                        range: Range::new(
514                            Position::new(text_start + pos),
515                            Position::new(text_start + pos + next_tag),
516                        ),
517                        token_type: TokenType::Text,
518                        semantic_info: None,
519                    });
520                }
521
522                pos += next_tag;
523            }
524        }
525
526        Ok(())
527    }
528
    /// Tokenize tag content
    ///
    /// Walks the backslash-separated tags inside a `{...}` block, emitting an
    /// `OverrideTag` token for each tag name and a `TagParameter` token for
    /// whatever follows the name within the same segment.
    ///
    /// Position bookkeeping: `pos` is a byte offset into `content`. It is
    /// advanced past leading backslashes before each segment and by
    /// `part.len() + 1` afterwards, so token ranges line up with the source
    /// text even when consecutive backslashes produce empty segments (those
    /// are filtered out of the iterator, and the while-loop skip compensates).
    fn tokenize_tag_content(
        &self,
        tokens: &mut Vec<HighlightToken>,
        content: &str,
        content_start: usize,
    ) -> Result<()> {
        // Simple tag parsing - could be enhanced
        let parts = content.split('\\').filter(|s| !s.is_empty());
        let mut pos = 0;

        for part in parts {
            // Skip initial backslash positions
            while pos < content.len() && content.as_bytes()[pos] == b'\\' {
                pos += 1;
            }

            if pos >= content.len() {
                break;
            }

            // Find tag name and parameters. NOTE(review): the name is the
            // leading alphanumeric run, so digit arguments fuse into the name
            // (e.g. `\b1` yields "b1" rather than "b" + "1"); this also makes
            // digit-prefixed tags such as `\1c` resolve as a single name.
            let tag_end = part
                .find(|c: char| !c.is_alphanumeric())
                .unwrap_or(part.len());

            if tag_end > 0 {
                // Tag name
                tokens.push(HighlightToken {
                    range: Range::new(
                        Position::new(content_start + pos),
                        Position::new(content_start + pos + tag_end),
                    ),
                    token_type: TokenType::OverrideTag,
                    semantic_info: Some(part[..tag_end].to_string()),
                });

                // Parameters (everything after the name within this segment)
                if tag_end < part.len() {
                    tokens.push(HighlightToken {
                        range: Range::new(
                            Position::new(content_start + pos + tag_end),
                            Position::new(content_start + pos + part.len()),
                        ),
                        token_type: TokenType::TagParameter,
                        semantic_info: None,
                    });
                }
            }

            pos += part.len() + 1; // +1 for backslash
        }

        Ok(())
    }
584
    /// Clear token cache
    ///
    /// Drops every cached token list; the next `tokenize_document` call
    /// re-tokenizes from scratch.
    pub fn clear_cache(&mut self) {
        self.token_cache.clear();
    }

    /// Invalidate cache for a specific document
    ///
    /// Call this after a document changes so stale tokens are not served.
    pub fn invalidate_document(&mut self, doc_id: &str) {
        self.token_cache.remove(doc_id);
    }
594}
595
impl Default for SyntaxHighlightExtension {
    /// Equivalent to [`SyntaxHighlightExtension::new`].
    fn default() -> Self {
        Self::new()
    }
}
601
602impl EditorExtension for SyntaxHighlightExtension {
    /// Static extension metadata (name, version, author, capabilities).
    fn info(&self) -> &ExtensionInfo {
        &self.info
    }
606
607    fn initialize(&mut self, context: &mut dyn ExtensionContext) -> Result<()> {
608        self.state = ExtensionState::Active;
609
610        // Load configuration
611        if let Some(semantic) = context.get_config("syntax.semantic_highlighting") {
612            self.config.semantic_highlighting = semantic == "true";
613        }
614        if let Some(tags) = context.get_config("syntax.highlight_tags") {
615            self.config.highlight_tags = tags == "true";
616        }
617        if let Some(errors) = context.get_config("syntax.highlight_errors") {
618            self.config.highlight_errors = errors == "true";
619        }
620        if let Some(max_tokens) = context.get_config("syntax.max_tokens") {
621            if let Ok(max) = max_tokens.parse() {
622                self.config.max_tokens = max;
623            }
624        }
625
626        context.show_message("Syntax highlighting initialized", MessageLevel::Info)?;
627        Ok(())
628    }
629
    /// Deactivate the extension and drop all cached tokens.
    fn shutdown(&mut self, _context: &mut dyn ExtensionContext) -> Result<()> {
        self.state = ExtensionState::Shutdown;
        self.clear_cache();
        Ok(())
    }
635
    /// Current lifecycle state of the extension.
    fn state(&self) -> ExtensionState {
        self.state
    }
639
640    fn execute_command(
641        &mut self,
642        command_id: &str,
643        _args: &HashMap<String, String>,
644        context: &mut dyn ExtensionContext,
645    ) -> Result<ExtensionResult> {
646        match command_id {
647            "syntax.highlight" => {
648                if let Some(doc) = context.current_document() {
649                    let tokens = self.tokenize_document(doc)?;
650                    Ok(ExtensionResult::success_with_message(format!(
651                        "Document highlighted with {} tokens",
652                        tokens.len()
653                    )))
654                } else {
655                    Ok(ExtensionResult::failure(
656                        "No active document to highlight".to_string(),
657                    ))
658                }
659            }
660            "syntax.clear_cache" => {
661                self.clear_cache();
662                Ok(ExtensionResult::success_with_message(
663                    "Syntax highlight cache cleared".to_string(),
664                ))
665            }
666            "syntax.get_tokens" => {
667                if let Some(doc) = context.current_document() {
668                    let tokens = self.tokenize_document(doc)?;
669                    let mut result = ExtensionResult::success_with_message(format!(
670                        "Found {} tokens",
671                        tokens.len()
672                    ));
673                    result
674                        .data
675                        .insert("token_count".to_string(), tokens.len().to_string());
676                    Ok(result)
677                } else {
678                    Ok(ExtensionResult::failure("No active document".to_string()))
679                }
680            }
681            _ => Ok(ExtensionResult::failure(format!(
682                "Unknown command: {command_id}"
683            ))),
684        }
685    }
686
687    fn commands(&self) -> Vec<ExtensionCommand> {
688        vec![
689            ExtensionCommand::new(
690                "syntax.highlight".to_string(),
691                "Highlight Document".to_string(),
692                "Apply syntax highlighting to the current document".to_string(),
693            )
694            .with_category("Syntax".to_string()),
695            ExtensionCommand::new(
696                "syntax.clear_cache".to_string(),
697                "Clear Highlight Cache".to_string(),
698                "Clear the syntax highlighting cache".to_string(),
699            )
700            .with_category("Syntax".to_string())
701            .requires_document(false),
702            ExtensionCommand::new(
703                "syntax.get_tokens".to_string(),
704                "Get Highlight Tokens".to_string(),
705                "Get syntax highlighting tokens for the current document".to_string(),
706            )
707            .with_category("Syntax".to_string()),
708        ]
709    }
710
711    fn config_schema(&self) -> HashMap<String, String> {
712        let mut schema = HashMap::new();
713        schema.insert(
714            "syntax.semantic_highlighting".to_string(),
715            "boolean".to_string(),
716        );
717        schema.insert("syntax.highlight_tags".to_string(), "boolean".to_string());
718        schema.insert("syntax.highlight_errors".to_string(), "boolean".to_string());
719        schema.insert("syntax.max_tokens".to_string(), "number".to_string());
720        schema
721    }
722}
723
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    // Spot-check the CSS class and ANSI color lookup tables.
    #[test]
    fn test_token_types() {
        assert_eq!(TokenType::SectionHeader.css_class(), "ass-section-header");
        assert_eq!(TokenType::OverrideTag.ansi_color(), "\x1b[1;31m");
    }

    // A freshly constructed extension advertises the expected name and the
    // SyntaxHighlighting capability.
    #[test]
    fn test_syntax_highlight_extension_creation() {
        let ext = SyntaxHighlightExtension::new();
        assert_eq!(ext.info().name, "syntax-highlight");
        assert!(ext
            .info()
            .has_capability(&ExtensionCapability::SyntaxHighlighting));
    }

    // A minimal document yields tokens, starting with the section header.
    #[test]
    fn test_simple_tokenization() {
        let mut ext = SyntaxHighlightExtension::new();
        let doc = EditorDocument::from_content("[Script Info]\nTitle: Test").unwrap();

        let tokens = ext.tokenize_document(&doc).unwrap();
        assert!(!tokens.is_empty());

        // First token should be section header
        assert_eq!(tokens[0].token_type, TokenType::SectionHeader);
        assert_eq!(tokens[0].semantic_info, Some("Script Info".to_string()));
    }

    // The schema exposes the configuration keys read in `initialize`.
    #[test]
    fn test_config_schema() {
        let ext = SyntaxHighlightExtension::new();
        let schema = ext.config_schema();

        assert!(schema.contains_key("syntax.semantic_highlighting"));
        assert!(schema.contains_key("syntax.highlight_tags"));
    }
}
767
768// Include extended tests
769#[cfg(test)]
770#[path = "syntax_highlight_tests.rs"]
771mod extended_tests;