Skip to main content

telltale_language/compiler/
grammar.rs

1//! Dynamic Pest Grammar Composition for Extensions
2//!
3//! This module provides a system for dynamically composing Pest grammars by merging
4//! the base choreographic grammar with extension-provided grammar rules.
5
6use crate::extensions::{ExtensionRegistry, GrammarExtension};
7use std::collections::HashSet;
8use std::fs;
9use std::path::Path;
10
11/// Manages dynamic composition of Pest grammars with extensions
12pub struct GrammarComposer {
13    base_grammar: String,
14    extension_registry: ExtensionRegistry,
15    /// Cache for composed grammar to avoid recomputation
16    cached_grammar: Option<String>,
17    /// Hash of current extension state for cache invalidation
18    extension_hash: u64,
19}
20
21impl GrammarComposer {
22    /// Create a new grammar composer with the base grammar
23    pub fn new() -> Self {
24        let base_grammar = include_str!("choreography.pest").to_string();
25        Self {
26            base_grammar,
27            extension_registry: ExtensionRegistry::new(),
28            cached_grammar: None,
29            extension_hash: 0,
30        }
31    }
32
33    /// Register an extension with the grammar composer.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if there's a priority conflict with an existing extension.
38    pub fn register_extension<T: GrammarExtension + 'static>(
39        &mut self,
40        extension: T,
41    ) -> Result<(), crate::extensions::ParseError> {
42        let result = self.extension_registry.register_grammar(extension);
43        // Invalidate cache when extensions change (even on failure for consistency)
44        self.invalidate_cache();
45        result
46    }
47
48    /// Invalidate the cached grammar and force recomputation
49    fn invalidate_cache(&mut self) {
50        self.cached_grammar = None;
51        self.extension_hash = self.compute_extension_hash();
52    }
53
54    /// Compute a hash of current extensions for cache invalidation
55    fn compute_extension_hash(&self) -> u64 {
56        use std::collections::hash_map::DefaultHasher;
57        use std::hash::{Hash, Hasher};
58
59        let mut hasher = DefaultHasher::new();
60
61        // Hash extension count and IDs for simple cache invalidation
62        self.extension_registry
63            .grammar_extensions()
64            .count()
65            .hash(&mut hasher);
66        for ext in self.extension_registry.grammar_extensions() {
67            ext.extension_id().hash(&mut hasher);
68            ext.priority().hash(&mut hasher);
69        }
70
71        hasher.finish()
72    }
73
74    /// Compose the final grammar including all registered extensions
75    pub fn compose(&mut self) -> Result<String, GrammarCompositionError> {
76        // Check if we can use cached grammar
77        let current_hash = self.compute_extension_hash();
78        if let Some(ref cached) = self.cached_grammar {
79            if current_hash == self.extension_hash {
80                return Ok(cached.clone());
81            }
82        }
83
84        // Recompute grammar
85        let composed = self.compose_uncached()?;
86
87        // Cache the result
88        self.cached_grammar = Some(composed.clone());
89        self.extension_hash = current_hash;
90
91        Ok(composed)
92    }
93
94    /// Compose grammar without using cache (for internal use)
95    fn compose_uncached(&self) -> Result<String, GrammarCompositionError> {
96        let mut composed = self.base_grammar.clone();
97
98        // Validate that we can safely extend the base grammar (cached validation)
99        self.validate_base_grammar_cached(&composed)?;
100
101        // Get all grammar extensions sorted by priority
102        let extension_rules = self.extension_registry.compose_grammar("");
103
104        if !extension_rules.trim().is_empty() {
105            // Inject extension rules into the statement rule (optimized)
106            composed = self.inject_extension_rules_optimized(composed, &extension_rules)?;
107        }
108
109        // Validate the final composed grammar (cached validation)
110        self.validate_composed_grammar_cached(&composed)?;
111
112        Ok(composed)
113    }
114
115    /// Inject extension rules into the base grammar
116    fn inject_extension_rules_optimized(
117        &self,
118        base_grammar: String,
119        extension_rules: &str,
120    ) -> Result<String, GrammarCompositionError> {
121        self.inject_extension_rules_via_lines(base_grammar, extension_rules)
122    }
123
124    fn inject_extension_rules_via_lines(
125        &self,
126        base_grammar: String,
127        extension_rules: &str,
128    ) -> Result<String, GrammarCompositionError> {
129        let extension_statements = self.extract_extension_statements_optimized(extension_rules)?;
130        let mut lines: Vec<String> = base_grammar.lines().map(|line| line.to_string()).collect();
131
132        if !extension_statements.is_empty() {
133            let (stmt_start, stmt_end) = find_statement_rule_bounds(&lines)?;
134            let indent = find_statement_indent(&lines, stmt_start, stmt_end);
135            let insert_lines: Vec<String> = extension_statements
136                .iter()
137                .map(|rule| format!("{indent}| {rule}"))
138                .collect();
139
140            lines.splice(stmt_end..stmt_end, insert_lines);
141        }
142
143        let mut composed = lines.join("\n");
144        composed.push('\n');
145        composed.push_str("// Extension Rules\n");
146        composed.push_str(extension_rules);
147        Ok(composed)
148    }
149
150    /// Extract statement rule names from extension grammar (optimized version)
151    fn extract_extension_statements_optimized(
152        &self,
153        extension_rules: &str,
154    ) -> Result<Vec<String>, GrammarCompositionError> {
155        let mut statements = Vec::new();
156
157        // Pre-allocate based on estimated number of rules
158        let estimated_rules = extension_rules.matches("_stmt = {").count();
159        statements.reserve(estimated_rules);
160
161        for line in extension_rules.lines() {
162            let line = line.trim();
163            if line.ends_with("_stmt = {") {
164                if let Some(equals_pos) = line.find('=') {
165                    let rule_name = line[..equals_pos].trim();
166                    statements.push(rule_name.to_string());
167                }
168            }
169        }
170
171        Ok(statements)
172    }
173
174    /// Validate that the base grammar has the required extension points
175    ///
176    /// Note: `validate_base_grammar_cached` is preferred for production use.
177    /// This method is kept for testing and API compatibility.
178    #[cfg(test)]
179    fn validate_base_grammar(&self, grammar: &str) -> Result<(), GrammarCompositionError> {
180        let required_rules = ["statement = _{", "send_stmt", "broadcast_stmt"];
181
182        for rule in &required_rules {
183            if !grammar.contains(rule) {
184                return Err(GrammarCompositionError::InvalidBaseGrammar(format!(
185                    "Missing required rule: {}",
186                    rule
187                )));
188            }
189        }
190
191        Ok(())
192    }
193
194    /// Optimized validation with static patterns and early exit
195    fn validate_base_grammar_cached(&self, grammar: &str) -> Result<(), GrammarCompositionError> {
196        // Use static patterns for better performance
197        const REQUIRED_PATTERNS: &[&str] = &["statement = _{", "send_stmt", "broadcast_stmt"];
198
199        for &pattern in REQUIRED_PATTERNS {
200            if !grammar.contains(pattern) {
201                return Err(GrammarCompositionError::InvalidBaseGrammar(format!(
202                    "Missing required rule: {}",
203                    pattern
204                )));
205            }
206        }
207
208        Ok(())
209    }
210
211    /// Validate the composed grammar for common issues
212    ///
213    /// Note: `validate_composed_grammar_cached` is preferred for production use.
214    /// This method is kept for testing and API compatibility.
215    #[cfg(test)]
216    fn validate_composed_grammar(&self, grammar: &str) -> Result<(), GrammarCompositionError> {
217        // Check for duplicate rule names
218        let mut rule_names = HashSet::new();
219
220        for line in grammar.lines() {
221            let line = line.trim();
222            if line.contains(" = {") && !line.starts_with("//") {
223                if let Some(rule_name) = line.split(" = {").next() {
224                    let rule_name = rule_name.trim();
225                    if rule_names.contains(rule_name) {
226                        return Err(GrammarCompositionError::DuplicateRule(
227                            rule_name.to_string(),
228                        ));
229                    }
230                    rule_names.insert(rule_name.to_string());
231                }
232            }
233        }
234
235        // Basic syntax validation (check balanced braces outside of string literals)
236        let (open_braces, close_braces) = count_braces_outside_quotes(grammar);
237
238        if open_braces != close_braces {
239            return Err(GrammarCompositionError::SyntaxError(
240                "Unbalanced braces in composed grammar".to_string(),
241            ));
242        }
243
244        Ok(())
245    }
246
247    /// Optimized validation with pre-allocated HashSet and fast brace counting
248    fn validate_composed_grammar_cached(
249        &self,
250        grammar: &str,
251    ) -> Result<(), GrammarCompositionError> {
252        // Pre-allocate HashSet with estimated capacity
253        let estimated_rules = grammar.matches(" = {").count();
254        let mut rule_names = HashSet::with_capacity(estimated_rules);
255
256        for line in grammar.lines() {
257            let line = line.trim();
258            if line.contains(" = {") && !line.starts_with("//") {
259                if let Some(rule_name) = line.split(" = {").next() {
260                    let rule_name = rule_name.trim();
261                    if !rule_names.insert(rule_name) {
262                        return Err(GrammarCompositionError::DuplicateRule(
263                            rule_name.to_string(),
264                        ));
265                    }
266                }
267            }
268        }
269
270        // Optimized brace counting (ignore braces inside string literals)
271        let (open_braces, close_braces) = count_braces_outside_quotes(grammar);
272
273        if open_braces != close_braces {
274            return Err(GrammarCompositionError::SyntaxError(
275                "Unbalanced braces in composed grammar".to_string(),
276            ));
277        }
278
279        Ok(())
280    }
281
282    /// Check if an extension rule exists
283    pub fn has_extension_rule(&self, rule_name: &str) -> bool {
284        self.extension_registry.can_handle(rule_name)
285    }
286
287    /// Get the number of registered extensions
288    pub fn extension_count(&self) -> usize {
289        self.extension_registry.grammar_extensions().count()
290    }
291
292    /// Write the composed grammar to a file for debugging
293    pub fn write_composed_grammar<P: AsRef<Path>>(
294        &mut self,
295        path: P,
296    ) -> Result<(), GrammarCompositionError> {
297        let composed = self.compose()?;
298        fs::write(path, composed).map_err(|e| {
299            GrammarCompositionError::IoError(format!("Failed to write grammar: {}", e))
300        })?;
301        Ok(())
302    }
303}
304
305fn count_braces_outside_quotes(grammar: &str) -> (usize, usize) {
306    let mut open_braces = 0usize;
307    let mut close_braces = 0usize;
308    let mut in_string = false;
309    let mut escape = false;
310
311    for ch in grammar.chars() {
312        if in_string {
313            if escape {
314                escape = false;
315            } else if ch == '\\' {
316                escape = true;
317            } else if ch == '"' {
318                in_string = false;
319            }
320            continue;
321        }
322
323        if ch == '"' {
324            in_string = true;
325            continue;
326        }
327
328        match ch {
329            '{' => open_braces += 1,
330            '}' => close_braces += 1,
331            _ => {}
332        }
333    }
334
335    (open_braces, close_braces)
336}
337
338fn find_statement_rule_bounds(lines: &[String]) -> Result<(usize, usize), GrammarCompositionError> {
339    let mut start = None;
340    for (idx, line) in lines.iter().enumerate() {
341        if line.trim_start().starts_with("statement = _{") {
342            start = Some(idx);
343            break;
344        }
345    }
346
347    let start = start.ok_or_else(|| {
348        GrammarCompositionError::InvalidBaseGrammar(
349            "Could not find statement rule in base grammar".to_string(),
350        )
351    })?;
352
353    for (idx, line) in lines.iter().enumerate().skip(start + 1) {
354        if line.trim_start().starts_with('}') {
355            return Ok((start, idx));
356        }
357    }
358
359    Err(GrammarCompositionError::InvalidBaseGrammar(
360        "Could not find end of statement rule in base grammar".to_string(),
361    ))
362}
363
364fn find_statement_indent(lines: &[String], start: usize, end: usize) -> String {
365    for line in lines.iter().take(end).skip(start + 1) {
366        let trimmed = line.trim_start();
367        if trimmed.starts_with('|') {
368            let indent_len = line.len().saturating_sub(trimmed.len());
369            return line[..indent_len].to_string();
370        }
371    }
372    "    ".to_string()
373}
374
375impl Default for GrammarComposer {
376    fn default() -> Self {
377        Self::new()
378    }
379}
380
381/// Errors that can occur during grammar composition
382#[derive(Debug, thiserror::Error)]
383pub enum GrammarCompositionError {
384    #[error("Invalid base grammar: {0}")]
385    InvalidBaseGrammar(String),
386
387    #[error("Duplicate rule name: {0}")]
388    DuplicateRule(String),
389
390    #[error("Syntax error in composed grammar: {0}")]
391    SyntaxError(String),
392
393    #[error("Extension conflict: {0}")]
394    ExtensionConflict(String),
395
396    #[error("IO error: {0}")]
397    IoError(String),
398}
399
400/// Builder pattern for constructing grammar composers with extensions
401pub struct GrammarComposerBuilder {
402    composer: GrammarComposer,
403}
404
405impl GrammarComposerBuilder {
406    pub fn new() -> Self {
407        Self {
408            composer: GrammarComposer::new(),
409        }
410    }
411
412    pub fn with_extension<T: GrammarExtension + 'static>(
413        mut self,
414        extension: T,
415    ) -> Result<Self, crate::extensions::ParseError> {
416        self.composer.register_extension(extension)?;
417        Ok(self)
418    }
419
420    pub fn build(self) -> GrammarComposer {
421        self.composer
422    }
423}
424
425impl Default for GrammarComposerBuilder {
426    fn default() -> Self {
427        Self::new()
428    }
429}
430
431#[cfg(test)]
432mod tests {
433    use super::*;
434    use crate::extensions::GrammarExtension;
435
436    #[derive(Debug)]
437    struct TestExtension;
438
439    impl GrammarExtension for TestExtension {
440        fn grammar_rules(&self) -> &'static str {
441            "audit_stmt = { \"audit\" ~ ident ~ block }"
442        }
443
444        fn statement_rules(&self) -> Vec<&'static str> {
445            vec!["audit_stmt"]
446        }
447
448        fn extension_id(&self) -> &'static str {
449            "test_timeout"
450        }
451    }
452
453    #[test]
454    fn test_grammar_composer_creation() {
455        let composer = GrammarComposer::new();
456        assert_eq!(composer.extension_count(), 0);
457        assert!(composer.base_grammar.contains("choreography"));
458        assert!(composer.base_grammar.contains("statement = _{"));
459    }
460
461    #[test]
462    fn test_extension_registration() {
463        let mut composer = GrammarComposer::new();
464        composer
465            .register_extension(TestExtension)
466            .expect("extension should register");
467        assert_eq!(composer.extension_count(), 1);
468        assert!(composer.has_extension_rule("audit_stmt"));
469    }
470
471    #[test]
472    fn test_grammar_composition() {
473        let mut composer = GrammarComposer::new();
474        composer
475            .register_extension(TestExtension)
476            .expect("extension should register");
477
478        let result = composer.compose();
479        assert!(result.is_ok(), "Grammar composition should succeed");
480
481        let composed = result.unwrap();
482        assert!(composed.contains("audit_stmt"));
483        assert!(composed.contains("choreography"));
484        assert!(composed.contains("// Extension Rules"));
485    }
486
487    #[test]
488    fn test_grammar_caching() {
489        let mut composer = GrammarComposer::new();
490        composer
491            .register_extension(TestExtension)
492            .expect("extension should register");
493
494        // First composition
495        let start = std::time::Instant::now();
496        let result1 = composer.compose();
497        let first_time = start.elapsed();
498        assert!(result1.is_ok());
499
500        // Second composition (should use cache)
501        let start = std::time::Instant::now();
502        let result2 = composer.compose();
503        let second_time = start.elapsed();
504        assert!(result2.is_ok());
505
506        // Results should be identical
507        assert_eq!(result1.unwrap(), result2.unwrap());
508
509        // Second call should be faster due to caching
510        // Note: This might not always be true due to system variations,
511        // but it's a reasonable performance expectation
512        println!(
513            "First composition: {:?}, Second (cached): {:?}",
514            first_time, second_time
515        );
516    }
517
518    #[test]
519    fn test_builder_pattern() {
520        let composer = GrammarComposerBuilder::new()
521            .with_extension(TestExtension)
522            .expect("test extension should register")
523            .build();
524
525        assert_eq!(composer.extension_count(), 1);
526        assert!(composer.has_extension_rule("audit_stmt"));
527    }
528
529    #[test]
530    fn test_validation() {
531        let mut composer = GrammarComposer::new();
532
533        // Test base grammar validation
534        let valid_result = composer.validate_base_grammar(&composer.base_grammar);
535        assert!(valid_result.is_ok(), "Base grammar should be valid");
536
537        // Test composed grammar validation
538        let composed = composer.compose().unwrap();
539        let validation_result = composer.validate_composed_grammar(&composed);
540        assert!(
541            validation_result.is_ok(),
542            "Composed grammar should be valid"
543        );
544    }
545}