Skip to main content

telltale_language/compiler/
grammar.rs

1//! Dynamic Pest Grammar Composition for Extensions
2//!
3//! This module provides a system for dynamically composing Pest grammars by merging
4//! the base choreographic grammar with extension-provided grammar rules.
5
6use crate::extensions::{ExtensionRegistry, GrammarExtension};
7use std::collections::HashSet;
8use std::fs;
9use std::path::Path;
10
11/// Manages dynamic composition of Pest grammars with extensions
12pub struct GrammarComposer {
13    base_grammar: String,
14    extension_registry: ExtensionRegistry,
15    /// Cache for composed grammar to avoid recomputation
16    cached_grammar: Option<String>,
17    /// Hash of current extension state for cache invalidation
18    extension_hash: u64,
19}
20
21impl GrammarComposer {
22    /// Create a new grammar composer with the base grammar
23    pub fn new() -> Self {
24        let base_grammar = include_str!("choreography.pest").to_string();
25        Self {
26            base_grammar,
27            extension_registry: ExtensionRegistry::new(),
28            cached_grammar: None,
29            extension_hash: 0,
30        }
31    }
32
33    /// Register an extension with the grammar composer.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if there's a priority conflict with an existing extension.
38    pub fn register_extension<T: GrammarExtension + 'static>(
39        &mut self,
40        extension: T,
41    ) -> Result<(), crate::extensions::ParseError> {
42        let result = self.extension_registry.register_grammar(extension);
43        // Invalidate cache when extensions change (even on failure for consistency)
44        self.invalidate_cache();
45        result
46    }
47
48    /// Invalidate the cached grammar and force recomputation
49    fn invalidate_cache(&mut self) {
50        self.cached_grammar = None;
51        self.extension_hash = self.compute_extension_hash();
52    }
53
54    /// Compute a hash of current extensions for cache invalidation
55    fn compute_extension_hash(&self) -> u64 {
56        use std::collections::hash_map::DefaultHasher;
57        use std::hash::{Hash, Hasher};
58
59        let mut hasher = DefaultHasher::new();
60
61        // Hash extension count and IDs for simple cache invalidation
62        self.extension_registry
63            .grammar_extensions()
64            .count()
65            .hash(&mut hasher);
66        for ext in self.extension_registry.grammar_extensions() {
67            ext.extension_id().hash(&mut hasher);
68            ext.priority().hash(&mut hasher);
69        }
70
71        hasher.finish()
72    }
73
74    /// Compose the final grammar including all registered extensions
75    pub fn compose(&mut self) -> Result<String, GrammarCompositionError> {
76        // Check if we can use cached grammar
77        let current_hash = self.compute_extension_hash();
78        if let Some(ref cached) = self.cached_grammar {
79            if current_hash == self.extension_hash {
80                return Ok(cached.clone());
81            }
82        }
83
84        // Recompute grammar
85        let composed = self.compose_uncached()?;
86
87        // Cache the result
88        self.cached_grammar = Some(composed.clone());
89        self.extension_hash = current_hash;
90
91        Ok(composed)
92    }
93
94    /// Compose grammar without using cache (for internal use)
95    fn compose_uncached(&self) -> Result<String, GrammarCompositionError> {
96        let mut composed = self.base_grammar.clone();
97
98        // Validate that we can safely extend the base grammar (cached validation)
99        self.validate_base_grammar_cached(&composed)?;
100
101        // Get all grammar extensions sorted by priority
102        let extension_rules = self.extension_registry.compose_grammar("");
103
104        if !extension_rules.trim().is_empty() {
105            // Inject extension rules into the statement rule (optimized)
106            composed = self.inject_extension_rules_optimized(composed, &extension_rules)?;
107        }
108
109        // Validate the final composed grammar (cached validation)
110        self.validate_composed_grammar_cached(&composed)?;
111
112        Ok(composed)
113    }
114
115    /// Inject extension rules into the base grammar
116    fn inject_extension_rules_optimized(
117        &self,
118        base_grammar: String,
119        extension_rules: &str,
120    ) -> Result<String, GrammarCompositionError> {
121        self.inject_extension_rules_via_lines(base_grammar, extension_rules)
122    }
123
124    fn inject_extension_rules_via_lines(
125        &self,
126        base_grammar: String,
127        extension_rules: &str,
128    ) -> Result<String, GrammarCompositionError> {
129        let extension_statements = self.extract_extension_statements_optimized(extension_rules)?;
130        let mut lines: Vec<String> = base_grammar.lines().map(|line| line.to_string()).collect();
131
132        if !extension_statements.is_empty() {
133            let (stmt_start, stmt_end) = find_statement_rule_bounds(&lines)?;
134            let indent = find_statement_indent(&lines, stmt_start, stmt_end);
135            let insert_lines: Vec<String> = extension_statements
136                .iter()
137                .map(|rule| format!("{indent}| {rule}"))
138                .collect();
139
140            lines.splice(stmt_end..stmt_end, insert_lines);
141        }
142
143        let mut composed = lines.join("\n");
144        composed.push('\n');
145        composed.push_str("// Extension Rules\n");
146        composed.push_str(extension_rules);
147        Ok(composed)
148    }
149
150    /// Extract statement rule names from extension grammar (optimized version)
151    fn extract_extension_statements_optimized(
152        &self,
153        extension_rules: &str,
154    ) -> Result<Vec<String>, GrammarCompositionError> {
155        let mut statements = Vec::new();
156
157        // Pre-allocate based on estimated number of rules
158        let estimated_rules = extension_rules.matches("_stmt = {").count();
159        statements.reserve(estimated_rules);
160
161        for line in extension_rules.lines() {
162            let line = line.trim();
163            if line.ends_with("_stmt = {") {
164                if let Some(equals_pos) = line.find('=') {
165                    let rule_name = line[..equals_pos].trim();
166                    statements.push(rule_name.to_string());
167                }
168            }
169        }
170
171        Ok(statements)
172    }
173
174    /// Optimized validation with static patterns and early exit
175    fn validate_base_grammar_cached(&self, grammar: &str) -> Result<(), GrammarCompositionError> {
176        // Use static patterns for better performance
177        const REQUIRED_PATTERNS: &[&str] = &["statement = _{", "send_stmt", "broadcast_stmt"];
178
179        for &pattern in REQUIRED_PATTERNS {
180            if !grammar.contains(pattern) {
181                return Err(GrammarCompositionError::InvalidBaseGrammar(format!(
182                    "Missing required rule: {}",
183                    pattern
184                )));
185            }
186        }
187
188        Ok(())
189    }
190
191    /// Optimized validation with pre-allocated HashSet and fast brace counting
192    fn validate_composed_grammar_cached(
193        &self,
194        grammar: &str,
195    ) -> Result<(), GrammarCompositionError> {
196        // Pre-allocate HashSet with estimated capacity
197        let estimated_rules = grammar.matches(" = {").count();
198        let mut rule_names = HashSet::with_capacity(estimated_rules);
199
200        for line in grammar.lines() {
201            let line = line.trim();
202            if line.contains(" = {") && !line.starts_with("//") {
203                if let Some(rule_name) = line.split(" = {").next() {
204                    let rule_name = rule_name.trim();
205                    if !rule_names.insert(rule_name) {
206                        return Err(GrammarCompositionError::DuplicateRule(
207                            rule_name.to_string(),
208                        ));
209                    }
210                }
211            }
212        }
213
214        // Optimized brace counting (ignore braces inside string literals)
215        let (open_braces, close_braces) = count_braces_outside_quotes(grammar);
216
217        if open_braces != close_braces {
218            return Err(GrammarCompositionError::SyntaxError(
219                "Unbalanced braces in composed grammar".to_string(),
220            ));
221        }
222
223        Ok(())
224    }
225
226    /// Check if an extension rule exists
227    pub fn has_extension_rule(&self, rule_name: &str) -> bool {
228        self.extension_registry.can_handle(rule_name)
229    }
230
231    /// Get the number of registered extensions
232    pub fn extension_count(&self) -> usize {
233        self.extension_registry.grammar_extensions().count()
234    }
235
236    /// Write the composed grammar to a file for debugging
237    pub fn write_composed_grammar<P: AsRef<Path>>(
238        &mut self,
239        path: P,
240    ) -> Result<(), GrammarCompositionError> {
241        let composed = self.compose()?;
242        fs::write(path, composed).map_err(|e| {
243            GrammarCompositionError::IoError(format!("Failed to write grammar: {}", e))
244        })?;
245        Ok(())
246    }
247}
248
249fn count_braces_outside_quotes(grammar: &str) -> (usize, usize) {
250    let mut open_braces = 0usize;
251    let mut close_braces = 0usize;
252    let mut in_string = false;
253    let mut escape = false;
254
255    for ch in grammar.chars() {
256        if in_string {
257            if escape {
258                escape = false;
259            } else if ch == '\\' {
260                escape = true;
261            } else if ch == '"' {
262                in_string = false;
263            }
264            continue;
265        }
266
267        if ch == '"' {
268            in_string = true;
269            continue;
270        }
271
272        match ch {
273            '{' => open_braces += 1,
274            '}' => close_braces += 1,
275            _ => {}
276        }
277    }
278
279    (open_braces, close_braces)
280}
281
282fn find_statement_rule_bounds(lines: &[String]) -> Result<(usize, usize), GrammarCompositionError> {
283    let mut start = None;
284    for (idx, line) in lines.iter().enumerate() {
285        if line.trim_start().starts_with("statement = _{") {
286            start = Some(idx);
287            break;
288        }
289    }
290
291    let start = start.ok_or_else(|| {
292        GrammarCompositionError::InvalidBaseGrammar(
293            "Could not find statement rule in base grammar".to_string(),
294        )
295    })?;
296
297    for (idx, line) in lines.iter().enumerate().skip(start + 1) {
298        if line.trim_start().starts_with('}') {
299            return Ok((start, idx));
300        }
301    }
302
303    Err(GrammarCompositionError::InvalidBaseGrammar(
304        "Could not find end of statement rule in base grammar".to_string(),
305    ))
306}
307
308fn find_statement_indent(lines: &[String], start: usize, end: usize) -> String {
309    for line in lines.iter().take(end).skip(start + 1) {
310        let trimmed = line.trim_start();
311        if trimmed.starts_with('|') {
312            let indent_len = line.len().saturating_sub(trimmed.len());
313            return line[..indent_len].to_string();
314        }
315    }
316    "    ".to_string()
317}
318
319impl Default for GrammarComposer {
320    fn default() -> Self {
321        Self::new()
322    }
323}
324
325/// Errors that can occur during grammar composition
326#[derive(Debug, thiserror::Error)]
327pub enum GrammarCompositionError {
328    #[error("Invalid base grammar: {0}")]
329    InvalidBaseGrammar(String),
330
331    #[error("Duplicate rule name: {0}")]
332    DuplicateRule(String),
333
334    #[error("Syntax error in composed grammar: {0}")]
335    SyntaxError(String),
336
337    #[error("Extension conflict: {0}")]
338    ExtensionConflict(String),
339
340    #[error("IO error: {0}")]
341    IoError(String),
342}
343
344/// Builder pattern for constructing grammar composers with extensions
345pub struct GrammarComposerBuilder {
346    composer: GrammarComposer,
347}
348
349impl GrammarComposerBuilder {
350    pub fn new() -> Self {
351        Self {
352            composer: GrammarComposer::new(),
353        }
354    }
355
356    pub fn with_extension<T: GrammarExtension + 'static>(
357        mut self,
358        extension: T,
359    ) -> Result<Self, crate::extensions::ParseError> {
360        self.composer.register_extension(extension)?;
361        Ok(self)
362    }
363
364    pub fn build(self) -> GrammarComposer {
365        self.composer
366    }
367}
368
369impl Default for GrammarComposerBuilder {
370    fn default() -> Self {
371        Self::new()
372    }
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378    use crate::extensions::GrammarExtension;
379
380    #[derive(Debug)]
381    struct TestExtension;
382
383    impl GrammarExtension for TestExtension {
384        fn grammar_rules(&self) -> &'static str {
385            "audit_stmt = { \"audit\" ~ ident ~ block }"
386        }
387
388        fn statement_rules(&self) -> Vec<&'static str> {
389            vec!["audit_stmt"]
390        }
391
392        fn extension_id(&self) -> &'static str {
393            "test_timeout"
394        }
395    }
396
397    #[test]
398    fn test_grammar_composer_creation() {
399        let composer = GrammarComposer::new();
400        assert_eq!(composer.extension_count(), 0);
401        assert!(composer.base_grammar.contains("choreography"));
402        assert!(composer.base_grammar.contains("statement = _{"));
403    }
404
405    #[test]
406    fn test_extension_registration() {
407        let mut composer = GrammarComposer::new();
408        composer
409            .register_extension(TestExtension)
410            .expect("extension should register");
411        assert_eq!(composer.extension_count(), 1);
412        assert!(composer.has_extension_rule("audit_stmt"));
413    }
414
415    #[test]
416    fn test_grammar_composition() {
417        let mut composer = GrammarComposer::new();
418        composer
419            .register_extension(TestExtension)
420            .expect("extension should register");
421
422        let result = composer.compose();
423        assert!(result.is_ok(), "Grammar composition should succeed");
424
425        let composed = result.unwrap();
426        assert!(composed.contains("audit_stmt"));
427        assert!(composed.contains("choreography"));
428        assert!(composed.contains("// Extension Rules"));
429    }
430
431    #[test]
432    fn test_grammar_caching() {
433        let mut composer = GrammarComposer::new();
434        composer
435            .register_extension(TestExtension)
436            .expect("extension should register");
437
438        // First composition
439        let start = std::time::Instant::now();
440        let result1 = composer.compose();
441        let first_time = start.elapsed();
442        assert!(result1.is_ok());
443
444        // Second composition (should use cache)
445        let start = std::time::Instant::now();
446        let result2 = composer.compose();
447        let second_time = start.elapsed();
448        assert!(result2.is_ok());
449
450        // Results should be identical
451        assert_eq!(result1.unwrap(), result2.unwrap());
452
453        // Second call should be faster due to caching
454        // Note: This might not always be true due to system variations,
455        // but it's a reasonable performance expectation
456        println!(
457            "First composition: {:?}, Second (cached): {:?}",
458            first_time, second_time
459        );
460    }
461
462    #[test]
463    fn test_builder_pattern() {
464        let composer = GrammarComposerBuilder::new()
465            .with_extension(TestExtension)
466            .expect("test extension should register")
467            .build();
468
469        assert_eq!(composer.extension_count(), 1);
470        assert!(composer.has_extension_rule("audit_stmt"));
471    }
472
473    #[test]
474    fn test_validation() {
475        let mut composer = GrammarComposer::new();
476
477        // Test base grammar validation
478        let valid_result = composer.validate_base_grammar_cached(&composer.base_grammar);
479        assert!(valid_result.is_ok(), "Base grammar should be valid");
480
481        // Test composed grammar validation
482        let composed = composer.compose().unwrap();
483        let validation_result = composer.validate_composed_grammar_cached(&composed);
484        assert!(
485            validation_result.is_ok(),
486            "Composed grammar should be valid"
487        );
488    }
489}