Skip to main content

telltale_language/compiler/
grammar.rs

1//! Dynamic Pest Grammar Composition for Extensions
2//!
3//! This module provides a system for dynamically composing Pest grammars by merging
4//! the base choreographic grammar with extension-provided grammar rules.
5
6use crate::extensions::{ExtensionRegistry, GrammarExtension};
7use std::collections::HashSet;
8use std::fs;
9use std::path::Path;
10
11/// Manages dynamic composition of Pest grammars with extensions
12pub struct GrammarComposer {
13    base_grammar: String,
14    extension_registry: ExtensionRegistry,
15    /// Cache for composed grammar to avoid recomputation
16    cached_grammar: Option<String>,
17    /// Hash of current extension state for cache invalidation
18    extension_hash: u64,
19}
20
21impl GrammarComposer {
22    /// Create a new grammar composer with the base grammar
23    pub fn new() -> Self {
24        let base_grammar = include_str!("choreography.pest").to_string();
25        Self {
26            base_grammar,
27            extension_registry: ExtensionRegistry::new(),
28            cached_grammar: None,
29            extension_hash: 0,
30        }
31    }
32
33    /// Register an extension with the grammar composer.
34    ///
35    /// # Errors
36    ///
37    /// Returns an error if there's a priority conflict with an existing extension.
38    pub fn register_extension<T: GrammarExtension + 'static>(
39        &mut self,
40        extension: T,
41    ) -> Result<(), crate::extensions::ParseError> {
42        let result = self.extension_registry.register_grammar(extension);
43        // Invalidate cache when extensions change (even on failure for consistency)
44        self.invalidate_cache();
45        result
46    }
47
48    /// Invalidate the cached grammar and force recomputation
49    fn invalidate_cache(&mut self) {
50        self.cached_grammar = None;
51        self.extension_hash = self.compute_extension_hash();
52    }
53
54    /// Compute a hash of current extensions for cache invalidation
55    fn compute_extension_hash(&self) -> u64 {
56        use std::collections::hash_map::DefaultHasher;
57        use std::hash::{Hash, Hasher};
58
59        let mut hasher = DefaultHasher::new();
60
61        // Hash extension count and IDs for simple cache invalidation
62        self.extension_registry
63            .grammar_extensions()
64            .count()
65            .hash(&mut hasher);
66        for ext in self.extension_registry.grammar_extensions() {
67            ext.extension_id().hash(&mut hasher);
68            ext.priority().hash(&mut hasher);
69        }
70
71        hasher.finish()
72    }
73
74    /// Compose the final grammar including all registered extensions
75    pub fn compose(&mut self) -> Result<String, GrammarCompositionError> {
76        // Check if we can use cached grammar
77        let current_hash = self.compute_extension_hash();
78        if let Some(ref cached) = self.cached_grammar {
79            if current_hash == self.extension_hash {
80                return Ok(cached.clone());
81            }
82        }
83
84        // Recompute grammar
85        let composed = self.compose_uncached()?;
86
87        // Cache the result
88        self.cached_grammar = Some(composed.clone());
89        self.extension_hash = current_hash;
90
91        Ok(composed)
92    }
93
94    /// Compose grammar without using cache (for internal use)
95    fn compose_uncached(&self) -> Result<String, GrammarCompositionError> {
96        let mut composed = self.base_grammar.clone();
97
98        // Validate that we can safely extend the base grammar (cached validation)
99        self.validate_base_grammar_cached(&composed)?;
100
101        // Get all grammar extensions sorted by priority
102        let extension_rules = self.extension_registry.compose_grammar("");
103
104        if !extension_rules.trim().is_empty() {
105            // Inject extension rules into the statement rule (optimized)
106            composed = self.inject_extension_rules_optimized(composed, &extension_rules)?;
107        }
108
109        // Validate the final composed grammar (cached validation)
110        self.validate_composed_grammar_cached(&composed)?;
111
112        Ok(composed)
113    }
114
115    /// Inject extension rules into the base grammar
116    fn inject_extension_rules_optimized(
117        &self,
118        base_grammar: String,
119        extension_rules: &str,
120    ) -> Result<String, GrammarCompositionError> {
121        self.inject_extension_rules_via_lines(base_grammar, extension_rules)
122    }
123
124    fn inject_extension_rules_via_lines(
125        &self,
126        base_grammar: String,
127        extension_rules: &str,
128    ) -> Result<String, GrammarCompositionError> {
129        let extension_statements = self.extract_extension_statements_optimized(extension_rules)?;
130        let mut lines: Vec<String> = base_grammar.lines().map(|line| line.to_string()).collect();
131
132        if !extension_statements.is_empty() {
133            let (stmt_start, stmt_end) = find_statement_rule_bounds(&lines)?;
134            let indent = find_statement_indent(&lines, stmt_start, stmt_end);
135            let insert_lines: Vec<String> = extension_statements
136                .iter()
137                .map(|rule| format!("{indent}| {rule}"))
138                .collect();
139
140            lines.splice(stmt_end..stmt_end, insert_lines);
141        }
142
143        let mut composed = lines.join("\n");
144        composed.push('\n');
145        composed.push_str("// Extension Rules\n");
146        composed.push_str(extension_rules);
147        Ok(composed)
148    }
149
150    /// Extract statement rule names from extension grammar (optimized version)
151    fn extract_extension_statements_optimized(
152        &self,
153        extension_rules: &str,
154    ) -> Result<Vec<String>, GrammarCompositionError> {
155        let mut statements = Vec::new();
156
157        // Pre-allocate based on estimated number of rules
158        let estimated_rules = extension_rules.matches("_stmt = {").count();
159        statements.reserve(estimated_rules);
160
161        for line in extension_rules.lines() {
162            let line = line.trim();
163            if line.ends_with("_stmt = {") {
164                if let Some(equals_pos) = line.find('=') {
165                    let rule_name = line[..equals_pos].trim();
166                    statements.push(rule_name.to_string());
167                }
168            }
169        }
170
171        Ok(statements)
172    }
173
174    /// Optimized validation with static patterns and early exit
175    fn validate_base_grammar_cached(&self, grammar: &str) -> Result<(), GrammarCompositionError> {
176        // Use static patterns for better performance
177        const REQUIRED_PATTERNS: &[&str] = &["statement = _{", "send_stmt", "broadcast_stmt"];
178
179        for &pattern in REQUIRED_PATTERNS {
180            if !grammar.contains(pattern) {
181                return Err(GrammarCompositionError::InvalidBaseGrammar(format!(
182                    "Missing required rule: {}",
183                    pattern
184                )));
185            }
186        }
187
188        Ok(())
189    }
190
191    /// Optimized validation with pre-allocated HashSet and fast brace counting
192    fn validate_composed_grammar_cached(
193        &self,
194        grammar: &str,
195    ) -> Result<(), GrammarCompositionError> {
196        let rule_names = collect_pest_rule_names(grammar);
197        let mut unique_rule_names = HashSet::with_capacity(rule_names.len());
198
199        for rule_name in rule_names {
200            if !unique_rule_names.insert(rule_name.clone()) {
201                return Err(GrammarCompositionError::DuplicateRule(rule_name));
202            }
203        }
204
205        // Optimized brace counting (ignore braces inside string literals)
206        let (open_braces, close_braces) = count_braces_outside_quotes(grammar);
207
208        if open_braces != close_braces {
209            return Err(GrammarCompositionError::SyntaxError(
210                "Unbalanced braces in composed grammar".to_string(),
211            ));
212        }
213
214        Ok(())
215    }
216
217    /// Check if an extension rule exists
218    pub fn has_extension_rule(&self, rule_name: &str) -> bool {
219        self.extension_registry.can_handle(rule_name)
220    }
221
222    /// Get the number of registered extensions
223    pub fn extension_count(&self) -> usize {
224        self.extension_registry.grammar_extensions().count()
225    }
226
227    /// Write the composed grammar to a file for debugging
228    pub fn write_composed_grammar<P: AsRef<Path>>(
229        &mut self,
230        path: P,
231    ) -> Result<(), GrammarCompositionError> {
232        let composed = self.compose()?;
233        fs::write(path, composed).map_err(|e| {
234            GrammarCompositionError::IoError(format!("Failed to write grammar: {}", e))
235        })?;
236        Ok(())
237    }
238}
239
240fn collect_pest_rule_names(grammar: &str) -> Vec<String> {
241    let mut rules = Vec::new();
242
243    for line in grammar.lines() {
244        let trimmed = line.trim_start();
245        if trimmed.is_empty() || trimmed.starts_with("//") {
246            continue;
247        }
248
249        let Some(first) = trimmed.chars().next() else {
250            continue;
251        };
252        if !(first == '_' || first.is_ascii_alphabetic()) {
253            continue;
254        }
255
256        let name_len = trimmed
257            .char_indices()
258            .take_while(|(_, ch)| *ch == '_' || ch.is_ascii_alphanumeric())
259            .last()
260            .map_or(0, |(idx, ch)| idx + ch.len_utf8());
261        let (name, rest) = trimmed.split_at(name_len);
262
263        if rest.trim_start().starts_with('=') {
264            rules.push(name.to_string());
265        }
266    }
267
268    rules
269}
270
271fn count_braces_outside_quotes(grammar: &str) -> (usize, usize) {
272    let mut open_braces = 0usize;
273    let mut close_braces = 0usize;
274    let mut in_string = false;
275    let mut escape = false;
276
277    for ch in grammar.chars() {
278        if in_string {
279            if escape {
280                escape = false;
281            } else if ch == '\\' {
282                escape = true;
283            } else if ch == '"' {
284                in_string = false;
285            }
286            continue;
287        }
288
289        if ch == '"' {
290            in_string = true;
291            continue;
292        }
293
294        match ch {
295            '{' => open_braces += 1,
296            '}' => close_braces += 1,
297            _ => {}
298        }
299    }
300
301    (open_braces, close_braces)
302}
303
304fn find_statement_rule_bounds(lines: &[String]) -> Result<(usize, usize), GrammarCompositionError> {
305    let mut start = None;
306    for (idx, line) in lines.iter().enumerate() {
307        if line.trim_start().starts_with("statement = _{") {
308            start = Some(idx);
309            break;
310        }
311    }
312
313    let start = start.ok_or_else(|| {
314        GrammarCompositionError::InvalidBaseGrammar(
315            "Could not find statement rule in base grammar".to_string(),
316        )
317    })?;
318
319    for (idx, line) in lines.iter().enumerate().skip(start + 1) {
320        if line.trim_start().starts_with('}') {
321            return Ok((start, idx));
322        }
323    }
324
325    Err(GrammarCompositionError::InvalidBaseGrammar(
326        "Could not find end of statement rule in base grammar".to_string(),
327    ))
328}
329
330fn find_statement_indent(lines: &[String], start: usize, end: usize) -> String {
331    for line in lines.iter().take(end).skip(start + 1) {
332        let trimmed = line.trim_start();
333        if trimmed.starts_with('|') {
334            let indent_len = line.len().saturating_sub(trimmed.len());
335            return line[..indent_len].to_string();
336        }
337    }
338    "    ".to_string()
339}
340
341impl Default for GrammarComposer {
342    fn default() -> Self {
343        Self::new()
344    }
345}
346
347/// Errors that can occur during grammar composition
348#[derive(Debug, thiserror::Error)]
349pub enum GrammarCompositionError {
350    #[error("Invalid base grammar: {0}")]
351    InvalidBaseGrammar(String),
352
353    #[error("Duplicate rule name: {0}")]
354    DuplicateRule(String),
355
356    #[error("Syntax error in composed grammar: {0}")]
357    SyntaxError(String),
358
359    #[error("Extension conflict: {0}")]
360    ExtensionConflict(String),
361
362    #[error("IO error: {0}")]
363    IoError(String),
364}
365
366/// Builder pattern for constructing grammar composers with extensions
367pub struct GrammarComposerBuilder {
368    composer: GrammarComposer,
369}
370
371impl GrammarComposerBuilder {
372    pub fn new() -> Self {
373        Self {
374            composer: GrammarComposer::new(),
375        }
376    }
377
378    pub fn with_extension<T: GrammarExtension + 'static>(
379        mut self,
380        extension: T,
381    ) -> Result<Self, crate::extensions::ParseError> {
382        self.composer.register_extension(extension)?;
383        Ok(self)
384    }
385
386    pub fn build(self) -> GrammarComposer {
387        self.composer
388    }
389}
390
391impl Default for GrammarComposerBuilder {
392    fn default() -> Self {
393        Self::new()
394    }
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400    use crate::extensions::GrammarExtension;
401
402    #[derive(Debug)]
403    struct TestExtension;
404
405    impl GrammarExtension for TestExtension {
406        fn grammar_rules(&self) -> &'static str {
407            "test_timeout_audit_stmt = { \"audit\" ~ ident ~ block }"
408        }
409
410        fn statement_rules(&self) -> Vec<&'static str> {
411            vec!["test_timeout_audit_stmt"]
412        }
413
414        fn extension_id(&self) -> &'static str {
415            "test_timeout"
416        }
417    }
418
419    #[test]
420    fn test_grammar_composer_creation() {
421        let composer = GrammarComposer::new();
422        assert_eq!(composer.extension_count(), 0);
423        assert!(composer.base_grammar.contains("choreography"));
424        assert!(composer.base_grammar.contains("statement = _{"));
425    }
426
427    #[test]
428    fn test_extension_registration() {
429        let mut composer = GrammarComposer::new();
430        composer
431            .register_extension(TestExtension)
432            .expect("extension should register");
433        assert_eq!(composer.extension_count(), 1);
434        assert!(composer.has_extension_rule("test_timeout_audit_stmt"));
435    }
436
437    #[test]
438    fn test_grammar_composition() {
439        let mut composer = GrammarComposer::new();
440        composer
441            .register_extension(TestExtension)
442            .expect("extension should register");
443
444        let result = composer.compose();
445        assert!(result.is_ok(), "Grammar composition should succeed");
446
447        let composed = result.unwrap();
448        assert!(composed.contains("test_timeout_audit_stmt"));
449        assert!(composed.contains("choreography"));
450        assert!(composed.contains("// Extension Rules"));
451    }
452
453    #[test]
454    fn test_grammar_caching() {
455        let mut composer = GrammarComposer::new();
456        composer
457            .register_extension(TestExtension)
458            .expect("extension should register");
459
460        // First composition
461        let start = std::time::Instant::now();
462        let result1 = composer.compose();
463        let first_time = start.elapsed();
464        assert!(result1.is_ok());
465
466        // Second composition (should use cache)
467        let start = std::time::Instant::now();
468        let result2 = composer.compose();
469        let second_time = start.elapsed();
470        assert!(result2.is_ok());
471
472        // Results should be identical
473        assert_eq!(result1.unwrap(), result2.unwrap());
474
475        // Second call should be faster due to caching
476        // Note: This might not always be true due to system variations,
477        // but it's a reasonable performance expectation
478        println!(
479            "First composition: {:?}, Second (cached): {:?}",
480            first_time, second_time
481        );
482    }
483
484    #[test]
485    fn test_builder_pattern() {
486        let composer = GrammarComposerBuilder::new()
487            .with_extension(TestExtension)
488            .expect("test extension should register")
489            .build();
490
491        assert_eq!(composer.extension_count(), 1);
492        assert!(composer.has_extension_rule("test_timeout_audit_stmt"));
493    }
494
495    #[test]
496    fn test_validation() {
497        let mut composer = GrammarComposer::new();
498
499        // Test base grammar validation
500        let valid_result = composer.validate_base_grammar_cached(&composer.base_grammar);
501        assert!(valid_result.is_ok(), "Base grammar should be valid");
502
503        // Test composed grammar validation
504        let composed = composer.compose().unwrap();
505        let validation_result = composer.validate_composed_grammar_cached(&composed);
506        assert!(
507            validation_result.is_ok(),
508            "Composed grammar should be valid"
509        );
510    }
511}