//! gf_core/lib.rs
1mod pgf_json;
2pub use pgf_json::*;
3
4use regex::Regex;
5use serde::Serialize;
6use std::collections::HashMap;
7use std::sync::atomic::{AtomicBool, Ordering};
8
/// Process-wide flag gating all debug output produced by this library.
static DEBUG_ENABLED: AtomicBool = AtomicBool::new(false);

/// Turn library-wide debug output on or off.
///
/// While enabled, parsing, linearization, and translation routines print
/// diagnostic information to stdout. Uses a relaxed atomic store: the flag
/// is an independent toggle with no ordering relationship to other data.
///
/// # Examples
///
/// ```
/// use gf_core::set_debug;
///
/// set_debug(true);   // diagnostics on
///
/// // ... GF operations now print debug info ...
///
/// set_debug(false);  // diagnostics off
/// ```
pub fn set_debug(enabled: bool) {
    DEBUG_ENABLED.store(enabled, Ordering::Relaxed)
}

/// Report whether debug output is currently enabled.
///
/// # Examples
///
/// ```
/// use gf_core::{set_debug, is_debug_enabled};
///
/// set_debug(true);
/// assert!(is_debug_enabled());
///
/// set_debug(false);
/// assert!(!is_debug_enabled());
/// ```
pub fn is_debug_enabled() -> bool {
    DEBUG_ENABLED.load(Ordering::Relaxed)
}
51
/// Macro for conditional debug output.
///
/// Prints its `format!`-style arguments to stdout with a `[DEBUG] ` prefix,
/// but only when [`is_debug_enabled`] returns true.
macro_rules! debug_println {
    ($($arg:tt)*) => {
        if crate::is_debug_enabled() {
            // format_args! formats in place; the original format! allocated
            // an intermediate String on every enabled call.
            println!("[DEBUG] {}", format_args!($($arg)*));
        }
    };
}
60
/// A function type from the abstract grammar: the categories of its
/// arguments plus the category it produces (e.g. `NP -> VP -> S`).
#[derive(Serialize, Debug, Clone)]
pub struct Type {
    /// Input categories (arguments) for the function type.
    pub args: Vec<String>,
    /// Output category (result) for the function type.
    pub cat: String,
}
68
69////////////////////////////////////////////////////////////////////////////////
70// Parser Module Start
71
72// Parser Module Stop
73
74/// Rust Runtime for Grammatical Framework
75///
76/// This crate provides a runtime for Grammatical Framework (GF) grammars,
77/// allowing parsing, linearization, and translation between languages defined
78/// in GF abstract and concrete syntax.
79///
80/// For more information on GF, see [gf-dev.github.io](https://gf-dev.github.io/).
81////////////////////////////////////////////////////////////////////////////////
82// Constants and type aliases
/// Type alias for nested HashMaps used in translation outputs.
/// Maps abstract tree name -> target language -> linearized string.
/// (`GFGrammar::translate` returns one such map per source language
/// that produced at least one parse.)
type HMS3 = HashMap<String, HashMap<String, String>>;

/// Every constituent has a unique id. If the constituent is discontinuous,
/// it will be represented with several brackets sharing the same id.
pub type FId = i32;
90
91////////////////////////////////////////////////////////////////////////////////
92/// Accumulator for completion suggestions during parsing.
93///
94/// Collects active parsing items that can suggest possible completions.
95#[derive(Debug, Clone)]
96pub struct CompletionAccumulator {
97 /// Optional vector of active items for suggestions.
98 pub value: Option<Vec<ActiveItem>>,
99}
100
101impl Default for CompletionAccumulator {
102 fn default() -> Self {
103 Self::new()
104 }
105}
106
107impl CompletionAccumulator {
108 /// Creates a new empty accumulator.
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// use gf_core::CompletionAccumulator;
114 /// let acc = CompletionAccumulator::new();
115 /// assert!(acc.value.is_none());
116 /// ```
117 pub fn new() -> Self {
118 CompletionAccumulator { value: None }
119 }
120}
121
/// Result of completion, including consumed tokens and suggestions.
///
/// This struct is returned by completion methods to provide feedback on
/// what has been parsed so far and possible ways to continue the input.
#[derive(Debug, Clone)]
pub struct CompletionResult {
    /// Tokens already consumed from the input.
    pub consumed: Vec<String>,
    /// Suggested completions for the next token(s).
    pub suggestions: Vec<String>,
}
133
134////////////////////////////////////////////////////////////////////////////////
/// A Grammatical Framework grammar consisting of one abstract and multiple concretes.
///
/// The `GFGrammar` type encapsulates an abstract grammar (logical structure) and
/// one or more concrete grammars (linearizations). The abstract defines categories
/// and functions independently of form, while concretes provide manifestations
/// (e.g., natural language strings).
///
/// # Examples
///
/// ```
/// use gf_core::{GFGrammar, GFAbstract};
/// use std::collections::HashMap;
/// let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
/// let concretes = HashMap::new();
/// let grammar = GFGrammar::new(abstract_, concretes);
/// assert_eq!(grammar.abstract_grammar.startcat, "S");
/// ```
pub struct GFGrammar {
    /// The abstract grammar (only one per GFGrammar).
    pub abstract_grammar: GFAbstract,

    /// Map of language codes (concrete grammar names) to concrete grammars.
    pub concretes: HashMap<String, GFConcrete>,
}
159
160impl GFGrammar {
161 /// Creates a new GFGrammar from an abstract and concretes.
162 ///
163 /// # Arguments
164 /// * `abstract_` - The abstract grammar.
165 /// * `concretes` - A map of language codes to concrete grammars.
166 ///
167 /// # Examples
168 ///
169 /// ```
170 /// use gf_core::{GFGrammar, GFAbstract};
171 /// use std::collections::HashMap;
172 /// let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
173 /// let concretes = HashMap::new();
174 /// let grammar = GFGrammar::new(abstract_, concretes);
175 /// assert_eq!(grammar.abstract_grammar.startcat, "S");
176 /// ```
177 pub fn new(
178 abstract_: GFAbstract,
179 concretes: HashMap<String, GFConcrete>,
180 ) -> Self {
181 GFGrammar { abstract_grammar: abstract_, concretes }
182 }
183
184 /// Converts from PGF JSON format to GFGrammar.
185 ///
186 /// This method deserializes a PGF structure (typically from JSON) into a usable GFGrammar.
187 ///
188 /// # Arguments
189 /// * `json` - The PGF structure to convert.
190 ///
191 /// # Examples
192 ///
193 /// Assuming a valid PGF instance, this creates a grammar:
194 ///
195 /// ```
196 /// // Note: This doctest is illustrative; actual PGF creation may require deserialization.
197 /// use gf_core::{GFGrammar, PGF, Abstract, Concrete};
198 /// use std::collections::HashMap;
199 /// let abstract_ = Abstract { name: "TestGrammar".to_string(), startcat: "S".to_string(), funs: HashMap::new() };
200 /// let concretes = HashMap::new();
201 /// let pgf = PGF { abstract_, concretes };
202 /// let grammar = GFGrammar::from_json(pgf);
203 /// assert_eq!(grammar.abstract_grammar.startcat, "S");
204 /// ```
205 pub fn from_json(json: PGF) -> Self {
206 debug_println!("Loading GFGrammar from JSON with start category: {}", json.abstract_.startcat);
207 debug_println!("Found {} concrete grammars: {:?}", json.concretes.len(), json.concretes.keys().collect::<Vec<_>>());
208
209 // FIXME: HashMap ordering is non-deterministic which might cause problems
210 // when we try to match the ordering of the generated json file.
211 let cncs: HashMap<String, GFConcrete> = json
212 .concretes
213 .into_iter()
214 .map(|(key, concrete)| {
215 debug_println!("Loading concrete grammar: {}", key);
216 (key, GFConcrete::from_json(concrete))
217 })
218 .collect();
219
220 let abstract_grammar = GFAbstract::from_json(json.abstract_);
221 debug_println!("GFGrammar loaded successfully with {} concrete grammars", cncs.len());
222
223 GFGrammar {
224 abstract_grammar,
225 concretes: cncs,
226 }
227 }
228
229 /// Translates input from one language to another (or all if unspecified).
230 ///
231 /// Parses the input in the source language(s) and linearizes to target language(s).
232 ///
233 /// # Arguments
234 /// * `input` - The string to translate.
235 /// * `from_lang` - Optional source language code.
236 /// * `to_lang` - Optional target language code.
237 ///
238 /// # Returns
239 /// Vector of translation maps (source -> tree -> target -> output).
240 pub fn translate(
241 &self,
242 input: &str,
243 from_lang: Option<&str>,
244 to_lang: Option<&str>,
245 ) -> Vec<HMS3> {
246 debug_println!("Starting translation of input: '{}'", input);
247 debug_println!("From language: {:?}, To language: {:?}", from_lang, to_lang);
248
249 let mut outputs: Vec<HMS3> = Vec::new();
250
251 let from_cncs = if let Some(lang) = from_lang {
252 let mut map = HashMap::new();
253 if let Some(concrete) = self.concretes.get(lang) {
254 map.insert(lang.to_string(), concrete.clone());
255 }
256 map
257 } else {
258 self.concretes.clone()
259 };
260
261 let to_cncs = if let Some(lang) = to_lang {
262 let mut map = HashMap::new();
263 if let Some(concrete) = self.concretes.get(lang) {
264 map.insert(lang.to_string(), concrete.clone());
265 }
266 map
267 } else {
268 self.concretes.clone()
269 };
270
271 for (lang_code, concrete) in &from_cncs {
272 debug_println!("Attempting to parse with language: {}", lang_code);
273 let trees =
274 concrete.parse_string(input, &self.abstract_grammar.startcat);
275 debug_println!("Found {} parse tree(s) for language {}", trees.len(), lang_code);
276 if !trees.is_empty() {
277 let mut c1_outputs: HashMap<String, HashMap<String, String>> =
278 HashMap::new();
279 for tree in trees {
280 debug_println!("Processing parse tree: {}", tree.name);
281 let mut translations: HashMap<String, String> =
282 HashMap::new();
283 for (c2, to_concrete) in &to_cncs {
284 let linearized = to_concrete.linearize(&tree);
285 debug_println!("Linearized to {}: '{}'", c2, linearized);
286 translations
287 .insert(c2.clone(), linearized);
288 }
289 c1_outputs.insert(tree.name.clone(), translations);
290 }
291 outputs.push(c1_outputs);
292 }
293 }
294
295 debug_println!("Translation completed with {} result set(s)", outputs.len());
296 outputs
297 }
298
299 // Parser using the parser module
300 // TODO: Uncomment when parser module is fixed
301 /*
302 pub fn from_binary(data: &[u8]) -> Self {
303 let mut r = Cursor::new(data);
304 let major = read_int16(&mut r);
305 let minor = read_int16(&mut r);
306 if major != 1 || minor != 0 {
307 panic!("Unsupported PGF version {}:{}", major, minor);
308 }
309 let _flags = read_list(&mut r, read_flag); // Global flags, ignored
310 let abstract_ = read_abstract(&mut r);
311 let concretes_list = read_list(&mut r, read_concrete);
312 let concretes: HashMap<String, Concrete> = concretes_list.into_iter().map(|c| (c.name.clone(), c)).collect();
313 let pgf = PGF { abstract_, concretes };
314 Self::from_json(pgf)
315 }
316 */
317}
318
319////////////////////////////////////////////////////////////////////////////////
/// Abstract grammar defining logical structure via functions, categories, and types.
///
/// The abstract grammar specifies the semantic structure without tying it to specific
/// forms. It defines functions (fun) that combine categories (cat) into new categories.
///
/// # Example
///
/// ```gf
/// abstract Lang = {
/// cat Sentence;
/// fun MakeSentence : Noun -> Verb -> Sentence;
/// }
/// ```
#[derive(Debug, Clone)]
pub struct GFAbstract {
    /// Starting category for parsing/linearization.
    pub startcat: String,
    /// Map of function names to their types; private — accessed through
    /// `add_type`, `get_args`, and `get_cat`.
    types: HashMap<String, Type>,
}
340
/// Parser for abstract syntax expressions.
///
/// Handles function call syntax like `Function(Arg1, Arg2)` and nested expressions.
struct AbstractSyntaxParser {
    // Token stream produced by the tokenizer in `new`.
    tokens: Vec<String>,
    // Index of the next token to be consumed.
    position: usize,
}
348
349impl AbstractSyntaxParser {
    /// Tokenizes the input string and builds a parser positioned at the start.
    ///
    /// Mirrors the TypeScript tokenizer
    /// `str.match(/[\w\u00C0-\u00FF\'\.\"]+|\(|\)|\?|\:/g)`, with two visible
    /// differences: `,` is also treated as punctuation here, and word
    /// characters are matched via `char::is_alphanumeric` (full Unicode)
    /// rather than the explicit `\u00C0-\u00FF` range.
    fn new(input: &str) -> Self {
        let mut tokens = Vec::new();
        let mut current_token = String::new();

        for ch in input.chars() {
            match ch {
                // Punctuation: flush any pending word, then emit the mark
                // as its own single-character token.
                '(' | ')' | '?' | ':' | ',' => {
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                    tokens.push(ch.to_string());
                }
                // Whitespace only terminates the current word.
                c if c.is_whitespace() => {
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                }
                // Word characters accumulate into the current token.
                c if c.is_alphanumeric() || c == '_' || c == '\'' || c == '.' || c == '"' => {
                    current_token.push(c);
                }
                _ => {
                    // Any other character becomes a stand-alone token.
                    if !current_token.is_empty() {
                        tokens.push(current_token.clone());
                        current_token.clear();
                    }
                    tokens.push(ch.to_string());
                }
            }
        }

        // Flush a trailing word at end of input.
        if !current_token.is_empty() {
            tokens.push(current_token);
        }

        Self {
            tokens,
            position: 0,
        }
    }
394
    /// Returns the token at the current position without consuming it.
    fn current_token(&self) -> Option<&String> {
        self.tokens.get(self.position)
    }
398
399 fn advance(&mut self) -> Option<String> {
400 if self.position < self.tokens.len() {
401 let token = self.tokens[self.position].clone();
402 self.position += 1;
403 Some(token)
404 } else {
405 None
406 }
407 }
408
    /// Parses a complete expression starting at precedence level 0
    /// (the level at which space-separated arguments are collected).
    fn parse_expression(&mut self) -> Result<Fun, &'static str> {
        self.parse_tree_with_prec(0)
    }
412
413 // This follows the TypeScript parseTree_ implementation exactly
414 fn parse_tree_with_prec(&mut self, prec: usize) -> Result<Fun, &'static str> {
415 if self.tokens.is_empty() || self.position >= self.tokens.len() {
416 return Err("No more tokens");
417 }
418
419 let current = self.current_token().cloned();
420 if current.as_deref() == Some(")") {
421 return Err("Unexpected ')'");
422 }
423
424 let t = self.advance().ok_or("No token available")?;
425
426 if t == "(" {
427 let tree = self.parse_tree_with_prec(0)?;
428 // Expect closing parenthesis
429 if self.current_token().map(|s| s.as_str()) == Some(")") {
430 self.advance(); // consume ')'
431 }
432 Ok(tree)
433 } else if t == "?" {
434 Ok(Fun::new("?".to_string(), vec![]))
435 } else {
436 let mut tree = Fun::new(t, vec![]);
437
438 // Check if this is immediately followed by a parenthesis for function call syntax
439 // Function call syntax takes priority over space-separated args
440 if self.current_token().map(|s| s.as_str()) == Some("(") {
441 self.advance(); // consume '('
442
443 // Parse comma-separated arguments inside parentheses
444 if self.current_token().map(|s| s.as_str()) != Some(")") {
445 loop {
446 let arg = self.parse_tree_with_prec(1)?;
447 tree.args.push(arg);
448
449 match self.current_token().map(|s| s.as_str()) {
450 Some(",") => {
451 self.advance(); // consume comma
452 continue;
453 }
454 Some(")") => break,
455 _ => return Err("Expected ',' or ')'"),
456 }
457 }
458 }
459
460 if self.current_token().map(|s| s.as_str()) == Some(")") {
461 self.advance(); // consume ')'
462 }
463 } else if prec == 0 {
464 // TypeScript logic: when prec == 0, collect args until can't parse any more
465 // This handles space-separated args like "MakeS (NP) (VP)" and "hello world"
466 loop {
467 match self.parse_tree_with_prec(1) {
468 Ok(child) => tree.args.push(child),
469 Err(_) => break,
470 }
471 }
472 }
473
474 Ok(tree)
475 }
476 }
477}
478
479impl GFAbstract {
480 /// Creates a new abstract grammar.
481 ///
482 /// # Arguments
483 /// * `startcat` - The starting category.
484 /// * `types` - Map of function names to types.
485 ///
486 /// # Examples
487 ///
488 /// ```
489 /// use gf_core::GFAbstract;
490 /// use std::collections::HashMap;
491 /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
492 /// assert_eq!(abs.startcat, "S");
493 /// ```
494 pub fn new(startcat: String, types: HashMap<String, Type>) -> Self {
495 GFAbstract { startcat, types }
496 }
497
498 /// Converts from JSON abstract to GFAbstract.
499 ///
500 /// # Arguments
501 /// * `json` - The Abstract structure from JSON.
502 ///
503 /// # Examples
504 ///
505 /// ```
506 /// use gf_core::{GFAbstract, Abstract};
507 /// use std::collections::HashMap;
508 /// let json = Abstract { name: "TestGrammar".to_string(), startcat: "S".to_string(), funs: HashMap::new() };
509 /// let abs = GFAbstract::from_json(json);
510 /// assert_eq!(abs.startcat, "S");
511 /// ```
512 pub fn from_json(json: Abstract) -> Self {
513 let types = json
514 .funs
515 .into_iter()
516 .map(|(key, fun)| (key, Type::new(fun.args, fun.cat)))
517 .collect();
518
519 GFAbstract { startcat: json.startcat, types }
520 }
521
522 /// Adds a new function type to the abstract.
523 ///
524 /// # Arguments
525 /// * `fun` - Function name.
526 /// * `args` - Argument categories.
527 /// * `cat` - Result category.
528 ///
529 /// # Examples
530 ///
531 /// ```
532 /// use gf_core::GFAbstract;
533 /// use std::collections::HashMap;
534 /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
535 /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
536 /// assert_eq!(abs.get_cat("MakeS").unwrap(), "S");
537 /// ```
538 pub fn add_type(&mut self, fun: String, args: Vec<String>, cat: String) {
539 self.types.insert(fun, Type::new(args, cat));
540 }
541
542 /// Gets arguments for a function.
543 ///
544 /// # Arguments
545 /// * `fun` - Function name.
546 ///
547 /// # Returns
548 /// Optional reference to vector of argument categories.
549 ///
550 /// # Examples
551 ///
552 /// ```
553 /// use gf_core::GFAbstract;
554 /// use std::collections::HashMap;
555 /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
556 /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
557 /// assert_eq!(abs.get_args("MakeS").unwrap(), &vec!["NP".to_string(), "VP".to_string()]);
558 /// ```
559 pub fn get_args(&self, fun: &str) -> Option<&Vec<String>> {
560 self.types.get(fun).map(|t| &t.args)
561 }
562
563 /// Gets category for a function.
564 ///
565 /// # Arguments
566 /// * `fun` - Function name.
567 ///
568 /// # Returns
569 /// Optional reference to result category.
570 ///
571 /// # Examples
572 ///
573 /// ```
574 /// use gf_core::GFAbstract;
575 /// use std::collections::HashMap;
576 /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
577 /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
578 /// assert_eq!(abs.get_cat("MakeS").unwrap(), "S");
579 /// ```
580 pub fn get_cat(&self, fun: &str) -> Option<&String> {
581 self.types.get(fun).map(|t| &t.cat)
582 }
583
    /// Annotates meta variables in a tree with types.
    ///
    /// Recursively walks the tree: a meta node (`?`) receives the expected
    /// type; for a node naming a known function, each argument is annotated
    /// with the corresponding argument category from the abstract type.
    /// `zip` stops at the shorter list, so surplus arguments (or surplus
    /// expected types) are silently left untouched.
    ///
    /// # Arguments
    /// * `tree` - The tree to annotate.
    /// * `r#type` - Optional type to assign to a meta node.
    ///
    /// # Returns
    /// Annotated tree.
    fn annotate(&self, mut tree: Fun, r#type: Option<&String>) -> Fun {
        if tree.is_meta() {
            tree.type_ = r#type.cloned();
        } else if let Some(typ) = self.types.get(&tree.name) {
            for (arg, expected_type) in tree.args.iter_mut().zip(&typ.args) {
                *arg = self.annotate(arg.clone(), Some(expected_type));
            }
        }
        tree
    }
604
    /// Handles literal wrapping for strings, ints, floats.
    ///
    /// When the expected type is `String`, `Int`, or `Float`, rewrites the
    /// node name to `<Type>_Literal_<name>`; otherwise recurses into the
    /// arguments of known functions using the argument categories from the
    /// abstract type. Meta nodes (`?`) are left untouched.
    ///
    /// # Arguments
    /// * `tree` - The tree to process.
    /// * `r#type` - The expected type (category) of the current node.
    ///
    /// # Returns
    /// Processed tree.
    ///
    /// # Examples
    ///
    /// ```
    /// use gf_core::{GFAbstract, Fun};
    /// use std::collections::HashMap;
    /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
    /// let tree = Fun::new("\"hello\"".to_string(), vec![]);
    /// let handled = abs.handle_literals(tree, "String");
    /// assert_eq!(handled.name, "String_Literal_\"hello\"");
    /// ```
    pub fn handle_literals(&self, mut tree: Fun, r#type: &str) -> Fun {
        if tree.name != "?" {
            if r#type == "String" || r#type == "Int" || r#type == "Float" {
                tree.name = format!("{}_Literal_{}", r#type, tree.name);
            } else if let Some(typ) = self.types.get(&tree.name) {
                for (arg, expected_type) in tree.args.iter_mut().zip(&typ.args)
                {
                    *arg = self.handle_literals(arg.clone(), expected_type);
                }
            }
        }
        tree
    }
639
640 /// Deep copies a tree.
641 ///
642 /// # Arguments
643 /// * `x` - The tree to copy.
644 ///
645 /// # Returns
646 /// A deep clone of the tree.
647 ///
648 /// # Examples
649 ///
650 /// ```
651 /// use gf_core::{GFAbstract, Fun};
652 /// use std::collections::HashMap;
653 /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
654 /// let tree = Fun::new("Test".to_string(), vec![Fun::new("Arg".to_string(), vec![])]);
655 /// let copy = abs.copy_tree(&tree);
656 /// assert_eq!(tree.name, copy.name);
657 /// assert_eq!(tree.args.len(), copy.args.len());
658 /// ```
659 #[allow(clippy::only_used_in_recursion)]
660 pub fn copy_tree(&self, x: &Fun) -> Fun {
661 let mut tree = Fun::new(x.name.clone(), vec![]);
662 tree.type_ = x.type_.clone();
663 for arg in &x.args {
664 tree.args.push(self.copy_tree(arg));
665 }
666 tree
667 }
668
669 /// Parses a string into a tree.
670 ///
671 /// # Arguments
672 /// * `str` - The string representation of the tree.
673 /// * `type_` - Optional type for annotation.
674 ///
675 /// # Returns
676 /// Optional parsed and annotated tree.
677 ///
678 /// # Examples
679 ///
680 /// ```
681 /// use gf_core::GFAbstract;
682 /// use std::collections::HashMap;
683 /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
684 /// let tree = abs.parse_tree("Test (Arg)", None);
685 /// assert!(tree.is_some());
686 /// let tree = tree.unwrap();
687 /// assert_eq!(tree.name, "Test");
688 /// assert_eq!(tree.args.len(), 1);
689 /// assert_eq!(tree.args[0].name, "Arg");
690 /// ```
691 pub fn parse_tree(
692 &self,
693 str: &str,
694 r#type: Option<&String>,
695 ) -> Option<Fun> {
696 let mut parser = AbstractSyntaxParser::new(str);
697 match parser.parse_expression() {
698 Ok(tree) => Some(self.annotate(tree, r#type)),
699 Err(_) => None,
700 }
701 }
702}
703
704////////////////////////////////////////////////////////////////////////////////
705/* ... rest of the code ... */
706
/// Concrete syntax for linearizing abstract trees.
///
/// Defines realizations (linearizations) of the abstract grammar, which can be
/// natural language, images, etc.
///
/// A concrete grammar maps abstract functions to linearization rules, which
/// produce output forms based on the arguments.
///
/// # Example
///
/// ```gf
/// concrete LangEng of Lang = {
/// lincat Sentence = Str;
/// lin MakeSentence n v = n ++ v;
/// }
/// ```
#[derive(Clone, Debug)]
pub struct GFConcrete {
    /// Flags for concrete-specific settings.
    pub flags: HashMap<String, String>,
    /// Concrete functions; indexed by function id in `GFConcrete::new`.
    functions: Vec<RuntimeCncFun>,
    /// Start category FId ranges (inclusive), keyed by category name.
    pub start_cats: HashMap<String, (i32, i32)>,
    /// Total number of FIds.
    pub total_fids: i32,
    /// Productions keyed by result FId (the "parameter" productions).
    pub pproductions: HashMap<i32, Vec<Production>>,
    /// Productions keyed by label ("funName_argFid_..."), built during
    /// construction and used for linearization lookup.
    lproductions: HashMap<String, Vec<LProduction>>,
}
738
739impl GFConcrete {
    /// Creates a new concrete grammar.
    ///
    /// Besides storing the inputs, this builds the `lproductions` index:
    /// for every `Apply` production it registers one entry per key of the
    /// form `funName_fid1_fid2_...`, where each `fidN` is an argument FId —
    /// expanded through `Coerce` productions so that coercible arguments are
    /// registered under their coercion targets as well.
    ///
    /// # Arguments
    /// * `flags` - Concrete-specific flags.
    /// * `functions` - List of runtime concrete functions.
    /// * `productions` - Map of productions by FId.
    /// * `start_cats` - Start category ranges.
    /// * `total_fids` - Total number of FIds.
    ///
    /// # Examples
    ///
    /// ```
    /// use gf_core::{GFConcrete, RuntimeCncFun, LinType, Production};
    /// use std::collections::HashMap;
    /// let flags = HashMap::new();
    /// let functions = vec![];
    /// let productions = HashMap::new();
    /// let start_cats = HashMap::new();
    /// let total_fids = 0;
    /// let concrete = GFConcrete::new(flags, functions, productions, start_cats, total_fids);
    /// ```
    pub fn new(
        flags: HashMap<String, String>,
        functions: Vec<RuntimeCncFun>,
        productions: HashMap<i32, Vec<Production>>,
        start_cats: HashMap<String, (i32, i32)>,
        total_fids: i32,
    ) -> Self {
        let mut lproductions = HashMap::new();

        // Recursively extends `key` with one argument FId per step (position
        // `i`); when an argument has Coerce productions, a branch is taken
        // for every coercion target instead of the raw FId.
        #[allow(clippy::too_many_arguments)]
        fn register_recursive(
            args: &[PArg],
            key: String,
            i: usize,
            lproductions: &mut HashMap<String, Vec<LProduction>>,
            productions: &HashMap<i32, Vec<Production>>,
            fun: &RuntimeCncFun,
            fid: FId,
            depth: usize,
        ) {
            if depth > 100 {
                // Prevent stack overflow on pathological coercion chains;
                // such branches are dropped silently.
                return;
            }
            if i < args.len() {
                let arg = args[i].fid;
                let mut count = 0;

                // Branch once per Coerce production of this argument.
                if let Some(rules) = productions.get(&arg) {
                    for rule in rules {
                        if let Production::Coerce(ref coerce_rule) = rule {
                            let new_key =
                                format!("{}_{}", key, coerce_rule.arg);
                            register_recursive(
                                args,
                                new_key,
                                i + 1,
                                lproductions,
                                productions,
                                fun,
                                fid,
                                depth + 1,
                            );
                            count += 1;
                        }
                    }
                }

                // No coercions: use the argument FId directly.
                if count == 0 {
                    let new_key = format!("{key}_{arg}");
                    register_recursive(
                        args,
                        new_key,
                        i + 1,
                        lproductions,
                        productions,
                        fun,
                        fid,
                        depth + 1,
                    );
                }
            } else {
                // All argument positions consumed: record the full key.
                lproductions
                    .entry(key)
                    .or_default()
                    .push(LProduction { fun: fun.clone(), fid });
            }
        }

        for (&fid, rules) in &productions {
            for rule in rules {
                if let Production::Apply(ref apply_rule) = rule {
                    match apply_rule.to_apply_fun() {
                        ApplyFun::FId(fun_id) => {
                            // For FId, we need to find the corresponding function name
                            // This is a simplified approach - in real usage we'd need better mapping
                            // NOTE(review): out-of-range fun_ids are silently
                            // skipped — confirm this is intended.
                            if (fun_id as usize) < functions.len() {
                                let fun = &functions[fun_id as usize];
                                register_recursive(
                                    &apply_rule.args,
                                    fun.name.clone(),
                                    0,
                                    &mut lproductions,
                                    &productions,
                                    fun,
                                    fid,
                                    0,
                                );
                            }
                        }
                        ApplyFun::CncFun(json_fun) => {
                            // Create a temporary runtime function for registration
                            let runtime_fun = RuntimeCncFun::new(
                                json_fun.name.clone(),
                                LinType::FId(json_fun.lins.clone()),
                            );
                            register_recursive(
                                &apply_rule.args,
                                runtime_fun.name.clone(),
                                0,
                                &mut lproductions,
                                &productions,
                                &runtime_fun,
                                fid,
                                0,
                            );
                        }
                    }
                }
            }
        }

        GFConcrete {
            flags,
            pproductions: productions,
            functions,
            start_cats,
            total_fids,
            lproductions,
        }
    }
882
883 /// Converts from JSON concrete to GFConcrete.
884 ///
885 /// This method deserializes a Concrete structure into a usable GFConcrete,
886 /// resolving sequence indices to actual symbols.
887 ///
888 /// # Arguments
889 /// * `json` - The Concrete structure from JSON.
890 pub fn from_json(json: Concrete) -> Self {
891 let productions: HashMap<i32, Vec<Production>> = json.productions;
892
893 let sequences: Vec<Vec<Sym>> = json.sequences;
894
895 let functions: Vec<RuntimeCncFun> = json
896 .functions
897 .into_iter()
898 .map(|f| {
899 // Convert Vec<i32> indices to actual symbol sequences
900 let lins = if f.lins.is_empty() {
901 LinType::Sym(vec![])
902 } else {
903 let symbol_sequences: Vec<Vec<Sym>> = f
904 .lins
905 .into_iter()
906 .map(|seq_idx| {
907 if seq_idx as usize >= sequences.len() {
908 vec![]
909 } else {
910 sequences[seq_idx as usize].clone()
911 }
912 })
913 .collect();
914 LinType::Sym(symbol_sequences)
915 };
916
917 RuntimeCncFun { name: f.name, lins }
918 })
919 .collect();
920
921 let start_cats = json
922 .categories
923 .into_iter()
924 .map(|(key, cat)| (key, (cat.start, cat.end)))
925 .collect();
926
927 GFConcrete::new(
928 json.flags,
929 functions,
930 productions,
931 start_cats,
932 json.totalfids,
933 )
934 }
935
    /// Linearizes a tree into symbols with tags.
    ///
    /// Produces a vector of linearized symbol rows for the tree. Literal
    /// leaves get sentinel fids (string: -1, int: -2, float: -3), metas get
    /// one row per FId in their category's start range, and function nodes
    /// are looked up in `lproductions` by a key built from the function name
    /// and the fids of the linearized arguments.
    ///
    /// # Arguments
    /// * `tree` - The abstract tree to linearize.
    /// * `tag` - The tag for tracking; child tags are `"{tag}-{i}"`.
    ///
    /// # Returns
    /// Vector of LinearizedSym (may be empty when no production matches).
    pub fn linearize_syms(&self, tree: &Fun, tag: &str) -> Vec<LinearizedSym> {
        let mut res = Vec::new();

        if tree.is_string() {
            // String literal: sentinel fid -1.
            let mut sym = SymKS::new(vec![tree.name.clone()]);
            sym.tag = Some(tag.to_string());
            res.push(LinearizedSym {
                fid: -1,
                table: vec![vec![Sym::SymKS(sym)]],
            });
        } else if tree.is_int() {
            // Int literal: sentinel fid -2.
            let mut sym = SymKS::new(vec![tree.name.clone()]);
            sym.tag = Some(tag.to_string());
            res.push(LinearizedSym {
                fid: -2,
                table: vec![vec![Sym::SymKS(sym)]],
            });
        } else if tree.is_float() {
            // Float literal: sentinel fid -3.
            let mut sym = SymKS::new(vec![tree.name.clone()]);
            sym.tag = Some(tag.to_string());
            res.push(LinearizedSym {
                fid: -3,
                table: vec![vec![Sym::SymKS(sym)]],
            });
        } else if tree.is_meta() {
            // Meta variable: one row per FId in the start-category range of
            // its annotated type (inclusive; (0, 0) when type is unknown).
            let cat = self
                .start_cats
                .get(tree.type_.as_ref().unwrap_or(&String::new()))
                .cloned()
                .unwrap_or((0, 0));
            let sym = Sym::SymKS(SymKS {
                id: "KS".to_string(),
                tokens: vec![tree.name.clone()],
                tag: Some(tag.to_string()),
            });

            for fid in cat.0..=cat.1 {
                res.push(LinearizedSym {
                    fid,
                    table: vec![vec![sym.clone()]],
                });
            }
        } else {
            // Function application: linearize each argument, keeping only its
            // first row (alternative rows of arguments are discarded here).
            let cs: Vec<LinearizedSym> = tree
                .args
                .iter()
                .enumerate()
                .filter_map(|(i, arg)| {
                    let syms = self.linearize_syms(arg, &format!("{tag}-{i}"));
                    syms.first().cloned()
                })
                .collect();

            // Build the lproductions key: "name_fid1_fid2_...".
            let mut key = tree.name.clone();
            for c in &cs {
                if c.fid == -5 {
                    // NOTE(review): fid -5 is a sentinel handled by substring
                    // lookup instead of exact key match; its producer is not
                    // visible in this file — confirm intended semantics.
                    if let Some((matched_key, _)) = self
                        .lproductions
                        .iter()
                        .find(|(k, _)| k.contains(&tree.name))
                    {
                        key = matched_key.clone();
                    }
                    break;
                } else {
                    key = format!("{}_{}", key, c.fid);
                }
            }

            if let Some(rules) = self.lproductions.get(&key) {
                for rule in rules {
                    let mut row =
                        LinearizedSym { fid: rule.fid, table: Vec::new() };

                    match &rule.fun.lins {
                        LinType::Sym(lins) => {
                            for (j, lin) in lins.iter().enumerate() {
                                let mut toks: Vec<Sym> = Vec::new();
                                if j >= row.table.len() {
                                    row.table.push(Vec::new());
                                }

                                for sym0 in lin {
                                    match sym0 {
                                        // Category/literal slots splice in
                                        // row j of the i-th argument's table.
                                        Sym::SymCat { i, .. }
                                        | Sym::SymLit { i, .. } => {
                                            if *i < cs.len()
                                                && j < cs[*i].table.len()
                                            {
                                                let ts = &cs[*i].table[j];
                                                toks.extend_from_slice(ts);
                                            }
                                        }
                                        // Token symbols are re-tagged with
                                        // this node's tag.
                                        Sym::SymKS(ks) => {
                                            toks.push(Sym::SymKS(
                                                ks.tag_with(tag),
                                            ));
                                        }
                                        Sym::SymKP(kp) => {
                                            toks.push(Sym::SymKP(
                                                kp.tag_with(tag),
                                            ));
                                        }
                                    }
                                }

                                row.table[j] = toks;
                            }
                        }
                        LinType::FId(_) => {
                            // Handle FId case - create empty table
                            row.table.push(Vec::new());
                        }
                    }

                    res.push(row);
                }
            }
        }

        res
    }
1069
    /// Converts symbols to tagged tokens.
    ///
    /// KS symbols emit one tagged token per token string. KP (pre) symbols
    /// look ahead at the first token of the next KS symbol: the first
    /// alternative whose prefix matches is emitted; otherwise the default
    /// tokens are used. Symbols without a tag contribute nothing; all other
    /// symbol kinds are ignored.
    ///
    /// # Arguments
    /// * `syms` - Slice of symbols.
    ///
    /// # Returns
    /// Vector of TaggedString.
    pub fn syms2toks(&self, syms: &[Sym]) -> Vec<TaggedString> {
        let mut ts = Vec::new();

        // Indexed loop (not an iterator) because KP handling peeks at syms[i + 1].
        for i in 0..syms.len() {
            match &syms[i] {
                Sym::SymKS(sym) => {
                    if let Some(tag) = &sym.tag {
                        for token in &sym.tokens {
                            ts.push(TaggedString::new(token, tag));
                        }
                    }
                }
                Sym::SymKP(sym) => {
                    let mut added_alt = false;

                    // Look ahead: choose an alternative whose prefix matches
                    // the first token of the following KS symbol.
                    if i + 1 < syms.len() {
                        if let Sym::SymKS(next_sym) = &syms[i + 1] {
                            if let Some(next_token) = next_sym.tokens.first() {
                                for alt in &sym.alts {
                                    if alt
                                        .prefixes
                                        .iter()
                                        .any(|p| next_token.starts_with(p))
                                    {
                                        for symks in &alt.tokens {
                                            if let Some(tag) = &sym.tag {
                                                for token in &symks.tokens {
                                                    ts.push(
                                                        TaggedString::new(
                                                            token, tag,
                                                        ),
                                                    );
                                                }
                                            }
                                        }
                                        added_alt = true;
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    // No alternative matched: fall back to the default tokens.
                    if !added_alt {
                        if let Some(tag) = &sym.tag {
                            for symks in &sym.tokens {
                                for token in &symks.tokens {
                                    ts.push(TaggedString::new(token, tag));
                                }
                            }
                        }
                    }
                }
                _ => {} // Ignore non-token symbols
            }
        }

        ts
    }
1138
1139 /// Linearizes a tree to all possible strings.
1140 ///
1141 /// # Arguments
1142 /// * `tree` - The tree to linearize.
1143 ///
1144 /// # Returns
1145 /// Vector of all possible linearizations.
1146 pub fn linearize_all(&self, tree: &Fun) -> Vec<String> {
1147 self.linearize_syms(tree, "0")
1148 .into_iter()
1149 .map(|r| self.unlex(&self.syms2toks(&r.table[0])))
1150 .collect()
1151 }
1152
1153 /// Linearizes a tree to a single string (first variant).
1154 ///
1155 /// # Arguments
1156 /// * `tree` - The tree to linearize.
1157 ///
1158 /// # Returns
1159 /// The linearized string.
1160 pub fn linearize(&self, tree: &Fun) -> String {
1161 debug_println!("Linearizing tree: {}", tree.name);
1162 let res = self.linearize_syms(tree, "0");
1163 debug_println!("Generated {} linearized symbol row(s)", res.len());
1164 if !res.is_empty() {
1165 let result = self.unlex(&self.syms2toks(&res[0].table[0]));
1166 debug_println!("Linearization result: '{}'", result);
1167 result
1168 } else {
1169 debug_println!("No linearization result generated");
1170 String::new()
1171 }
1172 }
1173
1174 /// Linearizes a tree with tags.
1175 ///
1176 /// # Arguments
1177 /// * `tree` - The tree to linearize.
1178 ///
1179 /// # Returns
1180 /// Vector of tagged strings.
1181 pub fn tag_and_linearize(&self, tree: &Fun) -> Vec<TaggedString> {
1182 let res = self.linearize_syms(tree, "0");
1183 if !res.is_empty() {
1184 self.syms2toks(&res[0].table[0])
1185 } else {
1186 Vec::new()
1187 }
1188 }
1189
1190 /// Joins tagged strings into a single string, handling spacing.
1191 ///
1192 /// Applies rules for when to insert spaces between tokens.
1193 ///
1194 /// # Arguments
1195 /// * `ts` - Slice of tagged strings.
1196 ///
1197 /// # Returns
1198 /// The joined string.
1199 fn unlex(&self, ts: &[TaggedString]) -> String {
1200 if ts.is_empty() {
1201 return String::new();
1202 }
1203
1204 let no_space_after = Regex::new(r"^[\(\-\[]").unwrap();
1205 let no_space_before = Regex::new(r"^[\.\,\?\!\)\:\;\-\]]").unwrap();
1206
1207 let mut s = String::new();
1208
1209 for i in 0..ts.len() {
1210 let t = &ts[i].token;
1211 s.push_str(t);
1212
1213 if i + 1 < ts.len() {
1214 let after = &ts[i + 1].token;
1215 if !no_space_after.is_match(t)
1216 && !no_space_before.is_match(after)
1217 {
1218 s.push(' ');
1219 }
1220 }
1221 }
1222
1223 s
1224 }
1225
1226 /// Tokenizes input string by whitespace.
1227 ///
1228 /// # Arguments
1229 /// * `input` - The input string.
1230 ///
1231 /// # Returns
1232 /// Vector of tokens.
1233 ///
1234 fn tokenize(&self, input: &str) -> Vec<String> {
1235 input.split_whitespace().map(String::from).collect()
1236 }
1237
1238 /// Parses a string into trees using the given start category.
1239 ///
1240 /// # Arguments
1241 /// * `input` - The input string.
1242 /// * `cat` - The start category.
1243 ///
1244 /// # Returns
1245 /// Vector of parsed trees.
1246 pub fn parse_string(&self, input: &str, cat: &str) -> Vec<Fun> {
1247 debug_println!("Parsing string '{}' with start category '{}'", input, cat);
1248 let tokens = self.tokenize(input);
1249 debug_println!("Tokenized input into {} tokens: {:?}", tokens.len(), tokens);
1250
1251 let mut ps = ParseState::new(self.clone(), cat.to_string());
1252 for (i, token) in tokens.iter().enumerate() {
1253 debug_println!("Processing token {} of {}: '{}'", i + 1, tokens.len(), token);
1254 if !ps.next(token) {
1255 debug_println!("Parsing failed at token '{}'", token);
1256 return Vec::new();
1257 }
1258 }
1259
1260 let trees = ps.extract_trees();
1261 debug_println!("Extracted {} parse tree(s)", trees.len());
1262 trees
1263 }
1264
1265 /// Provides completions for partial input.
1266 ///
1267 /// # Arguments
1268 /// * `input` - The partial input string.
1269 /// * `cat` - The start category.
1270 ///
1271 /// # Returns
1272 /// CompletionResult with consumed tokens and suggestions.
1273 pub fn complete(&self, input: &str, cat: &str) -> CompletionResult {
1274 let mut tokens: Vec<String> = input
1275 .split_whitespace()
1276 .filter(|t| !t.is_empty())
1277 .map(|t| t.to_string())
1278 .collect();
1279
1280 let current = tokens.pop().unwrap_or_default();
1281
1282 let mut ps = ParseState::new(self.clone(), cat.to_string());
1283 let mut ps2 = ParseState::new(self.clone(), cat.to_string());
1284
1285 for token in &tokens {
1286 if !ps.next(token) {
1287 return CompletionResult {
1288 consumed: vec![],
1289 suggestions: vec![],
1290 };
1291 }
1292 ps2.next(token);
1293 }
1294
1295 let mut current = current;
1296 if ps2.next(¤t) {
1297 ps.next(¤t);
1298 tokens.push(current.clone());
1299 current = String::new();
1300 }
1301
1302 let acc = ps.complete(¤t);
1303 let mut suggestions = Vec::new();
1304
1305 if let Some(items) = acc.value {
1306 for a in items {
1307 for s in &a.seq {
1308 match s {
1309 Sym::SymKS(sym) => {
1310 for t in &sym.tokens {
1311 suggestions.push(t.clone());
1312 }
1313 }
1314 Sym::SymKP(sym) => {
1315 for symks in &sym.tokens {
1316 for t in &symks.tokens {
1317 suggestions.push(t.clone());
1318 }
1319 }
1320 }
1321 _ => {}
1322 }
1323 }
1324 }
1325 }
1326
1327 CompletionResult { consumed: tokens, suggestions }
1328 }
1329}
1330
1331////////////////////////////////////////////////////////////////////////////////
1332/// Prefix tree for parsing items.
1333///
1334/// Used to efficiently store and retrieve sequences of active items during parsing.
1335#[derive(Debug, Clone)]
1336pub struct Trie<T> {
1337 /// Value at node.
1338 value: Option<Vec<T>>,
1339 /// Child nodes.
1340 items: HashMap<String, Trie<T>>,
1341}
1342
1343impl<T: Clone> Default for Trie<T> {
1344 fn default() -> Self {
1345 Self::new()
1346 }
1347}
1348
1349impl<T: Clone> Trie<T> {
1350 /// Creates a new trie.
1351 ///
1352 /// # Examples
1353 ///
1354 /// ```
1355 /// use gf_core::Trie;
1356 /// let trie: Trie<i32> = Trie::new();
1357 /// assert!(trie.is_empty());
1358 /// ```
1359 pub fn new() -> Self {
1360 Trie { value: None, items: HashMap::new() }
1361 }
1362
1363 /// Inserts a chain of keys with value.
1364 ///
1365 /// # Arguments
1366 /// * `keys` - Slice of keys.
1367 /// * `obj` - Value to insert.
1368 pub fn insert_chain(&mut self, keys: &[String], obj: Vec<T>) {
1369 let mut node = self;
1370 for key in keys {
1371 node = node.items.entry(key.clone()).or_default();
1372 }
1373 node.value = Some(obj);
1374 }
1375
1376 /// Inserts a chain with single item.
1377 ///
1378 /// # Arguments
1379 /// * `keys` - Slice of keys.
1380 /// * `obj` - Single item to insert.
1381 ///
1382 /// # Examples
1383 ///
1384 /// ```
1385 /// use gf_core::Trie;
1386 /// let mut trie: Trie<i32> = Trie::new();
1387 /// trie.insert_chain1(&["a".to_string(), "b".to_string()], 42);
1388 /// // Verify the key path exists
1389 /// assert!(trie.lookup("a").is_some());
1390 /// assert!(trie.lookup("a").unwrap().lookup("b").is_some());
1391 /// ```
1392 pub fn insert_chain1(&mut self, keys: &[String], obj: T) {
1393 let mut node = self;
1394 for key in keys {
1395 node = node.items.entry(key.clone()).or_default();
1396 }
1397 if let Some(value) = &mut node.value {
1398 value.push(obj);
1399 } else {
1400 node.value = Some(vec![obj]);
1401 }
1402 }
1403
1404 /// Looks up a key.
1405 ///
1406 /// # Arguments
1407 /// * `key` - The key to look up.
1408 ///
1409 /// # Returns
1410 /// Optional reference to the sub-trie.
1411 pub fn lookup(&self, key: &str) -> Option<&Trie<T>> {
1412 self.items.get(key)
1413 }
1414
1415 /// Checks if trie is empty.
1416 ///
1417 /// # Returns
1418 /// True if no value and no items.
1419 ///
1420 /// # Examples
1421 ///
1422 /// ```
1423 /// use gf_core::Trie;
1424 /// let trie: Trie<i32> = Trie::new();
1425 /// assert!(trie.is_empty());
1426 /// ```
1427 pub fn is_empty(&self) -> bool {
1428 self.value.is_none() && self.items.is_empty()
1429 }
1430}
1431
1432////////////////////////////////////////////////////////////////////////////////
1433/// Chart for parsing.
1434///
1435/// Manages active and passive items during chart parsing.
1436#[derive(Debug, Clone)]
1437pub struct Chart {
1438 /// Active items by FId and label.
1439 active: HashMap<FId, HashMap<i32, Vec<ActiveItem>>>,
1440 /// Active items by offset.
1441 actives: Vec<HashMap<FId, HashMap<i32, Vec<ActiveItem>>>>,
1442 /// Passive FIds by key.
1443 passive: HashMap<String, FId>,
1444 /// Forest of productions.
1445 pub forest: HashMap<FId, Vec<Production>>,
1446 /// Next ID.
1447 pub next_id: FId,
1448 /// Current offset.
1449 pub offset: usize,
1450}
1451
1452impl Chart {
1453 /// Creates a new chart from concrete.
1454 ///
1455 /// Initializes the forest with productions from the concrete grammar.
1456 ///
1457 /// # Arguments
1458 /// * `concrete` - The concrete grammar.
1459 pub fn new(concrete: GFConcrete) -> Self {
1460 let mut forest = HashMap::new();
1461 for (&fid, prods) in &concrete.pproductions {
1462 forest.insert(fid, prods.clone());
1463 }
1464 Chart {
1465 active: HashMap::new(),
1466 actives: vec![],
1467 passive: HashMap::new(),
1468 forest,
1469 next_id: concrete.total_fids,
1470 offset: 0,
1471 }
1472 }
1473
1474 /// Looks up active items.
1475 ///
1476 /// # Arguments
1477 /// * `fid` - FId.
1478 /// * `label` - Label.
1479 ///
1480 /// # Returns
1481 /// Optional reference to vector of active items.
1482 pub fn lookup_ac(&self, fid: FId, label: i32) -> Option<&Vec<ActiveItem>> {
1483 self.active.get(&fid).and_then(|m| m.get(&label))
1484 }
1485
1486 /// Looks up active items by offset.
1487 ///
1488 /// # Arguments
1489 /// * `offset` - Offset.
1490 /// * `fid` - FId.
1491 /// * `label` - Label.
1492 ///
1493 /// # Returns
1494 /// Optional reference to vector of active items.
1495 pub fn lookup_aco(
1496 &self,
1497 offset: usize,
1498 fid: FId,
1499 label: i32,
1500 ) -> Option<&Vec<ActiveItem>> {
1501 if offset == self.offset {
1502 self.lookup_ac(fid, label)
1503 } else {
1504 self.actives.get(offset)?.get(&fid).and_then(|m| m.get(&label))
1505 }
1506 }
1507
1508 /// Gets labels for active FId.
1509 ///
1510 /// # Arguments
1511 /// * `fid` - FId.
1512 ///
1513 /// # Returns
1514 /// Optional vector of labels.
1515 pub fn labels_ac(&self, fid: FId) -> Option<Vec<i32>> {
1516 self.active.get(&fid).map(|m| m.keys().cloned().collect())
1517 }
1518
1519 /// Inserts active items.
1520 ///
1521 /// # Arguments
1522 /// * `fid` - FId.
1523 /// * `label` - Label.
1524 /// * `items` - Vector of active items.
1525 pub fn insert_ac(&mut self, fid: FId, label: i32, items: Vec<ActiveItem>) {
1526 self.active.entry(fid).or_default().insert(label, items);
1527 }
1528
1529 /// Looks up passive FId.
1530 ///
1531 /// # Arguments
1532 /// * `fid` - FId.
1533 /// * `label` - Label.
1534 /// * `offset` - Offset.
1535 ///
1536 /// # Returns
1537 /// Optional FId.
1538 pub fn lookup_pc(
1539 &self,
1540 fid: FId,
1541 label: i32,
1542 offset: usize,
1543 ) -> Option<FId> {
1544 let key = format!("{fid}.{label}-{offset}");
1545 self.passive.get(&key).cloned()
1546 }
1547
1548 /// Inserts passive FId.
1549 ///
1550 /// # Arguments
1551 /// * `fid` - FId.
1552 /// * `label` - Label.
1553 /// * `offset` - Offset.
1554 /// * `fid2` - FId to insert.
1555 pub fn insert_pc(
1556 &mut self,
1557 fid: FId,
1558 label: i32,
1559 offset: usize,
1560 fid2: FId,
1561 ) {
1562 let key = format!("{fid}.{label}-{offset}");
1563 self.passive.insert(key, fid2);
1564 }
1565
1566 /// Shifts the chart to next offset.
1567 ///
1568 /// Moves current active to actives and clears current active and passive.
1569 pub fn shift(&mut self) {
1570 self.actives.push(self.active.clone());
1571 self.active.clear();
1572 self.passive.clear();
1573 self.offset += 1;
1574 }
1575
1576 /// Expands forest for FId.
1577 ///
1578 /// Recursively expands coercions to get all applicable productions.
1579 ///
1580 /// # Arguments
1581 /// * `fid` - FId.
1582 ///
1583 /// # Returns
1584 /// Vector of productions.
1585 pub fn expand_forest(&self, fid: FId) -> Vec<Production> {
1586 let mut rules = Vec::new();
1587
1588 fn go(
1589 forest: &HashMap<FId, Vec<Production>>,
1590 fid: FId,
1591 rules: &mut Vec<Production>,
1592 ) {
1593 if let Some(prods) = forest.get(&fid) {
1594 for prod in prods {
1595 match prod {
1596 Production::Apply(apply) => {
1597 rules.push(Production::Apply(apply.clone()))
1598 }
1599 Production::Coerce(coerce) => {
1600 go(forest, coerce.arg, rules)
1601 }
1602 Production::Const(const_) => {
1603 rules.push(Production::Const(const_.clone()))
1604 }
1605 }
1606 }
1607 }
1608 }
1609
1610 go(&self.forest, fid, &mut rules);
1611 rules
1612 }
1613}
1614
1615////////////////////////////////////////////////////////////////////////////////
1616/// Parsing state.
1617///
1618/// Maintains the current state of parsing, including the trie of items and the chart.
1619#[derive(Debug, Clone)]
1620pub struct ParseState {
1621 /// Concrete grammar.
1622 concrete: GFConcrete,
1623 /// Start category.
1624 start_cat: String,
1625 /// Trie of active items.
1626 items: Trie<ActiveItem>,
1627 /// Chart.
1628 chart: Chart,
1629}
1630
1631impl ParseState {
1632 /// Processes items with callbacks.
1633 ///
1634 /// Advances the parsing agenda using provided callbacks for literals and tokens.
1635 ///
1636 /// # Arguments
1637 /// * `agenda` - Mutable vector of active items.
1638 /// * `literal_callback` - Callback for literals.
1639 /// * `token_callback` - Callback for tokens.
1640 pub fn process<F, G>(
1641 &mut self,
1642 agenda: &mut Vec<ActiveItem>,
1643 literal_callback: F,
1644 mut token_callback: G,
1645 ) where
1646 F: Fn(FId) -> Option<Const>,
1647 G: FnMut(&[String], ActiveItem),
1648 {
1649 while let Some(item) = agenda.pop() {
1650 if item.dot < item.seq.len() {
1651 let sym = &item.seq[item.dot];
1652 match sym {
1653 Sym::SymCat { i, label } => {
1654 let fid = item.args[*i].fid;
1655
1656 // Follow TypeScript implementation logic exactly
1657 if let Some(items) = self.chart.lookup_ac(fid, *label as i32) {
1658 // There are already waiting items for this SymCat
1659 if !items.contains(&item) {
1660 let mut items = items.clone();
1661 items.push(item.clone());
1662 self.chart.insert_ac(fid, *label as i32, items);
1663
1664 // Check if there's already a completion available
1665 if let Some(fid2) = self.chart.lookup_pc(fid, *label as i32, self.chart.offset) {
1666 // There's already a completion - advance immediately
1667 agenda.push(item.shift_over_arg(*i, fid2));
1668 }
1669 }
1670 } else {
1671 // No existing waiting items - create new parsing items AND add current item as waiting
1672 let rules = self.chart.expand_forest(fid);
1673 for rule in rules {
1674 if let Production::Apply(apply) = rule {
1675 match apply.to_apply_fun() {
1676 ApplyFun::CncFun(json_fun) => {
1677 for &lin_idx in &json_fun.lins {
1678 if (lin_idx as usize) < self.concrete.functions.len() {
1679 let runtime_fun = self.concrete.functions[lin_idx as usize].clone();
1680 match &runtime_fun.lins {
1681 LinType::Sym(lins) => {
1682 for (lbl, lin) in lins.iter().enumerate() {
1683 if lbl == *label { // Only create items for the specific label
1684 let new_item = ActiveItem::new(
1685 self.chart.offset,
1686 0,
1687 runtime_fun.clone(),
1688 lin.clone(),
1689 apply.args.clone(),
1690 fid,
1691 lbl as i32,
1692 );
1693
1694 // Check for duplicates to prevent infinite recursion
1695 if !agenda.contains(&new_item) {
1696 agenda.push(new_item);
1697 }
1698 }
1699 }
1700 }
1701 LinType::FId(_) => {
1702 let new_item = ActiveItem::new(
1703 self.chart.offset,
1704 0,
1705 runtime_fun.clone(),
1706 vec![],
1707 apply.args.clone(),
1708 fid,
1709 0,
1710 );
1711
1712 // Check for duplicates to prevent infinite recursion
1713 if !agenda.contains(&new_item) {
1714 agenda.push(new_item);
1715 }
1716 }
1717 }
1718 }
1719 }
1720 }
1721 ApplyFun::FId(fun_id) => {
1722 if (fun_id as usize) < self.concrete.functions.len() {
1723 let runtime_fun = self.concrete.functions[fun_id as usize].clone();
1724 match &runtime_fun.lins {
1725 LinType::Sym(lins) => {
1726 for (lbl, lin) in lins.iter().enumerate() {
1727 if lbl == *label { // Only create items for the specific label
1728 let new_item = ActiveItem::new(
1729 self.chart.offset,
1730 0,
1731 runtime_fun.clone(),
1732 lin.clone(),
1733 apply.args.clone(),
1734 fid,
1735 lbl as i32,
1736 );
1737
1738 // Check for duplicates to prevent infinite recursion
1739 if !agenda.contains(&new_item) {
1740 agenda.push(new_item);
1741 }
1742 }
1743 }
1744 }
1745 LinType::FId(_) => {
1746 let new_item = ActiveItem::new(
1747 self.chart.offset,
1748 0,
1749 runtime_fun.clone(),
1750 vec![],
1751 apply.args.clone(),
1752 fid,
1753 0,
1754 );
1755
1756 // Check for duplicates to prevent infinite recursion
1757 if !agenda.contains(&new_item) {
1758 agenda.push(new_item);
1759 }
1760 }
1761 }
1762 }
1763 }
1764 }
1765 }
1766 }
1767
1768 // Store current item as waiting for this SymCat completion
1769 self.chart.insert_ac(fid, *label as i32, vec![item.clone()]);
1770 }
1771 }
1772 Sym::SymKS(sym) => {
1773 token_callback(&sym.tokens, item.shift_over_token())
1774 }
1775 Sym::SymKP(sym) => {
1776 let pitem = item.shift_over_token();
1777 for symks in &sym.tokens {
1778 token_callback(&symks.tokens, pitem.clone());
1779 }
1780 for alt in &sym.alts {
1781 for symks in &alt.tokens {
1782 token_callback(&symks.tokens, pitem.clone());
1783 }
1784 }
1785 }
1786 Sym::SymLit { i, .. } => {
1787 let fid = item.args[*i].fid;
1788 if let Some(rules) = self.chart.forest.get(&fid) {
1789 if let Some(Production::Const(const_rule)) =
1790 rules.first()
1791 {
1792 token_callback(
1793 &const_rule.toks,
1794 item.shift_over_token(),
1795 );
1796 }
1797 } else if let Some(rule) = literal_callback(fid) {
1798 let new_fid = self.chart.next_id;
1799 self.chart.next_id += 1;
1800 self.chart.forest.insert(
1801 new_fid,
1802 vec![Production::Const(rule.clone())],
1803 );
1804 token_callback(
1805 &rule.toks,
1806 item.shift_over_arg(*i, new_fid),
1807 );
1808 }
1809 }
1810 }
1811 } else {
1812 // Item has completed parsing (dot >= seq.len)
1813 let fid = self
1814 .chart
1815 .lookup_pc(item.fid, item.lbl, item.offset)
1816 .unwrap_or_else(|| {
1817 let new_fid = self.chart.next_id;
1818 self.chart.next_id += 1;
1819
1820 self.chart.insert_pc(
1821 item.fid,
1822 item.lbl,
1823 item.offset,
1824 new_fid,
1825 );
1826
1827 // Record the completion in the forest
1828 let apply = Apply {
1829 fid: None,
1830 fun: Some(CncFun::new(
1831 item.fun.name.clone(),
1832 vec![],
1833 )),
1834 args: item.args.clone(),
1835 };
1836 self.chart
1837 .forest
1838 .insert(new_fid, vec![Production::Apply(apply)]);
1839
1840 new_fid
1841 });
1842
1843 // Check for any active items that were waiting for this completion
1844 // Use lookup_aco with the item's original offset (like TypeScript)
1845 if let Some(waiting_items) = self.chart.lookup_aco(item.offset, item.fid, item.lbl) {
1846 for waiting_item in waiting_items.clone() {
1847 if let Some(sym_cat) = waiting_item.seq.get(waiting_item.dot) {
1848 if let Sym::SymCat { i, .. } = sym_cat {
1849 agenda.push(waiting_item.shift_over_arg(*i, fid));
1850 }
1851 }
1852 }
1853 }
1854
1855 // Also check for any labels that might need processing
1856 if let Some(labels) = self.chart.labels_ac(fid) {
1857 for lbl in labels {
1858 let seq = match &item.fun.lins {
1859 LinType::Sym(syms) => {
1860 if (lbl as usize) < syms.len() {
1861 syms[lbl as usize].clone()
1862 } else {
1863 vec![]
1864 }
1865 }
1866 LinType::FId(_) => vec![],
1867 };
1868 agenda.push(ActiveItem::new(
1869 self.chart.offset,
1870 0,
1871 item.fun.clone(),
1872 seq,
1873 item.args.clone(),
1874 fid,
1875 lbl,
1876 ));
1877 }
1878 }
1879 }
1880 }
1881 }
1882
1883 /// Creates a new parse state.
1884 ///
1885 /// Initializes the parse state with starting active items based on the start category.
1886 ///
1887 /// # Arguments
1888 /// * `concrete` - The concrete grammar.
1889 /// * `start_cat` - The start category.
1890 /// There are five inference rules:
1891 /// Pre-Predict, Pre-Combine,
1892 /// Mark-Predict, Mark-Combine and Convert.
1893 pub fn new(concrete: GFConcrete, start_cat: String) -> Self {
1894 let mut items = Trie::new();
1895 let chart = Chart::new(concrete.clone());
1896
1897 let mut active_items = Vec::new();
1898
1899 debug_println!("ParseState::new: Looking for start category '{}'", start_cat);
1900 debug_println!("Available start categories: {:?}", concrete.start_cats.keys().collect::<Vec<_>>());
1901
1902 if let Some((start, end)) = concrete.start_cats.get(&start_cat) {
1903 debug_println!("Found start category '{}' with FID range: {} to {}", start_cat, start, end);
1904 for fid in *start..=*end {
1905 let rules = chart.expand_forest(fid);
1906 debug_println!(" FID {}: found {} rules", fid, rules.len());
1907 for rule in rules {
1908 if let Production::Apply(apply) = rule {
1909 match apply.to_apply_fun() {
1910 ApplyFun::CncFun(json_fun) => {
1911 // Find the actual RuntimeCncFun that corresponds to this CncFun
1912 // The json_fun.lins contains indices into the concrete.functions array
1913 for &lin_idx in &json_fun.lins {
1914 if (lin_idx as usize) < concrete.functions.len() {
1915 let runtime_fun = concrete.functions[lin_idx as usize].clone();
1916 match &runtime_fun.lins {
1917 LinType::Sym(lins) => {
1918 for (lbl, lin) in lins.iter().enumerate() {
1919 active_items.push(ActiveItem::new(
1920 0,
1921 0,
1922 runtime_fun.clone(),
1923 lin.clone(),
1924 apply.args.clone(),
1925 fid,
1926 lbl as i32,
1927 ));
1928 }
1929 }
1930 LinType::FId(_) => {
1931 active_items.push(ActiveItem::new(
1932 0,
1933 0,
1934 runtime_fun.clone(),
1935 vec![],
1936 apply.args.clone(),
1937 fid,
1938 0,
1939 ));
1940 }
1941 }
1942 }
1943 }
1944 }
1945 ApplyFun::FId(fun_id) => {
1946 if (fun_id as usize) < concrete.functions.len()
1947 {
1948 let runtime_fun = concrete.functions
1949 [fun_id as usize]
1950 .clone();
1951 match &runtime_fun.lins {
1952 LinType::Sym(lins) => {
1953 for (lbl, lin) in
1954 lins.iter().enumerate()
1955 {
1956 active_items.push(
1957 ActiveItem::new(
1958 0,
1959 0,
1960 runtime_fun.clone(),
1961 lin.clone(),
1962 apply.args.clone(),
1963 fid,
1964 lbl as i32,
1965 ),
1966 );
1967 }
1968 }
1969 LinType::FId(_) => {
1970 active_items.push(
1971 ActiveItem::new(
1972 0,
1973 0,
1974 runtime_fun,
1975 vec![],
1976 apply.args.clone(),
1977 fid,
1978 0,
1979 ),
1980 );
1981 }
1982 }
1983 }
1984 }
1985 }
1986 }
1987 }
1988 }
1989 }
1990
1991 debug_println!("ParseState::new: Created {} initial active items", active_items.len());
1992 for (i, item) in active_items.iter().enumerate() {
1993 debug_println!(" Item {}: fun={}, seq_len={}, fid={}, lbl={}",
1994 i, item.fun.name, item.seq.len(), item.fid, item.lbl);
1995 if !item.seq.is_empty() {
1996 debug_println!(" First symbol: {:?}", item.seq[0]);
1997 }
1998 }
1999
2000 items.insert_chain(&[], active_items);
2001
2002 ParseState { concrete, start_cat, items, chart }
2003 }
2004
    /// Advances parsing with next token.
    ///
    /// # Arguments
    /// * `token` - The next token.
    ///
    /// # Returns
    /// True if parsing can continue, false if failed.
    pub fn next(&mut self, token: &str) -> bool {
        // Continuations already keyed under this exact token become the new
        // item trie; start from a copy (or an empty trie if none exist).
        let mut acc =
            self.items.lookup(token).cloned().unwrap_or_else(Trie::new);

        // The agenda starts from the items stored at the trie root.
        let mut agenda = self.items.value.clone().unwrap_or_default();

        let mut completed_items = Vec::new();

        self.process(
            &mut agenda,
            // Literal callback: negative FIds denote built-in literal
            // categories. -1 wraps the token as a quoted string literal;
            // -2 accepts it only when it parses as an i32; -3 only when it
            // parses as an f64.
            |fid| match fid {
                -1 => Some(Const::new(
                    Fun::new(format!("\"{token}\""), vec![]),
                    vec![token.to_string()],
                )),
                -2 if token.parse::<i32>().is_ok() => Some(Const::new(
                    Fun::new(token.to_string(), vec![]),
                    vec![token.to_string()],
                )),
                -3 if token.parse::<f64>().is_ok() => Some(Const::new(
                    Fun::new(token.to_string(), vec![]),
                    vec![token.to_string()],
                )),
                _ => None,
            },
            // Token callback: keep only items whose next expected token is the
            // current one, filing them under the remainder of their sequence.
            |tokens, item| {
                if tokens.first() == Some(&token.to_string()) {
                    let tokens1 = tokens[1..].to_vec();

                    // Check if this item is now complete (dot >= seq.len)
                    if item.dot >= item.seq.len() {
                        completed_items.push(item.clone());
                    }

                    acc.insert_chain1(&tokens1, item);
                }
            },
        );

        // Process any completed items
        if !completed_items.is_empty() {
            agenda.extend(completed_items);
            self.process(
                &mut agenda,
                |_| None, // No literal callback needed
                |_, _| {}, // No token callback needed
            );
        }

        self.items = acc;
        self.chart.shift();

        // Parsing can continue only if some continuation survived this token.
        !self.items.is_empty()
    }
2066
2067 /// Gets completions for partial token.
2068 ///
2069 /// # Arguments
2070 /// * `current_token` - The partial current token.
2071 ///
2072 /// # Returns
2073 /// CompletionAccumulator with possible completions.
2074 pub fn complete(&self, current_token: &str) -> CompletionAccumulator {
2075 let mut acc = self
2076 .items
2077 .lookup(current_token)
2078 .cloned()
2079 .unwrap_or_else(Trie::new);
2080
2081 let mut agenda = self.items.value.clone().unwrap_or_default();
2082
2083 let mut clone_self = self.clone();
2084 clone_self.process(
2085 &mut agenda,
2086 |_| None,
2087 |tokens, item| {
2088 if current_token.is_empty()
2089 || tokens
2090 .first()
2091 .is_some_and(|t| t.starts_with(current_token))
2092 {
2093 let tokens1 = tokens[1..].to_vec();
2094 acc.insert_chain1(&tokens1, item);
2095 }
2096 },
2097 );
2098
2099 CompletionAccumulator { value: acc.value }
2100 }
2101
    /// Extracts parsed trees.
    ///
    /// Reconstructs abstract trees from the chart after parsing. For each
    /// start-category FId it collects the labels of all applicable
    /// productions, reads the matching passive completions, and recursively
    /// builds every combination of argument subtrees.
    ///
    /// # Returns
    /// Vector of unique parsed trees.
    pub fn extract_trees(&self) -> Vec<Fun> {
        let total_fids = self.concrete.total_fids;
        let forest = &self.chart.forest;

        // Recursively enumerates all trees rooted at `fid`. FIds below
        // `total_fids` belong to the static grammar (no completion recorded),
        // so they become "?" placeholders.
        fn go(
            fid: FId,
            total_fids: FId,
            forest: &HashMap<FId, Vec<Production>>,
        ) -> Vec<Fun> {
            if fid < total_fids {
                vec![Fun::new("?".to_string(), vec![])]
            } else if let Some(rules) = forest.get(&fid) {
                let mut trees = Vec::new();
                for rule in rules {
                    match rule {
                        Production::Const(c) => trees.push(c.lit.clone()),
                        Production::Apply(a) => {
                            let arg_trees: Vec<Vec<Fun>> = a
                                .args
                                .iter()
                                .map(|arg| go(arg.fid, total_fids, forest))
                                .collect();
                            // Odometer over `indices`: enumerate the cartesian
                            // product of the per-argument alternatives.
                            let mut indices = vec![0; a.args.len()];
                            loop {
                                let mut t = Fun::new(a.get_name(), vec![]);
                                for (k, idx) in indices.iter().enumerate() {
                                    t.args.push(arg_trees[k][*idx].clone());
                                }
                                trees.push(t);

                                // Increment the odometer; `carry` survives the
                                // loop only when every position wrapped, i.e.
                                // the product is exhausted.
                                let mut carry = true;
                                for i in 0..indices.len() {
                                    if carry {
                                        indices[i] += 1;
                                        if indices[i] < arg_trees[i].len() {
                                            carry = false;
                                        } else {
                                            indices[i] = 0;
                                        }
                                    }
                                }
                                if carry {
                                    break;
                                }
                            }
                        }
                        _ => {}
                    }
                }
                trees
            } else {
                vec![]
            }
        }

        let mut trees = Vec::new();

        if let Some((start, end)) =
            self.concrete.start_cats.get(&self.start_cat)
        {

            for fid0 in *start..=*end {
                let rules = self.chart.expand_forest(fid0);

                // Collect every linearization label any applicable production
                // could complete under.
                let mut labels = vec![];
                for rule in &rules {
                    if let Production::Apply(a) = rule {
                        match a.to_apply_fun() {
                            ApplyFun::CncFun(fun) => {
                                // JSON CncFun has Vec<i32> lins, each one is an index
                                labels.extend(0..fun.lins.len() as i32);
                            }
                            ApplyFun::FId(fun_id) => {
                                if (fun_id as usize)
                                    < self.concrete.functions.len()
                                {
                                    let runtime_fun = &self.concrete.functions
                                        [fun_id as usize];
                                    match &runtime_fun.lins {
                                        LinType::Sym(lins) => {
                                            labels
                                                .extend(0..lins.len() as i32);
                                        }
                                        LinType::FId(indices) => {
                                            labels.extend(
                                                0..indices.len() as i32,
                                            );
                                        }
                                    }
                                }
                            }
                        }
                    }
                }

                for lbl in labels {
                    let mut found = false;

                    // Try different offsets since shift() clears the passive chart
                    for try_offset in 0..=self.chart.offset {
                        if let Some(fid) = self.chart.lookup_pc(fid0, lbl, try_offset) {
                            let arg_trees = go(fid, total_fids, forest);
                            for tree in arg_trees {
                                // Linear dedup via `contains`; tree counts are
                                // expected to be small here.
                                if !trees.contains(&tree) {
                                    trees.push(tree);
                                }
                            }
                            found = true;
                            break;
                        }
                    }

                    if !found {
                        // As a fallback, look directly in the forest for any completions
                        // Since shift() clears the passive chart, we need to find completions in the forest
                        // NOTE(review): this is a heuristic — it accepts any
                        // chart-allocated FId whose production name is
                        // non-empty and non-numeric, regardless of category.
                        for (&forest_fid, productions) in forest.iter() {
                            if forest_fid >= total_fids {
                                for production in productions {
                                    if let Production::Apply(apply) = production {
                                        let name = apply.get_name();

                                        // Accept any completion that looks like a valid function name
                                        if !name.is_empty() && !name.chars().all(|c| c.is_ascii_digit()) {
                                            let arg_trees = go(forest_fid, total_fids, forest);
                                            for tree in arg_trees {
                                                if !trees.contains(&tree) {
                                                    trees.push(tree);
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        trees
    }
2249}
2250
2251////////////////////////////////////////////////////////////////////////////////
2252/// Runtime concrete function with actual symbol sequences for linearization.
2253#[derive(Debug, Clone, PartialEq)]
2254pub struct RuntimeCncFun {
2255 /// Function name.
2256 pub name: String,
2257 /// Linearization sequences (converted from indices to actual symbols).
2258 pub lins: LinType,
2259}
2260
2261impl RuntimeCncFun {
2262 /// Creates a new runtime concrete function.
2263 ///
2264 /// # Arguments
2265 /// * `name` - Function name.
2266 /// * `lins` - Linearization type.
2267 ///
2268 /// # Examples
2269 ///
2270 /// ```
2271 /// use gf_core::{RuntimeCncFun, LinType};
2272 /// let fun = RuntimeCncFun::new("Test".to_string(), LinType::Sym(vec![]));
2273 /// assert_eq!(fun.name, "Test");
2274 /// ```
2275 pub fn new(name: String, lins: LinType) -> Self {
2276 RuntimeCncFun { name, lins }
2277 }
2278}
2279
2280////////////////////////////////////////////////////////////////////////////////
2281/// Label production.
2282///
2283/// Associates a function with an FId for label-based lookup.
2284#[derive(Debug, Clone)]
2285struct LProduction {
2286 fid: FId,
2287 fun: RuntimeCncFun,
2288}
2289
2290////////////////////////////////////////////////////////////////////////////////
2291/// Tagged string for linearization.
2292///
2293/// Pairs a token with a tag for tracking origins in linearized output.
2294#[derive(Debug, Clone)]
2295pub struct TaggedString {
2296 /// Token.
2297 pub token: String,
2298 /// Tag.
2299 pub tag: String,
2300}
2301
2302impl TaggedString {
2303 /// Creates a new tagged string.
2304 ///
2305 /// # Arguments
2306 /// * `token` - The token.
2307 /// * `tag` - The tag.
2308 ///
2309 /// # Examples
2310 ///
2311 /// ```
2312 /// use gf_core::TaggedString;
2313 /// let ts = TaggedString::new("hello", "0");
2314 /// assert_eq!(ts.token, "hello");
2315 /// assert_eq!(ts.tag, "0");
2316 /// ```
2317 pub fn new(token: &str, tag: &str) -> Self {
2318 TaggedString { token: token.to_string(), tag: tag.to_string() }
2319 }
2320}
2321
2322////////////////////////////////////////////////////////////////////////////////
2323/// Linearized symbol row.
2324///
2325/// Represents a row in the linearization table with FId and symbol tables.
2326#[derive(Debug, Clone)]
2327pub struct LinearizedSym {
2328 /// FId.
2329 pub fid: i32,
2330 /// Table of symbols.
2331 pub table: Vec<Vec<Sym>>,
2332}
2333
2334////////////////////////////////////////////////////////////////////////////////
2335/// Active item for parsing.
2336///
2337/// Represents an active parsing item in the chart, with position and components.
2338#[derive(Debug, Clone, PartialEq)]
2339pub struct ActiveItem {
2340 /// Offset.
2341 pub offset: usize,
2342 /// Dot position.
2343 pub dot: usize,
2344 /// Function.
2345 pub fun: RuntimeCncFun,
2346 /// Sequence.
2347 pub seq: Vec<Sym>,
2348 /// Arguments.
2349 pub args: Vec<PArg>,
2350 /// FId.
2351 pub fid: FId,
2352 /// Label.
2353 pub lbl: i32,
2354}
2355
2356impl ActiveItem {
2357 /// Creates a new active item.
2358 ///
2359 /// # Arguments
2360 /// * `offset` - Offset.
2361 /// * `dot` - Dot position.
2362 /// * `fun` - Function.
2363 /// * `seq` - Sequence.
2364 /// * `args` - Arguments.
2365 /// * `fid` - FId.
2366 /// * `lbl` - Label.
2367 ///
2368 /// # Examples
2369 ///
2370 /// ```
2371 /// use gf_core::{ActiveItem, RuntimeCncFun, LinType, PArg, Sym};
2372 /// let fun = RuntimeCncFun::new("Test".to_string(), LinType::Sym(vec![]));
2373 /// let item = ActiveItem::new(0, 0, fun, vec![], vec![], 1, 0);
2374 /// assert_eq!(item.offset, 0);
2375 /// ```
2376 pub fn new(
2377 offset: usize,
2378 dot: usize,
2379 fun: RuntimeCncFun,
2380 seq: Vec<Sym>,
2381 args: Vec<PArg>,
2382 fid: FId,
2383 lbl: i32,
2384 ) -> Self {
2385 ActiveItem { offset, dot, fun, seq, args, fid, lbl }
2386 }
2387
2388 /// Checks equality.
2389 ///
2390 /// Compares all fields for equality.
2391 ///
2392 /// # Arguments
2393 /// * `other` - The other item.
2394 ///
2395 /// # Returns
2396 /// True if equal.
2397 pub fn is_equal(&self, other: &ActiveItem) -> bool {
2398 self.offset == other.offset
2399 && self.dot == other.dot
2400 && self.fun.name == other.fun.name
2401 && self.seq == other.seq
2402 && self.args == other.args
2403 && self.fid == other.fid
2404 && self.lbl == other.lbl
2405 }
2406
2407 /// Shifts over argument.
2408 ///
2409 /// Advances the dot and updates argument fid.
2410 ///
2411 /// # Arguments
2412 /// * `i` - Argument index.
2413 /// * `fid` - New fid.
2414 ///
2415 /// # Returns
2416 /// Updated active item.
2417 pub fn shift_over_arg(&self, i: usize, fid: FId) -> ActiveItem {
2418 let mut args = self.args.clone();
2419 args[i].fid = fid;
2420 ActiveItem {
2421 offset: self.offset,
2422 dot: self.dot + 1,
2423 fun: self.fun.clone(),
2424 seq: self.seq.clone(),
2425 args,
2426 fid: self.fid,
2427 lbl: self.lbl,
2428 }
2429 }
2430
2431 /// Shifts over token.
2432 ///
2433 /// Advances the dot position.
2434 ///
2435 /// # Returns
2436 /// Updated active item.
2437 pub fn shift_over_token(&self) -> ActiveItem {
2438 ActiveItem {
2439 offset: self.offset,
2440 dot: self.dot + 1,
2441 fun: self.fun.clone(),
2442 seq: self.seq.clone(),
2443 args: self.args.clone(),
2444 fid: self.fid,
2445 lbl: self.lbl,
2446 }
2447 }
2448}
2449
////////////////////////////////////////////////////////////////////////////////
/// Utility to check if value is undefined.
///
/// # Arguments
/// * `value` - The optional value.
///
/// # Returns
/// True if the option holds no value.
///
/// # Examples
///
/// ```
/// use gf_core::is_undefined;
/// assert!(is_undefined::<i32>(&None));
/// assert!(!is_undefined(&Some(42)));
/// ```
pub fn is_undefined<T>(value: &Option<T>) -> bool {
    match value {
        None => true,
        Some(_) => false,
    }
}
2469
////////////////////////////////////////////////////////////////////////////////
/// Maps a hashmap with a function.
///
/// Produces a new hashmap with the same keys, where every value has been
/// transformed by `fun`.
///
/// # Arguments
/// * `obj` - The hashmap to transform.
/// * `fun` - The function applied to each value.
///
/// # Returns
/// New hashmap with transformed values.
///
/// # Examples
///
/// ```
/// use gf_core::map_object;
/// use std::collections::HashMap;
/// let mut map: HashMap<String, i32> = HashMap::new();
/// map.insert("a".to_string(), 1);
/// let new_map = map_object(&map, |&v| v * 2);
/// assert_eq!(new_map.get("a"), Some(&2));
/// ```
pub fn map_object<K: Eq + std::hash::Hash + Clone, V, F: Fn(&V) -> U, U>(
    obj: &HashMap<K, V>,
    fun: F,
) -> HashMap<K, U> {
    // Size is known up front, so reserve it once.
    let mut mapped = HashMap::with_capacity(obj.len());
    for (key, value) in obj {
        mapped.insert(key.clone(), fun(value));
    }
    mapped
}
2498
2499impl Type {
2500 /// Creates a new type.
2501 ///
2502 /// # Arguments
2503 /// * `args` - Argument categories.
2504 /// * `cat` - Result category.
2505 ///
2506 /// # Examples
2507 ///
2508 /// ```
2509 /// use gf_core::Type;
2510 /// let typ = Type::new(vec!["A".to_string()], "B".to_string());
2511 /// assert_eq!(typ.args, vec!["A".to_string()]);
2512 /// assert_eq!(typ.cat, "B");
2513 /// ```
2514 pub fn new(args: Vec<String>, cat: String) -> Self {
2515 Type { args, cat }
2516 }
2517}
2518
2519impl Apply {
2520 /// Shows the apply rule as string.
2521 ///
2522 /// # Arguments
2523 /// * `cat` - Category.
2524 ///
2525 /// # Returns
2526 /// String representation.
2527 pub fn show(&self, cat: &str) -> String {
2528 format!("{} -> {} [{:?}]", cat, self.get_name(), self.args)
2529 }
2530
2531 /// Checks equality with another apply.
2532 ///
2533 /// # Arguments
2534 /// * `obj` - Other Apply.
2535 ///
2536 /// # Returns
2537 /// True if equal.
2538 pub fn is_equal(&self, obj: &Apply) -> bool {
2539 self.fun == obj.fun && self.args == obj.args
2540 }
2541}
2542
2543impl Coerce {
2544 /// Shows the coerce rule as string.
2545 ///
2546 /// # Arguments
2547 /// * `cat` - Category.
2548 ///
2549 /// # Returns
2550 /// String representation.
2551 pub fn show(&self, cat: &str) -> String {
2552 format!("{} -> _ [{}]", cat, self.arg)
2553 }
2554}
2555
2556impl PArg {
2557 /// Creates a new PArg.
2558 ///
2559 /// # Arguments
2560 /// * `type_` - Type.
2561 /// * `hypos` - Hypos.
2562 /// * `fid` - FId.
2563 ///
2564 /// # Examples
2565 ///
2566 /// ```
2567 /// use gf_core::PArg;
2568 /// let parg = PArg::new("Type".to_string(), vec![1], 2);
2569 /// assert_eq!(parg.type_, "Type");
2570 /// ```
2571 pub fn new(type_: String, hypos: Vec<FId>, fid: FId) -> Self {
2572 PArg { type_, hypos, fid }
2573 }
2574}
2575
2576impl Const {
2577 /// Shows the const rule as string.
2578 ///
2579 /// # Arguments
2580 /// * `cat` - Category.
2581 ///
2582 /// # Returns
2583 /// String representation.
2584 pub fn show(&self, cat: &str) -> String {
2585 format!("{} -> {}", cat, self.lit.print())
2586 }
2587
2588 /// Checks equality with another const.
2589 ///
2590 /// # Arguments
2591 /// * `obj` - Other Const.
2592 ///
2593 /// # Returns
2594 /// True if equal.
2595 pub fn is_equal(&self, obj: &Const) -> bool {
2596 self.lit.is_equal(&obj.lit) && self.toks == obj.toks
2597 }
2598}
2599
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// `GFGrammar::new` stores the abstract grammar's start category.
    #[test]
    fn test_gfgrammar_new() {
        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
        let concretes = HashMap::new();
        let grammar = GFGrammar::new(abstract_, concretes);
        assert_eq!(grammar.abstract_grammar.startcat, "S");
    }

    /// `add_type` registers a function's result category and argument list,
    /// retrievable via `get_cat` / `get_args`.
    #[test]
    fn test_gfabstract_add_type() {
        let mut abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
        abstract_.add_type(
            "MakeS".to_string(),
            vec!["NP".to_string(), "VP".to_string()],
            "S".to_string(),
        );
        assert_eq!(abstract_.get_cat("MakeS"), Some(&"S".to_string()));
        assert_eq!(
            abstract_.get_args("MakeS"),
            Some(&vec!["NP".to_string(), "VP".to_string()])
        );
    }

    /// `parse_tree` turns a parenthesized tree string into a `Fun` with the
    /// expected head name and argument count.
    #[test]
    fn test_parse_tree() {
        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
        let tree = abstract_.parse_tree("MakeS (NP) (VP)", None);
        assert!(tree.is_some());
        let tree = tree.unwrap();
        assert_eq!(tree.name, "MakeS");
        assert_eq!(tree.args.len(), 2);
    }

    /// `handle_literals` prefixes quoted leaf names with `<cat>_Literal_`.
    #[test]
    fn test_handle_literals() {
        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
        let tree = Fun::new("\"test\"".to_string(), vec![]);
        let handled = abstract_.handle_literals(tree, "String");
        assert_eq!(handled.name, "String_Literal_\"test\"");
    }

    /// `Fun::is_equal` compares trees structurally: same name and same
    /// arguments are equal; differing arguments are not.
    #[test]
    fn test_tree_equality() {
        let tree1 = Fun::new(
            "Test".to_string(),
            vec![Fun::new("Arg1".to_string(), vec![])],
        );
        let tree2 = Fun::new(
            "Test".to_string(),
            vec![Fun::new("Arg1".to_string(), vec![])],
        );
        assert!(tree1.is_equal(&tree2));
        let tree3 = Fun::new(
            "Test".to_string(),
            vec![Fun::new("Arg2".to_string(), vec![])],
        );
        assert!(!tree1.is_equal(&tree3));
    }

    /// Round-trips a PGF JSON fixture into a `GFGrammar` via serde.
    /// Depends on the `grammars/Hello/Hello.json` fixture being present.
    #[test]
    fn test_import_from_json() {
        let json_content = fs::read_to_string("grammars/Hello/Hello.json")
            .expect("Failed to read Hello.json");
        let json: serde_json::Value =
            serde_json::from_str(&json_content).expect("Failed to parse JSON");
        let pgf: PGF =
            serde_json::from_value(json).expect("Failed to deserialize PGF");
        let grammar = GFGrammar::from_json(pgf);

        // Verify grammar was created successfully
        assert_eq!(grammar.abstract_grammar.startcat, "Greeting");
        // Note: This test JSON has no concrete grammars
    }

    /// Parses "hello world" against the Hello grammar loaded from JSON and
    /// checks the resulting tree shape.
    #[test]
    fn test_parse_tree_from_grammar() {
        let json_content = fs::read_to_string("grammars/Hello/Hello.json")
            .expect("Failed to read Hello.json");
        let json: serde_json::Value =
            serde_json::from_str(&json_content).expect("Failed to parse JSON");
        let pgf: PGF =
            serde_json::from_value(json).expect("Failed to deserialize PGF");
        let grammar = GFGrammar::from_json(pgf);

        let tree = grammar.abstract_grammar.parse_tree("hello world", None);
        assert!(tree.is_some(), "Should be able to parse 'hello world'");
        let tree = tree.unwrap();
        assert_eq!(tree.name, "hello");
        assert_eq!(tree.args.len(), 1);
        assert_eq!(tree.args[0].name, "world");
    }

    /// Builds the tree Is(This(Fish), Delicious) by hand and checks its
    /// English linearization against the Food grammar fixture.
    #[test]
    fn test_linearize_english() {
        let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
            .expect("Failed to read gf_make_generated.json");

        let json: serde_json::Value =
            serde_json::from_str(&json_content).expect("Failed to parse JSON");
        let pgf: PGF =
            serde_json::from_value(json).expect("Failed to deserialize PGF");
        let grammar = GFGrammar::from_json(pgf);

        // Create the correct abstract syntax tree: Is(This(Fish), Delicious)
        let fish = Fun { name: "Fish".to_string(), args: vec![], type_: None };
        let this_fish = Fun { name: "This".to_string(), args: vec![fish], type_: None };
        let delicious = Fun { name: "Delicious".to_string(), args: vec![], type_: None };
        let tree = Fun { name: "Is".to_string(), args: vec![this_fish, delicious], type_: None };
        let linearized = grammar.concretes["FoodEng"].linearize(&tree);
        assert_eq!(linearized, "this fish is delicious");
    }

    /// Test Italian linearization.
    ///
    /// NOTE(review): this test parses an English sentence and linearizes with
    /// the "FoodEng" concrete — it appears to be a copy of
    /// `test_linearize_english` rather than a real Italian test. TODO: point
    /// it at the Italian concrete (presumably "FoodIta" — verify against the
    /// fixture) with an Italian expected string once the fixture ships
    /// concrete grammars.
    #[test]
    #[ignore] // Disabled: test JSON has no concrete grammars
    fn test_linearize_italian() {
        let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
            .expect("Failed to read gf_make_generated.json");
        let json: serde_json::Value =
            serde_json::from_str(&json_content).expect("Failed to parse JSON");
        let pgf: PGF =
            serde_json::from_value(json).expect("Failed to deserialize PGF");
        let grammar = GFGrammar::from_json(pgf);

        let tree = grammar
            .abstract_grammar
            .parse_tree("this fish is delicious", None)
            .expect("Failed to parse tree");
        let linearized = grammar.concretes["FoodEng"].linearize(&tree);
        assert_eq!(linearized, "this fish is delicious");
    }
}