gf_core/
lib.rs

1mod pgf_json;
2pub use pgf_json::*;
3
4use regex::Regex;
5use serde::Serialize;
6use std::collections::HashMap;
7use std::sync::atomic::{AtomicBool, Ordering};
8
9/// Global debug flag for controlling debug output throughout the library
10static DEBUG_ENABLED: AtomicBool = AtomicBool::new(false);
11
12/// Enable or disable debug output for the entire library
13///
14/// When enabled, various operations will print debug information to stdout.
15/// This is useful for understanding the internal workings of parsing,
16/// linearization, and translation processes.
17///
18/// # Examples
19///
20/// ```
21/// use gf_core::set_debug;
22/// 
23/// // Enable debug output
24/// set_debug(true);
25/// 
26/// // Your GF operations will now show debug info
27/// 
28/// // Disable debug output
29/// set_debug(false);
30/// ```
31pub fn set_debug(enabled: bool) {
32    DEBUG_ENABLED.store(enabled, Ordering::Relaxed);
33}
34
35/// Check if debug output is currently enabled
36///
37/// # Examples
38///
39/// ```
40/// use gf_core::{set_debug, is_debug_enabled};
41/// 
42/// set_debug(true);
43/// assert!(is_debug_enabled());
44/// 
45/// set_debug(false);
46/// assert!(!is_debug_enabled());
47/// ```
48pub fn is_debug_enabled() -> bool {
49    DEBUG_ENABLED.load(Ordering::Relaxed)
50}
51
52/// Macro for conditional debug output
53macro_rules! debug_println {
54    ($($arg:tt)*) => {
55        if crate::is_debug_enabled() {
56            println!("[DEBUG] {}", format!($($arg)*));
57        }
58    };
59}
60
61#[derive(Serialize, Debug, Clone)]
62pub struct Type {
63    /// Input categories (arguments) for the function type.
64    pub args: Vec<String>,
65    /// Output category (result) for the function type.
66    pub cat: String,
67}
68
69////////////////////////////////////////////////////////////////////////////////
70// Parser Module Start
71
72// Parser Module Stop
73
74/// Rust Runtime for Grammatical Framework
75///
76/// This crate provides a runtime for Grammatical Framework (GF) grammars,
77/// allowing parsing, linearization, and translation between languages defined
78/// in GF abstract and concrete syntax.
79///
80/// For more information on GF, see [gf-dev.github.io](https://gf-dev.github.io/).
81////////////////////////////////////////////////////////////////////////////////
82// Constants and type aliases
83/// Type alias for nested HashMaps used in translation outputs.
84/// Represents source language -> abstract tree -> target language -> output string.
85type HMS3 = HashMap<String, HashMap<String, String>>;
86
87/// Every constituent has a unique id. If the constituent is discontinuous,
88/// it will be represented with several brackets sharing the same id.
89pub type FId = i32;
90
91////////////////////////////////////////////////////////////////////////////////
92/// Accumulator for completion suggestions during parsing.
93///
94/// Collects active parsing items that can suggest possible completions.
95#[derive(Debug, Clone)]
96pub struct CompletionAccumulator {
97    /// Optional vector of active items for suggestions.
98    pub value: Option<Vec<ActiveItem>>,
99}
100
101impl Default for CompletionAccumulator {
102    fn default() -> Self {
103        Self::new()
104    }
105}
106
107impl CompletionAccumulator {
108    /// Creates a new empty accumulator.
109    ///
110    /// # Examples
111    ///
112    /// ```
113    /// use gf_core::CompletionAccumulator;
114    /// let acc = CompletionAccumulator::new();
115    /// assert!(acc.value.is_none());
116    /// ```
117    pub fn new() -> Self {
118        CompletionAccumulator { value: None }
119    }
120}
121
122/// Result of completion, including consumed tokens and suggestions.
123///
124/// This struct is returned by completion methods to provide feedback on
125/// what has been parsed so far and possible ways to continue.
126#[derive(Debug, Clone)]
127pub struct CompletionResult {
128    /// Tokens already consumed from the input.
129    pub consumed: Vec<String>,
130    /// Suggested completions for the next token(s).
131    pub suggestions: Vec<String>,
132}
133
134////////////////////////////////////////////////////////////////////////////////
135/// A Grammatical Framework grammar consisting of one abstract and multiple concretes.
136///
137/// The `GFGrammar` type encapsulates an abstract grammar (logical structure) and
138/// one or more concrete grammars (linearizations). The abstract defines categories
139/// and functions independently of form, while concretes provide manifestations
140/// (e.g., natural language strings).
141///
142/// # Examples
143///
144/// ```
145/// use gf_core::{GFGrammar, GFAbstract};
146/// use std::collections::HashMap;
147/// let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
148/// let concretes = HashMap::new();
149/// let grammar = GFGrammar::new(abstract_, concretes);
150/// assert_eq!(grammar.abstract_grammar.startcat, "S");
151/// ```
152pub struct GFGrammar {
153    /// The abstract grammar (only one per GFGrammar).
154    pub abstract_grammar: GFAbstract,
155
156    /// Map of language codes to concrete grammars.
157    pub concretes: HashMap<String, GFConcrete>,
158}
159
160impl GFGrammar {
161    /// Creates a new GFGrammar from an abstract and concretes.
162    ///
163    /// # Arguments
164    /// * `abstract_` - The abstract grammar.
165    /// * `concretes` - A map of language codes to concrete grammars.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use gf_core::{GFGrammar, GFAbstract};
171    /// use std::collections::HashMap;
172    /// let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
173    /// let concretes = HashMap::new();
174    /// let grammar = GFGrammar::new(abstract_, concretes);
175    /// assert_eq!(grammar.abstract_grammar.startcat, "S");
176    /// ```
177    pub fn new(
178        abstract_: GFAbstract,
179        concretes: HashMap<String, GFConcrete>,
180    ) -> Self {
181        GFGrammar { abstract_grammar: abstract_, concretes }
182    }
183
184    /// Converts from PGF JSON format to GFGrammar.
185    ///
186    /// This method deserializes a PGF structure (typically from JSON) into a usable GFGrammar.
187    ///
188    /// # Arguments
189    /// * `json` - The PGF structure to convert.
190    ///
191    /// # Examples
192    ///
193    /// Assuming a valid PGF instance, this creates a grammar:
194    ///
195    /// ```
196    /// // Note: This doctest is illustrative; actual PGF creation may require deserialization.
197    /// use gf_core::{GFGrammar, PGF, Abstract, Concrete};
198    /// use std::collections::HashMap;
199    /// let abstract_ = Abstract { name: "TestGrammar".to_string(), startcat: "S".to_string(), funs: HashMap::new() };
200    /// let concretes = HashMap::new();
201    /// let pgf = PGF { abstract_, concretes };
202    /// let grammar = GFGrammar::from_json(pgf);
203    /// assert_eq!(grammar.abstract_grammar.startcat, "S");
204    /// ```
205    pub fn from_json(json: PGF) -> Self {
206        debug_println!("Loading GFGrammar from JSON with start category: {}", json.abstract_.startcat);
207        debug_println!("Found {} concrete grammars: {:?}", json.concretes.len(), json.concretes.keys().collect::<Vec<_>>());
208        
209        // FIXME: HashMap ordering is non-deterministic which might cause problems
210        // when we try to match the ordering of the generated json file.
211        let cncs: HashMap<String, GFConcrete> = json
212            .concretes
213            .into_iter()
214            .map(|(key, concrete)| {
215                debug_println!("Loading concrete grammar: {}", key);
216                (key, GFConcrete::from_json(concrete))
217            })
218            .collect();
219        
220        let abstract_grammar = GFAbstract::from_json(json.abstract_);
221        debug_println!("GFGrammar loaded successfully with {} concrete grammars", cncs.len());
222        
223        GFGrammar {
224            abstract_grammar,
225            concretes: cncs,
226        }
227    }
228
229    /// Translates input from one language to another (or all if unspecified).
230    ///
231    /// Parses the input in the source language(s) and linearizes to target language(s).
232    ///
233    /// # Arguments
234    /// * `input` - The string to translate.
235    /// * `from_lang` - Optional source language code.
236    /// * `to_lang` - Optional target language code.
237    ///
238    /// # Returns
239    /// Vector of translation maps (source -> tree -> target -> output).
240    pub fn translate(
241        &self,
242        input: &str,
243        from_lang: Option<&str>,
244        to_lang: Option<&str>,
245    ) -> Vec<HMS3> {
246        debug_println!("Starting translation of input: '{}'", input);
247        debug_println!("From language: {:?}, To language: {:?}", from_lang, to_lang);
248        
249        let mut outputs: Vec<HMS3> = Vec::new();
250
251        let from_cncs = if let Some(lang) = from_lang {
252            let mut map = HashMap::new();
253            if let Some(concrete) = self.concretes.get(lang) {
254                map.insert(lang.to_string(), concrete.clone());
255            }
256            map
257        } else {
258            self.concretes.clone()
259        };
260
261        let to_cncs = if let Some(lang) = to_lang {
262            let mut map = HashMap::new();
263            if let Some(concrete) = self.concretes.get(lang) {
264                map.insert(lang.to_string(), concrete.clone());
265            }
266            map
267        } else {
268            self.concretes.clone()
269        };
270
271        for (lang_code, concrete) in &from_cncs {
272            debug_println!("Attempting to parse with language: {}", lang_code);
273            let trees =
274                concrete.parse_string(input, &self.abstract_grammar.startcat);
275            debug_println!("Found {} parse tree(s) for language {}", trees.len(), lang_code);
276            if !trees.is_empty() {
277                let mut c1_outputs: HashMap<String, HashMap<String, String>> =
278                    HashMap::new();
279                for tree in trees {
280                    debug_println!("Processing parse tree: {}", tree.name);
281                    let mut translations: HashMap<String, String> =
282                        HashMap::new();
283                    for (c2, to_concrete) in &to_cncs {
284                        let linearized = to_concrete.linearize(&tree);
285                        debug_println!("Linearized to {}: '{}'", c2, linearized);
286                        translations
287                            .insert(c2.clone(), linearized);
288                    }
289                    c1_outputs.insert(tree.name.clone(), translations);
290                }
291                outputs.push(c1_outputs);
292            }
293        }
294
295        debug_println!("Translation completed with {} result set(s)", outputs.len());
296        outputs
297    }
298
299    // Parser using the parser module
300    // TODO: Uncomment when parser module is fixed
301    /*
302    pub fn from_binary(data: &[u8]) -> Self {
303        let mut r = Cursor::new(data);
304        let major = read_int16(&mut r);
305        let minor = read_int16(&mut r);
306        if major != 1 || minor != 0 {
307            panic!("Unsupported PGF version {}:{}", major, minor);
308        }
309        let _flags = read_list(&mut r, read_flag); // Global flags, ignored
310        let abstract_ = read_abstract(&mut r);
311        let concretes_list = read_list(&mut r, read_concrete);
312        let concretes: HashMap<String, Concrete> = concretes_list.into_iter().map(|c| (c.name.clone(), c)).collect();
313        let pgf = PGF { abstract_, concretes };
314        Self::from_json(pgf)
315    }
316    */
317}
318
319////////////////////////////////////////////////////////////////////////////////
320/// Abstract grammar defining logical structure via functions, categories, and types.
321///
322/// The abstract grammar specifies the semantic structure without tying it to specific
323/// forms. It defines functions (fun) that combine categories (cat) into new categories.
324///
325/// # Example
326///
327/// ```gf
328/// abstract Lang = {
329///   cat Sentence;
330///   fun MakeSentence : Noun -> Verb -> Sentence;
331/// }
332/// ```
333#[derive(Debug, Clone)]
334pub struct GFAbstract {
335    /// Starting category for parsing/linearization.
336    pub startcat: String,
337    /// Map of function names to their types.
338    types: HashMap<String, Type>,
339}
340
341/// Parser for abstract syntax expressions.
342///
343/// Handles function call syntax like `Function(Arg1, Arg2)` and nested expressions.
344struct AbstractSyntaxParser {
345    tokens: Vec<String>,
346    position: usize,
347}
348
349impl AbstractSyntaxParser {
350    fn new(input: &str) -> Self {
351        // Use regex-like tokenization similar to TypeScript
352        // TypeScript: str.match(/[\w\u00C0-\u00FF\'\.\"]+|\(|\)|\?|\:/g)
353        let mut tokens = Vec::new();
354        let mut current_token = String::new();
355        
356        for ch in input.chars() {
357            match ch {
358                '(' | ')' | '?' | ':' | ',' => {
359                    if !current_token.is_empty() {
360                        tokens.push(current_token.clone());
361                        current_token.clear();
362                    }
363                    tokens.push(ch.to_string());
364                }
365                c if c.is_whitespace() => {
366                    if !current_token.is_empty() {
367                        tokens.push(current_token.clone());
368                        current_token.clear();
369                    }
370                }
371                c if c.is_alphanumeric() || c == '_' || c == '\'' || c == '.' || c == '"' => {
372                    current_token.push(c);
373                }
374                _ => {
375                    // For other characters, treat as separate tokens
376                    if !current_token.is_empty() {
377                        tokens.push(current_token.clone());
378                        current_token.clear();
379                    }
380                    tokens.push(ch.to_string());
381                }
382            }
383        }
384        
385        if !current_token.is_empty() {
386            tokens.push(current_token);
387        }
388        
389        Self {
390            tokens,
391            position: 0,
392        }
393    }
394
395    fn current_token(&self) -> Option<&String> {
396        self.tokens.get(self.position)
397    }
398
399    fn advance(&mut self) -> Option<String> {
400        if self.position < self.tokens.len() {
401            let token = self.tokens[self.position].clone();
402            self.position += 1;
403            Some(token)
404        } else {
405            None
406        }
407    }
408
409    fn parse_expression(&mut self) -> Result<Fun, &'static str> {
410        self.parse_tree_with_prec(0)
411    }
412
413    // This follows the TypeScript parseTree_ implementation exactly
414    fn parse_tree_with_prec(&mut self, prec: usize) -> Result<Fun, &'static str> {
415        if self.tokens.is_empty() || self.position >= self.tokens.len() {
416            return Err("No more tokens");
417        }
418        
419        let current = self.current_token().cloned();
420        if current.as_deref() == Some(")") {
421            return Err("Unexpected ')'");
422        }
423        
424        let t = self.advance().ok_or("No token available")?;
425        
426        if t == "(" {
427            let tree = self.parse_tree_with_prec(0)?;
428            // Expect closing parenthesis
429            if self.current_token().map(|s| s.as_str()) == Some(")") {
430                self.advance(); // consume ')'
431            }
432            Ok(tree)
433        } else if t == "?" {
434            Ok(Fun::new("?".to_string(), vec![]))
435        } else {
436            let mut tree = Fun::new(t, vec![]);
437            
438            // Check if this is immediately followed by a parenthesis for function call syntax
439            // Function call syntax takes priority over space-separated args
440            if self.current_token().map(|s| s.as_str()) == Some("(") {
441                self.advance(); // consume '('
442                
443                // Parse comma-separated arguments inside parentheses
444                if self.current_token().map(|s| s.as_str()) != Some(")") {
445                    loop {
446                        let arg = self.parse_tree_with_prec(1)?;
447                        tree.args.push(arg);
448                        
449                        match self.current_token().map(|s| s.as_str()) {
450                            Some(",") => {
451                                self.advance(); // consume comma
452                                continue;
453                            }
454                            Some(")") => break,
455                            _ => return Err("Expected ',' or ')'"),
456                        }
457                    }
458                }
459                
460                if self.current_token().map(|s| s.as_str()) == Some(")") {
461                    self.advance(); // consume ')'
462                }
463            } else if prec == 0 {
464                // TypeScript logic: when prec == 0, collect args until can't parse any more
465                // This handles space-separated args like "MakeS (NP) (VP)" and "hello world"
466                loop {
467                    match self.parse_tree_with_prec(1) {
468                        Ok(child) => tree.args.push(child),
469                        Err(_) => break,
470                    }
471                }
472            }
473            
474            Ok(tree)
475        }
476    }
477}
478
479impl GFAbstract {
480    /// Creates a new abstract grammar.
481    ///
482    /// # Arguments
483    /// * `startcat` - The starting category.
484    /// * `types` - Map of function names to types.
485    ///
486    /// # Examples
487    ///
488    /// ```
489    /// use gf_core::GFAbstract;
490    /// use std::collections::HashMap;
491    /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
492    /// assert_eq!(abs.startcat, "S");
493    /// ```
494    pub fn new(startcat: String, types: HashMap<String, Type>) -> Self {
495        GFAbstract { startcat, types }
496    }
497
498    /// Converts from JSON abstract to GFAbstract.
499    ///
500    /// # Arguments
501    /// * `json` - The Abstract structure from JSON.
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// use gf_core::{GFAbstract, Abstract};
507    /// use std::collections::HashMap;
508    /// let json = Abstract { name: "TestGrammar".to_string(), startcat: "S".to_string(), funs: HashMap::new() };
509    /// let abs = GFAbstract::from_json(json);
510    /// assert_eq!(abs.startcat, "S");
511    /// ```
512    pub fn from_json(json: Abstract) -> Self {
513        let types = json
514            .funs
515            .into_iter()
516            .map(|(key, fun)| (key, Type::new(fun.args, fun.cat)))
517            .collect();
518
519        GFAbstract { startcat: json.startcat, types }
520    }
521
522    /// Adds a new function type to the abstract.
523    ///
524    /// # Arguments
525    /// * `fun` - Function name.
526    /// * `args` - Argument categories.
527    /// * `cat` - Result category.
528    ///
529    /// # Examples
530    ///
531    /// ```
532    /// use gf_core::GFAbstract;
533    /// use std::collections::HashMap;
534    /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
535    /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
536    /// assert_eq!(abs.get_cat("MakeS").unwrap(), "S");
537    /// ```
538    pub fn add_type(&mut self, fun: String, args: Vec<String>, cat: String) {
539        self.types.insert(fun, Type::new(args, cat));
540    }
541
542    /// Gets arguments for a function.
543    ///
544    /// # Arguments
545    /// * `fun` - Function name.
546    ///
547    /// # Returns
548    /// Optional reference to vector of argument categories.
549    ///
550    /// # Examples
551    ///
552    /// ```
553    /// use gf_core::GFAbstract;
554    /// use std::collections::HashMap;
555    /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
556    /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
557    /// assert_eq!(abs.get_args("MakeS").unwrap(), &vec!["NP".to_string(), "VP".to_string()]);
558    /// ```
559    pub fn get_args(&self, fun: &str) -> Option<&Vec<String>> {
560        self.types.get(fun).map(|t| &t.args)
561    }
562
563    /// Gets category for a function.
564    ///
565    /// # Arguments
566    /// * `fun` - Function name.
567    ///
568    /// # Returns
569    /// Optional reference to result category.
570    ///
571    /// # Examples
572    ///
573    /// ```
574    /// use gf_core::GFAbstract;
575    /// use std::collections::HashMap;
576    /// let mut abs = GFAbstract::new("S".to_string(), HashMap::new());
577    /// abs.add_type("MakeS".to_string(), vec!["NP".to_string(), "VP".to_string()], "S".to_string());
578    /// assert_eq!(abs.get_cat("MakeS").unwrap(), "S");
579    /// ```
580    pub fn get_cat(&self, fun: &str) -> Option<&String> {
581        self.types.get(fun).map(|t| &t.cat)
582    }
583
584    /// Annotates meta variables in a tree with types.
585    ///
586    /// Recursively traverses the tree and assigns types to meta variables ('?') based on the abstract types.
587    ///
588    /// # Arguments
589    /// * `tree` - The tree to annotate.
590    /// * `type_` - Optional type to assign.
591    ///
592    /// # Returns
593    /// Annotated tree.
594    fn annotate(&self, mut tree: Fun, r#type: Option<&String>) -> Fun {
595        if tree.is_meta() {
596            tree.type_ = r#type.cloned();
597        } else if let Some(typ) = self.types.get(&tree.name) {
598            for (arg, expected_type) in tree.args.iter_mut().zip(&typ.args) {
599                *arg = self.annotate(arg.clone(), Some(expected_type));
600            }
601        }
602        tree
603    }
604
605    /// Handles literal wrapping for strings, ints, floats.
606    ///
607    /// Modifies tree nodes representing literals to wrap them in appropriate literal functions.
608    ///
609    /// # Arguments
610    /// * `tree` - The tree to process.
611    /// * `type_` - The type of the current node.
612    ///
613    /// # Returns
614    /// Processed tree.
615    ///
616    /// # Examples
617    ///
618    /// ```
619    /// use gf_core::{GFAbstract, Fun};
620    /// use std::collections::HashMap;
621    /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
622    /// let tree = Fun::new("\"hello\"".to_string(), vec![]);
623    /// let handled = abs.handle_literals(tree, "String");
624    /// assert_eq!(handled.name, "String_Literal_\"hello\"");
625    /// ```
626    pub fn handle_literals(&self, mut tree: Fun, r#type: &str) -> Fun {
627        if tree.name != "?" {
628            if r#type == "String" || r#type == "Int" || r#type == "Float" {
629                tree.name = format!("{}_Literal_{}", r#type, tree.name);
630            } else if let Some(typ) = self.types.get(&tree.name) {
631                for (arg, expected_type) in tree.args.iter_mut().zip(&typ.args)
632                {
633                    *arg = self.handle_literals(arg.clone(), expected_type);
634                }
635            }
636        }
637        tree
638    }
639
640    /// Deep copies a tree.
641    ///
642    /// # Arguments
643    /// * `x` - The tree to copy.
644    ///
645    /// # Returns
646    /// A deep clone of the tree.
647    ///
648    /// # Examples
649    ///
650    /// ```
651    /// use gf_core::{GFAbstract, Fun};
652    /// use std::collections::HashMap;
653    /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
654    /// let tree = Fun::new("Test".to_string(), vec![Fun::new("Arg".to_string(), vec![])]);
655    /// let copy = abs.copy_tree(&tree);
656    /// assert_eq!(tree.name, copy.name);
657    /// assert_eq!(tree.args.len(), copy.args.len());
658    /// ```
659    #[allow(clippy::only_used_in_recursion)]
660    pub fn copy_tree(&self, x: &Fun) -> Fun {
661        let mut tree = Fun::new(x.name.clone(), vec![]);
662        tree.type_ = x.type_.clone();
663        for arg in &x.args {
664            tree.args.push(self.copy_tree(arg));
665        }
666        tree
667    }
668
669    /// Parses a string into a tree.
670    ///
671    /// # Arguments
672    /// * `str` - The string representation of the tree.
673    /// * `type_` - Optional type for annotation.
674    ///
675    /// # Returns
676    /// Optional parsed and annotated tree.
677    ///
678    /// # Examples
679    ///
680    /// ```
681    /// use gf_core::GFAbstract;
682    /// use std::collections::HashMap;
683    /// let abs = GFAbstract::new("S".to_string(), HashMap::new());
684    /// let tree = abs.parse_tree("Test (Arg)", None);
685    /// assert!(tree.is_some());
686    /// let tree = tree.unwrap();
687    /// assert_eq!(tree.name, "Test");
688    /// assert_eq!(tree.args.len(), 1);
689    /// assert_eq!(tree.args[0].name, "Arg");
690    /// ```
691    pub fn parse_tree(
692        &self,
693        str: &str,
694        r#type: Option<&String>,
695    ) -> Option<Fun> {
696        let mut parser = AbstractSyntaxParser::new(str);
697        match parser.parse_expression() {
698            Ok(tree) => Some(self.annotate(tree, r#type)),
699            Err(_) => None,
700        }
701    }
702}
703
704////////////////////////////////////////////////////////////////////////////////
705/* ... rest of the code ... */
706
707/// Concrete syntax for linearizing abstract trees.
708///
709/// Defines realizations (linearizations) of the abstract grammar, which can be
710/// natural language, images, etc.
711///
712/// A concrete grammar maps abstract functions to linearization rules, which
713/// produce output forms based on the arguments.
714///
715/// # Example
716///
717/// ```gf
718/// concrete LangEng of Lang = {
719///   lincat Sentence = Str;
720///   lin MakeSentence n v = n ++ v;
721/// }
722/// ```
723#[derive(Clone, Debug)]
724pub struct GFConcrete {
725    /// Flags for concrete-specific settings.
726    pub flags: HashMap<String, String>,
727    /// Concrete functions.
728    functions: Vec<RuntimeCncFun>,
729    /// Start category ranges.
730    pub start_cats: HashMap<String, (i32, i32)>,
731    /// Total number of FIds.
732    pub total_fids: i32,
733    /// Parameter productions.
734    pub pproductions: HashMap<i32, Vec<Production>>,
735    /// Label productions.
736    lproductions: HashMap<String, Vec<LProduction>>,
737}
738
739impl GFConcrete {
740    /// Creates a new concrete grammar.
741    ///
742    /// # Arguments
743    /// * `flags` - Concrete-specific flags.
744    /// * `functions` - List of runtime concrete functions.
745    /// * `productions` - Map of productions by FId.
746    /// * `start_cats` - Start category ranges.
747    /// * `total_fids` - Total number of FIds.
748    ///
749    /// # Examples
750    ///
751    /// ```
752    /// use gf_core::{GFConcrete, RuntimeCncFun, LinType, Production};
753    /// use std::collections::HashMap;
754    /// let flags = HashMap::new();
755    /// let functions = vec![];
756    /// let productions = HashMap::new();
757    /// let start_cats = HashMap::new();
758    /// let total_fids = 0;
759    /// let concrete = GFConcrete::new(flags, functions, productions, start_cats, total_fids);
760    /// ```
761    pub fn new(
762        flags: HashMap<String, String>,
763        functions: Vec<RuntimeCncFun>,
764        productions: HashMap<i32, Vec<Production>>,
765        start_cats: HashMap<String, (i32, i32)>,
766        total_fids: i32,
767    ) -> Self {
768        let mut lproductions = HashMap::new();
769
770        #[allow(clippy::too_many_arguments)]
771        fn register_recursive(
772            args: &[PArg],
773            key: String,
774            i: usize,
775            lproductions: &mut HashMap<String, Vec<LProduction>>,
776            productions: &HashMap<i32, Vec<Production>>,
777            fun: &RuntimeCncFun,
778            fid: FId,
779            depth: usize,
780        ) {
781            if depth > 100 {
782                // Prevent stack overflow
783                return;
784            }
785            if i < args.len() {
786                let arg = args[i].fid;
787                let mut count = 0;
788
789                if let Some(rules) = productions.get(&arg) {
790                    for rule in rules {
791                        if let Production::Coerce(ref coerce_rule) = rule {
792                            let new_key =
793                                format!("{}_{}", key, coerce_rule.arg);
794                            register_recursive(
795                                args,
796                                new_key,
797                                i + 1,
798                                lproductions,
799                                productions,
800                                fun,
801                                fid,
802                                depth + 1,
803                            );
804                            count += 1;
805                        }
806                    }
807                }
808
809                if count == 0 {
810                    let new_key = format!("{key}_{arg}");
811                    register_recursive(
812                        args,
813                        new_key,
814                        i + 1,
815                        lproductions,
816                        productions,
817                        fun,
818                        fid,
819                        depth + 1,
820                    );
821                }
822            } else {
823                lproductions
824                    .entry(key)
825                    .or_default()
826                    .push(LProduction { fun: fun.clone(), fid });
827            }
828        }
829
830        for (&fid, rules) in &productions {
831            for rule in rules {
832                if let Production::Apply(ref apply_rule) = rule {
833                    match apply_rule.to_apply_fun() {
834                        ApplyFun::FId(fun_id) => {
835                            // For FId, we need to find the corresponding function name
836                            // This is a simplified approach - in real usage we'd need better mapping
837                            if (fun_id as usize) < functions.len() {
838                                let fun = &functions[fun_id as usize];
839                                register_recursive(
840                                    &apply_rule.args,
841                                    fun.name.clone(),
842                                    0,
843                                    &mut lproductions,
844                                    &productions,
845                                    fun,
846                                    fid,
847                                    0,
848                                );
849                            }
850                        }
851                        ApplyFun::CncFun(json_fun) => {
852                            // Create a temporary runtime function for registration
853                            let runtime_fun = RuntimeCncFun::new(
854                                json_fun.name.clone(),
855                                LinType::FId(json_fun.lins.clone()),
856                            );
857                            register_recursive(
858                                &apply_rule.args,
859                                runtime_fun.name.clone(),
860                                0,
861                                &mut lproductions,
862                                &productions,
863                                &runtime_fun,
864                                fid,
865                                0,
866                            );
867                        }
868                    }
869                }
870            }
871        }
872
873        GFConcrete {
874            flags,
875            pproductions: productions,
876            functions,
877            start_cats,
878            total_fids,
879            lproductions,
880        }
881    }
882
883    /// Converts from JSON concrete to GFConcrete.
884    ///
885    /// This method deserializes a Concrete structure into a usable GFConcrete,
886    /// resolving sequence indices to actual symbols.
887    ///
888    /// # Arguments
889    /// * `json` - The Concrete structure from JSON.
890    pub fn from_json(json: Concrete) -> Self {
891        let productions: HashMap<i32, Vec<Production>> = json.productions;
892
893        let sequences: Vec<Vec<Sym>> = json.sequences;
894
895        let functions: Vec<RuntimeCncFun> = json
896            .functions
897            .into_iter()
898            .map(|f| {
899                // Convert Vec<i32> indices to actual symbol sequences
900                let lins = if f.lins.is_empty() {
901                    LinType::Sym(vec![])
902                } else {
903                    let symbol_sequences: Vec<Vec<Sym>> = f
904                        .lins
905                        .into_iter()
906                        .map(|seq_idx| {
907                            if seq_idx as usize >= sequences.len() {
908                                vec![]
909                            } else {
910                                sequences[seq_idx as usize].clone()
911                            }
912                        })
913                        .collect();
914                    LinType::Sym(symbol_sequences)
915                };
916
917                RuntimeCncFun { name: f.name, lins }
918            })
919            .collect();
920
921        let start_cats = json
922            .categories
923            .into_iter()
924            .map(|(key, cat)| (key, (cat.start, cat.end)))
925            .collect();
926
927        GFConcrete::new(
928            json.flags,
929            functions,
930            productions,
931            start_cats,
932            json.totalfids,
933        )
934    }
935
936    /// Linearizes a tree into symbols with tags.
937    ///
938    /// Produces a vector of linearized symbols for the tree, handling different types
939    /// like metas, literals, and functions.
940    ///
941    /// # Arguments
942    /// * `tree` - The abstract tree to linearize.
943    /// * `tag` - The tag for tracking.
944    ///
945    /// # Returns
946    /// Vector of LinearizedSym.
947    pub fn linearize_syms(&self, tree: &Fun, tag: &str) -> Vec<LinearizedSym> {
948        let mut res = Vec::new();
949
950        if tree.is_string() {
951            let mut sym = SymKS::new(vec![tree.name.clone()]);
952            sym.tag = Some(tag.to_string());
953            res.push(LinearizedSym {
954                fid: -1,
955                table: vec![vec![Sym::SymKS(sym)]],
956            });
957        } else if tree.is_int() {
958            let mut sym = SymKS::new(vec![tree.name.clone()]);
959            sym.tag = Some(tag.to_string());
960            res.push(LinearizedSym {
961                fid: -2,
962                table: vec![vec![Sym::SymKS(sym)]],
963            });
964        } else if tree.is_float() {
965            let mut sym = SymKS::new(vec![tree.name.clone()]);
966            sym.tag = Some(tag.to_string());
967            res.push(LinearizedSym {
968                fid: -3,
969                table: vec![vec![Sym::SymKS(sym)]],
970            });
971        } else if tree.is_meta() {
972            let cat = self
973                .start_cats
974                .get(tree.type_.as_ref().unwrap_or(&String::new()))
975                .cloned()
976                .unwrap_or((0, 0));
977            let sym = Sym::SymKS(SymKS {
978                id: "KS".to_string(),
979                tokens: vec![tree.name.clone()],
980                tag: Some(tag.to_string()),
981            });
982
983            for fid in cat.0..=cat.1 {
984                res.push(LinearizedSym {
985                    fid,
986                    table: vec![vec![sym.clone()]],
987                });
988            }
989        } else {
990            let cs: Vec<LinearizedSym> = tree
991                .args
992                .iter()
993                .enumerate()
994                .filter_map(|(i, arg)| {
995                    let syms = self.linearize_syms(arg, &format!("{tag}-{i}"));
996                    syms.first().cloned()
997                })
998                .collect();
999
1000            let mut key = tree.name.clone();
1001            for c in &cs {
1002                if c.fid == -5 {
1003                    if let Some((matched_key, _)) = self
1004                        .lproductions
1005                        .iter()
1006                        .find(|(k, _)| k.contains(&tree.name))
1007                    {
1008                        key = matched_key.clone();
1009                    }
1010                    break;
1011                } else {
1012                    key = format!("{}_{}", key, c.fid);
1013                }
1014            }
1015
1016            if let Some(rules) = self.lproductions.get(&key) {
1017                for rule in rules {
1018                    let mut row =
1019                        LinearizedSym { fid: rule.fid, table: Vec::new() };
1020
1021                    match &rule.fun.lins {
1022                        LinType::Sym(lins) => {
1023                            for (j, lin) in lins.iter().enumerate() {
1024                                let mut toks: Vec<Sym> = Vec::new();
1025                                if j >= row.table.len() {
1026                                    row.table.push(Vec::new());
1027                                }
1028
1029                                for sym0 in lin {
1030                                    match sym0 {
1031                                        Sym::SymCat { i, .. }
1032                                        | Sym::SymLit { i, .. } => {
1033                                            if *i < cs.len()
1034                                                && j < cs[*i].table.len()
1035                                            {
1036                                                let ts = &cs[*i].table[j];
1037                                                toks.extend_from_slice(ts);
1038                                            }
1039                                        }
1040                                        Sym::SymKS(ks) => {
1041                                            toks.push(Sym::SymKS(
1042                                                ks.tag_with(tag),
1043                                            ));
1044                                        }
1045                                        Sym::SymKP(kp) => {
1046                                            toks.push(Sym::SymKP(
1047                                                kp.tag_with(tag),
1048                                            ));
1049                                        }
1050                                    }
1051                                }
1052
1053                                row.table[j] = toks;
1054                            }
1055                        }
1056                        LinType::FId(_) => {
1057                            // Handle FId case - create empty table
1058                            row.table.push(Vec::new());
1059                        }
1060                    }
1061
1062                    res.push(row);
1063                }
1064            }
1065        }
1066
1067        res
1068    }
1069
1070    /// Converts symbols to tagged tokens.
1071    ///
1072    /// Processes a sequence of symbols into tagged strings, handling KS and KP symbols.
1073    ///
1074    /// # Arguments
1075    /// * `syms` - Slice of symbols.
1076    ///
1077    /// # Returns
1078    /// Vector of TaggedString.
1079    pub fn syms2toks(&self, syms: &[Sym]) -> Vec<TaggedString> {
1080        let mut ts = Vec::new();
1081
1082        for i in 0..syms.len() {
1083            match &syms[i] {
1084                Sym::SymKS(sym) => {
1085                    if let Some(tag) = &sym.tag {
1086                        for token in &sym.tokens {
1087                            ts.push(TaggedString::new(token, tag));
1088                        }
1089                    }
1090                }
1091                Sym::SymKP(sym) => {
1092                    let mut added_alt = false;
1093
1094                    if i + 1 < syms.len() {
1095                        if let Sym::SymKS(next_sym) = &syms[i + 1] {
1096                            if let Some(next_token) = next_sym.tokens.first() {
1097                                for alt in &sym.alts {
1098                                    if alt
1099                                        .prefixes
1100                                        .iter()
1101                                        .any(|p| next_token.starts_with(p))
1102                                    {
1103                                        for symks in &alt.tokens {
1104                                            if let Some(tag) = &sym.tag {
1105                                                for token in &symks.tokens {
1106                                                    ts.push(
1107                                                        TaggedString::new(
1108                                                            token, tag,
1109                                                        ),
1110                                                    );
1111                                                }
1112                                            }
1113                                        }
1114                                        added_alt = true;
1115                                        break;
1116                                    }
1117                                }
1118                            }
1119                        }
1120                    }
1121
1122                    if !added_alt {
1123                        if let Some(tag) = &sym.tag {
1124                            for symks in &sym.tokens {
1125                                for token in &symks.tokens {
1126                                    ts.push(TaggedString::new(token, tag));
1127                                }
1128                            }
1129                        }
1130                    }
1131                }
1132                _ => {} // Ignore non-token symbols
1133            }
1134        }
1135
1136        ts
1137    }
1138
1139    /// Linearizes a tree to all possible strings.
1140    ///
1141    /// # Arguments
1142    /// * `tree` - The tree to linearize.
1143    ///
1144    /// # Returns
1145    /// Vector of all possible linearizations.
1146    pub fn linearize_all(&self, tree: &Fun) -> Vec<String> {
1147        self.linearize_syms(tree, "0")
1148            .into_iter()
1149            .map(|r| self.unlex(&self.syms2toks(&r.table[0])))
1150            .collect()
1151    }
1152
1153    /// Linearizes a tree to a single string (first variant).
1154    ///
1155    /// # Arguments
1156    /// * `tree` - The tree to linearize.
1157    ///
1158    /// # Returns
1159    /// The linearized string.
1160    pub fn linearize(&self, tree: &Fun) -> String {
1161        debug_println!("Linearizing tree: {}", tree.name);
1162        let res = self.linearize_syms(tree, "0");
1163        debug_println!("Generated {} linearized symbol row(s)", res.len());
1164        if !res.is_empty() {
1165            let result = self.unlex(&self.syms2toks(&res[0].table[0]));
1166            debug_println!("Linearization result: '{}'", result);
1167            result
1168        } else {
1169            debug_println!("No linearization result generated");
1170            String::new()
1171        }
1172    }
1173
1174    /// Linearizes a tree with tags.
1175    ///
1176    /// # Arguments
1177    /// * `tree` - The tree to linearize.
1178    ///
1179    /// # Returns
1180    /// Vector of tagged strings.
1181    pub fn tag_and_linearize(&self, tree: &Fun) -> Vec<TaggedString> {
1182        let res = self.linearize_syms(tree, "0");
1183        if !res.is_empty() {
1184            self.syms2toks(&res[0].table[0])
1185        } else {
1186            Vec::new()
1187        }
1188    }
1189
1190    /// Joins tagged strings into a single string, handling spacing.
1191    ///
1192    /// Applies rules for when to insert spaces between tokens.
1193    ///
1194    /// # Arguments
1195    /// * `ts` - Slice of tagged strings.
1196    ///
1197    /// # Returns
1198    /// The joined string.
1199    fn unlex(&self, ts: &[TaggedString]) -> String {
1200        if ts.is_empty() {
1201            return String::new();
1202        }
1203
1204        let no_space_after = Regex::new(r"^[\(\-\[]").unwrap();
1205        let no_space_before = Regex::new(r"^[\.\,\?\!\)\:\;\-\]]").unwrap();
1206
1207        let mut s = String::new();
1208
1209        for i in 0..ts.len() {
1210            let t = &ts[i].token;
1211            s.push_str(t);
1212
1213            if i + 1 < ts.len() {
1214                let after = &ts[i + 1].token;
1215                if !no_space_after.is_match(t)
1216                    && !no_space_before.is_match(after)
1217                {
1218                    s.push(' ');
1219                }
1220            }
1221        }
1222
1223        s
1224    }
1225
1226    /// Tokenizes input string by whitespace.
1227    ///
1228    /// # Arguments
1229    /// * `input` - The input string.
1230    ///
1231    /// # Returns
1232    /// Vector of tokens.
1233    ///
1234    fn tokenize(&self, input: &str) -> Vec<String> {
1235        input.split_whitespace().map(String::from).collect()
1236    }
1237
1238    /// Parses a string into trees using the given start category.
1239    ///
1240    /// # Arguments
1241    /// * `input` - The input string.
1242    /// * `cat` - The start category.
1243    ///
1244    /// # Returns
1245    /// Vector of parsed trees.
1246    pub fn parse_string(&self, input: &str, cat: &str) -> Vec<Fun> {
1247        debug_println!("Parsing string '{}' with start category '{}'", input, cat);
1248        let tokens = self.tokenize(input);
1249        debug_println!("Tokenized input into {} tokens: {:?}", tokens.len(), tokens);
1250
1251        let mut ps = ParseState::new(self.clone(), cat.to_string());
1252        for (i, token) in tokens.iter().enumerate() {
1253            debug_println!("Processing token {} of {}: '{}'", i + 1, tokens.len(), token);
1254            if !ps.next(token) {
1255                debug_println!("Parsing failed at token '{}'", token);
1256                return Vec::new();
1257            }
1258        }
1259
1260        let trees = ps.extract_trees();
1261        debug_println!("Extracted {} parse tree(s)", trees.len());
1262        trees
1263    }
1264
1265    /// Provides completions for partial input.
1266    ///
1267    /// # Arguments
1268    /// * `input` - The partial input string.
1269    /// * `cat` - The start category.
1270    ///
1271    /// # Returns
1272    /// CompletionResult with consumed tokens and suggestions.
1273    pub fn complete(&self, input: &str, cat: &str) -> CompletionResult {
1274        let mut tokens: Vec<String> = input
1275            .split_whitespace()
1276            .filter(|t| !t.is_empty())
1277            .map(|t| t.to_string())
1278            .collect();
1279
1280        let current = tokens.pop().unwrap_or_default();
1281
1282        let mut ps = ParseState::new(self.clone(), cat.to_string());
1283        let mut ps2 = ParseState::new(self.clone(), cat.to_string());
1284
1285        for token in &tokens {
1286            if !ps.next(token) {
1287                return CompletionResult {
1288                    consumed: vec![],
1289                    suggestions: vec![],
1290                };
1291            }
1292            ps2.next(token);
1293        }
1294
1295        let mut current = current;
1296        if ps2.next(&current) {
1297            ps.next(&current);
1298            tokens.push(current.clone());
1299            current = String::new();
1300        }
1301
1302        let acc = ps.complete(&current);
1303        let mut suggestions = Vec::new();
1304
1305        if let Some(items) = acc.value {
1306            for a in items {
1307                for s in &a.seq {
1308                    match s {
1309                        Sym::SymKS(sym) => {
1310                            for t in &sym.tokens {
1311                                suggestions.push(t.clone());
1312                            }
1313                        }
1314                        Sym::SymKP(sym) => {
1315                            for symks in &sym.tokens {
1316                                for t in &symks.tokens {
1317                                    suggestions.push(t.clone());
1318                                }
1319                            }
1320                        }
1321                        _ => {}
1322                    }
1323                }
1324            }
1325        }
1326
1327        CompletionResult { consumed: tokens, suggestions }
1328    }
1329}
1330
1331////////////////////////////////////////////////////////////////////////////////
1332/// Prefix tree for parsing items.
1333///
1334/// Used to efficiently store and retrieve sequences of active items during parsing.
1335#[derive(Debug, Clone)]
1336pub struct Trie<T> {
1337    /// Value at node.
1338    value: Option<Vec<T>>,
1339    /// Child nodes.
1340    items: HashMap<String, Trie<T>>,
1341}
1342
1343impl<T: Clone> Default for Trie<T> {
1344    fn default() -> Self {
1345        Self::new()
1346    }
1347}
1348
1349impl<T: Clone> Trie<T> {
1350    /// Creates a new trie.
1351    ///
1352    /// # Examples
1353    ///
1354    /// ```
1355    /// use gf_core::Trie;
1356    /// let trie: Trie<i32> = Trie::new();
1357    /// assert!(trie.is_empty());
1358    /// ```
1359    pub fn new() -> Self {
1360        Trie { value: None, items: HashMap::new() }
1361    }
1362
1363    /// Inserts a chain of keys with value.
1364    ///
1365    /// # Arguments
1366    /// * `keys` - Slice of keys.
1367    /// * `obj` - Value to insert.
1368    pub fn insert_chain(&mut self, keys: &[String], obj: Vec<T>) {
1369        let mut node = self;
1370        for key in keys {
1371            node = node.items.entry(key.clone()).or_default();
1372        }
1373        node.value = Some(obj);
1374    }
1375
1376    /// Inserts a chain with single item.
1377    ///
1378    /// # Arguments
1379    /// * `keys` - Slice of keys.
1380    /// * `obj` - Single item to insert.
1381    ///
1382    /// # Examples
1383    ///
1384    /// ```
1385    /// use gf_core::Trie;
1386    /// let mut trie: Trie<i32> = Trie::new();
1387    /// trie.insert_chain1(&["a".to_string(), "b".to_string()], 42);
1388    /// // Verify the key path exists
1389    /// assert!(trie.lookup("a").is_some());
1390    /// assert!(trie.lookup("a").unwrap().lookup("b").is_some());
1391    /// ```
1392    pub fn insert_chain1(&mut self, keys: &[String], obj: T) {
1393        let mut node = self;
1394        for key in keys {
1395            node = node.items.entry(key.clone()).or_default();
1396        }
1397        if let Some(value) = &mut node.value {
1398            value.push(obj);
1399        } else {
1400            node.value = Some(vec![obj]);
1401        }
1402    }
1403
1404    /// Looks up a key.
1405    ///
1406    /// # Arguments
1407    /// * `key` - The key to look up.
1408    ///
1409    /// # Returns
1410    /// Optional reference to the sub-trie.
1411    pub fn lookup(&self, key: &str) -> Option<&Trie<T>> {
1412        self.items.get(key)
1413    }
1414
1415    /// Checks if trie is empty.
1416    ///
1417    /// # Returns
1418    /// True if no value and no items.
1419    ///
1420    /// # Examples
1421    ///
1422    /// ```
1423    /// use gf_core::Trie;
1424    /// let trie: Trie<i32> = Trie::new();
1425    /// assert!(trie.is_empty());
1426    /// ```
1427    pub fn is_empty(&self) -> bool {
1428        self.value.is_none() && self.items.is_empty()
1429    }
1430}
1431
1432////////////////////////////////////////////////////////////////////////////////
1433/// Chart for parsing.
1434///
1435/// Manages active and passive items during chart parsing.
1436#[derive(Debug, Clone)]
1437pub struct Chart {
1438    /// Active items by FId and label.
1439    active: HashMap<FId, HashMap<i32, Vec<ActiveItem>>>,
1440    /// Active items by offset.
1441    actives: Vec<HashMap<FId, HashMap<i32, Vec<ActiveItem>>>>,
1442    /// Passive FIds by key.
1443    passive: HashMap<String, FId>,
1444    /// Forest of productions.
1445    pub forest: HashMap<FId, Vec<Production>>,
1446    /// Next ID.
1447    pub next_id: FId,
1448    /// Current offset.
1449    pub offset: usize,
1450}
1451
1452impl Chart {
1453    /// Creates a new chart from concrete.
1454    ///
1455    /// Initializes the forest with productions from the concrete grammar.
1456    ///
1457    /// # Arguments
1458    /// * `concrete` - The concrete grammar.
1459    pub fn new(concrete: GFConcrete) -> Self {
1460        let mut forest = HashMap::new();
1461        for (&fid, prods) in &concrete.pproductions {
1462            forest.insert(fid, prods.clone());
1463        }
1464        Chart {
1465            active: HashMap::new(),
1466            actives: vec![],
1467            passive: HashMap::new(),
1468            forest,
1469            next_id: concrete.total_fids,
1470            offset: 0,
1471        }
1472    }
1473
1474    /// Looks up active items.
1475    ///
1476    /// # Arguments
1477    /// * `fid` - FId.
1478    /// * `label` - Label.
1479    ///
1480    /// # Returns
1481    /// Optional reference to vector of active items.
1482    pub fn lookup_ac(&self, fid: FId, label: i32) -> Option<&Vec<ActiveItem>> {
1483        self.active.get(&fid).and_then(|m| m.get(&label))
1484    }
1485
1486    /// Looks up active items by offset.
1487    ///
1488    /// # Arguments
1489    /// * `offset` - Offset.
1490    /// * `fid` - FId.
1491    /// * `label` - Label.
1492    ///
1493    /// # Returns
1494    /// Optional reference to vector of active items.
1495    pub fn lookup_aco(
1496        &self,
1497        offset: usize,
1498        fid: FId,
1499        label: i32,
1500    ) -> Option<&Vec<ActiveItem>> {
1501        if offset == self.offset {
1502            self.lookup_ac(fid, label)
1503        } else {
1504            self.actives.get(offset)?.get(&fid).and_then(|m| m.get(&label))
1505        }
1506    }
1507
1508    /// Gets labels for active FId.
1509    ///
1510    /// # Arguments
1511    /// * `fid` - FId.
1512    ///
1513    /// # Returns
1514    /// Optional vector of labels.
1515    pub fn labels_ac(&self, fid: FId) -> Option<Vec<i32>> {
1516        self.active.get(&fid).map(|m| m.keys().cloned().collect())
1517    }
1518
1519    /// Inserts active items.
1520    ///
1521    /// # Arguments
1522    /// * `fid` - FId.
1523    /// * `label` - Label.
1524    /// * `items` - Vector of active items.
1525    pub fn insert_ac(&mut self, fid: FId, label: i32, items: Vec<ActiveItem>) {
1526        self.active.entry(fid).or_default().insert(label, items);
1527    }
1528
1529    /// Looks up passive FId.
1530    ///
1531    /// # Arguments
1532    /// * `fid` - FId.
1533    /// * `label` - Label.
1534    /// * `offset` - Offset.
1535    ///
1536    /// # Returns
1537    /// Optional FId.
1538    pub fn lookup_pc(
1539        &self,
1540        fid: FId,
1541        label: i32,
1542        offset: usize,
1543    ) -> Option<FId> {
1544        let key = format!("{fid}.{label}-{offset}");
1545        self.passive.get(&key).cloned()
1546    }
1547
1548    /// Inserts passive FId.
1549    ///
1550    /// # Arguments
1551    /// * `fid` - FId.
1552    /// * `label` - Label.
1553    /// * `offset` - Offset.
1554    /// * `fid2` - FId to insert.
1555    pub fn insert_pc(
1556        &mut self,
1557        fid: FId,
1558        label: i32,
1559        offset: usize,
1560        fid2: FId,
1561    ) {
1562        let key = format!("{fid}.{label}-{offset}");
1563        self.passive.insert(key, fid2);
1564    }
1565
1566    /// Shifts the chart to next offset.
1567    ///
1568    /// Moves current active to actives and clears current active and passive.
1569    pub fn shift(&mut self) {
1570        self.actives.push(self.active.clone());
1571        self.active.clear();
1572        self.passive.clear();
1573        self.offset += 1;
1574    }
1575
1576    /// Expands forest for FId.
1577    ///
1578    /// Recursively expands coercions to get all applicable productions.
1579    ///
1580    /// # Arguments
1581    /// * `fid` - FId.
1582    ///
1583    /// # Returns
1584    /// Vector of productions.
1585    pub fn expand_forest(&self, fid: FId) -> Vec<Production> {
1586        let mut rules = Vec::new();
1587
1588        fn go(
1589            forest: &HashMap<FId, Vec<Production>>,
1590            fid: FId,
1591            rules: &mut Vec<Production>,
1592        ) {
1593            if let Some(prods) = forest.get(&fid) {
1594                for prod in prods {
1595                    match prod {
1596                        Production::Apply(apply) => {
1597                            rules.push(Production::Apply(apply.clone()))
1598                        }
1599                        Production::Coerce(coerce) => {
1600                            go(forest, coerce.arg, rules)
1601                        }
1602                        Production::Const(const_) => {
1603                            rules.push(Production::Const(const_.clone()))
1604                        }
1605                    }
1606                }
1607            }
1608        }
1609
1610        go(&self.forest, fid, &mut rules);
1611        rules
1612    }
1613}
1614
1615////////////////////////////////////////////////////////////////////////////////
1616/// Parsing state.
1617///
1618/// Maintains the current state of parsing, including the trie of items and the chart.
1619#[derive(Debug, Clone)]
1620pub struct ParseState {
1621    /// Concrete grammar.
1622    concrete: GFConcrete,
1623    /// Start category.
1624    start_cat: String,
1625    /// Trie of active items.
1626    items: Trie<ActiveItem>,
1627    /// Chart.
1628    chart: Chart,
1629}
1630
1631impl ParseState {
1632    /// Processes items with callbacks.
1633    ///
1634    /// Advances the parsing agenda using provided callbacks for literals and tokens.
1635    ///
1636    /// # Arguments
1637    /// * `agenda` - Mutable vector of active items.
1638    /// * `literal_callback` - Callback for literals.
1639    /// * `token_callback` - Callback for tokens.
1640    pub fn process<F, G>(
1641        &mut self,
1642        agenda: &mut Vec<ActiveItem>,
1643        literal_callback: F,
1644        mut token_callback: G,
1645    ) where
1646        F: Fn(FId) -> Option<Const>,
1647        G: FnMut(&[String], ActiveItem),
1648    {
1649        while let Some(item) = agenda.pop() {
1650            if item.dot < item.seq.len() {
1651                let sym = &item.seq[item.dot];
1652                match sym {
1653                    Sym::SymCat { i, label } => {
1654                        let fid = item.args[*i].fid;
1655                        
1656                        // Follow TypeScript implementation logic exactly
1657                        if let Some(items) = self.chart.lookup_ac(fid, *label as i32) {
1658                            // There are already waiting items for this SymCat
1659                            if !items.contains(&item) {
1660                                let mut items = items.clone();
1661                                items.push(item.clone());
1662                                self.chart.insert_ac(fid, *label as i32, items);
1663                                
1664                                // Check if there's already a completion available
1665                                if let Some(fid2) = self.chart.lookup_pc(fid, *label as i32, self.chart.offset) {
1666                                    // There's already a completion - advance immediately
1667                                    agenda.push(item.shift_over_arg(*i, fid2));
1668                                }
1669                            }
1670                        } else {
1671                            // No existing waiting items - create new parsing items AND add current item as waiting
1672                            let rules = self.chart.expand_forest(fid);
1673                            for rule in rules {
1674                                if let Production::Apply(apply) = rule {
1675                                    match apply.to_apply_fun() {
1676                                        ApplyFun::CncFun(json_fun) => {
1677                                            for &lin_idx in &json_fun.lins {
1678                                                if (lin_idx as usize) < self.concrete.functions.len() {
1679                                                    let runtime_fun = self.concrete.functions[lin_idx as usize].clone();
1680                                                    match &runtime_fun.lins {
1681                                                        LinType::Sym(lins) => {
1682                                                            for (lbl, lin) in lins.iter().enumerate() {
1683                                                                if lbl == *label {  // Only create items for the specific label
1684                                                                    let new_item = ActiveItem::new(
1685                                                                        self.chart.offset,
1686                                                                        0,
1687                                                                        runtime_fun.clone(),
1688                                                                        lin.clone(),
1689                                                                        apply.args.clone(),
1690                                                                        fid,
1691                                                                        lbl as i32,
1692                                                                    );
1693                                                                    
1694                                                                    // Check for duplicates to prevent infinite recursion
1695                                                                    if !agenda.contains(&new_item) {
1696                                                                        agenda.push(new_item);
1697                                                                    }
1698                                                                }
1699                                                            }
1700                                                        }
1701                                                        LinType::FId(_) => {
1702                                                            let new_item = ActiveItem::new(
1703                                                                self.chart.offset,
1704                                                                0,
1705                                                                runtime_fun.clone(),
1706                                                                vec![],
1707                                                                apply.args.clone(),
1708                                                                fid,
1709                                                                0,
1710                                                            );
1711                                                            
1712                                                            // Check for duplicates to prevent infinite recursion
1713                                                            if !agenda.contains(&new_item) {
1714                                                                agenda.push(new_item);
1715                                                            }
1716                                                        }
1717                                                    }
1718                                                }
1719                                            }
1720                                        }
1721                                        ApplyFun::FId(fun_id) => {
1722                                            if (fun_id as usize) < self.concrete.functions.len() {
1723                                                let runtime_fun = self.concrete.functions[fun_id as usize].clone();
1724                                                match &runtime_fun.lins {
1725                                                    LinType::Sym(lins) => {
1726                                                        for (lbl, lin) in lins.iter().enumerate() {
1727                                                            if lbl == *label {  // Only create items for the specific label
1728                                                                let new_item = ActiveItem::new(
1729                                                                    self.chart.offset,
1730                                                                    0,
1731                                                                    runtime_fun.clone(),
1732                                                                    lin.clone(),
1733                                                                    apply.args.clone(),
1734                                                                    fid,
1735                                                                    lbl as i32,
1736                                                                );
1737                                                                
1738                                                                // Check for duplicates to prevent infinite recursion
1739                                                                if !agenda.contains(&new_item) {
1740                                                                    agenda.push(new_item);
1741                                                                }
1742                                                            }
1743                                                        }
1744                                                    }
1745                                                    LinType::FId(_) => {
1746                                                        let new_item = ActiveItem::new(
1747                                                            self.chart.offset,
1748                                                            0,
1749                                                            runtime_fun.clone(),
1750                                                            vec![],
1751                                                            apply.args.clone(),
1752                                                            fid,
1753                                                            0,
1754                                                        );
1755                                                        
1756                                                        // Check for duplicates to prevent infinite recursion
1757                                                        if !agenda.contains(&new_item) {
1758                                                            agenda.push(new_item);
1759                                                        }
1760                                                    }
1761                                                }
1762                                            }
1763                                        }
1764                                    }
1765                                }
1766                            }
1767                            
1768                            // Store current item as waiting for this SymCat completion
1769                            self.chart.insert_ac(fid, *label as i32, vec![item.clone()]);
1770                        }
1771                    }
1772                    Sym::SymKS(sym) => {
1773                        token_callback(&sym.tokens, item.shift_over_token())
1774                    }
1775                    Sym::SymKP(sym) => {
1776                        let pitem = item.shift_over_token();
1777                        for symks in &sym.tokens {
1778                            token_callback(&symks.tokens, pitem.clone());
1779                        }
1780                        for alt in &sym.alts {
1781                            for symks in &alt.tokens {
1782                                token_callback(&symks.tokens, pitem.clone());
1783                            }
1784                        }
1785                    }
1786                    Sym::SymLit { i, .. } => {
1787                        let fid = item.args[*i].fid;
1788                        if let Some(rules) = self.chart.forest.get(&fid) {
1789                            if let Some(Production::Const(const_rule)) =
1790                                rules.first()
1791                            {
1792                                token_callback(
1793                                    &const_rule.toks,
1794                                    item.shift_over_token(),
1795                                );
1796                            }
1797                        } else if let Some(rule) = literal_callback(fid) {
1798                            let new_fid = self.chart.next_id;
1799                            self.chart.next_id += 1;
1800                            self.chart.forest.insert(
1801                                new_fid,
1802                                vec![Production::Const(rule.clone())],
1803                            );
1804                            token_callback(
1805                                &rule.toks,
1806                                item.shift_over_arg(*i, new_fid),
1807                            );
1808                        }
1809                    }
1810                }
1811            } else {
1812                // Item has completed parsing (dot >= seq.len)
1813                let fid = self
1814                    .chart
1815                    .lookup_pc(item.fid, item.lbl, item.offset)
1816                    .unwrap_or_else(|| {
1817                        let new_fid = self.chart.next_id;
1818                        self.chart.next_id += 1;
1819                        
1820                        self.chart.insert_pc(
1821                            item.fid,
1822                            item.lbl,
1823                            item.offset,
1824                            new_fid,
1825                        );
1826                        
1827                        // Record the completion in the forest
1828                        let apply = Apply {
1829                            fid: None,
1830                            fun: Some(CncFun::new(
1831                                item.fun.name.clone(),
1832                                vec![],
1833                            )),
1834                            args: item.args.clone(),
1835                        };
1836                        self.chart
1837                            .forest
1838                            .insert(new_fid, vec![Production::Apply(apply)]);
1839                        
1840                        new_fid
1841                    });
1842
1843                // Check for any active items that were waiting for this completion
1844                // Use lookup_aco with the item's original offset (like TypeScript)
1845                if let Some(waiting_items) = self.chart.lookup_aco(item.offset, item.fid, item.lbl) {
1846                    for waiting_item in waiting_items.clone() {
1847                        if let Some(sym_cat) = waiting_item.seq.get(waiting_item.dot) {
1848                            if let Sym::SymCat { i, .. } = sym_cat {
1849                                agenda.push(waiting_item.shift_over_arg(*i, fid));
1850                            }
1851                        }
1852                    }
1853                }
1854
1855                // Also check for any labels that might need processing
1856                if let Some(labels) = self.chart.labels_ac(fid) {
1857                    for lbl in labels {
1858                        let seq = match &item.fun.lins {
1859                            LinType::Sym(syms) => {
1860                                if (lbl as usize) < syms.len() {
1861                                    syms[lbl as usize].clone()
1862                                } else {
1863                                    vec![]
1864                                }
1865                            }
1866                            LinType::FId(_) => vec![],
1867                        };
1868                        agenda.push(ActiveItem::new(
1869                            self.chart.offset,
1870                            0,
1871                            item.fun.clone(),
1872                            seq,
1873                            item.args.clone(),
1874                            fid,
1875                            lbl,
1876                        ));
1877                    }
1878                }
1879            }
1880        }
1881    }
1882
1883    /// Creates a new parse state.
1884    ///
1885    /// Initializes the parse state with starting active items based on the start category.
1886    ///
1887    /// # Arguments
1888    /// * `concrete` - The concrete grammar.
1889    /// * `start_cat` - The start category.
1890    /// There are five inference rules: 
1891    ///     Pre-Predict, Pre-Combine,
1892    ///     Mark-Predict, Mark-Combine and Convert.
1893    pub fn new(concrete: GFConcrete, start_cat: String) -> Self {
1894        let mut items = Trie::new();
1895        let chart = Chart::new(concrete.clone());
1896
1897        let mut active_items = Vec::new();
1898
1899        debug_println!("ParseState::new: Looking for start category '{}'", start_cat);
1900        debug_println!("Available start categories: {:?}", concrete.start_cats.keys().collect::<Vec<_>>());
1901        
1902        if let Some((start, end)) = concrete.start_cats.get(&start_cat) {
1903            debug_println!("Found start category '{}' with FID range: {} to {}", start_cat, start, end);
1904            for fid in *start..=*end {
1905                let rules = chart.expand_forest(fid);
1906                debug_println!("  FID {}: found {} rules", fid, rules.len());
1907                for rule in rules {
1908                    if let Production::Apply(apply) = rule {
1909                        match apply.to_apply_fun() {
1910                            ApplyFun::CncFun(json_fun) => {
1911                                // Find the actual RuntimeCncFun that corresponds to this CncFun
1912                                // The json_fun.lins contains indices into the concrete.functions array
1913                                for &lin_idx in &json_fun.lins {
1914                                    if (lin_idx as usize) < concrete.functions.len() {
1915                                        let runtime_fun = concrete.functions[lin_idx as usize].clone();
1916                                        match &runtime_fun.lins {
1917                                            LinType::Sym(lins) => {
1918                                                for (lbl, lin) in lins.iter().enumerate() {
1919                                                    active_items.push(ActiveItem::new(
1920                                                        0,
1921                                                        0,
1922                                                        runtime_fun.clone(),
1923                                                        lin.clone(),
1924                                                        apply.args.clone(),
1925                                                        fid,
1926                                                        lbl as i32,
1927                                                    ));
1928                                                }
1929                                            }
1930                                            LinType::FId(_) => {
1931                                                active_items.push(ActiveItem::new(
1932                                                    0,
1933                                                    0,
1934                                                    runtime_fun.clone(),
1935                                                    vec![],
1936                                                    apply.args.clone(),
1937                                                    fid,
1938                                                    0,
1939                                                ));
1940                                            }
1941                                        }
1942                                    }
1943                                }
1944                            }
1945                            ApplyFun::FId(fun_id) => {
1946                                if (fun_id as usize) < concrete.functions.len()
1947                                {
1948                                    let runtime_fun = concrete.functions
1949                                        [fun_id as usize]
1950                                        .clone();
1951                                    match &runtime_fun.lins {
1952                                        LinType::Sym(lins) => {
1953                                            for (lbl, lin) in
1954                                                lins.iter().enumerate()
1955                                            {
1956                                                active_items.push(
1957                                                    ActiveItem::new(
1958                                                        0,
1959                                                        0,
1960                                                        runtime_fun.clone(),
1961                                                        lin.clone(),
1962                                                        apply.args.clone(),
1963                                                        fid,
1964                                                        lbl as i32,
1965                                                    ),
1966                                                );
1967                                            }
1968                                        }
1969                                        LinType::FId(_) => {
1970                                            active_items.push(
1971                                                ActiveItem::new(
1972                                                    0,
1973                                                    0,
1974                                                    runtime_fun,
1975                                                    vec![],
1976                                                    apply.args.clone(),
1977                                                    fid,
1978                                                    0,
1979                                                ),
1980                                            );
1981                                        }
1982                                    }
1983                                }
1984                            }
1985                        }
1986                    }
1987                }
1988            }
1989        }
1990
1991        debug_println!("ParseState::new: Created {} initial active items", active_items.len());
1992        for (i, item) in active_items.iter().enumerate() {
1993            debug_println!("  Item {}: fun={}, seq_len={}, fid={}, lbl={}", 
1994                i, item.fun.name, item.seq.len(), item.fid, item.lbl);
1995            if !item.seq.is_empty() {
1996                debug_println!("    First symbol: {:?}", item.seq[0]);
1997            }
1998        }
1999        
2000        items.insert_chain(&[], active_items);
2001
2002        ParseState { concrete, start_cat, items, chart }
2003    }
2004
2005    /// Advances parsing with next token.
2006    ///
2007    /// # Arguments
2008    /// * `token` - The next token.
2009    ///
2010    /// # Returns
2011    /// True if parsing can continue, false if failed.
2012    pub fn next(&mut self, token: &str) -> bool {
2013        let mut acc =
2014            self.items.lookup(token).cloned().unwrap_or_else(Trie::new);
2015
2016        let mut agenda = self.items.value.clone().unwrap_or_default();
2017
2018        let mut completed_items = Vec::new();
2019
2020        self.process(
2021            &mut agenda,
2022            |fid| match fid {
2023                -1 => Some(Const::new(
2024                    Fun::new(format!("\"{token}\""), vec![]),
2025                    vec![token.to_string()],
2026                )),
2027                -2 if token.parse::<i32>().is_ok() => Some(Const::new(
2028                    Fun::new(token.to_string(), vec![]),
2029                    vec![token.to_string()],
2030                )),
2031                -3 if token.parse::<f64>().is_ok() => Some(Const::new(
2032                    Fun::new(token.to_string(), vec![]),
2033                    vec![token.to_string()],
2034                )),
2035                _ => None,
2036            },
2037            |tokens, item| {
2038                if tokens.first() == Some(&token.to_string()) {
2039                    let tokens1 = tokens[1..].to_vec();
2040                    
2041                    // Check if this item is now complete (dot >= seq.len)
2042                    if item.dot >= item.seq.len() {
2043                        completed_items.push(item.clone());
2044                    }
2045                    
2046                    acc.insert_chain1(&tokens1, item);
2047                }
2048            },
2049        );
2050
2051        // Process any completed items  
2052        if !completed_items.is_empty() {
2053            agenda.extend(completed_items);
2054            self.process(
2055                &mut agenda,
2056                |_| None, // No literal callback needed
2057                |_, _| {}, // No token callback needed
2058            );
2059        }
2060
2061        self.items = acc;
2062        self.chart.shift();
2063
2064        !self.items.is_empty()
2065    }
2066
2067    /// Gets completions for partial token.
2068    ///
2069    /// # Arguments
2070    /// * `current_token` - The partial current token.
2071    ///
2072    /// # Returns
2073    /// CompletionAccumulator with possible completions.
2074    pub fn complete(&self, current_token: &str) -> CompletionAccumulator {
2075        let mut acc = self
2076            .items
2077            .lookup(current_token)
2078            .cloned()
2079            .unwrap_or_else(Trie::new);
2080
2081        let mut agenda = self.items.value.clone().unwrap_or_default();
2082
2083        let mut clone_self = self.clone();
2084        clone_self.process(
2085            &mut agenda,
2086            |_| None,
2087            |tokens, item| {
2088                if current_token.is_empty()
2089                    || tokens
2090                        .first()
2091                        .is_some_and(|t| t.starts_with(current_token))
2092                {
2093                    let tokens1 = tokens[1..].to_vec();
2094                    acc.insert_chain1(&tokens1, item);
2095                }
2096            },
2097        );
2098
2099        CompletionAccumulator { value: acc.value }
2100    }
2101
2102    /// Extracts parsed trees.
2103    ///
2104    /// Reconstructs abstract trees from the chart after parsing.
2105    ///
2106    /// # Returns
2107    /// Vector of unique parsed trees.
2108    pub fn extract_trees(&self) -> Vec<Fun> {
2109        let total_fids = self.concrete.total_fids;
2110        let forest = &self.chart.forest;
2111
2112        fn go(
2113            fid: FId,
2114            total_fids: FId,
2115            forest: &HashMap<FId, Vec<Production>>,
2116        ) -> Vec<Fun> {
2117            if fid < total_fids {
2118                vec![Fun::new("?".to_string(), vec![])]
2119            } else if let Some(rules) = forest.get(&fid) {
2120                let mut trees = Vec::new();
2121                for rule in rules {
2122                    match rule {
2123                        Production::Const(c) => trees.push(c.lit.clone()),
2124                        Production::Apply(a) => {
2125                            let arg_trees: Vec<Vec<Fun>> = a
2126                                .args
2127                                .iter()
2128                                .map(|arg| go(arg.fid, total_fids, forest))
2129                                .collect();
2130                            let mut indices = vec![0; a.args.len()];
2131                            loop {
2132                                let mut t = Fun::new(a.get_name(), vec![]);
2133                                for (k, idx) in indices.iter().enumerate() {
2134                                    t.args.push(arg_trees[k][*idx].clone());
2135                                }
2136                                trees.push(t);
2137
2138                                let mut carry = true;
2139                                for i in 0..indices.len() {
2140                                    if carry {
2141                                        indices[i] += 1;
2142                                        if indices[i] < arg_trees[i].len() {
2143                                            carry = false;
2144                                        } else {
2145                                            indices[i] = 0;
2146                                        }
2147                                    }
2148                                }
2149                                if carry {
2150                                    break;
2151                                }
2152                            }
2153                        }
2154                        _ => {}
2155                    }
2156                }
2157                trees
2158            } else {
2159                vec![]
2160            }
2161        }
2162
2163        let mut trees = Vec::new();
2164
2165        if let Some((start, end)) =
2166            self.concrete.start_cats.get(&self.start_cat)
2167        {
2168            
2169            for fid0 in *start..=*end {
2170                let rules = self.chart.expand_forest(fid0);
2171                
2172                let mut labels = vec![];
2173                for rule in &rules {
2174                    if let Production::Apply(a) = rule {
2175                        match a.to_apply_fun() {
2176                            ApplyFun::CncFun(fun) => {
2177                                // JSON CncFun has Vec<i32> lins, each one is an index
2178                                labels.extend(0..fun.lins.len() as i32);
2179                            }
2180                            ApplyFun::FId(fun_id) => {
2181                                if (fun_id as usize)
2182                                    < self.concrete.functions.len()
2183                                {
2184                                    let runtime_fun = &self.concrete.functions
2185                                        [fun_id as usize];
2186                                    match &runtime_fun.lins {
2187                                        LinType::Sym(lins) => {
2188                                            labels
2189                                                .extend(0..lins.len() as i32);
2190                                        }
2191                                        LinType::FId(indices) => {
2192                                            labels.extend(
2193                                                0..indices.len() as i32,
2194                                            );
2195                                        }
2196                                    }
2197                                }
2198                            }
2199                        }
2200                    }
2201                }
2202
2203                for lbl in labels {
2204                    let mut found = false;
2205                    
2206                    // Try different offsets since shift() clears the passive chart
2207                    for try_offset in 0..=self.chart.offset {
2208                        if let Some(fid) = self.chart.lookup_pc(fid0, lbl, try_offset) {
2209                            let arg_trees = go(fid, total_fids, forest);
2210                            for tree in arg_trees {
2211                                if !trees.contains(&tree) {
2212                                    trees.push(tree);
2213                                }
2214                            }
2215                            found = true;
2216                            break;
2217                        }
2218                    }
2219                    
2220                    if !found {
2221                        // As a fallback, look directly in the forest for any completions
2222                        // Since shift() clears the passive chart, we need to find completions in the forest
2223                        for (&forest_fid, productions) in forest.iter() {
2224                            if forest_fid >= total_fids {
2225                                for production in productions {
2226                                    if let Production::Apply(apply) = production {
2227                                        let name = apply.get_name();
2228                                        
2229                                        // Accept any completion that looks like a valid function name
2230                                        if !name.is_empty() && !name.chars().all(|c| c.is_ascii_digit()) {
2231                                            let arg_trees = go(forest_fid, total_fids, forest);
2232                                            for tree in arg_trees {
2233                                                if !trees.contains(&tree) {
2234                                                    trees.push(tree);
2235                                                }
2236                                            }
2237                                        }
2238                                    }
2239                                }
2240                            }
2241                        }
2242                    }
2243                }
2244            }
2245        }
2246
2247        trees
2248    }
2249}
2250
2251////////////////////////////////////////////////////////////////////////////////
2252/// Runtime concrete function with actual symbol sequences for linearization.
2253#[derive(Debug, Clone, PartialEq)]
2254pub struct RuntimeCncFun {
2255    /// Function name.
2256    pub name: String,
2257    /// Linearization sequences (converted from indices to actual symbols).
2258    pub lins: LinType,
2259}
2260
2261impl RuntimeCncFun {
2262    /// Creates a new runtime concrete function.
2263    ///
2264    /// # Arguments
2265    /// * `name` - Function name.
2266    /// * `lins` - Linearization type.
2267    ///
2268    /// # Examples
2269    ///
2270    /// ```
2271    /// use gf_core::{RuntimeCncFun, LinType};
2272    /// let fun = RuntimeCncFun::new("Test".to_string(), LinType::Sym(vec![]));
2273    /// assert_eq!(fun.name, "Test");
2274    /// ```
2275    pub fn new(name: String, lins: LinType) -> Self {
2276        RuntimeCncFun { name, lins }
2277    }
2278}
2279
2280////////////////////////////////////////////////////////////////////////////////
2281/// Label production.
2282///
2283/// Associates a function with an FId for label-based lookup.
2284#[derive(Debug, Clone)]
2285struct LProduction {
2286    fid: FId,
2287    fun: RuntimeCncFun,
2288}
2289
2290////////////////////////////////////////////////////////////////////////////////
2291/// Tagged string for linearization.
2292///
2293/// Pairs a token with a tag for tracking origins in linearized output.
2294#[derive(Debug, Clone)]
2295pub struct TaggedString {
2296    /// Token.
2297    pub token: String,
2298    /// Tag.
2299    pub tag: String,
2300}
2301
2302impl TaggedString {
2303    /// Creates a new tagged string.
2304    ///
2305    /// # Arguments
2306    /// * `token` - The token.
2307    /// * `tag` - The tag.
2308    ///
2309    /// # Examples
2310    ///
2311    /// ```
2312    /// use gf_core::TaggedString;
2313    /// let ts = TaggedString::new("hello", "0");
2314    /// assert_eq!(ts.token, "hello");
2315    /// assert_eq!(ts.tag, "0");
2316    /// ```
2317    pub fn new(token: &str, tag: &str) -> Self {
2318        TaggedString { token: token.to_string(), tag: tag.to_string() }
2319    }
2320}
2321
2322////////////////////////////////////////////////////////////////////////////////
2323/// Linearized symbol row.
2324///
2325/// Represents a row in the linearization table with FId and symbol tables.
2326#[derive(Debug, Clone)]
2327pub struct LinearizedSym {
2328    /// FId.
2329    pub fid: i32,
2330    /// Table of symbols.
2331    pub table: Vec<Vec<Sym>>,
2332}
2333
2334////////////////////////////////////////////////////////////////////////////////
2335/// Active item for parsing.
2336///
2337/// Represents an active parsing item in the chart, with position and components.
2338#[derive(Debug, Clone, PartialEq)]
2339pub struct ActiveItem {
2340    /// Offset.
2341    pub offset: usize,
2342    /// Dot position.
2343    pub dot: usize,
2344    /// Function.
2345    pub fun: RuntimeCncFun,
2346    /// Sequence.
2347    pub seq: Vec<Sym>,
2348    /// Arguments.
2349    pub args: Vec<PArg>,
2350    /// FId.
2351    pub fid: FId,
2352    /// Label.
2353    pub lbl: i32,
2354}
2355
2356impl ActiveItem {
2357    /// Creates a new active item.
2358    ///
2359    /// # Arguments
2360    /// * `offset` - Offset.
2361    /// * `dot` - Dot position.
2362    /// * `fun` - Function.
2363    /// * `seq` - Sequence.
2364    /// * `args` - Arguments.
2365    /// * `fid` - FId.
2366    /// * `lbl` - Label.
2367    ///
2368    /// # Examples
2369    ///
2370    /// ```
2371    /// use gf_core::{ActiveItem, RuntimeCncFun, LinType, PArg, Sym};
2372    /// let fun = RuntimeCncFun::new("Test".to_string(), LinType::Sym(vec![]));
2373    /// let item = ActiveItem::new(0, 0, fun, vec![], vec![], 1, 0);
2374    /// assert_eq!(item.offset, 0);
2375    /// ```
2376    pub fn new(
2377        offset: usize,
2378        dot: usize,
2379        fun: RuntimeCncFun,
2380        seq: Vec<Sym>,
2381        args: Vec<PArg>,
2382        fid: FId,
2383        lbl: i32,
2384    ) -> Self {
2385        ActiveItem { offset, dot, fun, seq, args, fid, lbl }
2386    }
2387
2388    /// Checks equality.
2389    ///
2390    /// Compares all fields for equality.
2391    ///
2392    /// # Arguments
2393    /// * `other` - The other item.
2394    ///
2395    /// # Returns
2396    /// True if equal.
2397    pub fn is_equal(&self, other: &ActiveItem) -> bool {
2398        self.offset == other.offset
2399            && self.dot == other.dot
2400            && self.fun.name == other.fun.name
2401            && self.seq == other.seq
2402            && self.args == other.args
2403            && self.fid == other.fid
2404            && self.lbl == other.lbl
2405    }
2406
2407    /// Shifts over argument.
2408    ///
2409    /// Advances the dot and updates argument fid.
2410    ///
2411    /// # Arguments
2412    /// * `i` - Argument index.
2413    /// * `fid` - New fid.
2414    ///
2415    /// # Returns
2416    /// Updated active item.
2417    pub fn shift_over_arg(&self, i: usize, fid: FId) -> ActiveItem {
2418        let mut args = self.args.clone();
2419        args[i].fid = fid;
2420        ActiveItem {
2421            offset: self.offset,
2422            dot: self.dot + 1,
2423            fun: self.fun.clone(),
2424            seq: self.seq.clone(),
2425            args,
2426            fid: self.fid,
2427            lbl: self.lbl,
2428        }
2429    }
2430
2431    /// Shifts over token.
2432    ///
2433    /// Advances the dot position.
2434    ///
2435    /// # Returns
2436    /// Updated active item.
2437    pub fn shift_over_token(&self) -> ActiveItem {
2438        ActiveItem {
2439            offset: self.offset,
2440            dot: self.dot + 1,
2441            fun: self.fun.clone(),
2442            seq: self.seq.clone(),
2443            args: self.args.clone(),
2444            fid: self.fid,
2445            lbl: self.lbl,
2446        }
2447    }
2448}
2449
2450////////////////////////////////////////////////////////////////////////////////
2451/// Utility to check if value is undefined.
2452///
2453/// # Arguments
2454/// * `value` - The optional value.
2455///
2456/// # Returns
2457/// True if None.
2458///
2459/// # Examples
2460///
2461/// ```
2462/// use gf_core::is_undefined;
2463/// assert!(is_undefined::<i32>(&None));
2464/// assert!(!is_undefined(&Some(42)));
2465/// ```
2466pub fn is_undefined<T>(value: &Option<T>) -> bool {
2467    value.is_none()
2468}
2469
2470////////////////////////////////////////////////////////////////////////////////
2471/// Maps a hashmap with a function.
2472///
2473/// Applies a function to each value in the hashmap.
2474///
2475/// # Arguments
2476/// * `obj` - The hashmap.
2477/// * `fun` - The function to apply.
2478///
2479/// # Returns
2480/// New hashmap with transformed values.
2481///
2482/// # Examples
2483///
2484/// ```
2485/// use gf_core::map_object;
2486/// use std::collections::HashMap;
2487/// let mut map: HashMap<String, i32> = HashMap::new();
2488/// map.insert("a".to_string(), 1);
2489/// let new_map = map_object(&map, |&v| v * 2);
2490/// assert_eq!(new_map.get("a"), Some(&2));
2491/// ```
2492pub fn map_object<K: Eq + std::hash::Hash + Clone, V, F: Fn(&V) -> U, U>(
2493    obj: &HashMap<K, V>,
2494    fun: F,
2495) -> HashMap<K, U> {
2496    obj.iter().map(|(k, v)| (k.clone(), fun(v))).collect()
2497}
2498
2499impl Type {
2500    /// Creates a new type.
2501    ///
2502    /// # Arguments
2503    /// * `args` - Argument categories.
2504    /// * `cat` - Result category.
2505    ///
2506    /// # Examples
2507    ///
2508    /// ```
2509    /// use gf_core::Type;
2510    /// let typ = Type::new(vec!["A".to_string()], "B".to_string());
2511    /// assert_eq!(typ.args, vec!["A".to_string()]);
2512    /// assert_eq!(typ.cat, "B");
2513    /// ```
2514    pub fn new(args: Vec<String>, cat: String) -> Self {
2515        Type { args, cat }
2516    }
2517}
2518
2519impl Apply {
2520    /// Shows the apply rule as string.
2521    ///
2522    /// # Arguments
2523    /// * `cat` - Category.
2524    ///
2525    /// # Returns
2526    /// String representation.
2527    pub fn show(&self, cat: &str) -> String {
2528        format!("{} -> {} [{:?}]", cat, self.get_name(), self.args)
2529    }
2530
2531    /// Checks equality with another apply.
2532    ///
2533    /// # Arguments
2534    /// * `obj` - Other Apply.
2535    ///
2536    /// # Returns
2537    /// True if equal.
2538    pub fn is_equal(&self, obj: &Apply) -> bool {
2539        self.fun == obj.fun && self.args == obj.args
2540    }
2541}
2542
2543impl Coerce {
2544    /// Shows the coerce rule as string.
2545    ///
2546    /// # Arguments
2547    /// * `cat` - Category.
2548    ///
2549    /// # Returns
2550    /// String representation.
2551    pub fn show(&self, cat: &str) -> String {
2552        format!("{} -> _ [{}]", cat, self.arg)
2553    }
2554}
2555
2556impl PArg {
2557    /// Creates a new PArg.
2558    ///
2559    /// # Arguments
2560    /// * `type_` - Type.
2561    /// * `hypos` - Hypos.
2562    /// * `fid` - FId.
2563    ///
2564    /// # Examples
2565    ///
2566    /// ```
2567    /// use gf_core::PArg;
2568    /// let parg = PArg::new("Type".to_string(), vec![1], 2);
2569    /// assert_eq!(parg.type_, "Type");
2570    /// ```
2571    pub fn new(type_: String, hypos: Vec<FId>, fid: FId) -> Self {
2572        PArg { type_, hypos, fid }
2573    }
2574}
2575
2576impl Const {
2577    /// Shows the const rule as string.
2578    ///
2579    /// # Arguments
2580    /// * `cat` - Category.
2581    ///
2582    /// # Returns
2583    /// String representation.
2584    pub fn show(&self, cat: &str) -> String {
2585        format!("{} -> {}", cat, self.lit.print())
2586    }
2587
2588    /// Checks equality with another const.
2589    ///
2590    /// # Arguments
2591    /// * `obj` - Other Const.
2592    ///
2593    /// # Returns
2594    /// True if equal.
2595    pub fn is_equal(&self, obj: &Const) -> bool {
2596        self.lit.is_equal(&obj.lit) && self.toks == obj.toks
2597    }
2598}
2599
2600#[cfg(test)]
2601mod tests {
2602    use super::*;
2603    use std::fs;
2604
2605    /// Test creating a GFGrammar.
2606    #[test]
2607    fn test_gfgrammar_new() {
2608        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
2609        let concretes = HashMap::new();
2610        let grammar = GFGrammar::new(abstract_, concretes);
2611        assert_eq!(grammar.abstract_grammar.startcat, "S");
2612    }
2613
2614    /// Test adding type to abstract.
2615    #[test]
2616    fn test_gfabstract_add_type() {
2617        let mut abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
2618        abstract_.add_type(
2619            "MakeS".to_string(),
2620            vec!["NP".to_string(), "VP".to_string()],
2621            "S".to_string(),
2622        );
2623        assert_eq!(abstract_.get_cat("MakeS"), Some(&"S".to_string()));
2624        assert_eq!(
2625            abstract_.get_args("MakeS"),
2626            Some(&vec!["NP".to_string(), "VP".to_string()])
2627        );
2628    }
2629
2630    /// Test tree parsing.
2631    #[test]
2632    fn test_parse_tree() {
2633        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
2634        let tree = abstract_.parse_tree("MakeS (NP) (VP)", None);
2635        assert!(tree.is_some());
2636        let tree = tree.unwrap();
2637        assert_eq!(tree.name, "MakeS");
2638        assert_eq!(tree.args.len(), 2);
2639    }
2640
2641    /// Test literal handling.
2642    #[test]
2643    fn test_handle_literals() {
2644        let abstract_ = GFAbstract::new("S".to_string(), HashMap::new());
2645        let tree = Fun::new("\"test\"".to_string(), vec![]);
2646        let handled = abstract_.handle_literals(tree, "String");
2647        assert_eq!(handled.name, "String_Literal_\"test\"");
2648    }
2649
2650    /// Test tree equality.
2651    #[test]
2652    fn test_tree_equality() {
2653        let tree1 = Fun::new(
2654            "Test".to_string(),
2655            vec![Fun::new("Arg1".to_string(), vec![])],
2656        );
2657        let tree2 = Fun::new(
2658            "Test".to_string(),
2659            vec![Fun::new("Arg1".to_string(), vec![])],
2660        );
2661        assert!(tree1.is_equal(&tree2));
2662        let tree3 = Fun::new(
2663            "Test".to_string(),
2664            vec![Fun::new("Arg2".to_string(), vec![])],
2665        );
2666        assert!(!tree1.is_equal(&tree3));
2667    }
2668
2669    /// Test importing from JSON.
2670    #[test]
2671    fn test_import_from_json() {
2672        let json_content = fs::read_to_string("grammars/Hello/Hello.json")
2673            .expect("Failed to read Hello.json");
2674        let json: serde_json::Value =
2675            serde_json::from_str(&json_content).expect("Failed to parse JSON");
2676        let pgf: PGF =
2677            serde_json::from_value(json).expect("Failed to deserialize PGF");
2678        let grammar = GFGrammar::from_json(pgf);
2679
2680        // Verify grammar was created successfully
2681        assert_eq!(grammar.abstract_grammar.startcat, "Greeting");
2682        // Note: This test JSON has no concrete grammars
2683    }
2684
2685    /// Test parsing tree.
2686    #[test]
2687    fn test_parse_tree_from_grammar() {
2688        let json_content = fs::read_to_string("grammars/Hello/Hello.json")
2689            .expect("Failed to read Hello.json");
2690        let json: serde_json::Value =
2691            serde_json::from_str(&json_content).expect("Failed to parse JSON");
2692        let pgf: PGF =
2693            serde_json::from_value(json).expect("Failed to deserialize PGF");
2694        let grammar = GFGrammar::from_json(pgf);
2695
2696        let tree = grammar.abstract_grammar.parse_tree("hello world", None);
2697        assert!(tree.is_some(), "Should be able to parse 'hello world'");
2698        let tree = tree.unwrap();
2699        assert_eq!(tree.name, "hello");
2700        assert_eq!(tree.args.len(), 1);
2701        assert_eq!(tree.args[0].name, "world");
2702    }
2703
2704    /// Test English linearization.
2705    #[test]
2706    fn test_linearize_english() {
2707        let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
2708            .expect("Failed to read gf_make_generated.json");
2709    
2710        let json: serde_json::Value =
2711            serde_json::from_str(&json_content).expect("Failed to parse JSON");
2712        let pgf: PGF =
2713            serde_json::from_value(json).expect("Failed to deserialize PGF");
2714        let grammar = GFGrammar::from_json(pgf);
2715
2716        // Create the correct abstract syntax tree: Is(This(Fish), Delicious)
2717        let fish = Fun { name: "Fish".to_string(), args: vec![], type_: None };
2718        let this_fish = Fun { name: "This".to_string(), args: vec![fish], type_: None };
2719        let delicious = Fun { name: "Delicious".to_string(), args: vec![], type_: None };
2720        let tree = Fun { name: "Is".to_string(), args: vec![this_fish, delicious], type_: None };
2721        let linearized = grammar.concretes["FoodEng"].linearize(&tree);
2722        assert_eq!(linearized, "this fish is delicious");
2723    }
2724
2725    /// Test Italian linearization.
2726    #[test]
2727    #[ignore] // Disabled: test JSON has no concrete grammars
2728    fn test_linearize_italian() {
2729        let json_content = fs::read_to_string("grammars/Food/gf_make_generated.json")
2730            .expect("Failed to read gf_make_generated.json");
2731        let json: serde_json::Value =
2732            serde_json::from_str(&json_content).expect("Failed to parse JSON");
2733        let pgf: PGF =
2734            serde_json::from_value(json).expect("Failed to deserialize PGF");
2735        let grammar = GFGrammar::from_json(pgf);
2736
2737        let tree = grammar
2738            .abstract_grammar
2739            .parse_tree("this fish is delicious", None)
2740            .expect("Failed to parse tree");
2741        let linearized = grammar.concretes["FoodEng"].linearize(&tree);
2742        assert_eq!(linearized, "this fish is delicious");
2743    }
2744}