gf_core/
pgf_json.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3use std::fmt;
4
/// A tree node made of a function name and its argument subtrees.
///
/// The methods on this type treat a node whose `name` is `"?"` as a
/// metavariable; `type_` is only consulted when printing such a node.
5#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
6pub struct Fun {
    /// Function name, literal text, or `"?"` for a metavariable.
7    pub name: String,
    /// Child trees, in argument order.
8    pub args: Vec<Fun>,
    /// Optional type annotation; `Fun::new` leaves it as `None`.
9    pub type_: Option<String>,
10}
11
/// Root of a (de)serialized grammar: one abstract part plus a map of
/// concrete parts.
12#[derive(Serialize, Deserialize, Debug)]
13pub struct PGF {
    /// Stored under the JSON key `abstract` (a reserved word in Rust, hence
    /// the trailing underscore on the field name).
14    #[serde(rename = "abstract")]
15    pub abstract_: Abstract,
    /// Concrete parts keyed by string — presumably the concrete syntax name;
    /// confirm against the producer of the JSON.
16    pub concretes: HashMap<String, Concrete>,
17}
18
/// The abstract part of a grammar: a name, a start category and a table of
/// function entries.
19#[derive(Serialize, Deserialize, Debug)]
20pub struct Abstract {
21    pub name: String,
22    pub startcat: String,
    /// Function entries keyed by string — presumably the function name;
    /// confirm against the producer of the JSON.
23    pub funs: HashMap<String, AbsFun>,
24}
25
/// One concrete part of a grammar: flags, productions, linearization
/// functions, symbol sequences and categories.
26#[derive(Serialize, Deserialize, Debug)]
27pub struct Concrete {
    /// Free-form string-to-string settings.
28    pub flags: HashMap<String, String>,
    /// Productions grouped under an integer key — presumably a category id;
    /// TODO confirm.
29    pub productions: HashMap<i32, Vec<Production>>,
30    pub functions: Vec<CncFun>,
    /// Each inner vector is one sequence of symbols.
31    pub sequences: Vec<Vec<Sym>>,
32    pub categories: HashMap<String, Category>,
33    pub totalfids: i32,
34}
35/// Entry for one abstract function: a list of argument strings and a category string.
///
/// NOTE(review): `args` presumably holds the argument category names and
/// `cat` the result category — confirm against the JSON producer.
36#[derive(Serialize, Deserialize, Debug)]
37pub struct AbsFun {
38    pub args: Vec<String>,
39    pub cat: String,
40}
41
/// A production rule in one of three shapes.
///
/// The enum is internally tagged: serde stores the variant name in a `type`
/// field alongside the variant's own fields.
42#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
43#[serde(tag = "type")]
44pub enum Production {
45    Apply(Apply),
46    Coerce(Coerce),
47    Const(Const),
48}
49
/// Application production: a function reference plus its arguments.
///
/// The function is referenced either by id (`fid`) or by an inline `CncFun`
/// (`fun`). Both fields default to `None` when absent from the input;
/// `Apply::new` fills in exactly one of them.
50#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
51pub struct Apply {
52    #[serde(default)]
53    pub fid: Option<i32>,
54    #[serde(default)]
55    pub fun: Option<CncFun>,
56    pub args: Vec<PArg>,
57}
58
/// Function reference used by `Apply`: either a numeric id or a full
/// `CncFun` value.
59#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
60pub enum ApplyFun {
    /// Reference by integer id.
61    FId(i32),
    /// Reference carrying the function itself.
62    CncFun(CncFun),
63}
64
/// Constant production: a literal tree together with a list of token strings.
65#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
66pub struct Const {
67    pub lit: Fun,
68    pub toks: Vec<String>,
69}
70
/// Coercion production carrying a single integer argument.
///
/// NOTE(review): `arg` is presumably the id of the category being coerced —
/// confirm against the code that consumes productions.
71#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
72pub struct Coerce {
73    pub arg: i32,
74}
75
/// One argument of an `Apply` production.
76#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
77pub struct PArg {
    /// Stored under the JSON key `type` (a reserved word in Rust).
78    #[serde(rename = "type")]
79    pub type_: String,
80    pub hypos: Vec<i32>,
81    pub fid: i32,
82}
83
/// A named concrete function with a list of integers in `lins`.
///
/// NOTE(review): `lins` presumably indexes into `Concrete::sequences` —
/// confirm against the linearizer.
84#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
85pub struct CncFun {
86    pub name: String,
87    pub lins: Vec<i32>,
88}
89
/// Linearization data in one of two forms: a list of integers or a list of
/// symbol sequences.
90#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
91pub enum LinType {
92    FId(Vec<i32>),
93    Sym(Vec<Vec<Sym>>),
94}
95
/// A symbol inside a sequence.
///
/// `Serialize` is derived, but `Deserialize` is implemented by hand in this
/// file and expects objects of the form `{"type": ..., "args": [...]}`.
96#[derive(Serialize, Clone, Debug, PartialEq)]
97pub enum Sym {
    /// Read from a two-element `args` array as `[i, label]`.
98    SymCat { i: usize, label: usize },
    /// Read from a two-element `args` array as `[i, label]`.
99    SymLit { i: usize, label: usize },
    /// A list of plain token strings.
100    SymKS(SymKS),
    /// Tokens together with alternatives (`Alt`).
101    SymKP(SymKP),
102}
103
104// Custom deserializer for Sym
105impl<'de> Deserialize<'de> for Sym {
106    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
107    where
108        D: serde::Deserializer<'de>,
109    {
110        use serde_json::Value;
111        let value: Value = Deserialize::deserialize(deserializer)?;
112
113        let type_str = value["type"]
114            .as_str()
115            .ok_or_else(|| serde::de::Error::missing_field("type"))?;
116
117        match type_str {
118            "SymCat" => {
119                let args = value["args"]
120                    .as_array()
121                    .ok_or_else(|| serde::de::Error::missing_field("args"))?;
122                if args.len() != 2 {
123                    return Err(serde::de::Error::custom(
124                        "SymCat args must have 2 elements",
125                    ));
126                }
127                let i = args[0].as_u64().unwrap_or(0) as usize;
128                let label = args[1].as_u64().unwrap_or(0) as usize;
129                Ok(Sym::SymCat { i, label })
130            }
131            "SymLit" => {
132                let args = value["args"]
133                    .as_array()
134                    .ok_or_else(|| serde::de::Error::missing_field("args"))?;
135                if args.len() != 2 {
136                    return Err(serde::de::Error::custom(
137                        "SymLit args must have 2 elements",
138                    ));
139                }
140                let i = args[0].as_u64().unwrap_or(0) as usize;
141                let label = args[1].as_u64().unwrap_or(0) as usize;
142                Ok(Sym::SymLit { i, label })
143            }
144            "SymKS" => {
145                let args = value["args"]
146                    .as_array()
147                    .ok_or_else(|| serde::de::Error::missing_field("args"))?;
148                let tokens: Vec<String> = args
149                    .iter()
150                    .map(|v| v.as_str().unwrap_or("").to_string())
151                    .collect();
152                Ok(Sym::SymKS(SymKS::new(tokens)))
153            }
154            "SymKP" => {
155                // SymKP is more complex - it has nested structure
156                let args = value["args"]
157                    .as_array()
158                    .ok_or_else(|| serde::de::Error::missing_field("args"))?;
159
160                let mut tokens = Vec::new();
161                let mut alts = Vec::new();
162
163                // Parse the nested structure
164                for arg_group in args {
165                    if let Some(arr) = arg_group.as_array() {
166                        for item in arr {
167                            if let Some(obj) = item.as_object() {
168                                if obj.get("type").and_then(|v| v.as_str())
169                                    == Some("SymKS")
170                                {
171                                    if let Some(item_args) = obj
172                                        .get("args")
173                                        .and_then(|v| v.as_array())
174                                    {
175                                        let item_tokens: Vec<String> =
176                                            item_args
177                                                .iter()
178                                                .map(|v| {
179                                                    v.as_str()
180                                                        .unwrap_or("")
181                                                        .to_string()
182                                                })
183                                                .collect();
184                                        tokens.push(SymKS::new(item_tokens));
185                                    }
186                                } else if obj
187                                    .get("type")
188                                    .and_then(|v| v.as_str())
189                                    == Some("Alt")
190                                {
191                                    if let Some(alt_args) = obj
192                                        .get("args")
193                                        .and_then(|v| v.as_array())
194                                    {
195                                        if alt_args.len() >= 2 {
196                                            // First element is tokens array, second is prefixes
197                                            let mut alt_tokens = Vec::new();
198                                            if let Some(tokens_arr) =
199                                                alt_args[0].as_array()
200                                            {
201                                                for token_item in tokens_arr {
202                                                    if let Some(token_obj) =
203                                                        token_item.as_object()
204                                                    {
205                                                        if let Some(
206                                                            token_args,
207                                                        ) = token_obj
208                                                            .get("args")
209                                                            .and_then(|v| {
210                                                                v.as_array()
211                                                            })
212                                                        {
213                                                            let item_tokens: Vec<String> = token_args.iter()
214                                                                .map(|v| v.as_str().unwrap_or("").to_string())
215                                                                .collect();
216                                                            alt_tokens.push(SymKS::new(item_tokens));
217                                                        }
218                                                    }
219                                                }
220                                            }
221                                            let prefixes: Vec<String> =
222                                                alt_args[1]
223                                                    .as_array()
224                                                    .map(|arr| {
225                                                        arr.iter()
226                                                            .map(|v| {
227                                                                v.as_str()
228                                                                    .unwrap_or(
229                                                                        "",
230                                                                    )
231                                                                    .to_string(
232                                                                    )
233                                                            })
234                                                            .collect()
235                                                    })
236                                                    .unwrap_or_default();
237                                            alts.push(Alt::new(
238                                                alt_tokens, prefixes,
239                                            ));
240                                        }
241                                    }
242                                }
243                            }
244                        }
245                    }
246                }
247
248                Ok(Sym::SymKP(SymKP::new(tokens, alts)))
249            }
250            _ => Err(serde::de::Error::custom(format!(
251                "Unknown symbol type: {type_str}"
252            ))),
253        }
254    }
255}
256
/// A symbol holding a list of plain token strings.
257#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
258pub struct SymKS {
    /// Kind marker; `SymKS::new` sets it to `"KS"`.
259    pub id: String,
260    pub tokens: Vec<String>,
    /// Optional tag; `None` from `SymKS::new`, set by `tag_with`.
261    pub tag: Option<String>,
262}
263
/// A symbol holding default tokens plus a list of alternatives.
264#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
265pub struct SymKP {
    /// Kind marker; `SymKP::new` sets it to `"KP"`.
266    pub id: String,
267    pub tokens: Vec<SymKS>,
268    pub alts: Vec<Alt>,
    /// Optional tag; `None` from `SymKP::new`, set by `tag_with`.
269    pub tag: Option<String>,
270}
271
/// One alternative of a `SymKP`: a list of tokens paired with a list of
/// prefix strings.
///
/// NOTE(review): presumably the tokens are chosen when the following text
/// starts with one of `prefixes` — confirm against the linearizer.
272#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
273pub struct Alt {
274    pub tokens: Vec<SymKS>,
275    pub prefixes: Vec<String>,
276}
277
/// A loosely typed symbol argument.
///
/// The enum is untagged, so serde tries the variants in the order declared
/// here and keeps the first one that matches the input.
278#[derive(Serialize, Deserialize, Debug, Clone)]
279#[serde(untagged)]
280pub enum SymArg {
281    Number(i32),
282    Text(String),
283    TextVec(Vec<String>),
284    SymVec(Vec<Sym>),
285}
286
287/// A syntactic constituent category in a parse tree of a sentence.
///
/// NOTE(review): `start` and `end` presumably delimit a range of category
/// ids — confirm against the code that reads `Concrete::categories`.
288#[derive(Serialize, Deserialize, Debug)]
289pub struct Category {
290    pub start: i32,
291    pub end: i32,
292}
293
294impl Fun {
295    /// Creates a new function tree.
296    pub fn new(name: String, args: Vec<Fun>) -> Self {
297        Fun { name, args, type_: None }
298    }
299
300    /// Prints the tree as a string.
301    pub fn print(&self) -> String {
302        self.show(0)
303    }
304
305    /// Shows the tree with precedence.
306    fn show(&self, prec: usize) -> String {
307        if self.is_meta() {
308            if let Some(ref t) = self.type_ {
309                let mut s = format!("?:{t}");
310                if prec > 0 {
311                    s = format!("({s})");
312                }
313                s
314            } else {
315                "?".to_string()
316            }
317        } else {
318            let mut s = self.name.clone();
319            for arg in &self.args {
320                s.push(' ');
321                s.push_str(&arg.show(1));
322            }
323            if prec > 0 && !self.args.is_empty() {
324                s = format!("({s})");
325            }
326            s
327        }
328    }
329
330    /// Gets argument by index.
331    pub fn get_arg(&self, i: usize) -> Option<&Fun> {
332        self.args.get(i)
333    }
334
335    /// Sets argument by index.
336    pub fn set_arg(&mut self, i: usize, c: Fun) {
337        if i < self.args.len() {
338            self.args[i] = c;
339        }
340    }
341
342    /// Checks if this is a meta variable.
343    pub fn is_meta(&self) -> bool {
344        self.name == "?"
345    }
346
347    /// Checks if the tree is complete (no metas).
348    pub fn is_complete(&self) -> bool {
349        if self.is_meta() {
350            false
351        } else {
352            self.args.iter().all(|arg| arg.is_complete())
353        }
354    }
355
356    /// Checks if this is a literal.
357    pub fn is_literal(&self) -> bool {
358        self.name.starts_with('"')
359            || self.name.starts_with('-')
360            || self.name.chars().next().is_some_and(|c| c.is_ascii_digit())
361    }
362
363    /// Checks if this is a string literal.
364    pub fn is_string(&self) -> bool {
365        self.name.starts_with('"') && self.name.ends_with('"')
366    }
367
368    /// Checks if this is an integer literal.
369    pub fn is_int(&self) -> bool {
370        self.name.parse::<i32>().is_ok()
371    }
372
373    /// Checks if this is a float literal.
374    pub fn is_float(&self) -> bool {
375        self.name.parse::<f64>().is_ok()
376            && self.name != "."
377            && self.name != "-."
378    }
379
380    /// Checks equality with another tree.
381    pub fn is_equal(&self, other: &Fun) -> bool {
382        if self.name != other.name || self.args.len() != other.args.len() {
383            return false;
384        }
385        self.args.iter().zip(&other.args).all(|(a, b)| a.is_equal(b))
386    }
387}
388
389impl fmt::Display for Fun {
390    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
391        write!(f, "{}", self.print())
392    }
393}
394
395impl Apply {
396    /// Creates a new apply rule.
397    pub fn new(fun: ApplyFun, args: Vec<PArg>) -> Self {
398        match fun {
399            ApplyFun::FId(id) => Apply { fid: Some(id), fun: None, args },
400            ApplyFun::CncFun(cnc_fun) => {
401                Apply { fid: None, fun: Some(cnc_fun), args }
402            }
403        }
404    }
405
406    /// Converts to runtime ApplyFun.
407    pub fn to_apply_fun(&self) -> ApplyFun {
408        if let Some(fid) = self.fid {
409            ApplyFun::FId(fid)
410        } else if let Some(ref fun) = self.fun {
411            ApplyFun::CncFun(fun.clone())
412        } else {
413            // This should not happen in well-formed data
414            ApplyFun::FId(-1) // Use sentinel value instead of panic
415        }
416    }
417
418    /// Gets the name of the function for debugging.
419    pub fn get_name(&self) -> String {
420        self.to_apply_fun().get_name()
421    }
422}
423
424impl ApplyFun {
425    /// Gets the name of the function.
426    pub fn get_name(&self) -> String {
427        match self {
428            ApplyFun::FId(id) => id.to_string(),
429            ApplyFun::CncFun(fun) => fun.name.clone(),
430        }
431    }
432
433    /// Gets the ID if FId.
434    pub fn get_id(&self) -> i32 {
435        match self {
436            ApplyFun::FId(id) => *id,
437            ApplyFun::CncFun(_) => -1, // CncFun doesn't have an ID, return sentinel
438        }
439    }
440}
441
442impl Const {
443    /// Creates a new const rule.
444    pub fn new(lit: Fun, toks: Vec<String>) -> Self {
445        Const { lit, toks }
446    }
447}
448
449impl Coerce {
450    /// Creates a new coerce rule.
451    pub fn new(arg: i32) -> Self {
452        Coerce { arg }
453    }
454}
455
456impl SymKS {
457    /// Creates a new SymKS.
458    pub fn new(tokens: Vec<String>) -> Self {
459        SymKS { id: "KS".to_string(), tokens, tag: None }
460    }
461
462    /// Shows as string.
463    pub fn show(&self) -> String {
464        format!("\"{:?}\"", self.tokens)
465    }
466
467    /// Tags the symbol.
468    pub fn tag_with(&self, tag: &str) -> SymKS {
469        SymKS {
470            id: self.id.clone(),
471            tokens: self.tokens.clone(),
472            tag: Some(tag.to_string()),
473        }
474    }
475}
476
477impl SymKP {
478    /// Creates a new SymKP.
479    pub fn new(tokens: Vec<SymKS>, alts: Vec<Alt>) -> Self {
480        SymKP { id: "KP".to_string(), tokens, alts, tag: None }
481    }
482
483    /// Shows as string.
484    pub fn show(&self) -> String {
485        format!("\"{:?}\"", self.tokens)
486    }
487
488    /// Tags the phrase.
489    pub fn tag_with(&self, tag: &str) -> SymKP {
490        SymKP {
491            id: self.id.clone(),
492            tokens: self.tokens.clone(),
493            alts: self.alts.clone(),
494            tag: Some(tag.to_string()),
495        }
496    }
497}
498
499impl Alt {
500    /// Creates a new Alt.
501    pub fn new(tokens: Vec<SymKS>, prefixes: Vec<String>) -> Self {
502        Alt { tokens, prefixes }
503    }
504}
505
506impl CncFun {
507    /// Creates a new CncFun.
508    pub fn new(name: String, lins: Vec<i32>) -> Self {
509        CncFun { name, lins }
510    }
511}