doke 0.3.0

The parsing library of `Dokedex`, a game-content authoring tool that uses natural-looking language to define a game's objects. This Rust crate defines an API for building `doke` parsers with a simple pipeline syntax, and provides helpers for templating, debugging, and common syntax elements.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
// src/parsers/typed_sentences.rs
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};

use glob::glob;
use hashlink::LinkedHashMap;
use thiserror::Error;
use yaml_rust2::Yaml;

use crate::parsers::sentence::SentenceParser;
use crate::{DokeNode, DokeNodeState, DokeParser, GodotValue};

/// Errors produced while loading a [`TypedSentencesParser`] configuration
/// or the sentence-parser definition files it references.
#[derive(Debug, Error)]
pub enum TypedSentencesError {
    /// The rules config or a referenced definition file was not valid YAML.
    #[error("YAML parse error: {0}")]
    YamlParseError(String),

    /// A rule entry was structurally invalid (e.g. missing `for`/`parser`),
    /// or a loaded definition file could not be turned into a parser.
    #[error("Invalid rule configuration: {0}")]
    InvalidRule(String),

    /// No rule's sentence parser resolved the node.
    // NOTE(review): not constructed anywhere in this file — presumably
    // raised by callers or planned; confirm before removing.
    #[error("No matching sentence parser for node")]
    NoMatchingParser,

    /// A config or definition file could not be read, or a glob pattern
    /// matched no usable files.
    #[error("File error: {0}")]
    FileError(String),

    /// A parser reference contained a syntactically invalid glob pattern.
    #[error("Glob pattern error: {0}")]
    GlobError(String),
}

/// A glob pattern plus the directory it is resolved against; together they
/// identify the definition file(s) a rule loads its sentence parser from.
#[derive(Debug, Clone)]
pub struct ParserReference {
    /// Glob pattern, joined onto `base_dir` before matching.
    pub pattern: String,
    /// Directory the pattern is resolved relative to (typically the
    /// directory containing the rules config file).
    pub base_dir: PathBuf,
}

/// Which abstract types a rule accepts, in either of two config shapes.
#[derive(Debug, Clone)]
pub enum ChildSpec {
    /// Old syntax: `children: [ItemEffect, DamageEffect]`
    Simple(Vec<String>),
    /// New syntax: `children: {damage_effects: [DamageEffect], other_effects: [ItemEffect]}`
    /// — types grouped under named fields.
    Structured(HashMap<String, Vec<String>>),
}

impl ChildSpec {
    /// Returns true if `child_abstract_type` appears anywhere in this spec
    /// (in the flat list, or under any field of the structured form).
    fn allowed(&self, child_abstract_type: &str) -> bool {
        match self {
            // Compare as &str instead of allocating a String per lookup
            // (the old `contains(&s.to_string())` form).
            ChildSpec::Simple(items) => items.iter().any(|t| t == child_abstract_type),
            ChildSpec::Structured(by_field) => by_field
                .values()
                .any(|types| types.iter().any(|t| t == child_abstract_type)),
        }
    }
}

/// One entry of the config's `rules` array: binds a target abstract type
/// to the sentence parser used to resolve nodes of that type.
#[derive(Debug, Clone)]
pub struct TypeRule {
    /// Abstract type this rule produces (the config's `for` field).
    pub target_type: String,
    /// Where the sentence-parser definition file(s) live (`parser` field).
    pub parser_ref: ParserReference,
    /// Rules with higher priority are tried first (`priority` field; 0 if absent).
    pub priority: i32,
    /// Allowed child types (`children` field). Renamed from `allowed_children`.
    pub children: ChildSpec,
    /// The loaded parser. A placeholder until `from_config` replaces it.
    pub sentence_parser: SentenceParser,
}

/// A [`DokeParser`] that resolves nodes by trying a prioritized list of
/// type rules, each backed by its own sentence parser.
#[derive(Debug)]
pub struct TypedSentencesParser {
    // Kept sorted by priority, highest first (see `from_config`).
    rules: Vec<TypeRule>,
}

impl TypedSentencesParser {
    pub fn from_config_file(config_path: &Path) -> Result<Self, TypedSentencesError> {
        let config_content = fs::read_to_string(config_path)
            .map_err(|e| TypedSentencesError::FileError(e.to_string()))?;

        let base_dir = config_path.parent().unwrap_or(Path::new(".")).to_path_buf();

        Self::from_config(&config_content, &base_dir)
    }

    pub fn from_config(config: &str, base_dir: &Path) -> Result<Self, TypedSentencesError> {
        let docs = yaml_rust2::YamlLoader::load_from_str(config)
            .map_err(|e| TypedSentencesError::YamlParseError(e.to_string()))?;

        let doc = docs
            .first()
            .ok_or(TypedSentencesError::YamlParseError("Empty YAML".into()))?;

        let mut rules = Vec::new();

        if let Yaml::Hash(root) = doc {
            if let Some(Yaml::Array(rules_array)) = root.get(&Yaml::String("rules".into())) {
                for rule_config in rules_array {
                    if let Yaml::Hash(rule_hash) = rule_config {
                        let rule = Self::parse_rule(rule_hash, base_dir)?;
                        rules.push(rule);
                    }
                }
            }
        }

        // Load the actual sentence parsers from the referenced files
        let mut loaded_rules = Vec::new();
        for rule in rules {
            let sentence_parser =
                Self::load_parser_from_reference(&rule.parser_ref, rule.target_type.clone())?;

            loaded_rules.push(TypeRule {
                sentence_parser,
                target_type: rule.target_type.clone(),
                priority: rule.priority,
                children: ChildSpec::Simple(vec![]),
                parser_ref: rule.parser_ref,
            });
        }

        // Sort by priority (highest first)
        loaded_rules.sort_by(|a, b| b.priority.cmp(&a.priority));

        Ok(Self {
            rules: loaded_rules,
        })
    }

    fn parse_rule(
        rule_hash: &LinkedHashMap<Yaml, Yaml>,
        base_dir: &Path,
    ) -> Result<TypeRule, TypedSentencesError> {
        let mut target_type = None;
        let mut parser_pattern = None;
        let mut priority = 0;
        let mut children = ChildSpec::Simple(Vec::new());

        for (key, value) in rule_hash {
            if let Yaml::String(key_str) = key {
                match key_str.as_str() {
                    "for" => {
                        if let Yaml::String(type_str) = value {
                            target_type = Some(type_str.clone());
                        }
                    }
                    "parser" => {
                        if let Yaml::String(pattern) = value {
                            parser_pattern = Some(pattern.clone());
                        }
                    }
                    "priority" => {
                        if let Yaml::Integer(prio) = value {
                            priority = *prio as i32;
                        }
                    }
                    "children" => {
                        if let Ok(spec) = Self::parse_child_spec(value) {
                            children = spec
                        }
                    }
                    _ => {}
                }
            }
        }

        let target_type = target_type.ok_or(TypedSentencesError::InvalidRule(
            "Missing 'for' field".into(),
        ))?;
        let parser_pattern = parser_pattern.ok_or(TypedSentencesError::InvalidRule(
            "Missing 'parser' field".into(),
        ))?;

        Ok(TypeRule {
            target_type: target_type.clone(),
            parser_ref: ParserReference {
                pattern: parser_pattern,
                base_dir: base_dir.to_path_buf(),
            },
            priority,
            children,
            sentence_parser: SentenceParser {
                phrases: Vec::new(),
                type_patterns: HashMap::new(),
                abstract_type: "".into(),
                children_map: HashMap::new(),
            }, // Temporary placeholder
        })
    }

    fn parse_child_spec(yaml: &Yaml) -> Result<ChildSpec, TypedSentencesError> {
        match yaml {
            // Old syntax: children: [ItemEffect, DamageEffect]
            Yaml::Array(children_array) => {
                let mut child_types = Vec::new();
                for child in children_array {
                    if let Yaml::String(child_type) = child {
                        child_types.push(child_type.clone());
                    }
                }
                Ok(ChildSpec::Simple(child_types))
            }
            // New syntax: children: {damage_effects: [DamageEffect], other_effects: [ItemEffect]}
            Yaml::Hash(children_map) => {
                let mut structured_children = HashMap::new();
                for (field_name, child_types) in children_map {
                    if let Yaml::String(field_str) = field_name {
                        if let Yaml::Array(types_array) = child_types {
                            let mut types_vec = Vec::new();
                            for child_type in types_array {
                                if let Yaml::String(type_str) = child_type {
                                    types_vec.push(type_str.clone());
                                }
                            }
                            structured_children.insert(field_str.clone(), types_vec);
                        }
                    }
                }
                Ok(ChildSpec::Structured(structured_children))
            }
            _ => Ok(ChildSpec::Simple(Vec::new())), // Empty if invalid
        }
    }
    fn load_parser_from_reference(
        parser_ref: &ParserReference,
        abstract_type: String,
    ) -> Result<SentenceParser, TypedSentencesError> {
        let mut config_content = String::new();
        let mut found_files = Vec::new();

        let full_pattern = parser_ref
            .base_dir
            .join(&parser_ref.pattern)
            .to_string_lossy()
            .into_owned();

        let glob_iter = glob(&full_pattern).map_err(|e| {
            TypedSentencesError::GlobError(format!(
                "Invalid glob pattern '{}': {}",
                full_pattern, e
            ))
        })?;

        for entry in glob_iter {
            match entry {
                Ok(path) => {
                    if path.is_file() && is_dokedef_file(&path) {
                        match fs::read_to_string(&path) {
                            Ok(content) => {
                                config_content.push_str(&content);
                                config_content.push_str("\n---\n");
                                found_files.push(path);
                            }
                            Err(e) => {
                                println!("Warning: Could not read file {}: {}", path.display(), e);
                            }
                        }
                    }
                }
                Err(e) => {
                    println!(
                        "Warning: Error accessing file in pattern {}: {}",
                        full_pattern, e
                    );
                }
            }
        }

        if found_files.is_empty() {
            return Err(TypedSentencesError::FileError(format!(
                "No .dokedef.yaml files found for pattern: {} (searched: {})",
                parser_ref.pattern, full_pattern
            )));
        }

        println!(
            "Loaded parser from {} files: {:?}",
            found_files.len(),
            found_files
        );

        SentenceParser::from_yaml(abstract_type, &config_content).map_err(|e| {
            TypedSentencesError::InvalidRule(format!(
                "Failed to parse YAML from {} files: {}",
                found_files.len(),
                e
            ))
        })
    }

    fn rule_matches_parent(&self, rule: &TypeRule, parent_abstract_type: Option<&str>) -> bool {
        parent_abstract_type.map_or(true, |parent_type| {
            let child_spec = &rule.children;
            child_spec.allowed(parent_type)
        })
    }

    fn try_process_with_rule(
        &self,
        node: &mut DokeNode,
        frontmatter: &HashMap<String, GodotValue>,
        rule: &TypeRule,
    ) -> bool {
        // Store original state manually (simplified approach)
        let was_unresolved = matches!(node.state, DokeNodeState::Unresolved);

        rule.sentence_parser.process(node, frontmatter);

        if let DokeNodeState::Resolved(_) = &node.state {
            node.parse_data.insert(
                "abstract_type".to_string(),
                GodotValue::String(rule.target_type.clone()),
            );
            true
        } else {
            // If we didn't resolve it, restore the unresolved state
            if was_unresolved {
                node.state = DokeNodeState::Unresolved;
            }
            false
        }
    }

    fn process_node_recursive(
        &self,
        node: &mut DokeNode,
        frontmatter: &HashMap<String, GodotValue>,
        parent_abstract_type: Option<&str>,
        depth: usize,
    ) {
        if depth > 100 {
            return;
        }

        if let DokeNodeState::Unresolved = &node.state {
            let mut candidate_rules: Vec<&TypeRule> = self
                .rules
                .iter()
                .filter(|rule| self.rule_matches_parent(rule, parent_abstract_type))
                .collect();

            candidate_rules.sort_by(|a, b| b.priority.cmp(&a.priority));

            for rule in candidate_rules {
                if self.try_process_with_rule(node, frontmatter, rule) {
                    break;
                }
            }

            if let DokeNodeState::Unresolved = &node.state {
                let mut all_rules: Vec<&TypeRule> = self.rules.iter().collect();
                all_rules.sort_by(|a, b| b.priority.cmp(&a.priority));

                for rule in all_rules {
                    if self.try_process_with_rule(node, frontmatter, rule) {
                        break;
                    }
                }
            }
        }

        let current_abstract_type = if let DokeNodeState::Resolved(_) = &node.state {
            node.parse_data.get("abstract_type").and_then(|v| {
                if let GodotValue::String(s) = v {
                    Some(s.as_str())
                } else {
                    None
                }
            })
        } else {
            None
        };

        for child in &mut node.children {
            self.process_node_recursive(child, frontmatter, current_abstract_type, depth + 1);
        }

        for constituent in node.constituents.values_mut() {
            self.process_node_recursive(constituent, frontmatter, current_abstract_type, depth + 1);
        }
    }

    pub fn debug_glob_pattern(
        &self,
        pattern: &str,
        base_dir: &Path,
    ) -> Result<Vec<PathBuf>, TypedSentencesError> {
        let full_pattern = base_dir.join(pattern).to_string_lossy().into_owned();
        let mut results = Vec::new();

        for entry in
            glob(&full_pattern).map_err(|e| TypedSentencesError::GlobError(e.to_string()))?
        {
            match entry {
                Ok(path) => results.push(path),
                Err(e) => println!("Warning: {}", e),
            }
        }

        Ok(results)
    }
}

impl DokeParser for TypedSentencesParser {
    /// Entry point of the pipeline stage: resolves the whole node tree,
    /// starting with no parent type at depth 0.
    fn process(&self, node: &mut DokeNode, frontmatter: &HashMap<String, GodotValue>) {
        self.process_node_recursive(node, frontmatter, None, 0);
    }
}

/// Heuristic filter for sentence-parser definition files: the path must
/// carry a `.yaml`/`.yml` extension, and its stem must mention "doke"
/// (which also covers "dokedef") or end in "Parser".
fn is_dokedef_file(path: &Path) -> bool {
    let is_yaml = matches!(path.extension(), Some(ext) if ext == "yaml" || ext == "yml");
    if !is_yaml {
        return false;
    }

    path.file_stem().map_or(false, |stem| {
        let stem = stem.to_string_lossy();
        stem.contains("dokedef") || stem.contains("doke") || stem.ends_with("Parser")
    })
}