// Source: poe_data_tools_cli/commands/dump_tables_json.rs

1use std::{
2    collections::HashMap,
3    fs::{self, File},
4    io::BufWriter,
5    path::Path,
6};
7
8use anyhow::{Context, Result, ensure};
9use glob::{MatchOptions, Pattern};
10use poe_data_tools::{
11    Patch,
12    dat::{
13        ivy_schema::{Enumeration, SchemaCollection, fetch_schema, load_schema},
14        parser::create_parser,
15    },
16    file_parsers::{
17        FileParser,
18        dat::{DatParser, types::DatFile},
19    },
20    fs::{FS, FileSystem},
21};
22use winnow::Parser;
23
24use crate::VERBOSE;
25
26fn resolve_enum(schema: &Enumeration) -> Vec<serde_json::Value> {
27    std::iter::repeat_n(serde_json::Value::Null, schema.indexing)
28        .chain(schema.enumerators.iter().map(|e| match e {
29            Some(value) => serde_json::Value::String(value.clone()),
30            None => serde_json::Value::Null,
31        }))
32        .collect()
33}
34
35type ResolvedKeys = HashMap<String, Option<Vec<serde_json::Value>>>;
36
37/// Depth-first resolution of table keys
38fn resolve_keys(
39    fs: &mut FS,
40    schemas: &SchemaCollection,
41    version: &Patch,
42    keys: &mut ResolvedKeys,
43    table_name: &str,
44    resolve_keys_stack: &mut Vec<String>,
45) -> anyhow::Result<()> {
46    let schema = schemas
47        .tables
48        .iter()
49        .find(|s| s.name.eq_ignore_ascii_case(table_name))
50        .context("Failed to find schema for table")?;
51
52    let mut keys_columns = schema.primary_keys().collect::<Vec<_>>();
53    if keys_columns.is_empty()
54        && let Some(col_name) = schema.column_names().next()
55    {
56        // Fall back to first column as key for tables without any key
57        log::debug!(
58            "No keys for table {:?}, falling back to first column: {:?}",
59            schema.name,
60            col_name
61        );
62        keys_columns.push(col_name);
63    }
64
65    let ref_keys = schema
66        .enumerate()
67        // Select key columns that are references
68        .filter_map(|(name, c)| {
69            keys_columns
70                .contains(&name)
71                .then_some(c.get_ref())
72                .flatten()
73        })
74        .map(|s| s.to_lowercase())
75        // And only ones that have not yet had their keys resolved
76        .filter(|table_name| !keys.contains_key(table_name))
77        .collect::<Vec<_>>();
78
79    if !ref_keys.is_empty() {
80        // This table is not yet ready to be resolved. Push children to stack and go again
81        log::debug!("Table not yet resolvable: {table_name}");
82        resolve_keys_stack.push(table_name.to_owned());
83        resolve_keys_stack.extend(ref_keys);
84        return Ok(());
85    }
86
87    // All reference keys have been resolved, so this table can be resolved
88    // Load up this file's contents
89    let filename = match version.major() {
90        1 => format!("data/{}.datc64", table_name),
91        2 => format!("data/balance/{}.datc64", table_name),
92        _ => unreachable!("Invalid major version"),
93    };
94    let bytes = fs.read(&filename).context("Failed to read file contents")?;
95    let contents = DatParser
96        .parse(&bytes)
97        .as_anyhow()
98        .context("Failed to parse dat file")?;
99
100    let DatFile {
101        rows,
102        variable_data,
103    } = contents;
104
105    // FIXME: Figure out a way to give variable section to the parser without leaking it to a
106    //          'static lifetime
107    let variable_section = Box::leak(Box::new(variable_data.clone()));
108    let parsed = {
109        let mut parser = create_parser(keys, variable_section, schema);
110
111        rows.iter()
112            .map(|row| parser.parse(row).unwrap_or(serde_json::Value::Null))
113            .collect::<Vec<_>>()
114    };
115
116    // Extract keys from the parsed table
117    let key_values = (!keys_columns.is_empty()).then(|| {
118        // Try get the corresponding values for them
119        parsed
120            .iter()
121            .map(|row| {
122                let keys = keys_columns
123                    .iter()
124                    .map(|k| row.get(k).unwrap_or(&serde_json::Value::Null).clone())
125                    .collect::<Vec<_>>();
126
127                // If there's multiple primary keys, use a list
128                match keys.len() {
129                    0 => unreachable!(),
130                    1 => keys[0].clone(),
131                    _ => serde_json::Value::Array(keys),
132                }
133            })
134            .collect::<Vec<_>>()
135    });
136
137    log::debug!("Resolved keys for table: {table_name}");
138    if keys.insert(table_name.to_owned(), key_values).is_some() {
139        unreachable!("Keys already present for {:?}", table_name);
140    }
141
142    Ok(())
143}
144
145fn resolve_table(
146    fs: &mut FS,
147    schemas: &SchemaCollection,
148    version: &Patch,
149    keys: &mut ResolvedKeys,
150    table_name: &str,
151) -> anyhow::Result<Vec<serde_json::Value>> {
152    let schema = schemas
153        .tables
154        .iter()
155        .find(|s| s.name.eq_ignore_ascii_case(table_name))
156        .context("Failed to find schema for table")?;
157
158    // Start off with all unresolved children in the stack
159    let mut resolve_keys_stack = schema
160        .references()
161        .map(|r| r.to_lowercase())
162        .filter(|r| !keys.contains_key(r))
163        .collect::<Vec<_>>();
164
165    // Recursively resolve all keys
166    while let Some(child) = resolve_keys_stack.pop() {
167        // Child may have already been resolved, so check again
168        if keys.contains_key(&child) {
169            continue;
170        }
171
172        resolve_keys(fs, schemas, version, keys, &child, &mut resolve_keys_stack)?;
173    }
174
175    // All keys for reference tables have been resolved, so we can now fully resolve this table
176    // Load up this file's contents
177    let filename = match version.major() {
178        1 => format!("data/{}.datc64", table_name),
179        2 => format!("data/balance/{}.datc64", table_name),
180        _ => unreachable!("Invalid major version"),
181    };
182    let bytes = fs.read(&filename).context("Failed to read file contents")?;
183    let contents = DatParser
184        .parse(&bytes)
185        .as_anyhow()
186        .context("Failed to parse dat file")?;
187
188    let DatFile {
189        rows,
190        variable_data,
191    } = contents;
192
193    // FIXME: Figure out a way to give variable section to the parser without leaking it to a
194    //          'static lifetime
195    let variable_section = Box::leak(Box::new(variable_data.clone()));
196    let parsed = {
197        let mut parser = create_parser(keys, variable_section, schema);
198
199        rows.iter()
200            .map(|row| parser.parse(row).unwrap_or(serde_json::Value::Null))
201            .collect::<Vec<_>>()
202    };
203
204    Ok(parsed)
205}
206
207fn dump_table(
208    fs: &mut FS,
209    version: &Patch,
210    schemas: &SchemaCollection,
211    output_folder: &Path,
212    resolved: &mut ResolvedKeys,
213    filename: &str,
214) -> anyhow::Result<()> {
215    let path = Path::new(&filename);
216    let table_name = path.file_stem().unwrap().to_str().unwrap().to_lowercase();
217
218    let json = resolve_table(fs, schemas, version, resolved, &table_name)
219        .context("Failed to resolve table")?;
220
221    // Save out
222    let output_path = output_folder.join(path).with_added_extension("json");
223    fs::create_dir_all(output_path.parent().unwrap()).context("Failed to create output folder")?;
224
225    let mut out =
226        BufWriter::new(File::create(&output_path).context("Failed to create output file")?);
227    serde_json::to_writer_pretty(&mut out, &json).context("Failed to serialize json")?;
228
229    Ok(())
230}
231
232pub fn dump_tables(
233    fs: &mut FS,
234    patterns: &[Pattern],
235    cache_dir: &Path,
236    output_folder: &Path,
237    version: &Patch,
238    schema: Option<impl AsRef<Path>>,
239) -> Result<()> {
240    for pattern in patterns {
241        ensure!(
242            pattern.as_str().ends_with(".datc64"),
243            "Only .datc64 table export is supported."
244        );
245    }
246
247    // Load schema
248    let schemas = if let Some(path) = schema {
249        load_schema(path.as_ref()).context("Failed to load schema file")?
250    } else {
251        fetch_schema(cache_dir).context("Failed to fetch schema file")?
252    }
253    .filter_version(version);
254
255    let mut resolved = HashMap::new();
256
257    // Resolve enums first as they have no dependencies
258    schemas.enumerations.iter().for_each(|e| {
259        let e_resolved = resolve_enum(e);
260        resolved.insert(e.name.to_lowercase(), Some(e_resolved));
261    });
262
263    let schema_names = schemas
264        .tables
265        .iter()
266        .map(|t| t.name.to_lowercase())
267        .collect::<Vec<_>>();
268
269    // Filter list of files we're going to extract
270    let filenames = fs
271        .list()
272        // Filter on glob
273        .filter(|filename| {
274            patterns.iter().any(|pattern| {
275                pattern.matches_with(
276                    filename,
277                    MatchOptions {
278                        require_literal_separator: true,
279                        ..Default::default()
280                    },
281                )
282            })
283        })
284        // Skip files we can't process
285        .filter(|filename| {
286            let path = Path::new(filename);
287            let table_name = path.file_stem().unwrap().to_str().unwrap().to_lowercase();
288
289            let keep = schema_names.contains(&table_name);
290
291            if !keep {
292                log::warn!("Skipping {:?}, schema not found", path);
293            }
294
295            keep
296        })
297        .collect::<Vec<_>>();
298
299    filenames.into_iter().for_each(|filename| {
300        let result = dump_table(
301            fs,
302            version,
303            &schemas,
304            output_folder,
305            &mut resolved,
306            &filename,
307        );
308
309        if let Err(e) = result {
310            let error_message = if *VERBOSE.get().unwrap() {
311                format!("{e:?}")
312            } else {
313                format!("{e}")
314            };
315            log::error!("Failed to extract file {filename:?}: {error_message}");
316        } else {
317            log::info!("Extracted file: {}", filename);
318        }
319    });
320
321    Ok(())
322}