rpfm_lib/files/text/
mod.rs

1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2024 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! This is a module to read/write Text files.
12//!
13//! Text files are any kind of plain-text files, really. Encodings supported by this lib are:
14//! - `ISO-8859-15`
15//! - `UTF-8`
16//! - `UTF-16` (LittleEndian)
17//!
//! Also, the module automatically tries to guess the language of a Text file, so programs
//! can query the guessed language format and apply extended functionality.
20//!
//! The file extensions this lib supports as `Text` files include the following (see `EXTENSIONS` for the authoritative list):
22//!
23//! | ------------------------ | ---------- | ------------------------------------------- |
24//! | Extension                | Language   | Description                                 |
25//! | ------------------------ | ---------- | ------------------------------------------- |
26//! | `.agf`                   | `Plain`    |                                             |
27//! | `.battle_speech_camera`  | `Plain`    | Camera settings file for battle speeches.   |
28//! | `.benchmark`             | `Xml`      | Benchmark settings.                         |
29//! | `.bob`                   | `Plain`    | BoB settings file.                          |
30//! | `.cindyscene`            | `Xml`      | Cutscene editor data.                       |
31//! | `.cindyscenemanager`     | `Xml`      | Cutscene editor data.                       |
32//! | `.code-snippets`         | `Json`     | VSCode snippet file.                        |
33//! | `.code-workspace`        | `Json`     | VSCode workspace file.                      |
34//! | `.csv`                   | `Plain`    | Normal CSV file.                            |
35//! | `.css`                   | `Css`      | Normal CSS file.                            |
36//! | `.environment`           | `Xml`      |                                             |
37//! | `.glsl`                  | `Cpp`      | GLSL shader source file.                    |
38//! | `.htm`                   | `Html`     | Normal HTML file.                           |
39//! | `.html`                  | `Html`     | Normal HTML file.                           |
40//! | `.inl`                   | `Cpp`      |                                             |
41//! | `.json`                  | `Json`     | Normal JSON file.                           |
42//! | `.js`                    | `Js`       | Normal Javascript file.                     |
43//! | `.kfa`                   | `Xml`      | Battle Audio Event file.                    |
44//! | `.kfe`                   | `Xml`      | Battle Effect file.                         |
45//! | `.kfl`                   | `Xml`      | Battle Point Light file.                    |
46//! | `.kfsl`                  | `Xml`      | Battle Spot Light file.                     |
47//! | `.kfp`                   | `Xml`      | Battle Prop file.                           |
48//! | `.kfcs`                  | `Xml`      | Battle Composite Scene file.                |
49//! | `.lighting`              | `Xml`      |                                             |
50//! | `.log`                   | `Plain`    | Generic log file.                           |
51//! | `.lua`                   | `Lua`      | LUA Script file.                            |
52//! | `.material`              | `Xml`      |                                             |
53//! | `.md`                    | `Markdown` | Markdown files, for readmes.                |
54//! | `.model_statistics`      | `Xml`      |                                             |
55//! | `.tai`                   | `Plain`    |                                             |
56//! | `.technique`             | `Xml`      |                                             |
57//! | `.texture_array`         | `Plain`    | List of Campaign Map textures.              |
58//! | `.tsv`                   | `Plain`    | Normal TSV file.                            |
59//! | `.twui`                  | `Lua`      | TWui file, in lua format.                   |
60//! | `.txt`                   | `Plain`    | Plain TXT file.                             |
61//! | `.variantmeshdefinition` | `Xml`      |                                             |
62//! | `.wsmodel`               | `Xml`      |                                             |
63//! | `.xml`                   | `Xml`      | Normal XML file.                            |
64//! | `.xml.shader`            | `Xml`      | Shader setup metadata.                      |
65//! | `.xml.material`          | `Xml`      |                                             |
66//! | `.xt`                    | `Plain`    | Txt, but with a typo?                       |
67//! | `.yaml`                  | `Yaml`     | Yaml file.                                  |
68//! | `.yml`                   | `Yaml`     | Yaml file.                                  |
69
70use getset::*;
71use serde_derive::{Serialize, Deserialize};
72
73use std::io::SeekFrom;
74
75use crate::binary::{ReadBytes, WriteBytes};
76use crate::error::{Result, RLibError};
77use crate::files::{Decodeable, EncodeableExtraData, Encodeable};
78
79use super::DecodeableExtraData;
80
/// Byte Order Mark (BOM) that prefixes UTF-8 files saved with a BOM.
const BOM_UTF_8: [u8; 3] = [0xEF, 0xBB, 0xBF];

/// Byte Order Mark (BOM) that prefixes Little Endian UTF-16 files.
const BOM_UTF_16_LE: [u8; 2] = [0xFF, 0xFE];
86
87/// List of extensions we recognize as `Text` files, with their respective known format.
88pub const EXTENSIONS: [(&str, TextFormat); 63] = [
89    (".agf", TextFormat::Plain),
90    (".bat", TextFormat::Bat),
91    (".battle_script", TextFormat::Lua),
92    (".battle_speech_camera", TextFormat::Plain),
93    (".benchmark", TextFormat::Xml),
94    (".bob", TextFormat::Plain),
95    (".cco", TextFormat::Plain),
96    (".cindyscene", TextFormat::Xml),
97    (".cindyscenemanager", TextFormat::Xml),
98    (".code-snippets", TextFormat::Json),
99    (".code-workspace", TextFormat::Json),
100    (".css", TextFormat::Css),
101    (".csv", TextFormat::Plain),
102    (".environment", TextFormat::Xml),
103    (".environment_group", TextFormat::Xml),
104    (".environment_group.override", TextFormat::Xml),
105    (".fbx", TextFormat::Plain),
106    (".fx", TextFormat::Cpp),
107    (".fx_fragment", TextFormat::Cpp),
108    (".glsl", TextFormat::Cpp),
109    (".h", TextFormat::Cpp),
110    (".hlsl", TextFormat::Hlsl),
111    (".htm", TextFormat::Html),
112    (".html", TextFormat::Html),
113    (".inl", TextFormat::Cpp),
114    (".json", TextFormat::Json),
115    (".js", TextFormat::Js),
116    (".kfa", TextFormat::Xml),
117    (".kfc", TextFormat::Xml),
118    (".kfe", TextFormat::Xml),
119    (".kfe_temp", TextFormat::Xml),
120    (".kfl", TextFormat::Xml),
121    (".kfl_temp", TextFormat::Xml),
122    (".kfsl", TextFormat::Xml),
123    (".kfp", TextFormat::Xml),
124    (".kfcs", TextFormat::Xml),
125    (".kfcs_temp", TextFormat::Xml),
126    (".ktr", TextFormat::Xml),
127    (".ktr_temp", TextFormat::Xml),
128    (".lighting", TextFormat::Xml),
129    (".log", TextFormat::Plain),
130    (".lua", TextFormat::Lua),
131    (".md", TextFormat::Markdown),
132    (".model_statistics", TextFormat::Xml),
133    (".mvscene", TextFormat::Xml),
134    (".py", TextFormat::Python),
135    (".sbs", TextFormat::Xml),
136    (".shader", TextFormat::Xml),
137    (".sql", TextFormat::Sql),
138    (".tai", TextFormat::Plain),
139    (".technique", TextFormat::Xml),
140    (".texture_array", TextFormat::Plain),
141    (".tsv", TextFormat::Plain),
142    (".twui", TextFormat::Lua),
143    (".txt", TextFormat::Plain),
144    (".xml", TextFormat::Xml),
145    (".xml_temp", TextFormat::Xml),
146    (".xml.shader", TextFormat::Xml),
147    (".xml.material", TextFormat::Xml),
148    (".xt", TextFormat::Plain),
149    (".yml", TextFormat::Yaml),
150    (".yaml", TextFormat::Yaml),
151    (".material", TextFormat::Xml),     // This has to be under xml.material
152];
153
154pub const EXTENSION_VMD: (&str, TextFormat) = (".variantmeshdefinition", TextFormat::Xml);
155pub const EXTENSION_WSMODEL: (&str, TextFormat) = (".wsmodel", TextFormat::Xml);
156
157#[cfg(test)] mod text_test;
158
159//---------------------------------------------------------------------------//
160//                              Enum & Structs
161//---------------------------------------------------------------------------//
162
163/// This holds an entire `Text` file decoded in memory.
164#[derive(Default, PartialEq, Eq, Clone, Debug, Getters, MutGetters, Setters, Serialize, Deserialize)]
165#[getset(get = "pub", get_mut = "pub", set = "pub")]
166pub struct Text {
167
168    /// The encoding used by the file.
169    encoding: Encoding,
170
171    /// The format of the file.
172    format: TextFormat,
173
174    /// The text inside the file.
175    contents: String
176}
177
178/// This enum represents the multiple encodings we can read/write to.
179#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
180pub enum Encoding {
181    Iso8859_1,
182    Utf8,
183    Utf8Bom,
184    Utf16Le,
185}
186
187/// This enum represents the formats we know.
188#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
189pub enum TextFormat {
190    Bat,
191    Cpp,
192    Html,
193    Hlsl,
194    Json,
195    Js,
196    Css,
197    Lua,
198    Markdown,
199    Plain,
200    Python,
201    Sql,
202    Xml,
203    Yaml,
204}
205
206//---------------------------------------------------------------------------//
207//                           Implementation of Text
208//---------------------------------------------------------------------------//
209
210/// Implementation of `Default` for `Encoding`.
211impl Default for Encoding {
212
213    /// This returns `Encoding::Utf8`, as it's our default encoding.
214    fn default() -> Self {
215        Encoding::Utf8
216    }
217}
218
219/// Implementation of `Default` for `TextFormat`.
220impl Default for TextFormat {
221
222    /// This returns `TextFormat::Plain`, as it's our default format.
223    fn default() -> Self {
224        TextFormat::Plain
225    }
226}
227
228impl Text {
229
230    pub fn detect_encoding<R: ReadBytes>(data: &mut R) -> Result<Encoding> {
231        let len = data.len()?;
232
233        // First, check for BOMs. 2 bytes for UTF-16 BOMs, 3 for UTF-8.
234        if len > 2 && data.read_slice(3, true)? == BOM_UTF_8 {
235            data.seek(SeekFrom::Start(3))?;
236            return Ok(Encoding::Utf8Bom)
237        }
238        else if len > 1 && data.read_slice(2, true)? == BOM_UTF_16_LE {
239            data.seek(SeekFrom::Start(2))?;
240            return Ok(Encoding::Utf16Le)
241        }
242
243        // If no BOM is found, we assume UTF-8 if it decodes properly.
244        else {
245            let utf8_string = data.read_string_u8(len as usize);
246            if utf8_string.is_ok() {
247                data.rewind()?;
248                return Ok(Encoding::Utf8)
249            }
250
251            data.rewind()?;
252            let iso_8859_1_string = data.read_string_u8_iso_8859_15(len as usize);
253            if iso_8859_1_string.is_ok() {
254                data.rewind()?;
255                return Ok(Encoding::Iso8859_1)
256            }
257        }
258
259        // If we reach this, we do not support the format.
260        data.rewind()?;
261        Err(RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)
262    }
263}
264
265impl Decodeable for Text {
266
267    fn decode<R: ReadBytes>(data: &mut R, extra_data: &Option<DecodeableExtraData>) -> Result<Self> {
268        let len = data.len()?;
269        let encoding = Self::detect_encoding(data)?;
270        let contents = match encoding {
271            Encoding::Iso8859_1 => data.read_string_u8_iso_8859_15(len as usize)
272                .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?,
273
274            Encoding::Utf8 |
275            Encoding::Utf8Bom => {
276                let curr_pos = data.stream_position()?;
277                data.read_string_u8((len - curr_pos) as usize)
278                    .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?
279            },
280            Encoding::Utf16Le => {
281                let curr_pos = data.stream_position()?;
282                data.read_string_u16((len - curr_pos) as usize)
283                    .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?
284            }
285        };
286
287        // Try to get the format of the file.
288        let format = match extra_data {
289            Some(extra_data) => match extra_data.file_name {
290                Some(file_name) => {
291                    match EXTENSIONS.iter().find_map(|(extension, format)| if file_name.ends_with(extension) { Some(format) } else { None }) {
292                        Some(format) => *format,
293                        None => TextFormat::Plain,
294                    }
295                }
296                None => TextFormat::Plain,
297            }
298
299            None => TextFormat::Plain,
300        };
301
302        Ok(Self {
303            encoding,
304            format,
305            contents,
306        })
307    }
308}
309
310impl Encodeable for Text {
311
312    fn encode<W: WriteBytes>(&mut self, buffer: &mut W, _extra_data: &Option<EncodeableExtraData>) -> Result<()> {
313        match self.encoding {
314            Encoding::Iso8859_1 => buffer.write_string_u8_iso_8859_1(&self.contents),
315            Encoding::Utf8 => buffer.write_string_u8(&self.contents),
316            Encoding::Utf8Bom => {
317                buffer.write_all(&BOM_UTF_8)?;
318                buffer.write_string_u8(&self.contents)
319            },
320
321            // For UTF-16 we always have to add the BOM. Otherwise we have no way to easily tell what this file is.
322            Encoding::Utf16Le => {
323                buffer.write_all(&BOM_UTF_16_LE)?;
324                buffer.write_string_u16(&self.contents)
325            },
326        }
327    }
328}