rpfm_lib/files/text/mod.rs
1//---------------------------------------------------------------------------//
2// Copyright (c) 2017-2024 Ismael Gutiérrez González. All rights reserved.
3//
4// This file is part of the Rusted PackFile Manager (RPFM) project,
5// which can be found here: https://github.com/Frodo45127/rpfm.
6//
7// This file is licensed under the MIT license, which can be found here:
8// https://github.com/Frodo45127/rpfm/blob/master/LICENSE.
9//---------------------------------------------------------------------------//
10
11//! This is a module to read/write Text files.
12//!
13//! Text files are any kind of plain-text files, really. Encodings supported by this lib are:
14//! - `ISO-8859-15`
15//! - `UTF-8`
16//! - `UTF-16` (LittleEndian)
17//!
//! Also, the module automatically tries to guess the language of a Text file, so programs
//! can query the guessed language format and apply extended functionality.
20//!
//! Some of the file extensions this lib supports as `Text` files are listed below (the `EXTENSIONS` constant is the authoritative list):
22//!
23//! | ------------------------ | ---------- | ------------------------------------------- |
24//! | Extension | Language | Description |
25//! | ------------------------ | ---------- | ------------------------------------------- |
26//! | `.agf` | `Plain` | |
27//! | `.battle_speech_camera` | `Plain` | Camera settings file for battle speeches. |
28//! | `.benchmark` | `Xml` | Benchmark settings. |
29//! | `.bob` | `Plain` | BoB settings file. |
30//! | `.cindyscene` | `Xml` | Cutscene editor data. |
31//! | `.cindyscenemanager` | `Xml` | Cutscene editor data. |
32//! | `.code-snippets` | `Json` | VSCode snippet file. |
33//! | `.code-workspace` | `Json` | VSCode workspace file. |
34//! | `.csv` | `Plain` | Normal CSV file. |
35//! | `.css` | `Css` | Normal CSS file. |
36//! | `.environment` | `Xml` | |
37//! | `.glsl` | `Cpp` | GLSL shader source file. |
38//! | `.htm` | `Html` | Normal HTML file. |
39//! | `.html` | `Html` | Normal HTML file. |
40//! | `.inl` | `Cpp` | |
41//! | `.json` | `Json` | Normal JSON file. |
42//! | `.js` | `Js` | Normal Javascript file. |
43//! | `.kfa` | `Xml` | Battle Audio Event file. |
44//! | `.kfe` | `Xml` | Battle Effect file. |
45//! | `.kfl` | `Xml` | Battle Point Light file. |
46//! | `.kfsl` | `Xml` | Battle Spot Light file. |
47//! | `.kfp` | `Xml` | Battle Prop file. |
48//! | `.kfcs` | `Xml` | Battle Composite Scene file. |
49//! | `.lighting` | `Xml` | |
50//! | `.log` | `Plain` | Generic log file. |
51//! | `.lua` | `Lua` | LUA Script file. |
52//! | `.material` | `Xml` | |
53//! | `.md` | `Markdown` | Markdown files, for readmes. |
54//! | `.model_statistics` | `Xml` | |
55//! | `.tai` | `Plain` | |
56//! | `.technique` | `Xml` | |
57//! | `.texture_array` | `Plain` | List of Campaign Map textures. |
58//! | `.tsv` | `Plain` | Normal TSV file. |
59//! | `.twui` | `Lua` | TWui file, in lua format. |
60//! | `.txt` | `Plain` | Plain TXT file. |
61//! | `.variantmeshdefinition` | `Xml` | |
62//! | `.wsmodel` | `Xml` | |
63//! | `.xml` | `Xml` | Normal XML file. |
64//! | `.xml.shader` | `Xml` | Shader setup metadata. |
65//! | `.xml.material` | `Xml` | |
66//! | `.xt` | `Plain` | Txt, but with a typo? |
67//! | `.yaml` | `Yaml` | Yaml file. |
68//! | `.yml` | `Yaml` | Yaml file. |
69
70use getset::*;
71use serde_derive::{Serialize, Deserialize};
72
73use std::io::SeekFrom;
74
75use crate::binary::{ReadBytes, WriteBytes};
76use crate::error::{Result, RLibError};
77use crate::files::{Decodeable, EncodeableExtraData, Encodeable};
78
79use super::DecodeableExtraData;
80
/// Byte Order Mark (BOM) found at the start of UTF-8 files that carry one.
const BOM_UTF_8: [u8; 3] = [0xEF, 0xBB, 0xBF];
83
/// Byte Order Mark (BOM) found at the start of Little Endian UTF-16 files.
const BOM_UTF_16_LE: [u8; 2] = [0xFF, 0xFE];
86
87/// List of extensions we recognize as `Text` files, with their respective known format.
88pub const EXTENSIONS: [(&str, TextFormat); 63] = [
89 (".agf", TextFormat::Plain),
90 (".bat", TextFormat::Bat),
91 (".battle_script", TextFormat::Lua),
92 (".battle_speech_camera", TextFormat::Plain),
93 (".benchmark", TextFormat::Xml),
94 (".bob", TextFormat::Plain),
95 (".cco", TextFormat::Plain),
96 (".cindyscene", TextFormat::Xml),
97 (".cindyscenemanager", TextFormat::Xml),
98 (".code-snippets", TextFormat::Json),
99 (".code-workspace", TextFormat::Json),
100 (".css", TextFormat::Css),
101 (".csv", TextFormat::Plain),
102 (".environment", TextFormat::Xml),
103 (".environment_group", TextFormat::Xml),
104 (".environment_group.override", TextFormat::Xml),
105 (".fbx", TextFormat::Plain),
106 (".fx", TextFormat::Cpp),
107 (".fx_fragment", TextFormat::Cpp),
108 (".glsl", TextFormat::Cpp),
109 (".h", TextFormat::Cpp),
110 (".hlsl", TextFormat::Hlsl),
111 (".htm", TextFormat::Html),
112 (".html", TextFormat::Html),
113 (".inl", TextFormat::Cpp),
114 (".json", TextFormat::Json),
115 (".js", TextFormat::Js),
116 (".kfa", TextFormat::Xml),
117 (".kfc", TextFormat::Xml),
118 (".kfe", TextFormat::Xml),
119 (".kfe_temp", TextFormat::Xml),
120 (".kfl", TextFormat::Xml),
121 (".kfl_temp", TextFormat::Xml),
122 (".kfsl", TextFormat::Xml),
123 (".kfp", TextFormat::Xml),
124 (".kfcs", TextFormat::Xml),
125 (".kfcs_temp", TextFormat::Xml),
126 (".ktr", TextFormat::Xml),
127 (".ktr_temp", TextFormat::Xml),
128 (".lighting", TextFormat::Xml),
129 (".log", TextFormat::Plain),
130 (".lua", TextFormat::Lua),
131 (".md", TextFormat::Markdown),
132 (".model_statistics", TextFormat::Xml),
133 (".mvscene", TextFormat::Xml),
134 (".py", TextFormat::Python),
135 (".sbs", TextFormat::Xml),
136 (".shader", TextFormat::Xml),
137 (".sql", TextFormat::Sql),
138 (".tai", TextFormat::Plain),
139 (".technique", TextFormat::Xml),
140 (".texture_array", TextFormat::Plain),
141 (".tsv", TextFormat::Plain),
142 (".twui", TextFormat::Lua),
143 (".txt", TextFormat::Plain),
144 (".xml", TextFormat::Xml),
145 (".xml_temp", TextFormat::Xml),
146 (".xml.shader", TextFormat::Xml),
147 (".xml.material", TextFormat::Xml),
148 (".xt", TextFormat::Plain),
149 (".yml", TextFormat::Yaml),
150 (".yaml", TextFormat::Yaml),
151 (".material", TextFormat::Xml), // This has to be under xml.material
152];
153
154pub const EXTENSION_VMD: (&str, TextFormat) = (".variantmeshdefinition", TextFormat::Xml);
155pub const EXTENSION_WSMODEL: (&str, TextFormat) = (".wsmodel", TextFormat::Xml);
156
157#[cfg(test)] mod text_test;
158
159//---------------------------------------------------------------------------//
160// Enum & Structs
161//---------------------------------------------------------------------------//
162
163/// This holds an entire `Text` file decoded in memory.
164#[derive(Default, PartialEq, Eq, Clone, Debug, Getters, MutGetters, Setters, Serialize, Deserialize)]
165#[getset(get = "pub", get_mut = "pub", set = "pub")]
166pub struct Text {
167
168 /// The encoding used by the file.
169 encoding: Encoding,
170
171 /// The format of the file.
172 format: TextFormat,
173
174 /// The text inside the file.
175 contents: String
176}
177
178/// This enum represents the multiple encodings we can read/write to.
179#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
180pub enum Encoding {
181 Iso8859_1,
182 Utf8,
183 Utf8Bom,
184 Utf16Le,
185}
186
187/// This enum represents the formats we know.
188#[derive(PartialEq, Eq, Clone, Copy, Debug, Serialize, Deserialize)]
189pub enum TextFormat {
190 Bat,
191 Cpp,
192 Html,
193 Hlsl,
194 Json,
195 Js,
196 Css,
197 Lua,
198 Markdown,
199 Plain,
200 Python,
201 Sql,
202 Xml,
203 Yaml,
204}
205
206//---------------------------------------------------------------------------//
207// Implementation of Text
208//---------------------------------------------------------------------------//
209
210/// Implementation of `Default` for `Encoding`.
211impl Default for Encoding {
212
213 /// This returns `Encoding::Utf8`, as it's our default encoding.
214 fn default() -> Self {
215 Encoding::Utf8
216 }
217}
218
219/// Implementation of `Default` for `TextFormat`.
220impl Default for TextFormat {
221
222 /// This returns `TextFormat::Plain`, as it's our default format.
223 fn default() -> Self {
224 TextFormat::Plain
225 }
226}
227
228impl Text {
229
230 pub fn detect_encoding<R: ReadBytes>(data: &mut R) -> Result<Encoding> {
231 let len = data.len()?;
232
233 // First, check for BOMs. 2 bytes for UTF-16 BOMs, 3 for UTF-8.
234 if len > 2 && data.read_slice(3, true)? == BOM_UTF_8 {
235 data.seek(SeekFrom::Start(3))?;
236 return Ok(Encoding::Utf8Bom)
237 }
238 else if len > 1 && data.read_slice(2, true)? == BOM_UTF_16_LE {
239 data.seek(SeekFrom::Start(2))?;
240 return Ok(Encoding::Utf16Le)
241 }
242
243 // If no BOM is found, we assume UTF-8 if it decodes properly.
244 else {
245 let utf8_string = data.read_string_u8(len as usize);
246 if utf8_string.is_ok() {
247 data.rewind()?;
248 return Ok(Encoding::Utf8)
249 }
250
251 data.rewind()?;
252 let iso_8859_1_string = data.read_string_u8_iso_8859_15(len as usize);
253 if iso_8859_1_string.is_ok() {
254 data.rewind()?;
255 return Ok(Encoding::Iso8859_1)
256 }
257 }
258
259 // If we reach this, we do not support the format.
260 data.rewind()?;
261 Err(RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)
262 }
263}
264
265impl Decodeable for Text {
266
267 fn decode<R: ReadBytes>(data: &mut R, extra_data: &Option<DecodeableExtraData>) -> Result<Self> {
268 let len = data.len()?;
269 let encoding = Self::detect_encoding(data)?;
270 let contents = match encoding {
271 Encoding::Iso8859_1 => data.read_string_u8_iso_8859_15(len as usize)
272 .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?,
273
274 Encoding::Utf8 |
275 Encoding::Utf8Bom => {
276 let curr_pos = data.stream_position()?;
277 data.read_string_u8((len - curr_pos) as usize)
278 .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?
279 },
280 Encoding::Utf16Le => {
281 let curr_pos = data.stream_position()?;
282 data.read_string_u16((len - curr_pos) as usize)
283 .map_err(|_| RLibError::DecodingTextUnsupportedEncodingOrNotATextFile)?
284 }
285 };
286
287 // Try to get the format of the file.
288 let format = match extra_data {
289 Some(extra_data) => match extra_data.file_name {
290 Some(file_name) => {
291 match EXTENSIONS.iter().find_map(|(extension, format)| if file_name.ends_with(extension) { Some(format) } else { None }) {
292 Some(format) => *format,
293 None => TextFormat::Plain,
294 }
295 }
296 None => TextFormat::Plain,
297 }
298
299 None => TextFormat::Plain,
300 };
301
302 Ok(Self {
303 encoding,
304 format,
305 contents,
306 })
307 }
308}
309
310impl Encodeable for Text {
311
312 fn encode<W: WriteBytes>(&mut self, buffer: &mut W, _extra_data: &Option<EncodeableExtraData>) -> Result<()> {
313 match self.encoding {
314 Encoding::Iso8859_1 => buffer.write_string_u8_iso_8859_1(&self.contents),
315 Encoding::Utf8 => buffer.write_string_u8(&self.contents),
316 Encoding::Utf8Bom => {
317 buffer.write_all(&BOM_UTF_8)?;
318 buffer.write_string_u8(&self.contents)
319 },
320
321 // For UTF-16 we always have to add the BOM. Otherwise we have no way to easily tell what this file is.
322 Encoding::Utf16Le => {
323 buffer.write_all(&BOM_UTF_16_LE)?;
324 buffer.write_string_u16(&self.contents)
325 },
326 }
327 }
328}