Skip to main content

lib_bcsv_jmap/
io.rs

1use std::fs::File;
2use std::io::{Read, Write};
3use std::path::Path;
4
5use byteorder::{BigEndian, ByteOrder, LittleEndian};
6
7use crate::entry::Entry;
8use crate::error::{JMapError, Result};
9use crate::field::{Field, FieldType, FieldValue};
10use crate::hash::HashTable;
11use crate::jmap::JMapInfo;
12
13/// Options for reading/writing BCSV files
14#[derive(Debug, Clone)]
15pub struct IoOptions {
16    /// Whether data is big-endian or little-endian
17    pub big_endian: bool,
18    /// String encoding: "shift_jis" (for japanese language) or "utf-8"
19    pub encoding: Encoding,
20}
21
/// Text encodings supported for string fields
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Encoding {
    /// Strings are stored as Shift-JIS
    ShiftJis,
    /// Strings are stored as UTF-8
    Utf8,
}
30
31impl Default for IoOptions {
32    fn default() -> Self {
33        Self {
34            big_endian: true,
35            encoding: Encoding::ShiftJis,
36        }
37    }
38}
39
40impl IoOptions {
41    /// Options for Super Mario Galaxy (Wii)
42    pub fn super_mario_galaxy() -> Self {
43        Self {
44            big_endian: true,
45            encoding: Encoding::ShiftJis,
46        }
47    }
48}
49
50/// Read a JMapInfo from a byte buffer
51///
52/// # Arguments
53/// - `hash_table` - The hash table to use for field name lookups
54/// - `data` - The byte buffer containing the BCSV data
55/// - `options` - Options for endianness and string encoding
56///
57/// # TYpe
58/// - `H` - The type of hash table to use, which must implement the `HashTable` trait
59///
60/// # Returns
61/// A `JMapInfo` instance populated with the data from the buffer, or an error if parsing fails
62pub fn from_buffer<H: HashTable>(
63    hash_table: H,
64    data: &[u8],
65    options: &IoOptions,
66) -> Result<JMapInfo<H>> {
67    let mut jmap = JMapInfo::new(hash_table);
68
69    // The header size is 16 bytes, so we need at least that much to read the header
70    if data.len() < 0x10 {
71        return Err(JMapError::BufferTooSmall {
72            expected: 0x10,
73            got: data.len(),
74        });
75    }
76
77    // Read header
78    let (num_entries, num_fields, off_data, entry_size) = if options.big_endian {
79        (
80            BigEndian::read_u32(&data[0x00..0x04]),
81            BigEndian::read_u32(&data[0x04..0x08]),
82            BigEndian::read_u32(&data[0x08..0x0C]),
83            BigEndian::read_u32(&data[0x0C..0x10]),
84        )
85    } else {
86        (
87            LittleEndian::read_u32(&data[0x00..0x04]),
88            LittleEndian::read_u32(&data[0x04..0x08]),
89            LittleEndian::read_u32(&data[0x08..0x0C]),
90            LittleEndian::read_u32(&data[0x0C..0x10]),
91        )
92    };
93
94    jmap.entry_size = entry_size;
95
96    // Calculate string table offset
97    // string table starts immediately after the entries, which start at off_data and each entry is entry_size bytes
98    // So the string table is at off_data + (num_entries * entry_size)
99    let off_strings = off_data as usize + (num_entries as usize * entry_size as usize);
100
101    // Read fields (each field is 0xC bytes)
102    let mut off = 0x10_usize;
103    for _ in 0..num_fields {
104        let field = read_field(data, off, options.big_endian)?;
105        jmap.fields_map_mut().insert(field.hash, field);
106        off += 0x0C;
107    }
108
109    // Read entries
110    off = off_data as usize;
111    for _ in 0..num_entries {
112        let entry = read_entry(data, off, off_strings, &jmap, options)?;
113        jmap.entries_vec_mut().push(entry);
114        off += entry_size as usize;
115    }
116
117    Ok(jmap)
118}
119
120/// Read a JMapInfo from a file
121///
122/// # Arguments
123/// - `hash_table` - The hash table to use for field name lookups
124/// - `path` - The path to the BCSV file to read
125/// - `options` - Options for endianness and string encoding
126///
127/// # Type
128/// - `H` - The type of hash table to use, which must implement the `HashTable` trait
129/// - `P` - A type that can be converted to a `Path` reference, such as `&str` or `String`
130///
131/// # Returns
132/// A `JMapInfo` instance populated with the data from the file, or an error if the file cannot be read or parsed
133pub fn from_file<H: HashTable, P: AsRef<Path>>(
134    hash_table: H,
135    path: P,
136    options: &IoOptions,
137) -> Result<JMapInfo<H>> {
138    let mut file = File::open(path)?;
139    let mut data = Vec::new();
140    file.read_to_end(&mut data)?;
141    from_buffer(hash_table, &data, options)
142}
143
144/// Write a JMapInfo to a byte buffer
145/// This function serializes the `JMapInfo` into the BCSV format, including the header, field definitions, entries, and string table
146///
147/// # Arguments
148/// - `jmap` - The `JMapInfo` instance to serialize
149/// - `options` - Options for endianness and string encoding
150///
151/// # Type
152/// - `H` - The type of hash table used by the `JMapInfo`, which must implement the `HashTable` trait
153///
154/// # Returns
155/// A `Result` containing the serialized byte buffer if successful, or an error if serialization fails
156///
157/// TODO: This function is pretty complex and could use some refactoring to break it down into smaller functions
158pub fn to_buffer<H: HashTable>(jmap: &JMapInfo<H>, options: &IoOptions) -> Result<Vec<u8>> {
159    let num_entries = jmap.len() as u32;
160    let num_fields = jmap.num_fields() as u32;
161    let off_data = 0x10 + num_fields * 0x0C; // Header (16 bytes) + field definitions (12 bytes each)
162
163    // Calculate entry size and field offsets
164    let mut fields_with_offsets: Vec<(u32, Field)> = jmap
165        .fields()
166        .map(|f| (f.hash, f.clone()))
167        .collect();
168
169    // Sort by type order and assign offsets
170    fields_with_offsets.sort_by_key(|(_, f)| f.field_type.order());
171
172    let mut current_offset: u16 = 0;
173    for (_, field) in &mut fields_with_offsets {
174        field.offset = current_offset;
175        current_offset += field.field_type.size() as u16;
176    }
177
178    // Align entry size to 4 bytes
179    let entry_size = ((current_offset as u32 + 3) & !3) as u32;
180
181    // Create buffer
182    let mut buffer = vec![0u8; (off_data + num_entries * entry_size) as usize];
183
184    // Write header
185    if options.big_endian {
186        BigEndian::write_u32(&mut buffer[0x00..0x04], num_entries);
187        BigEndian::write_u32(&mut buffer[0x04..0x08], num_fields);
188        BigEndian::write_u32(&mut buffer[0x08..0x0C], off_data);
189        BigEndian::write_u32(&mut buffer[0x0C..0x10], entry_size);
190    } else {
191        LittleEndian::write_u32(&mut buffer[0x00..0x04], num_entries);
192        LittleEndian::write_u32(&mut buffer[0x04..0x08], num_fields);
193        LittleEndian::write_u32(&mut buffer[0x08..0x0C], off_data);
194        LittleEndian::write_u32(&mut buffer[0x0C..0x10], entry_size);
195    }
196
197    // Build a map of hash -> offset for quick lookup
198    let field_offsets: std::collections::HashMap<u32, &Field> = fields_with_offsets
199        .iter()
200        .map(|(hash, field)| (*hash, field))
201        .collect();
202
203    // Write fields
204    let mut off = 0x10_usize;
205    for (hash, field) in &fields_with_offsets {
206        write_field(&mut buffer, off, *hash, field, options.big_endian);
207        off += 12;
208    }
209
210    // Prepare string table for StringOffset fields
211    let mut string_table: Vec<u8> = Vec::new();
212    let mut string_offsets: std::collections::HashMap<String, u32> = std::collections::HashMap::new();
213
214    // Write entries
215    off = off_data as usize;
216    for entry in jmap.entries() {
217        write_entry(
218            &mut buffer,
219            off,
220            entry,
221            &field_offsets,
222            &mut string_table,
223            &mut string_offsets,
224            options,
225        )?;
226        off += entry_size as usize;
227    }
228
229    // Append string table
230    buffer.extend_from_slice(&string_table);
231
232    // Align to 32 bytes with 0x40 padding
233    let len = buffer.len();
234    let aligned_len = (len + 31) & !31;
235    buffer.resize(aligned_len, 0x40);
236
237    Ok(buffer)
238}
239
240/// Write a JMapInfo to a file
241///
242/// # Arguments
243/// - `jmap` - The `JMapInfo` instance to write to the file
244/// - `path` - The path to the file where the BCSV data should be written
245/// - `options` - Options for endianness and string encoding
246///
247/// # Type
248/// - `H` - The type of hash table used by the `JMapInfo`, which must implement the `HashTable` trait
249/// - `P` - A type that can be converted to a `Path` reference, such as `&str` or `String`
250///
251/// # Returns
252/// Ok(()) if the file was successfully written, or an error if the file cannot be created or written to
253pub fn to_file<H: HashTable, P: AsRef<Path>>(
254    jmap: &JMapInfo<H>,
255    path: P,
256    options: &IoOptions,
257) -> Result<()> {
258    let buffer = to_buffer(jmap, options)?;
259    let mut file = File::create(path)?;
260    file.write_all(&buffer)?;
261    file.flush()?;
262    Ok(())
263}
264
265// Helper functions
266
267/// Read a field definition from the buffer at the given offset
268///
269/// # Arguments
270/// - `data` - The byte buffer containing the field definitions
271/// - `offset` - The offset in the buffer where the field definition starts
272/// - `big_endian` - Whether the data is big-endian or little-endian
273///
274/// # Errors
275/// - `JMapError::InvalidFieldType` if the field type byte is not a valid `FieldType`
276///
277/// # Returns
278/// A `Field` instance representing the field definition, or an error if the field type is invalid
279fn read_field(data: &[u8], offset: usize, big_endian: bool) -> Result<Field> {
280    let (hash, mask, field_offset, shift, raw_type) = if big_endian {
281        (
282            BigEndian::read_u32(&data[offset..offset + 0x04]),
283            BigEndian::read_u32(&data[offset + 0x04..offset + 0x08]),
284            BigEndian::read_u16(&data[offset + 0x08..offset + 0x0A]),
285            data[offset + 0x0A],
286            data[offset + 0x0B],
287        )
288    } else {
289        (
290            LittleEndian::read_u32(&data[offset..offset + 0x04]),
291            LittleEndian::read_u32(&data[offset + 0x04..offset + 0x08]),
292            LittleEndian::read_u16(&data[offset + 0x08..offset + 0x0A]),
293            data[offset + 0x0A],
294            data[offset + 0x0B],
295        )
296    };
297
298    let field_type = FieldType::from_raw(raw_type)
299        .ok_or(JMapError::InvalidFieldType(raw_type))?;
300
301    Ok(Field {
302        hash,
303        field_type,
304        mask,
305        shift,
306        offset: field_offset,
307        default: FieldValue::default_for(field_type),
308    })
309}
310
311/// Write a field definition to the buffer at the given offset
312///
313/// # Arguments
314/// - `buffer` - The byte buffer where the field definition should be written
315/// - `offset` - The offset in the buffer where the field definition should start
316/// - `hash` - The hash of the field name
317/// - `field` - The `Field` instance containing the field definition to write
318/// - `big_endian` - Whether the data should be written in big-endian or little-endian format
319fn write_field(buffer: &mut [u8], offset: usize, hash: u32, field: &Field, big_endian: bool) {
320    if big_endian {
321        BigEndian::write_u32(&mut buffer[offset..offset + 0x04], hash);
322        BigEndian::write_u32(&mut buffer[offset + 0x04..offset + 0x08], field.mask);
323        BigEndian::write_u16(&mut buffer[offset + 0x08..offset + 0x0A], field.offset);
324    } else {
325        LittleEndian::write_u32(&mut buffer[offset..offset + 0x04], hash);
326        LittleEndian::write_u32(&mut buffer[offset + 0x04..offset + 0x08], field.mask);
327        LittleEndian::write_u16(&mut buffer[offset + 0x08..offset + 0x0A], field.offset);
328    }
329    buffer[offset + 0x0A] = field.shift;
330    buffer[offset + 0x0B] = field.field_type as u8;
331}
332
333/// Read an entry from the buffer at the given offset, using the field definitions from the JMapInfo
334///
335/// # Arguments
336/// - `data` - The byte buffer containing the entry data
337/// - `entry_offset` - The offset in the buffer where the entry starts
338/// - `string_table_offset` - The offset in the buffer where the string table starts (for StringOffset fields)
339/// - `jmap` - The `JMapInfo` instance containing the field definitions to use for parsing the entry
340/// - `options` - Options for endianness and string encoding
341///
342/// # Returns
343/// An `Entry` instance representing the parsed entry, or an error if parsing fails
344fn read_entry<H: HashTable>(
345    data: &[u8],
346    entry_offset: usize,
347    string_table_offset: usize,
348    jmap: &JMapInfo<H>,
349    options: &IoOptions,
350) -> Result<Entry> {
351    let mut entry = Entry::with_capacity(jmap.num_fields());
352
353    for field in jmap.fields() {
354        let val_offset = entry_offset + field.offset as usize;
355        let value = read_field_value(data, val_offset, string_table_offset, field, options)?;
356        entry.set_by_hash(field.hash, value);
357    }
358
359    Ok(entry)
360}
361
362/// Read a field value from the buffer at the given offset, applying the field's mask and shift, and using the string table for StringOffset fields
363///
364/// # Arguments
365/// - `data` - The byte buffer containing the field value data
366/// - `offset` - The offset in the buffer where the field value starts
367/// - `string_table_offset` - The offset in the buffer where the string table starts (for StringOffset fields)
368/// - `field` - The `Field` instance containing the field definition to use for parsing the value
369/// - `options` - Options for endianness and string encoding
370///
371/// # Returns
372/// A `FieldValue` instance representing the parsed field value, or an error if parsing fails
373///
374/// TODO: This function is quite big and could be refactored by implementation of a trait for reading/writing field values based on the field type, to reduce the amount of code
375fn read_field_value(
376    data: &[u8],
377    offset: usize,
378    string_table_offset: usize,
379    field: &Field,
380    options: &IoOptions,
381) -> Result<FieldValue> {
382    let value = match field.field_type {
383        FieldType::Long | FieldType::UnsignedLong => {
384            let raw = if options.big_endian {
385                BigEndian::read_u32(&data[offset..offset + 4])
386            } else {
387                LittleEndian::read_u32(&data[offset..offset + 4])
388            };
389            let masked = (raw & field.mask) >> field.shift;
390            // Sign extend for signed types
391            let signed = if masked & 0x80000000 != 0 {
392                masked as i32
393            } else {
394                masked as i32
395            };
396            FieldValue::Int(signed)
397        }
398
399        FieldType::Float => {
400            let val = if options.big_endian {
401                BigEndian::read_f32(&data[offset..offset + 4])
402            } else {
403                LittleEndian::read_f32(&data[offset..offset + 4])
404            };
405            FieldValue::Float(val)
406        }
407
408        FieldType::Short => {
409            let raw = if options.big_endian {
410                BigEndian::read_u16(&data[offset..offset + 2])
411            } else {
412                LittleEndian::read_u16(&data[offset..offset + 2])
413            };
414            let masked = ((raw as u32) & field.mask) >> field.shift;
415            let signed = if masked & 0x8000 != 0 {
416                (masked | 0xFFFF0000) as i32
417            } else {
418                masked as i32
419            };
420            FieldValue::Int(signed)
421        }
422
423        FieldType::Char => {
424            let raw = data[offset];
425            let masked = ((raw as u32) & field.mask) >> field.shift;
426            let signed = if masked & 0x80 != 0 {
427                (masked | 0xFFFFFF00) as i32
428            } else {
429                masked as i32
430            };
431            FieldValue::Int(signed)
432        }
433
434        FieldType::String => {
435            // Read up to 32 bytes until null terminator
436            let end = data[offset..offset + 32]
437                .iter()
438                .position(|&b| b == 0)
439                .unwrap_or(32);
440            let bytes = &data[offset..offset + end];
441            let s = decode_string(bytes, options.encoding)?;
442            FieldValue::String(s)
443        }
444
445        FieldType::StringOffset => {
446            let str_offset = if options.big_endian {
447                BigEndian::read_u32(&data[offset..offset + 4])
448            } else {
449                LittleEndian::read_u32(&data[offset..offset + 4])
450            };
451            let str_start = string_table_offset + str_offset as usize;
452            let end = data[str_start..]
453                .iter()
454                .position(|&b| b == 0)
455                .unwrap_or(0);
456            let bytes = &data[str_start..str_start + end];
457            let s = decode_string(bytes, options.encoding)?;
458            FieldValue::String(s)
459        }
460    };
461
462    Ok(value)
463}
464
465/// Write an entry to the buffer at the given offset, using the field definitions from the JMapInfo, and updating the string table for StringOffset fields
466///
467/// # Arguments
468/// - `buffer` - The byte buffer where the entry should be written
469/// - `entry_offset` - The offset in the buffer where the entry should start
470/// - `entry` - The `Entry` instance containing the field values to write
471/// - `field_offsets` - A map of field hash to `Field` instance, used for looking up field definitions when writing values
472/// - `string_table` - A mutable byte vector representing the string table, which will be updated with new strings for StringOffset fields
473/// - `string_offsets` - A mutable map of string to offset in the string table, used for reusing existing strings and avoiding duplicates in the string table
474/// - `options` - Options for endianness and string encoding
475///
476/// # Returns
477/// Ok(()) if the entry was successfully written to the buffer, or an error if writing fails (e.g. due to type mismatch or encoding errors)
478fn write_entry(
479    buffer: &mut [u8],
480    entry_offset: usize,
481    entry: &Entry,
482    field_offsets: &std::collections::HashMap<u32, &Field>,
483    string_table: &mut Vec<u8>,
484    string_offsets: &mut std::collections::HashMap<String, u32>,
485    options: &IoOptions,
486) -> Result<()> {
487    for (hash, value) in entry.iter() {
488        if let Some(field) = field_offsets.get(hash) {
489            let val_offset = entry_offset + field.offset as usize;
490            write_field_value(
491                buffer,
492                val_offset,
493                value,
494                field,
495                string_table,
496                string_offsets,
497                options,
498            )?;
499        }
500    }
501    Ok(())
502}
503
504/// Write a field value to the buffer at the given offset, applying the field's mask and shift, and updating the string table for StringOffset fields
505///
506/// # Arguments
507/// - `buffer` - The byte buffer where the field value should be written
508/// - `offset` - The offset in the buffer where the field value should start
509/// - `value` - The `FieldValue` instance representing the value to write
510/// - `field` - The `Field` instance containing the field definition to use for writing the value
511/// - `string_table` - A mutable byte vector representing the string table, which will be updated with new strings for StringOffset fields
512/// - `string_offsets` - A mutable map of string to offset in the string table, used for reusing existing strings and avoiding duplicates in the string table
513/// - `options` - Options for endianness and string encoding
514///
515/// # Returns
516/// Ok(()) if the field value was successfully written to the buffer, or an error if writing fails (e.g. due to type mismatch or encoding errors)
517fn write_field_value(
518    buffer: &mut [u8],
519    offset: usize,
520    value: &FieldValue,
521    field: &Field,
522    string_table: &mut Vec<u8>,
523    string_offsets: &mut std::collections::HashMap<String, u32>,
524    options: &IoOptions,
525) -> Result<()> {
526    match (field.field_type, value) {
527        (FieldType::Long | FieldType::UnsignedLong, FieldValue::Int(v)) => {
528            let existing = if options.big_endian {
529                BigEndian::read_u32(&buffer[offset..offset + 4])
530            } else {
531                LittleEndian::read_u32(&buffer[offset..offset + 4])
532            };
533            let masked = (existing & !field.mask) | (((*v as u32) << field.shift) & field.mask);
534            if options.big_endian {
535                BigEndian::write_u32(&mut buffer[offset..offset + 4], masked);
536            } else {
537                LittleEndian::write_u32(&mut buffer[offset..offset + 4], masked);
538            }
539        }
540
541        (FieldType::Float, FieldValue::Float(v)) => {
542            if options.big_endian {
543                BigEndian::write_f32(&mut buffer[offset..offset + 4], *v);
544            } else {
545                LittleEndian::write_f32(&mut buffer[offset..offset + 4], *v);
546            }
547        }
548
549        (FieldType::Short, FieldValue::Int(v)) => {
550            let existing = if options.big_endian {
551                BigEndian::read_u16(&buffer[offset..offset + 2])
552            } else {
553                LittleEndian::read_u16(&buffer[offset..offset + 2])
554            };
555            let masked = ((existing as u32 & !field.mask) | (((*v as u32) << field.shift) & field.mask)) as u16;
556            if options.big_endian {
557                BigEndian::write_u16(&mut buffer[offset..offset + 2], masked);
558            } else {
559                LittleEndian::write_u16(&mut buffer[offset..offset + 2], masked);
560            }
561        }
562
563        (FieldType::Char, FieldValue::Int(v)) => {
564            let existing = buffer[offset] as u32;
565            let masked = ((existing & !field.mask) | (((*v as u32) << field.shift) & field.mask)) as u8;
566            buffer[offset] = masked;
567        }
568
569        (FieldType::String, FieldValue::String(s)) => {
570            let bytes = encode_string(s, options.encoding)?;
571            let len = bytes.len().min(32);
572            buffer[offset..offset + len].copy_from_slice(&bytes[..len]);
573        }
574
575        (FieldType::StringOffset, FieldValue::String(s)) => {
576            let str_offset = if let Some(&existing_offset) = string_offsets.get(s) {
577                existing_offset
578            } else {
579                let offset = string_table.len() as u32;
580                let bytes = encode_string(s, options.encoding)?;
581                string_table.extend_from_slice(&bytes);
582                string_table.push(0); // Null terminator
583                string_offsets.insert(s.clone(), offset);
584                offset
585            };
586
587            if options.big_endian {
588                BigEndian::write_u32(&mut buffer[offset..offset + 4], str_offset);
589            } else {
590                LittleEndian::write_u32(&mut buffer[offset..offset + 4], str_offset);
591            }
592        }
593
594        _ => {
595            return Err(JMapError::TypeMismatch {
596                expected: field.field_type.csv_name(),
597                got: value.type_name(),
598            });
599        }
600    }
601
602    Ok(())
603}
604
605/// Decode a byte slice into a string using the specified encoding
606///
607/// # Arguments
608/// - `bytes` - The byte slice to decode
609/// - `encoding` - The encoding to use for decoding the bytes (e.g. Shift-JIS or UTF-8)
610///
611/// # Errors
612/// - `JMapError::EncodingError` if the bytes cannot be decoded using the specified encoding
613///
614/// # Returns
615/// A `String` containing the decoded text, or an error if decoding fails
616fn decode_string(bytes: &[u8], encoding: Encoding) -> Result<String> {
617    match encoding {
618        Encoding::Utf8 => String::from_utf8(bytes.to_vec())
619            .map_err(|e| JMapError::EncodingError(e.to_string())),
620        Encoding::ShiftJis => {
621            let (decoded, _, had_errors) = encoding_rs::SHIFT_JIS.decode(bytes);
622            if had_errors {
623                // Try to decode anyway, some bytes might be valid
624            }
625            Ok(decoded.into_owned())
626        }
627    }
628}
629
630/// Encode a string into a byte vector using the specified encoding
631///
632/// # Arguments
633/// - `s` - The string to encode
634/// - `encoding` - The encoding to use for encoding the string (e.g. Shift-JIS or UTF-8)
635///
636/// # Returns
637/// A `Vec<u8>` containing the encoded bytes of the string, or an error if encoding fails
638fn encode_string(s: &str, encoding: Encoding) -> Result<Vec<u8>> {
639    match encoding {
640        Encoding::Utf8 => Ok(s.as_bytes().to_vec()),
641        Encoding::ShiftJis => {
642            let (encoded, _, _) = encoding_rs::SHIFT_JIS.encode(s);
643            Ok(encoded.into_owned())
644        }
645    }
646}