wow_cdbc/
stringblock.rs

1//! String block parsing functionality
2
3use crate::{Error, Result, StringRef};
4use std::collections::HashMap;
5use std::io::{Read, Seek, SeekFrom};
6use std::sync::Arc;
7
/// The string block of a DBC file, held as one contiguous byte buffer.
///
/// Individual strings are stored back to back inside the buffer, each ended
/// by a NUL byte, and are addressed by their byte offset from the start.
#[derive(Clone, Debug)]
pub struct StringBlock {
    /// Unmodified bytes of the block, NUL terminators included
    data: Vec<u8>,
}
14
15impl StringBlock {
16    /// Parse a string block from a reader
17    pub fn parse<R: Read + Seek>(reader: &mut R, offset: u64, size: u32) -> Result<Self> {
18        reader.seek(SeekFrom::Start(offset))?;
19
20        let mut data = vec![0u8; size as usize];
21        reader.read_exact(&mut data)?;
22
23        Ok(Self { data })
24    }
25
26    /// Get a string from the string block using a string reference
27    pub fn get_string(&self, string_ref: StringRef) -> Result<&str> {
28        let offset = string_ref.offset() as usize;
29        if offset >= self.data.len() {
30            return Err(Error::OutOfBounds(format!(
31                "String reference offset out of bounds: {} (max: {})",
32                offset,
33                self.data.len()
34            )));
35        }
36
37        // Find the end of the string (null terminator)
38        let mut end = offset;
39        while end < self.data.len() && self.data[end] != 0 {
40            end += 1;
41        }
42
43        // Convert the bytes to a string
44        std::str::from_utf8(&self.data[offset..end])
45            .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
46    }
47
48    /// Get the raw data of the string block
49    pub fn data(&self) -> &[u8] {
50        &self.data
51    }
52
53    /// Get the size of the string block in bytes
54    pub fn size(&self) -> usize {
55        self.data.len()
56    }
57
58    /// Check if an offset is the start of a string in the block
59    ///
60    /// A valid string start is either at offset 0 (beginning of block)
61    /// or immediately after a NUL terminator (byte at offset-1 is 0).
62    pub fn is_string_start(&self, offset: u32) -> bool {
63        let offset = offset as usize;
64        if offset >= self.data.len() {
65            return false;
66        }
67        // Offset 0 is always a valid string start
68        // Otherwise, the previous byte must be a NUL terminator
69        offset == 0 || self.data[offset - 1] == 0
70    }
71}
72
/// A string block paired with a precomputed index of where its strings lie.
///
/// The bytes sit behind an [`Arc`], so cloning this value shares the buffer
/// instead of copying it.
#[derive(Clone, Debug)]
pub struct CachedStringBlock {
    /// Shared, unmodified bytes of the string block
    data: Arc<Vec<u8>>,
    /// Maps a string's starting offset to its `(start, end)` byte range in `data`
    cache: HashMap<u32, (usize, usize)>,
}
81
82impl CachedStringBlock {
83    /// Create a cached string block from a string block
84    pub fn from_string_block(string_block: &StringBlock) -> Self {
85        let data = Arc::new(string_block.data().to_vec());
86        let mut cache = HashMap::new();
87
88        let mut offset = 0;
89        while offset < data.len() {
90            let start_offset = offset;
91
92            // Find the end of the string (null terminator)
93            while offset < data.len() && data[offset] != 0 {
94                offset += 1;
95            }
96
97            // Cache the string position
98            cache.insert(start_offset as u32, (start_offset, offset));
99
100            // Skip the null terminator
101            offset += 1;
102        }
103
104        Self { data, cache }
105    }
106
107    /// Get a string from the string block using a string reference
108    pub fn get_string(&self, string_ref: StringRef) -> Result<&str> {
109        let offset = string_ref.offset() as usize;
110
111        if let Some((start, end)) = self.cache.get(&string_ref.offset()) {
112            // Convert the bytes to a string
113            std::str::from_utf8(&self.data[*start..*end])
114                .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
115        } else {
116            // If not cached, find the end of the string
117            if offset >= self.data.len() {
118                return Err(Error::OutOfBounds(format!(
119                    "String reference offset out of bounds: {} (max: {})",
120                    offset,
121                    self.data.len()
122                )));
123            }
124
125            let mut end = offset;
126            while end < self.data.len() && self.data[end] != 0 {
127                end += 1;
128            }
129
130            // Convert the bytes to a string
131            std::str::from_utf8(&self.data[offset..end])
132                .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
133        }
134    }
135}