wow_cdbc/
stringblock.rs

1//! String block parsing functionality
2
3use crate::{Error, Result, StringRef};
4use std::collections::HashMap;
5use std::io::{Read, Seek, SeekFrom};
6use std::sync::Arc;
7
8/// Represents a string block in a DBC file
9#[derive(Debug, Clone)]
10pub struct StringBlock {
11    /// The raw bytes of the string block
12    data: Vec<u8>,
13}
14
15impl StringBlock {
16    /// Parse a string block from a reader
17    pub fn parse<R: Read + Seek>(reader: &mut R, offset: u64, size: u32) -> Result<Self> {
18        reader.seek(SeekFrom::Start(offset))?;
19
20        let mut data = vec![0u8; size as usize];
21        reader.read_exact(&mut data)?;
22
23        Ok(Self { data })
24    }
25
26    /// Get a string from the string block using a string reference
27    pub fn get_string(&self, string_ref: StringRef) -> Result<&str> {
28        let offset = string_ref.offset() as usize;
29        if offset >= self.data.len() {
30            return Err(Error::OutOfBounds(format!(
31                "String reference offset out of bounds: {} (max: {})",
32                offset,
33                self.data.len()
34            )));
35        }
36
37        // Find the end of the string (null terminator)
38        let mut end = offset;
39        while end < self.data.len() && self.data[end] != 0 {
40            end += 1;
41        }
42
43        // Convert the bytes to a string
44        std::str::from_utf8(&self.data[offset..end])
45            .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
46    }
47
48    /// Get the raw data of the string block
49    pub fn data(&self) -> &[u8] {
50        &self.data
51    }
52
53    /// Get the size of the string block in bytes
54    pub fn size(&self) -> usize {
55        self.data.len()
56    }
57}
58
59/// A cached string block for efficient string lookups
60#[derive(Debug, Clone)]
61pub struct CachedStringBlock {
62    /// The raw bytes of the string block
63    data: Arc<Vec<u8>>,
64    /// Cache of string references to string slices
65    cache: HashMap<u32, (usize, usize)>,
66}
67
68impl CachedStringBlock {
69    /// Create a cached string block from a string block
70    pub fn from_string_block(string_block: &StringBlock) -> Self {
71        let data = Arc::new(string_block.data().to_vec());
72        let mut cache = HashMap::new();
73
74        let mut offset = 0;
75        while offset < data.len() {
76            let start_offset = offset;
77
78            // Find the end of the string (null terminator)
79            while offset < data.len() && data[offset] != 0 {
80                offset += 1;
81            }
82
83            // Cache the string position
84            cache.insert(start_offset as u32, (start_offset, offset));
85
86            // Skip the null terminator
87            offset += 1;
88        }
89
90        Self { data, cache }
91    }
92
93    /// Get a string from the string block using a string reference
94    pub fn get_string(&self, string_ref: StringRef) -> Result<&str> {
95        let offset = string_ref.offset() as usize;
96
97        if let Some((start, end)) = self.cache.get(&string_ref.offset()) {
98            // Convert the bytes to a string
99            std::str::from_utf8(&self.data[*start..*end])
100                .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
101        } else {
102            // If not cached, find the end of the string
103            if offset >= self.data.len() {
104                return Err(Error::OutOfBounds(format!(
105                    "String reference offset out of bounds: {} (max: {})",
106                    offset,
107                    self.data.len()
108                )));
109            }
110
111            let mut end = offset;
112            while end < self.data.len() && self.data[end] != 0 {
113                end += 1;
114            }
115
116            // Convert the bytes to a string
117            std::str::from_utf8(&self.data[offset..end])
118                .map_err(|e| Error::TypeConversion(format!("Invalid UTF-8 string: {e}")))
119        }
120    }
121}