tact_parser/
espec.rs

1//! ESpec (Encoding Specification) parser for BLTE compression
2//!
3//! ESpec strings define how data should be encoded/compressed in BLTE format.
4//! This module implements a parser for the EBNF grammar defined at:
5//! <https://wowdev.wiki/BLTE#Encoding_Specification_(ESpec)>
6
7use std::fmt;
8use std::str::FromStr;
9
10use crate::{Error, Result};
11
/// Encoding specification (ESpec) defining how to encode/compress data
///
/// Each variant corresponds to one leading type character of the ESpec
/// string; the character is noted on the variant.
#[derive(Debug, Clone, PartialEq)]
pub enum ESpec {
    /// No compression ('n')
    None,
    /// ZLib compression ('z')
    ZLib {
        /// Compression level, when one follows `z:`
        level: Option<u8>,
        /// Bits selector, when one follows the level inside `z:{level,bits}`
        bits: Option<ZLibBits>,
    },
    /// Encryption ('e')
    Encrypted {
        /// Key name exactly as written in the spec string
        key: String,
        /// IV bytes decoded from the hex IV field
        iv: Vec<u8>,
        /// Nested encoding specification
        spec: Box<ESpec>,
    },
    /// Block table ('b')
    BlockTable {
        /// Chunks in the order they appear in the spec string
        chunks: Vec<BlockChunk>,
    },
    /// BCPack compression ('c')
    BCPack {
        /// Number given inside `c:{...}`
        bcn: u8,
    },
    /// GDeflate compression ('g')
    GDeflate {
        /// Number given inside `g:{...}`
        level: u8,
    },
}
35
/// Block chunk specification
///
/// One `size=spec` entry of a block table: an optional size specification
/// paired with the encoding used for the data it covers.
#[derive(Debug, Clone, PartialEq)]
pub struct BlockChunk {
    /// Block size specification (optional for final chunk); `None` is
    /// written as `*` when a block table is formatted
    pub size_spec: Option<BlockSizeSpec>,
    /// Encoding specification for this chunk
    pub spec: ESpec,
}
44
/// Block size specification
///
/// Written as `size[K|M][*count]` in an ESpec string; the `K` and `M`
/// suffixes scale the number by 1024 and 1024 * 1024 respectively.
#[derive(Debug, Clone, PartialEq)]
pub struct BlockSizeSpec {
    /// Block size in bytes (unit suffix already applied)
    pub size: u64,
    /// Number of blocks (optional)
    pub count: Option<u32>,
}
53
/// ZLib compression bits specification
///
/// The second field of `z:{level,bits}`: either a number or one of the
/// keywords `mpq`, `zlib` or `lz4hc`.
#[derive(Debug, Clone, PartialEq)]
pub enum ZLibBits {
    /// Numeric window bits
    Bits(u8),
    /// MPQ compression (keyword `mpq`)
    MPQ,
    /// ZLib compression (keyword `zlib`)
    ZLib,
    /// LZ4HC compression (keyword `lz4hc`)
    LZ4HC,
}
66
67impl ESpec {
68    /// Parse an ESpec string
69    pub fn parse(input: &str) -> Result<Self> {
70        Parser::new(input).parse_espec()
71    }
72
73    /// Check if this ESpec uses encryption
74    pub fn is_encrypted(&self) -> bool {
75        matches!(self, ESpec::Encrypted { .. })
76    }
77
78    /// Check if this ESpec uses compression
79    pub fn is_compressed(&self) -> bool {
80        match self {
81            ESpec::None => false,
82            ESpec::ZLib { .. } | ESpec::BCPack { .. } | ESpec::GDeflate { .. } => true,
83            ESpec::BlockTable { chunks } => chunks.iter().any(|c| c.spec.is_compressed()),
84            ESpec::Encrypted { spec, .. } => spec.is_compressed(),
85        }
86    }
87
88    /// Get the compression type as a string
89    pub fn compression_type(&self) -> &str {
90        match self {
91            ESpec::None => "none",
92            ESpec::ZLib { .. } => "zlib",
93            ESpec::BCPack { .. } => "bcpack",
94            ESpec::GDeflate { .. } => "gdeflate",
95            ESpec::BlockTable { .. } => "block",
96            ESpec::Encrypted { .. } => "encrypted",
97        }
98    }
99}
100
101impl fmt::Display for ESpec {
102    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103        match self {
104            ESpec::None => write!(f, "n"),
105            ESpec::ZLib { level, bits } => {
106                write!(f, "z")?;
107                if let Some(level) = level {
108                    write!(f, ":{}", level)?;
109                    if let Some(bits) = bits {
110                        write!(f, ",{}", bits)?;
111                    }
112                }
113                Ok(())
114            }
115            ESpec::Encrypted { key, iv, spec } => {
116                write!(f, "e:{{{},{},{}}}", key, hex::encode(iv), spec)
117            }
118            ESpec::BlockTable { chunks } => {
119                write!(f, "b:")?;
120                if chunks.len() == 1 && chunks[0].size_spec.is_none() {
121                    write!(f, "{}", chunks[0].spec)
122                } else {
123                    write!(f, "{{")?;
124                    for (i, chunk) in chunks.iter().enumerate() {
125                        if i > 0 {
126                            write!(f, ",")?;
127                        }
128                        if let Some(size_spec) = &chunk.size_spec {
129                            write!(f, "{}=", size_spec)?;
130                        } else {
131                            write!(f, "*=")?;
132                        }
133                        write!(f, "{}", chunk.spec)?;
134                    }
135                    write!(f, "}}")
136                }
137            }
138            ESpec::BCPack { bcn } => write!(f, "c:{{{}}}", bcn),
139            ESpec::GDeflate { level } => write!(f, "g:{{{}}}", level),
140        }
141    }
142}
143
144impl fmt::Display for BlockSizeSpec {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        if self.size % (1024 * 1024) == 0 {
147            write!(f, "{}M", self.size / (1024 * 1024))?;
148        } else if self.size % 1024 == 0 {
149            write!(f, "{}K", self.size / 1024)?;
150        } else {
151            write!(f, "{}", self.size)?;
152        }
153        if let Some(count) = self.count {
154            write!(f, "*{}", count)?;
155        }
156        Ok(())
157    }
158}
159
160impl fmt::Display for ZLibBits {
161    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
162        match self {
163            ZLibBits::Bits(n) => write!(f, "{}", n),
164            ZLibBits::MPQ => write!(f, "mpq"),
165            ZLibBits::ZLib => write!(f, "zlib"),
166            ZLibBits::LZ4HC => write!(f, "lz4hc"),
167        }
168    }
169}
170
/// Parser for ESpec strings
///
/// Holds the text being parsed together with a cursor into it.
struct Parser<'a> {
    /// Complete ESpec string being parsed
    input: &'a str,
    /// Byte offset into `input` of the next unread character
    pos: usize,
}
176
177impl<'a> Parser<'a> {
178    fn new(input: &'a str) -> Self {
179        Self { input, pos: 0 }
180    }
181
182    fn peek(&self) -> Option<char> {
183        self.input[self.pos..].chars().next()
184    }
185
186    fn consume(&mut self, ch: char) -> Result<()> {
187        if self.peek() == Some(ch) {
188            self.pos += ch.len_utf8();
189            Ok(())
190        } else {
191            Err(Error::IOError(std::io::Error::new(
192                std::io::ErrorKind::InvalidData,
193                format!("Expected '{}' at position {}", ch, self.pos),
194            )))
195        }
196    }
197
198    fn parse_number(&mut self) -> Result<u64> {
199        let start = self.pos;
200        while let Some(ch) = self.peek() {
201            if ch.is_ascii_digit() {
202                self.pos += 1;
203            } else {
204                break;
205            }
206        }
207        if self.pos == start {
208            return Err(Error::IOError(std::io::Error::new(
209                std::io::ErrorKind::InvalidData,
210                format!("Expected number at position {}", self.pos),
211            )));
212        }
213        self.input[start..self.pos].parse().map_err(|e| {
214            Error::IOError(std::io::Error::new(
215                std::io::ErrorKind::InvalidData,
216                format!("Invalid number: {}", e),
217            ))
218        })
219    }
220
221    fn parse_hex_string(&mut self, len: usize) -> Result<Vec<u8>> {
222        let start = self.pos;
223        self.pos = (self.pos + len * 2).min(self.input.len());
224        let hex_str = &self.input[start..self.pos];
225        hex::decode(hex_str).map_err(|e| {
226            Error::IOError(std::io::Error::new(
227                std::io::ErrorKind::InvalidData,
228                format!("Invalid hex string: {}", e),
229            ))
230        })
231    }
232
233    fn parse_identifier(&mut self) -> String {
234        let start = self.pos;
235        while let Some(ch) = self.peek() {
236            if ch.is_ascii_alphanumeric() {
237                self.pos += 1;
238            } else {
239                break;
240            }
241        }
242        self.input[start..self.pos].to_string()
243    }
244
245    fn parse_espec(&mut self) -> Result<ESpec> {
246        match self.peek() {
247            Some('n') => {
248                self.consume('n')?;
249                Ok(ESpec::None)
250            }
251            Some('z') => self.parse_zlib(),
252            Some('e') => self.parse_encrypted(),
253            Some('b') => self.parse_block_table(),
254            Some('c') => self.parse_bcpack(),
255            Some('g') => self.parse_gdeflate(),
256            _ => Err(Error::IOError(std::io::Error::new(
257                std::io::ErrorKind::InvalidData,
258                format!("Unknown ESpec type at position {}", self.pos),
259            ))),
260        }
261    }
262
263    fn parse_zlib(&mut self) -> Result<ESpec> {
264        self.consume('z')?;
265
266        if self.peek() != Some(':') {
267            return Ok(ESpec::ZLib {
268                level: None,
269                bits: None,
270            });
271        }
272
273        self.consume(':')?;
274
275        // Check for braces (optional)
276        let has_braces = self.peek() == Some('{');
277        if has_braces {
278            self.consume('{')?;
279        }
280
281        // Parse level if present and it's a number
282        let level = if self.peek().is_some_and(|c| c.is_ascii_digit()) {
283            Some(self.parse_number()? as u8)
284        } else {
285            None
286        };
287
288        // Only parse bits if we have a comma AND we're inside braces
289        // (bits require braces in the format)
290        let bits = if has_braces && self.peek() == Some(',') {
291            self.consume(',')?;
292            Some(self.parse_zlib_bits()?)
293        } else {
294            None
295        };
296
297        if has_braces {
298            self.consume('}')?;
299        }
300
301        Ok(ESpec::ZLib { level, bits })
302    }
303
304    fn parse_zlib_bits(&mut self) -> Result<ZLibBits> {
305        if self.peek().is_some_and(|c| c.is_ascii_digit()) {
306            Ok(ZLibBits::Bits(self.parse_number()? as u8))
307        } else {
308            let ident = self.parse_identifier();
309            if ident.is_empty() {
310                // No bits specified
311                return Err(Error::IOError(std::io::Error::new(
312                    std::io::ErrorKind::InvalidData,
313                    "Expected zlib bits specification after comma",
314                )));
315            }
316            match ident.as_str() {
317                "mpq" => Ok(ZLibBits::MPQ),
318                "zlib" => Ok(ZLibBits::ZLib),
319                "lz4hc" => Ok(ZLibBits::LZ4HC),
320                _ => Err(Error::IOError(std::io::Error::new(
321                    std::io::ErrorKind::InvalidData,
322                    format!("Unknown zlib bits type: {}", ident),
323                ))),
324            }
325        }
326    }
327
328    fn parse_encrypted(&mut self) -> Result<ESpec> {
329        self.consume('e')?;
330        self.consume(':')?;
331        self.consume('{')?;
332
333        // Parse 8-byte hex key name
334        let key = self.parse_identifier();
335        if key.len() != 16 {
336            return Err(Error::IOError(std::io::Error::new(
337                std::io::ErrorKind::InvalidData,
338                format!("Encryption key must be 16 hex chars, got {}", key.len()),
339            )));
340        }
341
342        self.consume(',')?;
343
344        // Parse 4-byte hex IV
345        let iv = self.parse_hex_string(4)?;
346
347        self.consume(',')?;
348
349        // Parse nested ESpec
350        let spec = Box::new(self.parse_espec()?);
351
352        self.consume('}')?;
353
354        Ok(ESpec::Encrypted { key, iv, spec })
355    }
356
357    fn parse_block_table(&mut self) -> Result<ESpec> {
358        self.consume('b')?;
359        self.consume(':')?;
360
361        // Check if it's a single spec or multiple chunks
362        if self.peek() != Some('{') {
363            // Single spec without size
364            let spec = self.parse_espec()?;
365            return Ok(ESpec::BlockTable {
366                chunks: vec![BlockChunk {
367                    size_spec: None,
368                    spec,
369                }],
370            });
371        }
372
373        self.consume('{')?;
374
375        let mut chunks = Vec::new();
376
377        loop {
378            // Parse size spec or final chunk
379            let size_spec = if self.peek() == Some('*') {
380                self.consume('*')?;
381                if self.peek() == Some('=') {
382                    None // Final chunk with no size
383                } else {
384                    // Parse count after *
385                    let count = self.parse_number()? as u32;
386                    Some(BlockSizeSpec {
387                        size: 0, // Will be determined by total size
388                        count: Some(count),
389                    })
390                }
391            } else {
392                Some(self.parse_block_size_spec()?)
393            };
394
395            self.consume('=')?;
396
397            let spec = self.parse_espec()?;
398            chunks.push(BlockChunk { size_spec, spec });
399
400            if self.peek() == Some(',') {
401                self.consume(',')?;
402            } else {
403                break;
404            }
405        }
406
407        self.consume('}')?;
408
409        Ok(ESpec::BlockTable { chunks })
410    }
411
412    fn parse_block_size_spec(&mut self) -> Result<BlockSizeSpec> {
413        let mut size = self.parse_number()?;
414
415        // Check for unit (K or M)
416        if let Some(unit) = self.peek() {
417            if unit == 'K' {
418                self.consume('K')?;
419                size *= 1024;
420            } else if unit == 'M' {
421                self.consume('M')?;
422                size *= 1024 * 1024;
423            }
424        }
425
426        // Check for count
427        let count = if self.peek() == Some('*') {
428            self.consume('*')?;
429            if self.peek().is_some_and(|c| c.is_ascii_digit()) {
430                Some(self.parse_number()? as u32)
431            } else {
432                None // * without number means "rest of file"
433            }
434        } else {
435            None
436        };
437
438        Ok(BlockSizeSpec { size, count })
439    }
440
441    fn parse_bcpack(&mut self) -> Result<ESpec> {
442        self.consume('c')?;
443        self.consume(':')?;
444        self.consume('{')?;
445        let bcn = self.parse_number()? as u8;
446        self.consume('}')?;
447        Ok(ESpec::BCPack { bcn })
448    }
449
450    fn parse_gdeflate(&mut self) -> Result<ESpec> {
451        self.consume('g')?;
452        self.consume(':')?;
453        self.consume('{')?;
454        let level = self.parse_number()? as u8;
455        self.consume('}')?;
456        Ok(ESpec::GDeflate { level })
457    }
458}
459
460impl FromStr for ESpec {
461    type Err = Error;
462
463    fn from_str(s: &str) -> Result<Self> {
464        Self::parse(s)
465    }
466}
467
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected value of a ZLib spec.
    fn zlib(level: Option<u8>, bits: Option<ZLibBits>) -> ESpec {
        ESpec::ZLib { level, bits }
    }

    /// Shorthand for an expected size specification.
    fn sized(size: u64, count: Option<u32>) -> Option<BlockSizeSpec> {
        Some(BlockSizeSpec { size, count })
    }

    /// Parse `input` and return its chunks, failing the test when the result
    /// is not a block table.
    fn block_chunks(input: &str) -> Vec<BlockChunk> {
        match ESpec::parse(input).unwrap() {
            ESpec::BlockTable { chunks } => chunks,
            _ => panic!("Expected BlockTable"),
        }
    }

    #[test]
    fn test_parse_none() {
        let parsed = ESpec::parse("n").unwrap();
        assert_eq!(parsed, ESpec::None);
        assert_eq!(parsed.to_string(), "n");
    }

    #[test]
    fn test_parse_zlib_default() {
        let parsed = ESpec::parse("z").unwrap();
        assert_eq!(parsed, zlib(None, None));
        assert_eq!(parsed.to_string(), "z");
    }

    #[test]
    fn test_parse_zlib_with_level() {
        let parsed = ESpec::parse("z:9").unwrap();
        assert_eq!(parsed, zlib(Some(9), None));
        assert_eq!(parsed.to_string(), "z:9");
    }

    #[test]
    fn test_parse_zlib_with_level_and_bits() {
        let parsed = ESpec::parse("z:{9,15}").unwrap();
        assert_eq!(parsed, zlib(Some(9), Some(ZLibBits::Bits(15))));
    }

    #[test]
    fn test_parse_zlib_with_mpq() {
        let parsed = ESpec::parse("z:{9,mpq}").unwrap();
        assert_eq!(parsed, zlib(Some(9), Some(ZLibBits::MPQ)));
    }

    #[test]
    fn test_parse_block_table_simple() {
        let chunks = block_chunks("b:n");
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].spec, ESpec::None);
        assert!(chunks[0].size_spec.is_none());
    }

    #[test]
    fn test_parse_block_table_with_sizes() {
        let chunks = block_chunks("b:{1M*3=z:9,*=n}");
        assert_eq!(chunks.len(), 2);

        // First chunk: 1M * 3 blocks with zlib level 9
        let head = &chunks[0];
        assert_eq!(head.size_spec, sized(1024 * 1024, Some(3)));
        assert_eq!(head.spec, zlib(Some(9), None));

        // Final chunk: rest of file uncompressed
        let tail = &chunks[1];
        assert!(tail.size_spec.is_none());
        assert_eq!(tail.spec, ESpec::None);
    }

    #[test]
    fn test_parse_bcpack() {
        let parsed = ESpec::parse("c:{4}").unwrap();
        assert_eq!(parsed, ESpec::BCPack { bcn: 4 });
        assert_eq!(parsed.to_string(), "c:{4}");
    }

    #[test]
    fn test_parse_gdeflate() {
        let parsed = ESpec::parse("g:{5}").unwrap();
        assert_eq!(parsed, ESpec::GDeflate { level: 5 });
        assert_eq!(parsed.to_string(), "g:{5}");
    }

    #[test]
    fn test_compression_detection() {
        assert!(!ESpec::None.is_compressed());
        assert!(zlib(None, None).is_compressed());
        assert!(ESpec::BCPack { bcn: 4 }.is_compressed());
        assert!(ESpec::GDeflate { level: 5 }.is_compressed());
    }

    #[test]
    fn test_complex_block_table() {
        let chunks = block_chunks("b:{256K=n,512K*2=z:6,*=z:9}");
        assert_eq!(chunks.len(), 3);

        // 256KB uncompressed
        assert_eq!(chunks[0].size_spec, sized(256 * 1024, None));
        assert_eq!(chunks[0].spec, ESpec::None);

        // 512KB * 2 with zlib 6
        assert_eq!(chunks[1].size_spec, sized(512 * 1024, Some(2)));
        assert_eq!(chunks[1].spec, zlib(Some(6), None));

        // Rest with zlib 9
        assert!(chunks[2].size_spec.is_none());
        assert_eq!(chunks[2].spec, zlib(Some(9), None));
    }
}