inside_baseball/
extract.rs

1use crate::{
2    config::Config,
3    script::{decompile, disasm_to_string, get_script_name, Scope},
4    xor::XorStream,
5};
6use byteordered::byteorder::{ReadBytesExt, BE, LE};
7use std::{
8    collections::HashMap,
9    error::Error,
10    fmt,
11    fmt::Write,
12    io,
13    io::{BufReader, Read, Seek, SeekFrom},
14    mem,
15    ops::Range,
16    str,
17};
18use tracing::info_span;
19
/// Key byte passed to `XorStream::new` by both `read_index` and `extract`.
// NOTE(review): presumably every byte of the files is XORed with this value;
// the `XorStream` implementation lives in another module — confirm there.
pub const NICE: u8 = 0x69;
21
/// In-memory form of the index, assembled by `read_index`.
pub struct Index {
    /// Payload of the `DISK` block: one disk number per LFL, indexed by LFL
    /// (room) number.
    pub lfl_disks: Vec<u8>,
    /// Payload of the `DLFL` block: one offset per LFL, indexed the same way
    /// as `lfl_disks`.
    pub lfl_offsets: Vec<i32>,
    /// Directory read from the `DIRS` block; used to number `SCRP` blocks.
    pub scripts: Directory,
    /// Directory read from the `DIRN` block; used to number `DIGI` and `TALK`
    /// blocks.
    pub sounds: Directory,
    /// Directory read from the `DIRT` block; used to number `TLKE` blocks.
    pub talkies: Directory,
}
29
/// One directory from the index: three parallel arrays with one entry per
/// object. An object's number is its position in these arrays.
pub struct Directory {
    /// The game internally calls this "disk number".
    ///
    /// In this file each value is used as an index into `Index::lfl_disks`
    /// and `Index::lfl_offsets`.
    pub room_numbers: Vec<u8>,
    /// The game internally calls this "disk offset".
    ///
    /// It is added to the room's `Index::lfl_offsets` entry to obtain the
    /// position that is compared against block offsets during extraction.
    pub offsets: Vec<i32>,
    /// Third per-object array stored in the directory block.
    // NOTE(review): presumably the byte size of each object's block; within
    // this file it is only printed by `dump_directory` — confirm.
    pub glob_sizes: Vec<i32>,
}
37
38pub fn read_index(s: &mut (impl Read + Seek)) -> Result<Index, Box<dyn Error>> {
39    let s = XorStream::new(s, NICE);
40    let mut s = BufReader::new(s);
41
42    let len = s.seek(SeekFrom::End(0))?;
43    s.rewind()?;
44
45    let mut state = IndexState {
46        buf: Vec::with_capacity(64 << 10),
47        lfl_disks: None,
48        lfl_offsets: None,
49        scripts: None,
50        sounds: None,
51        talkies: None,
52    };
53
54    scan_blocks(&mut s, &mut state, handle_index_block, len)?;
55
56    Ok(Index {
57        lfl_disks: state.lfl_disks.ok_or("index incomplete")?,
58        lfl_offsets: state.lfl_offsets.ok_or("index incomplete")?,
59        scripts: state.scripts.ok_or("index incomplete")?,
60        sounds: state.sounds.ok_or("index incomplete")?,
61        talkies: state.talkies.ok_or("index incomplete")?,
62    })
63}
64
/// Accumulator threaded through `scan_blocks` while `read_index` walks the
/// index file. Each `Option` starts as `None` and is filled in when the
/// corresponding block is seen.
struct IndexState {
    /// Scratch buffer that receives the payload of the block being handled.
    buf: Vec<u8>,
    /// Set from the `DISK` block.
    lfl_disks: Option<Vec<u8>>,
    /// Set from the `DLFL` block.
    lfl_offsets: Option<Vec<i32>>,
    /// Set from the `DIRS` block.
    scripts: Option<Directory>,
    /// Set from the `DIRN` block.
    sounds: Option<Directory>,
    /// Set from the `DIRT` block.
    talkies: Option<Directory>,
}
73
74fn handle_index_block<R: Read>(
75    stream: &mut R,
76    state: &mut IndexState,
77    id: [u8; 4],
78    len: u64,
79) -> Result<(), Box<dyn Error>> {
80    state.buf.resize(len.try_into().unwrap(), 0);
81    stream.read_exact(&mut state.buf)?;
82    let mut r = io::Cursor::new(&state.buf);
83
84    match &id {
85        b"DISK" => {
86            let count = r.read_i16::<LE>()?;
87            let mut list = vec![0; count.try_into()?];
88            r.read_exact(&mut list)?;
89            state.lfl_disks = Some(list);
90        }
91        b"DLFL" => {
92            let count = r.read_i16::<LE>()?;
93            let mut list = vec![0; count.try_into()?];
94            r.read_i32_into::<LE>(&mut list)?;
95            state.lfl_offsets = Some(list);
96        }
97        b"DIRS" => {
98            state.scripts = Some(read_directory(&mut r)?);
99        }
100        b"DIRN" => {
101            state.sounds = Some(read_directory(&mut r)?);
102        }
103        b"DIRT" => {
104            state.talkies = Some(read_directory(&mut r)?);
105        }
106        _ => {
107            r.seek(SeekFrom::End(0))?;
108        }
109    }
110    Ok(())
111}
112
113fn read_directory(r: &mut impl Read) -> Result<Directory, Box<dyn Error>> {
114    let count = r.read_i16::<LE>()?;
115    let mut dir = Directory {
116        room_numbers: vec![0; count.try_into()?],
117        offsets: vec![0; count.try_into()?],
118        glob_sizes: vec![0; count.try_into()?],
119    };
120    r.read_exact(&mut dir.room_numbers)?;
121    r.read_i32_into::<LE>(&mut dir.offsets)?;
122    r.read_i32_into::<LE>(&mut dir.glob_sizes)?;
123    Ok(dir)
124}
125
126pub fn dump_index(w: &mut impl Write, index: &Index) -> fmt::Result {
127    w.write_str("lfl_disks:\n")?;
128    for (i, x) in index.lfl_disks.iter().enumerate() {
129        writeln!(w, "\t{i}\t{x}")?;
130    }
131    w.write_str("lfl_offsets:\n")?;
132    for (i, x) in index.lfl_disks.iter().enumerate() {
133        writeln!(w, "\t{i}\t{x}")?;
134    }
135    w.write_str("scripts:\n")?;
136    dump_directory(w, index, &index.scripts)?;
137    w.write_str("sounds:\n")?;
138    dump_directory(w, index, &index.sounds)?;
139    Ok(())
140}
141
142fn dump_directory(w: &mut impl Write, index: &Index, dir: &Directory) -> fmt::Result {
143    for i in 0..dir.room_numbers.len() {
144        let room: usize = dir.room_numbers[i].into();
145        writeln!(
146            w,
147            "\t{}\t{}\t{}\t{}\t{}\t{}",
148            i,
149            dir.room_numbers[i],
150            dir.offsets[i],
151            dir.glob_sizes[i],
152            index.lfl_disks[room],
153            index.lfl_offsets[room] + dir.offsets[i],
154        )?;
155    }
156    Ok(())
157}
158
159pub fn extract(
160    index: &Index,
161    disk_number: u8,
162    config: &Config,
163    publish_scripts: bool,
164    s: &mut (impl Read + Seek),
165    write: &mut impl FnMut(&str, &[u8]) -> Result<(), Box<dyn Error>>,
166) -> Result<(), Box<dyn Error>> {
167    let s = XorStream::new(s, NICE);
168    let mut s = BufReader::new(s);
169
170    let len = s.seek(SeekFrom::End(0))?;
171    s.rewind()?;
172
173    let mut state = ExtractState {
174        disk_number,
175        index,
176        config,
177        write,
178        path: {
179            let mut path = String::with_capacity(64);
180            path.push('.');
181            path
182        },
183        publish_scripts,
184        current_room: 0,
185        current_object: 0,
186        block_numbers: HashMap::new(),
187        map: String::with_capacity(1 << 10),
188        buf: Vec::with_capacity(64 << 10),
189    };
190
191    scan_blocks(&mut s, &mut state, handle_extract_block, len)?;
192
193    (state.write)(&format!("{}/.map", state.path), state.map.as_bytes())?;
194    Ok(())
195}
196
/// State threaded through `scan_blocks` during extraction. A fresh instance is
/// created for every container level (see `extract_recursive`).
struct ExtractState<'a> {
    /// Number of the disk file being extracted; used for index lookups and as
    /// the number of `LECF` blocks.
    disk_number: u8,
    /// Index used to resolve room, script, sound and talkie numbers.
    index: &'a Index,
    /// Passed through to `decompile` and `get_script_name`.
    config: &'a Config,
    /// Output sink, called with `(path, contents)` for every file produced.
    write: &'a mut dyn FnMut(&str, &[u8]) -> Result<(), Box<dyn Error>>,
    /// Output directory of the container currently being scanned; starts at
    /// `"."` and gains one `/ID_NUM` component per nesting level.
    path: String,
    /// When set, `output_script` also writes decompiled scripts under
    /// `scripts/`.
    publish_scripts: bool,
    /// Number of the most recently entered `LFLF` block (0 before the first).
    current_room: i32,
    /// Object number read from the most recent `CDHD` block (0 before the
    /// first).
    current_object: u16,
    /// Per-container counters, keyed by block id, for blocks that get their
    /// number from order of appearance.
    block_numbers: HashMap<[u8; 4], i32>,
    /// One `ID_NUM` line per child block, in order; written to `<path>/.map`
    /// once the container has been scanned.
    map: String,
    /// Scratch buffer holding the payload of the flat block being processed.
    buf: Vec<u8>,
}
210
211fn handle_extract_block<R: Read + Seek>(
212    r: &mut R,
213    state: &mut ExtractState,
214    id: [u8; 4],
215    len: u64,
216) -> Result<(), Box<dyn Error>> {
217    let offset = r.stream_position()?;
218
219    if guess_is_block_recursive(r, len)? {
220        extract_recursive(r, state, id, offset, len)?;
221    } else {
222        extract_flat(r, state, id, len, offset)?;
223    }
224    Ok(())
225}
226
/// Extracts a container block by scanning the blocks nested inside it.
///
/// `offset` is the stream position of the container's payload and `len` the
/// payload length. The container gets a number (from the index for `LFLF`,
/// `DIGI`, `TALK` and `TLKE`, the disk number for `LECF`, otherwise a
/// per-id counter), is recorded in the parent's map, and its children are
/// extracted into a `<path>/ID_NUM` subdirectory whose own `.map` file is
/// written at the end.
///
/// # Errors
///
/// Fails when an indexed block cannot be found in the index, on any error
/// while scanning the children, and on errors from the write callback.
fn extract_recursive<R: Read + Seek>(
    r: &mut R,
    state: &mut ExtractState,
    id: [u8; 4],
    offset: u64,
    len: u64,
) -> Result<(), Box<dyn Error>> {
    let number = match &id {
        b"LECF" => state.disk_number.into(),
        b"LFLF" => {
            // LFL offsets in the index are compared against the payload
            // position itself; entering an LFLF also updates the current room.
            state.current_room = find_lfl_number(state.disk_number, offset, state.index)
                .ok_or("LFL not in index")?;
            state.current_room
        }
        b"DIGI" | b"TALK" => {
            // Directory offsets are compared against the start of the block
            // header, 8 bytes before the payload.
            find_object_number(
                state.index,
                &state.index.sounds,
                state.disk_number,
                offset - 8,
            )
            .ok_or("sound not in index")?
        }
        b"TLKE" => {
            find_object_number(
                state.index,
                &state.index.talkies,
                state.disk_number,
                offset - 8,
            )
            .ok_or("talkie not in index")?
        }
        // Otherwise number blocks of the same id 1, 2, 3, ... within the
        // parent container.
        _ => {
            *state
                .block_numbers
                .entry(id)
                .and_modify(|n| *n += 1)
                .or_insert(1)
        }
    };

    // Record this container in the parent's map.
    writeln!(state.map, "{}", IdAndNum(id, number))?;

    // Descend: children are written below `<path>/ID_NUM`.
    write!(state.path, "/{}", IdAndNum(id, number))?;

    // Child state: copy most fields, temporarily move the reusable buffers
    // (`path`, `buf`) out of the parent, and start with fresh counters and a
    // fresh map.
    let mut inner = ExtractState {
        disk_number: state.disk_number,
        index: state.index,
        config: state.config,
        write: state.write,
        path: mem::take(&mut state.path),
        publish_scripts: state.publish_scripts,
        current_room: state.current_room,
        current_object: state.current_object,
        block_numbers: HashMap::new(),
        map: String::with_capacity(1 << 10),
        buf: mem::take(&mut state.buf),
    };

    scan_blocks(r, &mut inner, handle_extract_block, len)?;

    // Return the temporarily moved fields to the parent.
    state.buf = mem::take(&mut inner.buf);
    state.path = mem::take(&mut inner.path);

    // Write the child listing into the container's own directory.
    let map = inner.map;
    (state.write)(&format!("{}/.map", state.path), map.as_bytes())?;

    // Ascend: drop the `/ID_NUM` component appended above.
    state.path.truncate(state.path.rfind('/').unwrap());
    Ok(())
}
299
300fn extract_flat<R: Read + Seek>(
301    r: &mut R,
302    state: &mut ExtractState,
303    id: [u8; 4],
304    len: u64,
305    offset: u64,
306) -> Result<(), Box<dyn Error>> {
307    state.buf.clear();
308    state.buf.reserve(len.try_into()?);
309    io::copy(&mut r.take(len), &mut state.buf)?;
310
311    let number = match &id {
312        // SCRP number comes from index
313        b"SCRP" => {
314            find_object_number(
315                state.index,
316                &state.index.scripts,
317                state.disk_number,
318                offset - 8,
319            )
320            .ok_or("script missing from index")?
321        }
322        // LSC2 number comes from block header
323        b"LSC2" => {
324            let number_bytes = state.buf.get(..4).ok_or("local script missing header")?;
325            i32::from_le_bytes(number_bytes.try_into().unwrap())
326        }
327        b"CDHD" => {
328            let number_bytes = state.buf.get(..2).ok_or("bad object header")?;
329            state.current_object = u16::from_le_bytes(number_bytes.try_into().unwrap());
330            state.current_object.into()
331        }
332        // Otherwise use a counter per block type
333        _ => {
334            *state
335                .block_numbers
336                .entry(id)
337                .and_modify(|n| *n += 1)
338                .or_insert(1)
339        }
340    };
341
342    if !(id == *b"SCRP" && number == 85) {
343        // return Ok(());
344    }
345    if !(id == *b"LSC2" && state.current_room == 8 && number == 2056) {
346        // return Ok(());
347    }
348
349    writeln!(state.map, "{}", IdAndNum(id, number))?;
350
351    let filename = format!("{}/{}.bin", state.path, IdAndNum(id, number));
352    eprintln!("filename = {:?}", filename);
353    (state.write)(&filename, &state.buf)?;
354
355    match &id {
356        b"SCRP" | b"LSC2" | b"ENCD" | b"EXCD" => extract_script(state, id, number)?,
357        b"VERB" => {
358            debug_assert!(number == 1); // only one VERB per OBCD
359            extract_verb(state)?;
360        }
361        _ => {}
362    }
363    Ok(())
364}
365
366fn extract_script(
367    state: &mut ExtractState,
368    id: [u8; 4],
369    number: i32,
370) -> Result<(), Box<dyn Error>> {
371    let mut range = 0..state.buf.len();
372
373    if id == *b"LSC2" {
374        // Skip header. Code starts at offset 4.
375        range.start = 4;
376    }
377
378    let id_num = IdAndNum(id, number);
379    let scope = match &id {
380        b"SCRP" => Scope::Global(number),
381        b"LSC2" => Scope::RoomLocal(state.current_room, number),
382        b"ENCD" => Scope::RoomEnter(state.current_room),
383        b"EXCD" => Scope::RoomExit(state.current_room),
384        _ => unreachable!(),
385    };
386    output_script(state, range, id_num, scope)?;
387    Ok(())
388}
389
390fn extract_verb(state: &mut ExtractState) -> Result<(), Box<dyn Error>> {
391    let mut pos = 0;
392    while let Some((number, offset)) = read_verb(&state.buf[pos..]) {
393        let start = (offset - 8).try_into()?; // relative to block including type/len
394        pos += 3;
395        let next_offset = read_verb(&state.buf[pos..]).map(|(_, o)| o);
396        let end = match next_offset {
397            Some(o) => (o - 8).try_into()?,
398            None => state.buf.len(),
399        };
400
401        let id_num = IdAndNum(*b"VERB", number.into());
402        let scope = Scope::Verb(state.current_room, state.current_object);
403        output_script(state, start..end, id_num, scope)?;
404    }
405    Ok(())
406}
407
/// Decodes the verb-table entry at the start of `buf`.
///
/// An entry is a non-zero verb number followed by a little-endian `u16`
/// offset. Returns `None` for the zero terminator and when fewer than three
/// bytes are available.
fn read_verb(buf: &[u8]) -> Option<(u8, u16)> {
    match buf {
        &[number, lo, hi, ..] if number != 0 => Some((number, u16::from_le_bytes([lo, hi]))),
        _ => None,
    }
}
416
417fn output_script(
418    state: &mut ExtractState,
419    range: Range<usize>,
420    id_num: IdAndNum,
421    scope: Scope,
422) -> Result<(), Box<dyn Error>> {
423    let code = &state.buf[range];
424
425    let disasm = disasm_to_string(code);
426    let filename = format!("{}/{}.s", state.path, id_num);
427    (state.write)(&filename, disasm.as_bytes())?;
428
429    let decomp = {
430        let _span = info_span!("decompile", script = %id_num).entered();
431        decompile(code, scope, state.config)
432    };
433    let mut filename = format!("{}/{}.scu", state.path, id_num);
434    (state.write)(&filename, decomp.as_bytes())?;
435
436    if state.publish_scripts {
437        filename.clear();
438        write!(filename, "scripts/")?;
439        match scope {
440            Scope::Global(number) => write!(filename, "scr{number:04}")?,
441            Scope::RoomLocal(room, number) => write!(filename, "{room:02}/lsc{number:04}")?,
442            Scope::RoomEnter(room) => write!(filename, "{room:02}/enter")?,
443            Scope::RoomExit(room) => write!(filename, "{room:02}/exit")?,
444            Scope::Verb(room, object) => {
445                write!(
446                    filename,
447                    "{room:02}/obj{object:04} verb{verb:02}",
448                    verb = id_num.1,
449                )?;
450            }
451        }
452        if let Some(name) = get_script_name(scope, state.config) {
453            write!(filename, " {name}")?;
454        }
455        write!(filename, ".scu")?;
456        (state.write)(&filename, decomp.as_bytes())?;
457    }
458
459    Ok(())
460}
461
462fn find_lfl_number(disk_number: u8, offset: u64, index: &Index) -> Option<i32> {
463    for i in 0..index.lfl_disks.len() {
464        if index.lfl_disks[i] == disk_number && Ok(index.lfl_offsets[i]) == offset.try_into() {
465            return Some(i.try_into().unwrap());
466        }
467    }
468    None
469}
470
471fn find_object_number(index: &Index, dir: &Directory, disk_number: u8, offset: u64) -> Option<i32> {
472    let offset: i32 = offset.try_into().ok()?;
473    for i in 0..dir.room_numbers.len() {
474        let room_number: usize = dir.room_numbers[i].into();
475        let dnum = index.lfl_disks[room_number];
476        let doff = index.lfl_offsets[room_number] + dir.offsets[i];
477        if dnum == disk_number && doff == offset {
478            return Some(i.try_into().unwrap());
479        }
480    }
481    None
482}
483
/// A four-byte block id paired with a number.
///
/// Displays as the id, an underscore, and the number zero-padded to at least
/// two digits, e.g. `SCRP_05`. This is the stem used for output file and
/// directory names.
#[derive(Copy, Clone)]
struct IdAndNum([u8; 4], i32);

impl fmt::Display for IdAndNum {
    /// # Panics
    ///
    /// Panics when the id is not valid UTF-8.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(id, number) = self;
        let id = str::from_utf8(id).unwrap();
        write!(f, "{id}_{number:02}")
    }
}
495
/// Callback invoked by `scan_blocks` for every block it encounters.
///
/// Arguments: the stream, positioned just past the block's 8-byte header; the
/// caller's state; the block's four-byte id; and the payload length (the
/// block length minus the header). `read_block` verifies afterwards that the
/// handler left the stream exactly at the end of the block.
type BlockHandler<Stream, State> =
    fn(&mut Stream, &mut State, [u8; 4], u64) -> Result<(), Box<dyn Error>>;
498
499fn scan_blocks<Stream: Read + Seek, State>(
500    s: &mut Stream,
501    state: &mut State,
502    handle_block: BlockHandler<Stream, State>,
503    parent_len: u64,
504) -> Result<(), Box<dyn Error>> {
505    let start = s.stream_position()?;
506    loop {
507        let pos = s.stream_position()?;
508        if pos == start + parent_len {
509            break;
510        }
511        if pos > start + parent_len {
512            return Err("misaligned block end".into());
513        }
514
515        read_block(s, |s, id, len| handle_block(s, state, id, len))?;
516    }
517    Ok(())
518}
519
520fn read_block<S: Read + Seek, R>(
521    s: &mut S,
522    f: impl FnOnce(&mut S, [u8; 4], u64) -> Result<R, Box<dyn Error>>,
523) -> Result<R, Box<dyn Error>> {
524    let start = s.stream_position()?;
525    let mut id = [0; 4];
526    s.read_exact(&mut id)?;
527    let len = s.read_i32::<BE>()?;
528    let len: u64 = len.try_into()?;
529    let result = f(s, id, len - 8)?;
530    let end = s.stream_position()?;
531    if end - start != len {
532        return Err("bug: block reader read wrong length".into());
533    }
534    Ok(result)
535}
536
537// heuristic
538fn guess_is_block_recursive<S: Read + Seek>(s: &mut S, len: u64) -> Result<bool, Box<dyn Error>> {
539    if len < 8 {
540        return Ok(false);
541    }
542    let start = s.stream_position()?;
543    let mut id = [0; 4];
544    s.read_exact(&mut id)?;
545    let len = s.read_i32::<BE>()?;
546    s.seek(SeekFrom::Start(start))?; // only peek, don't consume
547
548    Ok(id.iter().all(|&ch| (32..=126).contains(&ch)) && (0..0x100_0000).contains(&len))
549}