Skip to main content

zipatch_rs/index/
builder.rs

1//! Multi-patch plan builder.
2//!
3//! [`PlanBuilder`] consumes one or more [`ZiPatchReader`]s in chain order via
4//! [`PlanBuilder::add_patch`] and produces a single [`Plan`] describing the
5//! end state. Mid-chain destructive operations — `SqpkFile::RemoveAll`,
6//! `SqpkFile::DeleteFile`, and `SqpkFile::AddFile` at `file_offset == 0` —
7//! retroactively drop accumulated regions from earlier patches so that the
8//! returned plan reflects only the writes that survive.
9
10use std::collections::HashMap;
11use std::io::Read;
12
13use crate::apply::path::expansion_folder_id;
14use crate::chunk::sqpk::add_data::SqpkAddData;
15use crate::chunk::{
16    Chunk, SqpackFileId, SqpkCommand, SqpkFile, SqpkFileOperation, SqpkHeader, SqpkHeaderTarget,
17    TargetHeaderKind, ZiPatchReader,
18};
19use crate::newtypes::PatchIndex;
20use crate::{IndexError, IndexResult as Result, Platform};
21use tracing::{info, info_span, trace};
22
23use super::plan::{
24    FilesystemOp, PartExpected, PartSource, PatchRef, PatchSourceKind, PatchType, Plan, Region,
25    Target, TargetPath,
26};
27use super::region_map;
28
// Number of bytes between the start of a SQPK chunk body and the start of the
// sub-command body it wraps: `[inner_size: i32 BE]` (4 bytes) followed by
// `[sub_cmd: u8]` (1 byte).
const SQPK_SUB_CMD_BODY_OFFSET: u64 = 4 + 1;

// Number of bytes that precede `header_data` inside a SqpkHeader sub-command
// body: file_kind(1) + header_kind(1) + pad(1) + main_id(2) + sub_id(2) +
// file_id(4).
const SQPK_HEADER_DATA_OFFSET: u64 = 1 + 1 + 1 + 2 + 2 + 4;
36
37/// Reject a relative path that escapes the install root.
38///
39/// The indexed builder is a natural choke point for path-traversal checks: it
40/// owns every `Generic` path that enters a plan, plus every relative path
41/// fed to a [`FilesystemOp`]. The sequential apply path does not currently
42/// enforce this guard — a malicious patch supplying `../../etc/passwd` would
43/// land bytes outside the install root — but the indexed plan refuses to
44/// build at all, surfacing the bad path at construction time rather than
45/// apply time. `SqPack`-encoded targets are structurally constrained by
46/// their numeric `(main_id, sub_id, file_id)` triple and skip this check.
47///
48/// Rejects:
49/// - empty path components and `..` components anywhere in the path,
50/// - absolute Unix paths (leading `/`),
51/// - Windows drive-letter prefixes (`C:\`, `c:/`, etc.).
52fn reject_unsafe_relative_path(path: &str) -> Result<()> {
53    if path.starts_with('/') || path.starts_with('\\') {
54        return Err(IndexError::UnsafeTargetPath(path.to_owned()));
55    }
56    // Windows drive letter: `[A-Za-z]:` followed by `/` or `\`, or just `[A-Za-z]:`
57    // at minimum (`C:` alone is still absolute on Windows).
58    let bytes = path.as_bytes();
59    if bytes.len() >= 2 && bytes[1] == b':' && bytes[0].is_ascii_alphabetic() {
60        return Err(IndexError::UnsafeTargetPath(path.to_owned()));
61    }
62    for component in path.split(['/', '\\']) {
63        if component == ".." {
64            return Err(IndexError::UnsafeTargetPath(path.to_owned()));
65        }
66    }
67    Ok(())
68}
69
// Upper bound on `units` for a single region: the largest count for which
// `units * 128` bytes is still representable in `Region::length: u32`.
// `push_empty_block_region` uses it to cut an oversized EmptyBlock into one
// capped, header-bearing region followed by plain Zeros fillers; the function
// body documents the byte-equivalence asterisk that applies once
// `units >= 2^25`.
const MAX_UNITS_PER_REGION: u32 = u32::MAX >> 7; // 0x01FF_FFFF
76
77/// Accumulating builder for a multi-patch [`Plan`].
78///
79/// Construct via [`PlanBuilder::new`], feed each patch in chain order via
80/// [`PlanBuilder::add_patch`], then consume with [`PlanBuilder::finish`].
81/// For a one-shot single-patch build, the three-line dance
82/// `PlanBuilder::new().add_patch(name, reader)?.finish()` (the `add_patch`
83/// call returning `&mut Self` is not supported — use a `let mut`) is the
84/// canonical form; there is no freestanding convenience wrapper.
85#[derive(Debug)]
86pub struct PlanBuilder {
87    state: BuilderState,
88}
89
90impl Default for PlanBuilder {
91    fn default() -> Self {
92        Self::new()
93    }
94}
95
96impl PlanBuilder {
97    /// Start a new, empty builder.
98    #[must_use]
99    pub fn new() -> Self {
100        Self {
101            state: BuilderState::new(),
102        }
103    }
104
105    /// Read `reader` to completion, appending the patch to the chain.
106    ///
107    /// Mid-chain destructive ops (`RemoveAll`, `DeleteFile`,
108    /// `AddFile@file_offset=0`) retroactively prune accumulated regions from
109    /// earlier patches at this point.
110    ///
111    /// # Errors
112    ///
113    /// - [`IndexError::DuplicatePatch`] if `name` matches a patch already
114    ///   added to this builder. The chain protocol is order-sensitive; adding
115    ///   the same patch twice is almost always a caller bug.
116    /// - Any parser error surfaced by `reader.next_chunk()`.
117    ///
118    /// # Panics
119    ///
120    /// Panics if the chain grows past `u32::MAX` patches.
121    pub fn add_patch<R: Read>(
122        &mut self,
123        name: impl Into<String>,
124        mut reader: ZiPatchReader<R>,
125    ) -> Result<()> {
126        let name = name.into();
127        let span = info_span!(crate::tracing_schema::span_names::BUILD_PLAN_PATCH, patch = %name);
128        let _enter = span.enter();
129        self.state.begin_patch(name)?;
130        let mut chunks: usize = 0;
131        while let Some(rec) = reader.next_chunk()? {
132            self.state.consume_chunk(rec.chunk, rec.body_offset)?;
133            chunks += 1;
134        }
135        info!(
136            chunks,
137            targets = self.state.target_order.len(),
138            fs_ops = self.state.fs_ops.len(),
139            "plan: patch consumed"
140        );
141        Ok(())
142    }
143
144    /// Consume the builder and return the accumulated [`Plan`].
145    ///
146    /// Emits a single `info!` summary event carrying the final patch / target /
147    /// region / `fs_op` counts so a subscriber sees one completion record per
148    /// fully-built plan regardless of how many `add_patch` calls fed it.
149    #[must_use]
150    pub fn finish(self) -> Plan {
151        let plan = self.state.finalize();
152        let region_count: usize = plan.targets.iter().map(|t| t.regions.len()).sum();
153        info!(
154            patches = plan.patches.len(),
155            targets = plan.targets.len(),
156            regions = region_count,
157            fs_ops = plan.fs_ops.len(),
158            "plan: built"
159        );
160        plan
161    }
162}
163
164#[derive(Debug)]
165struct BuilderState {
166    platform: Platform,
167    patches: Vec<PatchRef>,
168    // Index of the patch currently being consumed; set by begin_patch and read
169    // when constructing PartSource::Patch values.
170    current_patch: PatchIndex,
171    fs_ops: Vec<FilesystemOp>,
172    targets: HashMap<TargetPath, Vec<Region>>,
173    target_order: Vec<TargetPath>,
174}
175
176impl BuilderState {
177    fn new() -> Self {
178        Self {
179            platform: Platform::Win32,
180            patches: Vec::new(),
181            current_patch: PatchIndex::new(0),
182            fs_ops: Vec::new(),
183            targets: HashMap::new(),
184            target_order: Vec::new(),
185        }
186    }
187
188    fn begin_patch(&mut self, name: String) -> Result<()> {
189        if self.patches.iter().any(|p| p.name == name) {
190            return Err(IndexError::DuplicatePatch { name });
191        }
192        let idx = u32::try_from(self.patches.len()).expect("more than u32::MAX patches");
193        self.current_patch = PatchIndex::new(idx);
194        self.patches.push(PatchRef {
195            name,
196            patch_type: None,
197        });
198        Ok(())
199    }
200
201    fn current_patch_ref_mut(&mut self) -> &mut PatchRef {
202        let idx = self.current_patch.get() as usize;
203        &mut self.patches[idx]
204    }
205
206    fn consume_chunk(&mut self, chunk: Chunk, body_offset: u64) -> Result<()> {
207        match chunk {
208            Chunk::FileHeader(fh) => {
209                let pt = PatchType::from_tag(*fh.patch_type());
210                self.current_patch_ref_mut().patch_type = Some(pt);
211                trace!(version = fh.version(), "plan: file header");
212            }
213            Chunk::ApplyOption(opt) => {
214                trace!(kind = ?opt.kind, value = opt.value, "plan: apply option (ignored in v1)");
215            }
216            Chunk::ApplyFreeSpace(_) => {
217                trace!("plan: apply free space (ignored in v1)");
218            }
219            Chunk::AddDirectory(ad) => {
220                reject_unsafe_relative_path(&ad.name)?;
221                self.fs_ops.push(FilesystemOp::EnsureDir(ad.name));
222            }
223            Chunk::DeleteDirectory(dd) => {
224                reject_unsafe_relative_path(&dd.name)?;
225                self.fs_ops.push(FilesystemOp::DeleteDir(dd.name));
226            }
227            Chunk::Sqpk(cmd) => self.consume_sqpk(cmd, body_offset)?,
228            // ZiPatchReader consumes `EOF_` internally and never yields it,
229            // but the match still has to cover the variant.
230            Chunk::EndOfFile => {}
231        }
232        Ok(())
233    }
234
235    fn consume_sqpk(&mut self, cmd: SqpkCommand, body_offset: u64) -> Result<()> {
236        match cmd {
237            SqpkCommand::TargetInfo(t) => {
238                self.platform = match t.platform_id {
239                    0 => Platform::Win32,
240                    1 => Platform::Ps3,
241                    2 => Platform::Ps4,
242                    id => Platform::Unknown(id),
243                };
244                trace!(platform = ?self.platform, "plan: target info");
245            }
246            SqpkCommand::PatchInfo(_) | SqpkCommand::Index(_) => {
247                trace!("plan: SQPK metadata-only chunk (ignored in v1)");
248            }
249            SqpkCommand::AddData(c) => self.consume_add_data(&c, body_offset),
250            SqpkCommand::DeleteData(c) => {
251                self.push_empty_block_region(&c.target_file, c.block_offset, c.block_count);
252            }
253            SqpkCommand::ExpandData(c) => {
254                self.push_empty_block_region(&c.target_file, c.block_offset, c.block_count);
255            }
256            SqpkCommand::Header(c) => self.consume_header(&c, body_offset),
257            SqpkCommand::File(c) => self.consume_file(*c, body_offset)?,
258        }
259        Ok(())
260    }
261
262    fn consume_add_data(&mut self, c: &SqpkAddData, body_offset: u64) {
263        let data_abs_offset =
264            body_offset + SQPK_SUB_CMD_BODY_OFFSET + SqpkAddData::DATA_SOURCE_OFFSET;
265        let data_bytes = u32::try_from(c.data_bytes)
266            .expect("SqpkAddData::data_bytes is bounded by the parser's 512 MiB chunk size limit");
267        let path = dat_target(&c.target_file);
268        self.push_region(
269            &path,
270            Region {
271                target_offset: c.block_offset,
272                length: data_bytes,
273                source: PartSource::Patch {
274                    patch_idx: self.current_patch,
275                    offset: data_abs_offset,
276                    kind: PatchSourceKind::Raw { len: data_bytes },
277                    decoded_skip: 0,
278                },
279                expected: PartExpected::SizeOnly,
280            },
281        );
282        if c.block_delete_number > 0 {
283            // `block_delete_number` is wire-decoded as `(raw_u32 << 7)` and so
284            // can legally reach ~549 GiB — well beyond `u32::MAX`. The on-disk
285            // operation is just zero-fill (see `write_zeros` in
286            // `apply/sqpk.rs`) and adjacent `Zeros` regions are
287            // semantically equivalent to a single zero run, so split the run
288            // into `u32::MAX`-sized chunks instead of widening `Region::length`.
289            let mut remaining = c.block_delete_number;
290            let mut cursor = c.block_offset + c.data_bytes;
291            while remaining > 0 {
292                let chunk = u32::try_from(remaining.min(u64::from(u32::MAX)))
293                    .expect("clamped to u32::MAX above");
294                self.push_region(
295                    &path,
296                    Region {
297                        target_offset: cursor,
298                        length: chunk,
299                        source: PartSource::Zeros,
300                        expected: PartExpected::Zeros,
301                    },
302                );
303                cursor += u64::from(chunk);
304                remaining -= u64::from(chunk);
305            }
306        }
307    }
308
309    fn push_empty_block_region(&mut self, target_file: &SqpackFileId, offset: u64, units: u32) {
310        let path = dat_target(target_file);
311        // `units * 128` is the byte length of the region. For `units` up to
312        // `u32::MAX / 128 == 0x01FF_FFFF` this fits in a `u32` and we emit a
313        // single `EmptyBlock` region whose applier writes the canonical
314        // SqPack empty-block header followed by `units*128 - 20` zero bytes.
315        //
316        // For `units >= 2^25` the byte count overflows `Region::length: u32`.
317        // The wire format permits arbitrary `u32` `block_count` values, even
318        // though no real ZiPatch ever uses one that large — guard against the
319        // pathological case rather than silently saturating. We split into:
320        //   1. One `EmptyBlock { units: cap }` region of `cap*128` bytes
321        //      (where `cap = u32::MAX / 128`), so the canonical 20-byte
322        //      empty-block header is written exactly as the sequential path
323        //      would write it.
324        //   2. Successive `u32::MAX`-sized `Zeros` regions covering the
325        //      remaining `(units - cap) * 128` bytes.
326        //
327        // This means the header's `block_number - 1` field reflects only the
328        // first capped chunk rather than the full original `units` count —
329        // an unavoidable byte-equivalence asterisk for the unreachable
330        // `units >= 2^25` case (mirroring the same family of `u32`-width
331        // splits already used by `consume_add_data`'s zero-fill path).
332        if units <= MAX_UNITS_PER_REGION {
333            self.push_region(
334                &path,
335                Region {
336                    target_offset: offset,
337                    length: units * 128,
338                    source: PartSource::EmptyBlock { units },
339                    expected: PartExpected::EmptyBlock { units },
340                },
341            );
342            return;
343        }
344
345        let cap = MAX_UNITS_PER_REGION;
346        let cap_bytes = u64::from(cap) * 128;
347        self.push_region(
348            &path,
349            Region {
350                target_offset: offset,
351                length: cap * 128,
352                source: PartSource::EmptyBlock { units: cap },
353                expected: PartExpected::EmptyBlock { units: cap },
354            },
355        );
356
357        let total_bytes = u64::from(units) * 128;
358        let mut cursor = offset + cap_bytes;
359        let mut remaining = total_bytes - cap_bytes;
360        while remaining > 0 {
361            let chunk = u32::try_from(remaining.min(u64::from(u32::MAX)))
362                .expect("clamped to u32::MAX above");
363            self.push_region(
364                &path,
365                Region {
366                    target_offset: cursor,
367                    length: chunk,
368                    source: PartSource::Zeros,
369                    expected: PartExpected::Zeros,
370                },
371            );
372            cursor += u64::from(chunk);
373            remaining -= u64::from(chunk);
374        }
375    }
376
377    fn consume_header(&mut self, c: &SqpkHeader, body_offset: u64) {
378        let header_abs_offset = body_offset + SQPK_SUB_CMD_BODY_OFFSET + SQPK_HEADER_DATA_OFFSET;
379        let target_offset: u64 = match c.header_kind {
380            TargetHeaderKind::Version => 0,
381            TargetHeaderKind::Index | TargetHeaderKind::Data => 1024,
382        };
383        let path = match &c.target {
384            SqpkHeaderTarget::Dat(f) => dat_target(f),
385            SqpkHeaderTarget::Index(f) => index_target(f),
386        };
387        self.push_region(
388            &path,
389            Region {
390                target_offset,
391                length: 1024,
392                source: PartSource::Patch {
393                    patch_idx: self.current_patch,
394                    offset: header_abs_offset,
395                    kind: PatchSourceKind::Raw { len: 1024 },
396                    decoded_skip: 0,
397                },
398                expected: PartExpected::SizeOnly,
399            },
400        );
401    }
402
403    fn consume_file(&mut self, c: SqpkFile, body_offset: u64) -> Result<()> {
404        // Every SqpkFile op carries a relative path that lands in either a
405        // TargetPath::Generic or a FilesystemOp (DeleteFile / MakeDirTree).
406        // Reject path-traversal sequences here, before any plan state is mutated,
407        // so a malicious `../../...` payload cannot construct a plan that would
408        // write outside the install root at apply time.
409        reject_unsafe_relative_path(&c.path)?;
410        match c.operation {
411            SqpkFileOperation::AddFile => {
412                // Build the `TargetPath::Generic` once and keep its inner
413                // `String` accessible by name so the offset-0 truncate-hint
414                // branch can clone it without re-matching on the enum.
415                // Pre-polish code matched `&path` with an `unreachable!()` else
416                // arm; the unreachability was a *call-site* invariant (the
417                // enum was constructed two lines above) rather than a type
418                // invariant, so a future refactor that swapped out `Generic`
419                // could silently turn the panic into a runtime hazard. Binding
420                // `inner_path` to the raw string up front and reconstructing
421                // the enum value next to it makes the invariant structural.
422                let inner_path: String = c.path;
423                let path = TargetPath::Generic(inner_path.clone());
424                if c.file_offset == 0 {
425                    // AddFile at offset 0 truncates the target. Drop any
426                    // accumulated regions from earlier patches and emit a
427                    // DeleteFile hint so a stale on-disk file (from outside
428                    // the chain, e.g. a pre-existing install) is removed
429                    // before the new regions land.
430                    self.drop_target(&path);
431                    self.fs_ops.push(FilesystemOp::DeleteFile(inner_path));
432                }
433                let mut cursor = c.file_offset;
434                for (i, block) in c.blocks.iter().enumerate() {
435                    let block_source_offset = c.block_source_offsets[i];
436                    let abs_offset = body_offset + SQPK_SUB_CMD_BODY_OFFSET + block_source_offset;
437                    let decompressed_len = u32::try_from(block.decompressed_size())
438                        .expect("block decompressed_size bounded by chunk size limit");
439                    let kind = if block.is_compressed() {
440                        PatchSourceKind::Deflated {
441                            compressed_len: u32::try_from(block.data_len())
442                                .expect("block data_len bounded by chunk size limit"),
443                            decompressed_len,
444                        }
445                    } else {
446                        PatchSourceKind::Raw {
447                            len: decompressed_len,
448                        }
449                    };
450                    self.push_region(
451                        &path,
452                        Region {
453                            target_offset: cursor,
454                            length: decompressed_len,
455                            source: PartSource::Patch {
456                                patch_idx: self.current_patch,
457                                offset: abs_offset,
458                                kind,
459                                decoded_skip: 0,
460                            },
461                            expected: PartExpected::SizeOnly,
462                        },
463                    );
464                    cursor += u64::from(decompressed_len);
465                }
466            }
467            SqpkFileOperation::RemoveAll => {
468                self.fs_ops
469                    .push(FilesystemOp::RemoveAllInExpansion(c.expansion_id));
470                self.drop_targets_under_expansion(c.expansion_id);
471            }
472            SqpkFileOperation::DeleteFile => {
473                let path = TargetPath::Generic(c.path.clone());
474                self.drop_target(&path);
475                self.fs_ops.push(FilesystemOp::DeleteFile(c.path));
476            }
477            SqpkFileOperation::MakeDirTree => {
478                self.fs_ops.push(FilesystemOp::MakeDirTree(c.path));
479            }
480        }
481        Ok(())
482    }
483
484    fn push_region(&mut self, path: &TargetPath, region: Region) {
485        if region.length == 0 {
486            return;
487        }
488        // Hot path: target already exists. A single `get_mut` lookup with an
489        // early return halves the per-region HashMap work vs the original
490        // `contains_key` + `get_mut` pair, and avoids cloning the `TargetPath`
491        // entirely (the `Generic` variant wraps a `String`, so clones allocate).
492        if let Some(regions) = self.targets.get_mut(path) {
493            region_map::insert(regions, region);
494            return;
495        }
496        let owned = path.clone();
497        self.target_order.push(owned.clone());
498        let regions = self.targets.entry(owned).or_default();
499        region_map::insert(regions, region);
500    }
501
502    /// Drop every accumulated region (and the target's bookkeeping entry) for
503    /// a single target. Used by `DeleteFile` and by `AddFile@0` for the
504    /// truncate-then-rewrite case.
505    fn drop_target(&mut self, path: &TargetPath) {
506        self.targets.remove(path);
507        self.target_order.retain(|tp| tp != path);
508    }
509
510    /// Drop every accumulated target that falls under the expansion folder
511    /// `sqpack/<exp>/` or `movie/<exp>/`. Used by `RemoveAll`.
512    fn drop_targets_under_expansion(&mut self, expansion_id: u16) {
513        let folder = expansion_folder_id(expansion_id);
514        let sqpack_prefix = format!("sqpack/{folder}/");
515        let movie_prefix = format!("movie/{folder}/");
516
517        // Take `target_order` out so the retain closure can mutate
518        // `self.targets` without a split-borrow conflict. One pass instead of
519        // building a `HashSet<TargetPath>` of cloned keys and then running two
520        // separate retains over both maps.
521        let mut order = std::mem::take(&mut self.target_order);
522        order.retain(|tp| {
523            if target_falls_under(tp, expansion_id, &sqpack_prefix, &movie_prefix) {
524                self.targets.remove(tp);
525                false
526            } else {
527                true
528            }
529        });
530        self.target_order = order;
531    }
532
533    fn finalize(self) -> Plan {
534        let BuilderState {
535            platform,
536            patches,
537            current_patch: _,
538            fs_ops,
539            mut targets,
540            target_order,
541        } = self;
542
543        let mut out_targets = Vec::with_capacity(target_order.len());
544        for path in target_order {
545            let regions = targets.remove(&path).unwrap_or_default();
546            let final_size = regions
547                .last()
548                .map_or(0, |r| r.target_offset + u64::from(r.length));
549            debug_assert!(
550                regions
551                    .windows(2)
552                    .all(|w| w[0].target_offset + u64::from(w[0].length) <= w[1].target_offset),
553                "regions must be sorted and non-overlapping after build"
554            );
555            out_targets.push(Target {
556                path,
557                final_size,
558                regions,
559            });
560        }
561
562        Plan {
563            schema_version: Plan::CURRENT_SCHEMA_VERSION,
564            platform,
565            patches,
566            targets: out_targets,
567            fs_ops,
568        }
569    }
570}
571
572fn target_falls_under(
573    tp: &TargetPath,
574    expansion_id: u16,
575    sqpack_prefix: &str,
576    movie_prefix: &str,
577) -> bool {
578    match tp {
579        TargetPath::SqpackDat { sub_id, .. } | TargetPath::SqpackIndex { sub_id, .. } => {
580            (sub_id >> 8) == expansion_id
581        }
582        TargetPath::Generic(path) => {
583            path.starts_with(sqpack_prefix) || path.starts_with(movie_prefix)
584        }
585    }
586}
587
588fn dat_target(f: &SqpackFileId) -> TargetPath {
589    TargetPath::SqpackDat {
590        main_id: f.main_id,
591        sub_id: f.sub_id,
592        file_id: f.file_id,
593    }
594}
595
596fn index_target(f: &SqpackFileId) -> TargetPath {
597    TargetPath::SqpackIndex {
598        main_id: f.main_id,
599        sub_id: f.sub_id,
600        file_id: f.file_id,
601    }
602}
603
604#[cfg(test)]
605mod tests {
606    use super::*;
607
608    fn synthetic_sqpack_file() -> SqpackFileId {
609        SqpackFileId {
610            main_id: 1,
611            sub_id: 2,
612            file_id: 0,
613        }
614    }
615
616    /// `block_delete_number` is wire-decoded as `(raw_u32 << 7)` and can reach
617    /// ~549 GiB. A naive `u32::try_from(...).expect(...)` would panic on any
618    /// raw value `>= 2^25`. Use a value that is comfortably above `u32::MAX`
619    /// to force at least two chunks of splitting.
620    #[test]
621    fn consume_add_data_splits_huge_block_delete_into_u32_chunks() {
622        let mut state = BuilderState::new();
623        state.begin_patch("synthetic".into()).unwrap();
624
625        let huge: u64 = u64::from(u32::MAX) + 1024; // forces a second chunk
626        let cmd = SqpkAddData {
627            target_file: synthetic_sqpack_file(),
628            block_offset: 0,
629            data_bytes: 128,
630            block_delete_number: huge,
631            data: vec![0xAA; 128],
632        };
633        // body_offset value is irrelevant for the zero-fill split path.
634        state.consume_add_data(&cmd, 0);
635
636        let plan = state.finalize();
637        assert_eq!(plan.targets.len(), 1);
638        let regions = &plan.targets[0].regions;
639        // 1 raw payload region + 2 Zeros regions (u32::MAX + remainder).
640        assert_eq!(regions.len(), 3);
641
642        assert_eq!(regions[0].target_offset, 0);
643        assert_eq!(regions[0].length, 128);
644        assert!(matches!(regions[0].source, PartSource::Patch { .. }));
645
646        // First zero chunk is exactly u32::MAX, starting right after data.
647        assert_eq!(regions[1].target_offset, 128);
648        assert_eq!(regions[1].length, u32::MAX);
649        assert!(matches!(regions[1].source, PartSource::Zeros));
650
651        // Second zero chunk holds the remainder, starting where the first ends.
652        assert_eq!(regions[2].target_offset, 128 + u64::from(u32::MAX));
653        assert_eq!(regions[2].length, 1024);
654        assert!(matches!(regions[2].source, PartSource::Zeros));
655
656        assert_eq!(plan.targets[0].final_size, 128 + huge);
657    }
658
659    /// `EmptyBlock` regions whose byte length fits in `u32` must round-trip
660    /// as a single `EmptyBlock` region — the split path only kicks in for
661    /// pathological `units >= 2^25`.
662    #[test]
663    fn push_empty_block_region_emits_single_region_when_in_range() {
664        let mut state = BuilderState::new();
665        state.begin_patch("synthetic".into()).unwrap();
666
667        state.push_empty_block_region(&synthetic_sqpack_file(), 0, 8);
668
669        let plan = state.finalize();
670        assert_eq!(plan.targets.len(), 1);
671        let regions = &plan.targets[0].regions;
672        assert_eq!(regions.len(), 1);
673        assert_eq!(regions[0].length, 8 * 128);
674        assert!(matches!(
675            regions[0].source,
676            PartSource::EmptyBlock { units: 8 }
677        ));
678        assert!(matches!(
679            regions[0].expected,
680            PartExpected::EmptyBlock { units: 8 }
681        ));
682    }
683
684    /// `EmptyBlock` with `units * 128 > u32::MAX` must split into a capped
685    /// `EmptyBlock` region (header-bearing) followed by `Zeros` fillers.
686    /// Verifies both the total bytes covered and the region kind shapes —
687    /// this prevents the v1.0 silent saturation of `Region::length`.
688    #[test]
689    fn push_empty_block_region_splits_when_bytes_exceed_u32_max() {
690        let mut state = BuilderState::new();
691        state.begin_patch("synthetic".into()).unwrap();
692
693        // `units = 2^25` produces exactly `2^32` bytes — one byte past
694        // `u32::MAX` — the smallest input that triggers the split.
695        let units: u32 = 1 << 25; // 2^25
696        state.push_empty_block_region(&synthetic_sqpack_file(), 0, units);
697
698        let plan = state.finalize();
699        assert_eq!(plan.targets.len(), 1);
700        let regions = &plan.targets[0].regions;
701
702        // First region: capped EmptyBlock with the header.
703        let cap_units: u32 = u32::MAX / 128;
704        let cap_bytes: u64 = u64::from(cap_units) * 128;
705        assert_eq!(regions[0].target_offset, 0);
706        assert_eq!(regions[0].length, cap_units * 128);
707        match regions[0].source {
708            PartSource::EmptyBlock { units: u } => assert_eq!(u, cap_units),
709            ref other => panic!("expected EmptyBlock, got {other:?}"),
710        }
711
712        // Tail: one or more Zeros regions covering the remaining bytes.
713        let total_bytes: u64 = u64::from(units) * 128;
714        let mut covered: u64 = cap_bytes;
715        for region in &regions[1..] {
716            assert_eq!(region.target_offset, covered);
717            assert!(matches!(region.source, PartSource::Zeros));
718            assert!(matches!(region.expected, PartExpected::Zeros));
719            covered += u64::from(region.length);
720        }
721        assert_eq!(covered, total_bytes);
722        assert_eq!(plan.targets[0].final_size, total_bytes);
723
724        for region in &regions[1..] {
725            assert!(region.length <= u32::MAX);
726        }
727    }
728
729    // ---- path traversal validation ----
730
731    #[test]
732    fn reject_unsafe_relative_path_accepts_safe_paths() {
733        for safe in [
734            "sqpack/ffxiv/000000.win32.dat0",
735            "movie/ffxiv/opening.bk2",
736            "boot/launcher.exe",
737            "a/b/c.txt",
738            "single",
739        ] {
740            assert!(
741                reject_unsafe_relative_path(safe).is_ok(),
742                "safe path rejected: {safe}"
743            );
744        }
745    }
746
747    #[test]
748    fn reject_unsafe_relative_path_rejects_traversal_and_absolute() {
749        for bad in [
750            "../etc/passwd",
751            "..\\etc\\passwd",
752            "sqpack/../../etc/passwd",
753            "a/b/../../../etc/passwd",
754            "/etc/passwd",
755            "\\\\server\\share\\file",
756            "C:/Windows/system32",
757            "c:\\Windows\\system32",
758            "C:",
759        ] {
760            let err = reject_unsafe_relative_path(bad)
761                .expect_err(&format!("unsafe path accepted: {bad}"));
762            match err {
763                IndexError::UnsafeTargetPath(s) => assert_eq!(s, bad),
764                other => panic!("expected UnsafeTargetPath, got {other:?}"),
765            }
766        }
767    }
768
769    #[test]
770    fn consume_file_rejects_path_traversal() {
771        let mut state = BuilderState::new();
772        state.begin_patch("synthetic".into()).unwrap();
773        let cmd = SqpkFile {
774            operation: SqpkFileOperation::AddFile,
775            file_offset: 0,
776            file_size: 0,
777            expansion_id: 0,
778            path: "../../etc/passwd".into(),
779            block_source_offsets: Vec::new(),
780            blocks: Vec::new(),
781        };
782        let err = state
783            .consume_file(cmd, 0)
784            .expect_err("must reject traversal");
785        assert!(matches!(err, IndexError::UnsafeTargetPath(_)));
786    }
787
788    /// Re-adding a patch with the same name must surface
789    /// `IndexError::DuplicatePatch` rather than silently appending a second
790    /// `PatchRef`. Exercised at the `BuilderState` boundary; the public-API
791    /// equivalent (via `PlanBuilder::add_patch`) is covered in
792    /// `tests/index_chain.rs`.
793    #[test]
794    fn begin_patch_rejects_duplicate_name() {
795        let mut state = BuilderState::new();
796        state.begin_patch("p1".into()).unwrap();
797        let err = state
798            .begin_patch("p1".into())
799            .expect_err("duplicate name must error");
800        match err {
801            IndexError::DuplicatePatch { name } => assert_eq!(name, "p1"),
802            other => panic!("expected DuplicatePatch, got {other:?}"),
803        }
804    }
805}