Skip to main content

pkg_extractor/
lib.rs

1// Copyright (C) 2026 Thibault Saunier <tsaunier@igalia.com>
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at https://mozilla.org/MPL/2.0/.
6
7use apple_flat_package::component_package::ComponentPackageReader;
8use apple_flat_package::reader::{PkgFlavor, PkgReader};
9use cpio_archive::{CpioReader as _, OdcReader};
10use log::{debug, error, info, warn};
11use std::error::Error;
12use std::fmt::Debug;
13use std::fs::{self, File};
14use std::io::{Cursor, Read, Seek, Write};
15use std::path::{Path, PathBuf};
16use std::process::Command;
17
18mod pbzx;
19
/// Extracts the contents of a flat package read from `R` into a directory.
pub struct PkgExtractor<R>
where
    R: Read + Seek + Sized + Debug,
{
    /// Package data source; kept in an `Option` so `extract` can move it out.
    reader: Option<R>,
    /// Directory that extracted files are written into.
    output_dir: PathBuf,
    /// On-disk location of the package, required by the `xar` fallback.
    pkg_file_path: Option<PathBuf>,
}
25
/// Kind of archive entry, derived from the file-type bits of a Unix mode.
#[derive(Debug, PartialEq)]
enum FileType {
    Directory,
    Regular,
    Symlink,
    Other,
}

impl FileType {
    /// Classifies `mode` by its file-type bits; permission bits are ignored.
    ///
    /// Anything that is not a directory, regular file or symlink maps to
    /// [`FileType::Other`].
    fn from_mode(mode: u32) -> Self {
        // Mask selecting the file-type bits, and the three types handled here.
        const TYPE_MASK: u32 = 0o170000;
        const TYPE_DIRECTORY: u32 = 0o040000;
        const TYPE_REGULAR: u32 = 0o100000;
        const TYPE_SYMLINK: u32 = 0o120000;

        let type_bits = mode & TYPE_MASK;
        if type_bits == TYPE_DIRECTORY {
            FileType::Directory
        } else if type_bits == TYPE_REGULAR {
            FileType::Regular
        } else if type_bits == TYPE_SYMLINK {
            FileType::Symlink
        } else {
            FileType::Other
        }
    }
}
44
45impl<R: Read + Seek + Sized + Debug> PkgExtractor<R> {
46    pub fn new(reader: R, output_dir: Option<PathBuf>) -> Self {
47        let output_dir = output_dir.unwrap_or_else(|| PathBuf::from("extracted_pkg"));
48
49        Self {
50            reader: Some(reader),
51            output_dir,
52            pkg_file_path: None,
53        }
54    }
55
56    pub fn new_with_file_path(
57        reader: R,
58        output_dir: Option<PathBuf>,
59        pkg_file_path: PathBuf,
60    ) -> Self {
61        let output_dir = output_dir.unwrap_or_else(|| PathBuf::from("extracted_pkg"));
62
63        Self {
64            reader: Some(reader),
65            output_dir,
66            pkg_file_path: Some(pkg_file_path),
67        }
68    }
69
70    pub fn extract(mut self) -> Result<(), Box<dyn Error>> {
71        fs::create_dir_all(&self.output_dir)?;
72
73        let reader = self.reader.take().unwrap();
74
75        // If no file path is available, save reader data to a temp file
76        // so xar fallback can work if needed
77        if self.pkg_file_path.is_none() {
78            let temp_file = tempfile::NamedTempFile::new()?;
79            let temp_path = temp_file.path().to_path_buf();
80
81            let mut file = File::create(&temp_path)?;
82            let mut buffer = Vec::new();
83            let mut reader_cursor = reader;
84            reader_cursor.read_to_end(&mut buffer)?;
85            file.write_all(&buffer)?;
86            file.sync_all()?;
87            drop(file);
88
89            self.pkg_file_path = Some(temp_path.clone());
90
91            let temp_file_reader = File::open(&temp_path)?;
92            let mut pkg_reader = PkgReader::new(temp_file_reader)?;
93            self.extract_with_pkg_reader(&mut pkg_reader)
94        } else {
95            let mut pkg_reader = PkgReader::new(reader)?;
96            self.extract_with_pkg_reader(&mut pkg_reader)
97        }
98    }
99
100    fn extract_with_pkg_reader<T: Read + Seek + Sized + Debug>(
101        &self,
102        pkg_reader: &mut PkgReader<T>,
103    ) -> Result<(), Box<dyn Error>> {
104        match pkg_reader.flavor() {
105            PkgFlavor::Component => {
106                debug!("Package type: Component");
107                self.extract_component(pkg_reader)?;
108            }
109            PkgFlavor::Product => {
110                debug!("Package type: Product");
111                self.extract_product(pkg_reader)?;
112            }
113        }
114
115        info!(
116            "Extraction completed. Files in: {}",
117            self.output_dir.display()
118        );
119        Ok(())
120    }
121
122    fn extract_component<T: Read + Seek + Sized + Debug>(
123        &self,
124        pkg_reader: &mut PkgReader<T>,
125    ) -> Result<(), Box<dyn Error>> {
126        match pkg_reader.root_component() {
127            Ok(Some(component_pkg_reader)) => self.extract_component_package(&component_pkg_reader),
128            Ok(None) => {
129                warn!("No root component found, trying xar fallback");
130                self.extract_with_xar_fallback()
131            }
132            Err(e) => {
133                warn!("Error reading component: {}, trying xar fallback", e);
134                self.extract_with_xar_fallback()
135            }
136        }
137    }
138
139    fn extract_product<T: Read + Seek + Sized + Debug>(
140        &self,
141        pkg_reader: &mut PkgReader<T>,
142    ) -> Result<(), Box<dyn Error>> {
143        match pkg_reader.component_packages() {
144            Ok(component_packages) => {
145                info!("Found {} component packages", component_packages.len());
146                for (i, component_pkg_reader) in component_packages.iter().enumerate() {
147                    debug!(
148                        "Extracting component package {} of {}",
149                        i + 1,
150                        component_packages.len()
151                    );
152                    self.extract_component_package(component_pkg_reader)?;
153                }
154                Ok(())
155            }
156            Err(e) => {
157                warn!("Error reading product package: {}, trying xar fallback", e);
158                self.extract_with_xar_fallback()
159            }
160        }
161    }
162
163    fn extract_component_package(
164        &self,
165        component_pkg_reader: &ComponentPackageReader,
166    ) -> Result<(), Box<dyn Error>> {
167        if let Some(package_info) = component_pkg_reader.package_info() {
168            if let Some(ref payload) = package_info.payload {
169                debug!(
170                    "Component: {} ({} files, {} KB)",
171                    package_info.identifier, payload.number_of_files, payload.install_kbytes
172                );
173            }
174        }
175
176        if let Ok(Some(mut payload_reader)) = component_pkg_reader.payload_reader() {
177            let mut total_bytes: u64 = 0;
178            let mut file_count: u64 = 0;
179
180            while let Ok(Some(header)) = payload_reader.read_next() {
181                let name = header.name();
182                let file_size = header.file_size();
183                let mode = header.mode();
184
185                if name.is_empty() || name == "." || name == "Payload" {
186                    payload_reader.finish()?;
187                    continue;
188                }
189
190                let clean_name = name.strip_prefix("Payload/").unwrap_or(name);
191                let target_path = self.output_dir.join(clean_name);
192
193                if let Some(parent) = target_path.parent() {
194                    fs::create_dir_all(parent)?;
195                }
196
197                match FileType::from_mode(mode) {
198                    FileType::Directory => {
199                        fs::create_dir_all(&target_path)?;
200                    }
201                    FileType::Regular if file_size > 0 => {
202                        let mut outfile = File::create(&target_path)?;
203                        let mut buf = vec![0; 8192];
204                        let mut remaining = file_size;
205
206                        while remaining > 0 {
207                            let to_read = remaining.min(buf.len() as u64) as usize;
208                            match payload_reader.read(&mut buf[..to_read]) {
209                                Ok(0) => break,
210                                Ok(n) => {
211                                    outfile.write_all(&buf[..n])?;
212                                    remaining -= n as u64;
213                                    total_bytes += n as u64;
214                                }
215                                Err(e) => {
216                                    error!("Error reading file {}: {}", name, e);
217                                    break;
218                                }
219                            }
220                        }
221                        file_count += 1;
222                    }
223                    _ => {
224                        debug!("Skipping {:?} entry: {}", FileType::from_mode(mode), name);
225                    }
226                }
227
228                payload_reader.finish()?;
229            }
230
231            debug!("Extracted {} files, {} bytes", file_count, total_bytes);
232        } else {
233            warn!("No payload reader available");
234        }
235
236        Ok(())
237    }
238
239    /// Fallback extraction using the system `xar` command
240    fn extract_with_xar_fallback(&self) -> Result<(), Box<dyn Error>> {
241        let pkg_path = self
242            .pkg_file_path
243            .as_ref()
244            .ok_or("No file path available for xar fallback")?;
245        let pkg_path = fs::canonicalize(pkg_path)?;
246
247        info!("Using xar fallback extraction");
248
249        let temp_dir = tempfile::tempdir()?;
250        let temp_path = temp_dir.path();
251
252        // List components
253        let list_output = Command::new("xar")
254            .args(["-tf", &pkg_path.to_string_lossy()])
255            .output()?;
256
257        if !list_output.status.success() {
258            return Err(format!(
259                "xar list failed: {}",
260                String::from_utf8_lossy(&list_output.stderr)
261            )
262            .into());
263        }
264
265        let contents = String::from_utf8_lossy(&list_output.stdout);
266        let component_names: Vec<&str> = contents
267            .lines()
268            .filter(|line| line.ends_with(".pkg") && !line.contains('/'))
269            .collect();
270
271        info!("Found {} components", component_names.len());
272
273        // Extract each component
274        for component_name in &component_names {
275            let output = Command::new("xar")
276                .args(["-xf", &pkg_path.to_string_lossy(), component_name])
277                .current_dir(temp_path)
278                .output()?;
279
280            if !output.status.success() {
281                warn!(
282                    "Failed to extract {}: {}",
283                    component_name,
284                    String::from_utf8_lossy(&output.stderr)
285                );
286            }
287        }
288
289        // Process extracted component packages
290        for entry in fs::read_dir(temp_path)? {
291            let entry = entry?;
292            let path = entry.path();
293
294            if path.is_dir() && path.extension().is_some_and(|e| e == "pkg") {
295                let payload_path = path.join("Payload");
296                if payload_path.exists() {
297                    if let Err(e) = self.extract_payload_file(&payload_path) {
298                        warn!(
299                            "Failed to extract payload from {}: {}",
300                            path.file_name().unwrap().to_string_lossy(),
301                            e
302                        );
303                    }
304                }
305            }
306        }
307
308        Ok(())
309    }
310
311    /// Extract a payload file (gzipped cpio or pbzx format)
312    fn extract_payload_file(&self, payload_path: &Path) -> Result<(), Box<dyn Error>> {
313        let mut file = File::open(payload_path)?;
314        let mut header = [0u8; 4];
315        file.read_exact(&mut header)?;
316        drop(file);
317
318        if &header == b"pbzx" {
319            self.extract_pbzx_payload(payload_path)
320        } else {
321            self.extract_gzip_cpio_payload(payload_path)
322        }
323    }
324
325    fn extract_pbzx_payload(&self, payload_path: &Path) -> Result<(), Box<dyn Error>> {
326        let file = File::open(payload_path)?;
327
328        // Try pure Rust implementation first
329        match pbzx::PbzxReader::new(file) {
330            Ok(mut pbzx_reader) => {
331                let mut decompressed = Vec::new();
332                match pbzx_reader.decompress_to(&mut decompressed) {
333                    Ok(_) => {
334                        debug!("pbzx decompressed {} bytes", decompressed.len());
335                        return self.extract_cpio(&decompressed);
336                    }
337                    Err(e) => {
338                        warn!("Pure Rust pbzx failed: {}, trying shell fallback", e);
339                    }
340                }
341            }
342            Err(e) => {
343                warn!("Failed to create pbzx reader: {}, trying shell fallback", e);
344            }
345        }
346
347        // Shell fallback
348        let output = Command::new("sh")
349            .arg("-c")
350            .arg(format!(
351                "cd '{}' && pbzx -n '{}' | cpio -idm",
352                self.output_dir.display(),
353                payload_path.display()
354            ))
355            .output()?;
356
357        if !output.status.success() {
358            return Err(format!(
359                "Shell pbzx extraction failed: {}",
360                String::from_utf8_lossy(&output.stderr)
361            )
362            .into());
363        }
364        Ok(())
365    }
366
367    fn extract_gzip_cpio_payload(&self, payload_path: &Path) -> Result<(), Box<dyn Error>> {
368        let file = File::open(payload_path)?;
369        let gz_decoder = libflate::gzip::Decoder::new(file)?;
370        let mut cpio_reader = OdcReader::new(gz_decoder);
371
372        let mut file_count = 0;
373        while let Some(entry) = cpio_reader.read_next()? {
374            let path = entry.name();
375
376            if path == "." || path == "TRAILER!!!" || path.is_empty() {
377                continue;
378            }
379
380            let target_path = self.output_dir.join(path);
381
382            if let Some(parent) = target_path.parent() {
383                fs::create_dir_all(parent)?;
384            }
385
386            match FileType::from_mode(entry.mode()) {
387                FileType::Directory => {
388                    fs::create_dir_all(&target_path)?;
389                }
390                FileType::Regular => {
391                    let mut file = File::create(&target_path)?;
392                    let mut content = Vec::new();
393                    cpio_reader.read_to_end(&mut content)?;
394                    file.write_all(&content)?;
395                    file_count += 1;
396                }
397                _ => {}
398            }
399        }
400
401        debug!("Extracted {} files from gzipped cpio", file_count);
402        Ok(())
403    }
404
405    /// Extract a cpio archive from decompressed data
406    fn extract_cpio(&self, data: &[u8]) -> Result<(), Box<dyn Error>> {
407        let cursor = Cursor::new(data);
408        let mut cpio_reader = OdcReader::new(cursor);
409
410        let mut file_count = 0;
411        while let Some(entry) = cpio_reader.read_next()? {
412            let path = entry.name();
413
414            if path == "." || path == "TRAILER!!!" || path.is_empty() {
415                continue;
416            }
417
418            let target_path = self.output_dir.join(path);
419
420            if let Some(parent) = target_path.parent() {
421                fs::create_dir_all(parent)?;
422            }
423
424            match FileType::from_mode(entry.mode()) {
425                FileType::Directory => {
426                    fs::create_dir_all(&target_path)?;
427                }
428                FileType::Regular => {
429                    let mut file = File::create(&target_path)?;
430                    let mut content = Vec::new();
431                    cpio_reader.read_to_end(&mut content)?;
432                    file.write_all(&content)?;
433                    file_count += 1;
434                }
435                _ => {}
436            }
437        }
438
439        debug!("Extracted {} files from cpio archive", file_count);
440        Ok(())
441    }
442}