use anyhow::{Context, Result};
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use tracing::{debug, info};
use encoding_rs::UTF_16LE;
use quick_xml::events::Event;
use quick_xml::Reader;
/// A small pool of reusable `String` buffers.
///
/// Buffers are handed out in order by [`StringPool::get_string`]; calling
/// [`StringPool::reset`] rewinds the cursor so the same allocations are handed
/// out again instead of being re-created.
#[derive(Debug)]
struct StringPool {
    /// Every buffer allocated so far.
    pool: Vec<String>,
    /// Position of the next buffer to hand out.
    index: usize,
}

#[allow(dead_code)]
impl StringPool {
    /// Creates an empty pool with room reserved for 32 buffers.
    fn new() -> Self {
        Self {
            index: 0,
            pool: Vec::with_capacity(32),
        }
    }

    /// Returns the next buffer, emptied and ready for use.
    ///
    /// A new buffer (256 bytes of reserved capacity) is appended when all
    /// existing ones have already been handed out since the last `reset`.
    fn get_string(&mut self) -> &mut String {
        let slot = self.index;
        if slot >= self.pool.len() {
            self.pool.push(String::with_capacity(256));
        }
        self.index = slot + 1;

        let buffer = &mut self.pool[slot];
        buffer.clear();
        buffer
    }

    /// Rewinds the cursor; buffers keep their contents until handed out again.
    fn reset(&mut self) {
        self.index = 0;
    }
}
/// Parsed fields of the fixed-size header at the start of a WIM file.
///
/// All multi-byte values are decoded little-endian by
/// `WimParser::parse_header_buffer`; the byte offsets quoted on each field are
/// the ones that function reads from.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct WimHeader {
/// Bytes 0..8. `WimParser::read_header` rejects the file unless this equals `MSWIM\0\0\0`.
pub signature: [u8; 8],
/// `u32` at offset 8.
pub header_size: u32,
/// `u32` at offset 12.
pub format_version: u32,
/// `u32` at offset 16; tested against the `FileFlags` bit masks.
pub file_flags: u32,
/// `u32` at offset 20.
pub compressed_size: u32,
/// Bytes 24..40.
pub guid: [u8; 16],
/// `u16` at offset 40.
pub segment_number: u16,
/// `u16` at offset 42.
pub total_segments: u16,
/// `u32` at offset 44.
pub image_count: u32,
/// Resource entry starting at offset 48.
pub offset_table_resource: FileResourceEntry,
/// Resource entry starting at offset 72; locates the XML block read by `WimParser::read_xml_data`.
pub xml_data_resource: FileResourceEntry,
/// Resource entry starting at offset 96.
pub boot_metadata_resource: FileResourceEntry,
/// `u32` at offset 120.
pub bootable_image_index: u32,
/// Resource entry starting at offset 124.
pub integrity_resource: FileResourceEntry,
}
/// One 24-byte resource descriptor as decoded from the WIM header.
///
/// `WimParser::parse_header_buffer` reads it as: 7 bytes of little-endian size,
/// 1 byte of flags, then two little-endian `u64` values.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct FileResourceEntry {
/// Size decoded from the first 7 bytes of the entry (zero-extended to 64 bits).
pub size: u64,
/// The 8th byte of the entry; presumably a combination of `ResourceFlags` bits (no visible code tests it).
pub flags: u8,
/// `u64` at entry offset 8; used as an absolute file position when the XML resource is read.
pub offset: u64,
/// `u64` at entry offset 16.
pub original_size: u64,
}
/// Namespace for the bit masks of a resource entry's one-byte `flags` field.
///
/// NOTE(review): nothing in this file tests these bits yet; the pairing with
/// `FileResourceEntry::flags` is inferred from the `u8` type and the names.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ResourceFlags;

#[allow(dead_code)]
impl ResourceFlags {
    /// Bit 0.
    pub const FREE: u8 = 0x01;
    /// Bit 1.
    pub const METADATA: u8 = 0x02;
    /// Bit 2.
    pub const COMPRESSED: u8 = 0x04;
    /// Bit 3.
    pub const SPANNED: u8 = 0x08;
}
/// Namespace for the bit masks applied to `WimHeader::file_flags`.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct FileFlags;

#[allow(dead_code)]
impl FileFlags {
    /// Tested by `WimParser::is_compressed`.
    pub const COMPRESSION: u32 = 0x00000002;
    pub const READONLY: u32 = 0x00000004;
    pub const SPANNED: u32 = 0x00000008;
    pub const RESOURCE_ONLY: u32 = 0x00000010;
    pub const METADATA_ONLY: u32 = 0x00000020;
    /// Reported as `"XPRESS"` by `WimParser::get_compression_type`.
    pub const COMPRESS_XPRESS: u32 = 0x00020000;
    /// Reported as `"LZX"` by `WimParser::get_compression_type`.
    pub const COMPRESS_LZX: u32 = 0x00040000;
}
/// Summary of a single image, filled in from the WIM's XML metadata.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ImageInfo {
    /// Value of the image's `INDEX` attribute.
    pub index: u32,
    /// Text of the `DISPLAYNAME` tag.
    pub name: String,
    /// Text of the `DISPLAYDESCRIPTION` tag.
    pub description: String,
    /// Text of the `DIRCOUNT` tag (0 when missing or unparsable).
    pub dir_count: u32,
    /// Text of the `FILECOUNT` tag (0 when missing or unparsable).
    pub file_count: u32,
    /// Text of the `TOTALBYTES` tag (0 when missing or unparsable).
    pub total_bytes: u64,
    /// Never populated by the code in this file.
    pub creation_time: Option<u64>,
    /// Never populated by the code in this file.
    pub last_modification_time: Option<u64>,
    /// Windows product label, e.g. `"Windows 11"`.
    pub version: Option<String>,
    /// Architecture label: `"x86"`, `"x64"`, `"ARM"` or `"ARM64"`.
    pub architecture: Option<String>,
}

#[allow(dead_code)]
impl ImageInfo {
    /// Creates an otherwise empty record carrying the given image index.
    pub fn new_with_index(index: u32) -> Self {
        Self {
            index,
            name: String::new(),
            description: String::new(),
            dir_count: 0,
            file_count: 0,
            total_bytes: 0,
            creation_time: None,
            last_modification_time: None,
            version: None,
            architecture: None,
        }
    }

    /// Stores the text of one XML element into the matching field.
    ///
    /// Unknown tags are ignored. Numeric fields fall back to 0 when the text
    /// does not parse. An `ARCH` value other than `0`, `9`, `5` or `12` resets
    /// the architecture to `None`.
    pub fn set_field(&mut self, tag: &str, value: &str) {
        match tag {
            "DISPLAYNAME" => self.name = String::from(value),
            "DISPLAYDESCRIPTION" => self.description = String::from(value),
            "DIRCOUNT" => self.dir_count = value.parse::<u32>().unwrap_or_default(),
            "FILECOUNT" => self.file_count = value.parse::<u32>().unwrap_or_default(),
            "TOTALBYTES" => self.total_bytes = value.parse::<u64>().unwrap_or_default(),
            "ARCH" => {
                let label = match value {
                    "0" => Some("x86"),
                    "9" => Some("x64"),
                    "5" => Some("ARM"),
                    "12" => Some("ARM64"),
                    _ => None,
                };
                self.architecture = label.map(String::from);
            }
            _ => {}
        }
    }

    /// Fills in `version` and `architecture` from keywords in the name and
    /// description, leaving any value that is already set untouched.
    pub fn infer_version_and_arch(&mut self) {
        // (lower-case needle, label); earlier rows win, so specific product
        // names must precede the generic "windows" fallbacks.
        const VERSION_MARKERS: [(&str, &str); 6] = [
            ("windows 11", "Windows 11"),
            ("windows 10", "Windows 10"),
            ("windows server 2022", "Windows Server 2022"),
            ("windows server 2019", "Windows Server 2019"),
            ("windows server", "Windows Server"),
            ("windows", "Windows"),
        ];
        // Same convention; both "x64" and "amd64" are checked before "x86".
        const ARCH_MARKERS: [(&str, &str); 4] = [
            ("x64", "x64"),
            ("amd64", "x64"),
            ("x86", "x86"),
            ("arm64", "ARM64"),
        ];

        let haystack = format!("{} {}", self.name, self.description).to_lowercase();

        if self.version.is_none() {
            self.version = VERSION_MARKERS
                .iter()
                .find(|entry| haystack.contains(entry.0))
                .map(|entry| entry.1.to_string());
        }

        if self.architecture.is_none() {
            self.architecture = ARCH_MARKERS
                .iter()
                .find(|entry| haystack.contains(entry.0))
                .map(|entry| entry.1.to_string());
        }
    }
}
/// Reader for a WIM file: owns the buffered file handle together with
/// whatever header and image metadata has been parsed so far.
#[allow(dead_code)]
pub struct WimParser {
/// Buffered handle to the underlying WIM file.
file: BufReader<File>,
/// Header cached by `read_header`; `None` until the first successful read.
header: Option<WimHeader>,
/// Images collected by the XML parsing methods; cleared at the start of each parse.
images: Vec<ImageInfo>,
/// Pool of reusable string buffers; created by the constructors and not otherwise used in the visible code.
string_pool: StringPool,
}
#[allow(dead_code)]
impl WimParser {
/// Opens the WIM file at `wim_path` and wraps it in a 64 KiB buffered reader.
///
/// Nothing is read from the file yet; call `read_header` / `parse_full`
/// afterwards.
///
/// # Errors
/// Returns an error carrying the path when the file cannot be opened.
pub fn new<P: AsRef<Path>>(wim_path: P) -> Result<Self> {
    const READ_BUFFER_BYTES: usize = 64 * 1024;

    let path = wim_path.as_ref();
    let handle = File::open(path)
        .with_context(|| format!("无法打开 WIM 文件: {}", path.display()))?;
    let reader = BufReader::with_capacity(READ_BUFFER_BYTES, handle);
    debug!("创建 WIM 解析器: {}", path.display());

    Ok(Self {
        file: reader,
        header: None,
        images: Vec::with_capacity(8),
        string_pool: StringPool::new(),
    })
}
/// Builds a parser around an already-opened file, wrapping it in a
/// default-capacity `BufReader`. Nothing is read until a parse method is called.
#[doc(hidden)]
#[allow(dead_code)]
pub fn new_for_test(file: File) -> Self {
Self {
file: BufReader::new(file),
header: None,
images: Vec::with_capacity(8),
string_pool: StringPool::new(),
}
}
/// Reads and caches the WIM header, returning a reference to it.
///
/// The first call seeks to the start of the file, reads 204 bytes, parses
/// them and verifies the `MSWIM\0\0\0` signature. Later calls return the
/// cached header without touching the file.
///
/// # Errors
/// Fails when seeking or reading fails, or when the signature does not match.
pub fn read_header(&mut self) -> Result<&WimHeader> {
    if self.header.is_none() {
        debug!("开始读取 WIM 文件头");
        self.file.seek(SeekFrom::Start(0))?;

        let mut raw = [0u8; 204];
        self.file
            .read_exact(&mut raw)
            .context("读取 WIM 文件头失败")?;

        let parsed = self.parse_header_buffer(&raw)?;
        if parsed.signature != *b"MSWIM\x00\x00\x00" {
            return Err(anyhow::anyhow!("无效的 WIM 文件签名"));
        }

        info!(
            "成功读取 WIM 文件头 - 版本: {}, 镜像数: {}",
            parsed.format_version, parsed.image_count
        );
        self.header = Some(parsed);
    }
    Ok(self.header.as_ref().unwrap())
}
fn parse_header_buffer(&self, buffer: &[u8]) -> Result<WimHeader> {
use std::convert::TryInto;
let read_u32_le = |offset: usize| -> u32 {
u32::from_le_bytes(buffer[offset..offset + 4].try_into().unwrap())
};
let read_u16_le = |offset: usize| -> u16 {
u16::from_le_bytes(buffer[offset..offset + 2].try_into().unwrap())
};
let read_u64_le = |offset: usize| -> u64 {
u64::from_le_bytes(buffer[offset..offset + 8].try_into().unwrap())
};
let parse_resource_entry = |offset: usize| -> FileResourceEntry {
let size_bytes = &buffer[offset..offset + 7];
let mut size_array = [0u8; 8];
size_array[..7].copy_from_slice(size_bytes);
let size = u64::from_le_bytes(size_array);
let flags = buffer[offset + 7];
let offset_val = read_u64_le(offset + 8);
let original_size = read_u64_le(offset + 16);
FileResourceEntry {
size,
flags,
offset: offset_val,
original_size,
}
};
let mut signature = [0u8; 8];
signature.copy_from_slice(&buffer[0..8]);
let header = WimHeader {
signature,
header_size: read_u32_le(8),
format_version: read_u32_le(12),
file_flags: read_u32_le(16),
compressed_size: read_u32_le(20),
guid: buffer[24..40].try_into().unwrap(),
segment_number: read_u16_le(40),
total_segments: read_u16_le(42),
image_count: read_u32_le(44),
offset_table_resource: parse_resource_entry(48),
xml_data_resource: parse_resource_entry(72),
boot_metadata_resource: parse_resource_entry(96),
bootable_image_index: read_u32_le(120),
integrity_resource: parse_resource_entry(124),
};
debug!(
"解析 WIM 头部完成 - 镜像数: {}, 文件标志: 0x{:08X}",
header.image_count, header.file_flags
);
Ok(header)
}
/// Loads the XML metadata resource named by the header and parses it into
/// the image list.
///
/// The header is read first if it has not been read yet. The resource is read
/// as-is from `xml_data_resource.offset` for `xml_data_resource.size` bytes.
///
/// # Errors
/// Fails when the header cannot be read, when the XML resource is empty, when
/// the resource range does not fit inside the file, when reading fails, or
/// when the XML payload cannot be decoded.
pub fn read_xml_data(&mut self) -> Result<()> {
    use std::convert::TryFrom;

    if self.header.is_none() {
        self.read_header()?;
    }
    let (offset, size) = {
        let resource = &self.header.as_ref().unwrap().xml_data_resource;
        (resource.offset, resource.size)
    };
    if size == 0 {
        return Err(anyhow::anyhow!("WIM 文件中没有 XML 数据资源"));
    }
    debug!("开始读取 XML 数据,偏移: {}, 大小: {}", offset, size);

    // The size comes straight from the file (up to 2^56). Validate it against
    // the real file length before allocating, so a corrupt header yields an
    // error rather than a gigantic allocation.
    let file_len = self.file.seek(SeekFrom::End(0))?;
    let fits_in_file = offset
        .checked_add(size)
        .is_some_and(|end| end <= file_len);
    if !fits_in_file {
        return Err(anyhow::anyhow!(
            "XML 数据资源超出文件范围 (偏移: {}, 大小: {}, 文件大小: {})",
            offset,
            size,
            file_len
        ));
    }
    // Checked conversion: `as usize` would silently truncate on 32-bit targets.
    let buffer_len = usize::try_from(size).context("XML 数据资源过大")?;

    self.file.seek(SeekFrom::Start(offset))?;
    let mut xml_buffer = vec![0u8; buffer_len];
    self.file
        .read_exact(&mut xml_buffer)
        .context("读取 XML 数据失败")?;
    self.parse_xml_data(&xml_buffer)?;
    info!("成功解析 {} 个镜像的信息", self.images.len());
    Ok(())
}
/// Decodes a BOM-prefixed UTF-16LE buffer and hands the text to
/// `parse_xml_images`.
///
/// # Errors
/// Fails when the buffer is shorter than two bytes, does not start with the
/// `FF FE` byte-order mark, has an odd payload length, or is not valid UTF-16.
fn parse_xml_data(&mut self, xml_buffer: &[u8]) -> Result<()> {
    let payload = match xml_buffer {
        [0xFF, 0xFE, rest @ ..] => rest,
        [] | [_] => return Err(anyhow::anyhow!("XML 数据太短")),
        _ => return Err(anyhow::anyhow!("无效的 XML 数据 BOM")),
    };
    if payload.len() % 2 != 0 {
        return Err(anyhow::anyhow!("XML UTF-16 数据长度不是偶数"));
    }

    let code_units: Vec<u16> = payload
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    let xml_text = String::from_utf16(&code_units).context("无法将 XML 数据转换为 UTF-8")?;

    // `len()` is the UTF-8 byte length of the decoded text.
    debug!("XML 数据长度: {} 字符", xml_text.len());
    self.parse_xml_images(&xml_text)
}
/// Variant of `parse_xml_data` that decodes with `encoding_rs` and parses
/// with `parse_xml_images_optimized`.
///
/// # Errors
/// Fails when the buffer is shorter than two bytes, lacks the `FF FE`
/// byte-order mark, or the decoder reports malformed input.
fn parse_xml_data_optimized(&mut self, xml_buffer: &[u8]) -> Result<()> {
    let payload = match xml_buffer {
        [0xFF, 0xFE, rest @ ..] => rest,
        [] | [_] => return Err(anyhow::anyhow!("XML 数据太短")),
        _ => return Err(anyhow::anyhow!("无效的 XML 数据 BOM")),
    };

    let (xml_text, _encoding, malformed) = UTF_16LE.decode(payload);
    if malformed {
        return Err(anyhow::anyhow!("UTF-16解码过程中发现错误"));
    }

    // `len()` is the UTF-8 byte length of the decoded text.
    debug!("XML 数据长度: {} 字符", xml_text.len());
    self.parse_xml_images_optimized(&xml_text)
}
/// Rebuilds `self.images` by streaming `xml_content` through `quick_xml`.
///
/// State kept while walking the events:
/// * `current_image` - the record being filled; started by an `<IMAGE>` start
///   tag that has a numeric `INDEX` attribute, pushed on `</IMAGE>`.
/// * `current_tag` - name of the most recent start tag other than `IMAGE` /
///   `WINDOWS`; text events are attributed to it.
/// * `in_windows_section` - true between `<WINDOWS>` and `</WINDOWS>`; inside
///   that section only `ARCH` text is stored.
///
/// # Errors
/// Fails when the reader reports an XML error or when a text node is not
/// valid UTF-8.
fn parse_xml_images_optimized(&mut self, xml_content: &str) -> Result<()> {
    self.images.clear();
    let mut reader = Reader::from_str(xml_content);
    reader.config_mut().trim_text(true);
    let mut current_image: Option<ImageInfo> = None;
    let mut current_tag = String::new();
    let mut in_windows_section = false;
    loop {
        match reader.read_event() {
            Ok(Event::Start(ref e)) => {
                match e.name().as_ref() {
                    b"IMAGE" => {
                        // Malformed attributes are skipped; without a usable
                        // INDEX no new record is started.
                        for attr in e.attributes().flatten() {
                            if attr.key.as_ref() == b"INDEX" {
                                if let Ok(index_str) = std::str::from_utf8(&attr.value) {
                                    if let Ok(index) = index_str.parse::<u32>() {
                                        current_image = Some(ImageInfo::new_with_index(index));
                                    }
                                }
                            }
                        }
                    }
                    b"WINDOWS" => {
                        in_windows_section = true;
                    }
                    tag => {
                        current_tag = String::from_utf8_lossy(tag).into_owned();
                    }
                }
            }
            Ok(Event::Text(e)) => {
                if let Some(ref mut image) = current_image {
                    let text = std::str::from_utf8(&e)?;
                    if in_windows_section && current_tag == "ARCH" {
                        image.set_field("ARCH", text);
                    } else if !in_windows_section {
                        // Pass the tag by reference (this argument was
                        // corrupted to `¤t_tag` in the source).
                        image.set_field(&current_tag, text);
                    }
                }
            }
            Ok(Event::End(ref e)) => {
                match e.name().as_ref() {
                    b"IMAGE" => {
                        if let Some(mut image) = current_image.take() {
                            image.infer_version_and_arch();
                            self.images.push(image);
                        }
                    }
                    b"WINDOWS" => {
                        in_windows_section = false;
                    }
                    _ => {}
                }
            }
            Ok(Event::Eof) => break,
            Err(e) => return Err(anyhow::anyhow!("XML解析错误: {}", e)),
            _ => {}
        }
    }
    info!("优化解析完成:成功解析 {} 个镜像的信息", self.images.len());
    Ok(())
}
/// Rebuilds `self.images` by scanning `xml_content` for `<IMAGE ...</IMAGE>`
/// spans and parsing each one with `parse_single_image_xml`.
///
/// Scanning stops at the first `<IMAGE` that has no closing tag. Spans that
/// fail to parse are skipped. Always returns `Ok(())`.
fn parse_xml_images(&mut self, xml_content: &str) -> Result<()> {
    const OPEN_MARKER: &str = "<IMAGE";
    const CLOSE_MARKER: &str = "</IMAGE>";

    self.images.clear();
    let mut cursor = 0;
    loop {
        let Some(open_rel) = xml_content[cursor..].find(OPEN_MARKER) else {
            break;
        };
        let open_at = cursor + open_rel;
        let Some(close_rel) = xml_content[open_at..].find(CLOSE_MARKER) else {
            break;
        };
        // Include the closing tag itself in the span.
        let close_at = open_at + close_rel + CLOSE_MARKER.len();

        if let Ok(parsed) = self.parse_single_image_xml(&xml_content[open_at..close_at]) {
            self.images.push(parsed);
        }
        cursor = close_at;
    }
    Ok(())
}
/// Builds an [`ImageInfo`] from the text of one `<IMAGE ...>...</IMAGE>` span
/// using plain substring searches.
///
/// Defaults when something is missing: index 0, name `"Image {index}"`,
/// description `"Unknown"`, counters 0. The architecture comes from the
/// `<ARCH>` code when recognised, otherwise from keywords in the name and
/// description. The timestamps are always `None`. Always returns `Ok`.
pub fn parse_single_image_xml(&self, image_xml: &str) -> Result<ImageInfo> {
    /// Trimmed text between the first `<tag>` and the first `</tag>`;
    /// `None` when either is missing or nothing lies between them.
    fn tag_text(xml: &str, tag: &str) -> Option<String> {
        let opening = format!("<{tag}>");
        let closing = format!("</{tag}>");
        let body_start = xml.find(&opening)? + opening.len();
        let body_end = xml.find(&closing)?;
        (body_start < body_end).then(|| xml[body_start..body_end].trim().to_string())
    }

    const INDEX_ATTR: &str = "INDEX=\"";
    let index = image_xml
        .find(INDEX_ATTR)
        .map(|at| &image_xml[at + INDEX_ATTR.len()..])
        .and_then(|tail| tail.find('"').map(|end| &tail[..end]))
        .and_then(|digits| digits.parse::<u32>().ok())
        .unwrap_or(0);

    let name = match tag_text(image_xml, "DISPLAYNAME") {
        Some(text) => text,
        None => format!("Image {index}"),
    };
    let description = match tag_text(image_xml, "DISPLAYDESCRIPTION") {
        Some(text) => text,
        None => "Unknown".to_string(),
    };
    let dir_count = tag_text(image_xml, "DIRCOUNT")
        .and_then(|text| text.parse().ok())
        .unwrap_or(0);
    let file_count = tag_text(image_xml, "FILECOUNT")
        .and_then(|text| text.parse().ok())
        .unwrap_or(0);
    let total_bytes = tag_text(image_xml, "TOTALBYTES")
        .and_then(|text| text.parse().ok())
        .unwrap_or(0);

    // The explicit <ARCH> code takes precedence over keyword guessing.
    let arch_from_xml = self.parse_arch_from_xml(image_xml);
    let (version, arch_from_name) = self.extract_version_and_arch(&name, &description);

    let image_info = ImageInfo {
        index,
        name,
        description,
        dir_count,
        file_count,
        total_bytes,
        creation_time: None,
        last_modification_time: None,
        version,
        architecture: arch_from_xml.or(arch_from_name),
    };
    debug!(
        "解析镜像信息: {} - {} - {} - {:#?}",
        image_info.index, image_info.name, image_info.description, image_info.architecture
    );
    Ok(image_info)
}
/// Guesses `(version, architecture)` from keywords in the image name and
/// description (case-insensitive). Either element is `None` when no keyword
/// matches.
fn extract_version_and_arch(
    &self,
    name: &str,
    description: &str,
) -> (Option<String>, Option<String>) {
    // (lower-case needle, label); earlier rows win.
    const VERSION_MARKERS: [(&str, &str); 6] = [
        ("windows 11", "Windows 11"),
        ("windows 10", "Windows 10"),
        ("windows server 2022", "Windows Server 2022"),
        ("windows server 2019", "Windows Server 2019"),
        ("windows server", "Windows Server"),
        ("windows", "Windows"),
    ];
    const ARCH_MARKERS: [(&str, &str); 4] = [
        ("x64", "x64"),
        ("amd64", "x64"),
        ("x86", "x86"),
        ("arm64", "ARM64"),
    ];

    let haystack = format!("{name} {description}").to_lowercase();
    let first_match = |table: &[(&str, &str)]| -> Option<String> {
        table
            .iter()
            .find(|entry| haystack.contains(entry.0))
            .map(|entry| entry.1.to_string())
    };

    let version = first_match(&VERSION_MARKERS);
    let architecture = first_match(&ARCH_MARKERS);
    (version, architecture)
}
/// Maps the numeric code in the first `<ARCH>` element of `image_xml` to an
/// architecture label (`0` -> x86, `9` -> x64, `5` -> ARM, `12` -> ARM64).
///
/// Returns `None` when the element is missing, empty, or holds an unknown
/// code (the latter is logged at debug level).
pub fn parse_arch_from_xml(&self, image_xml: &str) -> Option<String> {
    const OPENING: &str = "<ARCH>";
    const CLOSING: &str = "</ARCH>";

    let value_start = image_xml.find(OPENING)? + OPENING.len();
    let value_end = image_xml.find(CLOSING)?;
    if value_start >= value_end {
        return None;
    }

    let arch_value = image_xml[value_start..value_end].trim();
    let label = match arch_value {
        "0" => "x86",
        "9" => "x64",
        "5" => "ARM",
        "12" => "ARM64",
        _ => {
            debug!("未知的架构值: {}", arch_value);
            return None;
        }
    };
    Some(label.to_string())
}
/// Returns the images collected by the most recent XML parse (empty before any parse).
pub fn get_images(&self) -> &[ImageInfo] {
&self.images
}
/// Returns the first parsed image whose `index` field equals `index`, if any.
#[allow(dead_code)]
pub fn get_image(&self, index: u32) -> Option<&ImageInfo> {
self.images.iter().find(|img| img.index == index)
}
/// Returns the cached header, or `None` when `read_header` has not succeeded yet.
#[allow(dead_code)]
pub fn get_header(&self) -> Option<&WimHeader> {
self.header.as_ref()
}
/// Reports whether the header declares more than one image; `false` when no
/// header has been read.
#[allow(dead_code)]
pub fn has_multiple_images(&self) -> bool {
    self.header
        .as_ref()
        .is_some_and(|header| header.image_count > 1)
}
/// Returns the image count declared by the header, or 0 when no header has
/// been read.
#[allow(dead_code)]
pub fn get_image_count(&self) -> u32 {
    match self.header.as_ref() {
        Some(header) => header.image_count,
        None => 0,
    }
}
/// Reports whether the header's file flags have the `FileFlags::COMPRESSION`
/// bit set; `false` when no header has been read.
#[allow(dead_code)]
pub fn is_compressed(&self) -> bool {
    self.header
        .as_ref()
        .is_some_and(|header| header.file_flags & FileFlags::COMPRESSION != 0)
}
/// Names the compression scheme indicated by the header's file flags.
///
/// Returns `"XPRESS"` or `"LZX"` when the matching bit is set (XPRESS is
/// checked first), `"Unknown"` when only the generic compression bit is set,
/// and `None` when no bit is set or no header has been read.
#[allow(dead_code)]
pub fn get_compression_type(&self) -> Option<&'static str> {
    let flags = self.header.as_ref()?.file_flags;
    let is_set = |mask: u32| flags & mask != 0;

    if is_set(FileFlags::COMPRESS_XPRESS) {
        Some("XPRESS")
    } else if is_set(FileFlags::COMPRESS_LZX) {
        Some("LZX")
    } else if is_set(FileFlags::COMPRESSION) {
        Some("Unknown")
    } else {
        None
    }
}
/// Reads the header and then the XML metadata, stopping at the first error.
pub fn parse_full(&mut self) -> Result<()> {
    self.read_header()?;
    self.read_xml_data()
}
}
/// One-line, human-readable summary of an image: index and name, optional
/// version and architecture tags, description, counters and size in whole MB.
impl std::fmt::Display for ImageInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const BYTES_PER_MB: u64 = 1024 * 1024;
        let size_mb = self.total_bytes / BYTES_PER_MB;

        write!(f, "镜像 {} - {}", self.index, self.name)?;
        if let Some(version) = self.version.as_ref() {
            write!(f, " [{version}]")?;
        }
        if let Some(arch) = self.architecture.as_ref() {
            write!(f, " [{arch}]")?;
        }
        write!(f, " | 描述: {}", self.description)?;
        write!(
            f,
            " | 文件数: {}, 目录数: {}",
            self.file_count, self.dir_count
        )?;
        write!(f, " | 总大小: {} MB", size_mb)
    }
}
/// Multi-line dump of the main header fields, one `label: value` per line.
impl std::fmt::Display for WimHeader {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            format_version,
            file_flags,
            image_count,
            segment_number,
            total_segments,
            bootable_image_index,
            ..
        } = self;

        writeln!(f, "WIM Header:")?;
        writeln!(f, " Format Version: {}", format_version)?;
        writeln!(f, " File Flags: 0x{:08X}", file_flags)?;
        writeln!(f, " Image Count: {}", image_count)?;
        writeln!(f, " Segment: {}/{}", segment_number, total_segments)?;
        writeln!(f, " Bootable Image Index: {}", bootable_image_index)
    }
}
#[allow(dead_code)]
impl WimParser {
/// Builds one summary line per parsed image: index and name, followed by the
/// version in parentheses and the architecture in brackets when known.
#[allow(dead_code)]
pub fn get_version_summary(&self) -> Vec<String> {
    self.images
        .iter()
        .map(|image| {
            let mut line = format!("镜像 {}: {}", image.index, image.name);
            if let Some(version) = image.version.as_ref() {
                line.push_str(&format!(" ({version})"));
            }
            if let Some(arch) = image.architecture.as_ref() {
                line.push_str(&format!(" [{arch}]"));
            }
            line
        })
        .collect()
}
pub fn get_primary_version(&self) -> Option<String> {
if self.images.is_empty() {
return None;
}
let mut version_counts = std::collections::HashMap::new();
for image in &self.images {
if let Some(ref version) = image.version {
*version_counts.entry(version.clone()).or_insert(0) += 1;
}
}
version_counts
.into_iter()
.max_by_key(|(_, count)| *count)
.map(|(version, _)| version)
}
pub fn get_primary_architecture(&self) -> Option<String> {
if self.images.is_empty() {
return None;
}
let mut arch_counts = std::collections::HashMap::new();
for image in &self.images {
if let Some(ref arch) = image.architecture {
*arch_counts.entry(arch.clone()).or_insert(0) += 1;
}
}
arch_counts
.into_iter()
.max_by_key(|(_, count)| *count)
.map(|(arch, _)| arch)
}
/// Reports whether any image's version label contains `version`, compared
/// case-insensitively. Images without a version never match.
#[allow(dead_code)]
pub fn has_version(&self, version: &str) -> bool {
    let needle = version.to_lowercase();
    self.images
        .iter()
        .filter_map(|img| img.version.as_deref())
        .any(|label| label.to_lowercase().contains(&needle))
}
/// Reports whether any image's architecture label contains `arch`, compared
/// case-insensitively. Images without an architecture never match.
#[allow(dead_code)]
pub fn has_architecture(&self, arch: &str) -> bool {
    let needle = arch.to_lowercase();
    self.images
        .iter()
        .filter_map(|img| img.architecture.as_deref())
        .any(|label| label.to_lowercase().contains(&needle))
}
/// Aggregates the parsed images into a single [`WindowsInfo`] summary.
///
/// Returns `None` when no primary version or architecture can be determined,
/// or when the primary version does not mention "windows".
///
/// Each image contributes at most one edition: the first of `Pro`, `Home`,
/// `Enterprise`, `Education` whose lower-case keyword occurs in the image
/// name. Editions are listed once each, in the order they are first seen.
/// The classification is made before the duplicate check, so an image whose
/// edition is already listed no longer falls through to a later keyword.
pub fn get_windows_info(&self) -> Option<WindowsInfo> {
    // (lower-case keyword, edition label); earlier rows win.
    const EDITION_MARKERS: [(&str, &str); 4] = [
        ("pro", "Pro"),
        ("home", "Home"),
        ("enterprise", "Enterprise"),
        ("education", "Education"),
    ];

    let primary_version = self.get_primary_version()?;
    let primary_arch = self.get_primary_architecture()?;
    if !primary_version.to_lowercase().contains("windows") {
        return None;
    }

    let mut editions: Vec<String> = Vec::new();
    for image in &self.images {
        let name_lower = image.name.to_lowercase();
        let matched = EDITION_MARKERS
            .iter()
            .find(|marker| name_lower.contains(marker.0));
        if let Some(&(_, label)) = matched {
            if !editions.iter().any(|known| known.as_str() == label) {
                editions.push(label.to_string());
            }
        }
    }

    Some(WindowsInfo {
        version: primary_version,
        architecture: primary_arch,
        editions,
        image_count: self.images.len() as u32,
        total_size: self.images.iter().map(|img| img.total_bytes).sum(),
    })
}
}
/// Aggregate description of the Windows images found in one WIM file.
#[derive(Debug, Clone)]
pub struct WindowsInfo {
    /// Most common version label among the images.
    pub version: String,
    /// Most common architecture label among the images.
    pub architecture: String,
    /// Edition names detected in the image names, without duplicates.
    pub editions: Vec<String>,
    /// Number of parsed images.
    pub image_count: u32,
    /// Sum of the images' `total_bytes`.
    pub total_size: u64,
}

/// One-line summary: `version (arch)`, the edition list when non-empty, the
/// image count and the total size in whole MB.
impl std::fmt::Display for WindowsInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const BYTES_PER_MB: u64 = 1024 * 1024;

        write!(f, "{} ({})", self.version, self.architecture)?;
        match self.editions.as_slice() {
            [] => {}
            listed => write!(f, " - 版本: {}", listed.join(", "))?,
        }
        write!(f, " | 镜像数量: {}", self.image_count)?;
        write!(f, " | 总大小: {} MB", self.total_size / BYTES_PER_MB)
    }
}
/// Wrappers that expose the private XML parsing paths. Compiled only for
/// tests or when the `benchmarking` feature is enabled.
#[cfg(any(test, feature = "benchmarking"))]
impl WimParser {
/// Runs the hand-rolled path: manual UTF-16 decoding plus substring-based image parsing.
pub fn parse_xml_data_for_bench(&mut self, xml_buffer: &[u8]) -> Result<()> {
self.parse_xml_data(xml_buffer)
}
/// Runs the `encoding_rs` + `quick_xml` path.
pub fn parse_xml_data_optimized_for_bench(&mut self, xml_buffer: &[u8]) -> Result<()> {
self.parse_xml_data_optimized(xml_buffer)
}
/// Same call as `parse_xml_data_optimized_for_bench`, under a different name.
pub fn use_optimized_parsing(&mut self, xml_buffer: &[u8]) -> Result<()> {
self.parse_xml_data_optimized(xml_buffer)
}
}