wim_parser/
lib.rs

1use anyhow::{Context, Result};
2use std::fs::File;
3use std::io::{BufReader, Read, Seek, SeekFrom};
4use std::path::Path;
5use tracing::{debug, info};
6
7// 性能优化导入
8use encoding_rs::UTF_16LE;
9use quick_xml::events::Event;
10use quick_xml::Reader;
11
/// Pool of reusable `String` buffers, used to cut down on repeated allocations.
///
/// Buffers are handed out in order; `reset` rewinds the cursor so the same
/// buffers are handed out again on the next pass.
#[derive(Debug)]
struct StringPool {
    /// Backing buffers; they are never dropped while the pool is alive.
    pool: Vec<String>,
    /// Position of the next buffer to hand out.
    index: usize,
}

#[allow(dead_code)]
impl StringPool {
    /// Creates an empty pool with room reserved for 32 buffers.
    fn new() -> Self {
        let pool = Vec::with_capacity(32);
        Self { pool, index: 0 }
    }

    /// Hands out the next buffer, emptied and ready for use.
    ///
    /// A new buffer (256 bytes of capacity) is allocated only when every
    /// existing buffer has already been handed out since the last `reset`.
    fn get_string(&mut self) -> &mut String {
        let slot = self.index;
        if slot >= self.pool.len() {
            self.pool.push(String::with_capacity(256));
        }
        let recycled = &mut self.pool[slot];
        recycled.clear();
        self.index = slot + 1;
        recycled
    }

    /// Rewinds the cursor; the buffers themselves are kept for reuse.
    fn reset(&mut self) {
        self.index = 0;
    }
}
43
/// WIM file header (`WIMHEADER_V1_PACKED`).
///
/// Total size: 204 bytes, per the original author's note.
/// NOTE(review): the fields declared here add up to 148 bytes (with each
/// resource entry counted as 24 bytes); confirm the full on-disk header size
/// against the WIM specification.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct WimHeader {
    /// File signature, `"MSWIM\x00\x00\x00"`.
    pub signature: [u8; 8],
    /// Size of the header.
    pub header_size: u32,
    /// Format version.
    pub format_version: u32,
    /// File flags (see `FileFlags`).
    pub file_flags: u32,
    /// Compressed size, per the original comment.
    /// NOTE(review): the WIM specification appears to describe this field as
    /// the compression chunk size — confirm.
    pub compressed_size: u32,
    /// Unique identifier (GUID).
    pub guid: [u8; 16],
    /// Segment (part) number.
    pub segment_number: u16,
    /// Total number of segments.
    pub total_segments: u16,
    /// Number of images.
    pub image_count: u32,
    /// Resource entry for the offset table.
    pub offset_table_resource: FileResourceEntry,
    /// Resource entry for the XML data.
    pub xml_data_resource: FileResourceEntry,
    /// Resource entry for the boot metadata.
    pub boot_metadata_resource: FileResourceEntry,
    /// Index of the bootable image.
    pub bootable_image_index: u32,
    /// Resource entry for the integrity data.
    pub integrity_resource: FileResourceEntry,
}
78
/// File resource entry (`_RESHDR_DISK_SHORT`).
///
/// Total size on disk: 24 bytes.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct FileResourceEntry {
    /// Resource size (stored in 7 bytes on disk).
    pub size: u64,
    /// Resource flags (1 byte, see `ResourceFlags`).
    pub flags: u8,
    /// Resource offset (8 bytes).
    pub offset: u64,
    /// Original size (8 bytes).
    pub original_size: u64,
}
93
/// Bit flags stored in the `flags` byte of a file resource entry.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ResourceFlags;

#[allow(dead_code)]
impl ResourceFlags {
    /// The entry is free.
    pub const FREE: u8 = 0b0000_0001;
    /// The resource contains metadata.
    pub const METADATA: u8 = 0b0000_0010;
    /// The resource is compressed.
    pub const COMPRESSED: u8 = 0b0000_0100;
    /// The resource spans segments.
    pub const SPANNED: u8 = 0b0000_1000;
}
106
/// Bit flags stored in the `file_flags` field of the WIM header.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct FileFlags;

#[allow(dead_code)]
impl FileFlags {
    /// Resources are compressed.
    pub const COMPRESSION: u32 = 1 << 1;
    /// The file is read-only.
    pub const READONLY: u32 = 1 << 2;
    /// The file is one segment of a spanned set.
    pub const SPANNED: u32 = 1 << 3;
    /// The file contains only file resources.
    pub const RESOURCE_ONLY: u32 = 1 << 4;
    /// The file contains only metadata.
    pub const METADATA_ONLY: u32 = 1 << 5;
    /// XPRESS compression.
    pub const COMPRESS_XPRESS: u32 = 1 << 17;
    /// LZX compression.
    pub const COMPRESS_LZX: u32 = 1 << 18;
}
122
/// Information about a single image stored in a WIM file.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct ImageInfo {
    /// Image index.
    pub index: u32,
    /// Image name.
    pub name: String,
    /// Image description.
    pub description: String,
    /// Number of directories.
    pub dir_count: u32,
    /// Number of files.
    pub file_count: u32,
    /// Total number of bytes.
    pub total_bytes: u64,
    /// Creation time.
    pub creation_time: Option<u64>,
    /// Last modification time.
    pub last_modification_time: Option<u64>,
    /// Version label.
    pub version: Option<String>,
    /// Architecture label.
    pub architecture: Option<String>,
}

#[allow(dead_code)]
impl ImageInfo {
    /// Creates an otherwise empty `ImageInfo` carrying the given index
    /// (used by the quick-xml based parser).
    pub fn new_with_index(index: u32) -> Self {
        Self {
            index,
            name: String::new(),
            description: String::new(),
            dir_count: 0,
            file_count: 0,
            total_bytes: 0,
            creation_time: None,
            last_modification_time: None,
            version: None,
            architecture: None,
        }
    }

    /// Stores `value` in the field that corresponds to the XML tag `tag`.
    ///
    /// Numeric fields fall back to 0 when `value` does not parse. For `ARCH`
    /// the numeric code is mapped to a label and an unrecognised code clears
    /// the architecture. Tags not listed here are ignored.
    pub fn set_field(&mut self, tag: &str, value: &str) {
        match tag {
            "DISPLAYNAME" => self.name = value.to_owned(),
            "DISPLAYDESCRIPTION" => self.description = value.to_owned(),
            "DIRCOUNT" => self.dir_count = value.parse().unwrap_or_default(),
            "FILECOUNT" => self.file_count = value.parse().unwrap_or_default(),
            "TOTALBYTES" => self.total_bytes = value.parse().unwrap_or_default(),
            "ARCH" => {
                let label = match value {
                    "0" => Some("x86"),
                    "9" => Some("x64"),
                    "5" => Some("ARM"),
                    "12" => Some("ARM64"),
                    _ => None,
                };
                self.architecture = label.map(str::to_owned);
            }
            _ => {}
        }
    }

    /// Fills in `version` and `architecture` from the lowercased name and
    /// description, touching only the fields that are still `None`.
    pub fn infer_version_and_arch(&mut self) {
        // Ordered (needle, label) pairs: the first needle found wins.
        const VERSION_MARKERS: [(&str, &str); 6] = [
            ("windows 11", "Windows 11"),
            ("windows 10", "Windows 10"),
            ("windows server 2022", "Windows Server 2022"),
            ("windows server 2019", "Windows Server 2019"),
            ("windows server", "Windows Server"),
            ("windows", "Windows"),
        ];
        const ARCH_MARKERS: [(&str, &str); 4] = [
            ("x64", "x64"),
            ("amd64", "x64"),
            ("x86", "x86"),
            ("arm64", "ARM64"),
        ];

        let haystack = format!("{} {}", self.name, self.description).to_lowercase();
        let first_match = |table: &[(&str, &str)]| -> Option<String> {
            table
                .iter()
                .find(|(needle, _)| haystack.contains(*needle))
                .map(|(_, label)| (*label).to_owned())
        };

        if self.version.is_none() {
            self.version = first_match(&VERSION_MARKERS);
        }

        // Only consulted when the XML ARCH tag did not provide a value.
        if self.architecture.is_none() {
            self.architecture = first_match(&ARCH_MARKERS);
        }
    }
}
226
/// WIM file parser.
///
/// Holds the buffered file handle plus the header and per-image information
/// parsed so far.
#[allow(dead_code)]
pub struct WimParser {
    /// Buffered handle to the WIM file being parsed.
    file: BufReader<File>,
    /// Parsed header; `None` until `read_header` has succeeded.
    header: Option<WimHeader>,
    /// Images collected from the XML data.
    images: Vec<ImageInfo>,
    /// Reusable string buffers.
    string_pool: StringPool,
}
235
236#[allow(dead_code)]
237impl WimParser {
238    /// 创建新的 WIM 解析器
239    pub fn new<P: AsRef<Path>>(wim_path: P) -> Result<Self> {
240        let file = File::open(wim_path.as_ref())
241            .with_context(|| format!("无法打开 WIM 文件: {}", wim_path.as_ref().display()))?;
242
243        let buffered_file = BufReader::with_capacity(64 * 1024, file); // 64KB缓冲区
244
245        debug!("创建 WIM 解析器: {}", wim_path.as_ref().display());
246
247        Ok(Self {
248            file: buffered_file,
249            header: None,
250            images: Vec::with_capacity(8), // 预分配镜像容量
251            string_pool: StringPool::new(),
252        })
253    }
254
255    /// 创建用于测试的 WIM 解析器（不需要实际文件）
256    #[doc(hidden)]
257    #[allow(dead_code)]
258    pub fn new_for_test(file: File) -> Self {
259        Self {
260            file: BufReader::new(file),
261            header: None,
262            images: Vec::with_capacity(8),
263            string_pool: StringPool::new(),
264        }
265    }
266
267    /// 读取并解析 WIM 文件头
268    pub fn read_header(&mut self) -> Result<&WimHeader> {
269        if self.header.is_some() {
270            return Ok(self.header.as_ref().unwrap());
271        }
272
273        debug!("开始读取 WIM 文件头");
274
275        // 跳转到文件开始
276        self.file.seek(SeekFrom::Start(0))?;
277
278        // 读取 204 字节的文件头
279        let mut header_buffer = vec![0u8; 204];
280        self.file
281            .read_exact(&mut header_buffer)
282            .context("读取 WIM 文件头失败")?;
283
284        let header = self.parse_header_buffer(&header_buffer)?;
285
286        // 验证签名
287        if &header.signature != b"MSWIM\x00\x00\x00" {
288            return Err(anyhow::anyhow!("无效的 WIM 文件签名"));
289        }
290
291        info!(
292            "成功读取 WIM 文件头 - 版本: {}, 镜像数: {}",
293            header.format_version, header.image_count
294        );
295
296        self.header = Some(header);
297        Ok(self.header.as_ref().unwrap())
298    }
299
300    /// 解析文件头缓冲区
301    fn parse_header_buffer(&self, buffer: &[u8]) -> Result<WimHeader> {
302        use std::convert::TryInto;
303
304        // 辅助函数：从缓冲区读取 little-endian 数值
305        let read_u32_le = |offset: usize| -> u32 {
306            u32::from_le_bytes(buffer[offset..offset + 4].try_into().unwrap())
307        };
308
309        let read_u16_le = |offset: usize| -> u16 {
310            u16::from_le_bytes(buffer[offset..offset + 2].try_into().unwrap())
311        };
312
313        let read_u64_le = |offset: usize| -> u64 {
314            u64::from_le_bytes(buffer[offset..offset + 8].try_into().unwrap())
315        };
316
317        // 解析文件资源条目
318        let parse_resource_entry = |offset: usize| -> FileResourceEntry {
319            // 读取 7 字节的大小 + 1 字节标志
320            let size_bytes = &buffer[offset..offset + 7];
321            let mut size_array = [0u8; 8];
322            size_array[..7].copy_from_slice(size_bytes);
323            let size = u64::from_le_bytes(size_array);
324
325            let flags = buffer[offset + 7];
326            let offset_val = read_u64_le(offset + 8);
327            let original_size = read_u64_le(offset + 16);
328
329            FileResourceEntry {
330                size,
331                flags,
332                offset: offset_val,
333                original_size,
334            }
335        };
336
337        // 解析文件头各个字段
338        let mut signature = [0u8; 8];
339        signature.copy_from_slice(&buffer[0..8]);
340
341        let header = WimHeader {
342            signature,
343            header_size: read_u32_le(8),
344            format_version: read_u32_le(12),
345            file_flags: read_u32_le(16),
346            compressed_size: read_u32_le(20),
347            guid: buffer[24..40].try_into().unwrap(),
348            segment_number: read_u16_le(40),
349            total_segments: read_u16_le(42),
350            image_count: read_u32_le(44),
351            offset_table_resource: parse_resource_entry(48),
352            xml_data_resource: parse_resource_entry(72),
353            boot_metadata_resource: parse_resource_entry(96),
354            bootable_image_index: read_u32_le(120),
355            integrity_resource: parse_resource_entry(124),
356        };
357
358        debug!(
359            "解析 WIM 头部完成 - 镜像数: {}, 文件标志: 0x{:08X}",
360            header.image_count, header.file_flags
361        );
362
363        Ok(header)
364    }
365
366    /// 读取并解析 XML 数据
367    pub fn read_xml_data(&mut self) -> Result<()> {
368        // 确保文件头已读取
369        if self.header.is_none() {
370            self.read_header()?;
371        }
372
373        let header = self.header.as_ref().unwrap();
374
375        // 检查 XML 数据资源是否存在
376        if header.xml_data_resource.size == 0 {
377            return Err(anyhow::anyhow!("WIM 文件中没有 XML 数据资源"));
378        }
379
380        debug!(
381            "开始读取 XML 数据，偏移: {}, 大小: {}",
382            header.xml_data_resource.offset, header.xml_data_resource.size
383        );
384
385        // 跳转到 XML 数据位置
386        self.file
387            .seek(SeekFrom::Start(header.xml_data_resource.offset))?;
388
389        // 读取 XML 数据
390        let mut xml_buffer = vec![0u8; header.xml_data_resource.size as usize];
391        self.file
392            .read_exact(&mut xml_buffer)
393            .context("读取 XML 数据失败")?;
394
395        // 解析 XML 数据
396        self.parse_xml_data(&xml_buffer)?;
397
398        info!("成功解析 {} 个镜像的信息", self.images.len());
399        Ok(())
400    }
401
402    /// 解析 XML 数据
403    fn parse_xml_data(&mut self, xml_buffer: &[u8]) -> Result<()> {
404        // XML 数据以 UTF-16 LE BOM 开始
405        if xml_buffer.len() < 2 {
406            return Err(anyhow::anyhow!("XML 数据太短"));
407        }
408
409        // 检查 BOM (0xFEFF)
410        if xml_buffer[0] != 0xFF || xml_buffer[1] != 0xFE {
411            return Err(anyhow::anyhow!("无效的 XML 数据 BOM"));
412        }
413
414        // 将 UTF-16 LE 转换为 UTF-8
415        let xml_utf16_data = &xml_buffer[2..]; // 跳过 BOM
416
417        // 确保数据长度为偶数（UTF-16 每个字符 2 字节）
418        if xml_utf16_data.len() % 2 != 0 {
419            return Err(anyhow::anyhow!("XML UTF-16 数据长度不是偶数"));
420        }
421
422        // 转换为 u16 数组
423        let mut utf16_chars = Vec::new();
424        for chunk in xml_utf16_data.chunks_exact(2) {
425            let char_val = u16::from_le_bytes([chunk[0], chunk[1]]);
426            utf16_chars.push(char_val);
427        }
428
429        // 转换为 UTF-8 字符串
430        let xml_string = String::from_utf16(&utf16_chars).context("无法将 XML 数据转换为 UTF-8")?;
431
432        debug!("XML 数据长度: {} 字符", xml_string.len());
433
434        // 解析 XML 镜像信息
435        self.parse_xml_images(&xml_string)?;
436
437        Ok(())
438    }
439
440    /// 优化的XML解析函数 - 使用proper XML parser和高效UTF-16解码
441    fn parse_xml_data_optimized(&mut self, xml_buffer: &[u8]) -> Result<()> {
442        // 检查基本格式
443        if xml_buffer.len() < 2 {
444            return Err(anyhow::anyhow!("XML 数据太短"));
445        }
446
447        // 检查 BOM (0xFEFF)
448        if xml_buffer[0] != 0xFF || xml_buffer[1] != 0xFE {
449            return Err(anyhow::anyhow!("无效的 XML 数据 BOM"));
450        }
451
452        // 使用encoding_rs进行高效UTF-16解码
453        let (xml_string, _, had_errors) = UTF_16LE.decode(&xml_buffer[2..]);
454        if had_errors {
455            return Err(anyhow::anyhow!("UTF-16解码过程中发现错误"));
456        }
457
458        debug!("XML 数据长度: {} 字符", xml_string.len());
459
460        // 使用quick-xml进行解析
461        self.parse_xml_images_optimized(&xml_string)?;
462
463        Ok(())
464    }
465
466    /// 优化的XML镜像解析函数 - 使用quick-xml
467    fn parse_xml_images_optimized(&mut self, xml_content: &str) -> Result<()> {
468        self.images.clear();
469
470        let mut reader = Reader::from_str(xml_content);
471        reader.config_mut().trim_text(true);
472
473        let mut current_image: Option<ImageInfo> = None;
474        let mut current_tag = String::new();
475        let mut in_windows_section = false;
476
477        loop {
478            match reader.read_event() {
479                Ok(Event::Start(ref e)) => {
480                    match e.name().as_ref() {
481                        b"IMAGE" => {
482                            // 提取INDEX属性
483                            for attr in e.attributes().flatten() {
484                                if attr.key.as_ref() == b"INDEX" {
485                                    if let Ok(index_str) = std::str::from_utf8(&attr.value) {
486                                        if let Ok(index) = index_str.parse::<u32>() {
487                                            current_image = Some(ImageInfo::new_with_index(index));
488                                        }
489                                    }
490                                }
491                            }
492                        }
493                        b"WINDOWS" => {
494                            in_windows_section = true;
495                        }
496                        tag => {
497                            current_tag = String::from_utf8_lossy(tag).into_owned();
498                        }
499                    }
500                }
501                Ok(Event::Text(e)) => {
502                    if let Some(ref mut image) = current_image {
503                        // 获取文本内容
504                        let text = std::str::from_utf8(&e)?;
505
506                        // 特殊处理WINDOWS节中的ARCH标签
507                        if in_windows_section && current_tag == "ARCH" {
508                            image.set_field("ARCH", text);
509                        } else if !in_windows_section {
510                            // 其他标签在非WINDOWS节中处理
511                            image.set_field(&current_tag, text);
512                        }
513                    }
514                }
515                Ok(Event::End(ref e)) => {
516                    match e.name().as_ref() {
517                        b"IMAGE" => {
518                            if let Some(mut image) = current_image.take() {
519                                // 推断版本和架构信息（如果尚未设置）
520                                image.infer_version_and_arch();
521                                self.images.push(image);
522                            }
523                        }
524                        b"WINDOWS" => {
525                            in_windows_section = false;
526                        }
527                        _ => {}
528                    }
529                }
530                Ok(Event::Eof) => break,
531                Err(e) => return Err(anyhow::anyhow!("XML解析错误: {}", e)),
532                _ => {}
533            }
534        }
535
536        info!("优化解析完成：成功解析 {} 个镜像的信息", self.images.len());
537        Ok(())
538    }
539
540    /// 解析 XML 中的镜像信息
541    fn parse_xml_images(&mut self, xml_content: &str) -> Result<()> {
542        // 简单的 XML 解析（基于字符串匹配）
543        // 在实际生产环境中，建议使用专门的 XML 解析库
544
545        self.images.clear();
546
547        // 查找所有 <IMAGE> 标签
548        let mut start_pos = 0;
549        while let Some(image_start) = xml_content[start_pos..].find("<IMAGE") {
550            let absolute_start = start_pos + image_start;
551
552            // 查找对应的 </IMAGE> 标签
553            if let Some(image_end) = xml_content[absolute_start..].find("</IMAGE>") {
554                let absolute_end = absolute_start + image_end + 8; // 包含 </IMAGE>
555                let image_xml = &xml_content[absolute_start..absolute_end];
556
557                // 解析单个镜像信息
558                if let Ok(image_info) = self.parse_single_image_xml(image_xml) {
559                    self.images.push(image_info);
560                }
561
562                start_pos = absolute_end;
563            } else {
564                break;
565            }
566        }
567
568        Ok(())
569    }
570
571    /// 解析单个镜像的 XML 信息
572    pub fn parse_single_image_xml(&self, image_xml: &str) -> Result<ImageInfo> {
573        // 辅助函数：从 XML 中提取标签值
574        let extract_tag_value = |xml: &str, tag: &str| -> Option<String> {
575            let start_tag = format!("<{tag}>");
576            let end_tag = format!("</{tag}>");
577
578            if let Some(start) = xml.find(&start_tag) {
579                if let Some(end) = xml.find(&end_tag) {
580                    let value_start = start + start_tag.len();
581                    if value_start < end {
582                        return Some(xml[value_start..end].trim().to_string());
583                    }
584                }
585            }
586            None
587        };
588
589        // 提取 INDEX 属性
590        let index = if let Some(index_start) = image_xml.find("INDEX=\"") {
591            let index_value_start = index_start + 7; // "INDEX=\"".len()
592            if let Some(index_end) = image_xml[index_value_start..].find("\"") {
593                let index_str = &image_xml[index_value_start..index_value_start + index_end];
594                index_str.parse().unwrap_or(0)
595            } else {
596                0
597            }
598        } else {
599            0
600        };
601
602        // 提取各种信息
603        let name =
604            extract_tag_value(image_xml, "DISPLAYNAME").unwrap_or_else(|| format!("Image {index}"));
605        let description = extract_tag_value(image_xml, "DISPLAYDESCRIPTION")
606            .unwrap_or_else(|| "Unknown".to_string());
607        let dir_count = extract_tag_value(image_xml, "DIRCOUNT")
608            .and_then(|s| s.parse().ok())
609            .unwrap_or(0);
610        let file_count = extract_tag_value(image_xml, "FILECOUNT")
611            .and_then(|s| s.parse().ok())
612            .unwrap_or(0);
613        let total_bytes = extract_tag_value(image_xml, "TOTALBYTES")
614            .and_then(|s| s.parse().ok())
615            .unwrap_or(0);
616
617        // 尝试从XML中的ARCH标签解析架构信息
618        let arch_from_xml = self.parse_arch_from_xml(image_xml);
619
620        // 从名称中提取版本信息，架构信息优先使用XML中的ARCH标签
621        let (version, arch_from_name) = self.extract_version_and_arch(&name, &description);
622        let architecture = arch_from_xml.or(arch_from_name);
623
624        let image_info = ImageInfo {
625            index,
626            name,
627            description,
628            dir_count,
629            file_count,
630            total_bytes,
631            creation_time: None,          // 可以进一步解析 CREATIONTIME
632            last_modification_time: None, // 可以进一步解析 LASTMODIFICATIONTIME
633            version,
634            architecture,
635        };
636
637        debug!(
638            "解析镜像信息: {} - {} - {} - {:#?}",
639            image_info.index, image_info.name, image_info.description, image_info.architecture
640        );
641
642        Ok(image_info)
643    }
644
645    /// 从镜像名称和描述中提取版本和架构信息
646    fn extract_version_and_arch(
647        &self,
648        name: &str,
649        description: &str,
650    ) -> (Option<String>, Option<String>) {
651        let combined_text = format!("{name} {description}").to_lowercase();
652
653        // 提取版本信息
654        let version = if combined_text.contains("windows 11") {
655            Some("Windows 11".to_string())
656        } else if combined_text.contains("windows 10") {
657            Some("Windows 10".to_string())
658        } else if combined_text.contains("windows server 2022") {
659            Some("Windows Server 2022".to_string())
660        } else if combined_text.contains("windows server 2019") {
661            Some("Windows Server 2019".to_string())
662        } else if combined_text.contains("windows server") {
663            Some("Windows Server".to_string())
664        } else if combined_text.contains("windows") {
665            Some("Windows".to_string())
666        } else {
667            None
668        };
669
670        // 提取架构信息
671        let architecture = if combined_text.contains("x64") || combined_text.contains("amd64") {
672            Some("x64".to_string())
673        } else if combined_text.contains("x86") {
674            Some("x86".to_string())
675        } else if combined_text.contains("arm64") {
676            Some("ARM64".to_string())
677        } else {
678            None
679        };
680
681        (version, architecture)
682    }
683
684    /// 从XML中的ARCH标签解析架构信息
685    pub fn parse_arch_from_xml(&self, image_xml: &str) -> Option<String> {
686        // 辅助函数：从 XML 中提取标签值
687        let extract_tag_value = |xml: &str, tag: &str| -> Option<String> {
688            let start_tag = format!("<{tag}>");
689            let end_tag = format!("</{tag}>");
690
691            if let Some(start) = xml.find(&start_tag) {
692                if let Some(end) = xml.find(&end_tag) {
693                    let value_start = start + start_tag.len();
694                    if value_start < end {
695                        return Some(xml[value_start..end].trim().to_string());
696                    }
697                }
698            }
699            None
700        };
701
702        // 提取ARCH标签值
703        if let Some(arch_value) = extract_tag_value(image_xml, "ARCH") {
704            match arch_value.as_str() {
705                "0" => Some("x86".to_string()),
706                "9" => Some("x64".to_string()),
707                "5" => Some("ARM".to_string()),
708                "12" => Some("ARM64".to_string()),
709                _ => {
710                    debug!("未知的架构值: {}", arch_value);
711                    None
712                }
713            }
714        } else {
715            None
716        }
717    }
718
719    /// 获取所有镜像信息
720    pub fn get_images(&self) -> &[ImageInfo] {
721        &self.images
722    }
723
724    /// 获取指定索引的镜像信息
725    #[allow(dead_code)]
726    pub fn get_image(&self, index: u32) -> Option<&ImageInfo> {
727        self.images.iter().find(|img| img.index == index)
728    }
729
730    /// 获取文件头信息
731    #[allow(dead_code)]
732    pub fn get_header(&self) -> Option<&WimHeader> {
733        self.header.as_ref()
734    }
735
736    /// 检查是否包含多个镜像
737    #[allow(dead_code)]
738    pub fn has_multiple_images(&self) -> bool {
739        self.header
740            .as_ref()
741            .map(|h| h.image_count > 1)
742            .unwrap_or(false)
743    }
744
745    /// 获取镜像数量
746    #[allow(dead_code)]
747    pub fn get_image_count(&self) -> u32 {
748        self.header.as_ref().map(|h| h.image_count).unwrap_or(0)
749    }
750
751    /// 检查是否为压缩文件
752    #[allow(dead_code)]
753    pub fn is_compressed(&self) -> bool {
754        self.header
755            .as_ref()
756            .map(|h| h.file_flags & FileFlags::COMPRESSION != 0)
757            .unwrap_or(false)
758    }
759
760    /// 获取压缩类型
761    #[allow(dead_code)]
762    pub fn get_compression_type(&self) -> Option<&'static str> {
763        if let Some(header) = &self.header {
764            if header.file_flags & FileFlags::COMPRESS_XPRESS != 0 {
765                Some("XPRESS")
766            } else if header.file_flags & FileFlags::COMPRESS_LZX != 0 {
767                Some("LZX")
768            } else if header.file_flags & FileFlags::COMPRESSION != 0 {
769                Some("Unknown")
770            } else {
771                None
772            }
773        } else {
774            None
775        }
776    }
777
778    /// 完整解析 WIM 文件（头部 + XML 数据）
779    pub fn parse_full(&mut self) -> Result<()> {
780        self.read_header()?;
781        self.read_xml_data()?;
782        Ok(())
783    }
784}
785
786impl std::fmt::Display for ImageInfo {
787    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
788        write!(f, "镜像 {} - {}", self.index, self.name)?;
789        if let Some(ref version) = self.version {
790            write!(f, " [{version}]")?;
791        }
792        if let Some(ref arch) = self.architecture {
793            write!(f, " [{arch}]")?;
794        }
795        write!(f, " | 描述: {}", self.description)?;
796        write!(
797            f,
798            " | 文件数: {}, 目录数: {}",
799            self.file_count, self.dir_count
800        )?;
801        write!(f, " | 总大小: {} MB", self.total_bytes / (1024 * 1024))?;
802        Ok(())
803    }
804}
805
806impl std::fmt::Display for WimHeader {
807    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
808        writeln!(f, "WIM Header:")?;
809        writeln!(f, "  Format Version: {}", self.format_version)?;
810        writeln!(f, "  File Flags: 0x{:08X}", self.file_flags)?;
811        writeln!(f, "  Image Count: {}", self.image_count)?;
812        writeln!(
813            f,
814            "  Segment: {}/{}",
815            self.segment_number, self.total_segments
816        )?;
817        writeln!(f, "  Bootable Image Index: {}", self.bootable_image_index)?;
818        Ok(())
819    }
820}
821
822#[allow(dead_code)]
823impl WimParser {
824    /// 获取所有镜像的版本摘要
825    #[allow(dead_code)]
826    pub fn get_version_summary(&self) -> Vec<String> {
827        let mut summaries = Vec::new();
828
829        for image in &self.images {
830            let mut summary = format!("镜像 {}: {}", image.index, image.name);
831
832            if let Some(ref version) = image.version {
833                summary.push_str(&format!(" ({version})"));
834            }
835
836            if let Some(ref arch) = image.architecture {
837                summary.push_str(&format!(" [{arch}]"));
838            }
839
840            summaries.push(summary);
841        }
842
843        summaries
844    }
845
846    /// 获取主要版本信息（如果有多个镜像，返回最常见的版本）
847    pub fn get_primary_version(&self) -> Option<String> {
848        if self.images.is_empty() {
849            return None;
850        }
851
852        // 统计版本出现频率
853        let mut version_counts = std::collections::HashMap::new();
854        for image in &self.images {
855            if let Some(ref version) = image.version {
856                *version_counts.entry(version.clone()).or_insert(0) += 1;
857            }
858        }
859
860        // 找到最常见的版本
861        version_counts
862            .into_iter()
863            .max_by_key(|(_, count)| *count)
864            .map(|(version, _)| version)
865    }
866
867    /// 获取主要架构信息（如果有多个镜像，返回最常见的架构）
868    pub fn get_primary_architecture(&self) -> Option<String> {
869        if self.images.is_empty() {
870            return None;
871        }
872
873        // 统计架构出现频率
874        let mut arch_counts = std::collections::HashMap::new();
875        for image in &self.images {
876            if let Some(ref arch) = image.architecture {
877                *arch_counts.entry(arch.clone()).or_insert(0) += 1;
878            }
879        }
880
881        // 找到最常见的架构
882        arch_counts
883            .into_iter()
884            .max_by_key(|(_, count)| *count)
885            .map(|(arch, _)| arch)
886    }
887
888    /// 检查是否包含指定版本的镜像
889    #[allow(dead_code)]
890    pub fn has_version(&self, version: &str) -> bool {
891        self.images.iter().any(|img| {
892            img.version
893                .as_ref()
894                .is_some_and(|v| v.to_lowercase().contains(&version.to_lowercase()))
895        })
896    }
897
898    /// 检查是否包含指定架构的镜像
899    #[allow(dead_code)]
900    pub fn has_architecture(&self, arch: &str) -> bool {
901        self.images.iter().any(|img| {
902            img.architecture
903                .as_ref()
904                .is_some_and(|a| a.to_lowercase().contains(&arch.to_lowercase()))
905        })
906    }
907
908    /// 获取Windows版本的详细信息
909    pub fn get_windows_info(&self) -> Option<WindowsInfo> {
910        let primary_version = self.get_primary_version()?;
911        let primary_arch = self.get_primary_architecture()?;
912
913        // 检查是否是Windows镜像
914        if !primary_version.to_lowercase().contains("windows") {
915            return None;
916        }
917
918        // 计算总的镜像版本（如Pro, Home, Enterprise等）
919        let mut editions = Vec::new();
920        for image in &self.images {
921            let name_lower = image.name.to_lowercase();
922            if name_lower.contains("pro") && !editions.contains(&"Pro".to_string()) {
923                editions.push("Pro".to_string());
924            } else if name_lower.contains("home") && !editions.contains(&"Home".to_string()) {
925                editions.push("Home".to_string());
926            } else if name_lower.contains("enterprise")
927                && !editions.contains(&"Enterprise".to_string())
928            {
929                editions.push("Enterprise".to_string());
930            } else if name_lower.contains("education")
931                && !editions.contains(&"Education".to_string())
932            {
933                editions.push("Education".to_string());
934            }
935        }
936
937        Some(WindowsInfo {
938            version: primary_version,
939            architecture: primary_arch,
940            editions,
941            image_count: self.images.len() as u32,
942            total_size: self.images.iter().map(|img| img.total_bytes).sum(),
943        })
944    }
945}
946
/// Summary of the Windows release described by a WIM file.
#[derive(Debug, Clone)]
pub struct WindowsInfo {
    /// Primary version label.
    pub version: String,
    /// Primary architecture label.
    pub architecture: String,
    /// Edition labels that were detected.
    pub editions: Vec<String>,
    /// Number of images.
    pub image_count: u32,
    /// Combined size of all images, in bytes.
    pub total_size: u64,
}

/// Single-line, human-readable rendering of the summary.
impl std::fmt::Display for WindowsInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Sizes are shown in whole mebibytes (integer division).
        let size_mb = self.total_size / (1024 * 1024);
        let has_editions = !self.editions.is_empty();

        write!(f, "{} ({})", self.version, self.architecture)?;
        if has_editions {
            write!(f, " - 版本: {}", self.editions.join(", "))?;
        }
        write!(f, " | 镜像数量: {}", self.image_count)?;
        write!(f, " | 总大小: {} MB", size_mb)
    }
}
968
// Benchmark and test helpers; compiled only for tests or when the
// `benchmarking` feature is enabled.
#[cfg(any(test, feature = "benchmarking"))]
impl WimParser {
    /// Test/bench hook: parses raw XML bytes with the current (string-search)
    /// implementation.
    pub fn parse_xml_data_for_bench(&mut self, xml_buffer: &[u8]) -> Result<()> {
        self.parse_xml_data(xml_buffer)
    }

    /// Test/bench hook: parses raw XML bytes with the optimized
    /// implementation.
    pub fn parse_xml_data_optimized_for_bench(&mut self, xml_buffer: &[u8]) -> Result<()> {
        self.parse_xml_data_optimized(xml_buffer)
    }

    /// Test hook: runs the optimized parsing path on `xml_buffer`
    /// (same call as `parse_xml_data_optimized_for_bench`).
    pub fn use_optimized_parsing(&mut self, xml_buffer: &[u8]) -> Result<()> {
        self.parse_xml_data_optimized(xml_buffer)
    }
}
wim_parser/lib.rs

wim_parser/
lib.rs