Skip to main content

provenant/parsers/
microsoft_update_manifest.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Microsoft Update Manifest (.mum) files.
5//!
6//! Extracts Windows Update package metadata from .mum XML manifest files.
7//!
8//! # Supported Formats
9//! - `*.mum` - Microsoft Update Manifest XML files
10//!
11//! # Implementation Notes
12//! - Format: XML with assembly and package metadata
13//! - Spec: Windows Update manifests
14
15use crate::models::{DatasourceId, PackageType, Party};
16use std::path::Path;
17
18use crate::parser_warn as warn;
19use quick_xml::events::Event;
20use quick_xml::reader::Reader;
21
22use crate::models::PackageData;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24
25use super::PackageParser;
26
/// Package type stamped on every `PackageData` this module produces, including
/// the placeholder record returned when the manifest cannot be read.
const PACKAGE_TYPE: PackageType = PackageType::WindowsUpdate;
28
29pub struct MicrosoftUpdateManifestParser;
30
31impl PackageParser for MicrosoftUpdateManifestParser {
32    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
33
34    fn is_match(path: &Path) -> bool {
35        path.extension().is_some_and(|ext| ext == "mum")
36    }
37
38    fn extract_packages(path: &Path) -> Vec<PackageData> {
39        let content = match read_file_to_string(path, None) {
40            Ok(c) => c,
41            Err(e) => {
42                warn!("Failed to read .mum file {:?}: {}", path, e);
43                return vec![PackageData {
44                    package_type: Some(PACKAGE_TYPE),
45                    datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
46                    ..Default::default()
47                }];
48            }
49        };
50
51        vec![parse_mum_xml(&content)]
52    }
53}
54
55pub(crate) fn parse_mum_xml(content: &str) -> PackageData {
56    let mut reader = Reader::from_str(content);
57    reader.config_mut().trim_text(true);
58
59    let mut company = None;
60    let mut name = None;
61    let mut version = None;
62    let mut description = None;
63    let mut copyright = None;
64    let mut homepage_url = None;
65
66    let mut buf = Vec::new();
67    let mut iteration_count: usize = 0;
68
69    loop {
70        iteration_count += 1;
71        if iteration_count > MAX_ITERATION_COUNT {
72            warn!(
73                "Exceeded MAX_ITERATION_COUNT ({}) parsing .mum XML, stopping",
74                MAX_ITERATION_COUNT
75            );
76            break;
77        }
78        match reader.read_event_into(&mut buf) {
79            Ok(Event::Empty(e)) if e.name().as_ref() == b"assemblyIdentity" => {
80                for attr in e.attributes().filter_map(|a| a.ok()) {
81                    match attr.key.as_ref() {
82                        b"name" => {
83                            let raw = attr.value.to_vec();
84                            let has_invalid = String::from_utf8(raw.clone()).is_err();
85                            let val = String::from_utf8_lossy(&raw).into_owned();
86                            if has_invalid {
87                                warn!("Invalid UTF-8 in 'name' attribute, using lossy conversion");
88                            }
89                            if name.is_none() {
90                                name = Some(truncate_field(val));
91                            }
92                        }
93                        b"version" => {
94                            let raw = attr.value.to_vec();
95                            let has_invalid = String::from_utf8(raw.clone()).is_err();
96                            let val = String::from_utf8_lossy(&raw).into_owned();
97                            if has_invalid {
98                                warn!(
99                                    "Invalid UTF-8 in 'version' attribute, using lossy conversion"
100                                );
101                            }
102                            if version.is_none() {
103                                version = Some(truncate_field(val));
104                            }
105                        }
106                        _ => {}
107                    }
108                }
109            }
110            Ok(Event::Start(e)) if e.name().as_ref() == b"assembly" => {
111                for attr in e.attributes().filter_map(|a| a.ok()) {
112                    match attr.key.as_ref() {
113                        b"description" => {
114                            let raw = attr.value.to_vec();
115                            let has_invalid = String::from_utf8(raw.clone()).is_err();
116                            let val = String::from_utf8_lossy(&raw).into_owned();
117                            if has_invalid {
118                                warn!(
119                                    "Invalid UTF-8 in 'description' attribute, using lossy conversion"
120                                );
121                            }
122                            description = Some(truncate_field(val));
123                        }
124                        b"company" => {
125                            let raw = attr.value.to_vec();
126                            let has_invalid = String::from_utf8(raw.clone()).is_err();
127                            let val = String::from_utf8_lossy(&raw).into_owned();
128                            if has_invalid {
129                                warn!(
130                                    "Invalid UTF-8 in 'company' attribute, using lossy conversion"
131                                );
132                            }
133                            company = Some(truncate_field(val));
134                        }
135                        b"copyright" => {
136                            let raw = attr.value.to_vec();
137                            let has_invalid = String::from_utf8(raw.clone()).is_err();
138                            let val = String::from_utf8_lossy(&raw).into_owned();
139                            if has_invalid {
140                                warn!(
141                                    "Invalid UTF-8 in 'copyright' attribute, using lossy conversion"
142                                );
143                            }
144                            copyright = Some(truncate_field(val));
145                        }
146                        b"supportInformation" => {
147                            let raw = attr.value.to_vec();
148                            let has_invalid = String::from_utf8(raw.clone()).is_err();
149                            let val = String::from_utf8_lossy(&raw).into_owned();
150                            if has_invalid {
151                                warn!(
152                                    "Invalid UTF-8 in 'supportInformation' attribute, using lossy conversion"
153                                );
154                            }
155                            homepage_url = Some(truncate_field(val));
156                        }
157                        _ => {}
158                    }
159                }
160            }
161            Ok(Event::Eof) => break,
162            Err(e) => {
163                warn!(
164                    "Error parsing XML at position {}: {}",
165                    reader.buffer_position(),
166                    e
167                );
168                break;
169            }
170            _ => {}
171        }
172        buf.clear();
173    }
174
175    let parties = company.clone().map_or_else(Vec::new, |company_name| {
176        vec![Party {
177            r#type: Some("organization".to_string()),
178            role: Some("owner".to_string()),
179            name: Some(company_name),
180            email: None,
181            url: None,
182            organization: None,
183            organization_url: None,
184            timezone: None,
185        }]
186    });
187
188    PackageData {
189        package_type: Some(PACKAGE_TYPE),
190        name,
191        version,
192        description,
193        parties,
194        homepage_url,
195        copyright,
196        holder: company,
197        datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
198        ..Default::default()
199    }
200}
201
// Registers this parser through the crate-level `register_parser!` macro.
// The arguments appear to be, in order: a human-readable description, the
// file glob patterns, and the package type string. The meaning of the
// trailing `""` and `None` is defined by the macro — TODO confirm against
// its definition.
crate::register_parser!(
    "Microsoft Update Manifest .mum file",
    &["*.mum"],
    "windows-update",
    "",
    None,
);