Skip to main content

provenant/parsers/
microsoft_update_manifest.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Microsoft Update Manifest (.mum) files.
5//!
6//! Extracts Windows Update package metadata from .mum XML manifest files.
7//!
8//! # Supported Formats
9//! - `*.mum` - Microsoft Update Manifest XML files
10//!
11//! # Implementation Notes
12//! - Format: XML with assembly and package metadata
13//! - Spec: Windows Update manifests
14
15use crate::models::{DatasourceId, PackageType, Party};
16use std::path::Path;
17
18use crate::parser_warn as warn;
19use quick_xml::events::Event;
20use quick_xml::reader::Reader;
21
22use crate::models::PackageData;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24
25use super::PackageParser;
26use super::metadata::ParserMetadata;
27
/// Package type stamped on every `PackageData` this module produces, including
/// the placeholder returned when the manifest file cannot be read.
const PACKAGE_TYPE: PackageType = PackageType::WindowsUpdate;
29
/// Parser for Microsoft Update Manifest (`.mum`) files.
///
/// The type carries no state; it exists to host the `PackageParser`
/// implementation for `.mum` manifests.
pub struct MicrosoftUpdateManifestParser;
31
32impl PackageParser for MicrosoftUpdateManifestParser {
33    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
34
35    fn metadata() -> Vec<ParserMetadata> {
36        vec![ParserMetadata {
37            description: "Microsoft Update Manifest .mum file",
38            file_patterns: &["*.mum"],
39            package_type: "windows-update",
40            primary_language: "",
41            documentation_url: None,
42        }]
43    }
44
45    fn is_match(path: &Path) -> bool {
46        path.extension().is_some_and(|ext| ext == "mum")
47    }
48
49    fn extract_packages(path: &Path) -> Vec<PackageData> {
50        let content = match read_file_to_string(path, None) {
51            Ok(c) => c,
52            Err(e) => {
53                warn!("Failed to read .mum file {:?}: {}", path, e);
54                return vec![PackageData {
55                    package_type: Some(PACKAGE_TYPE),
56                    datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
57                    ..Default::default()
58                }];
59            }
60        };
61
62        vec![parse_mum_xml(&content)]
63    }
64}
65
66pub(crate) fn parse_mum_xml(content: &str) -> PackageData {
67    let mut reader = Reader::from_str(content);
68    reader.config_mut().trim_text(true);
69
70    let mut company = None;
71    let mut name = None;
72    let mut version = None;
73    let mut description = None;
74    let mut copyright = None;
75    let mut homepage_url = None;
76
77    let mut buf = Vec::new();
78    let mut iteration_count: usize = 0;
79
80    loop {
81        iteration_count += 1;
82        if iteration_count > MAX_ITERATION_COUNT {
83            warn!(
84                "Exceeded MAX_ITERATION_COUNT ({}) parsing .mum XML, stopping",
85                MAX_ITERATION_COUNT
86            );
87            break;
88        }
89        match reader.read_event_into(&mut buf) {
90            Ok(Event::Empty(e)) if e.name().as_ref() == b"assemblyIdentity" => {
91                for attr in e.attributes().filter_map(|a| a.ok()) {
92                    match attr.key.as_ref() {
93                        b"name" => {
94                            let raw = attr.value.to_vec();
95                            let has_invalid = String::from_utf8(raw.clone()).is_err();
96                            let val = String::from_utf8_lossy(&raw).into_owned();
97                            if has_invalid {
98                                warn!("Invalid UTF-8 in 'name' attribute, using lossy conversion");
99                            }
100                            if name.is_none() {
101                                name = Some(truncate_field(val));
102                            }
103                        }
104                        b"version" => {
105                            let raw = attr.value.to_vec();
106                            let has_invalid = String::from_utf8(raw.clone()).is_err();
107                            let val = String::from_utf8_lossy(&raw).into_owned();
108                            if has_invalid {
109                                warn!(
110                                    "Invalid UTF-8 in 'version' attribute, using lossy conversion"
111                                );
112                            }
113                            if version.is_none() {
114                                version = Some(truncate_field(val));
115                            }
116                        }
117                        _ => {}
118                    }
119                }
120            }
121            Ok(Event::Start(e)) if e.name().as_ref() == b"assembly" => {
122                for attr in e.attributes().filter_map(|a| a.ok()) {
123                    match attr.key.as_ref() {
124                        b"description" => {
125                            let raw = attr.value.to_vec();
126                            let has_invalid = String::from_utf8(raw.clone()).is_err();
127                            let val = String::from_utf8_lossy(&raw).into_owned();
128                            if has_invalid {
129                                warn!(
130                                    "Invalid UTF-8 in 'description' attribute, using lossy conversion"
131                                );
132                            }
133                            description = Some(truncate_field(val));
134                        }
135                        b"company" => {
136                            let raw = attr.value.to_vec();
137                            let has_invalid = String::from_utf8(raw.clone()).is_err();
138                            let val = String::from_utf8_lossy(&raw).into_owned();
139                            if has_invalid {
140                                warn!(
141                                    "Invalid UTF-8 in 'company' attribute, using lossy conversion"
142                                );
143                            }
144                            company = Some(truncate_field(val));
145                        }
146                        b"copyright" => {
147                            let raw = attr.value.to_vec();
148                            let has_invalid = String::from_utf8(raw.clone()).is_err();
149                            let val = String::from_utf8_lossy(&raw).into_owned();
150                            if has_invalid {
151                                warn!(
152                                    "Invalid UTF-8 in 'copyright' attribute, using lossy conversion"
153                                );
154                            }
155                            copyright = Some(truncate_field(val));
156                        }
157                        b"supportInformation" => {
158                            let raw = attr.value.to_vec();
159                            let has_invalid = String::from_utf8(raw.clone()).is_err();
160                            let val = String::from_utf8_lossy(&raw).into_owned();
161                            if has_invalid {
162                                warn!(
163                                    "Invalid UTF-8 in 'supportInformation' attribute, using lossy conversion"
164                                );
165                            }
166                            homepage_url = Some(truncate_field(val));
167                        }
168                        _ => {}
169                    }
170                }
171            }
172            Ok(Event::Eof) => break,
173            Err(e) => {
174                warn!(
175                    "Error parsing XML at position {}: {}",
176                    reader.buffer_position(),
177                    e
178                );
179                break;
180            }
181            _ => {}
182        }
183        buf.clear();
184    }
185
186    let parties = company.clone().map_or_else(Vec::new, |company_name| {
187        vec![Party {
188            r#type: Some("organization".to_string()),
189            role: Some("owner".to_string()),
190            name: Some(company_name),
191            email: None,
192            url: None,
193            organization: None,
194            organization_url: None,
195            timezone: None,
196        }]
197    });
198
199    PackageData {
200        package_type: Some(PACKAGE_TYPE),
201        name,
202        version,
203        description,
204        parties,
205        homepage_url,
206        copyright,
207        holder: company,
208        datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
209        ..Default::default()
210    }
211}