Skip to main content

provenant/parsers/
microsoft_update_manifest.rs

1//! Parser for Microsoft Update Manifest (.mum) files.
2//!
3//! Extracts Windows Update package metadata from .mum XML manifest files.
4//!
5//! # Supported Formats
6//! - `*.mum` - Microsoft Update Manifest XML files
7//!
8//! # Implementation Notes
9//! - Format: XML with assembly and package metadata
10//! - Spec: Windows Update manifests
11
12use crate::models::{DatasourceId, PackageType};
13use std::path::Path;
14
15use crate::parser_warn as warn;
16use quick_xml::events::Event;
17use quick_xml::reader::Reader;
18
19use crate::models::PackageData;
20use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
21
22use super::PackageParser;
23
24const PACKAGE_TYPE: PackageType = PackageType::WindowsUpdate;
25
26pub struct MicrosoftUpdateManifestParser;
27
28impl PackageParser for MicrosoftUpdateManifestParser {
29    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
30
31    fn is_match(path: &Path) -> bool {
32        path.extension().is_some_and(|ext| ext == "mum")
33    }
34
35    fn extract_packages(path: &Path) -> Vec<PackageData> {
36        let content = match read_file_to_string(path, None) {
37            Ok(c) => c,
38            Err(e) => {
39                warn!("Failed to read .mum file {:?}: {}", path, e);
40                return vec![PackageData {
41                    package_type: Some(PACKAGE_TYPE),
42                    datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
43                    ..Default::default()
44                }];
45            }
46        };
47
48        vec![parse_mum_xml(&content)]
49    }
50}
51
52pub(crate) fn parse_mum_xml(content: &str) -> PackageData {
53    let mut reader = Reader::from_str(content);
54    reader.config_mut().trim_text(true);
55
56    let mut name = None;
57    let mut version = None;
58    let mut description = None;
59    let mut copyright = None;
60    let mut homepage_url = None;
61
62    let mut buf = Vec::new();
63    let mut iteration_count: usize = 0;
64
65    loop {
66        iteration_count += 1;
67        if iteration_count > MAX_ITERATION_COUNT {
68            warn!(
69                "Exceeded MAX_ITERATION_COUNT ({}) parsing .mum XML, stopping",
70                MAX_ITERATION_COUNT
71            );
72            break;
73        }
74        match reader.read_event_into(&mut buf) {
75            Ok(Event::Empty(e)) => {
76                if e.name().as_ref() == b"assemblyIdentity" {
77                    for attr in e.attributes().filter_map(|a| a.ok()) {
78                        match attr.key.as_ref() {
79                            b"name" => {
80                                let raw = attr.value.to_vec();
81                                let has_invalid = String::from_utf8(raw.clone()).is_err();
82                                let val = String::from_utf8_lossy(&raw).into_owned();
83                                if has_invalid {
84                                    warn!(
85                                        "Invalid UTF-8 in 'name' attribute, using lossy conversion"
86                                    );
87                                }
88                                name = Some(truncate_field(val));
89                            }
90                            b"version" => {
91                                let raw = attr.value.to_vec();
92                                let has_invalid = String::from_utf8(raw.clone()).is_err();
93                                let val = String::from_utf8_lossy(&raw).into_owned();
94                                if has_invalid {
95                                    warn!(
96                                        "Invalid UTF-8 in 'version' attribute, using lossy conversion"
97                                    );
98                                }
99                                version = Some(truncate_field(val));
100                            }
101                            _ => {}
102                        }
103                    }
104                }
105            }
106            Ok(Event::Start(e)) => {
107                if e.name().as_ref() == b"assembly" {
108                    for attr in e.attributes().filter_map(|a| a.ok()) {
109                        match attr.key.as_ref() {
110                            b"description" => {
111                                let raw = attr.value.to_vec();
112                                let has_invalid = String::from_utf8(raw.clone()).is_err();
113                                let val = String::from_utf8_lossy(&raw).into_owned();
114                                if has_invalid {
115                                    warn!(
116                                        "Invalid UTF-8 in 'description' attribute, using lossy conversion"
117                                    );
118                                }
119                                description = Some(truncate_field(val));
120                            }
121                            b"copyright" => {
122                                let raw = attr.value.to_vec();
123                                let has_invalid = String::from_utf8(raw.clone()).is_err();
124                                let val = String::from_utf8_lossy(&raw).into_owned();
125                                if has_invalid {
126                                    warn!(
127                                        "Invalid UTF-8 in 'copyright' attribute, using lossy conversion"
128                                    );
129                                }
130                                copyright = Some(truncate_field(val));
131                            }
132                            b"supportInformation" => {
133                                let raw = attr.value.to_vec();
134                                let has_invalid = String::from_utf8(raw.clone()).is_err();
135                                let val = String::from_utf8_lossy(&raw).into_owned();
136                                if has_invalid {
137                                    warn!(
138                                        "Invalid UTF-8 in 'supportInformation' attribute, using lossy conversion"
139                                    );
140                                }
141                                homepage_url = Some(truncate_field(val));
142                            }
143                            _ => {}
144                        }
145                    }
146                }
147            }
148            Ok(Event::Eof) => break,
149            Err(e) => {
150                warn!(
151                    "Error parsing XML at position {}: {}",
152                    reader.buffer_position(),
153                    e
154                );
155                break;
156            }
157            _ => {}
158        }
159        buf.clear();
160    }
161
162    PackageData {
163        package_type: Some(PACKAGE_TYPE),
164        name,
165        version,
166        description,
167        homepage_url,
168        copyright,
169        datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
170        ..Default::default()
171    }
172}
173
174crate::register_parser!(
175    "Microsoft Update Manifest .mum file",
176    &["*.mum"],
177    "windows-update",
178    "",
179    None,
180);