provenant/parsers/
microsoft_update_manifest.rs1use crate::models::{DatasourceId, PackageType, Party};
16use std::path::Path;
17
18use crate::parser_warn as warn;
19use quick_xml::events::Event;
20use quick_xml::reader::Reader;
21
22use crate::models::PackageData;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24
25use super::PackageParser;
26use super::metadata::ParserMetadata;
27
28const PACKAGE_TYPE: PackageType = PackageType::WindowsUpdate;
29
30pub struct MicrosoftUpdateManifestParser;
31
32impl PackageParser for MicrosoftUpdateManifestParser {
33 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
34
35 fn metadata() -> Vec<ParserMetadata> {
36 vec![ParserMetadata {
37 description: "Microsoft Update Manifest .mum file",
38 file_patterns: &["*.mum"],
39 package_type: "windows-update",
40 primary_language: "",
41 documentation_url: None,
42 }]
43 }
44
45 fn is_match(path: &Path) -> bool {
46 path.extension().is_some_and(|ext| ext == "mum")
47 }
48
49 fn extract_packages(path: &Path) -> Vec<PackageData> {
50 let content = match read_file_to_string(path, None) {
51 Ok(c) => c,
52 Err(e) => {
53 warn!("Failed to read .mum file {:?}: {}", path, e);
54 return vec![PackageData {
55 package_type: Some(PACKAGE_TYPE),
56 datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
57 ..Default::default()
58 }];
59 }
60 };
61
62 vec![parse_mum_xml(&content)]
63 }
64}
65
66pub(crate) fn parse_mum_xml(content: &str) -> PackageData {
67 let mut reader = Reader::from_str(content);
68 reader.config_mut().trim_text(true);
69
70 let mut company = None;
71 let mut name = None;
72 let mut version = None;
73 let mut description = None;
74 let mut copyright = None;
75 let mut homepage_url = None;
76
77 let mut buf = Vec::new();
78 let mut iteration_count: usize = 0;
79
80 loop {
81 iteration_count += 1;
82 if iteration_count > MAX_ITERATION_COUNT {
83 warn!(
84 "Exceeded MAX_ITERATION_COUNT ({}) parsing .mum XML, stopping",
85 MAX_ITERATION_COUNT
86 );
87 break;
88 }
89 match reader.read_event_into(&mut buf) {
90 Ok(Event::Empty(e)) if e.name().as_ref() == b"assemblyIdentity" => {
91 for attr in e.attributes().filter_map(|a| a.ok()) {
92 match attr.key.as_ref() {
93 b"name" => {
94 let raw = attr.value.to_vec();
95 let has_invalid = String::from_utf8(raw.clone()).is_err();
96 let val = String::from_utf8_lossy(&raw).into_owned();
97 if has_invalid {
98 warn!("Invalid UTF-8 in 'name' attribute, using lossy conversion");
99 }
100 if name.is_none() {
101 name = Some(truncate_field(val));
102 }
103 }
104 b"version" => {
105 let raw = attr.value.to_vec();
106 let has_invalid = String::from_utf8(raw.clone()).is_err();
107 let val = String::from_utf8_lossy(&raw).into_owned();
108 if has_invalid {
109 warn!(
110 "Invalid UTF-8 in 'version' attribute, using lossy conversion"
111 );
112 }
113 if version.is_none() {
114 version = Some(truncate_field(val));
115 }
116 }
117 _ => {}
118 }
119 }
120 }
121 Ok(Event::Start(e)) if e.name().as_ref() == b"assembly" => {
122 for attr in e.attributes().filter_map(|a| a.ok()) {
123 match attr.key.as_ref() {
124 b"description" => {
125 let raw = attr.value.to_vec();
126 let has_invalid = String::from_utf8(raw.clone()).is_err();
127 let val = String::from_utf8_lossy(&raw).into_owned();
128 if has_invalid {
129 warn!(
130 "Invalid UTF-8 in 'description' attribute, using lossy conversion"
131 );
132 }
133 description = Some(truncate_field(val));
134 }
135 b"company" => {
136 let raw = attr.value.to_vec();
137 let has_invalid = String::from_utf8(raw.clone()).is_err();
138 let val = String::from_utf8_lossy(&raw).into_owned();
139 if has_invalid {
140 warn!(
141 "Invalid UTF-8 in 'company' attribute, using lossy conversion"
142 );
143 }
144 company = Some(truncate_field(val));
145 }
146 b"copyright" => {
147 let raw = attr.value.to_vec();
148 let has_invalid = String::from_utf8(raw.clone()).is_err();
149 let val = String::from_utf8_lossy(&raw).into_owned();
150 if has_invalid {
151 warn!(
152 "Invalid UTF-8 in 'copyright' attribute, using lossy conversion"
153 );
154 }
155 copyright = Some(truncate_field(val));
156 }
157 b"supportInformation" => {
158 let raw = attr.value.to_vec();
159 let has_invalid = String::from_utf8(raw.clone()).is_err();
160 let val = String::from_utf8_lossy(&raw).into_owned();
161 if has_invalid {
162 warn!(
163 "Invalid UTF-8 in 'supportInformation' attribute, using lossy conversion"
164 );
165 }
166 homepage_url = Some(truncate_field(val));
167 }
168 _ => {}
169 }
170 }
171 }
172 Ok(Event::Eof) => break,
173 Err(e) => {
174 warn!(
175 "Error parsing XML at position {}: {}",
176 reader.buffer_position(),
177 e
178 );
179 break;
180 }
181 _ => {}
182 }
183 buf.clear();
184 }
185
186 let parties = company.clone().map_or_else(Vec::new, |company_name| {
187 vec![Party {
188 r#type: Some("organization".to_string()),
189 role: Some("owner".to_string()),
190 name: Some(company_name),
191 email: None,
192 url: None,
193 organization: None,
194 organization_url: None,
195 timezone: None,
196 }]
197 });
198
199 PackageData {
200 package_type: Some(PACKAGE_TYPE),
201 name,
202 version,
203 description,
204 parties,
205 homepage_url,
206 copyright,
207 holder: company,
208 datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
209 ..Default::default()
210 }
211}