provenant/parsers/
microsoft_update_manifest.rs1use crate::models::{DatasourceId, PackageType, Party};
16use std::path::Path;
17
18use crate::parser_warn as warn;
19use quick_xml::events::Event;
20use quick_xml::reader::Reader;
21
22use crate::models::PackageData;
23use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
24
25use super::PackageParser;
26
/// Package type stamped on every `PackageData` this module produces, including the
/// placeholder emitted when the manifest file cannot be read.
const PACKAGE_TYPE: PackageType = PackageType::WindowsUpdate;
28
/// Stateless parser for Microsoft Update Manifest (`.mum`) files.
///
/// It carries no data; all behavior lives in its `PackageParser` implementation.
pub struct MicrosoftUpdateManifestParser;
30
31impl PackageParser for MicrosoftUpdateManifestParser {
32 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
33
34 fn is_match(path: &Path) -> bool {
35 path.extension().is_some_and(|ext| ext == "mum")
36 }
37
38 fn extract_packages(path: &Path) -> Vec<PackageData> {
39 let content = match read_file_to_string(path, None) {
40 Ok(c) => c,
41 Err(e) => {
42 warn!("Failed to read .mum file {:?}: {}", path, e);
43 return vec![PackageData {
44 package_type: Some(PACKAGE_TYPE),
45 datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
46 ..Default::default()
47 }];
48 }
49 };
50
51 vec![parse_mum_xml(&content)]
52 }
53}
54
55pub(crate) fn parse_mum_xml(content: &str) -> PackageData {
56 let mut reader = Reader::from_str(content);
57 reader.config_mut().trim_text(true);
58
59 let mut company = None;
60 let mut name = None;
61 let mut version = None;
62 let mut description = None;
63 let mut copyright = None;
64 let mut homepage_url = None;
65
66 let mut buf = Vec::new();
67 let mut iteration_count: usize = 0;
68
69 loop {
70 iteration_count += 1;
71 if iteration_count > MAX_ITERATION_COUNT {
72 warn!(
73 "Exceeded MAX_ITERATION_COUNT ({}) parsing .mum XML, stopping",
74 MAX_ITERATION_COUNT
75 );
76 break;
77 }
78 match reader.read_event_into(&mut buf) {
79 Ok(Event::Empty(e)) if e.name().as_ref() == b"assemblyIdentity" => {
80 for attr in e.attributes().filter_map(|a| a.ok()) {
81 match attr.key.as_ref() {
82 b"name" => {
83 let raw = attr.value.to_vec();
84 let has_invalid = String::from_utf8(raw.clone()).is_err();
85 let val = String::from_utf8_lossy(&raw).into_owned();
86 if has_invalid {
87 warn!("Invalid UTF-8 in 'name' attribute, using lossy conversion");
88 }
89 if name.is_none() {
90 name = Some(truncate_field(val));
91 }
92 }
93 b"version" => {
94 let raw = attr.value.to_vec();
95 let has_invalid = String::from_utf8(raw.clone()).is_err();
96 let val = String::from_utf8_lossy(&raw).into_owned();
97 if has_invalid {
98 warn!(
99 "Invalid UTF-8 in 'version' attribute, using lossy conversion"
100 );
101 }
102 if version.is_none() {
103 version = Some(truncate_field(val));
104 }
105 }
106 _ => {}
107 }
108 }
109 }
110 Ok(Event::Start(e)) if e.name().as_ref() == b"assembly" => {
111 for attr in e.attributes().filter_map(|a| a.ok()) {
112 match attr.key.as_ref() {
113 b"description" => {
114 let raw = attr.value.to_vec();
115 let has_invalid = String::from_utf8(raw.clone()).is_err();
116 let val = String::from_utf8_lossy(&raw).into_owned();
117 if has_invalid {
118 warn!(
119 "Invalid UTF-8 in 'description' attribute, using lossy conversion"
120 );
121 }
122 description = Some(truncate_field(val));
123 }
124 b"company" => {
125 let raw = attr.value.to_vec();
126 let has_invalid = String::from_utf8(raw.clone()).is_err();
127 let val = String::from_utf8_lossy(&raw).into_owned();
128 if has_invalid {
129 warn!(
130 "Invalid UTF-8 in 'company' attribute, using lossy conversion"
131 );
132 }
133 company = Some(truncate_field(val));
134 }
135 b"copyright" => {
136 let raw = attr.value.to_vec();
137 let has_invalid = String::from_utf8(raw.clone()).is_err();
138 let val = String::from_utf8_lossy(&raw).into_owned();
139 if has_invalid {
140 warn!(
141 "Invalid UTF-8 in 'copyright' attribute, using lossy conversion"
142 );
143 }
144 copyright = Some(truncate_field(val));
145 }
146 b"supportInformation" => {
147 let raw = attr.value.to_vec();
148 let has_invalid = String::from_utf8(raw.clone()).is_err();
149 let val = String::from_utf8_lossy(&raw).into_owned();
150 if has_invalid {
151 warn!(
152 "Invalid UTF-8 in 'supportInformation' attribute, using lossy conversion"
153 );
154 }
155 homepage_url = Some(truncate_field(val));
156 }
157 _ => {}
158 }
159 }
160 }
161 Ok(Event::Eof) => break,
162 Err(e) => {
163 warn!(
164 "Error parsing XML at position {}: {}",
165 reader.buffer_position(),
166 e
167 );
168 break;
169 }
170 _ => {}
171 }
172 buf.clear();
173 }
174
175 let parties = company.clone().map_or_else(Vec::new, |company_name| {
176 vec![Party {
177 r#type: Some("organization".to_string()),
178 role: Some("owner".to_string()),
179 name: Some(company_name),
180 email: None,
181 url: None,
182 organization: None,
183 organization_url: None,
184 timezone: None,
185 }]
186 });
187
188 PackageData {
189 package_type: Some(PACKAGE_TYPE),
190 name,
191 version,
192 description,
193 parties,
194 homepage_url,
195 copyright,
196 holder: company,
197 datasource_id: Some(DatasourceId::MicrosoftUpdateManifestMum),
198 ..Default::default()
199 }
200}
201
// Invokes the crate's `register_parser!` macro for this parser.
// NOTE(review): the arguments are positional and their meaning is defined by the
// macro, which is not visible here — presumably: human-readable description, path
// patterns, package type string, primary language (empty), documentation URL (none).
// Confirm against the macro definition before changing any of them.
crate::register_parser!(
    "Microsoft Update Manifest .mum file",
    &["*.mum"],
    "windows-update",
    "",
    None,
);