1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
/// Package type reported by every parser defined in this file.
const PACKAGE_TYPE: PackageType = PackageType::Android;
/// Largest on-disk archive size, in bytes, that `read_best_zip_entry` will open (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Largest uncompressed size, in bytes, accepted for a selected archive entry (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Cap on the running sum of declared uncompressed entry sizes while scanning an archive (1 GiB).
const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
/// Highest uncompressed-to-compressed size ratio tolerated for a selected archive entry.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
/// Namespace URI whose attributes are exposed with an `android:` key prefix.
const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30 PackageData {
31 package_type: Some(PACKAGE_TYPE),
32 datasource_id: Some(datasource_id),
33 ..Default::default()
34 }
35}
36
/// Parser for Android Soong `METADATA` text-proto files.
pub struct AndroidSoongMetadataParser;
/// Parser for standalone `AndroidManifest.xml` files (text XML, otherwise binary Android XML).
pub struct AndroidManifestParser;
/// Parser for `.apk` archives; reads the top-level `AndroidManifest.xml` entry.
pub struct AndroidApkParser;
/// Parser for `.aab` archives; reads a `*/manifest/AndroidManifest.xml` entry in proto form.
pub struct AndroidAabParser;
41
42impl PackageParser for AndroidSoongMetadataParser {
43 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
47 && !path
48 .parent()
49 .and_then(|parent| parent.file_name())
50 .and_then(|name| name.to_str())
51 .is_some_and(|name| name.ends_with(".dist-info"))
52 }
53
54 fn extract_packages(path: &Path) -> Vec<PackageData> {
55 let content = match crate::parsers::utils::read_file_to_string(path, None) {
56 Ok(content) => content,
57 Err(error) => {
58 warn!(
59 "Failed to read Android Soong METADATA {:?}: {}",
60 path, error
61 );
62 return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
63 }
64 };
65
66 vec![parse_soong_metadata(&content)]
67 }
68}
69
70impl PackageParser for AndroidManifestParser {
71 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73 fn is_match(path: &Path) -> bool {
74 path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
75 }
76
77 fn extract_packages(path: &Path) -> Vec<PackageData> {
78 let bytes = match read_file_bytes(path, None) {
79 Ok(bytes) => bytes,
80 Err(error) => {
81 warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
82 return vec![default_package_data(DatasourceId::AndroidManifestXml)];
83 }
84 };
85
86 parse_manifest_bytes(
87 &bytes,
88 DatasourceId::AndroidManifestXml,
89 "AndroidManifest.xml",
90 )
91 .into_iter()
92 .collect()
93 }
94}
95
96impl PackageParser for AndroidApkParser {
97 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
98
99 fn is_match(path: &Path) -> bool {
100 path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
101 }
102
103 fn extract_packages(path: &Path) -> Vec<PackageData> {
104 let package_data = match read_best_zip_entry(path, |entry_name| {
105 if entry_name == "AndroidManifest.xml" {
106 Some(0)
107 } else {
108 None
109 }
110 }) {
111 Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
112 .unwrap_or_else(|error| {
113 warn!("Failed to parse APK manifest {:?}: {}", path, error);
114 default_package_data(DatasourceId::AndroidApk)
115 }),
116 Ok(None) => {
117 warn!("No AndroidManifest.xml found in APK {:?}", path);
118 default_package_data(DatasourceId::AndroidApk)
119 }
120 Err(error) => {
121 warn!("Failed to read APK archive {:?}: {}", path, error);
122 default_package_data(DatasourceId::AndroidApk)
123 }
124 };
125
126 vec![package_data]
127 }
128}
129
130impl PackageParser for AndroidAabParser {
131 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
132
133 fn is_match(path: &Path) -> bool {
134 path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
135 }
136
137 fn extract_packages(path: &Path) -> Vec<PackageData> {
138 let package_data = match read_best_zip_entry(path, |entry_name| {
139 if entry_name == "base/manifest/AndroidManifest.xml" {
140 Some(0)
141 } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
142 Some(1)
143 } else {
144 None
145 }
146 }) {
147 Ok(Some((entry_name, bytes))) => {
148 parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
149 warn!(
150 "Failed to parse AAB manifest {:?} ({}): {}",
151 path, entry_name, error
152 );
153 default_package_data(DatasourceId::AndroidAab)
154 })
155 }
156 Ok(None) => {
157 warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
158 default_package_data(DatasourceId::AndroidAab)
159 }
160 Err(error) => {
161 warn!("Failed to read AAB archive {:?}: {}", path, error);
162 default_package_data(DatasourceId::AndroidAab)
163 }
164 };
165
166 vec![package_data]
167 }
168}
169
170fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
171 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
172 let metadata =
173 fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
174
175 if metadata.len() > limit {
176 return Err(format!(
177 "File {:?} is {} bytes, exceeding the {} byte limit",
178 path,
179 metadata.len(),
180 limit
181 ));
182 }
183
184 let mut file =
185 File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
186 let mut bytes = Vec::with_capacity(metadata.len() as usize);
187 file.read_to_end(&mut bytes)
188 .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
189 Ok(bytes)
190}
191
192fn parse_soong_metadata(content: &str) -> PackageData {
193 let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
194 warn!("Failed to parse Android Soong METADATA: {}", error);
195 ProtoMap::default()
196 });
197
198 let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
199 package.name = parsed.get_first_string("name").map(truncate_field);
200 package.description = parsed.get_first_string("description").map(truncate_field);
201
202 if let Some(third_party) = parsed.get_first_map("third_party") {
203 package.version = third_party.get_first_string("version").map(truncate_field);
204
205 let url_entries = third_party
206 .get_all_maps("url")
207 .into_iter()
208 .map(|entry| {
209 let type_ = entry.get_first_string("type").map(truncate_field);
210 let value = entry.get_first_string("value").map(truncate_field);
211 (type_, value)
212 })
213 .collect::<Vec<_>>();
214
215 let homepage_url = third_party.get_first_string("homepage").or_else(|| {
216 url_entries
217 .iter()
218 .find(|(type_, _)| {
219 type_
220 .as_deref()
221 .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
222 })
223 .and_then(|(_, value)| value.clone())
224 });
225 package.homepage_url = homepage_url.map(truncate_field);
226
227 let license_types = third_party
228 .get_all_strings("license_type")
229 .into_iter()
230 .map(truncate_field)
231 .collect::<Vec<_>>();
232 if !license_types.is_empty() {
233 package.extracted_license_statement = Some(license_types.join(", "));
234 }
235
236 let identifiers = third_party
237 .get_all_maps("identifier")
238 .into_iter()
239 .map(|identifier| {
240 let type_ = identifier.get_first_string("type").map(truncate_field);
241 let value = identifier.get_first_string("value").map(truncate_field);
242 let mut object = serde_json::Map::new();
243 if let Some(type_) = type_ {
244 object.insert("type".to_string(), type_.into());
245 }
246 if let Some(value) = &value {
247 object.insert("value".to_string(), value.clone().into());
248 }
249
250 if package.vcs_url.is_none()
251 && let (Some(type_), Some(value)) = (
252 identifier.get_first_string("type"),
253 identifier.get_first_string("value"),
254 )
255 {
256 let lower_type = type_.to_ascii_lowercase();
257 if lower_type.contains("git") {
258 package.vcs_url = Some(truncate_field(value));
259 } else if lower_type.contains("archive")
260 || lower_type.contains("tar")
261 || lower_type.contains("zip")
262 {
263 package.download_url = Some(truncate_field(value));
264 }
265 }
266
267 serde_json::Value::Object(object)
268 })
269 .collect::<Vec<_>>();
270
271 for (type_, value) in &url_entries {
272 let Some(value) = value else {
273 continue;
274 };
275
276 match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
277 Some("git") if package.vcs_url.is_none() => {
278 package.vcs_url = Some(value.clone());
279 }
280 Some("archive") if package.download_url.is_none() => {
281 package.download_url = Some(value.clone());
282 }
283 Some("homepage") if package.homepage_url.is_none() => {
284 package.homepage_url = Some(value.clone());
285 }
286 _ => {}
287 }
288 }
289
290 let mut extra_data = HashMap::new();
291 if !identifiers.is_empty() {
292 extra_data.insert("identifiers".to_string(), identifiers.into());
293 }
294 if !url_entries.is_empty() {
295 extra_data.insert(
296 "urls".to_string(),
297 url_entries
298 .iter()
299 .map(|(type_, value)| {
300 let mut object = serde_json::Map::new();
301 if let Some(type_) = type_ {
302 object.insert("type".to_string(), type_.clone().into());
303 }
304 if let Some(value) = value {
305 object.insert("value".to_string(), value.clone().into());
306 }
307 serde_json::Value::Object(object)
308 })
309 .collect::<Vec<_>>()
310 .into(),
311 );
312 }
313
314 if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
315 let year = last_upgrade_date.get_first_string("year");
316 let month = last_upgrade_date.get_first_string("month");
317 let day = last_upgrade_date.get_first_string("day");
318 if let (Some(year), Some(month), Some(day)) = (year, month, day) {
319 let formatted = format!(
320 "{:04}-{:02}-{:02}",
321 year.parse::<u32>().unwrap_or_default(),
322 month.parse::<u32>().unwrap_or_default(),
323 day.parse::<u32>().unwrap_or_default()
324 );
325 extra_data.insert(
326 "last_upgrade_date".to_string(),
327 truncate_field(formatted).into(),
328 );
329 }
330 }
331
332 if let Some(upstream_url) = third_party.get_first_string("url") {
333 extra_data.insert(
334 "upstream_url".to_string(),
335 truncate_field(upstream_url).into(),
336 );
337 }
338
339 if !extra_data.is_empty() {
340 package.extra_data = Some(extra_data);
341 }
342 }
343
344 package
345}
346
347fn parse_manifest_bytes(
348 bytes: &[u8],
349 datasource_id: DatasourceId,
350 context: &str,
351) -> Option<PackageData> {
352 if looks_like_text_xml(bytes) {
353 match parse_text_manifest_bytes(bytes, datasource_id) {
354 Ok(package) => return Some(package),
355 Err(error) => {
356 warn!("Failed to parse {} as text XML: {}", context, error);
357 return None;
358 }
359 }
360 }
361
362 parse_binary_manifest_bytes(bytes, datasource_id)
363 .map(Some)
364 .unwrap_or_else(|error| {
365 warn!(
366 "Failed to parse {} as binary Android XML: {}",
367 context, error
368 );
369 None
370 })
371}
372
/// Reports whether `bytes` appear to be a textual XML document.
///
/// The check skips an optional UTF-8 byte-order mark and any leading ASCII
/// whitespace, then requires the first remaining byte to be `<`. Empty or
/// whitespace-only input is not considered XML.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

    // Editors on some platforms prepend a BOM; without skipping it such files
    // would be misclassified and handed to the binary parser.
    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);

    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .map_or(false, |byte| *byte == b'<')
}
379
380fn parse_text_manifest_bytes(
381 bytes: &[u8],
382 datasource_id: DatasourceId,
383) -> Result<PackageData, String> {
384 let content = String::from_utf8(bytes.to_vec())
385 .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
386
387 let mut reader = Reader::from_str(&content);
388 reader.config_mut().trim_text(true);
389
390 let mut buf = Vec::new();
391 let mut manifest_attributes = HashMap::new();
392 let mut uses_sdk_attributes = HashMap::new();
393 let mut application_attributes = HashMap::new();
394 let mut requested_permissions = Vec::new();
395 let mut uses_libraries = Vec::new();
396 let mut iteration_count = 0usize;
397
398 loop {
399 iteration_count += 1;
400 if iteration_count > MAX_ITERATION_COUNT {
401 return Err(format!(
402 "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
403 MAX_ITERATION_COUNT
404 ));
405 }
406
407 match reader.read_event_into(&mut buf) {
408 Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
409 let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
410 let attributes = xml_attributes_to_map(&reader, &event)?;
411 match name.as_str() {
412 "manifest" if manifest_attributes.is_empty() => {
413 manifest_attributes = attributes
414 }
415 "uses-sdk" => uses_sdk_attributes = attributes,
416 "application" if application_attributes.is_empty() => {
417 application_attributes = attributes;
418 }
419 "uses-permission" | "uses-permission-sdk-23" => {
420 if let Some(permission) = attributes.get("android:name") {
421 requested_permissions.push(permission.clone());
422 }
423 }
424 "uses-library" => {
425 if let Some(library_name) = attributes.get("android:name") {
426 uses_libraries.push(library_name.clone());
427 }
428 }
429 _ => {}
430 }
431 }
432 Ok(Event::Eof) => break,
433 Err(error) => {
434 return Err(format!(
435 "XML parse error at position {}: {}",
436 reader.buffer_position(),
437 error
438 ));
439 }
440 _ => {}
441 }
442
443 buf.clear();
444 }
445
446 Ok(build_manifest_package_data(
447 datasource_id,
448 &manifest_attributes,
449 &uses_sdk_attributes,
450 &application_attributes,
451 requested_permissions,
452 uses_libraries,
453 ))
454}
455
456fn xml_attributes_to_map(
457 reader: &Reader<&[u8]>,
458 event: &quick_xml::events::BytesStart<'_>,
459) -> Result<HashMap<String, String>, String> {
460 let mut attributes = HashMap::new();
461
462 for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
463 let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
464 let value = attribute
465 .decode_and_unescape_value(reader.decoder())
466 .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
467 .into_owned();
468 attributes.insert(key, truncate_field(value));
469 }
470
471 Ok(attributes)
472}
473
474fn parse_binary_manifest_bytes(
475 bytes: &[u8],
476 datasource_id: DatasourceId,
477) -> Result<PackageData, String> {
478 let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
479 .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
480 .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
481
482 let manifest_attributes =
483 normalize_binary_attributes(axml.root().borrow().attributes().clone());
484 let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
485 .into_iter()
486 .next()
487 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
488 .unwrap_or_default();
489 let application_attributes = find_nodes_by_type(&axml, "application")
490 .into_iter()
491 .next()
492 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
493 .unwrap_or_default();
494
495 let requested_permissions = get_requested_permissions(&axml)
496 .into_iter()
497 .map(truncate_field)
498 .collect::<Vec<_>>();
499 let uses_libraries = find_nodes_by_type(&axml, "uses-library")
500 .into_iter()
501 .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
502 .map(truncate_field)
503 .collect::<Vec<_>>();
504
505 Ok(build_manifest_package_data(
506 datasource_id,
507 &manifest_attributes,
508 &uses_sdk_attributes,
509 &application_attributes,
510 requested_permissions,
511 uses_libraries,
512 ))
513}
514
515fn build_manifest_package_data(
516 datasource_id: DatasourceId,
517 manifest_attributes: &HashMap<String, String>,
518 uses_sdk_attributes: &HashMap<String, String>,
519 application_attributes: &HashMap<String, String>,
520 requested_permissions: Vec<String>,
521 uses_libraries: Vec<String>,
522) -> PackageData {
523 let mut package = default_package_data(datasource_id);
524 package.name = manifest_attributes.get("package").cloned();
525 package.version = manifest_attributes
526 .get("android:versionName")
527 .cloned()
528 .or_else(|| manifest_attributes.get("android:versionCode").cloned());
529
530 package.description = application_attributes
531 .get("android:label")
532 .filter(|label| {
533 !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
534 })
535 .cloned();
536
537 let mut extra_data = HashMap::new();
538 insert_extra(
539 &mut extra_data,
540 "version_code",
541 manifest_attributes.get("android:versionCode"),
542 );
543 insert_extra(
544 &mut extra_data,
545 "compile_sdk_version",
546 manifest_attributes.get("android:compileSdkVersion"),
547 );
548 insert_extra(
549 &mut extra_data,
550 "compile_sdk_version_codename",
551 manifest_attributes.get("android:compileSdkVersionCodename"),
552 );
553 insert_extra(
554 &mut extra_data,
555 "platform_build_version_code",
556 manifest_attributes.get("platformBuildVersionCode"),
557 );
558 insert_extra(
559 &mut extra_data,
560 "platform_build_version_name",
561 manifest_attributes.get("platformBuildVersionName"),
562 );
563 insert_extra(
564 &mut extra_data,
565 "min_sdk_version",
566 uses_sdk_attributes.get("android:minSdkVersion"),
567 );
568 insert_extra(
569 &mut extra_data,
570 "target_sdk_version",
571 uses_sdk_attributes.get("android:targetSdkVersion"),
572 );
573 insert_extra(
574 &mut extra_data,
575 "max_sdk_version",
576 uses_sdk_attributes.get("android:maxSdkVersion"),
577 );
578
579 if !requested_permissions.is_empty() {
580 extra_data.insert(
581 "requested_permissions".to_string(),
582 requested_permissions
583 .into_iter()
584 .map(serde_json::Value::from)
585 .collect::<Vec<_>>()
586 .into(),
587 );
588 }
589 if !uses_libraries.is_empty() {
590 extra_data.insert(
591 "uses_libraries".to_string(),
592 uses_libraries
593 .into_iter()
594 .map(serde_json::Value::from)
595 .collect::<Vec<_>>()
596 .into(),
597 );
598 }
599
600 if !extra_data.is_empty() {
601 package.extra_data = Some(extra_data);
602 }
603
604 package
605}
606
607fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
608 attributes
609 .into_iter()
610 .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
611 .collect()
612}
613
/// Rewrites a hexadecimal attribute rendering as its decimal form.
///
/// Values shaped like `(type 0x10) 0x<hex>` or `0x<hex>` whose digits fit in a
/// `u64` are returned as decimal text; every other value is returned
/// unchanged. Only plain hexadecimal digits are accepted after the prefix, so
/// strings such as `0x+1f` (which `u64::from_str_radix` would parse) are left
/// alone.
fn normalize_binary_attribute_value(value: &str) -> String {
    value
        .strip_prefix("(type 0x10) 0x")
        .or_else(|| value.strip_prefix("0x"))
        .filter(|digits| digits.bytes().all(|byte| byte.is_ascii_hexdigit()))
        // Empty or overflowing digit strings fail here and fall through.
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |parsed| parsed.to_string())
}
627
628fn insert_extra(
629 extra_data: &mut HashMap<String, serde_json::Value>,
630 key: &str,
631 value: Option<&String>,
632) {
633 if let Some(value) = value {
634 extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
635 }
636}
637
638fn read_best_zip_entry<F>(
639 path: &Path,
640 mut rank_entry: F,
641) -> Result<Option<(String, Vec<u8>)>, String>
642where
643 F: FnMut(&str) -> Option<u8>,
644{
645 let metadata = fs::metadata(path)
646 .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
647 if metadata.len() > MAX_ARCHIVE_SIZE {
648 return Err(format!(
649 "Archive {:?} is {} bytes, exceeding the {} byte limit",
650 path,
651 metadata.len(),
652 MAX_ARCHIVE_SIZE
653 ));
654 }
655
656 let file = File::open(path)
657 .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
658 let mut archive = ZipArchive::new(file)
659 .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
660
661 let mut total_uncompressed = 0u64;
662 let mut best: Option<(u8, String, Vec<u8>)> = None;
663 let entry_count = archive.len().min(MAX_ITERATION_COUNT);
664
665 if archive.len() > MAX_ITERATION_COUNT {
666 warn!(
667 "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
668 path, MAX_ITERATION_COUNT
669 );
670 }
671
672 for index in 0..entry_count {
673 let mut entry = archive.by_index(index).map_err(|error| {
674 format!(
675 "Failed to read ZIP entry {} in {:?}: {}",
676 index, path, error
677 )
678 })?;
679
680 total_uncompressed = total_uncompressed.saturating_add(entry.size());
681 if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
682 return Err(format!(
683 "Archive {:?} exceeds total uncompressed size limit of {} bytes",
684 path, MAX_TOTAL_UNCOMPRESSED_SIZE
685 ));
686 }
687
688 let entry_name = entry.name().replace('\\', "/");
689 if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
690 return Err(format!(
691 "Archive entry {} contains a disallowed path",
692 entry_name
693 ));
694 }
695 let Some(rank) = rank_entry(&entry_name) else {
696 continue;
697 };
698
699 if entry.size() > MAX_FILE_SIZE {
700 return Err(format!(
701 "Archive entry {} is {} bytes, exceeding the {} byte limit",
702 entry_name,
703 entry.size(),
704 MAX_FILE_SIZE
705 ));
706 }
707
708 let compressed_size = entry.compressed_size();
709 if compressed_size > 0 {
710 let ratio = entry.size() as f64 / compressed_size as f64;
711 if ratio > MAX_COMPRESSION_RATIO {
712 return Err(format!(
713 "Archive entry {} has suspicious compression ratio {:.2}:1",
714 entry_name, ratio
715 ));
716 }
717 }
718
719 let should_replace = match &best {
720 Some((best_rank, _, _)) => rank < *best_rank,
721 None => true,
722 };
723
724 if should_replace {
725 let mut bytes = Vec::with_capacity(entry.size() as usize);
726 entry.read_to_end(&mut bytes).map_err(|error| {
727 format!("Failed to read archive entry {}: {}", entry_name, error)
728 })?;
729 best = Some((rank, entry_name, bytes));
730 }
731 }
732
733 Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
734}
735
736fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
737 let node =
738 ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
739 let root_element = node
740 .element()
741 .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
742 if root_element.name != "manifest" {
743 return Err(format!(
744 "Unexpected proto XML root element: {}",
745 root_element.name
746 ));
747 }
748
749 let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
750 let uses_sdk_attributes = root_element
751 .child_elements_named("uses-sdk")
752 .next()
753 .map(|element| proto_attributes_to_map(&element.attribute))
754 .unwrap_or_default();
755 let application_attributes = root_element
756 .child_elements_named("application")
757 .next()
758 .map(|element| proto_attributes_to_map(&element.attribute))
759 .unwrap_or_default();
760 let requested_permissions = root_element
761 .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
762 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
763 .collect::<Vec<_>>();
764 let uses_libraries = root_element
765 .child_elements_named("uses-library")
766 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
767 .collect::<Vec<_>>();
768
769 let mut package = build_manifest_package_data(
770 DatasourceId::AndroidAab,
771 &manifest_attributes,
772 &uses_sdk_attributes,
773 &application_attributes,
774 requested_permissions,
775 uses_libraries,
776 );
777
778 if let Some(extra_data) = package.extra_data.as_mut() {
779 extra_data.insert("manifest_encoding".to_string(), "proto".into());
780 } else {
781 package.extra_data = Some(HashMap::from([(
782 "manifest_encoding".to_string(),
783 serde_json::Value::String("proto".to_string()),
784 )]));
785 }
786
787 Ok(package)
788}
789
790fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
791 attributes
792 .iter()
793 .filter_map(|attribute| {
794 let key = proto_attribute_key(attribute)?;
795 let value = proto_attribute_value(attribute)?;
796 Some((key, truncate_field(value)))
797 })
798 .collect()
799}
800
801fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
802 if attribute.name.is_empty() {
803 return None;
804 }
805
806 if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
807 return Some(format!("android:{}", attribute.name));
808 }
809
810 Some(attribute.name.clone())
811}
812
813fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
814 if !attribute.value.is_empty() {
815 return Some(attribute.value.clone());
816 }
817
818 attribute
819 .compiled_item
820 .as_ref()
821 .and_then(proto_item_to_string)
822}
823
/// Renders a compiled proto item as text.
///
/// `Str` and `RawStr` items yield a copy of their `value` field and `Prim`
/// items are delegated to `proto_primitive_to_string`. Any other variant, or
/// an unset value, yields `None`.
fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
    match &item.value {
        Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
        Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
        Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
        _ => None,
    }
}
832
/// Renders a proto primitive as text.
///
/// Hexadecimal integers are formatted as lowercase `0x…`; the other handled
/// variants use their `to_string()` form. Any variant not listed, or an unset
/// value, yields `None`.
fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
    match &primitive.value {
        Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
        Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
        Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
        Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
        Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
        Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
        _ => None,
    }
}
844
/// Parsed text-proto message: each field name maps to the values it was given,
/// in the order they were stored.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single text-proto field value.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// A scalar, stored as text.
    Scalar(String),
    /// A nested message.
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar stored under `key`, skipping nested
    /// messages.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for candidate in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = candidate {
                return Some(text.clone());
            }
        }
        None
    }

    /// Returns copies of every scalar stored under `key`, in stored order.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let mut scalars = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for candidate in values {
                if let ProtoValue::Scalar(text) = candidate {
                    scalars.push(text.clone());
                }
            }
        }
        scalars
    }

    /// Returns a copy of the first nested message stored under `key`, skipping
    /// scalars.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for candidate in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = candidate {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Returns copies of every nested message stored under `key`, in stored
    /// order.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let mut nested_maps = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for candidate in values {
                if let ProtoValue::Map(nested) = candidate {
                    nested_maps.push(nested.clone());
                }
            }
        }
        nested_maps
    }
}
899
900fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
901 let mut parser = TextProtoParser::new(content)?;
902 parser.parse_map(false)
903}
904
/// Parser over a pre-tokenized text-proto document.
struct TextProtoParser {
    /// Every token of the input, in source order.
    tokens: Vec<TextProtoToken>,
    /// Index into `tokens` of the next token to consume.
    position: usize,
}

/// Lexical token produced by `tokenize_textproto`.
#[derive(Debug, Clone)]
enum TextProtoToken {
    /// Unquoted run of characters (field names and bare scalar values).
    Identifier(String),
    /// Contents of a double-quoted string literal, without the quotes.
    String(String),
    /// `:`
    Colon,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
}
918
919impl TextProtoParser {
920 fn new(content: &str) -> Result<Self, String> {
921 Ok(Self {
922 tokens: tokenize_textproto(content)?,
923 position: 0,
924 })
925 }
926
927 fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
928 let mut map = ProtoMap::default();
929
930 while let Some(token) = self.peek() {
931 match token {
932 TextProtoToken::RBrace if stop_on_rbrace => {
933 self.position += 1;
934 break;
935 }
936 TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
937 TextProtoToken::Identifier(_) => {
938 let key = self.expect_identifier()?;
939 match self.peek() {
940 Some(TextProtoToken::Colon) => {
941 self.position += 1;
942 match self.peek() {
943 Some(TextProtoToken::LBrace) => {
944 self.position += 1;
945 let value = self.parse_map(true)?;
946 map.fields
947 .entry(key)
948 .or_default()
949 .push(ProtoValue::Map(value));
950 }
951 _ => {
952 let value = self.expect_scalar()?;
953 map.fields
954 .entry(key)
955 .or_default()
956 .push(ProtoValue::Scalar(truncate_field(value)));
957 }
958 }
959 }
960 Some(TextProtoToken::LBrace) => {
961 self.position += 1;
962 let value = self.parse_map(true)?;
963 map.fields
964 .entry(key)
965 .or_default()
966 .push(ProtoValue::Map(value));
967 }
968 Some(other) => {
969 return Err(format!("Unexpected token after key: {:?}", other));
970 }
971 None => return Err("Unexpected end of input after key".to_string()),
972 }
973 }
974 other => return Err(format!("Unexpected token in textproto: {:?}", other)),
975 }
976 }
977
978 Ok(map)
979 }
980
981 fn expect_identifier(&mut self) -> Result<String, String> {
982 match self.next() {
983 Some(TextProtoToken::Identifier(value)) => Ok(value),
984 other => Err(format!("Expected identifier, found {:?}", other)),
985 }
986 }
987
988 fn expect_scalar(&mut self) -> Result<String, String> {
989 match self.next() {
990 Some(TextProtoToken::String(mut value)) => {
991 while matches!(self.peek(), Some(TextProtoToken::String(_))) {
992 if let Some(TextProtoToken::String(next)) = self.next() {
993 value.push_str(&next);
994 }
995 }
996 Ok(value)
997 }
998 Some(TextProtoToken::Identifier(value)) => Ok(value),
999 other => Err(format!("Expected scalar value, found {:?}", other)),
1000 }
1001 }
1002
1003 fn peek(&self) -> Option<&TextProtoToken> {
1004 self.tokens.get(self.position)
1005 }
1006
1007 fn next(&mut self) -> Option<TextProtoToken> {
1008 let token = self.tokens.get(self.position).cloned();
1009 if token.is_some() {
1010 self.position += 1;
1011 }
1012 token
1013 }
1014}
1015
1016fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1017 let mut tokens = Vec::new();
1018 let chars = content.chars().collect::<Vec<_>>();
1019 let mut index = 0usize;
1020
1021 while index < chars.len() {
1022 match chars[index] {
1023 '{' => {
1024 tokens.push(TextProtoToken::LBrace);
1025 index += 1;
1026 }
1027 '}' => {
1028 tokens.push(TextProtoToken::RBrace);
1029 index += 1;
1030 }
1031 ':' => {
1032 tokens.push(TextProtoToken::Colon);
1033 index += 1;
1034 }
1035 '"' => {
1036 index += 1;
1037 let mut value = String::new();
1038 while index < chars.len() {
1039 match chars[index] {
1040 '\\' if index + 1 < chars.len() => {
1041 index += 1;
1042 value.push(chars[index]);
1043 index += 1;
1044 }
1045 '"' => {
1046 index += 1;
1047 break;
1048 }
1049 character => {
1050 value.push(character);
1051 index += 1;
1052 }
1053 }
1054 }
1055 tokens.push(TextProtoToken::String(value));
1056 }
1057 '#' => {
1058 while index < chars.len() && chars[index] != '\n' {
1059 index += 1;
1060 }
1061 }
1062 '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1063 index += 2;
1064 while index < chars.len() && chars[index] != '\n' {
1065 index += 1;
1066 }
1067 }
1068 character if character.is_ascii_whitespace() => index += 1,
1069 _ => {
1070 let start = index;
1071 while index < chars.len() {
1072 let character = chars[index];
1073 let starts_comment =
1074 character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1075
1076 if character.is_ascii_whitespace()
1077 || matches!(character, '{' | '}' | ':' | '#')
1078 || starts_comment
1079 {
1080 break;
1081 }
1082
1083 index += 1;
1084 }
1085
1086 let token = chars[start..index].iter().collect::<String>();
1087 if token.is_empty() {
1088 return Err("Encountered empty textproto token".to_string());
1089 }
1090 tokens.push(TextProtoToken::Identifier(token));
1091 }
1092 }
1093 }
1094
1095 Ok(tokens)
1096}
1097
/// Protobuf message holding a line/column position.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoSourcePosition {
    /// Line number (field tag 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (field tag 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1105
/// Protobuf message for one XML node: either an element or a text run.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNode {
    /// Node payload, a oneof over field tags 1 (element) and 2 (text).
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    /// Optional source position of the node (field tag 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1113
1114impl ProtoXmlNode {
1115 fn element(&self) -> Option<&ProtoXmlElement> {
1116 match &self.node {
1117 Some(proto_xml_node::Node::Element(element)) => Some(element),
1118 _ => None,
1119 }
1120 }
1121}
1122
/// Holds the oneof payload type referenced by `ProtoXmlNode::node`.
pub mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// The two mutually exclusive payloads of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// A nested XML element (message, field tag 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text string (field tag 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1135
/// Protobuf message for an XML element: its namespace declarations, its own
/// namespace URI and name, its attributes and its child nodes.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlElement {
    /// Namespace declarations carried by this element (repeated, tag 1).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (tag 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element name (tag 3); this is the field the `child_elements_named*`
    /// helpers compare against.
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (repeated, tag 4).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, each either an element or text (repeated, tag 5).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1149
1150impl ProtoXmlElement {
1151 fn child_elements_named<'a>(
1152 &'a self,
1153 name: &'a str,
1154 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1155 self.child
1156 .iter()
1157 .filter_map(ProtoXmlNode::element)
1158 .filter(move |element| element.name == name)
1159 }
1160
1161 fn child_elements_named_any<'a>(
1162 &'a self,
1163 names: &'a [&'a str],
1164 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1165 self.child
1166 .iter()
1167 .filter_map(ProtoXmlNode::element)
1168 .filter(move |element| names.contains(&element.name.as_str()))
1169 }
1170}
1171
/// Protobuf message for one XML namespace declaration (prefix → URI), as
/// stored in `ProtoXmlElement::namespace_declaration`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNamespace {
    /// Namespace prefix (tag 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (tag 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position of the declaration (tag 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1181
/// Protobuf message for one attribute of an XML element, as stored in
/// `ProtoXmlElement::attribute`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (tag 1).
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name (tag 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Attribute value as a string (tag 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position of the attribute (tag 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Numeric resource identifier (tag 5).
    /// NOTE(review): presumably the Android resource ID of the attribute
    /// name — confirm against the upstream schema.
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional typed value accompanying the string `value` (tag 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1197
/// Protobuf message for a typed attribute value, used as
/// `ProtoXmlAttribute::compiled_item`.
///
/// Only oneof tags 2, 3 and 7 are declared here (see [`proto_item::Value`]).
/// NOTE(review): the layout looks like a subset of AAPT2's `aapt.pb.Item` —
/// confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoItem {
    /// String, raw-string or primitive payload; `None` when none of the
    /// declared oneof fields was set.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status stored as a raw `uint32` (tag 8).
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (tag 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Flag name (tag 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1209
/// Holds the oneof payload type referenced by `ProtoItem::value`.
pub mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// The mutually exclusive value kinds declared for an item.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value message (field tag 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value message (field tag 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value message (field tag 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1224
/// Protobuf message wrapping a single string, used by the
/// `proto_item::Value::Str` variant.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoStringValue {
    /// The wrapped string (tag 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1230
/// Protobuf message wrapping a single string, used by the
/// `proto_item::Value::RawStr` variant.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoRawStringValue {
    /// The wrapped string (tag 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1236
/// Protobuf message for a primitive value, used by the
/// `proto_item::Value::Prim` variant.
///
/// Six oneof tags are declared here: 3, 6, 7, 8, 13 and 14 (see
/// [`proto_primitive::Value`]).
#[derive(Clone, PartialEq, Message)]
pub struct ProtoPrimitive {
    /// The primitive payload; `None` when none of the declared oneof fields
    /// was set.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1242
/// Holds the oneof payload type referenced by `ProtoPrimitive::value`.
pub mod proto_primitive {
    use prost::Oneof;

    /// The mutually exclusive primitive kinds declared for `ProtoPrimitive`.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// 32-bit float (field tag 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed 32-bit integer (field tag 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned 32-bit integer (field tag 7).
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean (field tag 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension stored as a raw `uint32` (field tag 13).
        /// NOTE(review): presumably a packed Android dimension encoding — confirm.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction stored as a raw `uint32` (field tag 14).
        /// NOTE(review): presumably a packed Android fraction encoding — confirm.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1262
// Registration entry for the Soong `METADATA` textproto parser.
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path globs, package type, primary language, docs URL) — confirm.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);
1272
// Registration entry for `AndroidManifest.xml` files (text XML or binary AXML).
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path globs, package type, primary language, docs URL) — confirm.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);
1280
// Registration entry for `.apk` archives.
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path globs, package type, primary language, docs URL) — confirm.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);
1288
// Registration entry for Android App Bundle (`.aab`) archives.
// NOTE(review): `register_parser!` is defined elsewhere; the arguments look like
// (description, path globs, package type, primary language, docs URL) — confirm.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);