1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
/// Package type reported by every parser in this module.
const PACKAGE_TYPE: PackageType = PackageType::Android;
/// Largest on-disk archive, in bytes, that `read_best_zip_entry` agrees to open.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Largest uncompressed size, in bytes, accepted for an archive entry selected for reading.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Ceiling on the summed uncompressed sizes of the archive entries scanned so far.
const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
/// Highest uncompressed:compressed ratio tolerated for a selected archive entry.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
/// Namespace URI whose proto attributes are reported with an `android:` prefix.
const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30 PackageData {
31 package_type: Some(PACKAGE_TYPE),
32 datasource_id: Some(datasource_id),
33 ..Default::default()
34 }
35}
36
/// Parser for Android Soong `METADATA` files (textproto).
pub struct AndroidSoongMetadataParser;
/// Parser for standalone `AndroidManifest.xml` files (text or binary XML).
pub struct AndroidManifestParser;
/// Parser for `.apk` archives; reads the binary `AndroidManifest.xml` entry.
pub struct AndroidApkParser;
/// Parser for `.aab` archives; reads a protobuf-encoded `AndroidManifest.xml` entry.
pub struct AndroidAabParser;
41
42impl PackageParser for AndroidSoongMetadataParser {
43 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
47 && !path
48 .parent()
49 .and_then(|parent| parent.file_name())
50 .and_then(|name| name.to_str())
51 .is_some_and(|name| name.ends_with(".dist-info"))
52 }
53
54 fn extract_packages(path: &Path) -> Vec<PackageData> {
55 let content = match crate::parsers::utils::read_file_to_string(path, None) {
56 Ok(content) => content,
57 Err(error) => {
58 warn!(
59 "Failed to read Android Soong METADATA {:?}: {}",
60 path, error
61 );
62 return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
63 }
64 };
65
66 vec![parse_soong_metadata(&content)]
67 }
68}
69
70impl PackageParser for AndroidManifestParser {
71 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73 fn is_match(path: &Path) -> bool {
74 path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
75 }
76
77 fn extract_packages(path: &Path) -> Vec<PackageData> {
78 let bytes = match read_file_bytes(path, None) {
79 Ok(bytes) => bytes,
80 Err(error) => {
81 warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
82 return vec![default_package_data(DatasourceId::AndroidManifestXml)];
83 }
84 };
85
86 vec![parse_manifest_bytes(
87 &bytes,
88 DatasourceId::AndroidManifestXml,
89 "AndroidManifest.xml",
90 )]
91 }
92}
93
94impl PackageParser for AndroidApkParser {
95 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
96
97 fn is_match(path: &Path) -> bool {
98 path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
99 }
100
101 fn extract_packages(path: &Path) -> Vec<PackageData> {
102 let package_data = match read_best_zip_entry(path, |entry_name| {
103 if entry_name == "AndroidManifest.xml" {
104 Some(0)
105 } else {
106 None
107 }
108 }) {
109 Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
110 .unwrap_or_else(|error| {
111 warn!("Failed to parse APK manifest {:?}: {}", path, error);
112 default_package_data(DatasourceId::AndroidApk)
113 }),
114 Ok(None) => {
115 warn!("No AndroidManifest.xml found in APK {:?}", path);
116 default_package_data(DatasourceId::AndroidApk)
117 }
118 Err(error) => {
119 warn!("Failed to read APK archive {:?}: {}", path, error);
120 default_package_data(DatasourceId::AndroidApk)
121 }
122 };
123
124 vec![package_data]
125 }
126}
127
128impl PackageParser for AndroidAabParser {
129 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
130
131 fn is_match(path: &Path) -> bool {
132 path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
133 }
134
135 fn extract_packages(path: &Path) -> Vec<PackageData> {
136 let package_data = match read_best_zip_entry(path, |entry_name| {
137 if entry_name == "base/manifest/AndroidManifest.xml" {
138 Some(0)
139 } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
140 Some(1)
141 } else {
142 None
143 }
144 }) {
145 Ok(Some((entry_name, bytes))) => {
146 parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
147 warn!(
148 "Failed to parse AAB manifest {:?} ({}): {}",
149 path, entry_name, error
150 );
151 default_package_data(DatasourceId::AndroidAab)
152 })
153 }
154 Ok(None) => {
155 warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
156 default_package_data(DatasourceId::AndroidAab)
157 }
158 Err(error) => {
159 warn!("Failed to read AAB archive {:?}: {}", path, error);
160 default_package_data(DatasourceId::AndroidAab)
161 }
162 };
163
164 vec![package_data]
165 }
166}
167
168fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
169 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
170 let metadata =
171 fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
172
173 if metadata.len() > limit {
174 return Err(format!(
175 "File {:?} is {} bytes, exceeding the {} byte limit",
176 path,
177 metadata.len(),
178 limit
179 ));
180 }
181
182 let mut file =
183 File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
184 let mut bytes = Vec::with_capacity(metadata.len() as usize);
185 file.read_to_end(&mut bytes)
186 .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
187 Ok(bytes)
188}
189
190fn parse_soong_metadata(content: &str) -> PackageData {
191 let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
192 warn!("Failed to parse Android Soong METADATA: {}", error);
193 ProtoMap::default()
194 });
195
196 let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
197 package.name = parsed.get_first_string("name").map(truncate_field);
198 package.description = parsed.get_first_string("description").map(truncate_field);
199
200 if let Some(third_party) = parsed.get_first_map("third_party") {
201 package.version = third_party.get_first_string("version").map(truncate_field);
202
203 let url_entries = third_party
204 .get_all_maps("url")
205 .into_iter()
206 .map(|entry| {
207 let type_ = entry.get_first_string("type").map(truncate_field);
208 let value = entry.get_first_string("value").map(truncate_field);
209 (type_, value)
210 })
211 .collect::<Vec<_>>();
212
213 let homepage_url = third_party.get_first_string("homepage").or_else(|| {
214 url_entries
215 .iter()
216 .find(|(type_, _)| {
217 type_
218 .as_deref()
219 .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
220 })
221 .and_then(|(_, value)| value.clone())
222 });
223 package.homepage_url = homepage_url.map(truncate_field);
224
225 let license_types = third_party
226 .get_all_strings("license_type")
227 .into_iter()
228 .map(truncate_field)
229 .collect::<Vec<_>>();
230 if !license_types.is_empty() {
231 package.extracted_license_statement = Some(license_types.join(", "));
232 }
233
234 let identifiers = third_party
235 .get_all_maps("identifier")
236 .into_iter()
237 .map(|identifier| {
238 let type_ = identifier.get_first_string("type").map(truncate_field);
239 let value = identifier.get_first_string("value").map(truncate_field);
240 let mut object = serde_json::Map::new();
241 if let Some(type_) = type_ {
242 object.insert("type".to_string(), type_.into());
243 }
244 if let Some(value) = &value {
245 object.insert("value".to_string(), value.clone().into());
246 }
247
248 if package.vcs_url.is_none()
249 && let (Some(type_), Some(value)) = (
250 identifier.get_first_string("type"),
251 identifier.get_first_string("value"),
252 )
253 {
254 let lower_type = type_.to_ascii_lowercase();
255 if lower_type.contains("git") {
256 package.vcs_url = Some(truncate_field(value));
257 } else if lower_type.contains("archive")
258 || lower_type.contains("tar")
259 || lower_type.contains("zip")
260 {
261 package.download_url = Some(truncate_field(value));
262 }
263 }
264
265 serde_json::Value::Object(object)
266 })
267 .collect::<Vec<_>>();
268
269 for (type_, value) in &url_entries {
270 let Some(value) = value else {
271 continue;
272 };
273
274 match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
275 Some("git") if package.vcs_url.is_none() => {
276 package.vcs_url = Some(value.clone());
277 }
278 Some("archive") if package.download_url.is_none() => {
279 package.download_url = Some(value.clone());
280 }
281 Some("homepage") if package.homepage_url.is_none() => {
282 package.homepage_url = Some(value.clone());
283 }
284 _ => {}
285 }
286 }
287
288 let mut extra_data = HashMap::new();
289 if !identifiers.is_empty() {
290 extra_data.insert("identifiers".to_string(), identifiers.into());
291 }
292 if !url_entries.is_empty() {
293 extra_data.insert(
294 "urls".to_string(),
295 url_entries
296 .iter()
297 .map(|(type_, value)| {
298 let mut object = serde_json::Map::new();
299 if let Some(type_) = type_ {
300 object.insert("type".to_string(), type_.clone().into());
301 }
302 if let Some(value) = value {
303 object.insert("value".to_string(), value.clone().into());
304 }
305 serde_json::Value::Object(object)
306 })
307 .collect::<Vec<_>>()
308 .into(),
309 );
310 }
311
312 if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
313 let year = last_upgrade_date.get_first_string("year");
314 let month = last_upgrade_date.get_first_string("month");
315 let day = last_upgrade_date.get_first_string("day");
316 if let (Some(year), Some(month), Some(day)) = (year, month, day) {
317 let formatted = format!(
318 "{:04}-{:02}-{:02}",
319 year.parse::<u32>().unwrap_or_default(),
320 month.parse::<u32>().unwrap_or_default(),
321 day.parse::<u32>().unwrap_or_default()
322 );
323 extra_data.insert(
324 "last_upgrade_date".to_string(),
325 truncate_field(formatted).into(),
326 );
327 }
328 }
329
330 if let Some(upstream_url) = third_party.get_first_string("url") {
331 extra_data.insert(
332 "upstream_url".to_string(),
333 truncate_field(upstream_url).into(),
334 );
335 }
336
337 if !extra_data.is_empty() {
338 package.extra_data = Some(extra_data);
339 }
340 }
341
342 package
343}
344
345fn parse_manifest_bytes(bytes: &[u8], datasource_id: DatasourceId, context: &str) -> PackageData {
346 if looks_like_text_xml(bytes) {
347 match parse_text_manifest_bytes(bytes, datasource_id) {
348 Ok(package) => return package,
349 Err(error) => warn!("Failed to parse {} as text XML: {}", context, error),
350 }
351 }
352
353 parse_binary_manifest_bytes(bytes, datasource_id).unwrap_or_else(|error| {
354 warn!(
355 "Failed to parse {} as binary Android XML: {}",
356 context, error
357 );
358 default_package_data(datasource_id)
359 })
360}
361
/// Heuristically decides whether `bytes` holds text XML rather than binary Android XML.
///
/// Returns `true` when the first byte that is not ASCII whitespace is `<`. A leading
/// UTF-8 byte-order mark is skipped first, so BOM-prefixed text manifests are not
/// mistaken for binary content. Empty or whitespace-only input yields `false`.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
368
369fn parse_text_manifest_bytes(
370 bytes: &[u8],
371 datasource_id: DatasourceId,
372) -> Result<PackageData, String> {
373 let content = String::from_utf8(bytes.to_vec())
374 .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
375
376 let mut reader = Reader::from_str(&content);
377 reader.config_mut().trim_text(true);
378
379 let mut buf = Vec::new();
380 let mut manifest_attributes = HashMap::new();
381 let mut uses_sdk_attributes = HashMap::new();
382 let mut application_attributes = HashMap::new();
383 let mut requested_permissions = Vec::new();
384 let mut uses_libraries = Vec::new();
385 let mut iteration_count = 0usize;
386
387 loop {
388 iteration_count += 1;
389 if iteration_count > MAX_ITERATION_COUNT {
390 return Err(format!(
391 "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
392 MAX_ITERATION_COUNT
393 ));
394 }
395
396 match reader.read_event_into(&mut buf) {
397 Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
398 let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
399 let attributes = xml_attributes_to_map(&reader, &event)?;
400 match name.as_str() {
401 "manifest" if manifest_attributes.is_empty() => {
402 manifest_attributes = attributes
403 }
404 "uses-sdk" => uses_sdk_attributes = attributes,
405 "application" if application_attributes.is_empty() => {
406 application_attributes = attributes;
407 }
408 "uses-permission" | "uses-permission-sdk-23" => {
409 if let Some(permission) = attributes.get("android:name") {
410 requested_permissions.push(permission.clone());
411 }
412 }
413 "uses-library" => {
414 if let Some(library_name) = attributes.get("android:name") {
415 uses_libraries.push(library_name.clone());
416 }
417 }
418 _ => {}
419 }
420 }
421 Ok(Event::Eof) => break,
422 Err(error) => {
423 return Err(format!(
424 "XML parse error at position {}: {}",
425 reader.buffer_position(),
426 error
427 ));
428 }
429 _ => {}
430 }
431
432 buf.clear();
433 }
434
435 Ok(build_manifest_package_data(
436 datasource_id,
437 &manifest_attributes,
438 &uses_sdk_attributes,
439 &application_attributes,
440 requested_permissions,
441 uses_libraries,
442 ))
443}
444
445fn xml_attributes_to_map(
446 reader: &Reader<&[u8]>,
447 event: &quick_xml::events::BytesStart<'_>,
448) -> Result<HashMap<String, String>, String> {
449 let mut attributes = HashMap::new();
450
451 for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
452 let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
453 let value = attribute
454 .decode_and_unescape_value(reader.decoder())
455 .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
456 .into_owned();
457 attributes.insert(key, truncate_field(value));
458 }
459
460 Ok(attributes)
461}
462
463fn parse_binary_manifest_bytes(
464 bytes: &[u8],
465 datasource_id: DatasourceId,
466) -> Result<PackageData, String> {
467 let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
468 .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
469 .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
470
471 let manifest_attributes =
472 normalize_binary_attributes(axml.root().borrow().attributes().clone());
473 let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
474 .into_iter()
475 .next()
476 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
477 .unwrap_or_default();
478 let application_attributes = find_nodes_by_type(&axml, "application")
479 .into_iter()
480 .next()
481 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
482 .unwrap_or_default();
483
484 let requested_permissions = get_requested_permissions(&axml)
485 .into_iter()
486 .map(truncate_field)
487 .collect::<Vec<_>>();
488 let uses_libraries = find_nodes_by_type(&axml, "uses-library")
489 .into_iter()
490 .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
491 .map(truncate_field)
492 .collect::<Vec<_>>();
493
494 Ok(build_manifest_package_data(
495 datasource_id,
496 &manifest_attributes,
497 &uses_sdk_attributes,
498 &application_attributes,
499 requested_permissions,
500 uses_libraries,
501 ))
502}
503
504fn build_manifest_package_data(
505 datasource_id: DatasourceId,
506 manifest_attributes: &HashMap<String, String>,
507 uses_sdk_attributes: &HashMap<String, String>,
508 application_attributes: &HashMap<String, String>,
509 requested_permissions: Vec<String>,
510 uses_libraries: Vec<String>,
511) -> PackageData {
512 let mut package = default_package_data(datasource_id);
513 package.name = manifest_attributes.get("package").cloned();
514 package.version = manifest_attributes
515 .get("android:versionName")
516 .cloned()
517 .or_else(|| manifest_attributes.get("android:versionCode").cloned());
518
519 package.description = application_attributes
520 .get("android:label")
521 .filter(|label| {
522 !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
523 })
524 .cloned();
525
526 let mut extra_data = HashMap::new();
527 insert_extra(
528 &mut extra_data,
529 "version_code",
530 manifest_attributes.get("android:versionCode"),
531 );
532 insert_extra(
533 &mut extra_data,
534 "compile_sdk_version",
535 manifest_attributes.get("android:compileSdkVersion"),
536 );
537 insert_extra(
538 &mut extra_data,
539 "compile_sdk_version_codename",
540 manifest_attributes.get("android:compileSdkVersionCodename"),
541 );
542 insert_extra(
543 &mut extra_data,
544 "platform_build_version_code",
545 manifest_attributes.get("platformBuildVersionCode"),
546 );
547 insert_extra(
548 &mut extra_data,
549 "platform_build_version_name",
550 manifest_attributes.get("platformBuildVersionName"),
551 );
552 insert_extra(
553 &mut extra_data,
554 "min_sdk_version",
555 uses_sdk_attributes.get("android:minSdkVersion"),
556 );
557 insert_extra(
558 &mut extra_data,
559 "target_sdk_version",
560 uses_sdk_attributes.get("android:targetSdkVersion"),
561 );
562 insert_extra(
563 &mut extra_data,
564 "max_sdk_version",
565 uses_sdk_attributes.get("android:maxSdkVersion"),
566 );
567
568 if !requested_permissions.is_empty() {
569 extra_data.insert(
570 "requested_permissions".to_string(),
571 requested_permissions
572 .into_iter()
573 .map(serde_json::Value::from)
574 .collect::<Vec<_>>()
575 .into(),
576 );
577 }
578 if !uses_libraries.is_empty() {
579 extra_data.insert(
580 "uses_libraries".to_string(),
581 uses_libraries
582 .into_iter()
583 .map(serde_json::Value::from)
584 .collect::<Vec<_>>()
585 .into(),
586 );
587 }
588
589 if !extra_data.is_empty() {
590 package.extra_data = Some(extra_data);
591 }
592
593 package
594}
595
596fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
597 attributes
598 .into_iter()
599 .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
600 .collect()
601}
602
/// Rewrites a hexadecimal attribute value as decimal text.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits parse as a `u64`
/// are returned in base 10; every other value is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(rest) => Some(rest),
        None => value.strip_prefix("0x"),
    };

    match hex_digits.map(|digits| u64::from_str_radix(digits, 16)) {
        Some(Ok(number)) => number.to_string(),
        _ => value.to_owned(),
    }
}
616
617fn insert_extra(
618 extra_data: &mut HashMap<String, serde_json::Value>,
619 key: &str,
620 value: Option<&String>,
621) {
622 if let Some(value) = value {
623 extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
624 }
625}
626
627fn read_best_zip_entry<F>(
628 path: &Path,
629 mut rank_entry: F,
630) -> Result<Option<(String, Vec<u8>)>, String>
631where
632 F: FnMut(&str) -> Option<u8>,
633{
634 let metadata = fs::metadata(path)
635 .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
636 if metadata.len() > MAX_ARCHIVE_SIZE {
637 return Err(format!(
638 "Archive {:?} is {} bytes, exceeding the {} byte limit",
639 path,
640 metadata.len(),
641 MAX_ARCHIVE_SIZE
642 ));
643 }
644
645 let file = File::open(path)
646 .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
647 let mut archive = ZipArchive::new(file)
648 .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
649
650 let mut total_uncompressed = 0u64;
651 let mut best: Option<(u8, String, Vec<u8>)> = None;
652 let entry_count = archive.len().min(MAX_ITERATION_COUNT);
653
654 if archive.len() > MAX_ITERATION_COUNT {
655 warn!(
656 "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
657 path, MAX_ITERATION_COUNT
658 );
659 }
660
661 for index in 0..entry_count {
662 let mut entry = archive.by_index(index).map_err(|error| {
663 format!(
664 "Failed to read ZIP entry {} in {:?}: {}",
665 index, path, error
666 )
667 })?;
668
669 total_uncompressed = total_uncompressed.saturating_add(entry.size());
670 if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
671 return Err(format!(
672 "Archive {:?} exceeds total uncompressed size limit of {} bytes",
673 path, MAX_TOTAL_UNCOMPRESSED_SIZE
674 ));
675 }
676
677 let entry_name = entry.name().replace('\\', "/");
678 if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
679 return Err(format!(
680 "Archive entry {} contains a disallowed path",
681 entry_name
682 ));
683 }
684 let Some(rank) = rank_entry(&entry_name) else {
685 continue;
686 };
687
688 if entry.size() > MAX_FILE_SIZE {
689 return Err(format!(
690 "Archive entry {} is {} bytes, exceeding the {} byte limit",
691 entry_name,
692 entry.size(),
693 MAX_FILE_SIZE
694 ));
695 }
696
697 let compressed_size = entry.compressed_size();
698 if compressed_size > 0 {
699 let ratio = entry.size() as f64 / compressed_size as f64;
700 if ratio > MAX_COMPRESSION_RATIO {
701 return Err(format!(
702 "Archive entry {} has suspicious compression ratio {:.2}:1",
703 entry_name, ratio
704 ));
705 }
706 }
707
708 let should_replace = match &best {
709 Some((best_rank, _, _)) => rank < *best_rank,
710 None => true,
711 };
712
713 if should_replace {
714 let mut bytes = Vec::with_capacity(entry.size() as usize);
715 entry.read_to_end(&mut bytes).map_err(|error| {
716 format!("Failed to read archive entry {}: {}", entry_name, error)
717 })?;
718 best = Some((rank, entry_name, bytes));
719 }
720 }
721
722 Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
723}
724
725fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
726 let node =
727 ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
728 let root_element = node
729 .element()
730 .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
731 if root_element.name != "manifest" {
732 return Err(format!(
733 "Unexpected proto XML root element: {}",
734 root_element.name
735 ));
736 }
737
738 let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
739 let uses_sdk_attributes = root_element
740 .child_elements_named("uses-sdk")
741 .next()
742 .map(|element| proto_attributes_to_map(&element.attribute))
743 .unwrap_or_default();
744 let application_attributes = root_element
745 .child_elements_named("application")
746 .next()
747 .map(|element| proto_attributes_to_map(&element.attribute))
748 .unwrap_or_default();
749 let requested_permissions = root_element
750 .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
751 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
752 .collect::<Vec<_>>();
753 let uses_libraries = root_element
754 .child_elements_named("uses-library")
755 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
756 .collect::<Vec<_>>();
757
758 let mut package = build_manifest_package_data(
759 DatasourceId::AndroidAab,
760 &manifest_attributes,
761 &uses_sdk_attributes,
762 &application_attributes,
763 requested_permissions,
764 uses_libraries,
765 );
766
767 if let Some(extra_data) = package.extra_data.as_mut() {
768 extra_data.insert("manifest_encoding".to_string(), "proto".into());
769 } else {
770 package.extra_data = Some(HashMap::from([(
771 "manifest_encoding".to_string(),
772 serde_json::Value::String("proto".to_string()),
773 )]));
774 }
775
776 Ok(package)
777}
778
779fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
780 attributes
781 .iter()
782 .filter_map(|attribute| {
783 let key = proto_attribute_key(attribute)?;
784 let value = proto_attribute_value(attribute)?;
785 Some((key, truncate_field(value)))
786 })
787 .collect()
788}
789
790fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
791 if attribute.name.is_empty() {
792 return None;
793 }
794
795 if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
796 return Some(format!("android:{}", attribute.name));
797 }
798
799 Some(attribute.name.clone())
800}
801
802fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
803 if !attribute.value.is_empty() {
804 return Some(attribute.value.clone());
805 }
806
807 attribute
808 .compiled_item
809 .as_ref()
810 .and_then(proto_item_to_string)
811}
812
813fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
814 match &item.value {
815 Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
816 Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
817 Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
818 _ => None,
819 }
820}
821
822fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
823 match &primitive.value {
824 Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
825 Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
826 Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
827 Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
828 Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
829 Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
830 _ => None,
831 }
832}
833
/// One parsed textproto message: every field name maps to the values it was given, in
/// the order they appeared.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single textproto field value.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// A scalar, stored as text.
    Scalar(String),
    /// A nested message.
    Map(ProtoMap),
}

impl ProtoMap {
    /// First scalar value recorded for `key`; nested messages are skipped.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = value {
                return Some(text.clone());
            }
        }
        None
    }

    /// Every scalar value recorded for `key`, in order; empty when there is none.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let Some(values) = self.fields.get(key) else {
            return Vec::new();
        };

        let mut scalars = Vec::new();
        for value in values {
            if let ProtoValue::Scalar(text) = value {
                scalars.push(text.clone());
            }
        }
        scalars
    }

    /// First nested message recorded for `key`; scalars are skipped.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = value {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Every nested message recorded for `key`, in order; empty when there is none.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let Some(values) = self.fields.get(key) else {
            return Vec::new();
        };

        let mut nested_maps = Vec::new();
        for value in values {
            if let ProtoValue::Map(nested) = value {
                nested_maps.push(nested.clone());
            }
        }
        nested_maps
    }
}
888
889fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
890 let mut parser = TextProtoParser::new(content)?;
891 parser.parse_map(false)
892}
893
/// Parser over a pre-tokenized textproto document.
struct TextProtoParser {
    /// All tokens of the document, in source order.
    tokens: Vec<TextProtoToken>,
    /// Index of the next token to consume.
    position: usize,
}
898
/// Lexical token produced by `tokenize_textproto`.
#[derive(Debug, Clone)]
enum TextProtoToken {
    /// Bare word: a field name or an unquoted scalar.
    Identifier(String),
    /// Content of a double-quoted string, without the quotes.
    String(String),
    /// `:`
    Colon,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
}
907
908impl TextProtoParser {
909 fn new(content: &str) -> Result<Self, String> {
910 Ok(Self {
911 tokens: tokenize_textproto(content)?,
912 position: 0,
913 })
914 }
915
916 fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
917 let mut map = ProtoMap::default();
918
919 while let Some(token) = self.peek() {
920 match token {
921 TextProtoToken::RBrace if stop_on_rbrace => {
922 self.position += 1;
923 break;
924 }
925 TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
926 TextProtoToken::Identifier(_) => {
927 let key = self.expect_identifier()?;
928 match self.peek() {
929 Some(TextProtoToken::Colon) => {
930 self.position += 1;
931 let value = self.expect_scalar()?;
932 map.fields
933 .entry(key)
934 .or_default()
935 .push(ProtoValue::Scalar(truncate_field(value)));
936 }
937 Some(TextProtoToken::LBrace) => {
938 self.position += 1;
939 let value = self.parse_map(true)?;
940 map.fields
941 .entry(key)
942 .or_default()
943 .push(ProtoValue::Map(value));
944 }
945 Some(other) => {
946 return Err(format!("Unexpected token after key: {:?}", other));
947 }
948 None => return Err("Unexpected end of input after key".to_string()),
949 }
950 }
951 other => return Err(format!("Unexpected token in textproto: {:?}", other)),
952 }
953 }
954
955 Ok(map)
956 }
957
958 fn expect_identifier(&mut self) -> Result<String, String> {
959 match self.next() {
960 Some(TextProtoToken::Identifier(value)) => Ok(value),
961 other => Err(format!("Expected identifier, found {:?}", other)),
962 }
963 }
964
965 fn expect_scalar(&mut self) -> Result<String, String> {
966 match self.next() {
967 Some(TextProtoToken::String(mut value)) => {
968 while matches!(self.peek(), Some(TextProtoToken::String(_))) {
969 if let Some(TextProtoToken::String(next)) = self.next() {
970 value.push_str(&next);
971 }
972 }
973 Ok(value)
974 }
975 Some(TextProtoToken::Identifier(value)) => Ok(value),
976 other => Err(format!("Expected scalar value, found {:?}", other)),
977 }
978 }
979
980 fn peek(&self) -> Option<&TextProtoToken> {
981 self.tokens.get(self.position)
982 }
983
984 fn next(&mut self) -> Option<TextProtoToken> {
985 let token = self.tokens.get(self.position).cloned();
986 if token.is_some() {
987 self.position += 1;
988 }
989 token
990 }
991}
992
993fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
994 let mut tokens = Vec::new();
995 let chars = content.chars().collect::<Vec<_>>();
996 let mut index = 0usize;
997
998 while index < chars.len() {
999 match chars[index] {
1000 '{' => {
1001 tokens.push(TextProtoToken::LBrace);
1002 index += 1;
1003 }
1004 '}' => {
1005 tokens.push(TextProtoToken::RBrace);
1006 index += 1;
1007 }
1008 ':' => {
1009 tokens.push(TextProtoToken::Colon);
1010 index += 1;
1011 }
1012 '"' => {
1013 index += 1;
1014 let mut value = String::new();
1015 while index < chars.len() {
1016 match chars[index] {
1017 '\\' if index + 1 < chars.len() => {
1018 index += 1;
1019 value.push(chars[index]);
1020 index += 1;
1021 }
1022 '"' => {
1023 index += 1;
1024 break;
1025 }
1026 character => {
1027 value.push(character);
1028 index += 1;
1029 }
1030 }
1031 }
1032 tokens.push(TextProtoToken::String(value));
1033 }
1034 '#' => {
1035 while index < chars.len() && chars[index] != '\n' {
1036 index += 1;
1037 }
1038 }
1039 '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1040 index += 2;
1041 while index < chars.len() && chars[index] != '\n' {
1042 index += 1;
1043 }
1044 }
1045 character if character.is_ascii_whitespace() => index += 1,
1046 _ => {
1047 let start = index;
1048 while index < chars.len() {
1049 let character = chars[index];
1050 let starts_comment =
1051 character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1052
1053 if character.is_ascii_whitespace()
1054 || matches!(character, '{' | '}' | ':' | '#')
1055 || starts_comment
1056 {
1057 break;
1058 }
1059
1060 index += 1;
1061 }
1062
1063 let token = chars[start..index].iter().collect::<String>();
1064 if token.is_empty() {
1065 return Err("Encountered empty textproto token".to_string());
1066 }
1067 tokens.push(TextProtoToken::Identifier(token));
1068 }
1069 }
1070 }
1071
1072 Ok(tokens)
1073}
1074
/// Protobuf message holding a line/column pair, decoded with prost.
// NOTE(review): field tags look like they mirror AAPT2's `SourcePosition` message; confirm
// against the upstream Resources.proto before changing them.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoSourcePosition {
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1082
/// Protobuf message for one XML node: either an element or a run of text, plus an
/// optional source position. This is the type decoded from a proto manifest.
// NOTE(review): field tags look like they mirror AAPT2's `XmlNode` message; confirm against
// the upstream Resources.proto before changing them.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlNode {
    /// Node payload; `None` when neither variant was present in the encoded message.
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1090
1091impl ProtoXmlNode {
1092 fn element(&self) -> Option<&ProtoXmlElement> {
1093 match &self.node {
1094 Some(proto_xml_node::Node::Element(element)) => Some(element),
1095 _ => None,
1096 }
1097 }
1098}
1099
/// Holds the `oneof` payload type of `ProtoXmlNode`.
pub(crate) mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// Payload of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// An XML element with its attributes and children.
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text node.
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1112
/// Protobuf message for an XML element: its name, namespace information,
/// attributes, and child nodes.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlElement {
    /// Namespace declarations attached to this element (field 1, repeated).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (field 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element name (field 3); this is what `child_elements_named` compares against.
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (field 4, repeated).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, which may be elements or text (field 5, repeated).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1126
1127impl ProtoXmlElement {
1128 fn child_elements_named<'a>(
1129 &'a self,
1130 name: &'a str,
1131 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1132 self.child
1133 .iter()
1134 .filter_map(ProtoXmlNode::element)
1135 .filter(move |element| element.name == name)
1136 }
1137
1138 fn child_elements_named_any<'a>(
1139 &'a self,
1140 names: &'a [&'a str],
1141 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1142 self.child
1143 .iter()
1144 .filter_map(ProtoXmlNode::element)
1145 .filter(move |element| names.contains(&element.name.as_str()))
1146 }
1147}
1148
/// Protobuf message for an XML namespace declaration: a prefix bound to a URI.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlNamespace {
    /// Namespace prefix (field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position of the declaration (field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1158
/// Protobuf message for one attribute of an XML element.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (field 1).
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name (field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Attribute value as a string (field 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position of the attribute (field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Numeric resource identifier (field 5).
    // NOTE(review): presumably the Android resource ID associated with the
    // attribute name — confirm against the upstream schema.
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional typed item carried alongside `value` (field 6).
    // NOTE(review): presumably the compiled form of the attribute value — confirm.
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1174
/// Protobuf message for a typed value plus flag-related metadata.
///
/// Only the `oneof` cases with tags 2, 3, and 7 are modeled here
/// (see `proto_item::Value`).
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoItem {
    /// The value payload: string (2), raw string (3), or primitive (7);
    /// `None` when none of the modeled cases is set.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status as a raw `u32` (field 8).
    // NOTE(review): likely an enum in the upstream schema, kept as an integer here — confirm.
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Name of the flag (field 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1186
/// Namespace module for the `oneof` payload enum referenced by
/// `ProtoItem::value` via `proto_item::Value`.
pub(crate) mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// The mutually exclusive value kinds modeled for an item.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// A string value message (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// A raw string value message (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// A primitive value message (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1201
/// Protobuf message wrapping a single string; the payload of
/// `proto_item::Value::Str`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoStringValue {
    /// The string contents (field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1207
/// Protobuf message wrapping a single string; the payload of
/// `proto_item::Value::RawStr`.
// NOTE(review): how a "raw" string differs from `ProtoStringValue` is not
// visible here — consult the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoRawStringValue {
    /// The string contents (field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1213
/// Protobuf message for a primitive value; the payload of
/// `proto_item::Value::Prim`.
///
/// Only the `oneof` cases with tags 3, 6, 7, 8, 13, and 14 are modeled
/// (see `proto_primitive::Value`).
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoPrimitive {
    /// The primitive payload; `None` when none of the modeled cases is set.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1219
/// Namespace module for the `oneof` payload enum referenced by
/// `ProtoPrimitive::value` via `proto_primitive::Value`.
pub(crate) mod proto_primitive {
    use prost::Oneof;

    /// The mutually exclusive primitive kinds modeled here.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// 32-bit float (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed 32-bit integer (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned 32-bit integer (protobuf field 7).
        // NOTE(review): the variant name suggests the source value was written
        // in hexadecimal — confirm.
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension stored as a raw `u32` (protobuf field 13).
        // NOTE(review): the bit-level encoding is not visible here — confirm before decoding.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction stored as a raw `u32` (protobuf field 14).
        // NOTE(review): the bit-level encoding is not visible here — confirm before decoding.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1239
// Parser registrations for the four Android parsers declared in this file.
// NOTE(review): the positional arguments look like (description, path globs,
// package type, primary language, documentation URL) — confirm against the
// `register_parser!` macro definition.

// Soong METADATA textproto files.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);

// AndroidManifest.xml files, in either text XML or binary AXML form.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);

// APK archives.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);

// Android App Bundle (.aab) archives.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);