1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
/// Package type reported by every Android parser in this file.
const PACKAGE_TYPE: PackageType = PackageType::Android;
/// Largest on-disk archive size (100 MiB) that `read_best_zip_entry` will open.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Largest declared uncompressed size (50 MiB) accepted for a selected archive entry.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Cap (1 GiB) on the running sum of declared uncompressed entry sizes during an archive scan.
const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
/// Highest declared uncompressed/compressed size ratio tolerated for a selected archive entry.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
/// Namespace URI whose attributes are keyed with an `android:` prefix when reading proto manifests.
const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30 PackageData {
31 package_type: Some(PACKAGE_TYPE),
32 datasource_id: Some(datasource_id),
33 ..Default::default()
34 }
35}
36
/// Parser for files named `METADATA` (outside `*.dist-info` directories), read as textproto.
pub struct AndroidSoongMetadataParser;
/// Parser for standalone `AndroidManifest.xml` files, in either text or binary XML form.
pub struct AndroidManifestParser;
/// Parser for `.apk` zip archives; reads the binary `AndroidManifest.xml` entry.
pub struct AndroidApkParser;
/// Parser for `.aab` zip archives; reads a protobuf-encoded `*/manifest/AndroidManifest.xml` entry.
pub struct AndroidAabParser;
41
42impl PackageParser for AndroidSoongMetadataParser {
43 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45 fn is_match(path: &Path) -> bool {
46 path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
47 && !path
48 .parent()
49 .and_then(|parent| parent.file_name())
50 .and_then(|name| name.to_str())
51 .is_some_and(|name| name.ends_with(".dist-info"))
52 }
53
54 fn extract_packages(path: &Path) -> Vec<PackageData> {
55 let content = match crate::parsers::utils::read_file_to_string(path, None) {
56 Ok(content) => content,
57 Err(error) => {
58 warn!(
59 "Failed to read Android Soong METADATA {:?}: {}",
60 path, error
61 );
62 return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
63 }
64 };
65
66 vec![parse_soong_metadata(&content)]
67 }
68}
69
70impl PackageParser for AndroidManifestParser {
71 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73 fn is_match(path: &Path) -> bool {
74 path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
75 }
76
77 fn extract_packages(path: &Path) -> Vec<PackageData> {
78 let bytes = match read_file_bytes(path, None) {
79 Ok(bytes) => bytes,
80 Err(error) => {
81 warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
82 return vec![default_package_data(DatasourceId::AndroidManifestXml)];
83 }
84 };
85
86 vec![parse_manifest_bytes(
87 &bytes,
88 DatasourceId::AndroidManifestXml,
89 "AndroidManifest.xml",
90 )]
91 }
92}
93
94impl PackageParser for AndroidApkParser {
95 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
96
97 fn is_match(path: &Path) -> bool {
98 path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
99 }
100
101 fn extract_packages(path: &Path) -> Vec<PackageData> {
102 let package_data = match read_best_zip_entry(path, |entry_name| {
103 if entry_name == "AndroidManifest.xml" {
104 Some(0)
105 } else {
106 None
107 }
108 }) {
109 Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
110 .unwrap_or_else(|error| {
111 warn!("Failed to parse APK manifest {:?}: {}", path, error);
112 default_package_data(DatasourceId::AndroidApk)
113 }),
114 Ok(None) => {
115 warn!("No AndroidManifest.xml found in APK {:?}", path);
116 default_package_data(DatasourceId::AndroidApk)
117 }
118 Err(error) => {
119 warn!("Failed to read APK archive {:?}: {}", path, error);
120 default_package_data(DatasourceId::AndroidApk)
121 }
122 };
123
124 vec![package_data]
125 }
126}
127
128impl PackageParser for AndroidAabParser {
129 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
130
131 fn is_match(path: &Path) -> bool {
132 path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
133 }
134
135 fn extract_packages(path: &Path) -> Vec<PackageData> {
136 let package_data = match read_best_zip_entry(path, |entry_name| {
137 if entry_name == "base/manifest/AndroidManifest.xml" {
138 Some(0)
139 } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
140 Some(1)
141 } else {
142 None
143 }
144 }) {
145 Ok(Some((entry_name, bytes))) => {
146 parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
147 warn!(
148 "Failed to parse AAB manifest {:?} ({}): {}",
149 path, entry_name, error
150 );
151 default_package_data(DatasourceId::AndroidAab)
152 })
153 }
154 Ok(None) => {
155 warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
156 default_package_data(DatasourceId::AndroidAab)
157 }
158 Err(error) => {
159 warn!("Failed to read AAB archive {:?}: {}", path, error);
160 default_package_data(DatasourceId::AndroidAab)
161 }
162 };
163
164 vec![package_data]
165 }
166}
167
168fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
169 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
170 let metadata =
171 fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
172
173 if metadata.len() > limit {
174 return Err(format!(
175 "File {:?} is {} bytes, exceeding the {} byte limit",
176 path,
177 metadata.len(),
178 limit
179 ));
180 }
181
182 let mut file =
183 File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
184 let mut bytes = Vec::with_capacity(metadata.len() as usize);
185 file.read_to_end(&mut bytes)
186 .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
187 Ok(bytes)
188}
189
190fn parse_soong_metadata(content: &str) -> PackageData {
191 let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
192 warn!("Failed to parse Android Soong METADATA: {}", error);
193 ProtoMap::default()
194 });
195
196 let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
197 package.name = parsed.get_first_string("name").map(truncate_field);
198 package.description = parsed.get_first_string("description").map(truncate_field);
199
200 if let Some(third_party) = parsed.get_first_map("third_party") {
201 package.version = third_party.get_first_string("version").map(truncate_field);
202
203 let url_entries = third_party
204 .get_all_maps("url")
205 .into_iter()
206 .map(|entry| {
207 let type_ = entry.get_first_string("type").map(truncate_field);
208 let value = entry.get_first_string("value").map(truncate_field);
209 (type_, value)
210 })
211 .collect::<Vec<_>>();
212
213 let homepage_url = third_party.get_first_string("homepage").or_else(|| {
214 url_entries
215 .iter()
216 .find(|(type_, _)| {
217 type_
218 .as_deref()
219 .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
220 })
221 .and_then(|(_, value)| value.clone())
222 });
223 package.homepage_url = homepage_url.map(truncate_field);
224
225 let license_types = third_party
226 .get_all_strings("license_type")
227 .into_iter()
228 .map(truncate_field)
229 .collect::<Vec<_>>();
230 if !license_types.is_empty() {
231 package.extracted_license_statement = Some(license_types.join(", "));
232 }
233
234 let identifiers = third_party
235 .get_all_maps("identifier")
236 .into_iter()
237 .map(|identifier| {
238 let type_ = identifier.get_first_string("type").map(truncate_field);
239 let value = identifier.get_first_string("value").map(truncate_field);
240 let mut object = serde_json::Map::new();
241 if let Some(type_) = type_ {
242 object.insert("type".to_string(), type_.into());
243 }
244 if let Some(value) = &value {
245 object.insert("value".to_string(), value.clone().into());
246 }
247
248 if package.vcs_url.is_none()
249 && let (Some(type_), Some(value)) = (
250 identifier.get_first_string("type"),
251 identifier.get_first_string("value"),
252 )
253 {
254 let lower_type = type_.to_ascii_lowercase();
255 if lower_type.contains("git") {
256 package.vcs_url = Some(truncate_field(value));
257 } else if lower_type.contains("archive")
258 || lower_type.contains("tar")
259 || lower_type.contains("zip")
260 {
261 package.download_url = Some(truncate_field(value));
262 }
263 }
264
265 serde_json::Value::Object(object)
266 })
267 .collect::<Vec<_>>();
268
269 for (type_, value) in &url_entries {
270 let Some(value) = value else {
271 continue;
272 };
273
274 match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
275 Some("git") if package.vcs_url.is_none() => {
276 package.vcs_url = Some(value.clone());
277 }
278 Some("archive") if package.download_url.is_none() => {
279 package.download_url = Some(value.clone());
280 }
281 Some("homepage") if package.homepage_url.is_none() => {
282 package.homepage_url = Some(value.clone());
283 }
284 _ => {}
285 }
286 }
287
288 let mut extra_data = HashMap::new();
289 if !identifiers.is_empty() {
290 extra_data.insert("identifiers".to_string(), identifiers.into());
291 }
292 if !url_entries.is_empty() {
293 extra_data.insert(
294 "urls".to_string(),
295 url_entries
296 .iter()
297 .map(|(type_, value)| {
298 let mut object = serde_json::Map::new();
299 if let Some(type_) = type_ {
300 object.insert("type".to_string(), type_.clone().into());
301 }
302 if let Some(value) = value {
303 object.insert("value".to_string(), value.clone().into());
304 }
305 serde_json::Value::Object(object)
306 })
307 .collect::<Vec<_>>()
308 .into(),
309 );
310 }
311
312 if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
313 let year = last_upgrade_date.get_first_string("year");
314 let month = last_upgrade_date.get_first_string("month");
315 let day = last_upgrade_date.get_first_string("day");
316 if let (Some(year), Some(month), Some(day)) = (year, month, day) {
317 let formatted = format!(
318 "{:04}-{:02}-{:02}",
319 year.parse::<u32>().unwrap_or_default(),
320 month.parse::<u32>().unwrap_or_default(),
321 day.parse::<u32>().unwrap_or_default()
322 );
323 extra_data.insert(
324 "last_upgrade_date".to_string(),
325 truncate_field(formatted).into(),
326 );
327 }
328 }
329
330 if let Some(upstream_url) = third_party.get_first_string("url") {
331 extra_data.insert(
332 "upstream_url".to_string(),
333 truncate_field(upstream_url).into(),
334 );
335 }
336
337 if !extra_data.is_empty() {
338 package.extra_data = Some(extra_data);
339 }
340 }
341
342 package
343}
344
345fn parse_manifest_bytes(bytes: &[u8], datasource_id: DatasourceId, context: &str) -> PackageData {
346 if looks_like_text_xml(bytes) {
347 match parse_text_manifest_bytes(bytes, datasource_id) {
348 Ok(package) => return package,
349 Err(error) => warn!("Failed to parse {} as text XML: {}", context, error),
350 }
351 }
352
353 parse_binary_manifest_bytes(bytes, datasource_id).unwrap_or_else(|error| {
354 warn!(
355 "Failed to parse {} as binary Android XML: {}",
356 context, error
357 );
358 default_package_data(datasource_id)
359 })
360}
361
/// Heuristically decides whether `bytes` hold textual XML rather than binary Android XML.
///
/// Returns `true` when the first byte that is not ASCII whitespace is `<`. A leading
/// UTF-8 byte-order mark is skipped first, so manifests saved with a BOM are still
/// recognised as text. Empty or whitespace-only input returns `false`.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    body.iter()
        .copied()
        .find(|byte| !byte.is_ascii_whitespace())
        == Some(b'<')
}
368
369fn parse_text_manifest_bytes(
370 bytes: &[u8],
371 datasource_id: DatasourceId,
372) -> Result<PackageData, String> {
373 let content = String::from_utf8(bytes.to_vec())
374 .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
375
376 let mut reader = Reader::from_str(&content);
377 reader.config_mut().trim_text(true);
378
379 let mut buf = Vec::new();
380 let mut manifest_attributes = HashMap::new();
381 let mut uses_sdk_attributes = HashMap::new();
382 let mut application_attributes = HashMap::new();
383 let mut requested_permissions = Vec::new();
384 let mut uses_libraries = Vec::new();
385 let mut iteration_count = 0usize;
386
387 loop {
388 iteration_count += 1;
389 if iteration_count > MAX_ITERATION_COUNT {
390 return Err(format!(
391 "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
392 MAX_ITERATION_COUNT
393 ));
394 }
395
396 match reader.read_event_into(&mut buf) {
397 Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
398 let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
399 let attributes = xml_attributes_to_map(&reader, &event)?;
400 match name.as_str() {
401 "manifest" if manifest_attributes.is_empty() => {
402 manifest_attributes = attributes
403 }
404 "uses-sdk" => uses_sdk_attributes = attributes,
405 "application" if application_attributes.is_empty() => {
406 application_attributes = attributes;
407 }
408 "uses-permission" | "uses-permission-sdk-23" => {
409 if let Some(permission) = attributes.get("android:name") {
410 requested_permissions.push(permission.clone());
411 }
412 }
413 "uses-library" => {
414 if let Some(library_name) = attributes.get("android:name") {
415 uses_libraries.push(library_name.clone());
416 }
417 }
418 _ => {}
419 }
420 }
421 Ok(Event::Eof) => break,
422 Err(error) => {
423 return Err(format!(
424 "XML parse error at position {}: {}",
425 reader.buffer_position(),
426 error
427 ));
428 }
429 _ => {}
430 }
431
432 buf.clear();
433 }
434
435 Ok(build_manifest_package_data(
436 datasource_id,
437 &manifest_attributes,
438 &uses_sdk_attributes,
439 &application_attributes,
440 requested_permissions,
441 uses_libraries,
442 ))
443}
444
445fn xml_attributes_to_map(
446 reader: &Reader<&[u8]>,
447 event: &quick_xml::events::BytesStart<'_>,
448) -> Result<HashMap<String, String>, String> {
449 let mut attributes = HashMap::new();
450
451 for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
452 let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
453 let value = attribute
454 .decode_and_unescape_value(reader.decoder())
455 .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
456 .into_owned();
457 attributes.insert(key, truncate_field(value));
458 }
459
460 Ok(attributes)
461}
462
463fn parse_binary_manifest_bytes(
464 bytes: &[u8],
465 datasource_id: DatasourceId,
466) -> Result<PackageData, String> {
467 let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
468 .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
469 .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
470
471 let manifest_attributes =
472 normalize_binary_attributes(axml.root().borrow().attributes().clone());
473 let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
474 .into_iter()
475 .next()
476 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
477 .unwrap_or_default();
478 let application_attributes = find_nodes_by_type(&axml, "application")
479 .into_iter()
480 .next()
481 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
482 .unwrap_or_default();
483
484 let requested_permissions = get_requested_permissions(&axml)
485 .into_iter()
486 .map(truncate_field)
487 .collect::<Vec<_>>();
488 let uses_libraries = find_nodes_by_type(&axml, "uses-library")
489 .into_iter()
490 .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
491 .map(truncate_field)
492 .collect::<Vec<_>>();
493
494 Ok(build_manifest_package_data(
495 datasource_id,
496 &manifest_attributes,
497 &uses_sdk_attributes,
498 &application_attributes,
499 requested_permissions,
500 uses_libraries,
501 ))
502}
503
504fn build_manifest_package_data(
505 datasource_id: DatasourceId,
506 manifest_attributes: &HashMap<String, String>,
507 uses_sdk_attributes: &HashMap<String, String>,
508 application_attributes: &HashMap<String, String>,
509 requested_permissions: Vec<String>,
510 uses_libraries: Vec<String>,
511) -> PackageData {
512 let mut package = default_package_data(datasource_id);
513 package.name = manifest_attributes.get("package").cloned();
514 package.version = manifest_attributes
515 .get("android:versionName")
516 .cloned()
517 .or_else(|| manifest_attributes.get("android:versionCode").cloned());
518
519 package.description = application_attributes
520 .get("android:label")
521 .filter(|label| {
522 !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
523 })
524 .cloned();
525
526 let mut extra_data = HashMap::new();
527 insert_extra(
528 &mut extra_data,
529 "version_code",
530 manifest_attributes.get("android:versionCode"),
531 );
532 insert_extra(
533 &mut extra_data,
534 "compile_sdk_version",
535 manifest_attributes.get("android:compileSdkVersion"),
536 );
537 insert_extra(
538 &mut extra_data,
539 "compile_sdk_version_codename",
540 manifest_attributes.get("android:compileSdkVersionCodename"),
541 );
542 insert_extra(
543 &mut extra_data,
544 "platform_build_version_code",
545 manifest_attributes.get("platformBuildVersionCode"),
546 );
547 insert_extra(
548 &mut extra_data,
549 "platform_build_version_name",
550 manifest_attributes.get("platformBuildVersionName"),
551 );
552 insert_extra(
553 &mut extra_data,
554 "min_sdk_version",
555 uses_sdk_attributes.get("android:minSdkVersion"),
556 );
557 insert_extra(
558 &mut extra_data,
559 "target_sdk_version",
560 uses_sdk_attributes.get("android:targetSdkVersion"),
561 );
562 insert_extra(
563 &mut extra_data,
564 "max_sdk_version",
565 uses_sdk_attributes.get("android:maxSdkVersion"),
566 );
567
568 if !requested_permissions.is_empty() {
569 extra_data.insert(
570 "requested_permissions".to_string(),
571 requested_permissions
572 .into_iter()
573 .map(serde_json::Value::from)
574 .collect::<Vec<_>>()
575 .into(),
576 );
577 }
578 if !uses_libraries.is_empty() {
579 extra_data.insert(
580 "uses_libraries".to_string(),
581 uses_libraries
582 .into_iter()
583 .map(serde_json::Value::from)
584 .collect::<Vec<_>>()
585 .into(),
586 );
587 }
588
589 if !extra_data.is_empty() {
590 package.extra_data = Some(extra_data);
591 }
592
593 package
594}
595
596fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
597 attributes
598 .into_iter()
599 .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
600 .collect()
601}
602
/// Rewrites a hexadecimal attribute rendering as a decimal string.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits parse as a `u64`
/// are returned in decimal. Anything else, including digits that do not parse or that
/// overflow, is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(rest) => Some(rest),
        None => value.strip_prefix("0x"),
    };

    hex_digits
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
616
617fn insert_extra(
618 extra_data: &mut HashMap<String, serde_json::Value>,
619 key: &str,
620 value: Option<&String>,
621) {
622 if let Some(value) = value {
623 extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
624 }
625}
626
627fn read_best_zip_entry<F>(
628 path: &Path,
629 mut rank_entry: F,
630) -> Result<Option<(String, Vec<u8>)>, String>
631where
632 F: FnMut(&str) -> Option<u8>,
633{
634 let metadata = fs::metadata(path)
635 .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
636 if metadata.len() > MAX_ARCHIVE_SIZE {
637 return Err(format!(
638 "Archive {:?} is {} bytes, exceeding the {} byte limit",
639 path,
640 metadata.len(),
641 MAX_ARCHIVE_SIZE
642 ));
643 }
644
645 let file = File::open(path)
646 .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
647 let mut archive = ZipArchive::new(file)
648 .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
649
650 let mut total_uncompressed = 0u64;
651 let mut best: Option<(u8, String, Vec<u8>)> = None;
652 let entry_count = archive.len().min(MAX_ITERATION_COUNT);
653
654 if archive.len() > MAX_ITERATION_COUNT {
655 warn!(
656 "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
657 path, MAX_ITERATION_COUNT
658 );
659 }
660
661 for index in 0..entry_count {
662 let mut entry = archive.by_index(index).map_err(|error| {
663 format!(
664 "Failed to read ZIP entry {} in {:?}: {}",
665 index, path, error
666 )
667 })?;
668
669 total_uncompressed = total_uncompressed.saturating_add(entry.size());
670 if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
671 return Err(format!(
672 "Archive {:?} exceeds total uncompressed size limit of {} bytes",
673 path, MAX_TOTAL_UNCOMPRESSED_SIZE
674 ));
675 }
676
677 let entry_name = entry.name().replace('\\', "/");
678 if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
679 return Err(format!(
680 "Archive entry {} contains a disallowed path",
681 entry_name
682 ));
683 }
684 let Some(rank) = rank_entry(&entry_name) else {
685 continue;
686 };
687
688 if entry.size() > MAX_FILE_SIZE {
689 return Err(format!(
690 "Archive entry {} is {} bytes, exceeding the {} byte limit",
691 entry_name,
692 entry.size(),
693 MAX_FILE_SIZE
694 ));
695 }
696
697 let compressed_size = entry.compressed_size();
698 if compressed_size > 0 {
699 let ratio = entry.size() as f64 / compressed_size as f64;
700 if ratio > MAX_COMPRESSION_RATIO {
701 return Err(format!(
702 "Archive entry {} has suspicious compression ratio {:.2}:1",
703 entry_name, ratio
704 ));
705 }
706 }
707
708 let should_replace = match &best {
709 Some((best_rank, _, _)) => rank < *best_rank,
710 None => true,
711 };
712
713 if should_replace {
714 let mut bytes = Vec::with_capacity(entry.size() as usize);
715 entry.read_to_end(&mut bytes).map_err(|error| {
716 format!("Failed to read archive entry {}: {}", entry_name, error)
717 })?;
718 best = Some((rank, entry_name, bytes));
719 }
720 }
721
722 Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
723}
724
725fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
726 let node =
727 ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
728 let root_element = node
729 .element()
730 .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
731 if root_element.name != "manifest" {
732 return Err(format!(
733 "Unexpected proto XML root element: {}",
734 root_element.name
735 ));
736 }
737
738 let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
739 let uses_sdk_attributes = root_element
740 .child_elements_named("uses-sdk")
741 .next()
742 .map(|element| proto_attributes_to_map(&element.attribute))
743 .unwrap_or_default();
744 let application_attributes = root_element
745 .child_elements_named("application")
746 .next()
747 .map(|element| proto_attributes_to_map(&element.attribute))
748 .unwrap_or_default();
749 let requested_permissions = root_element
750 .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
751 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
752 .collect::<Vec<_>>();
753 let uses_libraries = root_element
754 .child_elements_named("uses-library")
755 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
756 .collect::<Vec<_>>();
757
758 let mut package = build_manifest_package_data(
759 DatasourceId::AndroidAab,
760 &manifest_attributes,
761 &uses_sdk_attributes,
762 &application_attributes,
763 requested_permissions,
764 uses_libraries,
765 );
766
767 if let Some(extra_data) = package.extra_data.as_mut() {
768 extra_data.insert("manifest_encoding".to_string(), "proto".into());
769 } else {
770 package.extra_data = Some(HashMap::from([(
771 "manifest_encoding".to_string(),
772 serde_json::Value::String("proto".to_string()),
773 )]));
774 }
775
776 Ok(package)
777}
778
779fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
780 attributes
781 .iter()
782 .filter_map(|attribute| {
783 let key = proto_attribute_key(attribute)?;
784 let value = proto_attribute_value(attribute)?;
785 Some((key, truncate_field(value)))
786 })
787 .collect()
788}
789
790fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
791 if attribute.name.is_empty() {
792 return None;
793 }
794
795 if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
796 return Some(format!("android:{}", attribute.name));
797 }
798
799 Some(attribute.name.clone())
800}
801
802fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
803 if !attribute.value.is_empty() {
804 return Some(attribute.value.clone());
805 }
806
807 attribute
808 .compiled_item
809 .as_ref()
810 .and_then(proto_item_to_string)
811}
812
813fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
814 match &item.value {
815 Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
816 Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
817 Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
818 _ => None,
819 }
820}
821
822fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
823 match &primitive.value {
824 Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
825 Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
826 Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
827 Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
828 Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
829 Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
830 _ => None,
831 }
832}
833
/// One parsed textproto message: every field name maps to the values it was given, in
/// the order they appeared.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single textproto field value: either a scalar kept as text or a nested message.
#[derive(Debug, Clone)]
enum ProtoValue {
    Scalar(String),
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar stored under `key`, skipping nested messages.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = value {
                return Some(text.clone());
            }
        }
        None
    }

    /// Returns copies of all scalars stored under `key`, in order; empty if there are none.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let mut strings = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Scalar(text) = value {
                    strings.push(text.clone());
                }
            }
        }
        strings
    }

    /// Returns a copy of the first nested message stored under `key`, skipping scalars.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = value {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Returns copies of all nested messages stored under `key`, in order; empty if none.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let mut maps = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Map(nested) = value {
                    maps.push(nested.clone());
                }
            }
        }
        maps
    }
}
888
889fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
890 let mut parser = TextProtoParser::new(content)?;
891 parser.parse_map(false)
892}
893
/// Parser over a pre-tokenized textproto document.
struct TextProtoParser {
    /// All tokens of the document, in source order.
    tokens: Vec<TextProtoToken>,
    /// Index into `tokens` of the next token to be consumed.
    position: usize,
}
898
/// Lexical tokens produced by `tokenize_textproto`.
#[derive(Debug, Clone)]
enum TextProtoToken {
    /// An unquoted run of characters: a field name or a bare scalar value.
    Identifier(String),
    /// The content of a double-quoted string literal, without the quotes.
    String(String),
    /// The `:` separator.
    Colon,
    /// An opening `{`.
    LBrace,
    /// A closing `}`.
    RBrace,
}
907
908impl TextProtoParser {
909 fn new(content: &str) -> Result<Self, String> {
910 Ok(Self {
911 tokens: tokenize_textproto(content)?,
912 position: 0,
913 })
914 }
915
916 fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
917 let mut map = ProtoMap::default();
918
919 while let Some(token) = self.peek() {
920 match token {
921 TextProtoToken::RBrace if stop_on_rbrace => {
922 self.position += 1;
923 break;
924 }
925 TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
926 TextProtoToken::Identifier(_) => {
927 let key = self.expect_identifier()?;
928 match self.peek() {
929 Some(TextProtoToken::Colon) => {
930 self.position += 1;
931 let value = self.expect_scalar()?;
932 map.fields
933 .entry(key)
934 .or_default()
935 .push(ProtoValue::Scalar(truncate_field(value)));
936 }
937 Some(TextProtoToken::LBrace) => {
938 self.position += 1;
939 let value = self.parse_map(true)?;
940 map.fields
941 .entry(key)
942 .or_default()
943 .push(ProtoValue::Map(value));
944 }
945 Some(other) => {
946 return Err(format!("Unexpected token after key: {:?}", other));
947 }
948 None => return Err("Unexpected end of input after key".to_string()),
949 }
950 }
951 other => return Err(format!("Unexpected token in textproto: {:?}", other)),
952 }
953 }
954
955 Ok(map)
956 }
957
958 fn expect_identifier(&mut self) -> Result<String, String> {
959 match self.next() {
960 Some(TextProtoToken::Identifier(value)) => Ok(value),
961 other => Err(format!("Expected identifier, found {:?}", other)),
962 }
963 }
964
965 fn expect_scalar(&mut self) -> Result<String, String> {
966 match self.next() {
967 Some(TextProtoToken::Identifier(value)) | Some(TextProtoToken::String(value)) => {
968 Ok(value)
969 }
970 other => Err(format!("Expected scalar value, found {:?}", other)),
971 }
972 }
973
974 fn peek(&self) -> Option<&TextProtoToken> {
975 self.tokens.get(self.position)
976 }
977
978 fn next(&mut self) -> Option<TextProtoToken> {
979 let token = self.tokens.get(self.position).cloned();
980 if token.is_some() {
981 self.position += 1;
982 }
983 token
984 }
985}
986
987fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
988 let mut tokens = Vec::new();
989 let chars = content.chars().collect::<Vec<_>>();
990 let mut index = 0usize;
991
992 while index < chars.len() {
993 match chars[index] {
994 '{' => {
995 tokens.push(TextProtoToken::LBrace);
996 index += 1;
997 }
998 '}' => {
999 tokens.push(TextProtoToken::RBrace);
1000 index += 1;
1001 }
1002 ':' => {
1003 tokens.push(TextProtoToken::Colon);
1004 index += 1;
1005 }
1006 '"' => {
1007 index += 1;
1008 let mut value = String::new();
1009 while index < chars.len() {
1010 match chars[index] {
1011 '\\' if index + 1 < chars.len() => {
1012 index += 1;
1013 value.push(chars[index]);
1014 index += 1;
1015 }
1016 '"' => {
1017 index += 1;
1018 break;
1019 }
1020 character => {
1021 value.push(character);
1022 index += 1;
1023 }
1024 }
1025 }
1026 tokens.push(TextProtoToken::String(value));
1027 }
1028 '#' => {
1029 while index < chars.len() && chars[index] != '\n' {
1030 index += 1;
1031 }
1032 }
1033 '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1034 index += 2;
1035 while index < chars.len() && chars[index] != '\n' {
1036 index += 1;
1037 }
1038 }
1039 character if character.is_ascii_whitespace() => index += 1,
1040 _ => {
1041 let start = index;
1042 while index < chars.len() {
1043 let character = chars[index];
1044 let starts_comment =
1045 character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1046
1047 if character.is_ascii_whitespace()
1048 || matches!(character, '{' | '}' | ':' | '#')
1049 || starts_comment
1050 {
1051 break;
1052 }
1053
1054 index += 1;
1055 }
1056
1057 let token = chars[start..index].iter().collect::<String>();
1058 if token.is_empty() {
1059 return Err("Encountered empty textproto token".to_string());
1060 }
1061 tokens.push(TextProtoToken::Identifier(token));
1062 }
1063 }
1064 }
1065
1066 Ok(tokens)
1067}
1068
/// Protobuf message holding a line/column pair.
// NOTE(review): presumably mirrors `SourcePosition` from AAPT2's Resources.proto — confirm.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoSourcePosition {
    /// Line number (field tag 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (field tag 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1076
/// Protobuf message for one XML node: either an element or a text node.
// NOTE(review): presumably mirrors `XmlNode` from AAPT2's Resources.proto — confirm.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlNode {
    /// The node payload (oneof over tags 1 and 2); `None` when neither is set.
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    /// Optional source position of the node (field tag 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1084
1085impl ProtoXmlNode {
1086 fn element(&self) -> Option<&ProtoXmlElement> {
1087 match &self.node {
1088 Some(proto_xml_node::Node::Element(element)) => Some(element),
1089 _ => None,
1090 }
1091 }
1092}
1093
/// Holds the oneof payload type used by `ProtoXmlNode::node`.
pub(crate) mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// Payload of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// An XML element (field tag 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text node (field tag 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1106
/// Protobuf message for one XML element with its attributes and children.
// NOTE(review): presumably mirrors `XmlElement` from AAPT2's Resources.proto — confirm.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlElement {
    /// Namespace declarations attached to this element (field tag 1).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (field tag 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element name, e.g. `manifest` (field tag 3).
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (field tag 4).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, both elements and text (field tag 5).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1120
1121impl ProtoXmlElement {
1122 fn child_elements_named<'a>(
1123 &'a self,
1124 name: &'a str,
1125 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1126 self.child
1127 .iter()
1128 .filter_map(ProtoXmlNode::element)
1129 .filter(move |element| element.name == name)
1130 }
1131
1132 fn child_elements_named_any<'a>(
1133 &'a self,
1134 names: &'a [&'a str],
1135 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1136 self.child
1137 .iter()
1138 .filter_map(ProtoXmlNode::element)
1139 .filter(move |element| names.contains(&element.name.as_str()))
1140 }
1141}
1142
/// Protobuf message for one XML namespace declaration (a prefix/URI pair).
///
/// Stored in `ProtoXmlElement::namespace_declaration`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlNamespace {
    /// Namespace prefix (field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional line/column position of the declaration (field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1152
/// Protobuf message for one attribute of a `ProtoXmlElement`.
///
/// NOTE(review): tags look like they mirror aapt2's `XmlAttribute` — confirm
/// against the upstream `Resources.proto` before changing any tag.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (field 1); empty string when absent.
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name (field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Attribute value as a string (field 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional line/column position of the attribute (field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Numeric resource ID (field 5); 0 when the field is not set.
    /// NOTE(review): presumably the Android resource ID of the attribute
    /// name — confirm against the schema.
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional typed value for the attribute (field 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1168
/// Protobuf message for a typed value, used as
/// `ProtoXmlAttribute::compiled_item`.
///
/// Only oneof fields 2, 3 and 7 are modelled here.
/// NOTE(review): the upstream aapt2 `Item` message presumably defines more
/// oneof members (other tag numbers); prost is expected to skip fields it
/// does not know when decoding — confirm.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoItem {
    /// The value, a oneof over fields 2 (string), 3 (raw string) and
    /// 7 (primitive). `None` when none of those is set.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status as a raw `u32` (field 8).
    /// NOTE(review): likely an enum upstream; kept numeric here — confirm.
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Name of the flag (field 10); empty string when absent.
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1180
/// Holds the oneof enum referenced by `ProtoItem::value`.
pub(crate) mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// Value carried by a `ProtoItem`; exactly one variant is held per value.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value (protobuf field 2, message).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value (protobuf field 3, message).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value such as a number or boolean (protobuf field 7,
        /// message).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1195
/// Protobuf wrapper message around a single string; carried by
/// `proto_item::Value::Str`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoStringValue {
    /// The string content (field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1201
/// Protobuf wrapper message around a single string; carried by
/// `proto_item::Value::RawStr`.
///
/// Structurally identical to `ProtoStringValue`; kept as a separate type so
/// the two oneof variants stay distinguishable.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoRawStringValue {
    /// The string content (field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1207
/// Protobuf message for a primitive (non-string) value; carried by
/// `proto_item::Value::Prim`.
///
/// Only oneof fields 3, 6, 7, 8, 13 and 14 are modelled here.
/// NOTE(review): the upstream aapt2 `Primitive` message presumably defines
/// further members under other tags — confirm before relying on `None`
/// meaning "no value".
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoPrimitive {
    /// The primitive value; `None` when none of the modelled fields is set.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1213
/// Holds the oneof enum referenced by `ProtoPrimitive::value`.
pub(crate) mod proto_primitive {
    use prost::Oneof;

    /// Value carried by a `ProtoPrimitive`; exactly one variant is held per
    /// value.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// 32-bit float (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed 32-bit integer (protobuf field 6).
        /// NOTE(review): the name suggests the source wrote it in decimal
        /// notation — confirm.
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned 32-bit integer (protobuf field 7).
        /// NOTE(review): the name suggests the source wrote it in hexadecimal
        /// notation — confirm.
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension stored as a raw `u32` (protobuf field 13).
        /// NOTE(review): presumably a packed Android dimension value, not a
        /// plain number — confirm before interpreting.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction stored as a raw `u32` (protobuf field 14).
        /// NOTE(review): presumably a packed Android fraction value, not a
        /// plain number — confirm before interpreting.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1233
// Registration entry for the Android Soong METADATA textproto parser.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path globs, package type, primary
// language, documentation URL) — confirm against the macro definition.
// The `**/METADATA` glob is broader than `AndroidSoongMetadataParser::is_match`,
// which rejects METADATA files whose parent directory ends in `.dist-info`.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);
1243
// Registration entry for the AndroidManifest.xml parser; the description
// states that both text XML and binary AXML manifests are covered.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path globs, package type, primary
// language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);
1251
// Registration entry for the Android APK archive parser (`*.apk` files).
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path globs, package type, primary
// language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);
1259
// Registration entry for the Android App Bundle parser (`*.aab` files), whose
// manifest is protobuf-encoded per the description string.
// NOTE(review): `register_parser!` is defined elsewhere in the crate; the
// arguments appear to be (description, path globs, package type, primary
// language, documentation URL) — confirm against the macro definition.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);