1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
22const PACKAGE_TYPE: PackageType = PackageType::Android;
23const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
24const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
25const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
26const MAX_COMPRESSION_RATIO: f64 = 100.0;
27const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30 PackageData {
31 package_type: Some(PACKAGE_TYPE),
32 datasource_id: Some(datasource_id),
33 ..Default::default()
34 }
35}
36
/// Parser for textproto `METADATA` files (matched by file name and content sniffing).
pub struct AndroidSoongMetadataParser;
/// Parser for standalone `AndroidManifest.xml` files, in text or binary XML form.
pub struct AndroidManifestParser;
/// Parser for `.apk` archives; reads the `AndroidManifest.xml` entry.
pub struct AndroidApkParser;
/// Parser for `.aab` archives; reads a protobuf-encoded manifest entry.
pub struct AndroidAabParser;
41
/// Heuristically decides whether `content` is an Android Soong `METADATA`
/// textproto file.
///
/// Only the first 40 lines are inspected (blank and `#` comment lines count
/// towards that budget but are otherwise ignored):
///
/// * a line starting with `//` rejects the file immediately;
/// * a line opening one of the known blocks (`third_party`, `url`,
///   `identifier`, `security`, `last_upgrade_date`) accepts it immediately;
/// * a `license_type:` line whose value is a non-empty run of ASCII uppercase
///   letters and underscores accepts it immediately.
///
/// If none of these fire, the file is accepted only when a `name:`,
/// `description:` or `homepage:` line was seen and the text `third_party`
/// occurs anywhere in `content`.
fn looks_like_android_soong_metadata_content(content: &str) -> bool {
    const MAX_LINES_INSPECTED: usize = 40;
    const BLOCK_NAMES: [&str; 5] = [
        "third_party",
        "url",
        "identifier",
        "security",
        "last_upgrade_date",
    ];
    const SCALAR_FIELD_PREFIXES: [&str; 3] = ["name:", "description:", "homepage:"];

    // `name {` or `name{` at the start of the line.
    let opens_known_block = |line: &str| {
        BLOCK_NAMES.iter().any(|&name| {
            line.strip_prefix(name)
                .is_some_and(|rest| rest.starts_with(" {") || rest.starts_with('{'))
        })
    };

    // `license_type: SOME_ENUM_NAME`
    let has_license_enum = |line: &str| {
        line.strip_prefix("license_type:")
            .map(str::trim)
            .is_some_and(|value| {
                !value.is_empty()
                    && value
                        .chars()
                        .all(|letter| letter == '_' || letter.is_ascii_uppercase())
            })
    };

    let mut found_scalar_field = false;

    for raw_line in content.lines().take(MAX_LINES_INSPECTED) {
        let line = raw_line.trim();

        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if line.starts_with("//") {
            return false;
        }
        if opens_known_block(line) || has_license_enum(line) {
            return true;
        }
        if SCALAR_FIELD_PREFIXES
            .iter()
            .any(|&prefix| line.starts_with(prefix))
        {
            found_scalar_field = true;
        }
    }

    found_scalar_field && content.contains("third_party")
}
91
92impl PackageParser for AndroidSoongMetadataParser {
93 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
94
95 fn is_match(path: &Path) -> bool {
96 if path.file_name().and_then(|name| name.to_str()) != Some("METADATA") {
97 return false;
98 }
99
100 if !path.is_file() {
101 return false;
102 }
103
104 crate::parsers::utils::read_file_to_string(path, Some(MAX_MANIFEST_SIZE))
105 .map(|content| looks_like_android_soong_metadata_content(&content))
106 .unwrap_or(false)
107 }
108
109 fn extract_packages(path: &Path) -> Vec<PackageData> {
110 let content = match crate::parsers::utils::read_file_to_string(path, None) {
111 Ok(content) => content,
112 Err(error) => {
113 warn!(
114 "Failed to read Android Soong METADATA {:?}: {}",
115 path, error
116 );
117 return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
118 }
119 };
120
121 vec![parse_soong_metadata(&content)]
122 }
123}
124
125impl PackageParser for AndroidManifestParser {
126 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
127
128 fn is_match(path: &Path) -> bool {
129 path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
130 }
131
132 fn extract_packages(path: &Path) -> Vec<PackageData> {
133 let bytes = match read_file_bytes(path, None) {
134 Ok(bytes) => bytes,
135 Err(error) => {
136 warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
137 return vec![default_package_data(DatasourceId::AndroidManifestXml)];
138 }
139 };
140
141 parse_manifest_bytes(
142 &bytes,
143 DatasourceId::AndroidManifestXml,
144 "AndroidManifest.xml",
145 )
146 .into_iter()
147 .collect()
148 }
149}
150
151impl PackageParser for AndroidApkParser {
152 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
153
154 fn is_match(path: &Path) -> bool {
155 path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
156 }
157
158 fn extract_packages(path: &Path) -> Vec<PackageData> {
159 let package_data = match read_best_zip_entry(path, |entry_name| {
160 if entry_name == "AndroidManifest.xml" {
161 Some(0)
162 } else {
163 None
164 }
165 }) {
166 Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
167 .unwrap_or_else(|error| {
168 warn!("Failed to parse APK manifest {:?}: {}", path, error);
169 default_package_data(DatasourceId::AndroidApk)
170 }),
171 Ok(None) => {
172 warn!("No AndroidManifest.xml found in APK {:?}", path);
173 default_package_data(DatasourceId::AndroidApk)
174 }
175 Err(error) => {
176 warn!("Failed to read APK archive {:?}: {}", path, error);
177 default_package_data(DatasourceId::AndroidApk)
178 }
179 };
180
181 vec![package_data]
182 }
183}
184
185impl PackageParser for AndroidAabParser {
186 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
187
188 fn is_match(path: &Path) -> bool {
189 path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
190 }
191
192 fn extract_packages(path: &Path) -> Vec<PackageData> {
193 let package_data = match read_best_zip_entry(path, |entry_name| {
194 if entry_name == "base/manifest/AndroidManifest.xml" {
195 Some(0)
196 } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
197 Some(1)
198 } else {
199 None
200 }
201 }) {
202 Ok(Some((entry_name, bytes))) => {
203 parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
204 warn!(
205 "Failed to parse AAB manifest {:?} ({}): {}",
206 path, entry_name, error
207 );
208 default_package_data(DatasourceId::AndroidAab)
209 })
210 }
211 Ok(None) => {
212 warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
213 default_package_data(DatasourceId::AndroidAab)
214 }
215 Err(error) => {
216 warn!("Failed to read AAB archive {:?}: {}", path, error);
217 default_package_data(DatasourceId::AndroidAab)
218 }
219 };
220
221 vec![package_data]
222 }
223}
224
225fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
226 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
227 let metadata =
228 fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
229
230 if metadata.len() > limit {
231 return Err(format!(
232 "File {:?} is {} bytes, exceeding the {} byte limit",
233 path,
234 metadata.len(),
235 limit
236 ));
237 }
238
239 let mut file =
240 File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
241 let mut bytes = Vec::with_capacity(metadata.len() as usize);
242 file.read_to_end(&mut bytes)
243 .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
244 Ok(bytes)
245}
246
247fn parse_soong_metadata(content: &str) -> PackageData {
248 let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
249 warn!("Failed to parse Android Soong METADATA: {}", error);
250 ProtoMap::default()
251 });
252
253 let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
254 package.name = parsed.get_first_string("name").map(truncate_field);
255 package.description = parsed.get_first_string("description").map(truncate_field);
256
257 if let Some(third_party) = parsed.get_first_map("third_party") {
258 package.version = third_party.get_first_string("version").map(truncate_field);
259
260 let url_entries = third_party
261 .get_all_maps("url")
262 .into_iter()
263 .map(|entry| {
264 let type_ = entry.get_first_string("type").map(truncate_field);
265 let value = entry.get_first_string("value").map(truncate_field);
266 (type_, value)
267 })
268 .collect::<Vec<_>>();
269
270 let homepage_url = third_party.get_first_string("homepage").or_else(|| {
271 url_entries
272 .iter()
273 .find(|(type_, _)| {
274 type_
275 .as_deref()
276 .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
277 })
278 .and_then(|(_, value)| value.clone())
279 });
280 package.homepage_url = homepage_url.map(truncate_field);
281
282 let license_types = third_party
283 .get_all_strings("license_type")
284 .into_iter()
285 .map(truncate_field)
286 .collect::<Vec<_>>();
287 if !license_types.is_empty() {
288 package.extracted_license_statement = Some(license_types.join(", "));
289 }
290
291 let identifiers = third_party
292 .get_all_maps("identifier")
293 .into_iter()
294 .map(|identifier| {
295 let type_ = identifier.get_first_string("type").map(truncate_field);
296 let value = identifier.get_first_string("value").map(truncate_field);
297 let mut object = serde_json::Map::new();
298 if let Some(type_) = type_ {
299 object.insert("type".to_string(), type_.into());
300 }
301 if let Some(value) = &value {
302 object.insert("value".to_string(), value.clone().into());
303 }
304
305 if package.vcs_url.is_none()
306 && let (Some(type_), Some(value)) = (
307 identifier.get_first_string("type"),
308 identifier.get_first_string("value"),
309 )
310 {
311 let lower_type = type_.to_ascii_lowercase();
312 if lower_type.contains("git") {
313 package.vcs_url = Some(truncate_field(value));
314 } else if lower_type.contains("archive")
315 || lower_type.contains("tar")
316 || lower_type.contains("zip")
317 {
318 package.download_url = Some(truncate_field(value));
319 }
320 }
321
322 serde_json::Value::Object(object)
323 })
324 .collect::<Vec<_>>();
325
326 for (type_, value) in &url_entries {
327 let Some(value) = value else {
328 continue;
329 };
330
331 match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
332 Some("git") if package.vcs_url.is_none() => {
333 package.vcs_url = Some(value.clone());
334 }
335 Some("archive") if package.download_url.is_none() => {
336 package.download_url = Some(value.clone());
337 }
338 Some("homepage") if package.homepage_url.is_none() => {
339 package.homepage_url = Some(value.clone());
340 }
341 _ => {}
342 }
343 }
344
345 let mut extra_data = HashMap::new();
346 if !identifiers.is_empty() {
347 extra_data.insert("identifiers".to_string(), identifiers.into());
348 }
349 if !url_entries.is_empty() {
350 extra_data.insert(
351 "urls".to_string(),
352 url_entries
353 .iter()
354 .map(|(type_, value)| {
355 let mut object = serde_json::Map::new();
356 if let Some(type_) = type_ {
357 object.insert("type".to_string(), type_.clone().into());
358 }
359 if let Some(value) = value {
360 object.insert("value".to_string(), value.clone().into());
361 }
362 serde_json::Value::Object(object)
363 })
364 .collect::<Vec<_>>()
365 .into(),
366 );
367 }
368
369 if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
370 let year = last_upgrade_date.get_first_string("year");
371 let month = last_upgrade_date.get_first_string("month");
372 let day = last_upgrade_date.get_first_string("day");
373 if let (Some(year), Some(month), Some(day)) = (year, month, day) {
374 let formatted = format!(
375 "{:04}-{:02}-{:02}",
376 year.parse::<u32>().unwrap_or_default(),
377 month.parse::<u32>().unwrap_or_default(),
378 day.parse::<u32>().unwrap_or_default()
379 );
380 extra_data.insert(
381 "last_upgrade_date".to_string(),
382 truncate_field(formatted).into(),
383 );
384 }
385 }
386
387 if let Some(upstream_url) = third_party.get_first_string("url") {
388 extra_data.insert(
389 "upstream_url".to_string(),
390 truncate_field(upstream_url).into(),
391 );
392 }
393
394 if !extra_data.is_empty() {
395 package.extra_data = Some(extra_data);
396 }
397 }
398
399 package
400}
401
402fn parse_manifest_bytes(
403 bytes: &[u8],
404 datasource_id: DatasourceId,
405 context: &str,
406) -> Option<PackageData> {
407 if looks_like_text_xml(bytes) {
408 match parse_text_manifest_bytes(bytes, datasource_id) {
409 Ok(package) => return Some(package),
410 Err(error) => {
411 warn!("Failed to parse {} as text XML: {}", context, error);
412 return None;
413 }
414 }
415 }
416
417 parse_binary_manifest_bytes(bytes, datasource_id)
418 .map(Some)
419 .unwrap_or_else(|error| {
420 warn!(
421 "Failed to parse {} as binary Android XML: {}",
422 context, error
423 );
424 None
425 })
426}
427
/// Returns `true` when `bytes` look like textual XML, i.e. the first byte
/// that is not ASCII whitespace is `<`.
///
/// A leading UTF-8 byte-order mark is skipped first, so that BOM-prefixed
/// text manifests are not mistaken for binary Android XML. Empty or
/// whitespace-only input is not considered XML.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    body.iter().find(|byte| !byte.is_ascii_whitespace()) == Some(&b'<')
}
434
435fn parse_text_manifest_bytes(
436 bytes: &[u8],
437 datasource_id: DatasourceId,
438) -> Result<PackageData, String> {
439 let content = String::from_utf8(bytes.to_vec())
440 .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
441
442 let mut reader = Reader::from_str(&content);
443 reader.config_mut().trim_text(true);
444
445 let mut buf = Vec::new();
446 let mut manifest_attributes = HashMap::new();
447 let mut uses_sdk_attributes = HashMap::new();
448 let mut application_attributes = HashMap::new();
449 let mut requested_permissions = Vec::new();
450 let mut uses_libraries = Vec::new();
451 let mut iteration_count = 0usize;
452
453 loop {
454 iteration_count += 1;
455 if iteration_count > MAX_ITERATION_COUNT {
456 return Err(format!(
457 "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
458 MAX_ITERATION_COUNT
459 ));
460 }
461
462 match reader.read_event_into(&mut buf) {
463 Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
464 let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
465 let attributes = xml_attributes_to_map(&reader, &event)?;
466 match name.as_str() {
467 "manifest" if manifest_attributes.is_empty() => {
468 manifest_attributes = attributes
469 }
470 "uses-sdk" => uses_sdk_attributes = attributes,
471 "application" if application_attributes.is_empty() => {
472 application_attributes = attributes;
473 }
474 "uses-permission" | "uses-permission-sdk-23" => {
475 if let Some(permission) = attributes.get("android:name") {
476 requested_permissions.push(permission.clone());
477 }
478 }
479 "uses-library" => {
480 if let Some(library_name) = attributes.get("android:name") {
481 uses_libraries.push(library_name.clone());
482 }
483 }
484 _ => {}
485 }
486 }
487 Ok(Event::Eof) => break,
488 Err(error) => {
489 return Err(format!(
490 "XML parse error at position {}: {}",
491 reader.buffer_position(),
492 error
493 ));
494 }
495 _ => {}
496 }
497
498 buf.clear();
499 }
500
501 Ok(build_manifest_package_data(
502 datasource_id,
503 &manifest_attributes,
504 &uses_sdk_attributes,
505 &application_attributes,
506 requested_permissions,
507 uses_libraries,
508 ))
509}
510
511fn xml_attributes_to_map(
512 reader: &Reader<&[u8]>,
513 event: &quick_xml::events::BytesStart<'_>,
514) -> Result<HashMap<String, String>, String> {
515 let mut attributes = HashMap::new();
516
517 for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
518 let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
519 let value = attribute
520 .decode_and_unescape_value(reader.decoder())
521 .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
522 .into_owned();
523 attributes.insert(key, truncate_field(value));
524 }
525
526 Ok(attributes)
527}
528
529fn parse_binary_manifest_bytes(
530 bytes: &[u8],
531 datasource_id: DatasourceId,
532) -> Result<PackageData, String> {
533 let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
534 .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
535 .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
536
537 let manifest_attributes =
538 normalize_binary_attributes(axml.root().borrow().attributes().clone());
539 let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
540 .into_iter()
541 .next()
542 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
543 .unwrap_or_default();
544 let application_attributes = find_nodes_by_type(&axml, "application")
545 .into_iter()
546 .next()
547 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
548 .unwrap_or_default();
549
550 let requested_permissions = get_requested_permissions(&axml)
551 .into_iter()
552 .map(truncate_field)
553 .collect::<Vec<_>>();
554 let uses_libraries = find_nodes_by_type(&axml, "uses-library")
555 .into_iter()
556 .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
557 .map(truncate_field)
558 .collect::<Vec<_>>();
559
560 Ok(build_manifest_package_data(
561 datasource_id,
562 &manifest_attributes,
563 &uses_sdk_attributes,
564 &application_attributes,
565 requested_permissions,
566 uses_libraries,
567 ))
568}
569
570fn build_manifest_package_data(
571 datasource_id: DatasourceId,
572 manifest_attributes: &HashMap<String, String>,
573 uses_sdk_attributes: &HashMap<String, String>,
574 application_attributes: &HashMap<String, String>,
575 requested_permissions: Vec<String>,
576 uses_libraries: Vec<String>,
577) -> PackageData {
578 let mut package = default_package_data(datasource_id);
579 package.name = manifest_attributes.get("package").cloned();
580 package.version = manifest_attributes
581 .get("android:versionName")
582 .cloned()
583 .or_else(|| manifest_attributes.get("android:versionCode").cloned());
584
585 package.description = application_attributes
586 .get("android:label")
587 .filter(|label| {
588 !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
589 })
590 .cloned();
591
592 let mut extra_data = HashMap::new();
593 insert_extra(
594 &mut extra_data,
595 "version_code",
596 manifest_attributes.get("android:versionCode"),
597 );
598 insert_extra(
599 &mut extra_data,
600 "compile_sdk_version",
601 manifest_attributes.get("android:compileSdkVersion"),
602 );
603 insert_extra(
604 &mut extra_data,
605 "compile_sdk_version_codename",
606 manifest_attributes.get("android:compileSdkVersionCodename"),
607 );
608 insert_extra(
609 &mut extra_data,
610 "platform_build_version_code",
611 manifest_attributes.get("platformBuildVersionCode"),
612 );
613 insert_extra(
614 &mut extra_data,
615 "platform_build_version_name",
616 manifest_attributes.get("platformBuildVersionName"),
617 );
618 insert_extra(
619 &mut extra_data,
620 "min_sdk_version",
621 uses_sdk_attributes.get("android:minSdkVersion"),
622 );
623 insert_extra(
624 &mut extra_data,
625 "target_sdk_version",
626 uses_sdk_attributes.get("android:targetSdkVersion"),
627 );
628 insert_extra(
629 &mut extra_data,
630 "max_sdk_version",
631 uses_sdk_attributes.get("android:maxSdkVersion"),
632 );
633
634 if !requested_permissions.is_empty() {
635 extra_data.insert(
636 "requested_permissions".to_string(),
637 requested_permissions
638 .into_iter()
639 .map(serde_json::Value::from)
640 .collect::<Vec<_>>()
641 .into(),
642 );
643 }
644 if !uses_libraries.is_empty() {
645 extra_data.insert(
646 "uses_libraries".to_string(),
647 uses_libraries
648 .into_iter()
649 .map(serde_json::Value::from)
650 .collect::<Vec<_>>()
651 .into(),
652 );
653 }
654
655 if !extra_data.is_empty() {
656 package.extra_data = Some(extra_data);
657 }
658
659 package
660}
661
662fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
663 attributes
664 .into_iter()
665 .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
666 .collect()
667}
668
/// Rewrites hexadecimal integer renderings as decimal strings.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose hex part
/// parses as a `u64` are returned in decimal; every other value is returned
/// unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(digits) => Some(digits),
        None => value.strip_prefix("0x"),
    };

    hex_digits
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
682
683fn insert_extra(
684 extra_data: &mut HashMap<String, serde_json::Value>,
685 key: &str,
686 value: Option<&String>,
687) {
688 if let Some(value) = value {
689 extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
690 }
691}
692
693fn read_best_zip_entry<F>(
694 path: &Path,
695 mut rank_entry: F,
696) -> Result<Option<(String, Vec<u8>)>, String>
697where
698 F: FnMut(&str) -> Option<u8>,
699{
700 let metadata = fs::metadata(path)
701 .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
702 if metadata.len() > MAX_ARCHIVE_SIZE {
703 return Err(format!(
704 "Archive {:?} is {} bytes, exceeding the {} byte limit",
705 path,
706 metadata.len(),
707 MAX_ARCHIVE_SIZE
708 ));
709 }
710
711 let file = File::open(path)
712 .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
713 let mut archive = ZipArchive::new(file)
714 .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
715
716 let mut total_uncompressed = 0u64;
717 let mut best: Option<(u8, String, Vec<u8>)> = None;
718 let entry_count = archive.len().min(MAX_ITERATION_COUNT);
719
720 if archive.len() > MAX_ITERATION_COUNT {
721 warn!(
722 "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
723 path, MAX_ITERATION_COUNT
724 );
725 }
726
727 for index in 0..entry_count {
728 let mut entry = archive.by_index(index).map_err(|error| {
729 format!(
730 "Failed to read ZIP entry {} in {:?}: {}",
731 index, path, error
732 )
733 })?;
734
735 total_uncompressed = total_uncompressed.saturating_add(entry.size());
736 if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
737 return Err(format!(
738 "Archive {:?} exceeds total uncompressed size limit of {} bytes",
739 path, MAX_TOTAL_UNCOMPRESSED_SIZE
740 ));
741 }
742
743 let entry_name = entry.name().replace('\\', "/");
744 if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
745 return Err(format!(
746 "Archive entry {} contains a disallowed path",
747 entry_name
748 ));
749 }
750 let Some(rank) = rank_entry(&entry_name) else {
751 continue;
752 };
753
754 if entry.size() > MAX_FILE_SIZE {
755 return Err(format!(
756 "Archive entry {} is {} bytes, exceeding the {} byte limit",
757 entry_name,
758 entry.size(),
759 MAX_FILE_SIZE
760 ));
761 }
762
763 let compressed_size = entry.compressed_size();
764 if compressed_size > 0 {
765 let ratio = entry.size() as f64 / compressed_size as f64;
766 if ratio > MAX_COMPRESSION_RATIO {
767 return Err(format!(
768 "Archive entry {} has suspicious compression ratio {:.2}:1",
769 entry_name, ratio
770 ));
771 }
772 }
773
774 let should_replace = match &best {
775 Some((best_rank, _, _)) => rank < *best_rank,
776 None => true,
777 };
778
779 if should_replace {
780 let mut bytes = Vec::with_capacity(entry.size() as usize);
781 entry.read_to_end(&mut bytes).map_err(|error| {
782 format!("Failed to read archive entry {}: {}", entry_name, error)
783 })?;
784 best = Some((rank, entry_name, bytes));
785 }
786 }
787
788 Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
789}
790
791fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
792 let node =
793 ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
794 let root_element = node
795 .element()
796 .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
797 if root_element.name != "manifest" {
798 return Err(format!(
799 "Unexpected proto XML root element: {}",
800 root_element.name
801 ));
802 }
803
804 let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
805 let uses_sdk_attributes = root_element
806 .child_elements_named("uses-sdk")
807 .next()
808 .map(|element| proto_attributes_to_map(&element.attribute))
809 .unwrap_or_default();
810 let application_attributes = root_element
811 .child_elements_named("application")
812 .next()
813 .map(|element| proto_attributes_to_map(&element.attribute))
814 .unwrap_or_default();
815 let requested_permissions = root_element
816 .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
817 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
818 .collect::<Vec<_>>();
819 let uses_libraries = root_element
820 .child_elements_named("uses-library")
821 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
822 .collect::<Vec<_>>();
823
824 let mut package = build_manifest_package_data(
825 DatasourceId::AndroidAab,
826 &manifest_attributes,
827 &uses_sdk_attributes,
828 &application_attributes,
829 requested_permissions,
830 uses_libraries,
831 );
832
833 if let Some(extra_data) = package.extra_data.as_mut() {
834 extra_data.insert("manifest_encoding".to_string(), "proto".into());
835 } else {
836 package.extra_data = Some(HashMap::from([(
837 "manifest_encoding".to_string(),
838 serde_json::Value::String("proto".to_string()),
839 )]));
840 }
841
842 Ok(package)
843}
844
845fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
846 attributes
847 .iter()
848 .filter_map(|attribute| {
849 let key = proto_attribute_key(attribute)?;
850 let value = proto_attribute_value(attribute)?;
851 Some((key, truncate_field(value)))
852 })
853 .collect()
854}
855
856fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
857 if attribute.name.is_empty() {
858 return None;
859 }
860
861 if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
862 return Some(format!("android:{}", attribute.name));
863 }
864
865 Some(attribute.name.clone())
866}
867
868fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
869 if !attribute.value.is_empty() {
870 return Some(attribute.value.clone());
871 }
872
873 attribute
874 .compiled_item
875 .as_ref()
876 .and_then(proto_item_to_string)
877}
878
879fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
880 match &item.value {
881 Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
882 Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
883 Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
884 _ => None,
885 }
886}
887
888fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
889 match &primitive.value {
890 Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
891 Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
892 Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
893 Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
894 Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
895 Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
896 _ => None,
897 }
898}
899
/// A parsed textproto message: each field name maps to every value that was
/// assigned to it, in order of appearance.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// One field value: either a scalar (kept as text) or a nested message.
#[derive(Debug, Clone)]
enum ProtoValue {
    Scalar(String),
    Map(ProtoMap),
}

impl ProtoMap {
    /// Iterates over the values recorded for `key` (nothing when the key is absent).
    fn values_of<'a>(&'a self, key: &str) -> std::slice::Iter<'a, ProtoValue> {
        self.fields
            .get(key)
            .map(Vec::as_slice)
            .unwrap_or_default()
            .iter()
    }

    /// Borrows the text of a scalar value; `None` for nested messages.
    fn scalar_of(value: &ProtoValue) -> Option<&String> {
        if let ProtoValue::Scalar(text) = value {
            Some(text)
        } else {
            None
        }
    }

    /// Borrows the nested message of a map value; `None` for scalars.
    fn map_of(value: &ProtoValue) -> Option<&ProtoMap> {
        if let ProtoValue::Map(nested) = value {
            Some(nested)
        } else {
            None
        }
    }

    /// Returns a copy of the first scalar value stored under `key`.
    fn get_first_string(&self, key: &str) -> Option<String> {
        self.values_of(key).find_map(Self::scalar_of).cloned()
    }

    /// Returns copies of all scalar values stored under `key`, in order.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        self.values_of(key)
            .filter_map(Self::scalar_of)
            .cloned()
            .collect()
    }

    /// Returns a copy of the first nested message stored under `key`.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        self.values_of(key).find_map(Self::map_of).cloned()
    }

    /// Returns copies of all nested messages stored under `key`, in order.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        self.values_of(key)
            .filter_map(Self::map_of)
            .cloned()
            .collect()
    }
}
954
955fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
956 let mut parser = TextProtoParser::new(content)?;
957 parser.parse_map(false)
958}
959
960struct TextProtoParser {
961 tokens: Vec<TextProtoToken>,
962 position: usize,
963}
964
965#[derive(Debug, Clone)]
966enum TextProtoToken {
967 Identifier(String),
968 String(String),
969 Colon,
970 LBrace,
971 RBrace,
972}
973
974impl TextProtoParser {
975 fn new(content: &str) -> Result<Self, String> {
976 Ok(Self {
977 tokens: tokenize_textproto(content)?,
978 position: 0,
979 })
980 }
981
982 fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
983 let mut map = ProtoMap::default();
984
985 while let Some(token) = self.peek() {
986 match token {
987 TextProtoToken::RBrace if stop_on_rbrace => {
988 self.position += 1;
989 break;
990 }
991 TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
992 TextProtoToken::Identifier(_) => {
993 let key = self.expect_identifier()?;
994 match self.peek() {
995 Some(TextProtoToken::Colon) => {
996 self.position += 1;
997 match self.peek() {
998 Some(TextProtoToken::LBrace) => {
999 self.position += 1;
1000 let value = self.parse_map(true)?;
1001 map.fields
1002 .entry(key)
1003 .or_default()
1004 .push(ProtoValue::Map(value));
1005 }
1006 _ => {
1007 let value = self.expect_scalar()?;
1008 map.fields
1009 .entry(key)
1010 .or_default()
1011 .push(ProtoValue::Scalar(truncate_field(value)));
1012 }
1013 }
1014 }
1015 Some(TextProtoToken::LBrace) => {
1016 self.position += 1;
1017 let value = self.parse_map(true)?;
1018 map.fields
1019 .entry(key)
1020 .or_default()
1021 .push(ProtoValue::Map(value));
1022 }
1023 Some(other) => {
1024 return Err(format!("Unexpected token after key: {:?}", other));
1025 }
1026 None => return Err("Unexpected end of input after key".to_string()),
1027 }
1028 }
1029 other => return Err(format!("Unexpected token in textproto: {:?}", other)),
1030 }
1031 }
1032
1033 Ok(map)
1034 }
1035
1036 fn expect_identifier(&mut self) -> Result<String, String> {
1037 match self.next() {
1038 Some(TextProtoToken::Identifier(value)) => Ok(value),
1039 other => Err(format!("Expected identifier, found {:?}", other)),
1040 }
1041 }
1042
1043 fn expect_scalar(&mut self) -> Result<String, String> {
1044 match self.next() {
1045 Some(TextProtoToken::String(mut value)) => {
1046 while matches!(self.peek(), Some(TextProtoToken::String(_))) {
1047 if let Some(TextProtoToken::String(next)) = self.next() {
1048 value.push_str(&next);
1049 }
1050 }
1051 Ok(value)
1052 }
1053 Some(TextProtoToken::Identifier(value)) => Ok(value),
1054 other => Err(format!("Expected scalar value, found {:?}", other)),
1055 }
1056 }
1057
1058 fn peek(&self) -> Option<&TextProtoToken> {
1059 self.tokens.get(self.position)
1060 }
1061
1062 fn next(&mut self) -> Option<TextProtoToken> {
1063 let token = self.tokens.get(self.position).cloned();
1064 if token.is_some() {
1065 self.position += 1;
1066 }
1067 token
1068 }
1069}
1070
1071fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1072 let mut tokens = Vec::new();
1073 let chars = content.chars().collect::<Vec<_>>();
1074 let mut index = 0usize;
1075
1076 while index < chars.len() {
1077 match chars[index] {
1078 '{' => {
1079 tokens.push(TextProtoToken::LBrace);
1080 index += 1;
1081 }
1082 '}' => {
1083 tokens.push(TextProtoToken::RBrace);
1084 index += 1;
1085 }
1086 ':' => {
1087 tokens.push(TextProtoToken::Colon);
1088 index += 1;
1089 }
1090 '"' => {
1091 index += 1;
1092 let mut value = String::new();
1093 while index < chars.len() {
1094 match chars[index] {
1095 '\\' if index + 1 < chars.len() => {
1096 index += 1;
1097 value.push(chars[index]);
1098 index += 1;
1099 }
1100 '"' => {
1101 index += 1;
1102 break;
1103 }
1104 character => {
1105 value.push(character);
1106 index += 1;
1107 }
1108 }
1109 }
1110 tokens.push(TextProtoToken::String(value));
1111 }
1112 '#' => {
1113 while index < chars.len() && chars[index] != '\n' {
1114 index += 1;
1115 }
1116 }
1117 '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1118 index += 2;
1119 while index < chars.len() && chars[index] != '\n' {
1120 index += 1;
1121 }
1122 }
1123 character if character.is_ascii_whitespace() => index += 1,
1124 _ => {
1125 let start = index;
1126 while index < chars.len() {
1127 let character = chars[index];
1128 let starts_comment =
1129 character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1130
1131 if character.is_ascii_whitespace()
1132 || matches!(character, '{' | '}' | ':' | '#')
1133 || starts_comment
1134 {
1135 break;
1136 }
1137
1138 index += 1;
1139 }
1140
1141 let token = chars[start..index].iter().collect::<String>();
1142 if token.is_empty() {
1143 return Err("Encountered empty textproto token".to_string());
1144 }
1145 tokens.push(TextProtoToken::Identifier(token));
1146 }
1147 }
1148 }
1149
1150 Ok(tokens)
1151}
1152
/// Line/column position record, used as the `source` field of
/// [`ProtoXmlNode`], [`ProtoXmlNamespace`] and [`ProtoXmlAttribute`].
///
/// NOTE(review): the field layout appears to mirror `SourcePosition` in
/// aapt2's `Resources.proto` — confirm against that schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoSourcePosition {
    /// Line number (protobuf field 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (protobuf field 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1160
/// One node of a protobuf-encoded XML tree: either an element or a text
/// string, plus an optional source position.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNode {
    /// Node payload; a oneof over protobuf fields 1 (element) and 2 (text).
    /// `None` when neither field is set.
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    /// Optional source position of this node (protobuf field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1168
1169impl ProtoXmlNode {
1170 fn element(&self) -> Option<&ProtoXmlElement> {
1171 match &self.node {
1172 Some(proto_xml_node::Node::Element(element)) => Some(element),
1173 _ => None,
1174 }
1175 }
1176}
1177
/// Holds the oneof payload type referenced by `ProtoXmlNode::node`.
pub mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// The two mutually exclusive payloads of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// Nested element (protobuf field 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// Text string (protobuf field 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1190
/// An XML element in the protobuf-encoded tree: its name and namespace,
/// its attributes, and its child nodes.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlElement {
    /// Namespace declarations on this element (protobuf field 1).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (protobuf field 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element name (protobuf field 3); this is what the
    /// `child_elements_named*` helpers compare against.
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (protobuf field 4).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, elements and text alike (protobuf field 5).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1204
1205impl ProtoXmlElement {
1206 fn child_elements_named<'a>(
1207 &'a self,
1208 name: &'a str,
1209 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1210 self.child
1211 .iter()
1212 .filter_map(ProtoXmlNode::element)
1213 .filter(move |element| element.name == name)
1214 }
1215
1216 fn child_elements_named_any<'a>(
1217 &'a self,
1218 names: &'a [&'a str],
1219 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1220 self.child
1221 .iter()
1222 .filter_map(ProtoXmlNode::element)
1223 .filter(move |element| names.contains(&element.name.as_str()))
1224 }
1225}
1226
/// An XML namespace declaration (prefix bound to a URI) attached to an
/// element via `ProtoXmlElement::namespace_declaration`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNamespace {
    /// Namespace prefix (protobuf field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (protobuf field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position of the declaration (protobuf field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1236
/// A single attribute of a [`ProtoXmlElement`].
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (protobuf field 1).
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name (protobuf field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Attribute value as a string (protobuf field 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position of the attribute (protobuf field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Numeric resource id (protobuf field 5).
    /// NOTE(review): presumably the Android resource id of the attribute
    /// name — confirm against aapt2's `Resources.proto`.
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional typed form of the value (protobuf field 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1252
/// Typed value carried by `ProtoXmlAttribute::compiled_item`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoItem {
    /// Value payload; a oneof restricted to protobuf fields 2 (string),
    /// 3 (raw string) and 7 (primitive).
    /// NOTE(review): other value kinds in the upstream schema are not
    /// modelled here and would presumably decode as `None` — confirm.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status as a raw integer (protobuf field 8).
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (protobuf field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Name of the flag (protobuf field 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1264
/// Holds the oneof payload type referenced by `ProtoItem::value`.
pub mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// The value kinds of an item that this parser models.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value such as a number or boolean (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1279
/// Wrapper message for the `Str` variant of `proto_item::Value`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoStringValue {
    /// The string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1285
/// Wrapper message for the `RawStr` variant of `proto_item::Value`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoRawStringValue {
    /// The raw string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1291
/// Wrapper message for the `Prim` variant of `proto_item::Value`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoPrimitive {
    /// Primitive payload; a oneof restricted to protobuf fields
    /// 3, 6, 7, 8, 13 and 14. `None` when none of those fields is set.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1297
/// Holds the oneof payload type referenced by `ProtoPrimitive::value`.
pub mod proto_primitive {
    use prost::Oneof;

    /// The primitive value kinds that this parser models.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// 32-bit float (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed 32-bit integer (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned 32-bit integer (protobuf field 7).
        /// NOTE(review): variant name suggests it was written in
        /// hexadecimal in the source — confirm against the upstream schema.
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension as a raw `u32` (protobuf field 13).
        /// NOTE(review): the encoding of the raw value is not shown here.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction as a raw `u32` (protobuf field 14).
        /// NOTE(review): the encoding of the raw value is not shown here.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1317
// Registration entry for Soong `METADATA` textproto files, matched by the
// `**/METADATA` path pattern.
// NOTE(review): the macro definition is not visible here; the arguments look
// like (description, path patterns, package type, primary language, docs URL)
// — confirm against `register_parser!`.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);
1327
// Registration entry for `AndroidManifest.xml` files (text XML or binary
// AXML, per the description string), matched by `**/AndroidManifest.xml`.
// NOTE(review): the fourth argument ("XML") is presumably the primary
// language — confirm against `register_parser!`.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);
1335
// Registration entry for Android APK archives, matched by `**/*.apk`.
// NOTE(review): argument meanings are inferred from their values — confirm
// against `register_parser!`.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);
1343
// Registration entry for Android App Bundles, matched by `**/*.aab`.
// NOTE(review): argument meanings are inferred from their values — confirm
// against `register_parser!`.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);