1use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::XmlVersion;
12use quick_xml::events::Event;
13use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
14use zip::ZipArchive;
15
16use crate::models::{DatasourceId, PackageData, PackageType};
17use crate::parser_warn as warn;
18use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
19use crate::utils::magic;
20
21use super::PackageParser;
22use super::metadata::ParserMetadata;
23
24const PACKAGE_TYPE: PackageType = PackageType::Android;
25const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
26const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
27const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
28const MAX_COMPRESSION_RATIO: f64 = 100.0;
29const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
30
31fn default_package_data(datasource_id: DatasourceId) -> PackageData {
32 PackageData {
33 package_type: Some(PACKAGE_TYPE),
34 datasource_id: Some(datasource_id),
35 ..Default::default()
36 }
37}
38
/// Parser for Android Soong `METADATA` textproto files.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidSoongMetadataParser;

/// Parser for `AndroidManifest.xml` files (text XML or binary AXML).
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidManifestParser;

/// Parser for `.apk` archives; reads the embedded binary manifest.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidApkParser;

/// Parser for `.aab` app bundles; reads the embedded proto manifest.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidAabParser;
43
/// Heuristically decides whether `content` is an Android Soong `METADATA`
/// textproto.
///
/// Only the first 40 lines are inspected. Blank lines and `#` comments are
/// skipped; a `//` comment rejects the file outright. The file is accepted as
/// soon as a known message opener (`third_party {`, `url {`, ...) or a
/// `license_type:` with an upper-case enum value is seen. Otherwise it is
/// accepted only if a `name:`, `description:` or `homepage:` field was seen in
/// those lines and the whole content mentions `third_party`.
fn looks_like_android_soong_metadata_content(content: &str) -> bool {
    const MESSAGE_OPENERS: [&str; 10] = [
        "third_party {",
        "third_party{",
        "url {",
        "url{",
        "identifier {",
        "identifier{",
        "security {",
        "security{",
        "last_upgrade_date {",
        "last_upgrade_date{",
    ];
    const NAMED_FIELDS: [&str; 3] = ["name:", "description:", "homepage:"];

    let mut has_named_field = false;

    // The 40-line window is applied before filtering, so skipped lines still
    // count towards it.
    let significant_lines = content
        .lines()
        .take(40)
        .map(str::trim)
        .filter(|entry| !entry.is_empty() && !entry.starts_with('#'));

    for entry in significant_lines {
        if entry.starts_with("//") {
            return false;
        }

        if MESSAGE_OPENERS.iter().any(|&opener| entry.starts_with(opener)) {
            return true;
        }

        let has_license_enum = entry
            .strip_prefix("license_type:")
            .map(str::trim)
            .is_some_and(|license| {
                !license.is_empty()
                    && license
                        .chars()
                        .all(|symbol| symbol == '_' || symbol.is_ascii_uppercase())
            });
        if has_license_enum {
            return true;
        }

        if NAMED_FIELDS.iter().any(|&field| entry.starts_with(field)) {
            has_named_field = true;
        }
    }

    has_named_field && content.contains("third_party")
}
93
94impl PackageParser for AndroidSoongMetadataParser {
95 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
96
97 fn metadata() -> Vec<ParserMetadata> {
98 vec![ParserMetadata {
99 description: "Android Soong METADATA textproto",
100 file_patterns: &["**/METADATA"],
101 package_type: "android",
102 primary_language: "",
103 documentation_url: Some(
104 "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/compliance/project_metadata_proto/project_metadata.proto",
105 ),
106 }]
107 }
108
109 fn is_match(path: &Path) -> bool {
110 if path.file_name().and_then(|name| name.to_str()) != Some("METADATA") {
111 return false;
112 }
113
114 if !path.is_file() {
115 return false;
116 }
117
118 crate::parsers::utils::read_file_to_string(path, Some(MAX_MANIFEST_SIZE))
119 .map(|content| looks_like_android_soong_metadata_content(&content))
120 .unwrap_or(false)
121 }
122
123 fn extract_packages(path: &Path) -> Vec<PackageData> {
124 let content = match crate::parsers::utils::read_file_to_string(path, None) {
125 Ok(content) => content,
126 Err(error) => {
127 warn!(
128 "Failed to read Android Soong METADATA {:?}: {}",
129 path, error
130 );
131 return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
132 }
133 };
134
135 vec![parse_soong_metadata(&content)]
136 }
137}
138
139impl PackageParser for AndroidManifestParser {
140 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
141
142 fn metadata() -> Vec<ParserMetadata> {
143 vec![ParserMetadata {
144 description: "AndroidManifest.xml metadata (text XML or binary AXML)",
145 file_patterns: &["**/AndroidManifest.xml"],
146 package_type: "android",
147 primary_language: "XML",
148 documentation_url: Some(
149 "https://developer.android.com/guide/topics/manifest/manifest-intro",
150 ),
151 }]
152 }
153
154 fn is_match(path: &Path) -> bool {
155 path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
156 }
157
158 fn extract_packages(path: &Path) -> Vec<PackageData> {
159 let bytes = match read_file_bytes(path, None) {
160 Ok(bytes) => bytes,
161 Err(error) => {
162 warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
163 return vec![default_package_data(DatasourceId::AndroidManifestXml)];
164 }
165 };
166
167 parse_manifest_bytes(
168 &bytes,
169 DatasourceId::AndroidManifestXml,
170 "AndroidManifest.xml",
171 )
172 .into_iter()
173 .collect()
174 }
175}
176
177impl PackageParser for AndroidApkParser {
178 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
179
180 fn metadata() -> Vec<ParserMetadata> {
181 vec![ParserMetadata {
182 description: "Android APK archive manifest metadata",
183 file_patterns: &["**/*.apk"],
184 package_type: "android",
185 primary_language: "",
186 documentation_url: Some("https://developer.android.com/build/build-for-release"),
187 }]
188 }
189
190 fn is_match(path: &Path) -> bool {
191 path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
192 }
193
194 fn extract_packages(path: &Path) -> Vec<PackageData> {
195 let package_data = match read_best_zip_entry(path, |entry_name| {
196 if entry_name == "AndroidManifest.xml" {
197 Some(0)
198 } else {
199 None
200 }
201 }) {
202 Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
203 .unwrap_or_else(|error| {
204 warn!("Failed to parse APK manifest {:?}: {}", path, error);
205 default_package_data(DatasourceId::AndroidApk)
206 }),
207 Ok(None) => {
208 warn!("No AndroidManifest.xml found in APK {:?}", path);
209 default_package_data(DatasourceId::AndroidApk)
210 }
211 Err(error) => {
212 warn!("Failed to read APK archive {:?}: {}", path, error);
213 default_package_data(DatasourceId::AndroidApk)
214 }
215 };
216
217 vec![package_data]
218 }
219}
220
221impl PackageParser for AndroidAabParser {
222 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
223
224 fn metadata() -> Vec<ParserMetadata> {
225 vec![ParserMetadata {
226 description: "Android App Bundle (.aab) proto manifest metadata",
227 file_patterns: &["**/*.aab"],
228 package_type: "android",
229 primary_language: "",
230 documentation_url: Some("https://developer.android.com/guide/app-bundle"),
231 }]
232 }
233
234 fn is_match(path: &Path) -> bool {
235 path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
236 }
237
238 fn extract_packages(path: &Path) -> Vec<PackageData> {
239 let package_data = match read_best_zip_entry(path, |entry_name| {
240 if entry_name == "base/manifest/AndroidManifest.xml" {
241 Some(0)
242 } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
243 Some(1)
244 } else {
245 None
246 }
247 }) {
248 Ok(Some((entry_name, bytes))) => {
249 parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
250 warn!(
251 "Failed to parse AAB manifest {:?} ({}): {}",
252 path, entry_name, error
253 );
254 default_package_data(DatasourceId::AndroidAab)
255 })
256 }
257 Ok(None) => {
258 warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
259 default_package_data(DatasourceId::AndroidAab)
260 }
261 Err(error) => {
262 warn!("Failed to read AAB archive {:?}: {}", path, error);
263 default_package_data(DatasourceId::AndroidAab)
264 }
265 };
266
267 vec![package_data]
268 }
269}
270
271fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
272 let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
273 let metadata =
274 fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
275
276 if metadata.len() > limit {
277 return Err(format!(
278 "File {:?} is {} bytes, exceeding the {} byte limit",
279 path,
280 metadata.len(),
281 limit
282 ));
283 }
284
285 let mut file =
286 File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
287 let mut bytes = Vec::with_capacity(metadata.len() as usize);
288 file.read_to_end(&mut bytes)
289 .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
290 Ok(bytes)
291}
292
293fn parse_soong_metadata(content: &str) -> PackageData {
294 let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
295 warn!("Failed to parse Android Soong METADATA: {}", error);
296 ProtoMap::default()
297 });
298
299 let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
300 package.name = parsed.get_first_string("name").map(truncate_field);
301 package.description = parsed.get_first_string("description").map(truncate_field);
302
303 if let Some(third_party) = parsed.get_first_map("third_party") {
304 package.version = third_party.get_first_string("version").map(truncate_field);
305
306 let url_entries = third_party
307 .get_all_maps("url")
308 .into_iter()
309 .map(|entry| {
310 let type_ = entry.get_first_string("type").map(truncate_field);
311 let value = entry.get_first_string("value").map(truncate_field);
312 (type_, value)
313 })
314 .collect::<Vec<_>>();
315
316 let homepage_url = third_party.get_first_string("homepage").or_else(|| {
317 url_entries
318 .iter()
319 .find(|(type_, _)| {
320 type_
321 .as_deref()
322 .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
323 })
324 .and_then(|(_, value)| value.clone())
325 });
326 package.homepage_url = homepage_url.map(truncate_field);
327
328 let license_types = third_party
329 .get_all_strings("license_type")
330 .into_iter()
331 .map(truncate_field)
332 .collect::<Vec<_>>();
333 if !license_types.is_empty() {
334 package.extracted_license_statement = Some(license_types.join(", "));
335 }
336
337 let identifiers = third_party
338 .get_all_maps("identifier")
339 .into_iter()
340 .map(|identifier| {
341 let type_ = identifier.get_first_string("type").map(truncate_field);
342 let value = identifier.get_first_string("value").map(truncate_field);
343 let mut object = serde_json::Map::new();
344 if let Some(type_) = type_ {
345 object.insert("type".to_string(), type_.into());
346 }
347 if let Some(value) = &value {
348 object.insert("value".to_string(), value.clone().into());
349 }
350
351 if package.vcs_url.is_none()
352 && let (Some(type_), Some(value)) = (
353 identifier.get_first_string("type"),
354 identifier.get_first_string("value"),
355 )
356 {
357 let lower_type = type_.to_ascii_lowercase();
358 if lower_type.contains("git") {
359 package.vcs_url = Some(truncate_field(value));
360 } else if lower_type.contains("archive")
361 || lower_type.contains("tar")
362 || lower_type.contains("zip")
363 {
364 package.download_url = Some(truncate_field(value));
365 }
366 }
367
368 serde_json::Value::Object(object)
369 })
370 .collect::<Vec<_>>();
371
372 for (type_, value) in &url_entries {
373 let Some(value) = value else {
374 continue;
375 };
376
377 match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
378 Some("git") if package.vcs_url.is_none() => {
379 package.vcs_url = Some(value.clone());
380 }
381 Some("archive") if package.download_url.is_none() => {
382 package.download_url = Some(value.clone());
383 }
384 Some("homepage") if package.homepage_url.is_none() => {
385 package.homepage_url = Some(value.clone());
386 }
387 _ => {}
388 }
389 }
390
391 let mut extra_data = HashMap::new();
392 if !identifiers.is_empty() {
393 extra_data.insert("identifiers".to_string(), identifiers.into());
394 }
395 if !url_entries.is_empty() {
396 extra_data.insert(
397 "urls".to_string(),
398 url_entries
399 .iter()
400 .map(|(type_, value)| {
401 let mut object = serde_json::Map::new();
402 if let Some(type_) = type_ {
403 object.insert("type".to_string(), type_.clone().into());
404 }
405 if let Some(value) = value {
406 object.insert("value".to_string(), value.clone().into());
407 }
408 serde_json::Value::Object(object)
409 })
410 .collect::<Vec<_>>()
411 .into(),
412 );
413 }
414
415 if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
416 let year = last_upgrade_date.get_first_string("year");
417 let month = last_upgrade_date.get_first_string("month");
418 let day = last_upgrade_date.get_first_string("day");
419 if let (Some(year), Some(month), Some(day)) = (year, month, day) {
420 let formatted = format!(
421 "{:04}-{:02}-{:02}",
422 year.parse::<u32>().unwrap_or_default(),
423 month.parse::<u32>().unwrap_or_default(),
424 day.parse::<u32>().unwrap_or_default()
425 );
426 extra_data.insert(
427 "last_upgrade_date".to_string(),
428 truncate_field(formatted).into(),
429 );
430 }
431 }
432
433 if let Some(upstream_url) = third_party.get_first_string("url") {
434 extra_data.insert(
435 "upstream_url".to_string(),
436 truncate_field(upstream_url).into(),
437 );
438 }
439
440 if !extra_data.is_empty() {
441 package.extra_data = Some(extra_data);
442 }
443 }
444
445 package
446}
447
448fn parse_manifest_bytes(
449 bytes: &[u8],
450 datasource_id: DatasourceId,
451 context: &str,
452) -> Option<PackageData> {
453 if looks_like_text_xml(bytes) {
454 match parse_text_manifest_bytes(bytes, datasource_id) {
455 Ok(package) => return Some(package),
456 Err(error) => {
457 warn!("Failed to parse {} as text XML: {}", context, error);
458 return None;
459 }
460 }
461 }
462
463 parse_binary_manifest_bytes(bytes, datasource_id)
464 .map(Some)
465 .unwrap_or_else(|error| {
466 warn!(
467 "Failed to parse {} as binary Android XML: {}",
468 context, error
469 );
470 None
471 })
472}
473
/// Reports whether `bytes` look like a textual XML document, i.e. whether the
/// first byte that is not ASCII whitespace is `<`.
///
/// A leading UTF-8 byte-order mark is skipped first, so text manifests saved
/// with a BOM are not mistaken for binary Android XML. Empty and
/// whitespace-only input is not considered XML.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);

    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
480
481fn parse_text_manifest_bytes(
482 bytes: &[u8],
483 datasource_id: DatasourceId,
484) -> Result<PackageData, String> {
485 let content = String::from_utf8(bytes.to_vec())
486 .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
487
488 let mut reader = Reader::from_str(&content);
489 reader.config_mut().trim_text(true);
490
491 let mut buf = Vec::new();
492 let mut manifest_attributes = HashMap::new();
493 let mut uses_sdk_attributes = HashMap::new();
494 let mut application_attributes = HashMap::new();
495 let mut requested_permissions = Vec::new();
496 let mut uses_libraries = Vec::new();
497 let mut iteration_count = 0usize;
498
499 loop {
500 iteration_count += 1;
501 if iteration_count > MAX_ITERATION_COUNT {
502 return Err(format!(
503 "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
504 MAX_ITERATION_COUNT
505 ));
506 }
507
508 match reader.read_event_into(&mut buf) {
509 Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
510 let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
511 let attributes = xml_attributes_to_map(&reader, &event)?;
512 match name.as_str() {
513 "manifest" if manifest_attributes.is_empty() => {
514 manifest_attributes = attributes
515 }
516 "uses-sdk" => uses_sdk_attributes = attributes,
517 "application" if application_attributes.is_empty() => {
518 application_attributes = attributes;
519 }
520 "uses-permission" | "uses-permission-sdk-23" => {
521 if let Some(permission) = attributes.get("android:name") {
522 requested_permissions.push(permission.clone());
523 }
524 }
525 "uses-library" => {
526 if let Some(library_name) = attributes.get("android:name") {
527 uses_libraries.push(library_name.clone());
528 }
529 }
530 _ => {}
531 }
532 }
533 Ok(Event::Eof) => break,
534 Err(error) => {
535 return Err(format!(
536 "XML parse error at position {}: {}",
537 reader.buffer_position(),
538 error
539 ));
540 }
541 _ => {}
542 }
543
544 buf.clear();
545 }
546
547 Ok(build_manifest_package_data(
548 datasource_id,
549 &manifest_attributes,
550 &uses_sdk_attributes,
551 &application_attributes,
552 requested_permissions,
553 uses_libraries,
554 ))
555}
556
557fn xml_attributes_to_map(
558 reader: &Reader<&[u8]>,
559 event: &quick_xml::events::BytesStart<'_>,
560) -> Result<HashMap<String, String>, String> {
561 let mut attributes = HashMap::new();
562
563 for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
564 let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
565 let value = attribute
566 .decoded_and_normalized_value(XmlVersion::Implicit1_0, reader.decoder())
567 .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
568 .into_owned();
569 attributes.insert(key, truncate_field(value));
570 }
571
572 Ok(attributes)
573}
574
575fn parse_binary_manifest_bytes(
576 bytes: &[u8],
577 datasource_id: DatasourceId,
578) -> Result<PackageData, String> {
579 let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
580 .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
581 .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
582
583 let manifest_attributes =
584 normalize_binary_attributes(axml.root().borrow().attributes().clone());
585 let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
586 .into_iter()
587 .next()
588 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
589 .unwrap_or_default();
590 let application_attributes = find_nodes_by_type(&axml, "application")
591 .into_iter()
592 .next()
593 .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
594 .unwrap_or_default();
595
596 let requested_permissions = get_requested_permissions(&axml)
597 .into_iter()
598 .map(truncate_field)
599 .collect::<Vec<_>>();
600 let uses_libraries = find_nodes_by_type(&axml, "uses-library")
601 .into_iter()
602 .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
603 .map(truncate_field)
604 .collect::<Vec<_>>();
605
606 Ok(build_manifest_package_data(
607 datasource_id,
608 &manifest_attributes,
609 &uses_sdk_attributes,
610 &application_attributes,
611 requested_permissions,
612 uses_libraries,
613 ))
614}
615
616fn build_manifest_package_data(
617 datasource_id: DatasourceId,
618 manifest_attributes: &HashMap<String, String>,
619 uses_sdk_attributes: &HashMap<String, String>,
620 application_attributes: &HashMap<String, String>,
621 requested_permissions: Vec<String>,
622 uses_libraries: Vec<String>,
623) -> PackageData {
624 let mut package = default_package_data(datasource_id);
625 package.name = manifest_attributes.get("package").cloned();
626 package.version = manifest_attributes
627 .get("android:versionName")
628 .cloned()
629 .or_else(|| manifest_attributes.get("android:versionCode").cloned());
630
631 package.description = application_attributes
632 .get("android:label")
633 .filter(|label| {
634 !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
635 })
636 .cloned();
637
638 let mut extra_data = HashMap::new();
639 insert_extra(
640 &mut extra_data,
641 "version_code",
642 manifest_attributes.get("android:versionCode"),
643 );
644 insert_extra(
645 &mut extra_data,
646 "compile_sdk_version",
647 manifest_attributes.get("android:compileSdkVersion"),
648 );
649 insert_extra(
650 &mut extra_data,
651 "compile_sdk_version_codename",
652 manifest_attributes.get("android:compileSdkVersionCodename"),
653 );
654 insert_extra(
655 &mut extra_data,
656 "platform_build_version_code",
657 manifest_attributes.get("platformBuildVersionCode"),
658 );
659 insert_extra(
660 &mut extra_data,
661 "platform_build_version_name",
662 manifest_attributes.get("platformBuildVersionName"),
663 );
664 insert_extra(
665 &mut extra_data,
666 "min_sdk_version",
667 uses_sdk_attributes.get("android:minSdkVersion"),
668 );
669 insert_extra(
670 &mut extra_data,
671 "target_sdk_version",
672 uses_sdk_attributes.get("android:targetSdkVersion"),
673 );
674 insert_extra(
675 &mut extra_data,
676 "max_sdk_version",
677 uses_sdk_attributes.get("android:maxSdkVersion"),
678 );
679
680 if !requested_permissions.is_empty() {
681 extra_data.insert(
682 "requested_permissions".to_string(),
683 requested_permissions
684 .into_iter()
685 .map(serde_json::Value::from)
686 .collect::<Vec<_>>()
687 .into(),
688 );
689 }
690 if !uses_libraries.is_empty() {
691 extra_data.insert(
692 "uses_libraries".to_string(),
693 uses_libraries
694 .into_iter()
695 .map(serde_json::Value::from)
696 .collect::<Vec<_>>()
697 .into(),
698 );
699 }
700
701 if !extra_data.is_empty() {
702 package.extra_data = Some(extra_data);
703 }
704
705 package
706}
707
708fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
709 attributes
710 .into_iter()
711 .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
712 .collect()
713}
714
/// Rewrites a hexadecimal attribute value as a decimal string.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits parse
/// as a `u64` are returned in decimal; everything else is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(digits) => Some(digits),
        None => value.strip_prefix("0x"),
    };

    hex_digits
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
728
729fn insert_extra(
730 extra_data: &mut HashMap<String, serde_json::Value>,
731 key: &str,
732 value: Option<&String>,
733) {
734 if let Some(value) = value {
735 extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
736 }
737}
738
739fn read_best_zip_entry<F>(
740 path: &Path,
741 mut rank_entry: F,
742) -> Result<Option<(String, Vec<u8>)>, String>
743where
744 F: FnMut(&str) -> Option<u8>,
745{
746 let metadata = fs::metadata(path)
747 .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
748 if metadata.len() > MAX_ARCHIVE_SIZE {
749 return Err(format!(
750 "Archive {:?} is {} bytes, exceeding the {} byte limit",
751 path,
752 metadata.len(),
753 MAX_ARCHIVE_SIZE
754 ));
755 }
756
757 let file = File::open(path)
758 .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
759 let mut archive = ZipArchive::new(file)
760 .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
761
762 let mut total_uncompressed = 0u64;
763 let mut best: Option<(u8, String, Vec<u8>)> = None;
764 let entry_count = archive.len().min(MAX_ITERATION_COUNT);
765
766 if archive.len() > MAX_ITERATION_COUNT {
767 warn!(
768 "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
769 path, MAX_ITERATION_COUNT
770 );
771 }
772
773 for index in 0..entry_count {
774 let mut entry = archive.by_index(index).map_err(|error| {
775 format!(
776 "Failed to read ZIP entry {} in {:?}: {}",
777 index, path, error
778 )
779 })?;
780
781 total_uncompressed = total_uncompressed.saturating_add(entry.size());
782 if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
783 return Err(format!(
784 "Archive {:?} exceeds total uncompressed size limit of {} bytes",
785 path, MAX_TOTAL_UNCOMPRESSED_SIZE
786 ));
787 }
788
789 let entry_name = entry.name().replace('\\', "/");
790 if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
791 return Err(format!(
792 "Archive entry {} contains a disallowed path",
793 entry_name
794 ));
795 }
796 let Some(rank) = rank_entry(&entry_name) else {
797 continue;
798 };
799
800 if entry.size() > MAX_FILE_SIZE {
801 return Err(format!(
802 "Archive entry {} is {} bytes, exceeding the {} byte limit",
803 entry_name,
804 entry.size(),
805 MAX_FILE_SIZE
806 ));
807 }
808
809 let compressed_size = entry.compressed_size();
810 if compressed_size > 0 {
811 let ratio = entry.size() as f64 / compressed_size as f64;
812 if ratio > MAX_COMPRESSION_RATIO {
813 return Err(format!(
814 "Archive entry {} has suspicious compression ratio {:.2}:1",
815 entry_name, ratio
816 ));
817 }
818 }
819
820 let should_replace = match &best {
821 Some((best_rank, _, _)) => rank < *best_rank,
822 None => true,
823 };
824
825 if should_replace {
826 let mut bytes = Vec::with_capacity(entry.size() as usize);
827 entry.read_to_end(&mut bytes).map_err(|error| {
828 format!("Failed to read archive entry {}: {}", entry_name, error)
829 })?;
830 best = Some((rank, entry_name, bytes));
831 }
832 }
833
834 Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
835}
836
837fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
838 let node =
839 ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
840 let root_element = node
841 .element()
842 .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
843 if root_element.name != "manifest" {
844 return Err(format!(
845 "Unexpected proto XML root element: {}",
846 root_element.name
847 ));
848 }
849
850 let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
851 let uses_sdk_attributes = root_element
852 .child_elements_named("uses-sdk")
853 .next()
854 .map(|element| proto_attributes_to_map(&element.attribute))
855 .unwrap_or_default();
856 let application_attributes = root_element
857 .child_elements_named("application")
858 .next()
859 .map(|element| proto_attributes_to_map(&element.attribute))
860 .unwrap_or_default();
861 let requested_permissions = root_element
862 .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
863 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
864 .collect::<Vec<_>>();
865 let uses_libraries = root_element
866 .child_elements_named("uses-library")
867 .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
868 .collect::<Vec<_>>();
869
870 let mut package = build_manifest_package_data(
871 DatasourceId::AndroidAab,
872 &manifest_attributes,
873 &uses_sdk_attributes,
874 &application_attributes,
875 requested_permissions,
876 uses_libraries,
877 );
878
879 if let Some(extra_data) = package.extra_data.as_mut() {
880 extra_data.insert("manifest_encoding".to_string(), "proto".into());
881 } else {
882 package.extra_data = Some(HashMap::from([(
883 "manifest_encoding".to_string(),
884 serde_json::Value::String("proto".to_string()),
885 )]));
886 }
887
888 Ok(package)
889}
890
891fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
892 attributes
893 .iter()
894 .filter_map(|attribute| {
895 let key = proto_attribute_key(attribute)?;
896 let value = proto_attribute_value(attribute)?;
897 Some((key, truncate_field(value)))
898 })
899 .collect()
900}
901
902fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
903 if attribute.name.is_empty() {
904 return None;
905 }
906
907 if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
908 return Some(format!("android:{}", attribute.name));
909 }
910
911 Some(attribute.name.clone())
912}
913
914fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
915 if !attribute.value.is_empty() {
916 return Some(attribute.value.clone());
917 }
918
919 attribute
920 .compiled_item
921 .as_ref()
922 .and_then(proto_item_to_string)
923}
924
925fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
926 match &item.value {
927 Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
928 Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
929 Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
930 _ => None,
931 }
932}
933
934fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
935 match &primitive.value {
936 Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
937 Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
938 Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
939 Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
940 Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
941 Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
942 _ => None,
943 }
944}
945
/// A parsed textproto message: every field name maps to the values it was
/// given, in order of appearance.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single textproto field value.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// A string or bare-identifier value.
    Scalar(String),
    /// A nested message.
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar value stored under `key`, skipping
    /// any nested messages.
    fn get_first_string(&self, key: &str) -> Option<String> {
        self.values_for(key)
            .iter()
            .find_map(Self::scalar_of)
            .cloned()
    }

    /// Returns copies of all scalar values stored under `key`, in order.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        self.values_for(key)
            .iter()
            .filter_map(Self::scalar_of)
            .cloned()
            .collect()
    }

    /// Returns a copy of the first nested message stored under `key`,
    /// skipping any scalars.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        self.values_for(key)
            .iter()
            .find_map(Self::message_of)
            .cloned()
    }

    /// Returns copies of all nested messages stored under `key`, in order.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        self.values_for(key)
            .iter()
            .filter_map(Self::message_of)
            .cloned()
            .collect()
    }

    /// All values recorded for `key`; empty when the key is absent.
    fn values_for(&self, key: &str) -> &[ProtoValue] {
        match self.fields.get(key) {
            Some(values) => values.as_slice(),
            None => &[],
        }
    }

    /// The scalar held by `value`, if it is one.
    fn scalar_of(value: &ProtoValue) -> Option<&String> {
        match value {
            ProtoValue::Scalar(text) => Some(text),
            ProtoValue::Map(_) => None,
        }
    }

    /// The nested message held by `value`, if it is one.
    fn message_of(value: &ProtoValue) -> Option<&ProtoMap> {
        match value {
            ProtoValue::Map(message) => Some(message),
            ProtoValue::Scalar(_) => None,
        }
    }
}
1000
1001fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
1002 let mut parser = TextProtoParser::new(content)?;
1003 parser.parse_map(false)
1004}
1005
1006struct TextProtoParser {
1007 tokens: Vec<TextProtoToken>,
1008 position: usize,
1009}
1010
1011#[derive(Debug, Clone)]
1012enum TextProtoToken {
1013 Identifier(String),
1014 String(String),
1015 Colon,
1016 LBrace,
1017 RBrace,
1018}
1019
1020impl TextProtoParser {
1021 fn new(content: &str) -> Result<Self, String> {
1022 Ok(Self {
1023 tokens: tokenize_textproto(content)?,
1024 position: 0,
1025 })
1026 }
1027
1028 fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
1029 let mut map = ProtoMap::default();
1030
1031 while let Some(token) = self.peek() {
1032 match token {
1033 TextProtoToken::RBrace if stop_on_rbrace => {
1034 self.position += 1;
1035 break;
1036 }
1037 TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
1038 TextProtoToken::Identifier(_) => {
1039 let key = self.expect_identifier()?;
1040 match self.peek() {
1041 Some(TextProtoToken::Colon) => {
1042 self.position += 1;
1043 match self.peek() {
1044 Some(TextProtoToken::LBrace) => {
1045 self.position += 1;
1046 let value = self.parse_map(true)?;
1047 map.fields
1048 .entry(key)
1049 .or_default()
1050 .push(ProtoValue::Map(value));
1051 }
1052 _ => {
1053 let value = self.expect_scalar()?;
1054 map.fields
1055 .entry(key)
1056 .or_default()
1057 .push(ProtoValue::Scalar(truncate_field(value)));
1058 }
1059 }
1060 }
1061 Some(TextProtoToken::LBrace) => {
1062 self.position += 1;
1063 let value = self.parse_map(true)?;
1064 map.fields
1065 .entry(key)
1066 .or_default()
1067 .push(ProtoValue::Map(value));
1068 }
1069 Some(other) => {
1070 return Err(format!("Unexpected token after key: {:?}", other));
1071 }
1072 None => return Err("Unexpected end of input after key".to_string()),
1073 }
1074 }
1075 other => return Err(format!("Unexpected token in textproto: {:?}", other)),
1076 }
1077 }
1078
1079 Ok(map)
1080 }
1081
1082 fn expect_identifier(&mut self) -> Result<String, String> {
1083 match self.next() {
1084 Some(TextProtoToken::Identifier(value)) => Ok(value),
1085 other => Err(format!("Expected identifier, found {:?}", other)),
1086 }
1087 }
1088
1089 fn expect_scalar(&mut self) -> Result<String, String> {
1090 match self.next() {
1091 Some(TextProtoToken::String(mut value)) => {
1092 while matches!(self.peek(), Some(TextProtoToken::String(_))) {
1093 if let Some(TextProtoToken::String(next)) = self.next() {
1094 value.push_str(&next);
1095 }
1096 }
1097 Ok(value)
1098 }
1099 Some(TextProtoToken::Identifier(value)) => Ok(value),
1100 other => Err(format!("Expected scalar value, found {:?}", other)),
1101 }
1102 }
1103
1104 fn peek(&self) -> Option<&TextProtoToken> {
1105 self.tokens.get(self.position)
1106 }
1107
1108 fn next(&mut self) -> Option<TextProtoToken> {
1109 let token = self.tokens.get(self.position).cloned();
1110 if token.is_some() {
1111 self.position += 1;
1112 }
1113 token
1114 }
1115}
1116
1117fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1118 let mut tokens = Vec::new();
1119 let chars = content.chars().collect::<Vec<_>>();
1120 let mut index = 0usize;
1121
1122 while index < chars.len() {
1123 match chars[index] {
1124 '{' => {
1125 tokens.push(TextProtoToken::LBrace);
1126 index += 1;
1127 }
1128 '}' => {
1129 tokens.push(TextProtoToken::RBrace);
1130 index += 1;
1131 }
1132 ':' => {
1133 tokens.push(TextProtoToken::Colon);
1134 index += 1;
1135 }
1136 '"' => {
1137 index += 1;
1138 let mut value = String::new();
1139 while index < chars.len() {
1140 match chars[index] {
1141 '\\' if index + 1 < chars.len() => {
1142 index += 1;
1143 value.push(chars[index]);
1144 index += 1;
1145 }
1146 '"' => {
1147 index += 1;
1148 break;
1149 }
1150 character => {
1151 value.push(character);
1152 index += 1;
1153 }
1154 }
1155 }
1156 tokens.push(TextProtoToken::String(value));
1157 }
1158 '#' => {
1159 while index < chars.len() && chars[index] != '\n' {
1160 index += 1;
1161 }
1162 }
1163 '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1164 index += 2;
1165 while index < chars.len() && chars[index] != '\n' {
1166 index += 1;
1167 }
1168 }
1169 character if character.is_ascii_whitespace() => index += 1,
1170 _ => {
1171 let start = index;
1172 while index < chars.len() {
1173 let character = chars[index];
1174 let starts_comment =
1175 character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1176
1177 if character.is_ascii_whitespace()
1178 || matches!(character, '{' | '}' | ':' | '#')
1179 || starts_comment
1180 {
1181 break;
1182 }
1183
1184 index += 1;
1185 }
1186
1187 let token = chars[start..index].iter().collect::<String>();
1188 if token.is_empty() {
1189 return Err("Encountered empty textproto token".to_string());
1190 }
1191 tokens.push(TextProtoToken::Identifier(token));
1192 }
1193 }
1194 }
1195
1196 Ok(tokens)
1197}
1198
1199#[derive(Clone, PartialEq, Message)]
1200pub struct ProtoSourcePosition {
1201 #[prost(uint32, tag = "1")]
1202 pub line_number: u32,
1203 #[prost(uint32, tag = "2")]
1204 pub column_number: u32,
1205}
1206
1207#[derive(Clone, PartialEq, Message)]
1208pub struct ProtoXmlNode {
1209 #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
1210 pub node: Option<proto_xml_node::Node>,
1211 #[prost(message, optional, tag = "3")]
1212 pub source: Option<ProtoSourcePosition>,
1213}
1214
1215impl ProtoXmlNode {
1216 fn element(&self) -> Option<&ProtoXmlElement> {
1217 match &self.node {
1218 Some(proto_xml_node::Node::Element(element)) => Some(element),
1219 _ => None,
1220 }
1221 }
1222}
1223
1224pub mod proto_xml_node {
1225 use super::ProtoXmlElement;
1226 use prost::Oneof;
1227
1228 #[derive(Clone, PartialEq, Oneof)]
1229 pub enum Node {
1230 #[prost(message, tag = "1")]
1231 Element(ProtoXmlElement),
1232 #[prost(string, tag = "2")]
1233 Text(String),
1234 }
1235}
1236
1237#[derive(Clone, PartialEq, Message)]
1238pub struct ProtoXmlElement {
1239 #[prost(message, repeated, tag = "1")]
1240 pub namespace_declaration: Vec<ProtoXmlNamespace>,
1241 #[prost(string, tag = "2")]
1242 pub namespace_uri: String,
1243 #[prost(string, tag = "3")]
1244 pub name: String,
1245 #[prost(message, repeated, tag = "4")]
1246 pub attribute: Vec<ProtoXmlAttribute>,
1247 #[prost(message, repeated, tag = "5")]
1248 pub child: Vec<ProtoXmlNode>,
1249}
1250
1251impl ProtoXmlElement {
1252 fn child_elements_named<'a>(
1253 &'a self,
1254 name: &'a str,
1255 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1256 self.child
1257 .iter()
1258 .filter_map(ProtoXmlNode::element)
1259 .filter(move |element| element.name == name)
1260 }
1261
1262 fn child_elements_named_any<'a>(
1263 &'a self,
1264 names: &'a [&'a str],
1265 ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1266 self.child
1267 .iter()
1268 .filter_map(ProtoXmlNode::element)
1269 .filter(move |element| names.contains(&element.name.as_str()))
1270 }
1271}
1272
1273#[derive(Clone, PartialEq, Message)]
1274pub struct ProtoXmlNamespace {
1275 #[prost(string, tag = "1")]
1276 pub prefix: String,
1277 #[prost(string, tag = "2")]
1278 pub uri: String,
1279 #[prost(message, optional, tag = "3")]
1280 pub source: Option<ProtoSourcePosition>,
1281}
1282
1283#[derive(Clone, PartialEq, Message)]
1284pub struct ProtoXmlAttribute {
1285 #[prost(string, tag = "1")]
1286 pub namespace_uri: String,
1287 #[prost(string, tag = "2")]
1288 pub name: String,
1289 #[prost(string, tag = "3")]
1290 pub value: String,
1291 #[prost(message, optional, tag = "4")]
1292 pub source: Option<ProtoSourcePosition>,
1293 #[prost(uint32, tag = "5")]
1294 pub resource_id: u32,
1295 #[prost(message, optional, tag = "6")]
1296 pub compiled_item: Option<ProtoItem>,
1297}
1298
1299#[derive(Clone, PartialEq, Message)]
1300pub struct ProtoItem {
1301 #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
1302 pub value: Option<proto_item::Value>,
1303 #[prost(uint32, tag = "8")]
1304 pub flag_status: u32,
1305 #[prost(bool, tag = "9")]
1306 pub flag_negated: bool,
1307 #[prost(string, tag = "10")]
1308 pub flag_name: String,
1309}
1310
1311pub mod proto_item {
1312 use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
1313 use prost::Oneof;
1314
1315 #[derive(Clone, PartialEq, Oneof)]
1316 pub enum Value {
1317 #[prost(message, tag = "2")]
1318 Str(ProtoStringValue),
1319 #[prost(message, tag = "3")]
1320 RawStr(ProtoRawStringValue),
1321 #[prost(message, tag = "7")]
1322 Prim(ProtoPrimitive),
1323 }
1324}
1325
1326#[derive(Clone, PartialEq, Message)]
1327pub struct ProtoStringValue {
1328 #[prost(string, tag = "1")]
1329 pub value: String,
1330}
1331
1332#[derive(Clone, PartialEq, Message)]
1333pub struct ProtoRawStringValue {
1334 #[prost(string, tag = "1")]
1335 pub value: String,
1336}
1337
1338#[derive(Clone, PartialEq, Message)]
1339pub struct ProtoPrimitive {
1340 #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
1341 pub value: Option<proto_primitive::Value>,
1342}
1343
1344pub mod proto_primitive {
1345 use prost::Oneof;
1346
1347 #[derive(Clone, PartialEq, Oneof)]
1348 pub enum Value {
1349 #[prost(float, tag = "3")]
1350 Float(f32),
1351 #[prost(int32, tag = "6")]
1352 IntDecimal(i32),
1353 #[prost(uint32, tag = "7")]
1354 IntHexadecimal(u32),
1355 #[prost(bool, tag = "8")]
1356 Boolean(bool),
1357 #[prost(uint32, tag = "13")]
1358 Dimension(u32),
1359 #[prost(uint32, tag = "14")]
1360 Fraction(u32),
1361 }
1362}