Skip to main content

provenant/parsers/
android.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
22const PACKAGE_TYPE: PackageType = PackageType::Android;
23const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
24const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
25const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
26const MAX_COMPRESSION_RATIO: f64 = 100.0;
27const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30    PackageData {
31        package_type: Some(PACKAGE_TYPE),
32        datasource_id: Some(datasource_id),
33        ..Default::default()
34    }
35}
36
/// Parser for Android Soong `METADATA` files (text-proto `third_party { ... }` descriptors).
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidSoongMetadataParser;

/// Parser for standalone `AndroidManifest.xml` files, in either text or binary XML form.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidManifestParser;

/// Parser for `.apk` archives; reads the binary `AndroidManifest.xml` stored in the ZIP.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidApkParser;

/// Parser for `.aab` app bundles; reads the proto-encoded manifest stored in the ZIP.
#[derive(Debug, Clone, Copy, Default)]
pub struct AndroidAabParser;
41
/// Heuristically decides whether `content` is an Android Soong `METADATA` text-proto.
///
/// Only the first 40 lines are inspected. Blank lines and `#` comments are ignored, a `//`
/// comment rejects the file outright, and the first Soong-specific block opener or an
/// upper-case `license_type:` enum value accepts it. If neither decides, the file is accepted
/// only when a `name:` / `description:` / `homepage:` field was seen in those lines *and* the
/// text `third_party` occurs anywhere in the content.
fn looks_like_android_soong_metadata_content(content: &str) -> bool {
    // Message openers that only appear in Soong METADATA files.
    const BLOCK_OPENERS: [&str; 10] = [
        "third_party {",
        "third_party{",
        "url {",
        "url{",
        "identifier {",
        "identifier{",
        "security {",
        "security{",
        "last_upgrade_date {",
        "last_upgrade_date{",
    ];
    // Generic scalar fields; suggestive, but not conclusive on their own.
    const NAMED_FIELDS: [&str; 3] = ["name:", "description:", "homepage:"];

    let significant_lines = content
        .lines()
        .take(40)
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'));

    let mut has_named_field = false;

    for line in significant_lines {
        if line.starts_with("//") {
            return false;
        }

        if BLOCK_OPENERS.iter().any(|&opener| line.starts_with(opener)) {
            return true;
        }

        let has_license_enum = line
            .strip_prefix("license_type:")
            .map(str::trim)
            .is_some_and(|value| {
                !value.is_empty()
                    && value
                        .chars()
                        .all(|symbol| symbol == '_' || symbol.is_ascii_uppercase())
            });
        if has_license_enum {
            return true;
        }

        if NAMED_FIELDS.iter().any(|&field| line.starts_with(field)) {
            has_named_field = true;
        }
    }

    has_named_field && content.contains("third_party")
}
91
92impl PackageParser for AndroidSoongMetadataParser {
93    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
94
95    fn is_match(path: &Path) -> bool {
96        if path.file_name().and_then(|name| name.to_str()) != Some("METADATA") {
97            return false;
98        }
99
100        if !path.is_file() {
101            return false;
102        }
103
104        crate::parsers::utils::read_file_to_string(path, Some(MAX_MANIFEST_SIZE))
105            .map(|content| looks_like_android_soong_metadata_content(&content))
106            .unwrap_or(false)
107    }
108
109    fn extract_packages(path: &Path) -> Vec<PackageData> {
110        let content = match crate::parsers::utils::read_file_to_string(path, None) {
111            Ok(content) => content,
112            Err(error) => {
113                warn!(
114                    "Failed to read Android Soong METADATA {:?}: {}",
115                    path, error
116                );
117                return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
118            }
119        };
120
121        vec![parse_soong_metadata(&content)]
122    }
123}
124
125impl PackageParser for AndroidManifestParser {
126    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
127
128    fn is_match(path: &Path) -> bool {
129        path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
130    }
131
132    fn extract_packages(path: &Path) -> Vec<PackageData> {
133        let bytes = match read_file_bytes(path, None) {
134            Ok(bytes) => bytes,
135            Err(error) => {
136                warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
137                return vec![default_package_data(DatasourceId::AndroidManifestXml)];
138            }
139        };
140
141        parse_manifest_bytes(
142            &bytes,
143            DatasourceId::AndroidManifestXml,
144            "AndroidManifest.xml",
145        )
146        .into_iter()
147        .collect()
148    }
149}
150
151impl PackageParser for AndroidApkParser {
152    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
153
154    fn is_match(path: &Path) -> bool {
155        path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
156    }
157
158    fn extract_packages(path: &Path) -> Vec<PackageData> {
159        let package_data = match read_best_zip_entry(path, |entry_name| {
160            if entry_name == "AndroidManifest.xml" {
161                Some(0)
162            } else {
163                None
164            }
165        }) {
166            Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
167                .unwrap_or_else(|error| {
168                    warn!("Failed to parse APK manifest {:?}: {}", path, error);
169                    default_package_data(DatasourceId::AndroidApk)
170                }),
171            Ok(None) => {
172                warn!("No AndroidManifest.xml found in APK {:?}", path);
173                default_package_data(DatasourceId::AndroidApk)
174            }
175            Err(error) => {
176                warn!("Failed to read APK archive {:?}: {}", path, error);
177                default_package_data(DatasourceId::AndroidApk)
178            }
179        };
180
181        vec![package_data]
182    }
183}
184
185impl PackageParser for AndroidAabParser {
186    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
187
188    fn is_match(path: &Path) -> bool {
189        path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
190    }
191
192    fn extract_packages(path: &Path) -> Vec<PackageData> {
193        let package_data = match read_best_zip_entry(path, |entry_name| {
194            if entry_name == "base/manifest/AndroidManifest.xml" {
195                Some(0)
196            } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
197                Some(1)
198            } else {
199                None
200            }
201        }) {
202            Ok(Some((entry_name, bytes))) => {
203                parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
204                    warn!(
205                        "Failed to parse AAB manifest {:?} ({}): {}",
206                        path, entry_name, error
207                    );
208                    default_package_data(DatasourceId::AndroidAab)
209                })
210            }
211            Ok(None) => {
212                warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
213                default_package_data(DatasourceId::AndroidAab)
214            }
215            Err(error) => {
216                warn!("Failed to read AAB archive {:?}: {}", path, error);
217                default_package_data(DatasourceId::AndroidAab)
218            }
219        };
220
221        vec![package_data]
222    }
223}
224
225fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
226    let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
227    let metadata =
228        fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
229
230    if metadata.len() > limit {
231        return Err(format!(
232            "File {:?} is {} bytes, exceeding the {} byte limit",
233            path,
234            metadata.len(),
235            limit
236        ));
237    }
238
239    let mut file =
240        File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
241    let mut bytes = Vec::with_capacity(metadata.len() as usize);
242    file.read_to_end(&mut bytes)
243        .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
244    Ok(bytes)
245}
246
247fn parse_soong_metadata(content: &str) -> PackageData {
248    let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
249        warn!("Failed to parse Android Soong METADATA: {}", error);
250        ProtoMap::default()
251    });
252
253    let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
254    package.name = parsed.get_first_string("name").map(truncate_field);
255    package.description = parsed.get_first_string("description").map(truncate_field);
256
257    if let Some(third_party) = parsed.get_first_map("third_party") {
258        package.version = third_party.get_first_string("version").map(truncate_field);
259
260        let url_entries = third_party
261            .get_all_maps("url")
262            .into_iter()
263            .map(|entry| {
264                let type_ = entry.get_first_string("type").map(truncate_field);
265                let value = entry.get_first_string("value").map(truncate_field);
266                (type_, value)
267            })
268            .collect::<Vec<_>>();
269
270        let homepage_url = third_party.get_first_string("homepage").or_else(|| {
271            url_entries
272                .iter()
273                .find(|(type_, _)| {
274                    type_
275                        .as_deref()
276                        .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
277                })
278                .and_then(|(_, value)| value.clone())
279        });
280        package.homepage_url = homepage_url.map(truncate_field);
281
282        let license_types = third_party
283            .get_all_strings("license_type")
284            .into_iter()
285            .map(truncate_field)
286            .collect::<Vec<_>>();
287        if !license_types.is_empty() {
288            package.extracted_license_statement = Some(license_types.join(", "));
289        }
290
291        let identifiers = third_party
292            .get_all_maps("identifier")
293            .into_iter()
294            .map(|identifier| {
295                let type_ = identifier.get_first_string("type").map(truncate_field);
296                let value = identifier.get_first_string("value").map(truncate_field);
297                let mut object = serde_json::Map::new();
298                if let Some(type_) = type_ {
299                    object.insert("type".to_string(), type_.into());
300                }
301                if let Some(value) = &value {
302                    object.insert("value".to_string(), value.clone().into());
303                }
304
305                if package.vcs_url.is_none()
306                    && let (Some(type_), Some(value)) = (
307                        identifier.get_first_string("type"),
308                        identifier.get_first_string("value"),
309                    )
310                {
311                    let lower_type = type_.to_ascii_lowercase();
312                    if lower_type.contains("git") {
313                        package.vcs_url = Some(truncate_field(value));
314                    } else if lower_type.contains("archive")
315                        || lower_type.contains("tar")
316                        || lower_type.contains("zip")
317                    {
318                        package.download_url = Some(truncate_field(value));
319                    }
320                }
321
322                serde_json::Value::Object(object)
323            })
324            .collect::<Vec<_>>();
325
326        for (type_, value) in &url_entries {
327            let Some(value) = value else {
328                continue;
329            };
330
331            match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
332                Some("git") if package.vcs_url.is_none() => {
333                    package.vcs_url = Some(value.clone());
334                }
335                Some("archive") if package.download_url.is_none() => {
336                    package.download_url = Some(value.clone());
337                }
338                Some("homepage") if package.homepage_url.is_none() => {
339                    package.homepage_url = Some(value.clone());
340                }
341                _ => {}
342            }
343        }
344
345        let mut extra_data = HashMap::new();
346        if !identifiers.is_empty() {
347            extra_data.insert("identifiers".to_string(), identifiers.into());
348        }
349        if !url_entries.is_empty() {
350            extra_data.insert(
351                "urls".to_string(),
352                url_entries
353                    .iter()
354                    .map(|(type_, value)| {
355                        let mut object = serde_json::Map::new();
356                        if let Some(type_) = type_ {
357                            object.insert("type".to_string(), type_.clone().into());
358                        }
359                        if let Some(value) = value {
360                            object.insert("value".to_string(), value.clone().into());
361                        }
362                        serde_json::Value::Object(object)
363                    })
364                    .collect::<Vec<_>>()
365                    .into(),
366            );
367        }
368
369        if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
370            let year = last_upgrade_date.get_first_string("year");
371            let month = last_upgrade_date.get_first_string("month");
372            let day = last_upgrade_date.get_first_string("day");
373            if let (Some(year), Some(month), Some(day)) = (year, month, day) {
374                let formatted = format!(
375                    "{:04}-{:02}-{:02}",
376                    year.parse::<u32>().unwrap_or_default(),
377                    month.parse::<u32>().unwrap_or_default(),
378                    day.parse::<u32>().unwrap_or_default()
379                );
380                extra_data.insert(
381                    "last_upgrade_date".to_string(),
382                    truncate_field(formatted).into(),
383                );
384            }
385        }
386
387        if let Some(upstream_url) = third_party.get_first_string("url") {
388            extra_data.insert(
389                "upstream_url".to_string(),
390                truncate_field(upstream_url).into(),
391            );
392        }
393
394        if !extra_data.is_empty() {
395            package.extra_data = Some(extra_data);
396        }
397    }
398
399    package
400}
401
402fn parse_manifest_bytes(
403    bytes: &[u8],
404    datasource_id: DatasourceId,
405    context: &str,
406) -> Option<PackageData> {
407    if looks_like_text_xml(bytes) {
408        match parse_text_manifest_bytes(bytes, datasource_id) {
409            Ok(package) => return Some(package),
410            Err(error) => {
411                warn!("Failed to parse {} as text XML: {}", context, error);
412                return None;
413            }
414        }
415    }
416
417    parse_binary_manifest_bytes(bytes, datasource_id)
418        .map(Some)
419        .unwrap_or_else(|error| {
420            warn!(
421                "Failed to parse {} as binary Android XML: {}",
422                context, error
423            );
424            None
425        })
426}
427
/// Returns `true` when `bytes` look like a textual XML document.
///
/// The check is: after an optional UTF-8 byte-order mark and any ASCII whitespace, the first
/// byte must be `<`. Empty and whitespace-only inputs are not XML.
///
/// The BOM is skipped so that BOM-prefixed text manifests are not misrouted to the binary
/// Android XML parser.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = &[0xEF, 0xBB, 0xBF];

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);
    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
434
435fn parse_text_manifest_bytes(
436    bytes: &[u8],
437    datasource_id: DatasourceId,
438) -> Result<PackageData, String> {
439    let content = String::from_utf8(bytes.to_vec())
440        .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
441
442    let mut reader = Reader::from_str(&content);
443    reader.config_mut().trim_text(true);
444
445    let mut buf = Vec::new();
446    let mut manifest_attributes = HashMap::new();
447    let mut uses_sdk_attributes = HashMap::new();
448    let mut application_attributes = HashMap::new();
449    let mut requested_permissions = Vec::new();
450    let mut uses_libraries = Vec::new();
451    let mut iteration_count = 0usize;
452
453    loop {
454        iteration_count += 1;
455        if iteration_count > MAX_ITERATION_COUNT {
456            return Err(format!(
457                "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
458                MAX_ITERATION_COUNT
459            ));
460        }
461
462        match reader.read_event_into(&mut buf) {
463            Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
464                let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
465                let attributes = xml_attributes_to_map(&reader, &event)?;
466                match name.as_str() {
467                    "manifest" if manifest_attributes.is_empty() => {
468                        manifest_attributes = attributes
469                    }
470                    "uses-sdk" => uses_sdk_attributes = attributes,
471                    "application" if application_attributes.is_empty() => {
472                        application_attributes = attributes;
473                    }
474                    "uses-permission" | "uses-permission-sdk-23" => {
475                        if let Some(permission) = attributes.get("android:name") {
476                            requested_permissions.push(permission.clone());
477                        }
478                    }
479                    "uses-library" => {
480                        if let Some(library_name) = attributes.get("android:name") {
481                            uses_libraries.push(library_name.clone());
482                        }
483                    }
484                    _ => {}
485                }
486            }
487            Ok(Event::Eof) => break,
488            Err(error) => {
489                return Err(format!(
490                    "XML parse error at position {}: {}",
491                    reader.buffer_position(),
492                    error
493                ));
494            }
495            _ => {}
496        }
497
498        buf.clear();
499    }
500
501    Ok(build_manifest_package_data(
502        datasource_id,
503        &manifest_attributes,
504        &uses_sdk_attributes,
505        &application_attributes,
506        requested_permissions,
507        uses_libraries,
508    ))
509}
510
511fn xml_attributes_to_map(
512    reader: &Reader<&[u8]>,
513    event: &quick_xml::events::BytesStart<'_>,
514) -> Result<HashMap<String, String>, String> {
515    let mut attributes = HashMap::new();
516
517    for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
518        let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
519        let value = attribute
520            .decode_and_unescape_value(reader.decoder())
521            .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
522            .into_owned();
523        attributes.insert(key, truncate_field(value));
524    }
525
526    Ok(attributes)
527}
528
529fn parse_binary_manifest_bytes(
530    bytes: &[u8],
531    datasource_id: DatasourceId,
532) -> Result<PackageData, String> {
533    let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
534        .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
535        .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
536
537    let manifest_attributes =
538        normalize_binary_attributes(axml.root().borrow().attributes().clone());
539    let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
540        .into_iter()
541        .next()
542        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
543        .unwrap_or_default();
544    let application_attributes = find_nodes_by_type(&axml, "application")
545        .into_iter()
546        .next()
547        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
548        .unwrap_or_default();
549
550    let requested_permissions = get_requested_permissions(&axml)
551        .into_iter()
552        .map(truncate_field)
553        .collect::<Vec<_>>();
554    let uses_libraries = find_nodes_by_type(&axml, "uses-library")
555        .into_iter()
556        .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
557        .map(truncate_field)
558        .collect::<Vec<_>>();
559
560    Ok(build_manifest_package_data(
561        datasource_id,
562        &manifest_attributes,
563        &uses_sdk_attributes,
564        &application_attributes,
565        requested_permissions,
566        uses_libraries,
567    ))
568}
569
570fn build_manifest_package_data(
571    datasource_id: DatasourceId,
572    manifest_attributes: &HashMap<String, String>,
573    uses_sdk_attributes: &HashMap<String, String>,
574    application_attributes: &HashMap<String, String>,
575    requested_permissions: Vec<String>,
576    uses_libraries: Vec<String>,
577) -> PackageData {
578    let mut package = default_package_data(datasource_id);
579    package.name = manifest_attributes.get("package").cloned();
580    package.version = manifest_attributes
581        .get("android:versionName")
582        .cloned()
583        .or_else(|| manifest_attributes.get("android:versionCode").cloned());
584
585    package.description = application_attributes
586        .get("android:label")
587        .filter(|label| {
588            !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
589        })
590        .cloned();
591
592    let mut extra_data = HashMap::new();
593    insert_extra(
594        &mut extra_data,
595        "version_code",
596        manifest_attributes.get("android:versionCode"),
597    );
598    insert_extra(
599        &mut extra_data,
600        "compile_sdk_version",
601        manifest_attributes.get("android:compileSdkVersion"),
602    );
603    insert_extra(
604        &mut extra_data,
605        "compile_sdk_version_codename",
606        manifest_attributes.get("android:compileSdkVersionCodename"),
607    );
608    insert_extra(
609        &mut extra_data,
610        "platform_build_version_code",
611        manifest_attributes.get("platformBuildVersionCode"),
612    );
613    insert_extra(
614        &mut extra_data,
615        "platform_build_version_name",
616        manifest_attributes.get("platformBuildVersionName"),
617    );
618    insert_extra(
619        &mut extra_data,
620        "min_sdk_version",
621        uses_sdk_attributes.get("android:minSdkVersion"),
622    );
623    insert_extra(
624        &mut extra_data,
625        "target_sdk_version",
626        uses_sdk_attributes.get("android:targetSdkVersion"),
627    );
628    insert_extra(
629        &mut extra_data,
630        "max_sdk_version",
631        uses_sdk_attributes.get("android:maxSdkVersion"),
632    );
633
634    if !requested_permissions.is_empty() {
635        extra_data.insert(
636            "requested_permissions".to_string(),
637            requested_permissions
638                .into_iter()
639                .map(serde_json::Value::from)
640                .collect::<Vec<_>>()
641                .into(),
642        );
643    }
644    if !uses_libraries.is_empty() {
645        extra_data.insert(
646            "uses_libraries".to_string(),
647            uses_libraries
648                .into_iter()
649                .map(serde_json::Value::from)
650                .collect::<Vec<_>>()
651                .into(),
652        );
653    }
654
655    if !extra_data.is_empty() {
656        package.extra_data = Some(extra_data);
657    }
658
659    package
660}
661
662fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
663    attributes
664        .into_iter()
665        .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
666        .collect()
667}
668
/// Rewrites a hex-rendered attribute value as a decimal string.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits fit in a `u64` are
/// converted to decimal; every other value is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    value
        .strip_prefix("(type 0x10) 0x")
        .or_else(|| value.strip_prefix("0x"))
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
682
683fn insert_extra(
684    extra_data: &mut HashMap<String, serde_json::Value>,
685    key: &str,
686    value: Option<&String>,
687) {
688    if let Some(value) = value {
689        extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
690    }
691}
692
693fn read_best_zip_entry<F>(
694    path: &Path,
695    mut rank_entry: F,
696) -> Result<Option<(String, Vec<u8>)>, String>
697where
698    F: FnMut(&str) -> Option<u8>,
699{
700    let metadata = fs::metadata(path)
701        .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
702    if metadata.len() > MAX_ARCHIVE_SIZE {
703        return Err(format!(
704            "Archive {:?} is {} bytes, exceeding the {} byte limit",
705            path,
706            metadata.len(),
707            MAX_ARCHIVE_SIZE
708        ));
709    }
710
711    let file = File::open(path)
712        .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
713    let mut archive = ZipArchive::new(file)
714        .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
715
716    let mut total_uncompressed = 0u64;
717    let mut best: Option<(u8, String, Vec<u8>)> = None;
718    let entry_count = archive.len().min(MAX_ITERATION_COUNT);
719
720    if archive.len() > MAX_ITERATION_COUNT {
721        warn!(
722            "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
723            path, MAX_ITERATION_COUNT
724        );
725    }
726
727    for index in 0..entry_count {
728        let mut entry = archive.by_index(index).map_err(|error| {
729            format!(
730                "Failed to read ZIP entry {} in {:?}: {}",
731                index, path, error
732            )
733        })?;
734
735        total_uncompressed = total_uncompressed.saturating_add(entry.size());
736        if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
737            return Err(format!(
738                "Archive {:?} exceeds total uncompressed size limit of {} bytes",
739                path, MAX_TOTAL_UNCOMPRESSED_SIZE
740            ));
741        }
742
743        let entry_name = entry.name().replace('\\', "/");
744        if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
745            return Err(format!(
746                "Archive entry {} contains a disallowed path",
747                entry_name
748            ));
749        }
750        let Some(rank) = rank_entry(&entry_name) else {
751            continue;
752        };
753
754        if entry.size() > MAX_FILE_SIZE {
755            return Err(format!(
756                "Archive entry {} is {} bytes, exceeding the {} byte limit",
757                entry_name,
758                entry.size(),
759                MAX_FILE_SIZE
760            ));
761        }
762
763        let compressed_size = entry.compressed_size();
764        if compressed_size > 0 {
765            let ratio = entry.size() as f64 / compressed_size as f64;
766            if ratio > MAX_COMPRESSION_RATIO {
767                return Err(format!(
768                    "Archive entry {} has suspicious compression ratio {:.2}:1",
769                    entry_name, ratio
770                ));
771            }
772        }
773
774        let should_replace = match &best {
775            Some((best_rank, _, _)) => rank < *best_rank,
776            None => true,
777        };
778
779        if should_replace {
780            let mut bytes = Vec::with_capacity(entry.size() as usize);
781            entry.read_to_end(&mut bytes).map_err(|error| {
782                format!("Failed to read archive entry {}: {}", entry_name, error)
783            })?;
784            best = Some((rank, entry_name, bytes));
785        }
786    }
787
788    Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
789}
790
791fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
792    let node =
793        ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
794    let root_element = node
795        .element()
796        .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
797    if root_element.name != "manifest" {
798        return Err(format!(
799            "Unexpected proto XML root element: {}",
800            root_element.name
801        ));
802    }
803
804    let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
805    let uses_sdk_attributes = root_element
806        .child_elements_named("uses-sdk")
807        .next()
808        .map(|element| proto_attributes_to_map(&element.attribute))
809        .unwrap_or_default();
810    let application_attributes = root_element
811        .child_elements_named("application")
812        .next()
813        .map(|element| proto_attributes_to_map(&element.attribute))
814        .unwrap_or_default();
815    let requested_permissions = root_element
816        .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
817        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
818        .collect::<Vec<_>>();
819    let uses_libraries = root_element
820        .child_elements_named("uses-library")
821        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
822        .collect::<Vec<_>>();
823
824    let mut package = build_manifest_package_data(
825        DatasourceId::AndroidAab,
826        &manifest_attributes,
827        &uses_sdk_attributes,
828        &application_attributes,
829        requested_permissions,
830        uses_libraries,
831    );
832
833    if let Some(extra_data) = package.extra_data.as_mut() {
834        extra_data.insert("manifest_encoding".to_string(), "proto".into());
835    } else {
836        package.extra_data = Some(HashMap::from([(
837            "manifest_encoding".to_string(),
838            serde_json::Value::String("proto".to_string()),
839        )]));
840    }
841
842    Ok(package)
843}
844
845fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
846    attributes
847        .iter()
848        .filter_map(|attribute| {
849            let key = proto_attribute_key(attribute)?;
850            let value = proto_attribute_value(attribute)?;
851            Some((key, truncate_field(value)))
852        })
853        .collect()
854}
855
856fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
857    if attribute.name.is_empty() {
858        return None;
859    }
860
861    if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
862        return Some(format!("android:{}", attribute.name));
863    }
864
865    Some(attribute.name.clone())
866}
867
868fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
869    if !attribute.value.is_empty() {
870        return Some(attribute.value.clone());
871    }
872
873    attribute
874        .compiled_item
875        .as_ref()
876        .and_then(proto_item_to_string)
877}
878
879fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
880    match &item.value {
881        Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
882        Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
883        Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
884        _ => None,
885    }
886}
887
888fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
889    match &primitive.value {
890        Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
891        Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
892        Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
893        Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
894        Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
895        Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
896        _ => None,
897    }
898}
899
/// A parsed textproto message: each field name maps to every value recorded
/// for it, in the order the values were pushed.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// One value stored under a textproto field name.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// A scalar value kept as text.
    Scalar(String),
    /// A nested message.
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar stored under `key`, skipping any
    /// nested messages; `None` when the key is absent or holds no scalar.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for candidate in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = candidate {
                return Some(text.clone());
            }
        }
        None
    }

    /// Returns copies of all scalars stored under `key`, in order; empty when
    /// the key is absent.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let mut scalars = Vec::new();
        if let Some(entries) = self.fields.get(key) {
            for entry in entries {
                if let ProtoValue::Scalar(text) = entry {
                    scalars.push(text.clone());
                }
            }
        }
        scalars
    }

    /// Returns a copy of the first nested message stored under `key`, skipping
    /// any scalars; `None` when the key is absent or holds no message.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for candidate in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = candidate {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Returns copies of all nested messages stored under `key`, in order;
    /// empty when the key is absent.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let mut nested_maps = Vec::new();
        if let Some(entries) = self.fields.get(key) {
            for entry in entries {
                if let ProtoValue::Map(nested) = entry {
                    nested_maps.push(nested.clone());
                }
            }
        }
        nested_maps
    }
}
954
955fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
956    let mut parser = TextProtoParser::new(content)?;
957    parser.parse_map(false)
958}
959
/// Parser state over a pre-tokenized textproto document.
struct TextProtoParser {
    /// Every token of the input, in source order.
    tokens: Vec<TextProtoToken>,
    /// Index into `tokens` of the next token to be consumed.
    position: usize,
}
964
/// Lexical tokens produced by `tokenize_textproto`.
#[derive(Debug, Clone)]
enum TextProtoToken {
    /// An unquoted run of characters; used for field names and bare values.
    Identifier(String),
    /// The contents of a quoted string literal, without the surrounding quotes.
    String(String),
    /// The `:` separator.
    Colon,
    /// An opening `{`.
    LBrace,
    /// A closing `}`.
    RBrace,
}
973
974impl TextProtoParser {
975    fn new(content: &str) -> Result<Self, String> {
976        Ok(Self {
977            tokens: tokenize_textproto(content)?,
978            position: 0,
979        })
980    }
981
982    fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
983        let mut map = ProtoMap::default();
984
985        while let Some(token) = self.peek() {
986            match token {
987                TextProtoToken::RBrace if stop_on_rbrace => {
988                    self.position += 1;
989                    break;
990                }
991                TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
992                TextProtoToken::Identifier(_) => {
993                    let key = self.expect_identifier()?;
994                    match self.peek() {
995                        Some(TextProtoToken::Colon) => {
996                            self.position += 1;
997                            match self.peek() {
998                                Some(TextProtoToken::LBrace) => {
999                                    self.position += 1;
1000                                    let value = self.parse_map(true)?;
1001                                    map.fields
1002                                        .entry(key)
1003                                        .or_default()
1004                                        .push(ProtoValue::Map(value));
1005                                }
1006                                _ => {
1007                                    let value = self.expect_scalar()?;
1008                                    map.fields
1009                                        .entry(key)
1010                                        .or_default()
1011                                        .push(ProtoValue::Scalar(truncate_field(value)));
1012                                }
1013                            }
1014                        }
1015                        Some(TextProtoToken::LBrace) => {
1016                            self.position += 1;
1017                            let value = self.parse_map(true)?;
1018                            map.fields
1019                                .entry(key)
1020                                .or_default()
1021                                .push(ProtoValue::Map(value));
1022                        }
1023                        Some(other) => {
1024                            return Err(format!("Unexpected token after key: {:?}", other));
1025                        }
1026                        None => return Err("Unexpected end of input after key".to_string()),
1027                    }
1028                }
1029                other => return Err(format!("Unexpected token in textproto: {:?}", other)),
1030            }
1031        }
1032
1033        Ok(map)
1034    }
1035
1036    fn expect_identifier(&mut self) -> Result<String, String> {
1037        match self.next() {
1038            Some(TextProtoToken::Identifier(value)) => Ok(value),
1039            other => Err(format!("Expected identifier, found {:?}", other)),
1040        }
1041    }
1042
1043    fn expect_scalar(&mut self) -> Result<String, String> {
1044        match self.next() {
1045            Some(TextProtoToken::String(mut value)) => {
1046                while matches!(self.peek(), Some(TextProtoToken::String(_))) {
1047                    if let Some(TextProtoToken::String(next)) = self.next() {
1048                        value.push_str(&next);
1049                    }
1050                }
1051                Ok(value)
1052            }
1053            Some(TextProtoToken::Identifier(value)) => Ok(value),
1054            other => Err(format!("Expected scalar value, found {:?}", other)),
1055        }
1056    }
1057
1058    fn peek(&self) -> Option<&TextProtoToken> {
1059        self.tokens.get(self.position)
1060    }
1061
1062    fn next(&mut self) -> Option<TextProtoToken> {
1063        let token = self.tokens.get(self.position).cloned();
1064        if token.is_some() {
1065            self.position += 1;
1066        }
1067        token
1068    }
1069}
1070
1071fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1072    let mut tokens = Vec::new();
1073    let chars = content.chars().collect::<Vec<_>>();
1074    let mut index = 0usize;
1075
1076    while index < chars.len() {
1077        match chars[index] {
1078            '{' => {
1079                tokens.push(TextProtoToken::LBrace);
1080                index += 1;
1081            }
1082            '}' => {
1083                tokens.push(TextProtoToken::RBrace);
1084                index += 1;
1085            }
1086            ':' => {
1087                tokens.push(TextProtoToken::Colon);
1088                index += 1;
1089            }
1090            '"' => {
1091                index += 1;
1092                let mut value = String::new();
1093                while index < chars.len() {
1094                    match chars[index] {
1095                        '\\' if index + 1 < chars.len() => {
1096                            index += 1;
1097                            value.push(chars[index]);
1098                            index += 1;
1099                        }
1100                        '"' => {
1101                            index += 1;
1102                            break;
1103                        }
1104                        character => {
1105                            value.push(character);
1106                            index += 1;
1107                        }
1108                    }
1109                }
1110                tokens.push(TextProtoToken::String(value));
1111            }
1112            '#' => {
1113                while index < chars.len() && chars[index] != '\n' {
1114                    index += 1;
1115                }
1116            }
1117            '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1118                index += 2;
1119                while index < chars.len() && chars[index] != '\n' {
1120                    index += 1;
1121                }
1122            }
1123            character if character.is_ascii_whitespace() => index += 1,
1124            _ => {
1125                let start = index;
1126                while index < chars.len() {
1127                    let character = chars[index];
1128                    let starts_comment =
1129                        character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1130
1131                    if character.is_ascii_whitespace()
1132                        || matches!(character, '{' | '}' | ':' | '#')
1133                        || starts_comment
1134                    {
1135                        break;
1136                    }
1137
1138                    index += 1;
1139                }
1140
1141                let token = chars[start..index].iter().collect::<String>();
1142                if token.is_empty() {
1143                    return Err("Encountered empty textproto token".to_string());
1144                }
1145                tokens.push(TextProtoToken::Identifier(token));
1146            }
1147        }
1148    }
1149
1150    Ok(tokens)
1151}
1152
/// Protobuf message holding a line/column pair.
///
/// NOTE(review): presumably the position of a node in the original XML source,
/// following aapt2's `Resources.proto` — confirm against the schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoSourcePosition {
    /// Line number (protobuf field 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (protobuf field 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1160
/// Protobuf message for one node of an XML tree: either an element or text.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNode {
    /// The node payload (protobuf fields 1 and 2); `None` when neither is set.
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    /// Optional source position of the node (protobuf field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1168
1169impl ProtoXmlNode {
1170    fn element(&self) -> Option<&ProtoXmlElement> {
1171        match &self.node {
1172            Some(proto_xml_node::Node::Element(element)) => Some(element),
1173            _ => None,
1174        }
1175    }
1176}
1177
/// Holds the `oneof` payload type used by `ProtoXmlNode::node`.
pub mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// Payload of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// A nested XML element (protobuf field 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text node (protobuf field 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1190
/// Protobuf message for an XML element with its attributes and children.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlElement {
    /// Namespace declarations carried by this element (protobuf field 1).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (protobuf field 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element tag name (protobuf field 3); used to match children by name.
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (protobuf field 4).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, both elements and text (protobuf field 5).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1204
1205impl ProtoXmlElement {
1206    fn child_elements_named<'a>(
1207        &'a self,
1208        name: &'a str,
1209    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1210        self.child
1211            .iter()
1212            .filter_map(ProtoXmlNode::element)
1213            .filter(move |element| element.name == name)
1214    }
1215
1216    fn child_elements_named_any<'a>(
1217        &'a self,
1218        names: &'a [&'a str],
1219    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1220        self.child
1221            .iter()
1222            .filter_map(ProtoXmlNode::element)
1223            .filter(move |element| names.contains(&element.name.as_str()))
1224    }
1225}
1226
/// Protobuf message for an XML namespace declaration (prefix bound to a URI).
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNamespace {
    /// Namespace prefix (protobuf field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (protobuf field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position of the declaration (protobuf field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1236
/// Protobuf message for a single XML attribute.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (protobuf field 1); compared against
    /// `ANDROID_XML_NAMESPACE` when building attribute keys.
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name without prefix (protobuf field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Textual attribute value (protobuf field 3); may be empty, in which case
    /// `compiled_item` is consulted instead.
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position of the attribute (protobuf field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Resource id associated with the attribute (protobuf field 5).
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Compiled form of the value, if any (protobuf field 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1252
/// Protobuf message for a compiled attribute value.
///
/// Only the `oneof` alternatives with tags 2, 3 and 7 are modelled here.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoItem {
    /// The compiled value; `None` when none of the modelled alternatives is set.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status (protobuf field 8).
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (protobuf field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Flag name (protobuf field 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1264
/// Holds the `oneof` payload type used by `ProtoItem::value`.
pub mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// Modelled alternatives of a compiled item.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1279
/// Protobuf message wrapping a string value.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoStringValue {
    /// The string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1285
/// Protobuf message wrapping a raw string value.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoRawStringValue {
    /// The raw string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1291
/// Protobuf message for a primitive compiled value.
///
/// Only the `oneof` alternatives with tags 3, 6, 7, 8, 13 and 14 are modelled.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoPrimitive {
    /// The primitive payload; `None` when none of the modelled alternatives is set.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1297
/// Holds the `oneof` payload type used by `ProtoPrimitive::value`.
pub mod proto_primitive {
    use prost::Oneof;

    /// Modelled alternatives of a primitive value.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// Floating-point value (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed integer rendered in decimal (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned integer rendered in hexadecimal (protobuf field 7).
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean value (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension stored as a raw `u32` (protobuf field 13).
        /// NOTE(review): the bit-level encoding is not visible here — confirm
        /// against the schema before interpreting it.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction stored as a raw `u32` (protobuf field 14).
        /// NOTE(review): the bit-level encoding is not visible here — confirm
        /// against the schema before interpreting it.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1317
// Parser registrations for this module.
// NOTE(review): the meaning of each positional argument is defined by
// `register_parser!`, which is not visible here; they appear to be a
// description, path globs, a package-type label, a language label and an
// optional documentation URL — confirm against the macro definition.

// Soong `METADATA` textproto files.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);

// `AndroidManifest.xml` files, in text or binary form.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);

// `.apk` archives.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);

// `.aab` app bundles.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);