Skip to main content

provenant/parsers/
android.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::XmlVersion;
12use quick_xml::events::Event;
13use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
14use zip::ZipArchive;
15
16use crate::models::{DatasourceId, PackageData, PackageType};
17use crate::parser_warn as warn;
18use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
19use crate::utils::magic;
20
21use super::PackageParser;
22use super::metadata::ParserMetadata;
23
// Package type reported by every parser defined in this module.
const PACKAGE_TYPE: PackageType = PackageType::Android;
// Largest archive file (100 MiB) that `read_best_zip_entry` is willing to open.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
// Largest declared uncompressed size (50 MiB) accepted for a selected archive entry.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
// Cap (1 GiB) on the running sum of declared uncompressed entry sizes in one archive.
const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
// Highest declared uncompressed-to-compressed ratio tolerated for a selected entry.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
// Namespace URI whose proto attributes are reported with an `android:` key prefix.
const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
30
31fn default_package_data(datasource_id: DatasourceId) -> PackageData {
32    PackageData {
33        package_type: Some(PACKAGE_TYPE),
34        datasource_id: Some(datasource_id),
35        ..Default::default()
36    }
37}
38
/// Parser for Android Soong `METADATA` textproto files.
pub struct AndroidSoongMetadataParser;
/// Parser for `AndroidManifest.xml` files (text XML or binary AXML).
pub struct AndroidManifestParser;
/// Parser for the manifest stored inside Android `.apk` archives.
pub struct AndroidApkParser;
/// Parser for the proto manifest stored inside Android App Bundle (`.aab`) archives.
pub struct AndroidAabParser;
43
/// Heuristically decides whether `content` is an Android Soong `METADATA`
/// textproto.
///
/// Only the first 40 lines are inspected. Blank lines and `#` comments are
/// skipped; a line starting with `//` rejects the file outright. The file is
/// accepted as soon as a line opens one of the known message blocks or holds a
/// `license_type:` value made only of ASCII uppercase letters and underscores.
/// Failing that, it is accepted when one of the scanned lines starts with
/// `name:`, `description:` or `homepage:` and the text `third_party` occurs
/// anywhere in `content`.
fn looks_like_android_soong_metadata_content(content: &str) -> bool {
    const SCANNED_LINES: usize = 40;
    const BLOCK_NAMES: [&str; 5] = [
        "third_party",
        "url",
        "identifier",
        "security",
        "last_upgrade_date",
    ];
    const SCALAR_FIELDS: [&str; 3] = ["name:", "description:", "homepage:"];

    let mut found_scalar_field = false;

    for raw_line in content.lines().take(SCANNED_LINES) {
        let line = raw_line.trim();

        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if line.starts_with("//") {
            return false;
        }

        // A block opener is the block name followed by `{`, with at most one
        // space in between.
        let opens_known_block = BLOCK_NAMES.iter().any(|&name| {
            line.strip_prefix(name)
                .is_some_and(|rest| rest.starts_with('{') || rest.starts_with(" {"))
        });
        if opens_known_block {
            return true;
        }

        let has_enum_license = line
            .strip_prefix("license_type:")
            .map(str::trim)
            .is_some_and(|license| {
                !license.is_empty()
                    && license
                        .chars()
                        .all(|symbol| symbol.is_ascii_uppercase() || symbol == '_')
            });
        if has_enum_license {
            return true;
        }

        found_scalar_field |= SCALAR_FIELDS.iter().any(|&field| line.starts_with(field));
    }

    found_scalar_field && content.contains("third_party")
}
93
94impl PackageParser for AndroidSoongMetadataParser {
95    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
96
97    fn metadata() -> Vec<ParserMetadata> {
98        vec![ParserMetadata {
99            description: "Android Soong METADATA textproto",
100            file_patterns: &["**/METADATA"],
101            package_type: "android",
102            primary_language: "",
103            documentation_url: Some(
104                "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/compliance/project_metadata_proto/project_metadata.proto",
105            ),
106        }]
107    }
108
109    fn is_match(path: &Path) -> bool {
110        if path.file_name().and_then(|name| name.to_str()) != Some("METADATA") {
111            return false;
112        }
113
114        if !path.is_file() {
115            return false;
116        }
117
118        crate::parsers::utils::read_file_to_string(path, Some(MAX_MANIFEST_SIZE))
119            .map(|content| looks_like_android_soong_metadata_content(&content))
120            .unwrap_or(false)
121    }
122
123    fn extract_packages(path: &Path) -> Vec<PackageData> {
124        let content = match crate::parsers::utils::read_file_to_string(path, None) {
125            Ok(content) => content,
126            Err(error) => {
127                warn!(
128                    "Failed to read Android Soong METADATA {:?}: {}",
129                    path, error
130                );
131                return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
132            }
133        };
134
135        vec![parse_soong_metadata(&content)]
136    }
137}
138
139impl PackageParser for AndroidManifestParser {
140    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
141
142    fn metadata() -> Vec<ParserMetadata> {
143        vec![ParserMetadata {
144            description: "AndroidManifest.xml metadata (text XML or binary AXML)",
145            file_patterns: &["**/AndroidManifest.xml"],
146            package_type: "android",
147            primary_language: "XML",
148            documentation_url: Some(
149                "https://developer.android.com/guide/topics/manifest/manifest-intro",
150            ),
151        }]
152    }
153
154    fn is_match(path: &Path) -> bool {
155        path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
156    }
157
158    fn extract_packages(path: &Path) -> Vec<PackageData> {
159        let bytes = match read_file_bytes(path, None) {
160            Ok(bytes) => bytes,
161            Err(error) => {
162                warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
163                return vec![default_package_data(DatasourceId::AndroidManifestXml)];
164            }
165        };
166
167        parse_manifest_bytes(
168            &bytes,
169            DatasourceId::AndroidManifestXml,
170            "AndroidManifest.xml",
171        )
172        .into_iter()
173        .collect()
174    }
175}
176
177impl PackageParser for AndroidApkParser {
178    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
179
180    fn metadata() -> Vec<ParserMetadata> {
181        vec![ParserMetadata {
182            description: "Android APK archive manifest metadata",
183            file_patterns: &["**/*.apk"],
184            package_type: "android",
185            primary_language: "",
186            documentation_url: Some("https://developer.android.com/build/build-for-release"),
187        }]
188    }
189
190    fn is_match(path: &Path) -> bool {
191        path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
192    }
193
194    fn extract_packages(path: &Path) -> Vec<PackageData> {
195        let package_data = match read_best_zip_entry(path, |entry_name| {
196            if entry_name == "AndroidManifest.xml" {
197                Some(0)
198            } else {
199                None
200            }
201        }) {
202            Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
203                .unwrap_or_else(|error| {
204                    warn!("Failed to parse APK manifest {:?}: {}", path, error);
205                    default_package_data(DatasourceId::AndroidApk)
206                }),
207            Ok(None) => {
208                warn!("No AndroidManifest.xml found in APK {:?}", path);
209                default_package_data(DatasourceId::AndroidApk)
210            }
211            Err(error) => {
212                warn!("Failed to read APK archive {:?}: {}", path, error);
213                default_package_data(DatasourceId::AndroidApk)
214            }
215        };
216
217        vec![package_data]
218    }
219}
220
221impl PackageParser for AndroidAabParser {
222    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
223
224    fn metadata() -> Vec<ParserMetadata> {
225        vec![ParserMetadata {
226            description: "Android App Bundle (.aab) proto manifest metadata",
227            file_patterns: &["**/*.aab"],
228            package_type: "android",
229            primary_language: "",
230            documentation_url: Some("https://developer.android.com/guide/app-bundle"),
231        }]
232    }
233
234    fn is_match(path: &Path) -> bool {
235        path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
236    }
237
238    fn extract_packages(path: &Path) -> Vec<PackageData> {
239        let package_data = match read_best_zip_entry(path, |entry_name| {
240            if entry_name == "base/manifest/AndroidManifest.xml" {
241                Some(0)
242            } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
243                Some(1)
244            } else {
245                None
246            }
247        }) {
248            Ok(Some((entry_name, bytes))) => {
249                parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
250                    warn!(
251                        "Failed to parse AAB manifest {:?} ({}): {}",
252                        path, entry_name, error
253                    );
254                    default_package_data(DatasourceId::AndroidAab)
255                })
256            }
257            Ok(None) => {
258                warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
259                default_package_data(DatasourceId::AndroidAab)
260            }
261            Err(error) => {
262                warn!("Failed to read AAB archive {:?}: {}", path, error);
263                default_package_data(DatasourceId::AndroidAab)
264            }
265        };
266
267        vec![package_data]
268    }
269}
270
271fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
272    let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
273    let metadata =
274        fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
275
276    if metadata.len() > limit {
277        return Err(format!(
278            "File {:?} is {} bytes, exceeding the {} byte limit",
279            path,
280            metadata.len(),
281            limit
282        ));
283    }
284
285    let mut file =
286        File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
287    let mut bytes = Vec::with_capacity(metadata.len() as usize);
288    file.read_to_end(&mut bytes)
289        .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
290    Ok(bytes)
291}
292
293fn parse_soong_metadata(content: &str) -> PackageData {
294    let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
295        warn!("Failed to parse Android Soong METADATA: {}", error);
296        ProtoMap::default()
297    });
298
299    let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
300    package.name = parsed.get_first_string("name").map(truncate_field);
301    package.description = parsed.get_first_string("description").map(truncate_field);
302
303    if let Some(third_party) = parsed.get_first_map("third_party") {
304        package.version = third_party.get_first_string("version").map(truncate_field);
305
306        let url_entries = third_party
307            .get_all_maps("url")
308            .into_iter()
309            .map(|entry| {
310                let type_ = entry.get_first_string("type").map(truncate_field);
311                let value = entry.get_first_string("value").map(truncate_field);
312                (type_, value)
313            })
314            .collect::<Vec<_>>();
315
316        let homepage_url = third_party.get_first_string("homepage").or_else(|| {
317            url_entries
318                .iter()
319                .find(|(type_, _)| {
320                    type_
321                        .as_deref()
322                        .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
323                })
324                .and_then(|(_, value)| value.clone())
325        });
326        package.homepage_url = homepage_url.map(truncate_field);
327
328        let license_types = third_party
329            .get_all_strings("license_type")
330            .into_iter()
331            .map(truncate_field)
332            .collect::<Vec<_>>();
333        if !license_types.is_empty() {
334            package.extracted_license_statement = Some(license_types.join(", "));
335        }
336
337        let identifiers = third_party
338            .get_all_maps("identifier")
339            .into_iter()
340            .map(|identifier| {
341                let type_ = identifier.get_first_string("type").map(truncate_field);
342                let value = identifier.get_first_string("value").map(truncate_field);
343                let mut object = serde_json::Map::new();
344                if let Some(type_) = type_ {
345                    object.insert("type".to_string(), type_.into());
346                }
347                if let Some(value) = &value {
348                    object.insert("value".to_string(), value.clone().into());
349                }
350
351                if package.vcs_url.is_none()
352                    && let (Some(type_), Some(value)) = (
353                        identifier.get_first_string("type"),
354                        identifier.get_first_string("value"),
355                    )
356                {
357                    let lower_type = type_.to_ascii_lowercase();
358                    if lower_type.contains("git") {
359                        package.vcs_url = Some(truncate_field(value));
360                    } else if lower_type.contains("archive")
361                        || lower_type.contains("tar")
362                        || lower_type.contains("zip")
363                    {
364                        package.download_url = Some(truncate_field(value));
365                    }
366                }
367
368                serde_json::Value::Object(object)
369            })
370            .collect::<Vec<_>>();
371
372        for (type_, value) in &url_entries {
373            let Some(value) = value else {
374                continue;
375            };
376
377            match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
378                Some("git") if package.vcs_url.is_none() => {
379                    package.vcs_url = Some(value.clone());
380                }
381                Some("archive") if package.download_url.is_none() => {
382                    package.download_url = Some(value.clone());
383                }
384                Some("homepage") if package.homepage_url.is_none() => {
385                    package.homepage_url = Some(value.clone());
386                }
387                _ => {}
388            }
389        }
390
391        let mut extra_data = HashMap::new();
392        if !identifiers.is_empty() {
393            extra_data.insert("identifiers".to_string(), identifiers.into());
394        }
395        if !url_entries.is_empty() {
396            extra_data.insert(
397                "urls".to_string(),
398                url_entries
399                    .iter()
400                    .map(|(type_, value)| {
401                        let mut object = serde_json::Map::new();
402                        if let Some(type_) = type_ {
403                            object.insert("type".to_string(), type_.clone().into());
404                        }
405                        if let Some(value) = value {
406                            object.insert("value".to_string(), value.clone().into());
407                        }
408                        serde_json::Value::Object(object)
409                    })
410                    .collect::<Vec<_>>()
411                    .into(),
412            );
413        }
414
415        if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
416            let year = last_upgrade_date.get_first_string("year");
417            let month = last_upgrade_date.get_first_string("month");
418            let day = last_upgrade_date.get_first_string("day");
419            if let (Some(year), Some(month), Some(day)) = (year, month, day) {
420                let formatted = format!(
421                    "{:04}-{:02}-{:02}",
422                    year.parse::<u32>().unwrap_or_default(),
423                    month.parse::<u32>().unwrap_or_default(),
424                    day.parse::<u32>().unwrap_or_default()
425                );
426                extra_data.insert(
427                    "last_upgrade_date".to_string(),
428                    truncate_field(formatted).into(),
429                );
430            }
431        }
432
433        if let Some(upstream_url) = third_party.get_first_string("url") {
434            extra_data.insert(
435                "upstream_url".to_string(),
436                truncate_field(upstream_url).into(),
437            );
438        }
439
440        if !extra_data.is_empty() {
441            package.extra_data = Some(extra_data);
442        }
443    }
444
445    package
446}
447
448fn parse_manifest_bytes(
449    bytes: &[u8],
450    datasource_id: DatasourceId,
451    context: &str,
452) -> Option<PackageData> {
453    if looks_like_text_xml(bytes) {
454        match parse_text_manifest_bytes(bytes, datasource_id) {
455            Ok(package) => return Some(package),
456            Err(error) => {
457                warn!("Failed to parse {} as text XML: {}", context, error);
458                return None;
459            }
460        }
461    }
462
463    parse_binary_manifest_bytes(bytes, datasource_id)
464        .map(Some)
465        .unwrap_or_else(|error| {
466            warn!(
467                "Failed to parse {} as binary Android XML: {}",
468                context, error
469            );
470            None
471        })
472}
473
/// Reports whether `bytes` look like a textual XML document.
///
/// The input counts as text when its first byte that is not ASCII whitespace
/// is `<`. A leading UTF-8 byte-order mark is skipped first, so a text
/// manifest saved with a BOM is not mistaken for binary Android XML. Empty or
/// whitespace-only input is not text XML.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";

    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);

    body.iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
480
481fn parse_text_manifest_bytes(
482    bytes: &[u8],
483    datasource_id: DatasourceId,
484) -> Result<PackageData, String> {
485    let content = String::from_utf8(bytes.to_vec())
486        .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
487
488    let mut reader = Reader::from_str(&content);
489    reader.config_mut().trim_text(true);
490
491    let mut buf = Vec::new();
492    let mut manifest_attributes = HashMap::new();
493    let mut uses_sdk_attributes = HashMap::new();
494    let mut application_attributes = HashMap::new();
495    let mut requested_permissions = Vec::new();
496    let mut uses_libraries = Vec::new();
497    let mut iteration_count = 0usize;
498
499    loop {
500        iteration_count += 1;
501        if iteration_count > MAX_ITERATION_COUNT {
502            return Err(format!(
503                "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
504                MAX_ITERATION_COUNT
505            ));
506        }
507
508        match reader.read_event_into(&mut buf) {
509            Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
510                let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
511                let attributes = xml_attributes_to_map(&reader, &event)?;
512                match name.as_str() {
513                    "manifest" if manifest_attributes.is_empty() => {
514                        manifest_attributes = attributes
515                    }
516                    "uses-sdk" => uses_sdk_attributes = attributes,
517                    "application" if application_attributes.is_empty() => {
518                        application_attributes = attributes;
519                    }
520                    "uses-permission" | "uses-permission-sdk-23" => {
521                        if let Some(permission) = attributes.get("android:name") {
522                            requested_permissions.push(permission.clone());
523                        }
524                    }
525                    "uses-library" => {
526                        if let Some(library_name) = attributes.get("android:name") {
527                            uses_libraries.push(library_name.clone());
528                        }
529                    }
530                    _ => {}
531                }
532            }
533            Ok(Event::Eof) => break,
534            Err(error) => {
535                return Err(format!(
536                    "XML parse error at position {}: {}",
537                    reader.buffer_position(),
538                    error
539                ));
540            }
541            _ => {}
542        }
543
544        buf.clear();
545    }
546
547    Ok(build_manifest_package_data(
548        datasource_id,
549        &manifest_attributes,
550        &uses_sdk_attributes,
551        &application_attributes,
552        requested_permissions,
553        uses_libraries,
554    ))
555}
556
557fn xml_attributes_to_map(
558    reader: &Reader<&[u8]>,
559    event: &quick_xml::events::BytesStart<'_>,
560) -> Result<HashMap<String, String>, String> {
561    let mut attributes = HashMap::new();
562
563    for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
564        let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
565        let value = attribute
566            .decoded_and_normalized_value(XmlVersion::Implicit1_0, reader.decoder())
567            .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
568            .into_owned();
569        attributes.insert(key, truncate_field(value));
570    }
571
572    Ok(attributes)
573}
574
575fn parse_binary_manifest_bytes(
576    bytes: &[u8],
577    datasource_id: DatasourceId,
578) -> Result<PackageData, String> {
579    let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
580        .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
581        .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
582
583    let manifest_attributes =
584        normalize_binary_attributes(axml.root().borrow().attributes().clone());
585    let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
586        .into_iter()
587        .next()
588        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
589        .unwrap_or_default();
590    let application_attributes = find_nodes_by_type(&axml, "application")
591        .into_iter()
592        .next()
593        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
594        .unwrap_or_default();
595
596    let requested_permissions = get_requested_permissions(&axml)
597        .into_iter()
598        .map(truncate_field)
599        .collect::<Vec<_>>();
600    let uses_libraries = find_nodes_by_type(&axml, "uses-library")
601        .into_iter()
602        .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
603        .map(truncate_field)
604        .collect::<Vec<_>>();
605
606    Ok(build_manifest_package_data(
607        datasource_id,
608        &manifest_attributes,
609        &uses_sdk_attributes,
610        &application_attributes,
611        requested_permissions,
612        uses_libraries,
613    ))
614}
615
616fn build_manifest_package_data(
617    datasource_id: DatasourceId,
618    manifest_attributes: &HashMap<String, String>,
619    uses_sdk_attributes: &HashMap<String, String>,
620    application_attributes: &HashMap<String, String>,
621    requested_permissions: Vec<String>,
622    uses_libraries: Vec<String>,
623) -> PackageData {
624    let mut package = default_package_data(datasource_id);
625    package.name = manifest_attributes.get("package").cloned();
626    package.version = manifest_attributes
627        .get("android:versionName")
628        .cloned()
629        .or_else(|| manifest_attributes.get("android:versionCode").cloned());
630
631    package.description = application_attributes
632        .get("android:label")
633        .filter(|label| {
634            !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
635        })
636        .cloned();
637
638    let mut extra_data = HashMap::new();
639    insert_extra(
640        &mut extra_data,
641        "version_code",
642        manifest_attributes.get("android:versionCode"),
643    );
644    insert_extra(
645        &mut extra_data,
646        "compile_sdk_version",
647        manifest_attributes.get("android:compileSdkVersion"),
648    );
649    insert_extra(
650        &mut extra_data,
651        "compile_sdk_version_codename",
652        manifest_attributes.get("android:compileSdkVersionCodename"),
653    );
654    insert_extra(
655        &mut extra_data,
656        "platform_build_version_code",
657        manifest_attributes.get("platformBuildVersionCode"),
658    );
659    insert_extra(
660        &mut extra_data,
661        "platform_build_version_name",
662        manifest_attributes.get("platformBuildVersionName"),
663    );
664    insert_extra(
665        &mut extra_data,
666        "min_sdk_version",
667        uses_sdk_attributes.get("android:minSdkVersion"),
668    );
669    insert_extra(
670        &mut extra_data,
671        "target_sdk_version",
672        uses_sdk_attributes.get("android:targetSdkVersion"),
673    );
674    insert_extra(
675        &mut extra_data,
676        "max_sdk_version",
677        uses_sdk_attributes.get("android:maxSdkVersion"),
678    );
679
680    if !requested_permissions.is_empty() {
681        extra_data.insert(
682            "requested_permissions".to_string(),
683            requested_permissions
684                .into_iter()
685                .map(serde_json::Value::from)
686                .collect::<Vec<_>>()
687                .into(),
688        );
689    }
690    if !uses_libraries.is_empty() {
691        extra_data.insert(
692            "uses_libraries".to_string(),
693            uses_libraries
694                .into_iter()
695                .map(serde_json::Value::from)
696                .collect::<Vec<_>>()
697                .into(),
698        );
699    }
700
701    if !extra_data.is_empty() {
702        package.extra_data = Some(extra_data);
703    }
704
705    package
706}
707
708fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
709    attributes
710        .into_iter()
711        .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
712        .collect()
713}
714
/// Rewrites a hexadecimal attribute value as its decimal representation.
///
/// A value of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits parse
/// as a `u64` is returned in decimal; every other value is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    const TYPED_HEX_PREFIX: &str = "(type 0x10) 0x";
    const PLAIN_HEX_PREFIX: &str = "0x";

    let digits = match value.strip_prefix(TYPED_HEX_PREFIX) {
        Some(rest) => Some(rest),
        None => value.strip_prefix(PLAIN_HEX_PREFIX),
    };

    match digits.map(|digits| u64::from_str_radix(digits, 16)) {
        Some(Ok(number)) => number.to_string(),
        _ => value.to_owned(),
    }
}
728
729fn insert_extra(
730    extra_data: &mut HashMap<String, serde_json::Value>,
731    key: &str,
732    value: Option<&String>,
733) {
734    if let Some(value) = value {
735        extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
736    }
737}
738
739fn read_best_zip_entry<F>(
740    path: &Path,
741    mut rank_entry: F,
742) -> Result<Option<(String, Vec<u8>)>, String>
743where
744    F: FnMut(&str) -> Option<u8>,
745{
746    let metadata = fs::metadata(path)
747        .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
748    if metadata.len() > MAX_ARCHIVE_SIZE {
749        return Err(format!(
750            "Archive {:?} is {} bytes, exceeding the {} byte limit",
751            path,
752            metadata.len(),
753            MAX_ARCHIVE_SIZE
754        ));
755    }
756
757    let file = File::open(path)
758        .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
759    let mut archive = ZipArchive::new(file)
760        .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
761
762    let mut total_uncompressed = 0u64;
763    let mut best: Option<(u8, String, Vec<u8>)> = None;
764    let entry_count = archive.len().min(MAX_ITERATION_COUNT);
765
766    if archive.len() > MAX_ITERATION_COUNT {
767        warn!(
768            "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
769            path, MAX_ITERATION_COUNT
770        );
771    }
772
773    for index in 0..entry_count {
774        let mut entry = archive.by_index(index).map_err(|error| {
775            format!(
776                "Failed to read ZIP entry {} in {:?}: {}",
777                index, path, error
778            )
779        })?;
780
781        total_uncompressed = total_uncompressed.saturating_add(entry.size());
782        if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
783            return Err(format!(
784                "Archive {:?} exceeds total uncompressed size limit of {} bytes",
785                path, MAX_TOTAL_UNCOMPRESSED_SIZE
786            ));
787        }
788
789        let entry_name = entry.name().replace('\\', "/");
790        if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
791            return Err(format!(
792                "Archive entry {} contains a disallowed path",
793                entry_name
794            ));
795        }
796        let Some(rank) = rank_entry(&entry_name) else {
797            continue;
798        };
799
800        if entry.size() > MAX_FILE_SIZE {
801            return Err(format!(
802                "Archive entry {} is {} bytes, exceeding the {} byte limit",
803                entry_name,
804                entry.size(),
805                MAX_FILE_SIZE
806            ));
807        }
808
809        let compressed_size = entry.compressed_size();
810        if compressed_size > 0 {
811            let ratio = entry.size() as f64 / compressed_size as f64;
812            if ratio > MAX_COMPRESSION_RATIO {
813                return Err(format!(
814                    "Archive entry {} has suspicious compression ratio {:.2}:1",
815                    entry_name, ratio
816                ));
817            }
818        }
819
820        let should_replace = match &best {
821            Some((best_rank, _, _)) => rank < *best_rank,
822            None => true,
823        };
824
825        if should_replace {
826            let mut bytes = Vec::with_capacity(entry.size() as usize);
827            entry.read_to_end(&mut bytes).map_err(|error| {
828                format!("Failed to read archive entry {}: {}", entry_name, error)
829            })?;
830            best = Some((rank, entry_name, bytes));
831        }
832    }
833
834    Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
835}
836
837fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
838    let node =
839        ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
840    let root_element = node
841        .element()
842        .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
843    if root_element.name != "manifest" {
844        return Err(format!(
845            "Unexpected proto XML root element: {}",
846            root_element.name
847        ));
848    }
849
850    let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
851    let uses_sdk_attributes = root_element
852        .child_elements_named("uses-sdk")
853        .next()
854        .map(|element| proto_attributes_to_map(&element.attribute))
855        .unwrap_or_default();
856    let application_attributes = root_element
857        .child_elements_named("application")
858        .next()
859        .map(|element| proto_attributes_to_map(&element.attribute))
860        .unwrap_or_default();
861    let requested_permissions = root_element
862        .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
863        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
864        .collect::<Vec<_>>();
865    let uses_libraries = root_element
866        .child_elements_named("uses-library")
867        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
868        .collect::<Vec<_>>();
869
870    let mut package = build_manifest_package_data(
871        DatasourceId::AndroidAab,
872        &manifest_attributes,
873        &uses_sdk_attributes,
874        &application_attributes,
875        requested_permissions,
876        uses_libraries,
877    );
878
879    if let Some(extra_data) = package.extra_data.as_mut() {
880        extra_data.insert("manifest_encoding".to_string(), "proto".into());
881    } else {
882        package.extra_data = Some(HashMap::from([(
883            "manifest_encoding".to_string(),
884            serde_json::Value::String("proto".to_string()),
885        )]));
886    }
887
888    Ok(package)
889}
890
891fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
892    attributes
893        .iter()
894        .filter_map(|attribute| {
895            let key = proto_attribute_key(attribute)?;
896            let value = proto_attribute_value(attribute)?;
897            Some((key, truncate_field(value)))
898        })
899        .collect()
900}
901
902fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
903    if attribute.name.is_empty() {
904        return None;
905    }
906
907    if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
908        return Some(format!("android:{}", attribute.name));
909    }
910
911    Some(attribute.name.clone())
912}
913
914fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
915    if !attribute.value.is_empty() {
916        return Some(attribute.value.clone());
917    }
918
919    attribute
920        .compiled_item
921        .as_ref()
922        .and_then(proto_item_to_string)
923}
924
925fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
926    match &item.value {
927        Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
928        Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
929        Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
930        _ => None,
931    }
932}
933
934fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
935    match &primitive.value {
936        Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
937        Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
938        Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
939        Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
940        Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
941        Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
942        _ => None,
943    }
944}
945
/// Parsed textproto message: every field name maps to the values seen for it,
/// in the order they were pushed.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single textproto field value.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// Scalar value kept as text.
    Scalar(String),
    /// Nested message.
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar stored under `key`, skipping nested maps.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = value {
                return Some(text.clone());
            }
        }
        None
    }

    /// Returns copies of all scalars stored under `key`, in insertion order.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let mut strings = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Scalar(text) = value {
                    strings.push(text.clone());
                }
            }
        }
        strings
    }

    /// Returns a copy of the first nested map stored under `key`, skipping scalars.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = value {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Returns copies of all nested maps stored under `key`, in insertion order.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let mut maps = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Map(nested) = value {
                    maps.push(nested.clone());
                }
            }
        }
        maps
    }
}
1000
1001fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
1002    let mut parser = TextProtoParser::new(content)?;
1003    parser.parse_map(false)
1004}
1005
1006struct TextProtoParser {
1007    tokens: Vec<TextProtoToken>,
1008    position: usize,
1009}
1010
1011#[derive(Debug, Clone)]
1012enum TextProtoToken {
1013    Identifier(String),
1014    String(String),
1015    Colon,
1016    LBrace,
1017    RBrace,
1018}
1019
1020impl TextProtoParser {
1021    fn new(content: &str) -> Result<Self, String> {
1022        Ok(Self {
1023            tokens: tokenize_textproto(content)?,
1024            position: 0,
1025        })
1026    }
1027
1028    fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
1029        let mut map = ProtoMap::default();
1030
1031        while let Some(token) = self.peek() {
1032            match token {
1033                TextProtoToken::RBrace if stop_on_rbrace => {
1034                    self.position += 1;
1035                    break;
1036                }
1037                TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
1038                TextProtoToken::Identifier(_) => {
1039                    let key = self.expect_identifier()?;
1040                    match self.peek() {
1041                        Some(TextProtoToken::Colon) => {
1042                            self.position += 1;
1043                            match self.peek() {
1044                                Some(TextProtoToken::LBrace) => {
1045                                    self.position += 1;
1046                                    let value = self.parse_map(true)?;
1047                                    map.fields
1048                                        .entry(key)
1049                                        .or_default()
1050                                        .push(ProtoValue::Map(value));
1051                                }
1052                                _ => {
1053                                    let value = self.expect_scalar()?;
1054                                    map.fields
1055                                        .entry(key)
1056                                        .or_default()
1057                                        .push(ProtoValue::Scalar(truncate_field(value)));
1058                                }
1059                            }
1060                        }
1061                        Some(TextProtoToken::LBrace) => {
1062                            self.position += 1;
1063                            let value = self.parse_map(true)?;
1064                            map.fields
1065                                .entry(key)
1066                                .or_default()
1067                                .push(ProtoValue::Map(value));
1068                        }
1069                        Some(other) => {
1070                            return Err(format!("Unexpected token after key: {:?}", other));
1071                        }
1072                        None => return Err("Unexpected end of input after key".to_string()),
1073                    }
1074                }
1075                other => return Err(format!("Unexpected token in textproto: {:?}", other)),
1076            }
1077        }
1078
1079        Ok(map)
1080    }
1081
1082    fn expect_identifier(&mut self) -> Result<String, String> {
1083        match self.next() {
1084            Some(TextProtoToken::Identifier(value)) => Ok(value),
1085            other => Err(format!("Expected identifier, found {:?}", other)),
1086        }
1087    }
1088
1089    fn expect_scalar(&mut self) -> Result<String, String> {
1090        match self.next() {
1091            Some(TextProtoToken::String(mut value)) => {
1092                while matches!(self.peek(), Some(TextProtoToken::String(_))) {
1093                    if let Some(TextProtoToken::String(next)) = self.next() {
1094                        value.push_str(&next);
1095                    }
1096                }
1097                Ok(value)
1098            }
1099            Some(TextProtoToken::Identifier(value)) => Ok(value),
1100            other => Err(format!("Expected scalar value, found {:?}", other)),
1101        }
1102    }
1103
1104    fn peek(&self) -> Option<&TextProtoToken> {
1105        self.tokens.get(self.position)
1106    }
1107
1108    fn next(&mut self) -> Option<TextProtoToken> {
1109        let token = self.tokens.get(self.position).cloned();
1110        if token.is_some() {
1111            self.position += 1;
1112        }
1113        token
1114    }
1115}
1116
1117fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1118    let mut tokens = Vec::new();
1119    let chars = content.chars().collect::<Vec<_>>();
1120    let mut index = 0usize;
1121
1122    while index < chars.len() {
1123        match chars[index] {
1124            '{' => {
1125                tokens.push(TextProtoToken::LBrace);
1126                index += 1;
1127            }
1128            '}' => {
1129                tokens.push(TextProtoToken::RBrace);
1130                index += 1;
1131            }
1132            ':' => {
1133                tokens.push(TextProtoToken::Colon);
1134                index += 1;
1135            }
1136            '"' => {
1137                index += 1;
1138                let mut value = String::new();
1139                while index < chars.len() {
1140                    match chars[index] {
1141                        '\\' if index + 1 < chars.len() => {
1142                            index += 1;
1143                            value.push(chars[index]);
1144                            index += 1;
1145                        }
1146                        '"' => {
1147                            index += 1;
1148                            break;
1149                        }
1150                        character => {
1151                            value.push(character);
1152                            index += 1;
1153                        }
1154                    }
1155                }
1156                tokens.push(TextProtoToken::String(value));
1157            }
1158            '#' => {
1159                while index < chars.len() && chars[index] != '\n' {
1160                    index += 1;
1161                }
1162            }
1163            '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1164                index += 2;
1165                while index < chars.len() && chars[index] != '\n' {
1166                    index += 1;
1167                }
1168            }
1169            character if character.is_ascii_whitespace() => index += 1,
1170            _ => {
1171                let start = index;
1172                while index < chars.len() {
1173                    let character = chars[index];
1174                    let starts_comment =
1175                        character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1176
1177                    if character.is_ascii_whitespace()
1178                        || matches!(character, '{' | '}' | ':' | '#')
1179                        || starts_comment
1180                    {
1181                        break;
1182                    }
1183
1184                    index += 1;
1185                }
1186
1187                let token = chars[start..index].iter().collect::<String>();
1188                if token.is_empty() {
1189                    return Err("Encountered empty textproto token".to_string());
1190                }
1191                tokens.push(TextProtoToken::Identifier(token));
1192            }
1193        }
1194    }
1195
1196    Ok(tokens)
1197}
1198
/// Protobuf message holding a line/column pair.
///
/// Used as the optional `source` field of [`ProtoXmlNode`],
/// [`ProtoXmlNamespace`] and [`ProtoXmlAttribute`].
/// NOTE(review): presumably mirrors `SourcePosition` from aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoSourcePosition {
    /// Line number (protobuf field 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (protobuf field 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1206
1207#[derive(Clone, PartialEq, Message)]
1208pub struct ProtoXmlNode {
1209    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
1210    pub node: Option<proto_xml_node::Node>,
1211    #[prost(message, optional, tag = "3")]
1212    pub source: Option<ProtoSourcePosition>,
1213}
1214
1215impl ProtoXmlNode {
1216    fn element(&self) -> Option<&ProtoXmlElement> {
1217        match &self.node {
1218            Some(proto_xml_node::Node::Element(element)) => Some(element),
1219            _ => None,
1220        }
1221    }
1222}
1223
/// Holds the oneof payload type for `ProtoXmlNode::node`.
pub mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// The two kinds of content a proto XML node can carry.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// An XML element (protobuf field 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text string (protobuf field 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1236
1237#[derive(Clone, PartialEq, Message)]
1238pub struct ProtoXmlElement {
1239    #[prost(message, repeated, tag = "1")]
1240    pub namespace_declaration: Vec<ProtoXmlNamespace>,
1241    #[prost(string, tag = "2")]
1242    pub namespace_uri: String,
1243    #[prost(string, tag = "3")]
1244    pub name: String,
1245    #[prost(message, repeated, tag = "4")]
1246    pub attribute: Vec<ProtoXmlAttribute>,
1247    #[prost(message, repeated, tag = "5")]
1248    pub child: Vec<ProtoXmlNode>,
1249}
1250
1251impl ProtoXmlElement {
1252    fn child_elements_named<'a>(
1253        &'a self,
1254        name: &'a str,
1255    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1256        self.child
1257            .iter()
1258            .filter_map(ProtoXmlNode::element)
1259            .filter(move |element| element.name == name)
1260    }
1261
1262    fn child_elements_named_any<'a>(
1263        &'a self,
1264        names: &'a [&'a str],
1265    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1266        self.child
1267            .iter()
1268            .filter_map(ProtoXmlNode::element)
1269            .filter(move |element| names.contains(&element.name.as_str()))
1270    }
1271}
1272
/// A namespace declaration (prefix and URI) attached to a proto XML element.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNamespace {
    /// Namespace prefix (protobuf field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (protobuf field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position (protobuf field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1282
/// An attribute of a proto XML element.
///
/// `proto_attribute_key` derives the map key from `namespace_uri` and `name`;
/// `proto_attribute_value` prefers a non-empty `value` and otherwise falls
/// back to `compiled_item`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (protobuf field 1).
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name without any prefix (protobuf field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Attribute value as text; may be empty (protobuf field 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position (protobuf field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Resource identifier (protobuf field 5).
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional compiled form of the value (protobuf field 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1298
/// A compiled attribute value.
///
/// Only the string, raw-string and primitive alternatives (protobuf fields 2,
/// 3 and 7) are declared here, so any other alternative decodes to
/// `value == None`.
/// NOTE(review): presumably a subset of `Item` from aapt2's `Resources.proto`
/// — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoItem {
    /// The value alternative, if one of the declared kinds is present.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status (protobuf field 8).
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (protobuf field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Flag name (protobuf field 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}

/// Holds the oneof payload type for `ProtoItem::value`.
pub mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// Value alternatives of a compiled item.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1325
/// Wrapper message for the string alternative of a compiled item.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoStringValue {
    /// The string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1331
/// Wrapper message for the raw-string alternative of a compiled item.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoRawStringValue {
    /// The raw string content (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1337
/// A primitive compiled value.
///
/// Only the alternatives with protobuf fields 3, 6, 7, 8, 13 and 14 are
/// declared, so any other alternative decodes to `value == None`.
/// NOTE(review): presumably a subset of `Primitive` from aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoPrimitive {
    /// The primitive alternative, if one of the declared kinds is present.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}

/// Holds the oneof payload type for `ProtoPrimitive::value`.
pub mod proto_primitive {
    use prost::Oneof;

    /// Primitive value alternatives; `proto_primitive_to_string` renders each as text.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// Floating-point value (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Integer rendered in decimal (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Integer rendered as `0x`-prefixed hexadecimal (protobuf field 7).
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean value (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension payload, kept as the raw `u32` (protobuf field 13).
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction payload, kept as the raw `u32` (protobuf field 14).
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}