Skip to main content

provenant/parsers/
android.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
22const PACKAGE_TYPE: PackageType = PackageType::Android;
23const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
24const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
25const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
26const MAX_COMPRESSION_RATIO: f64 = 100.0;
27const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30    PackageData {
31        package_type: Some(PACKAGE_TYPE),
32        datasource_id: Some(datasource_id),
33        ..Default::default()
34    }
35}
36
/// Parser for Android Soong `METADATA` files; files named `METADATA` whose parent directory
/// ends with `.dist-info` are not matched.
pub struct AndroidSoongMetadataParser;
/// Parser for standalone `AndroidManifest.xml` files, handling both text XML and binary
/// Android XML content.
pub struct AndroidManifestParser;
/// Parser for `.apk` ZIP archives; reads the `AndroidManifest.xml` entry as binary Android XML.
pub struct AndroidApkParser;
/// Parser for `.aab` ZIP archives; reads a `*/manifest/AndroidManifest.xml` entry (preferring
/// the `base` module) as a protobuf-encoded manifest.
pub struct AndroidAabParser;
41
42impl PackageParser for AndroidSoongMetadataParser {
43    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45    fn is_match(path: &Path) -> bool {
46        path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
47            && !path
48                .parent()
49                .and_then(|parent| parent.file_name())
50                .and_then(|name| name.to_str())
51                .is_some_and(|name| name.ends_with(".dist-info"))
52    }
53
54    fn extract_packages(path: &Path) -> Vec<PackageData> {
55        let content = match crate::parsers::utils::read_file_to_string(path, None) {
56            Ok(content) => content,
57            Err(error) => {
58                warn!(
59                    "Failed to read Android Soong METADATA {:?}: {}",
60                    path, error
61                );
62                return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
63            }
64        };
65
66        vec![parse_soong_metadata(&content)]
67    }
68}
69
70impl PackageParser for AndroidManifestParser {
71    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73    fn is_match(path: &Path) -> bool {
74        path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
75    }
76
77    fn extract_packages(path: &Path) -> Vec<PackageData> {
78        let bytes = match read_file_bytes(path, None) {
79            Ok(bytes) => bytes,
80            Err(error) => {
81                warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
82                return vec![default_package_data(DatasourceId::AndroidManifestXml)];
83            }
84        };
85
86        parse_manifest_bytes(
87            &bytes,
88            DatasourceId::AndroidManifestXml,
89            "AndroidManifest.xml",
90        )
91        .into_iter()
92        .collect()
93    }
94}
95
96impl PackageParser for AndroidApkParser {
97    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
98
99    fn is_match(path: &Path) -> bool {
100        path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
101    }
102
103    fn extract_packages(path: &Path) -> Vec<PackageData> {
104        let package_data = match read_best_zip_entry(path, |entry_name| {
105            if entry_name == "AndroidManifest.xml" {
106                Some(0)
107            } else {
108                None
109            }
110        }) {
111            Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
112                .unwrap_or_else(|error| {
113                    warn!("Failed to parse APK manifest {:?}: {}", path, error);
114                    default_package_data(DatasourceId::AndroidApk)
115                }),
116            Ok(None) => {
117                warn!("No AndroidManifest.xml found in APK {:?}", path);
118                default_package_data(DatasourceId::AndroidApk)
119            }
120            Err(error) => {
121                warn!("Failed to read APK archive {:?}: {}", path, error);
122                default_package_data(DatasourceId::AndroidApk)
123            }
124        };
125
126        vec![package_data]
127    }
128}
129
130impl PackageParser for AndroidAabParser {
131    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
132
133    fn is_match(path: &Path) -> bool {
134        path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
135    }
136
137    fn extract_packages(path: &Path) -> Vec<PackageData> {
138        let package_data = match read_best_zip_entry(path, |entry_name| {
139            if entry_name == "base/manifest/AndroidManifest.xml" {
140                Some(0)
141            } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
142                Some(1)
143            } else {
144                None
145            }
146        }) {
147            Ok(Some((entry_name, bytes))) => {
148                parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
149                    warn!(
150                        "Failed to parse AAB manifest {:?} ({}): {}",
151                        path, entry_name, error
152                    );
153                    default_package_data(DatasourceId::AndroidAab)
154                })
155            }
156            Ok(None) => {
157                warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
158                default_package_data(DatasourceId::AndroidAab)
159            }
160            Err(error) => {
161                warn!("Failed to read AAB archive {:?}: {}", path, error);
162                default_package_data(DatasourceId::AndroidAab)
163            }
164        };
165
166        vec![package_data]
167    }
168}
169
170fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
171    let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
172    let metadata =
173        fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
174
175    if metadata.len() > limit {
176        return Err(format!(
177            "File {:?} is {} bytes, exceeding the {} byte limit",
178            path,
179            metadata.len(),
180            limit
181        ));
182    }
183
184    let mut file =
185        File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
186    let mut bytes = Vec::with_capacity(metadata.len() as usize);
187    file.read_to_end(&mut bytes)
188        .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
189    Ok(bytes)
190}
191
192fn parse_soong_metadata(content: &str) -> PackageData {
193    let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
194        warn!("Failed to parse Android Soong METADATA: {}", error);
195        ProtoMap::default()
196    });
197
198    let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
199    package.name = parsed.get_first_string("name").map(truncate_field);
200    package.description = parsed.get_first_string("description").map(truncate_field);
201
202    if let Some(third_party) = parsed.get_first_map("third_party") {
203        package.version = third_party.get_first_string("version").map(truncate_field);
204
205        let url_entries = third_party
206            .get_all_maps("url")
207            .into_iter()
208            .map(|entry| {
209                let type_ = entry.get_first_string("type").map(truncate_field);
210                let value = entry.get_first_string("value").map(truncate_field);
211                (type_, value)
212            })
213            .collect::<Vec<_>>();
214
215        let homepage_url = third_party.get_first_string("homepage").or_else(|| {
216            url_entries
217                .iter()
218                .find(|(type_, _)| {
219                    type_
220                        .as_deref()
221                        .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
222                })
223                .and_then(|(_, value)| value.clone())
224        });
225        package.homepage_url = homepage_url.map(truncate_field);
226
227        let license_types = third_party
228            .get_all_strings("license_type")
229            .into_iter()
230            .map(truncate_field)
231            .collect::<Vec<_>>();
232        if !license_types.is_empty() {
233            package.extracted_license_statement = Some(license_types.join(", "));
234        }
235
236        let identifiers = third_party
237            .get_all_maps("identifier")
238            .into_iter()
239            .map(|identifier| {
240                let type_ = identifier.get_first_string("type").map(truncate_field);
241                let value = identifier.get_first_string("value").map(truncate_field);
242                let mut object = serde_json::Map::new();
243                if let Some(type_) = type_ {
244                    object.insert("type".to_string(), type_.into());
245                }
246                if let Some(value) = &value {
247                    object.insert("value".to_string(), value.clone().into());
248                }
249
250                if package.vcs_url.is_none()
251                    && let (Some(type_), Some(value)) = (
252                        identifier.get_first_string("type"),
253                        identifier.get_first_string("value"),
254                    )
255                {
256                    let lower_type = type_.to_ascii_lowercase();
257                    if lower_type.contains("git") {
258                        package.vcs_url = Some(truncate_field(value));
259                    } else if lower_type.contains("archive")
260                        || lower_type.contains("tar")
261                        || lower_type.contains("zip")
262                    {
263                        package.download_url = Some(truncate_field(value));
264                    }
265                }
266
267                serde_json::Value::Object(object)
268            })
269            .collect::<Vec<_>>();
270
271        for (type_, value) in &url_entries {
272            let Some(value) = value else {
273                continue;
274            };
275
276            match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
277                Some("git") if package.vcs_url.is_none() => {
278                    package.vcs_url = Some(value.clone());
279                }
280                Some("archive") if package.download_url.is_none() => {
281                    package.download_url = Some(value.clone());
282                }
283                Some("homepage") if package.homepage_url.is_none() => {
284                    package.homepage_url = Some(value.clone());
285                }
286                _ => {}
287            }
288        }
289
290        let mut extra_data = HashMap::new();
291        if !identifiers.is_empty() {
292            extra_data.insert("identifiers".to_string(), identifiers.into());
293        }
294        if !url_entries.is_empty() {
295            extra_data.insert(
296                "urls".to_string(),
297                url_entries
298                    .iter()
299                    .map(|(type_, value)| {
300                        let mut object = serde_json::Map::new();
301                        if let Some(type_) = type_ {
302                            object.insert("type".to_string(), type_.clone().into());
303                        }
304                        if let Some(value) = value {
305                            object.insert("value".to_string(), value.clone().into());
306                        }
307                        serde_json::Value::Object(object)
308                    })
309                    .collect::<Vec<_>>()
310                    .into(),
311            );
312        }
313
314        if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
315            let year = last_upgrade_date.get_first_string("year");
316            let month = last_upgrade_date.get_first_string("month");
317            let day = last_upgrade_date.get_first_string("day");
318            if let (Some(year), Some(month), Some(day)) = (year, month, day) {
319                let formatted = format!(
320                    "{:04}-{:02}-{:02}",
321                    year.parse::<u32>().unwrap_or_default(),
322                    month.parse::<u32>().unwrap_or_default(),
323                    day.parse::<u32>().unwrap_or_default()
324                );
325                extra_data.insert(
326                    "last_upgrade_date".to_string(),
327                    truncate_field(formatted).into(),
328                );
329            }
330        }
331
332        if let Some(upstream_url) = third_party.get_first_string("url") {
333            extra_data.insert(
334                "upstream_url".to_string(),
335                truncate_field(upstream_url).into(),
336            );
337        }
338
339        if !extra_data.is_empty() {
340            package.extra_data = Some(extra_data);
341        }
342    }
343
344    package
345}
346
347fn parse_manifest_bytes(
348    bytes: &[u8],
349    datasource_id: DatasourceId,
350    context: &str,
351) -> Option<PackageData> {
352    if looks_like_text_xml(bytes) {
353        match parse_text_manifest_bytes(bytes, datasource_id) {
354            Ok(package) => return Some(package),
355            Err(error) => {
356                warn!("Failed to parse {} as text XML: {}", context, error);
357                return None;
358            }
359        }
360    }
361
362    parse_binary_manifest_bytes(bytes, datasource_id)
363        .map(Some)
364        .unwrap_or_else(|error| {
365            warn!(
366                "Failed to parse {} as binary Android XML: {}",
367                context, error
368            );
369            None
370        })
371}
372
/// Reports whether `bytes` look like a textual XML document.
///
/// After an optional UTF-8 byte-order mark and any ASCII whitespace, the first byte must
/// be `<`. Empty or whitespace-only input is not considered XML.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    // Editors on some platforms prepend a BOM; without skipping it a perfectly valid
    // text manifest would be misclassified as binary Android XML.
    const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
    let body = bytes.strip_prefix(UTF8_BOM).unwrap_or(bytes);

    body.iter()
        .copied()
        .find(|byte| !byte.is_ascii_whitespace())
        == Some(b'<')
}
379
380fn parse_text_manifest_bytes(
381    bytes: &[u8],
382    datasource_id: DatasourceId,
383) -> Result<PackageData, String> {
384    let content = String::from_utf8(bytes.to_vec())
385        .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
386
387    let mut reader = Reader::from_str(&content);
388    reader.config_mut().trim_text(true);
389
390    let mut buf = Vec::new();
391    let mut manifest_attributes = HashMap::new();
392    let mut uses_sdk_attributes = HashMap::new();
393    let mut application_attributes = HashMap::new();
394    let mut requested_permissions = Vec::new();
395    let mut uses_libraries = Vec::new();
396    let mut iteration_count = 0usize;
397
398    loop {
399        iteration_count += 1;
400        if iteration_count > MAX_ITERATION_COUNT {
401            return Err(format!(
402                "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
403                MAX_ITERATION_COUNT
404            ));
405        }
406
407        match reader.read_event_into(&mut buf) {
408            Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
409                let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
410                let attributes = xml_attributes_to_map(&reader, &event)?;
411                match name.as_str() {
412                    "manifest" if manifest_attributes.is_empty() => {
413                        manifest_attributes = attributes
414                    }
415                    "uses-sdk" => uses_sdk_attributes = attributes,
416                    "application" if application_attributes.is_empty() => {
417                        application_attributes = attributes;
418                    }
419                    "uses-permission" | "uses-permission-sdk-23" => {
420                        if let Some(permission) = attributes.get("android:name") {
421                            requested_permissions.push(permission.clone());
422                        }
423                    }
424                    "uses-library" => {
425                        if let Some(library_name) = attributes.get("android:name") {
426                            uses_libraries.push(library_name.clone());
427                        }
428                    }
429                    _ => {}
430                }
431            }
432            Ok(Event::Eof) => break,
433            Err(error) => {
434                return Err(format!(
435                    "XML parse error at position {}: {}",
436                    reader.buffer_position(),
437                    error
438                ));
439            }
440            _ => {}
441        }
442
443        buf.clear();
444    }
445
446    Ok(build_manifest_package_data(
447        datasource_id,
448        &manifest_attributes,
449        &uses_sdk_attributes,
450        &application_attributes,
451        requested_permissions,
452        uses_libraries,
453    ))
454}
455
456fn xml_attributes_to_map(
457    reader: &Reader<&[u8]>,
458    event: &quick_xml::events::BytesStart<'_>,
459) -> Result<HashMap<String, String>, String> {
460    let mut attributes = HashMap::new();
461
462    for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
463        let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
464        let value = attribute
465            .decode_and_unescape_value(reader.decoder())
466            .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
467            .into_owned();
468        attributes.insert(key, truncate_field(value));
469    }
470
471    Ok(attributes)
472}
473
474fn parse_binary_manifest_bytes(
475    bytes: &[u8],
476    datasource_id: DatasourceId,
477) -> Result<PackageData, String> {
478    let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
479        .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
480        .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
481
482    let manifest_attributes =
483        normalize_binary_attributes(axml.root().borrow().attributes().clone());
484    let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
485        .into_iter()
486        .next()
487        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
488        .unwrap_or_default();
489    let application_attributes = find_nodes_by_type(&axml, "application")
490        .into_iter()
491        .next()
492        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
493        .unwrap_or_default();
494
495    let requested_permissions = get_requested_permissions(&axml)
496        .into_iter()
497        .map(truncate_field)
498        .collect::<Vec<_>>();
499    let uses_libraries = find_nodes_by_type(&axml, "uses-library")
500        .into_iter()
501        .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
502        .map(truncate_field)
503        .collect::<Vec<_>>();
504
505    Ok(build_manifest_package_data(
506        datasource_id,
507        &manifest_attributes,
508        &uses_sdk_attributes,
509        &application_attributes,
510        requested_permissions,
511        uses_libraries,
512    ))
513}
514
515fn build_manifest_package_data(
516    datasource_id: DatasourceId,
517    manifest_attributes: &HashMap<String, String>,
518    uses_sdk_attributes: &HashMap<String, String>,
519    application_attributes: &HashMap<String, String>,
520    requested_permissions: Vec<String>,
521    uses_libraries: Vec<String>,
522) -> PackageData {
523    let mut package = default_package_data(datasource_id);
524    package.name = manifest_attributes.get("package").cloned();
525    package.version = manifest_attributes
526        .get("android:versionName")
527        .cloned()
528        .or_else(|| manifest_attributes.get("android:versionCode").cloned());
529
530    package.description = application_attributes
531        .get("android:label")
532        .filter(|label| {
533            !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
534        })
535        .cloned();
536
537    let mut extra_data = HashMap::new();
538    insert_extra(
539        &mut extra_data,
540        "version_code",
541        manifest_attributes.get("android:versionCode"),
542    );
543    insert_extra(
544        &mut extra_data,
545        "compile_sdk_version",
546        manifest_attributes.get("android:compileSdkVersion"),
547    );
548    insert_extra(
549        &mut extra_data,
550        "compile_sdk_version_codename",
551        manifest_attributes.get("android:compileSdkVersionCodename"),
552    );
553    insert_extra(
554        &mut extra_data,
555        "platform_build_version_code",
556        manifest_attributes.get("platformBuildVersionCode"),
557    );
558    insert_extra(
559        &mut extra_data,
560        "platform_build_version_name",
561        manifest_attributes.get("platformBuildVersionName"),
562    );
563    insert_extra(
564        &mut extra_data,
565        "min_sdk_version",
566        uses_sdk_attributes.get("android:minSdkVersion"),
567    );
568    insert_extra(
569        &mut extra_data,
570        "target_sdk_version",
571        uses_sdk_attributes.get("android:targetSdkVersion"),
572    );
573    insert_extra(
574        &mut extra_data,
575        "max_sdk_version",
576        uses_sdk_attributes.get("android:maxSdkVersion"),
577    );
578
579    if !requested_permissions.is_empty() {
580        extra_data.insert(
581            "requested_permissions".to_string(),
582            requested_permissions
583                .into_iter()
584                .map(serde_json::Value::from)
585                .collect::<Vec<_>>()
586                .into(),
587        );
588    }
589    if !uses_libraries.is_empty() {
590        extra_data.insert(
591            "uses_libraries".to_string(),
592            uses_libraries
593                .into_iter()
594                .map(serde_json::Value::from)
595                .collect::<Vec<_>>()
596                .into(),
597        );
598    }
599
600    if !extra_data.is_empty() {
601        package.extra_data = Some(extra_data);
602    }
603
604    package
605}
606
607fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
608    attributes
609        .into_iter()
610        .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
611        .collect()
612}
613
/// Rewrites a hexadecimal attribute value as decimal text.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose hex part fits in a `u64`
/// are returned in base 10; anything else is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(rest) => Some(rest),
        None => value.strip_prefix("0x"),
    };

    hex_digits
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
627
628fn insert_extra(
629    extra_data: &mut HashMap<String, serde_json::Value>,
630    key: &str,
631    value: Option<&String>,
632) {
633    if let Some(value) = value {
634        extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
635    }
636}
637
638fn read_best_zip_entry<F>(
639    path: &Path,
640    mut rank_entry: F,
641) -> Result<Option<(String, Vec<u8>)>, String>
642where
643    F: FnMut(&str) -> Option<u8>,
644{
645    let metadata = fs::metadata(path)
646        .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
647    if metadata.len() > MAX_ARCHIVE_SIZE {
648        return Err(format!(
649            "Archive {:?} is {} bytes, exceeding the {} byte limit",
650            path,
651            metadata.len(),
652            MAX_ARCHIVE_SIZE
653        ));
654    }
655
656    let file = File::open(path)
657        .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
658    let mut archive = ZipArchive::new(file)
659        .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
660
661    let mut total_uncompressed = 0u64;
662    let mut best: Option<(u8, String, Vec<u8>)> = None;
663    let entry_count = archive.len().min(MAX_ITERATION_COUNT);
664
665    if archive.len() > MAX_ITERATION_COUNT {
666        warn!(
667            "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
668            path, MAX_ITERATION_COUNT
669        );
670    }
671
672    for index in 0..entry_count {
673        let mut entry = archive.by_index(index).map_err(|error| {
674            format!(
675                "Failed to read ZIP entry {} in {:?}: {}",
676                index, path, error
677            )
678        })?;
679
680        total_uncompressed = total_uncompressed.saturating_add(entry.size());
681        if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
682            return Err(format!(
683                "Archive {:?} exceeds total uncompressed size limit of {} bytes",
684                path, MAX_TOTAL_UNCOMPRESSED_SIZE
685            ));
686        }
687
688        let entry_name = entry.name().replace('\\', "/");
689        if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
690            return Err(format!(
691                "Archive entry {} contains a disallowed path",
692                entry_name
693            ));
694        }
695        let Some(rank) = rank_entry(&entry_name) else {
696            continue;
697        };
698
699        if entry.size() > MAX_FILE_SIZE {
700            return Err(format!(
701                "Archive entry {} is {} bytes, exceeding the {} byte limit",
702                entry_name,
703                entry.size(),
704                MAX_FILE_SIZE
705            ));
706        }
707
708        let compressed_size = entry.compressed_size();
709        if compressed_size > 0 {
710            let ratio = entry.size() as f64 / compressed_size as f64;
711            if ratio > MAX_COMPRESSION_RATIO {
712                return Err(format!(
713                    "Archive entry {} has suspicious compression ratio {:.2}:1",
714                    entry_name, ratio
715                ));
716            }
717        }
718
719        let should_replace = match &best {
720            Some((best_rank, _, _)) => rank < *best_rank,
721            None => true,
722        };
723
724        if should_replace {
725            let mut bytes = Vec::with_capacity(entry.size() as usize);
726            entry.read_to_end(&mut bytes).map_err(|error| {
727                format!("Failed to read archive entry {}: {}", entry_name, error)
728            })?;
729            best = Some((rank, entry_name, bytes));
730        }
731    }
732
733    Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
734}
735
736fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
737    let node =
738        ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
739    let root_element = node
740        .element()
741        .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
742    if root_element.name != "manifest" {
743        return Err(format!(
744            "Unexpected proto XML root element: {}",
745            root_element.name
746        ));
747    }
748
749    let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
750    let uses_sdk_attributes = root_element
751        .child_elements_named("uses-sdk")
752        .next()
753        .map(|element| proto_attributes_to_map(&element.attribute))
754        .unwrap_or_default();
755    let application_attributes = root_element
756        .child_elements_named("application")
757        .next()
758        .map(|element| proto_attributes_to_map(&element.attribute))
759        .unwrap_or_default();
760    let requested_permissions = root_element
761        .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
762        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
763        .collect::<Vec<_>>();
764    let uses_libraries = root_element
765        .child_elements_named("uses-library")
766        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
767        .collect::<Vec<_>>();
768
769    let mut package = build_manifest_package_data(
770        DatasourceId::AndroidAab,
771        &manifest_attributes,
772        &uses_sdk_attributes,
773        &application_attributes,
774        requested_permissions,
775        uses_libraries,
776    );
777
778    if let Some(extra_data) = package.extra_data.as_mut() {
779        extra_data.insert("manifest_encoding".to_string(), "proto".into());
780    } else {
781        package.extra_data = Some(HashMap::from([(
782            "manifest_encoding".to_string(),
783            serde_json::Value::String("proto".to_string()),
784        )]));
785    }
786
787    Ok(package)
788}
789
790fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
791    attributes
792        .iter()
793        .filter_map(|attribute| {
794            let key = proto_attribute_key(attribute)?;
795            let value = proto_attribute_value(attribute)?;
796            Some((key, truncate_field(value)))
797        })
798        .collect()
799}
800
801fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
802    if attribute.name.is_empty() {
803        return None;
804    }
805
806    if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
807        return Some(format!("android:{}", attribute.name));
808    }
809
810    Some(attribute.name.clone())
811}
812
813fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
814    if !attribute.value.is_empty() {
815        return Some(attribute.value.clone());
816    }
817
818    attribute
819        .compiled_item
820        .as_ref()
821        .and_then(proto_item_to_string)
822}
823
824fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
825    match &item.value {
826        Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
827        Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
828        Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
829        _ => None,
830    }
831}
832
833fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
834    match &primitive.value {
835        Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
836        Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
837        Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
838        Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
839        Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
840        Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
841        _ => None,
842    }
843}
844
/// Parsed textproto message: each field name maps to every value recorded for it.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    // A field may hold several values, each either a scalar or a nested map.
    fields: HashMap<String, Vec<ProtoValue>>,
}
849
/// A single value stored in a `ProtoMap` field.
#[derive(Debug, Clone)]
enum ProtoValue {
    /// A scalar value kept as text.
    Scalar(String),
    /// A nested message.
    Map(ProtoMap),
}
855
856impl ProtoMap {
857    fn get_first_string(&self, key: &str) -> Option<String> {
858        self.fields.get(key).and_then(|values| {
859            values.iter().find_map(|value| match value {
860                ProtoValue::Scalar(value) => Some(value.clone()),
861                ProtoValue::Map(_) => None,
862            })
863        })
864    }
865
866    fn get_all_strings(&self, key: &str) -> Vec<String> {
867        self.fields
868            .get(key)
869            .into_iter()
870            .flatten()
871            .filter_map(|value| match value {
872                ProtoValue::Scalar(value) => Some(value.clone()),
873                ProtoValue::Map(_) => None,
874            })
875            .collect()
876    }
877
878    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
879        self.fields.get(key).and_then(|values| {
880            values.iter().find_map(|value| match value {
881                ProtoValue::Map(value) => Some(value.clone()),
882                ProtoValue::Scalar(_) => None,
883            })
884        })
885    }
886
887    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
888        self.fields
889            .get(key)
890            .into_iter()
891            .flatten()
892            .filter_map(|value| match value {
893                ProtoValue::Map(value) => Some(value.clone()),
894                ProtoValue::Scalar(_) => None,
895            })
896            .collect()
897    }
898}
899
900fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
901    let mut parser = TextProtoParser::new(content)?;
902    parser.parse_map(false)
903}
904
/// Cursor over a pre-tokenized textproto document.
struct TextProtoParser {
    /// Full token stream produced by `tokenize_textproto`.
    tokens: Vec<TextProtoToken>,
    /// Index into `tokens` of the next token to be consumed.
    position: usize,
}
909
/// Lexical tokens emitted by `tokenize_textproto`.
#[derive(Debug, Clone)]
enum TextProtoToken {
    /// An unquoted run of characters: a field name or a bare scalar value.
    Identifier(String),
    /// The contents of a double-quoted string literal, without the quotes.
    String(String),
    /// The `:` separator between a field name and its value.
    Colon,
    /// `{`, opening a nested message.
    LBrace,
    /// `}`, closing a nested message.
    RBrace,
}
918
919impl TextProtoParser {
920    fn new(content: &str) -> Result<Self, String> {
921        Ok(Self {
922            tokens: tokenize_textproto(content)?,
923            position: 0,
924        })
925    }
926
927    fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
928        let mut map = ProtoMap::default();
929
930        while let Some(token) = self.peek() {
931            match token {
932                TextProtoToken::RBrace if stop_on_rbrace => {
933                    self.position += 1;
934                    break;
935                }
936                TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
937                TextProtoToken::Identifier(_) => {
938                    let key = self.expect_identifier()?;
939                    match self.peek() {
940                        Some(TextProtoToken::Colon) => {
941                            self.position += 1;
942                            match self.peek() {
943                                Some(TextProtoToken::LBrace) => {
944                                    self.position += 1;
945                                    let value = self.parse_map(true)?;
946                                    map.fields
947                                        .entry(key)
948                                        .or_default()
949                                        .push(ProtoValue::Map(value));
950                                }
951                                _ => {
952                                    let value = self.expect_scalar()?;
953                                    map.fields
954                                        .entry(key)
955                                        .or_default()
956                                        .push(ProtoValue::Scalar(truncate_field(value)));
957                                }
958                            }
959                        }
960                        Some(TextProtoToken::LBrace) => {
961                            self.position += 1;
962                            let value = self.parse_map(true)?;
963                            map.fields
964                                .entry(key)
965                                .or_default()
966                                .push(ProtoValue::Map(value));
967                        }
968                        Some(other) => {
969                            return Err(format!("Unexpected token after key: {:?}", other));
970                        }
971                        None => return Err("Unexpected end of input after key".to_string()),
972                    }
973                }
974                other => return Err(format!("Unexpected token in textproto: {:?}", other)),
975            }
976        }
977
978        Ok(map)
979    }
980
981    fn expect_identifier(&mut self) -> Result<String, String> {
982        match self.next() {
983            Some(TextProtoToken::Identifier(value)) => Ok(value),
984            other => Err(format!("Expected identifier, found {:?}", other)),
985        }
986    }
987
988    fn expect_scalar(&mut self) -> Result<String, String> {
989        match self.next() {
990            Some(TextProtoToken::String(mut value)) => {
991                while matches!(self.peek(), Some(TextProtoToken::String(_))) {
992                    if let Some(TextProtoToken::String(next)) = self.next() {
993                        value.push_str(&next);
994                    }
995                }
996                Ok(value)
997            }
998            Some(TextProtoToken::Identifier(value)) => Ok(value),
999            other => Err(format!("Expected scalar value, found {:?}", other)),
1000        }
1001    }
1002
1003    fn peek(&self) -> Option<&TextProtoToken> {
1004        self.tokens.get(self.position)
1005    }
1006
1007    fn next(&mut self) -> Option<TextProtoToken> {
1008        let token = self.tokens.get(self.position).cloned();
1009        if token.is_some() {
1010            self.position += 1;
1011        }
1012        token
1013    }
1014}
1015
1016fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
1017    let mut tokens = Vec::new();
1018    let chars = content.chars().collect::<Vec<_>>();
1019    let mut index = 0usize;
1020
1021    while index < chars.len() {
1022        match chars[index] {
1023            '{' => {
1024                tokens.push(TextProtoToken::LBrace);
1025                index += 1;
1026            }
1027            '}' => {
1028                tokens.push(TextProtoToken::RBrace);
1029                index += 1;
1030            }
1031            ':' => {
1032                tokens.push(TextProtoToken::Colon);
1033                index += 1;
1034            }
1035            '"' => {
1036                index += 1;
1037                let mut value = String::new();
1038                while index < chars.len() {
1039                    match chars[index] {
1040                        '\\' if index + 1 < chars.len() => {
1041                            index += 1;
1042                            value.push(chars[index]);
1043                            index += 1;
1044                        }
1045                        '"' => {
1046                            index += 1;
1047                            break;
1048                        }
1049                        character => {
1050                            value.push(character);
1051                            index += 1;
1052                        }
1053                    }
1054                }
1055                tokens.push(TextProtoToken::String(value));
1056            }
1057            '#' => {
1058                while index < chars.len() && chars[index] != '\n' {
1059                    index += 1;
1060                }
1061            }
1062            '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1063                index += 2;
1064                while index < chars.len() && chars[index] != '\n' {
1065                    index += 1;
1066                }
1067            }
1068            character if character.is_ascii_whitespace() => index += 1,
1069            _ => {
1070                let start = index;
1071                while index < chars.len() {
1072                    let character = chars[index];
1073                    let starts_comment =
1074                        character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1075
1076                    if character.is_ascii_whitespace()
1077                        || matches!(character, '{' | '}' | ':' | '#')
1078                        || starts_comment
1079                    {
1080                        break;
1081                    }
1082
1083                    index += 1;
1084                }
1085
1086                let token = chars[start..index].iter().collect::<String>();
1087                if token.is_empty() {
1088                    return Err("Encountered empty textproto token".to_string());
1089                }
1090                tokens.push(TextProtoToken::Identifier(token));
1091            }
1092        }
1093    }
1094
1095    Ok(tokens)
1096}
1097
/// Line/column location attached to proto XML nodes, namespaces and attributes.
///
/// NOTE(review): field tags presumably mirror `SourcePosition` in aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoSourcePosition {
    /// Line number (protobuf field 1).
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Column number (protobuf field 2).
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1105
/// One node of a protobuf-encoded XML tree: either an element or text.
///
/// NOTE(review): field tags presumably mirror `XmlNode` in aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNode {
    /// The node payload: an element (field 1) or a text string (field 2).
    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
    pub node: Option<proto_xml_node::Node>,
    /// Optional source position of this node (field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1113
1114impl ProtoXmlNode {
1115    fn element(&self) -> Option<&ProtoXmlElement> {
1116        match &self.node {
1117            Some(proto_xml_node::Node::Element(element)) => Some(element),
1118            _ => None,
1119        }
1120    }
1121}
1122
/// Holds the `oneof` payload type for `ProtoXmlNode::node`.
pub mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// Payload of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// An XML element (protobuf field 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text string (protobuf field 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1135
/// An XML element in the protobuf-encoded XML tree.
///
/// NOTE(review): field tags presumably mirror `XmlElement` in aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlElement {
    /// Namespace declarations attached to this element (field 1).
    #[prost(message, repeated, tag = "1")]
    pub namespace_declaration: Vec<ProtoXmlNamespace>,
    /// Namespace URI of the element (field 2).
    #[prost(string, tag = "2")]
    pub namespace_uri: String,
    /// Element name (field 3).
    #[prost(string, tag = "3")]
    pub name: String,
    /// Attributes of the element (field 4).
    #[prost(message, repeated, tag = "4")]
    pub attribute: Vec<ProtoXmlAttribute>,
    /// Child nodes, both elements and text (field 5).
    #[prost(message, repeated, tag = "5")]
    pub child: Vec<ProtoXmlNode>,
}
1149
1150impl ProtoXmlElement {
1151    fn child_elements_named<'a>(
1152        &'a self,
1153        name: &'a str,
1154    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1155        self.child
1156            .iter()
1157            .filter_map(ProtoXmlNode::element)
1158            .filter(move |element| element.name == name)
1159    }
1160
1161    fn child_elements_named_any<'a>(
1162        &'a self,
1163        names: &'a [&'a str],
1164    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1165        self.child
1166            .iter()
1167            .filter_map(ProtoXmlNode::element)
1168            .filter(move |element| names.contains(&element.name.as_str()))
1169    }
1170}
1171
/// An XML namespace declaration (prefix bound to a URI).
///
/// NOTE(review): field tags presumably mirror `XmlNamespace` in aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlNamespace {
    /// Namespace prefix (field 1).
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Namespace URI (field 2).
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Optional source position of the declaration (field 3).
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1181
/// An attribute of a proto XML element.
///
/// NOTE(review): field tags presumably mirror `XmlAttribute` in aapt2's
/// `Resources.proto` — confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoXmlAttribute {
    /// Namespace URI of the attribute (field 1).
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Attribute name (field 2).
    #[prost(string, tag = "2")]
    pub name: String,
    /// Literal string value; may be empty, in which case `compiled_item` is
    /// consulted by `proto_attribute_value` (field 3).
    #[prost(string, tag = "3")]
    pub value: String,
    /// Optional source position of the attribute (field 4).
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Resource ID associated with the attribute (field 5).
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Optional compiled form of the value (field 6).
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1197
/// A compiled resource item attached to an attribute.
///
/// Only the string, raw-string and primitive alternatives of the `oneof` are
/// modelled here (tags 2, 3 and 7).
///
/// NOTE(review): tags presumably mirror `Item` in aapt2's `Resources.proto`,
/// which defines further alternatives not declared here — confirm upstream.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoItem {
    /// The item payload, if it is one of the modelled alternatives.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Flag status (field 8).
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Whether the flag is negated (field 9).
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Flag name (field 10).
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1209
/// Holds the `oneof` payload type for `ProtoItem::value`.
pub mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// Modelled alternatives of a compiled item.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// A string value (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// A raw string value (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// A primitive value (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1224
/// Message wrapper around a string payload, used by `proto_item::Value::Str`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoStringValue {
    /// The string contents (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1230
/// Message wrapper around a raw string payload, used by
/// `proto_item::Value::RawStr`.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoRawStringValue {
    /// The raw string contents (protobuf field 1).
    #[prost(string, tag = "1")]
    pub value: String,
}
1236
/// A primitive compiled value (number, boolean, dimension or fraction).
///
/// NOTE(review): tags presumably mirror `Primitive` in aapt2's
/// `Resources.proto`, which defines further alternatives not declared here —
/// confirm upstream.
#[derive(Clone, PartialEq, Message)]
pub struct ProtoPrimitive {
    /// The primitive payload, if it is one of the modelled alternatives
    /// (tags 3, 6, 7, 8, 13 and 14).
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1242
/// Holds the `oneof` payload type for `ProtoPrimitive::value`.
pub mod proto_primitive {
    use prost::Oneof;

    /// Modelled alternatives of a primitive value.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// Floating-point value (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed integer, rendered in decimal by `proto_primitive_to_string`
        /// (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned integer, rendered as `0x…` by `proto_primitive_to_string`
        /// (protobuf field 7).
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean value (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension stored as a raw `u32` (protobuf field 13).
        /// NOTE(review): presumably a packed value+unit encoding — confirm.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction stored as a raw `u32` (protobuf field 14).
        /// NOTE(review): presumably a packed value+unit encoding — confirm.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1262
// Parser registry entries for the Android parsers in this file.
// NOTE(review): the positional arguments appear to be (description, path
// globs, package type, primary language, documentation URL) — confirm against
// the `register_parser!` macro definition.

// Soong `METADATA` textproto files.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);

// `AndroidManifest.xml`, in either text XML or binary AXML form.
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);

// `.apk` archives.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);

// `.aab` Android App Bundles.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);