Skip to main content

provenant/parsers/
android.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::HashMap;
5use std::fs::{self, File};
6use std::io::{Cursor, Read};
7use std::path::Path;
8
9use prost::Message;
10use quick_xml::Reader;
11use quick_xml::events::Event;
12use rusty_axml::{find_nodes_by_type, get_requested_permissions, parse_from_reader};
13use zip::ZipArchive;
14
15use crate::models::{DatasourceId, PackageData, PackageType};
16use crate::parser_warn as warn;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, MAX_MANIFEST_SIZE, truncate_field};
18use crate::utils::magic;
19
20use super::PackageParser;
21
/// Package type reported by every parser in this module.
const PACKAGE_TYPE: PackageType = PackageType::Android;
/// Largest on-disk archive size, in bytes, accepted before an archive is opened (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;
/// Largest reported uncompressed size, in bytes, allowed for a candidate archive entry (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;
/// Ceiling on the summed reported uncompressed sizes of the scanned archive entries (1 GiB).
const MAX_TOTAL_UNCOMPRESSED_SIZE: u64 = 1024 * 1024 * 1024;
/// Highest uncompressed-to-compressed size ratio tolerated for a candidate archive entry.
const MAX_COMPRESSION_RATIO: f64 = 100.0;
/// XML namespace URI whose attributes are reported with an `android:` key prefix.
const ANDROID_XML_NAMESPACE: &str = "http://schemas.android.com/apk/res/android";
28
29fn default_package_data(datasource_id: DatasourceId) -> PackageData {
30    PackageData {
31        package_type: Some(PACKAGE_TYPE),
32        datasource_id: Some(datasource_id),
33        ..Default::default()
34    }
35}
36
/// Parser for Soong `METADATA` files: any file named `METADATA` whose parent directory
/// name does not end in `.dist-info`.
pub struct AndroidSoongMetadataParser;
/// Parser for files named `AndroidManifest.xml`, in either text or binary XML form.
pub struct AndroidManifestParser;
/// Parser for `.apk` ZIP archives; reads the archive's `AndroidManifest.xml` entry.
pub struct AndroidApkParser;
/// Parser for `.aab` ZIP archives; reads a module's protobuf-encoded
/// `manifest/AndroidManifest.xml` entry.
pub struct AndroidAabParser;
41
42impl PackageParser for AndroidSoongMetadataParser {
43    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
44
45    fn is_match(path: &Path) -> bool {
46        path.file_name().and_then(|name| name.to_str()) == Some("METADATA")
47            && !path
48                .parent()
49                .and_then(|parent| parent.file_name())
50                .and_then(|name| name.to_str())
51                .is_some_and(|name| name.ends_with(".dist-info"))
52    }
53
54    fn extract_packages(path: &Path) -> Vec<PackageData> {
55        let content = match crate::parsers::utils::read_file_to_string(path, None) {
56            Ok(content) => content,
57            Err(error) => {
58                warn!(
59                    "Failed to read Android Soong METADATA {:?}: {}",
60                    path, error
61                );
62                return vec![default_package_data(DatasourceId::AndroidSoongMetadata)];
63            }
64        };
65
66        vec![parse_soong_metadata(&content)]
67    }
68}
69
70impl PackageParser for AndroidManifestParser {
71    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
72
73    fn is_match(path: &Path) -> bool {
74        path.file_name().and_then(|name| name.to_str()) == Some("AndroidManifest.xml")
75    }
76
77    fn extract_packages(path: &Path) -> Vec<PackageData> {
78        let bytes = match read_file_bytes(path, None) {
79            Ok(bytes) => bytes,
80            Err(error) => {
81                warn!("Failed to read AndroidManifest.xml {:?}: {}", path, error);
82                return vec![default_package_data(DatasourceId::AndroidManifestXml)];
83            }
84        };
85
86        vec![parse_manifest_bytes(
87            &bytes,
88            DatasourceId::AndroidManifestXml,
89            "AndroidManifest.xml",
90        )]
91    }
92}
93
94impl PackageParser for AndroidApkParser {
95    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
96
97    fn is_match(path: &Path) -> bool {
98        path.extension().and_then(|ext| ext.to_str()) == Some("apk") && magic::is_zip(path)
99    }
100
101    fn extract_packages(path: &Path) -> Vec<PackageData> {
102        let package_data = match read_best_zip_entry(path, |entry_name| {
103            if entry_name == "AndroidManifest.xml" {
104                Some(0)
105            } else {
106                None
107            }
108        }) {
109            Ok(Some((_, bytes))) => parse_binary_manifest_bytes(&bytes, DatasourceId::AndroidApk)
110                .unwrap_or_else(|error| {
111                    warn!("Failed to parse APK manifest {:?}: {}", path, error);
112                    default_package_data(DatasourceId::AndroidApk)
113                }),
114            Ok(None) => {
115                warn!("No AndroidManifest.xml found in APK {:?}", path);
116                default_package_data(DatasourceId::AndroidApk)
117            }
118            Err(error) => {
119                warn!("Failed to read APK archive {:?}: {}", path, error);
120                default_package_data(DatasourceId::AndroidApk)
121            }
122        };
123
124        vec![package_data]
125    }
126}
127
128impl PackageParser for AndroidAabParser {
129    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
130
131    fn is_match(path: &Path) -> bool {
132        path.extension().and_then(|ext| ext.to_str()) == Some("aab") && magic::is_zip(path)
133    }
134
135    fn extract_packages(path: &Path) -> Vec<PackageData> {
136        let package_data = match read_best_zip_entry(path, |entry_name| {
137            if entry_name == "base/manifest/AndroidManifest.xml" {
138                Some(0)
139            } else if entry_name.ends_with("/manifest/AndroidManifest.xml") {
140                Some(1)
141            } else {
142                None
143            }
144        }) {
145            Ok(Some((entry_name, bytes))) => {
146                parse_proto_manifest_bytes(&bytes).unwrap_or_else(|error| {
147                    warn!(
148                        "Failed to parse AAB manifest {:?} ({}): {}",
149                        path, entry_name, error
150                    );
151                    default_package_data(DatasourceId::AndroidAab)
152                })
153            }
154            Ok(None) => {
155                warn!("No proto AndroidManifest.xml found in AAB {:?}", path);
156                default_package_data(DatasourceId::AndroidAab)
157            }
158            Err(error) => {
159                warn!("Failed to read AAB archive {:?}: {}", path, error);
160                default_package_data(DatasourceId::AndroidAab)
161            }
162        };
163
164        vec![package_data]
165    }
166}
167
168fn read_file_bytes(path: &Path, max_size: Option<u64>) -> Result<Vec<u8>, String> {
169    let limit = max_size.unwrap_or(MAX_MANIFEST_SIZE);
170    let metadata =
171        fs::metadata(path).map_err(|error| format!("Cannot stat file {:?}: {}", path, error))?;
172
173    if metadata.len() > limit {
174        return Err(format!(
175            "File {:?} is {} bytes, exceeding the {} byte limit",
176            path,
177            metadata.len(),
178            limit
179        ));
180    }
181
182    let mut file =
183        File::open(path).map_err(|error| format!("Failed to open {:?}: {}", path, error))?;
184    let mut bytes = Vec::with_capacity(metadata.len() as usize);
185    file.read_to_end(&mut bytes)
186        .map_err(|error| format!("Failed to read {:?}: {}", path, error))?;
187    Ok(bytes)
188}
189
190fn parse_soong_metadata(content: &str) -> PackageData {
191    let parsed = parse_textproto_map(content).unwrap_or_else(|error| {
192        warn!("Failed to parse Android Soong METADATA: {}", error);
193        ProtoMap::default()
194    });
195
196    let mut package = default_package_data(DatasourceId::AndroidSoongMetadata);
197    package.name = parsed.get_first_string("name").map(truncate_field);
198    package.description = parsed.get_first_string("description").map(truncate_field);
199
200    if let Some(third_party) = parsed.get_first_map("third_party") {
201        package.version = third_party.get_first_string("version").map(truncate_field);
202
203        let url_entries = third_party
204            .get_all_maps("url")
205            .into_iter()
206            .map(|entry| {
207                let type_ = entry.get_first_string("type").map(truncate_field);
208                let value = entry.get_first_string("value").map(truncate_field);
209                (type_, value)
210            })
211            .collect::<Vec<_>>();
212
213        let homepage_url = third_party.get_first_string("homepage").or_else(|| {
214            url_entries
215                .iter()
216                .find(|(type_, _)| {
217                    type_
218                        .as_deref()
219                        .is_some_and(|type_| type_.eq_ignore_ascii_case("homepage"))
220                })
221                .and_then(|(_, value)| value.clone())
222        });
223        package.homepage_url = homepage_url.map(truncate_field);
224
225        let license_types = third_party
226            .get_all_strings("license_type")
227            .into_iter()
228            .map(truncate_field)
229            .collect::<Vec<_>>();
230        if !license_types.is_empty() {
231            package.extracted_license_statement = Some(license_types.join(", "));
232        }
233
234        let identifiers = third_party
235            .get_all_maps("identifier")
236            .into_iter()
237            .map(|identifier| {
238                let type_ = identifier.get_first_string("type").map(truncate_field);
239                let value = identifier.get_first_string("value").map(truncate_field);
240                let mut object = serde_json::Map::new();
241                if let Some(type_) = type_ {
242                    object.insert("type".to_string(), type_.into());
243                }
244                if let Some(value) = &value {
245                    object.insert("value".to_string(), value.clone().into());
246                }
247
248                if package.vcs_url.is_none()
249                    && let (Some(type_), Some(value)) = (
250                        identifier.get_first_string("type"),
251                        identifier.get_first_string("value"),
252                    )
253                {
254                    let lower_type = type_.to_ascii_lowercase();
255                    if lower_type.contains("git") {
256                        package.vcs_url = Some(truncate_field(value));
257                    } else if lower_type.contains("archive")
258                        || lower_type.contains("tar")
259                        || lower_type.contains("zip")
260                    {
261                        package.download_url = Some(truncate_field(value));
262                    }
263                }
264
265                serde_json::Value::Object(object)
266            })
267            .collect::<Vec<_>>();
268
269        for (type_, value) in &url_entries {
270            let Some(value) = value else {
271                continue;
272            };
273
274            match type_.as_deref().map(str::to_ascii_lowercase).as_deref() {
275                Some("git") if package.vcs_url.is_none() => {
276                    package.vcs_url = Some(value.clone());
277                }
278                Some("archive") if package.download_url.is_none() => {
279                    package.download_url = Some(value.clone());
280                }
281                Some("homepage") if package.homepage_url.is_none() => {
282                    package.homepage_url = Some(value.clone());
283                }
284                _ => {}
285            }
286        }
287
288        let mut extra_data = HashMap::new();
289        if !identifiers.is_empty() {
290            extra_data.insert("identifiers".to_string(), identifiers.into());
291        }
292        if !url_entries.is_empty() {
293            extra_data.insert(
294                "urls".to_string(),
295                url_entries
296                    .iter()
297                    .map(|(type_, value)| {
298                        let mut object = serde_json::Map::new();
299                        if let Some(type_) = type_ {
300                            object.insert("type".to_string(), type_.clone().into());
301                        }
302                        if let Some(value) = value {
303                            object.insert("value".to_string(), value.clone().into());
304                        }
305                        serde_json::Value::Object(object)
306                    })
307                    .collect::<Vec<_>>()
308                    .into(),
309            );
310        }
311
312        if let Some(last_upgrade_date) = third_party.get_first_map("last_upgrade_date") {
313            let year = last_upgrade_date.get_first_string("year");
314            let month = last_upgrade_date.get_first_string("month");
315            let day = last_upgrade_date.get_first_string("day");
316            if let (Some(year), Some(month), Some(day)) = (year, month, day) {
317                let formatted = format!(
318                    "{:04}-{:02}-{:02}",
319                    year.parse::<u32>().unwrap_or_default(),
320                    month.parse::<u32>().unwrap_or_default(),
321                    day.parse::<u32>().unwrap_or_default()
322                );
323                extra_data.insert(
324                    "last_upgrade_date".to_string(),
325                    truncate_field(formatted).into(),
326                );
327            }
328        }
329
330        if let Some(upstream_url) = third_party.get_first_string("url") {
331            extra_data.insert(
332                "upstream_url".to_string(),
333                truncate_field(upstream_url).into(),
334            );
335        }
336
337        if !extra_data.is_empty() {
338            package.extra_data = Some(extra_data);
339        }
340    }
341
342    package
343}
344
345fn parse_manifest_bytes(bytes: &[u8], datasource_id: DatasourceId, context: &str) -> PackageData {
346    if looks_like_text_xml(bytes) {
347        match parse_text_manifest_bytes(bytes, datasource_id) {
348            Ok(package) => return package,
349            Err(error) => warn!("Failed to parse {} as text XML: {}", context, error),
350        }
351    }
352
353    parse_binary_manifest_bytes(bytes, datasource_id).unwrap_or_else(|error| {
354        warn!(
355            "Failed to parse {} as binary Android XML: {}",
356            context, error
357        );
358        default_package_data(datasource_id)
359    })
360}
361
/// Cheap sniff for textual XML: after an optional UTF-8 byte-order mark and any
/// leading ASCII whitespace, the first byte must be `<`.
///
/// Returns `false` for empty or all-whitespace input.
fn looks_like_text_xml(bytes: &[u8]) -> bool {
    const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

    // Some editors prepend a BOM to UTF-8 files; without skipping it such a manifest
    // would be classified as non-text and never reach the text parser.
    let bytes = bytes.strip_prefix(&UTF8_BOM).unwrap_or(bytes);

    bytes
        .iter()
        .find(|byte| !byte.is_ascii_whitespace())
        .is_some_and(|byte| *byte == b'<')
}
368
369fn parse_text_manifest_bytes(
370    bytes: &[u8],
371    datasource_id: DatasourceId,
372) -> Result<PackageData, String> {
373    let content = String::from_utf8(bytes.to_vec())
374        .map_err(|error| format!("Invalid UTF-8 in AndroidManifest.xml: {}", error))?;
375
376    let mut reader = Reader::from_str(&content);
377    reader.config_mut().trim_text(true);
378
379    let mut buf = Vec::new();
380    let mut manifest_attributes = HashMap::new();
381    let mut uses_sdk_attributes = HashMap::new();
382    let mut application_attributes = HashMap::new();
383    let mut requested_permissions = Vec::new();
384    let mut uses_libraries = Vec::new();
385    let mut iteration_count = 0usize;
386
387    loop {
388        iteration_count += 1;
389        if iteration_count > MAX_ITERATION_COUNT {
390            return Err(format!(
391                "Exceeded MAX_ITERATION_COUNT ({}) while parsing AndroidManifest.xml",
392                MAX_ITERATION_COUNT
393            ));
394        }
395
396        match reader.read_event_into(&mut buf) {
397            Ok(Event::Start(event)) | Ok(Event::Empty(event)) => {
398                let name = String::from_utf8_lossy(event.name().as_ref()).into_owned();
399                let attributes = xml_attributes_to_map(&reader, &event)?;
400                match name.as_str() {
401                    "manifest" if manifest_attributes.is_empty() => {
402                        manifest_attributes = attributes
403                    }
404                    "uses-sdk" => uses_sdk_attributes = attributes,
405                    "application" if application_attributes.is_empty() => {
406                        application_attributes = attributes;
407                    }
408                    "uses-permission" | "uses-permission-sdk-23" => {
409                        if let Some(permission) = attributes.get("android:name") {
410                            requested_permissions.push(permission.clone());
411                        }
412                    }
413                    "uses-library" => {
414                        if let Some(library_name) = attributes.get("android:name") {
415                            uses_libraries.push(library_name.clone());
416                        }
417                    }
418                    _ => {}
419                }
420            }
421            Ok(Event::Eof) => break,
422            Err(error) => {
423                return Err(format!(
424                    "XML parse error at position {}: {}",
425                    reader.buffer_position(),
426                    error
427                ));
428            }
429            _ => {}
430        }
431
432        buf.clear();
433    }
434
435    Ok(build_manifest_package_data(
436        datasource_id,
437        &manifest_attributes,
438        &uses_sdk_attributes,
439        &application_attributes,
440        requested_permissions,
441        uses_libraries,
442    ))
443}
444
445fn xml_attributes_to_map(
446    reader: &Reader<&[u8]>,
447    event: &quick_xml::events::BytesStart<'_>,
448) -> Result<HashMap<String, String>, String> {
449    let mut attributes = HashMap::new();
450
451    for attribute in event.attributes().flatten().take(MAX_ITERATION_COUNT) {
452        let key = String::from_utf8_lossy(attribute.key.as_ref()).into_owned();
453        let value = attribute
454            .decode_and_unescape_value(reader.decoder())
455            .map_err(|error| format!("Failed to decode XML attribute {}: {}", key, error))?
456            .into_owned();
457        attributes.insert(key, truncate_field(value));
458    }
459
460    Ok(attributes)
461}
462
463fn parse_binary_manifest_bytes(
464    bytes: &[u8],
465    datasource_id: DatasourceId,
466) -> Result<PackageData, String> {
467    let axml = std::panic::catch_unwind(|| parse_from_reader(Cursor::new(bytes.to_vec())))
468        .map_err(|_| "rusty-axml panicked while parsing binary Android XML".to_string())?
469        .map_err(|error| format!("rusty-axml parse failure: {}", error))?;
470
471    let manifest_attributes =
472        normalize_binary_attributes(axml.root().borrow().attributes().clone());
473    let uses_sdk_attributes = find_nodes_by_type(&axml, "uses-sdk")
474        .into_iter()
475        .next()
476        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
477        .unwrap_or_default();
478    let application_attributes = find_nodes_by_type(&axml, "application")
479        .into_iter()
480        .next()
481        .map(|node| normalize_binary_attributes(node.borrow().attributes().clone()))
482        .unwrap_or_default();
483
484    let requested_permissions = get_requested_permissions(&axml)
485        .into_iter()
486        .map(truncate_field)
487        .collect::<Vec<_>>();
488    let uses_libraries = find_nodes_by_type(&axml, "uses-library")
489        .into_iter()
490        .filter_map(|node| node.borrow().get_attr("android:name").map(str::to_string))
491        .map(truncate_field)
492        .collect::<Vec<_>>();
493
494    Ok(build_manifest_package_data(
495        datasource_id,
496        &manifest_attributes,
497        &uses_sdk_attributes,
498        &application_attributes,
499        requested_permissions,
500        uses_libraries,
501    ))
502}
503
504fn build_manifest_package_data(
505    datasource_id: DatasourceId,
506    manifest_attributes: &HashMap<String, String>,
507    uses_sdk_attributes: &HashMap<String, String>,
508    application_attributes: &HashMap<String, String>,
509    requested_permissions: Vec<String>,
510    uses_libraries: Vec<String>,
511) -> PackageData {
512    let mut package = default_package_data(datasource_id);
513    package.name = manifest_attributes.get("package").cloned();
514    package.version = manifest_attributes
515        .get("android:versionName")
516        .cloned()
517        .or_else(|| manifest_attributes.get("android:versionCode").cloned());
518
519    package.description = application_attributes
520        .get("android:label")
521        .filter(|label| {
522            !label.starts_with('@') && !label.chars().all(|character| character.is_ascii_digit())
523        })
524        .cloned();
525
526    let mut extra_data = HashMap::new();
527    insert_extra(
528        &mut extra_data,
529        "version_code",
530        manifest_attributes.get("android:versionCode"),
531    );
532    insert_extra(
533        &mut extra_data,
534        "compile_sdk_version",
535        manifest_attributes.get("android:compileSdkVersion"),
536    );
537    insert_extra(
538        &mut extra_data,
539        "compile_sdk_version_codename",
540        manifest_attributes.get("android:compileSdkVersionCodename"),
541    );
542    insert_extra(
543        &mut extra_data,
544        "platform_build_version_code",
545        manifest_attributes.get("platformBuildVersionCode"),
546    );
547    insert_extra(
548        &mut extra_data,
549        "platform_build_version_name",
550        manifest_attributes.get("platformBuildVersionName"),
551    );
552    insert_extra(
553        &mut extra_data,
554        "min_sdk_version",
555        uses_sdk_attributes.get("android:minSdkVersion"),
556    );
557    insert_extra(
558        &mut extra_data,
559        "target_sdk_version",
560        uses_sdk_attributes.get("android:targetSdkVersion"),
561    );
562    insert_extra(
563        &mut extra_data,
564        "max_sdk_version",
565        uses_sdk_attributes.get("android:maxSdkVersion"),
566    );
567
568    if !requested_permissions.is_empty() {
569        extra_data.insert(
570            "requested_permissions".to_string(),
571            requested_permissions
572                .into_iter()
573                .map(serde_json::Value::from)
574                .collect::<Vec<_>>()
575                .into(),
576        );
577    }
578    if !uses_libraries.is_empty() {
579        extra_data.insert(
580            "uses_libraries".to_string(),
581            uses_libraries
582                .into_iter()
583                .map(serde_json::Value::from)
584                .collect::<Vec<_>>()
585                .into(),
586        );
587    }
588
589    if !extra_data.is_empty() {
590        package.extra_data = Some(extra_data);
591    }
592
593    package
594}
595
596fn normalize_binary_attributes(attributes: HashMap<String, String>) -> HashMap<String, String> {
597    attributes
598        .into_iter()
599        .map(|(key, value)| (key, normalize_binary_attribute_value(&value)))
600        .collect()
601}
602
/// Rewrites a hexadecimal attribute rendering as a decimal string.
///
/// Values of the form `(type 0x10) 0x<hex>` or `0x<hex>` whose digits parse as a
/// base-16 `u64` are returned in decimal; anything else is returned unchanged.
fn normalize_binary_attribute_value(value: &str) -> String {
    let hex_digits = match value.strip_prefix("(type 0x10) 0x") {
        Some(rest) => Some(rest),
        None => value.strip_prefix("0x"),
    };

    hex_digits
        .and_then(|digits| u64::from_str_radix(digits, 16).ok())
        .map_or_else(|| value.to_string(), |number| number.to_string())
}
616
617fn insert_extra(
618    extra_data: &mut HashMap<String, serde_json::Value>,
619    key: &str,
620    value: Option<&String>,
621) {
622    if let Some(value) = value {
623        extra_data.insert(key.to_string(), truncate_field(value.clone()).into());
624    }
625}
626
627fn read_best_zip_entry<F>(
628    path: &Path,
629    mut rank_entry: F,
630) -> Result<Option<(String, Vec<u8>)>, String>
631where
632    F: FnMut(&str) -> Option<u8>,
633{
634    let metadata = fs::metadata(path)
635        .map_err(|error| format!("Failed to stat archive {:?}: {}", path, error))?;
636    if metadata.len() > MAX_ARCHIVE_SIZE {
637        return Err(format!(
638            "Archive {:?} is {} bytes, exceeding the {} byte limit",
639            path,
640            metadata.len(),
641            MAX_ARCHIVE_SIZE
642        ));
643    }
644
645    let file = File::open(path)
646        .map_err(|error| format!("Failed to open archive {:?}: {}", path, error))?;
647    let mut archive = ZipArchive::new(file)
648        .map_err(|error| format!("Failed to parse ZIP archive {:?}: {}", path, error))?;
649
650    let mut total_uncompressed = 0u64;
651    let mut best: Option<(u8, String, Vec<u8>)> = None;
652    let entry_count = archive.len().min(MAX_ITERATION_COUNT);
653
654    if archive.len() > MAX_ITERATION_COUNT {
655        warn!(
656            "Archive {:?} has more than MAX_ITERATION_COUNT ({}) entries; truncating scan",
657            path, MAX_ITERATION_COUNT
658        );
659    }
660
661    for index in 0..entry_count {
662        let mut entry = archive.by_index(index).map_err(|error| {
663            format!(
664                "Failed to read ZIP entry {} in {:?}: {}",
665                index, path, error
666            )
667        })?;
668
669        total_uncompressed = total_uncompressed.saturating_add(entry.size());
670        if total_uncompressed > MAX_TOTAL_UNCOMPRESSED_SIZE {
671            return Err(format!(
672                "Archive {:?} exceeds total uncompressed size limit of {} bytes",
673                path, MAX_TOTAL_UNCOMPRESSED_SIZE
674            ));
675        }
676
677        let entry_name = entry.name().replace('\\', "/");
678        if entry_name.starts_with('/') || entry_name.split('/').any(|segment| segment == "..") {
679            return Err(format!(
680                "Archive entry {} contains a disallowed path",
681                entry_name
682            ));
683        }
684        let Some(rank) = rank_entry(&entry_name) else {
685            continue;
686        };
687
688        if entry.size() > MAX_FILE_SIZE {
689            return Err(format!(
690                "Archive entry {} is {} bytes, exceeding the {} byte limit",
691                entry_name,
692                entry.size(),
693                MAX_FILE_SIZE
694            ));
695        }
696
697        let compressed_size = entry.compressed_size();
698        if compressed_size > 0 {
699            let ratio = entry.size() as f64 / compressed_size as f64;
700            if ratio > MAX_COMPRESSION_RATIO {
701                return Err(format!(
702                    "Archive entry {} has suspicious compression ratio {:.2}:1",
703                    entry_name, ratio
704                ));
705            }
706        }
707
708        let should_replace = match &best {
709            Some((best_rank, _, _)) => rank < *best_rank,
710            None => true,
711        };
712
713        if should_replace {
714            let mut bytes = Vec::with_capacity(entry.size() as usize);
715            entry.read_to_end(&mut bytes).map_err(|error| {
716                format!("Failed to read archive entry {}: {}", entry_name, error)
717            })?;
718            best = Some((rank, entry_name, bytes));
719        }
720    }
721
722    Ok(best.map(|(_, entry_name, bytes)| (entry_name, bytes)))
723}
724
725fn parse_proto_manifest_bytes(bytes: &[u8]) -> Result<PackageData, String> {
726    let node =
727        ProtoXmlNode::decode(bytes).map_err(|error| format!("prost decode failure: {}", error))?;
728    let root_element = node
729        .element()
730        .ok_or_else(|| "Proto manifest root is not an element".to_string())?;
731    if root_element.name != "manifest" {
732        return Err(format!(
733            "Unexpected proto XML root element: {}",
734            root_element.name
735        ));
736    }
737
738    let manifest_attributes = proto_attributes_to_map(&root_element.attribute);
739    let uses_sdk_attributes = root_element
740        .child_elements_named("uses-sdk")
741        .next()
742        .map(|element| proto_attributes_to_map(&element.attribute))
743        .unwrap_or_default();
744    let application_attributes = root_element
745        .child_elements_named("application")
746        .next()
747        .map(|element| proto_attributes_to_map(&element.attribute))
748        .unwrap_or_default();
749    let requested_permissions = root_element
750        .child_elements_named_any(&["uses-permission", "uses-permission-sdk-23"])
751        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
752        .collect::<Vec<_>>();
753    let uses_libraries = root_element
754        .child_elements_named("uses-library")
755        .filter_map(|element| proto_attributes_to_map(&element.attribute).remove("android:name"))
756        .collect::<Vec<_>>();
757
758    let mut package = build_manifest_package_data(
759        DatasourceId::AndroidAab,
760        &manifest_attributes,
761        &uses_sdk_attributes,
762        &application_attributes,
763        requested_permissions,
764        uses_libraries,
765    );
766
767    if let Some(extra_data) = package.extra_data.as_mut() {
768        extra_data.insert("manifest_encoding".to_string(), "proto".into());
769    } else {
770        package.extra_data = Some(HashMap::from([(
771            "manifest_encoding".to_string(),
772            serde_json::Value::String("proto".to_string()),
773        )]));
774    }
775
776    Ok(package)
777}
778
779fn proto_attributes_to_map(attributes: &[ProtoXmlAttribute]) -> HashMap<String, String> {
780    attributes
781        .iter()
782        .filter_map(|attribute| {
783            let key = proto_attribute_key(attribute)?;
784            let value = proto_attribute_value(attribute)?;
785            Some((key, truncate_field(value)))
786        })
787        .collect()
788}
789
790fn proto_attribute_key(attribute: &ProtoXmlAttribute) -> Option<String> {
791    if attribute.name.is_empty() {
792        return None;
793    }
794
795    if attribute.namespace_uri == ANDROID_XML_NAMESPACE {
796        return Some(format!("android:{}", attribute.name));
797    }
798
799    Some(attribute.name.clone())
800}
801
802fn proto_attribute_value(attribute: &ProtoXmlAttribute) -> Option<String> {
803    if !attribute.value.is_empty() {
804        return Some(attribute.value.clone());
805    }
806
807    attribute
808        .compiled_item
809        .as_ref()
810        .and_then(proto_item_to_string)
811}
812
813fn proto_item_to_string(item: &ProtoItem) -> Option<String> {
814    match &item.value {
815        Some(proto_item::Value::Str(value)) => Some(value.value.clone()),
816        Some(proto_item::Value::RawStr(value)) => Some(value.value.clone()),
817        Some(proto_item::Value::Prim(value)) => proto_primitive_to_string(value),
818        _ => None,
819    }
820}
821
822fn proto_primitive_to_string(primitive: &ProtoPrimitive) -> Option<String> {
823    match &primitive.value {
824        Some(proto_primitive::Value::IntDecimal(value)) => Some(value.to_string()),
825        Some(proto_primitive::Value::IntHexadecimal(value)) => Some(format!("0x{value:x}")),
826        Some(proto_primitive::Value::Boolean(value)) => Some(value.to_string()),
827        Some(proto_primitive::Value::Float(value)) => Some(value.to_string()),
828        Some(proto_primitive::Value::Dimension(value)) => Some(value.to_string()),
829        Some(proto_primitive::Value::Fraction(value)) => Some(value.to_string()),
830        _ => None,
831    }
832}
833
/// One parsed textproto message: field names mapped to every value recorded under
/// that name, in the order they were stored.
#[derive(Debug, Clone, Default)]
struct ProtoMap {
    fields: HashMap<String, Vec<ProtoValue>>,
}

/// A single textproto field value: either a scalar kept as text or a nested message.
#[derive(Debug, Clone)]
enum ProtoValue {
    Scalar(String),
    Map(ProtoMap),
}

impl ProtoMap {
    /// Returns a copy of the first scalar stored under `key`, skipping nested messages.
    fn get_first_string(&self, key: &str) -> Option<String> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Scalar(text) = value {
                return Some(text.clone());
            }
        }
        None
    }

    /// Returns copies of all scalars stored under `key`, in order; empty when the key
    /// is absent or holds only nested messages.
    fn get_all_strings(&self, key: &str) -> Vec<String> {
        let mut scalars = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Scalar(text) = value {
                    scalars.push(text.clone());
                }
            }
        }
        scalars
    }

    /// Returns a copy of the first nested message stored under `key`, skipping scalars.
    fn get_first_map(&self, key: &str) -> Option<ProtoMap> {
        for value in self.fields.get(key)? {
            if let ProtoValue::Map(nested) = value {
                return Some(nested.clone());
            }
        }
        None
    }

    /// Returns copies of all nested messages stored under `key`, in order; empty when
    /// the key is absent or holds only scalars.
    fn get_all_maps(&self, key: &str) -> Vec<ProtoMap> {
        let mut nested_maps = Vec::new();
        if let Some(values) = self.fields.get(key) {
            for value in values {
                if let ProtoValue::Map(nested) = value {
                    nested_maps.push(nested.clone());
                }
            }
        }
        nested_maps
    }
}
888
889fn parse_textproto_map(content: &str) -> Result<ProtoMap, String> {
890    let mut parser = TextProtoParser::new(content)?;
891    parser.parse_map(false)
892}
893
894struct TextProtoParser {
895    tokens: Vec<TextProtoToken>,
896    position: usize,
897}
898
899#[derive(Debug, Clone)]
900enum TextProtoToken {
901    Identifier(String),
902    String(String),
903    Colon,
904    LBrace,
905    RBrace,
906}
907
908impl TextProtoParser {
909    fn new(content: &str) -> Result<Self, String> {
910        Ok(Self {
911            tokens: tokenize_textproto(content)?,
912            position: 0,
913        })
914    }
915
916    fn parse_map(&mut self, stop_on_rbrace: bool) -> Result<ProtoMap, String> {
917        let mut map = ProtoMap::default();
918
919        while let Some(token) = self.peek() {
920            match token {
921                TextProtoToken::RBrace if stop_on_rbrace => {
922                    self.position += 1;
923                    break;
924                }
925                TextProtoToken::RBrace => return Err("Unexpected closing brace".to_string()),
926                TextProtoToken::Identifier(_) => {
927                    let key = self.expect_identifier()?;
928                    match self.peek() {
929                        Some(TextProtoToken::Colon) => {
930                            self.position += 1;
931                            let value = self.expect_scalar()?;
932                            map.fields
933                                .entry(key)
934                                .or_default()
935                                .push(ProtoValue::Scalar(truncate_field(value)));
936                        }
937                        Some(TextProtoToken::LBrace) => {
938                            self.position += 1;
939                            let value = self.parse_map(true)?;
940                            map.fields
941                                .entry(key)
942                                .or_default()
943                                .push(ProtoValue::Map(value));
944                        }
945                        Some(other) => {
946                            return Err(format!("Unexpected token after key: {:?}", other));
947                        }
948                        None => return Err("Unexpected end of input after key".to_string()),
949                    }
950                }
951                other => return Err(format!("Unexpected token in textproto: {:?}", other)),
952            }
953        }
954
955        Ok(map)
956    }
957
958    fn expect_identifier(&mut self) -> Result<String, String> {
959        match self.next() {
960            Some(TextProtoToken::Identifier(value)) => Ok(value),
961            other => Err(format!("Expected identifier, found {:?}", other)),
962        }
963    }
964
965    fn expect_scalar(&mut self) -> Result<String, String> {
966        match self.next() {
967            Some(TextProtoToken::String(mut value)) => {
968                while matches!(self.peek(), Some(TextProtoToken::String(_))) {
969                    if let Some(TextProtoToken::String(next)) = self.next() {
970                        value.push_str(&next);
971                    }
972                }
973                Ok(value)
974            }
975            Some(TextProtoToken::Identifier(value)) => Ok(value),
976            other => Err(format!("Expected scalar value, found {:?}", other)),
977        }
978    }
979
980    fn peek(&self) -> Option<&TextProtoToken> {
981        self.tokens.get(self.position)
982    }
983
984    fn next(&mut self) -> Option<TextProtoToken> {
985        let token = self.tokens.get(self.position).cloned();
986        if token.is_some() {
987            self.position += 1;
988        }
989        token
990    }
991}
992
993fn tokenize_textproto(content: &str) -> Result<Vec<TextProtoToken>, String> {
994    let mut tokens = Vec::new();
995    let chars = content.chars().collect::<Vec<_>>();
996    let mut index = 0usize;
997
998    while index < chars.len() {
999        match chars[index] {
1000            '{' => {
1001                tokens.push(TextProtoToken::LBrace);
1002                index += 1;
1003            }
1004            '}' => {
1005                tokens.push(TextProtoToken::RBrace);
1006                index += 1;
1007            }
1008            ':' => {
1009                tokens.push(TextProtoToken::Colon);
1010                index += 1;
1011            }
1012            '"' => {
1013                index += 1;
1014                let mut value = String::new();
1015                while index < chars.len() {
1016                    match chars[index] {
1017                        '\\' if index + 1 < chars.len() => {
1018                            index += 1;
1019                            value.push(chars[index]);
1020                            index += 1;
1021                        }
1022                        '"' => {
1023                            index += 1;
1024                            break;
1025                        }
1026                        character => {
1027                            value.push(character);
1028                            index += 1;
1029                        }
1030                    }
1031                }
1032                tokens.push(TextProtoToken::String(value));
1033            }
1034            '#' => {
1035                while index < chars.len() && chars[index] != '\n' {
1036                    index += 1;
1037                }
1038            }
1039            '/' if index + 1 < chars.len() && chars[index + 1] == '/' => {
1040                index += 2;
1041                while index < chars.len() && chars[index] != '\n' {
1042                    index += 1;
1043                }
1044            }
1045            character if character.is_ascii_whitespace() => index += 1,
1046            _ => {
1047                let start = index;
1048                while index < chars.len() {
1049                    let character = chars[index];
1050                    let starts_comment =
1051                        character == '/' && index + 1 < chars.len() && chars[index + 1] == '/';
1052
1053                    if character.is_ascii_whitespace()
1054                        || matches!(character, '{' | '}' | ':' | '#')
1055                        || starts_comment
1056                    {
1057                        break;
1058                    }
1059
1060                    index += 1;
1061                }
1062
1063                let token = chars[start..index].iter().collect::<String>();
1064                if token.is_empty() {
1065                    return Err("Encountered empty textproto token".to_string());
1066                }
1067                tokens.push(TextProtoToken::Identifier(token));
1068            }
1069        }
1070    }
1071
1072    Ok(tokens)
1073}
1074
/// Hand-written prost message carrying a line/column pair.
///
/// It is embedded as the optional `source` field of `ProtoXmlNode`,
/// `ProtoXmlNamespace` and `ProtoXmlAttribute`.
// NOTE(review): presumably mirrors the `SourcePosition` message of AAPT2's
// Resources.proto - confirm against the upstream schema.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoSourcePosition {
    /// Protobuf field 1.
    #[prost(uint32, tag = "1")]
    pub line_number: u32,
    /// Protobuf field 2.
    #[prost(uint32, tag = "2")]
    pub column_number: u32,
}
1082
1083#[derive(Clone, PartialEq, Message)]
1084pub(crate) struct ProtoXmlNode {
1085    #[prost(oneof = "proto_xml_node::Node", tags = "1, 2")]
1086    pub node: Option<proto_xml_node::Node>,
1087    #[prost(message, optional, tag = "3")]
1088    pub source: Option<ProtoSourcePosition>,
1089}
1090
1091impl ProtoXmlNode {
1092    fn element(&self) -> Option<&ProtoXmlElement> {
1093        match &self.node {
1094            Some(proto_xml_node::Node::Element(element)) => Some(element),
1095            _ => None,
1096        }
1097    }
1098}
1099
/// Holds the oneof payload type referenced by `ProtoXmlNode::node`.
pub(crate) mod proto_xml_node {
    use super::ProtoXmlElement;
    use prost::Oneof;

    /// The two mutually exclusive payloads of an XML node.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Node {
        /// A nested element (protobuf field 1).
        #[prost(message, tag = "1")]
        Element(ProtoXmlElement),
        /// A text run (protobuf field 2).
        #[prost(string, tag = "2")]
        Text(String),
    }
}
1112
1113#[derive(Clone, PartialEq, Message)]
1114pub(crate) struct ProtoXmlElement {
1115    #[prost(message, repeated, tag = "1")]
1116    pub namespace_declaration: Vec<ProtoXmlNamespace>,
1117    #[prost(string, tag = "2")]
1118    pub namespace_uri: String,
1119    #[prost(string, tag = "3")]
1120    pub name: String,
1121    #[prost(message, repeated, tag = "4")]
1122    pub attribute: Vec<ProtoXmlAttribute>,
1123    #[prost(message, repeated, tag = "5")]
1124    pub child: Vec<ProtoXmlNode>,
1125}
1126
1127impl ProtoXmlElement {
1128    fn child_elements_named<'a>(
1129        &'a self,
1130        name: &'a str,
1131    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1132        self.child
1133            .iter()
1134            .filter_map(ProtoXmlNode::element)
1135            .filter(move |element| element.name == name)
1136    }
1137
1138    fn child_elements_named_any<'a>(
1139        &'a self,
1140        names: &'a [&'a str],
1141    ) -> impl Iterator<Item = &'a ProtoXmlElement> {
1142        self.child
1143            .iter()
1144            .filter_map(ProtoXmlNode::element)
1145            .filter(move |element| names.contains(&element.name.as_str()))
1146    }
1147}
1148
/// Hand-written prost message for one namespace declaration on an element
/// (see `ProtoXmlElement::namespace_declaration`).
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlNamespace {
    /// Protobuf field 1.
    #[prost(string, tag = "1")]
    pub prefix: String,
    /// Protobuf field 2.
    #[prost(string, tag = "2")]
    pub uri: String,
    /// Protobuf field 3; absent when the encoder recorded no position.
    #[prost(message, optional, tag = "3")]
    pub source: Option<ProtoSourcePosition>,
}
1158
/// Hand-written prost message for one attribute of a proto-encoded XML
/// element (see `ProtoXmlElement::attribute`).
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoXmlAttribute {
    /// Protobuf field 1.
    #[prost(string, tag = "1")]
    pub namespace_uri: String,
    /// Protobuf field 2.
    #[prost(string, tag = "2")]
    pub name: String,
    /// Protobuf field 3: the raw string value. `proto_attribute_value`
    /// returns it whenever it is non-empty.
    #[prost(string, tag = "3")]
    pub value: String,
    /// Protobuf field 4; absent when the encoder recorded no position.
    #[prost(message, optional, tag = "4")]
    pub source: Option<ProtoSourcePosition>,
    /// Protobuf field 5.
    // NOTE(review): presumably the Android resource ID of the attribute
    // name - confirm against the upstream schema.
    #[prost(uint32, tag = "5")]
    pub resource_id: u32,
    /// Protobuf field 6: the compiled form of the value, which
    /// `proto_attribute_value` falls back to when `value` is empty.
    #[prost(message, optional, tag = "6")]
    pub compiled_item: Option<ProtoItem>,
}
1174
/// Hand-written prost message for a compiled attribute value.
///
/// Only the string, raw-string and primitive alternatives (protobuf fields
/// 2, 3 and 7) are declared in the oneof; a message using any other field
/// number there decodes with `value` left as `None`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoItem {
    /// The decoded alternative, if it is one of the declared ones.
    #[prost(oneof = "proto_item::Value", tags = "2, 3, 7")]
    pub value: Option<proto_item::Value>,
    /// Protobuf field 8.
    #[prost(uint32, tag = "8")]
    pub flag_status: u32,
    /// Protobuf field 9.
    #[prost(bool, tag = "9")]
    pub flag_negated: bool,
    /// Protobuf field 10.
    #[prost(string, tag = "10")]
    pub flag_name: String,
}
1186
/// Holds the oneof payload type referenced by `ProtoItem::value`.
pub(crate) mod proto_item {
    use super::{ProtoPrimitive, ProtoRawStringValue, ProtoStringValue};
    use prost::Oneof;

    /// The value alternatives that `proto_item_to_string` can render.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// String value (protobuf field 2).
        #[prost(message, tag = "2")]
        Str(ProtoStringValue),
        /// Raw string value (protobuf field 3).
        #[prost(message, tag = "3")]
        RawStr(ProtoRawStringValue),
        /// Primitive value (protobuf field 7).
        #[prost(message, tag = "7")]
        Prim(ProtoPrimitive),
    }
}
1201
/// Hand-written prost message wrapping a string; payload of
/// `proto_item::Value::Str`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoStringValue {
    /// Protobuf field 1: the string itself.
    #[prost(string, tag = "1")]
    pub value: String,
}
1207
/// Hand-written prost message wrapping a raw string; payload of
/// `proto_item::Value::RawStr`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoRawStringValue {
    /// Protobuf field 1: the string itself.
    #[prost(string, tag = "1")]
    pub value: String,
}
1213
/// Hand-written prost message for a primitive value; payload of
/// `proto_item::Value::Prim`.
///
/// Only the alternatives with protobuf field numbers 3, 6, 7, 8, 13 and 14
/// are declared in the oneof; any other field number there decodes with
/// `value` left as `None`.
#[derive(Clone, PartialEq, Message)]
pub(crate) struct ProtoPrimitive {
    /// The decoded alternative, if it is one of the declared ones.
    #[prost(oneof = "proto_primitive::Value", tags = "3, 6, 7, 8, 13, 14")]
    pub value: Option<proto_primitive::Value>,
}
1219
/// Holds the oneof payload type referenced by `ProtoPrimitive::value`.
pub(crate) mod proto_primitive {
    use prost::Oneof;

    /// The primitive alternatives that `proto_primitive_to_string` can render.
    #[derive(Clone, PartialEq, Oneof)]
    pub enum Value {
        /// Floating-point value (protobuf field 3).
        #[prost(float, tag = "3")]
        Float(f32),
        /// Signed integer, rendered in decimal (protobuf field 6).
        #[prost(int32, tag = "6")]
        IntDecimal(i32),
        /// Unsigned integer, rendered as `0x`-prefixed lowercase hex
        /// (protobuf field 7).
        #[prost(uint32, tag = "7")]
        IntHexadecimal(u32),
        /// Boolean value (protobuf field 8).
        #[prost(bool, tag = "8")]
        Boolean(bool),
        /// Dimension payload, rendered as its raw `u32` (protobuf field 13).
        // NOTE(review): presumably a packed Android complex value - confirm.
        #[prost(uint32, tag = "13")]
        Dimension(u32),
        /// Fraction payload, rendered as its raw `u32` (protobuf field 14).
        // NOTE(review): presumably a packed Android complex value - confirm.
        #[prost(uint32, tag = "14")]
        Fraction(u32),
    }
}
1239
// Registry entries for the four Android parsers declared in this file.
// NOTE(review): `register_parser!` is defined elsewhere; the positional
// arguments appear to be a description, path globs, the package type, the
// primary language (left empty for three of the entries) and an optional
// documentation URL - confirm against the macro definition.

// Entry for the Soong `METADATA` textproto parser.
crate::register_parser!(
    "Android Soong METADATA textproto",
    &["**/METADATA"],
    "android",
    "",
    Some(
        "https://android.googlesource.com/platform/build/soong/+/refs/heads/main/licenses/metadata/metadata_file.proto"
    ),
);

// Entry for the `AndroidManifest.xml` parser (text XML or binary AXML).
crate::register_parser!(
    "AndroidManifest.xml metadata (text XML or binary AXML)",
    &["**/AndroidManifest.xml"],
    "android",
    "XML",
    Some("https://developer.android.com/guide/topics/manifest/manifest-intro"),
);

// Entry for the `.apk` archive parser.
crate::register_parser!(
    "Android APK archive manifest metadata",
    &["**/*.apk"],
    "android",
    "",
    Some("https://developer.android.com/build/build-for-release"),
);

// Entry for the `.aab` App Bundle parser.
crate::register_parser!(
    "Android App Bundle (.aab) proto manifest metadata",
    &["**/*.aab"],
    "android",
    "",
    Some("https://developer.android.com/guide/app-bundle"),
);