Skip to main content

provenant/parsers/debian/
deb.rs

1use std::path::Path;
2
3use crate::models::{DatasourceId, FileReference, Md5Digest, PackageData, PackageType};
4use crate::parser_warn as warn;
5use crate::parsers::rfc822;
6use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
7
8use super::control::build_package_from_paragraph;
9use super::copyright::parse_copyright_file;
10use super::utils::build_debian_purl;
11use super::{
12    IGNORED_ROOT_DIRS, MAX_ARCHIVE_SIZE, MAX_COMPRESSION_RATIO, MAX_FILE_SIZE, PACKAGE_TYPE,
13    default_package_data,
14};
15use crate::parsers::PackageParser;
16
17/// Parser for Debian binary package archives (.deb files)
18pub struct DebianDebParser;
19
20impl PackageParser for DebianDebParser {
21    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
22
23    fn is_match(path: &Path) -> bool {
24        path.extension().and_then(|e| e.to_str()) == Some("deb")
25    }
26
27    fn extract_packages(path: &Path) -> Vec<PackageData> {
28        // Try to extract metadata from archive contents first
29        if let Ok(data) = extract_deb_archive(path) {
30            return vec![data];
31        }
32
33        // Fallback to filename parsing
34        let filename = match path.file_name().and_then(|n| n.to_str()) {
35            Some(f) => f,
36            None => {
37                return vec![default_package_data(DatasourceId::DebianDeb)];
38            }
39        };
40
41        vec![parse_deb_filename(filename)]
42    }
43}
44
// Registers metadata for the `.deb` archive parser via `register_parser!`.
// NOTE(review): the positional arguments look like description, path glob
// patterns, package type, an empty string whose purpose is not visible here,
// and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Debian binary package archive (.deb)",
    &["**/*.deb"],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-binary.html"),
);
52
53fn extract_deb_archive(path: &Path) -> Result<PackageData, String> {
54    use flate2::read::GzDecoder;
55    use liblzma::read::XzDecoder;
56    use std::io::{Cursor, Read};
57
58    let file_metadata =
59        std::fs::metadata(path).map_err(|e| format!("Failed to stat .deb file: {}", e))?;
60    if file_metadata.len() > MAX_ARCHIVE_SIZE {
61        return Err(format!(
62            ".deb file exceeds MAX_ARCHIVE_SIZE ({} bytes)",
63            file_metadata.len()
64        ));
65    }
66    let compressed_size = file_metadata.len() as usize;
67
68    let file = std::fs::File::open(path).map_err(|e| format!("Failed to open .deb file: {}", e))?;
69
70    let mut archive = ar::Archive::new(file);
71    let mut package: Option<PackageData> = None;
72    let mut total_extracted: usize = 0;
73
74    while let Some(entry_result) = archive.next_entry() {
75        let entry = entry_result.map_err(|e| format!("Failed to read ar entry: {}", e))?;
76
77        let entry_name_raw = entry.header().identifier();
78        let entry_name = String::from_utf8_lossy(entry_name_raw);
79        let had_replacement = entry_name_raw.iter().any(|&b| b > 127);
80        if had_replacement {
81            warn!(
82                "extract_deb_archive: non-UTF-8 bytes in entry name replaced with lossy conversion"
83            );
84        }
85        let entry_name = entry_name.trim().to_string();
86
87        if entry_name == "control.tar.gz" || entry_name.starts_with("control.tar") {
88            let entry_size = entry.header().size();
89            if entry_size > MAX_FILE_SIZE {
90                warn!(
91                    "extract_deb_archive: control tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
92                    entry_size
93                );
94                continue;
95            }
96            let mut control_data = Vec::new();
97            entry
98                .take(MAX_FILE_SIZE)
99                .read_to_end(&mut control_data)
100                .map_err(|e| format!("Failed to read control.tar.gz: {}", e))?;
101
102            total_extracted += control_data.len();
103            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
104                warn!(
105                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
106                );
107                break;
108            }
109            if total_extracted > MAX_ARCHIVE_SIZE as usize {
110                warn!(
111                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
112                );
113                break;
114            }
115
116            if entry_name.ends_with(".gz") {
117                let decoder = GzDecoder::new(Cursor::new(control_data));
118                if let Some(parsed_package) =
119                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
120                {
121                    package = Some(parsed_package);
122                }
123            } else if entry_name.ends_with(".xz") {
124                let decoder = XzDecoder::new(Cursor::new(control_data));
125                if let Some(parsed_package) =
126                    parse_control_tar_archive(decoder, &mut total_extracted, compressed_size)?
127                {
128                    package = Some(parsed_package);
129                }
130            }
131        } else if entry_name.starts_with("data.tar") {
132            let entry_size = entry.header().size();
133            if entry_size > MAX_FILE_SIZE {
134                warn!(
135                    "extract_deb_archive: data tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
136                    entry_size
137                );
138                continue;
139            }
140            let mut data = Vec::new();
141            entry
142                .take(MAX_FILE_SIZE)
143                .read_to_end(&mut data)
144                .map_err(|e| format!("Failed to read data archive: {}", e))?;
145
146            total_extracted += data.len();
147            if compressed_size > 0 && total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
148                warn!(
149                    "extract_deb_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
150                );
151                break;
152            }
153            if total_extracted > MAX_ARCHIVE_SIZE as usize {
154                warn!(
155                    "extract_deb_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
156                );
157                break;
158            }
159
160            let Some(current_package) = package.as_mut() else {
161                continue;
162            };
163
164            if entry_name.ends_with(".gz") {
165                let decoder = GzDecoder::new(Cursor::new(data));
166                merge_deb_data_archive(
167                    decoder,
168                    current_package,
169                    &mut total_extracted,
170                    compressed_size,
171                )?;
172            } else if entry_name.ends_with(".xz") {
173                let decoder = XzDecoder::new(Cursor::new(data));
174                merge_deb_data_archive(
175                    decoder,
176                    current_package,
177                    &mut total_extracted,
178                    compressed_size,
179                )?;
180            }
181        }
182    }
183
184    package.ok_or_else(|| ".deb archive does not contain control.tar.* metadata".to_string())
185}
186
187fn parse_control_tar_archive<R: std::io::Read>(
188    reader: R,
189    total_extracted: &mut usize,
190    compressed_size: usize,
191) -> Result<Option<PackageData>, String> {
192    use std::io::Read;
193
194    let mut tar_archive = tar::Archive::new(reader);
195
196    for tar_entry_result in tar_archive
197        .entries()
198        .map_err(|e| format!("Failed to read tar entries: {}", e))?
199    {
200        let tar_entry = tar_entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
201
202        let tar_path = tar_entry
203            .path()
204            .map_err(|e| format!("Failed to get tar path: {}", e))?;
205
206        if tar_path
207            .components()
208            .any(|c| matches!(c, std::path::Component::ParentDir))
209        {
210            warn!(
211                "parse_control_tar_archive: skipping tar entry with path traversal: {:?}",
212                tar_path
213            );
214            continue;
215        }
216
217        if tar_entry.size() > MAX_FILE_SIZE {
218            warn!(
219                "parse_control_tar_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
220                tar_entry.size()
221            );
222            continue;
223        }
224
225        if tar_path.ends_with("control") {
226            let mut control_content = String::new();
227            tar_entry
228                .take(MAX_FILE_SIZE)
229                .read_to_string(&mut control_content)
230                .map_err(|e| format!("Failed to read control file: {}", e))?;
231
232            *total_extracted += control_content.len();
233            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
234                warn!(
235                    "parse_control_tar_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
236                );
237                return Ok(None);
238            }
239            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
240                warn!(
241                    "parse_control_tar_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
242                );
243                return Ok(None);
244            }
245
246            let paragraphs = rfc822::parse_rfc822_paragraphs(&control_content);
247            if paragraphs.is_empty() {
248                return Err("No paragraphs in control file".to_string());
249            }
250
251            if let Some(package) =
252                build_package_from_paragraph(&paragraphs[0], None, DatasourceId::DebianDeb)
253            {
254                return Ok(Some(package));
255            }
256
257            return Err("Failed to parse control file".to_string());
258        }
259    }
260
261    Ok(None)
262}
263
264fn merge_deb_data_archive<R: std::io::Read>(
265    reader: R,
266    package: &mut PackageData,
267    total_extracted: &mut usize,
268    compressed_size: usize,
269) -> Result<(), String> {
270    use std::io::Read;
271
272    let mut tar_archive = tar::Archive::new(reader);
273
274    for tar_entry_result in tar_archive
275        .entries()
276        .map_err(|e| format!("Failed to read data tar entries: {}", e))?
277    {
278        let tar_entry =
279            tar_entry_result.map_err(|e| format!("Failed to read data tar entry: {}", e))?;
280
281        let tar_path = tar_entry
282            .path()
283            .map_err(|e| format!("Failed to get data tar path: {}", e))?;
284
285        if tar_path
286            .components()
287            .any(|c| matches!(c, std::path::Component::ParentDir))
288        {
289            warn!(
290                "merge_deb_data_archive: skipping tar entry with path traversal: {:?}",
291                tar_path
292            );
293            continue;
294        }
295
296        if tar_entry.size() > MAX_FILE_SIZE {
297            warn!(
298                "merge_deb_data_archive: tar entry exceeds MAX_FILE_SIZE ({} bytes), skipping",
299                tar_entry.size()
300            );
301            continue;
302        }
303
304        let tar_path_str = tar_path.to_string_lossy();
305
306        if tar_path_str.ends_with(&format!(
307            "/usr/share/doc/{}/copyright",
308            package.name.as_deref().unwrap_or_default()
309        )) || tar_path_str.ends_with(&format!(
310            "usr/share/doc/{}/copyright",
311            package.name.as_deref().unwrap_or_default()
312        )) {
313            let mut copyright_content = String::new();
314            tar_entry
315                .take(MAX_FILE_SIZE)
316                .read_to_string(&mut copyright_content)
317                .map_err(|e| format!("Failed to read copyright file from data tar: {}", e))?;
318
319            *total_extracted += copyright_content.len();
320            if compressed_size > 0 && *total_extracted / compressed_size > MAX_COMPRESSION_RATIO {
321                warn!(
322                    "merge_deb_data_archive: compression ratio exceeded MAX_COMPRESSION_RATIO, stopping"
323                );
324                return Ok(());
325            }
326            if *total_extracted > MAX_ARCHIVE_SIZE as usize {
327                warn!(
328                    "merge_deb_data_archive: cumulative extracted size exceeded MAX_ARCHIVE_SIZE, stopping"
329                );
330                return Ok(());
331            }
332
333            let copyright_pkg = parse_copyright_file(&copyright_content, package.name.as_deref());
334            merge_debian_copyright_into_package(package, &copyright_pkg);
335            break;
336        }
337    }
338
339    Ok(())
340}
341
342pub(super) fn merge_debian_copyright_into_package(
343    target: &mut PackageData,
344    copyright: &PackageData,
345) {
346    if target.extracted_license_statement.is_none() {
347        target.extracted_license_statement = copyright.extracted_license_statement.clone();
348    }
349
350    if target.declared_license_expression.is_none() {
351        target.declared_license_expression = copyright.declared_license_expression.clone();
352    }
353    if target.declared_license_expression_spdx.is_none() {
354        target.declared_license_expression_spdx =
355            copyright.declared_license_expression_spdx.clone();
356    }
357    if target.license_detections.is_empty() {
358        target.license_detections = copyright.license_detections.clone();
359    }
360    if target.other_license_expression.is_none() {
361        target.other_license_expression = copyright.other_license_expression.clone();
362    }
363    if target.other_license_expression_spdx.is_none() {
364        target.other_license_expression_spdx = copyright.other_license_expression_spdx.clone();
365    }
366    if target.other_license_detections.is_empty() {
367        target.other_license_detections = copyright.other_license_detections.clone();
368    }
369
370    for party in &copyright.parties {
371        if !target.parties.iter().any(|existing| {
372            existing.r#type == party.r#type
373                && existing.role == party.role
374                && existing.name == party.name
375                && existing.email == party.email
376                && existing.url == party.url
377                && existing.organization == party.organization
378                && existing.organization_url == party.organization_url
379                && existing.timezone == party.timezone
380        }) {
381            target.parties.push(party.clone());
382        }
383    }
384}
385
386fn parse_deb_filename(filename: &str) -> PackageData {
387    let without_ext = filename.trim_end_matches(".deb");
388
389    let parts: Vec<&str> = without_ext.split('_').collect();
390    if parts.len() < 2 {
391        return default_package_data(DatasourceId::DebianDeb);
392    }
393
394    let name = truncate_field(parts[0].to_string());
395    let version = truncate_field(parts[1].to_string());
396    let architecture = if parts.len() >= 3 {
397        Some(truncate_field(parts[2].to_string()))
398    } else {
399        None
400    };
401
402    let namespace = Some("debian".to_string());
403
404    PackageData {
405        datasource_id: Some(DatasourceId::DebianDeb),
406        package_type: Some(PACKAGE_TYPE),
407        namespace: namespace.clone(),
408        name: Some(name.clone()),
409        version: Some(version.clone()),
410        purl: build_debian_purl(
411            &name,
412            Some(&version),
413            namespace.as_deref(),
414            architecture.as_deref(),
415        ),
416        ..Default::default()
417    }
418}
419
420/// Parser for control files inside extracted .deb control tarballs.
421///
422/// Matches paths like `*/control.tar.gz-extract/control` and
423/// `*/control.tar.xz-extract/control` which are created by ExtractCode
424/// when extracting .deb archives.
425pub struct DebianControlInExtractedDebParser;
426
427impl PackageParser for DebianControlInExtractedDebParser {
428    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
429
430    fn is_match(path: &Path) -> bool {
431        path.file_name()
432            .and_then(|n| n.to_str())
433            .is_some_and(|name| name == "control")
434            && path
435                .to_str()
436                .map(|p| {
437                    p.ends_with("control.tar.gz-extract/control")
438                        || p.ends_with("control.tar.xz-extract/control")
439                })
440                .unwrap_or(false)
441    }
442
443    fn extract_packages(path: &Path) -> Vec<PackageData> {
444        let content = match read_file_to_string(path, None) {
445            Ok(c) => c,
446            Err(e) => {
447                warn!(
448                    "Failed to read control file in extracted deb {:?}: {}",
449                    path, e
450                );
451                return vec![default_package_data(
452                    DatasourceId::DebianControlExtractedDeb,
453                )];
454            }
455        };
456
457        // A control file inside an extracted .deb has a single paragraph
458        // (unlike debian/control which has source + binary paragraphs)
459        let paragraphs = rfc822::parse_rfc822_paragraphs(&content);
460        if paragraphs.is_empty() {
461            return vec![default_package_data(
462                DatasourceId::DebianControlExtractedDeb,
463            )];
464        }
465
466        if let Some(pkg) = build_package_from_paragraph(
467            &paragraphs[0],
468            None,
469            DatasourceId::DebianControlExtractedDeb,
470        ) {
471            vec![pkg]
472        } else {
473            vec![default_package_data(
474                DatasourceId::DebianControlExtractedDeb,
475            )]
476        }
477    }
478}
479
480/// Parser for MD5 checksum files inside extracted .deb control tarballs
481pub struct DebianMd5sumInPackageParser;
482
483impl PackageParser for DebianMd5sumInPackageParser {
484    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
485
486    fn is_match(path: &Path) -> bool {
487        path.file_name()
488            .and_then(|n| n.to_str())
489            .is_some_and(|name| name == "md5sums")
490            && path
491                .to_str()
492                .map(|p| {
493                    p.ends_with("control.tar.gz-extract/md5sums")
494                        || p.ends_with("control.tar.xz-extract/md5sums")
495                })
496                .unwrap_or(false)
497    }
498
499    fn extract_packages(path: &Path) -> Vec<PackageData> {
500        let content = match read_file_to_string(path, None) {
501            Ok(c) => c,
502            Err(e) => {
503                warn!("Failed to read md5sums file {:?}: {}", path, e);
504                return vec![default_package_data(
505                    DatasourceId::DebianMd5SumsInExtractedDeb,
506                )];
507            }
508        };
509
510        let package_name = extract_package_name_from_deb_path(path);
511
512        vec![parse_md5sums_in_package(&content, package_name.as_deref())]
513    }
514}
515
/// Derives the package name from a path inside an extracted `.deb`.
///
/// The path is expected to look like
/// `<dir>/<name>_<version>_<arch>.deb-extract/<control dir>/<file>`: the
/// grandparent directory's name must end in `.deb-extract`, and the package
/// name is the text before its first `_`.
///
/// Returns `None` when the path is too short, the grandparent directory name
/// is not valid UTF-8 or lacks the `.deb-extract` suffix, or the resulting
/// name would be empty.
pub(crate) fn extract_package_name_from_deb_path(path: &Path) -> Option<String> {
    let deb_extract_dir = path.parent()?.parent()?.file_name()?.to_str()?;
    let deb_stem = deb_extract_dir
        .strip_suffix("-extract")?
        .strip_suffix(".deb")?;

    // `split` always yields a first item, possibly empty; an empty package
    // name is reported as "no name" rather than `Some("")`.
    let name = deb_stem.split('_').next().filter(|name| !name.is_empty())?;

    Some(name.to_string())
}
526
527fn parse_md5sums_in_package(content: &str, package_name: Option<&str>) -> PackageData {
528    let mut file_references = Vec::new();
529    let mut count = 0usize;
530
531    for line in content.lines() {
532        count += 1;
533        if count > MAX_ITERATION_COUNT {
534            warn!("parse_md5sums_in_package: exceeded MAX_ITERATION_COUNT lines, stopping");
535            break;
536        }
537        let line = line.trim();
538        if line.is_empty() || line.starts_with('#') {
539            continue;
540        }
541
542        let (md5sum, filepath): (Option<Md5Digest>, &str) = if let Some(idx) = line.find("  ") {
543            (
544                Md5Digest::from_hex(line[..idx].trim()).ok(),
545                line[idx + 2..].trim(),
546            )
547        } else if let Some((hash, path)) = line.split_once(' ') {
548            (Md5Digest::from_hex(hash.trim()).ok(), path.trim())
549        } else {
550            (None, line)
551        };
552
553        if IGNORED_ROOT_DIRS.contains(&filepath) {
554            continue;
555        }
556
557        file_references.push(FileReference {
558            path: filepath.to_string(),
559            size: None,
560            sha1: None,
561            md5: md5sum,
562            sha256: None,
563            sha512: None,
564            extra_data: None,
565        });
566    }
567
568    if file_references.is_empty() {
569        return default_package_data(DatasourceId::DebianMd5SumsInExtractedDeb);
570    }
571
572    let namespace = Some("debian".to_string());
573    let mut package = PackageData {
574        datasource_id: Some(DatasourceId::DebianMd5SumsInExtractedDeb),
575        package_type: Some(PACKAGE_TYPE),
576        namespace: namespace.clone(),
577        name: package_name.map(|s| truncate_field(s.to_string())),
578        file_references,
579        ..Default::default()
580    };
581
582    if let Some(n) = &package.name {
583        package.purl = build_debian_purl(n, None, namespace.as_deref(), None);
584    }
585
586    package
587}
588
// Registers metadata for the parser of `control` files found in extracted
// .deb control tarballs.
// NOTE(review): the positional arguments look like description, path glob
// patterns, package type, an empty string whose purpose is not visible here,
// and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Debian control file in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/control",
        "**/control.tar.xz-extract/control"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
599
// Registers metadata for the parser of `md5sums` files found in extracted
// .deb control tarballs.
// NOTE(review): the positional arguments look like description, path glob
// patterns, package type, an empty string whose purpose is not visible here,
// and a documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Debian MD5 checksums in extracted .deb control tarball",
    &[
        "**/control.tar.gz-extract/md5sums",
        "**/control.tar.xz-extract/md5sums"
    ],
    "deb",
    "",
    Some("https://www.debian.org/doc/debian-policy/ch-controlfields.html"),
);
610
611#[cfg(test)]
612mod tests {
613    use super::*;
614    use crate::models::DatasourceId;
615    use ar::{Builder as ArBuilder, Header as ArHeader};
616    use flate2::Compression;
617    use flate2::write::GzEncoder;
618    use liblzma::write::XzEncoder;
619    use std::io::Cursor;
620    use std::path::PathBuf;
621    use tar::{Builder as TarBuilder, Header as TarHeader};
622    use tempfile::NamedTempFile;
623
    /// Builds a temporary `.deb`-shaped `ar` archive containing a
    /// `debian-binary` member and an xz-compressed `control.tar.xz` member
    /// whose only entry is a `control` file for package `synthetic` 1.2.3.
    ///
    /// The returned `NamedTempFile` owns the file on disk; note that its name
    /// is whatever `NamedTempFile::new` picks, not a `.deb` name.
    fn create_synthetic_deb_with_control_tar_xz() -> NamedTempFile {
        let mut control_tar = Vec::new();
        // Inner scope: the encoder mutably borrows `control_tar`, and that
        // borrow has to end before the buffer is read further down.
        {
            let encoder = XzEncoder::new(&mut control_tar, 6);
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 1.2.3\nArchitecture: amd64\nDescription: Synthetic deb\nHomepage: https://example.com\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            // Checksum is computed last, after the other header fields are set.
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.xz");
            tar_builder.finish().expect("control tar.xz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        // Inner scope: the `ar` builder writes through a reopened handle to
        // the same temp file and is dropped before `deb` is returned.
        {
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            // First member: the `debian-binary` marker with content "2.0\n".
            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            // Second member: the compressed control tarball built above.
            let mut control_header =
                ArHeader::new(b"control.tar.xz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.xz entry should be appended");
        }

        deb
    }
669
    /// Builds a temporary `.deb`-shaped `ar` archive with three members:
    /// `debian-binary`, a gzip-compressed `control.tar.gz` (package
    /// `synthetic` 9.9.9) and a gzip-compressed `data.tar.gz` holding
    /// `./usr/share/doc/synthetic/copyright` with an Apache-2.0 stanza.
    ///
    /// The returned `NamedTempFile` owns the file on disk.
    fn create_synthetic_deb_with_copyright() -> NamedTempFile {
        let mut control_tar = Vec::new();
        // Inner scope: the encoder mutably borrows `control_tar`, and that
        // borrow has to end before the buffer is read further down.
        {
            let encoder = GzEncoder::new(&mut control_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            let control_content = b"Package: synthetic\nVersion: 9.9.9\nArchitecture: all\nDescription: Synthetic deb with copyright\n";
            let mut header = TarHeader::new_gnu();
            header
                .set_path("control")
                .expect("control tar path should be valid");
            header.set_size(control_content.len() as u64);
            header.set_mode(0o644);
            // Checksum is computed last, after the other header fields are set.
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(control_content))
                .expect("control file should be appended to tar.gz");
            tar_builder.finish().expect("control tar.gz should finish");
        }

        let mut data_tar = Vec::new();
        // Same pattern as above, this time for the data tarball.
        {
            let encoder = GzEncoder::new(&mut data_tar, Compression::default());
            let mut tar_builder = TarBuilder::new(encoder);

            let copyright = b"Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/\nFiles: *\nCopyright: 2024 Example Org\nLicense: Apache-2.0\n Licensed under the Apache License, Version 2.0.\n";
            let mut header = TarHeader::new_gnu();
            // The doc directory name matches the `Package:` field written to
            // the control file above.
            header
                .set_path("./usr/share/doc/synthetic/copyright")
                .expect("copyright path should be valid");
            header.set_size(copyright.len() as u64);
            header.set_mode(0o644);
            header.set_cksum();
            tar_builder
                .append(&header, Cursor::new(copyright))
                .expect("copyright file should be appended to data tar");
            tar_builder.finish().expect("data tar.gz should finish");
        }

        let deb = NamedTempFile::new().expect("temp deb file should be created");
        // Inner scope: the `ar` builder writes through a reopened handle to
        // the same temp file and is dropped before `deb` is returned.
        {
            let mut builder = ArBuilder::new(
                deb.reopen()
                    .expect("temporary deb file should reopen for writing"),
            );

            // First member: the `debian-binary` marker with content "2.0\n".
            let debian_binary = b"2.0\n";
            let mut debian_binary_header =
                ArHeader::new(b"debian-binary".to_vec(), debian_binary.len() as u64);
            debian_binary_header.set_mode(0o100644);
            builder
                .append(&debian_binary_header, Cursor::new(debian_binary))
                .expect("debian-binary entry should be appended");

            // Control member is appended before the data member.
            let mut control_header =
                ArHeader::new(b"control.tar.gz".to_vec(), control_tar.len() as u64);
            control_header.set_mode(0o100644);
            builder
                .append(&control_header, Cursor::new(control_tar))
                .expect("control.tar.gz entry should be appended");

            let mut data_header = ArHeader::new(b"data.tar.gz".to_vec(), data_tar.len() as u64);
            data_header.set_mode(0o100644);
            builder
                .append(&data_header, Cursor::new(data_tar))
                .expect("data.tar.gz entry should be appended");
        }

        deb
    }
740
741    #[test]
742    fn test_deb_parser_is_match() {
743        assert!(DebianDebParser::is_match(&PathBuf::from("package.deb")));
744        assert!(DebianDebParser::is_match(&PathBuf::from(
745            "libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb"
746        )));
747        assert!(!DebianDebParser::is_match(&PathBuf::from("package.tar.gz")));
748        assert!(!DebianDebParser::is_match(&PathBuf::from("control")));
749    }
750
751    #[test]
752    fn test_parse_deb_filename() {
753        let pkg = parse_deb_filename("nginx_1.18.0-1_amd64.deb");
754        assert_eq!(pkg.name, Some("nginx".to_string()));
755        assert_eq!(pkg.version, Some("1.18.0-1".to_string()));
756
757        let pkg = parse_deb_filename("invalid.deb");
758        assert!(pkg.name.is_none());
759        assert!(pkg.version.is_none());
760    }
761
762    #[test]
763    fn test_parse_deb_filename_with_arch() {
764        let pkg = parse_deb_filename("libapache2-mod-md_2.4.38-3+deb10u10_amd64.deb");
765        assert_eq!(pkg.name, Some("libapache2-mod-md".to_string()));
766        assert_eq!(pkg.version, Some("2.4.38-3+deb10u10".to_string()));
767        assert_eq!(pkg.namespace, Some("debian".to_string()));
768        assert_eq!(
769            pkg.purl,
770            Some("pkg:deb/debian/libapache2-mod-md@2.4.38-3%2Bdeb10u10?arch=amd64".to_string())
771        );
772        assert_eq!(pkg.datasource_id, Some(DatasourceId::DebianDeb));
773    }
774
775    #[test]
776    fn test_parse_deb_filename_without_arch() {
777        let pkg = parse_deb_filename("package_1.0-1_all.deb");
778        assert_eq!(pkg.name, Some("package".to_string()));
779        assert_eq!(pkg.version, Some("1.0-1".to_string()));
780        assert!(pkg.purl.as_ref().unwrap().contains("arch=all"));
781    }
782
783    #[test]
784    fn test_extract_deb_archive() {
785        let test_path = PathBuf::from("testdata/debian/deb/adduser_3.112ubuntu1_all.deb");
786        if !test_path.exists() {
787            return;
788        }
789
790        let pkg = DebianDebParser::extract_first_package(&test_path);
791
792        assert_eq!(pkg.name, Some("adduser".to_string()));
793        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
794        assert_eq!(pkg.namespace, Some("ubuntu".to_string()));
795        assert!(pkg.description.is_some());
796        assert!(!pkg.parties.is_empty());
797
798        assert!(pkg.purl.as_ref().unwrap().contains("adduser"));
799        assert!(pkg.purl.as_ref().unwrap().contains("3.112ubuntu1"));
800    }
801
802    #[test]
803    fn test_deb_parser_xz_control() {
804        let deb = create_synthetic_deb_with_control_tar_xz();
805
806        let pkg = DebianDebParser::extract_first_package(deb.path());
807
808        assert_eq!(pkg.name, Some("synthetic".to_string()));
809        assert_eq!(pkg.version, Some("1.2.3".to_string()));
810        assert_eq!(pkg.description, Some("Synthetic deb".to_string()));
811        assert_eq!(pkg.homepage_url, Some("https://example.com".to_string()));
812    }
813
814    #[test]
815    fn test_deb_parser_with_copyright() {
816        let deb = create_synthetic_deb_with_copyright();
817
818        let pkg = DebianDebParser::extract_first_package(deb.path());
819
820        assert_eq!(pkg.name, Some("synthetic".to_string()));
821        assert_eq!(
822            pkg.extracted_license_statement,
823            Some("Apache-2.0".to_string())
824        );
825        assert!(pkg.parties.iter().any(|party| {
826            party.role.as_deref() == Some("copyright-holder")
827                && party.name.as_deref() == Some("Example Org")
828        }));
829    }
830
831    #[test]
832    fn test_parse_deb_filename_simple() {
833        let pkg = parse_deb_filename("adduser_3.112ubuntu1_all.deb");
834        assert_eq!(pkg.name, Some("adduser".to_string()));
835        assert_eq!(pkg.version, Some("3.112ubuntu1".to_string()));
836        assert_eq!(pkg.namespace, Some("debian".to_string()));
837    }
838
839    #[test]
840    fn test_parse_deb_filename_invalid() {
841        let pkg = parse_deb_filename("invalid.deb");
842        assert!(pkg.name.is_none());
843        assert!(pkg.version.is_none());
844    }
845}