Skip to main content

provenant/output/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::fs::File;
5use std::io::{self, BufWriter, Write};
6
7use crate::output_schema::Output;
8
9mod cyclonedx;
10mod debian;
11mod html;
12mod jsonl;
13mod public_serialize;
14mod shared;
15mod spdx;
16mod template;
17
/// Multi-line notice text naming Provenant as the generating tool, disclaiming warranties and
/// legal advice, and recording the SPDX License List version.
///
/// Each `concat!` operand below is one line of the notice.
pub(crate) const SPDX_DOCUMENT_NOTICE: &str = concat!(
    "Generated with Provenant and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\n",
    "OR CONDITIONS OF ANY KIND, either express or implied. No content created from\n",
    "Provenant should be considered or used as legal advice. Consult an attorney\n",
    "for legal advice.\n",
    "Provenant is a free software code scanning tool.\n",
    "Visit https://github.com/mstykow/provenant/ for support and download.\n",
    "SPDX License List: 3.27",
);
/// Capacity, in bytes, of the buffered writer that wraps the output sink (1 MiB).
const OUTPUT_BUFFER_SIZE: usize = 1 << 20;
20
/// Serialization formats that [`FormatWriter`] can dispatch to.
///
/// `Json` is the default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// Compact JSON followed by a newline.
    #[default]
    Json,
    /// Pretty-printed JSON followed by a newline.
    JsonPretty,
    /// YAML document.
    Yaml,
    /// JSON Lines.
    JsonLines,
    /// Debian copyright file.
    Debian,
    /// HTML report.
    Html,
    /// Output rendered through a custom template.
    CustomTemplate,
    /// SPDX tag/value document.
    SpdxTv,
    /// SPDX RDF/XML document.
    SpdxRdf,
    /// CycloneDX JSON document.
    CycloneDxJson,
    /// CycloneDX XML document.
    CycloneDxXml,
}
36
37#[derive(Debug, Clone, Default)]
38pub struct OutputWriteConfig {
39    pub format: OutputFormat,
40    pub custom_template: Option<String>,
41    pub scanned_path: Option<String>,
42}
43
44pub trait OutputWriter {
45    fn write(
46        &self,
47        output: &Output,
48        writer: &mut dyn Write,
49        config: &OutputWriteConfig,
50    ) -> io::Result<()>;
51}
52
53pub struct FormatWriter {
54    format: OutputFormat,
55}
56
57pub fn writer_for_format(format: OutputFormat) -> FormatWriter {
58    FormatWriter { format }
59}
60
61impl OutputWriter for FormatWriter {
62    fn write(
63        &self,
64        output: &Output,
65        writer: &mut dyn Write,
66        config: &OutputWriteConfig,
67    ) -> io::Result<()> {
68        match self.format {
69            OutputFormat::Json => {
70                serde_json::to_writer(&mut *writer, &public_serialize::PublicOutput(output))
71                    .map_err(shared::io_other)?;
72                writer.write_all(b"\n")
73            }
74            OutputFormat::JsonPretty => {
75                serde_json::to_writer_pretty(&mut *writer, &public_serialize::PublicOutput(output))
76                    .map_err(shared::io_other)?;
77                writer.write_all(b"\n")
78            }
79            OutputFormat::Yaml => write_yaml(output, writer),
80            OutputFormat::JsonLines => jsonl::write_json_lines(output, writer),
81            OutputFormat::Debian => debian::write_debian_copyright(output, writer),
82            OutputFormat::Html => html::write_html_report(output, writer),
83            OutputFormat::CustomTemplate => template::write_custom_template(output, writer, config),
84            OutputFormat::SpdxTv => spdx::write_spdx_tag_value(output, writer, config),
85            OutputFormat::SpdxRdf => spdx::write_spdx_rdf_xml(output, writer, config),
86            OutputFormat::CycloneDxJson => cyclonedx::write_cyclonedx_json(output, writer),
87            OutputFormat::CycloneDxXml => cyclonedx::write_cyclonedx_xml(output, writer),
88        }
89    }
90}
91
92pub fn write_output_file(
93    output_file: &str,
94    output: &Output,
95    config: &OutputWriteConfig,
96) -> io::Result<()> {
97    if output_file == "-" {
98        let stdout = io::stdout();
99        let handle = stdout.lock();
100        let mut writer = BufWriter::with_capacity(OUTPUT_BUFFER_SIZE, handle);
101        writer_for_format(config.format).write(output, &mut writer, config)?;
102        return writer.flush();
103    }
104
105    let file = File::create(output_file)?;
106    let mut writer = BufWriter::with_capacity(OUTPUT_BUFFER_SIZE, file);
107    writer_for_format(config.format).write(output, &mut writer, config)?;
108    writer.flush()
109}
110
111fn write_yaml(output: &Output, writer: &mut dyn Write) -> io::Result<()> {
112    yaml_serde::to_writer(&mut *writer, &public_serialize::PublicOutput(output))
113        .map_err(shared::io_other)?;
114    writer.write_all(b"\n")
115}
116
117#[cfg(test)]
118mod tests {
119    use super::*;
120    use serde_json::Value;
121    use std::fs;
122
123    use crate::license_detection::MatcherKind;
124    use crate::models::{
125        Author, Copyright, ExtraData, FileInfo, FileType, GitSha1, Header, Holder,
126        LicenseDetection, LineNumber, Match, MatchScore, Md5Digest, OutputEmail, OutputURL,
127        Package, PackageData, PackageUid, Sha1Digest, Sha256Digest, SystemEnvironment,
128    };
129    use crate::output_schema::OutputFileInfo;
130
131    #[test]
132    fn test_yaml_writer_outputs_yaml() {
133        let output = Output::from(&sample_internal_output());
134        let mut bytes = Vec::new();
135        writer_for_format(OutputFormat::Yaml)
136            .write(&output, &mut bytes, &OutputWriteConfig::default())
137            .expect("yaml write should succeed");
138        let rendered = String::from_utf8(bytes).expect("yaml should be utf-8");
139        assert!(rendered.contains("headers:"));
140        assert!(rendered.contains("files:"));
141    }
142
143    #[test]
144    fn test_json_lines_writer_outputs_parseable_lines() {
145        let output = Output::from(&sample_internal_output());
146        let mut bytes = Vec::new();
147        writer_for_format(OutputFormat::JsonLines)
148            .write(&output, &mut bytes, &OutputWriteConfig::default())
149            .expect("json-lines write should succeed");
150
151        let rendered = String::from_utf8(bytes).expect("json-lines should be utf-8");
152        let lines = rendered.lines().collect::<Vec<_>>();
153        assert!(lines.len() >= 2);
154        for line in lines {
155            serde_json::from_str::<Value>(line).expect("each line should be valid json");
156        }
157    }
158
159    #[test]
160    fn test_yaml_writer_emits_license_index_provenance_in_headers() {
161        let output = Output::from(&sample_internal_output());
162        let mut bytes = Vec::new();
163        writer_for_format(OutputFormat::Yaml)
164            .write(&output, &mut bytes, &OutputWriteConfig::default())
165            .expect("yaml write should succeed");
166
167        let rendered = String::from_utf8(bytes).expect("yaml should be utf-8");
168        assert!(rendered.contains("license_index_provenance:"));
169        assert!(rendered.contains("dataset_fingerprint: test-fingerprint"));
170        assert!(rendered.contains("source: embedded-artifact"));
171    }
172
173    #[test]
174    fn test_debian_writer_outputs_dep5_style_document() {
175        let mut internal = sample_internal_output();
176        internal.files[0].detected_license_expression = Some("mit".to_string());
177        internal.files[0].license_detections[0].matches[0].matched_text = Some(
178            "Permission is hereby granted, free of charge, to any person obtaining a copy"
179                .to_string(),
180        );
181        let output = Output::from(&internal);
182
183        let mut bytes = Vec::new();
184        writer_for_format(OutputFormat::Debian)
185            .write(&output, &mut bytes, &OutputWriteConfig::default())
186            .expect("debian write should succeed");
187
188        let rendered = String::from_utf8(bytes).expect("debian output should be utf-8");
189        assert!(rendered.contains(
190            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"
191        ));
192        assert!(rendered.contains("Comment: Generated with Provenant"));
193        assert!(rendered.contains("Files: src/main.rs"));
194        assert!(rendered.contains("Copyright: Example Org"));
195        assert!(rendered.contains("License: mit"));
196        assert!(rendered.contains(" Permission is hereby granted, free of charge"));
197    }
198
199    #[test]
200    fn test_debian_writer_skips_directories_and_deduplicates_license_texts() {
201        let mut internal = sample_internal_output();
202        internal.files.insert(
203            0,
204            FileInfo::new(
205                "src".to_string(),
206                "src".to_string(),
207                String::new(),
208                "src".to_string(),
209                FileType::Directory,
210                None,
211                None,
212                0,
213                None,
214                None,
215                None,
216                None,
217                None,
218                vec![],
219                None,
220                vec![],
221                vec![],
222                vec![],
223                vec![],
224                vec![],
225                vec![],
226                vec![],
227                vec![],
228                vec![],
229            ),
230        );
231        internal.files[1].detected_license_expression = Some("mit".to_string());
232        internal.files[1].license_detections[0].matches[0].matched_text =
233            Some("Same text".to_string());
234        internal.files[1].license_detections[0].matches.push(Match {
235            license_expression: "mit".to_string(),
236            license_expression_spdx: "MIT".to_string(),
237            from_file: Some("src/main.rs".to_string()),
238            start_line: LineNumber::ONE,
239            end_line: LineNumber::ONE,
240            matcher: MatcherKind::Aho,
241            score: MatchScore::MAX,
242            matched_length: Some(1),
243            match_coverage: Some(100.0),
244            rule_relevance: Some(100),
245            rule_identifier: "mit_rule".to_string(),
246            rule_url: None,
247            matched_text: Some("Same text again".to_string()),
248            referenced_filenames: None,
249            matched_text_diagnostics: None,
250        });
251        let output = Output::from(&internal);
252
253        let mut bytes = Vec::new();
254        writer_for_format(OutputFormat::Debian)
255            .write(&output, &mut bytes, &OutputWriteConfig::default())
256            .expect("debian write should succeed");
257
258        let rendered = String::from_utf8(bytes).expect("debian output should be utf-8");
259        assert!(!rendered.contains("Files: src\n"));
260        assert_eq!(rendered.matches(" Same text").count(), 1);
261    }
262
263    #[test]
264    fn test_file_info_serialization_omits_info_fields_when_unset() {
265        let file = FileInfo::new(
266            "main.rs".to_string(),
267            "main".to_string(),
268            "rs".to_string(),
269            "src/main.rs".to_string(),
270            FileType::File,
271            None,
272            None,
273            42,
274            None,
275            None,
276            None,
277            None,
278            None,
279            vec![],
280            None,
281            vec![],
282            vec![],
283            vec![],
284            vec![],
285            vec![],
286            vec![],
287            vec![],
288            vec![],
289            vec![],
290        );
291
292        let schema_file = OutputFileInfo::from(&file);
293        let value = serde_json::to_value(&schema_file).expect("file info serializes");
294        let object = value.as_object().expect("file info object");
295
296        assert!(!object.contains_key("date"));
297        assert!(!object.contains_key("sha1"));
298        assert!(!object.contains_key("md5"));
299        assert!(!object.contains_key("sha256"));
300        assert!(!object.contains_key("sha1_git"));
301        assert!(!object.contains_key("mime_type"));
302        assert!(!object.contains_key("file_type"));
303        assert!(!object.contains_key("programming_language"));
304        assert!(!object.contains_key("is_binary"));
305        assert!(!object.contains_key("is_text"));
306        assert!(!object.contains_key("is_archive"));
307        assert!(!object.contains_key("is_media"));
308        assert!(!object.contains_key("is_source"));
309        assert!(!object.contains_key("is_script"));
310        assert!(!object.contains_key("files_count"));
311        assert!(!object.contains_key("dirs_count"));
312        assert!(!object.contains_key("size_count"));
313        assert!(!object.contains_key("license_policy"));
314    }
315
316    #[test]
317    fn test_file_info_serialization_keeps_license_policy_when_enabled() {
318        let mut file = FileInfo::new(
319            "main.rs".to_string(),
320            "main".to_string(),
321            "rs".to_string(),
322            "src/main.rs".to_string(),
323            FileType::File,
324            Some("text/plain".to_string()),
325            Some("text".to_string()),
326            42,
327            Some("2026-01-01T00:00:00Z".to_string()),
328            Some(Sha1Digest::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap()),
329            Some(Md5Digest::from_hex("d41d8cd98f00b204e9800998ecf8427e").unwrap()),
330            Some(
331                Sha256Digest::from_hex(
332                    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
333                )
334                .unwrap(),
335            ),
336            Some("Rust".to_string()),
337            vec![],
338            None,
339            vec![],
340            vec![],
341            vec![],
342            vec![],
343            vec![],
344            vec![],
345            vec![],
346            vec![],
347            vec![],
348        );
349        file.license_policy = Some(vec![]);
350        file.sha1_git =
351            Some(GitSha1::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap());
352        file.is_binary = Some(false);
353        file.is_text = Some(true);
354        file.is_archive = Some(false);
355        file.is_media = Some(false);
356        file.is_source = Some(true);
357        file.is_script = Some(false);
358        file.files_count = Some(0);
359        file.dirs_count = Some(0);
360        file.size_count = Some(0);
361
362        let schema_file = OutputFileInfo::from(&file);
363        let value = serde_json::to_value(&schema_file).expect("file info serializes");
364        let object = value.as_object().expect("file info object");
365
366        assert_eq!(object.get("license_policy"), Some(&serde_json::json!([])));
367        assert_eq!(object.get("file_type"), Some(&serde_json::json!("text")));
368        assert_eq!(object.get("is_binary"), Some(&serde_json::json!(false)));
369        assert_eq!(object.get("is_text"), Some(&serde_json::json!(true)));
370        assert_eq!(object.get("files_count"), Some(&serde_json::json!(0)));
371        assert_eq!(object.get("dirs_count"), Some(&serde_json::json!(0)));
372        assert_eq!(object.get("size_count"), Some(&serde_json::json!(0)));
373    }
374
375    #[test]
376    fn test_detected_license_expression_spdx_prefers_detection_spdx_values() {
377        let mut internal = sample_internal_output();
378        internal.files[0].detected_license_expression = Some("mit".to_string());
379
380        let schema_file = OutputFileInfo::from(&internal.files[0]);
381        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
382        assert_eq!(schema_value["detected_license_expression_spdx"], "MIT");
383
384        let output = Output::from(&internal);
385        let mut bytes = Vec::new();
386        writer_for_format(OutputFormat::Json)
387            .write(&output, &mut bytes, &OutputWriteConfig::default())
388            .expect("json write should succeed");
389
390        let rendered: Value = serde_json::from_slice(&bytes).expect("json output should parse");
391        assert_eq!(
392            rendered["files"][0]["detected_license_expression_spdx"],
393            "MIT"
394        );
395    }
396
397    #[test]
398    fn test_detected_license_expression_spdx_preserves_distinct_nested_operands() {
399        let mut internal = sample_internal_output();
400        internal.files[0].license_detections = vec![crate::models::LicenseDetection {
401            license_expression: "mit AND (apache-2.0 OR mit)".to_string(),
402            license_expression_spdx: "MIT AND (Apache-2.0 OR MIT)".to_string(),
403            matches: vec![],
404            detection_log: vec![],
405            identifier: String::new(),
406        }];
407        internal.files[0].detected_license_expression = None;
408
409        let schema_file = OutputFileInfo::from(&internal.files[0]);
410        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
411        assert_eq!(
412            schema_value["detected_license_expression_spdx"],
413            "MIT AND (Apache-2.0 OR MIT)"
414        );
415    }
416
417    #[test]
418    fn test_detected_license_expression_spdx_prefers_covering_joined_expression() {
419        let mut internal = sample_internal_output();
420        internal.files[0].license_detections = vec![
421            crate::models::LicenseDetection {
422                license_expression: "apache-2.0 OR mit".to_string(),
423                license_expression_spdx: "Apache-2.0 OR MIT".to_string(),
424                matches: vec![],
425                detection_log: vec![],
426                identifier: String::new(),
427            },
428            crate::models::LicenseDetection {
429                license_expression: "apache-2.0".to_string(),
430                license_expression_spdx: "Apache-2.0".to_string(),
431                matches: vec![],
432                detection_log: vec![],
433                identifier: String::new(),
434            },
435        ];
436        internal.files[0].detected_license_expression = None;
437
438        let schema_file = OutputFileInfo::from(&internal.files[0]);
439        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
440        assert_eq!(
441            schema_value["detected_license_expression_spdx"],
442            "Apache-2.0 OR MIT"
443        );
444    }
445
446    #[test]
447    fn test_json_lines_writer_sorts_files_by_path_for_reproducibility() {
448        let mut internal = sample_internal_output();
449        internal.files.reverse();
450        let output = Output::from(&internal);
451        let mut bytes = Vec::new();
452        writer_for_format(OutputFormat::JsonLines)
453            .write(&output, &mut bytes, &OutputWriteConfig::default())
454            .expect("json-lines write should succeed");
455
456        let rendered = String::from_utf8(bytes).expect("json-lines should be utf-8");
457        let file_lines = rendered
458            .lines()
459            .filter_map(|line| {
460                let value: Value = serde_json::from_str(line).ok()?;
461                let files = value.get("files")?.as_array()?;
462                files.first()?.get("path")?.as_str().map(str::to_string)
463            })
464            .collect::<Vec<_>>();
465
466        let mut sorted = file_lines.clone();
467        sorted.sort();
468        assert_eq!(file_lines, sorted);
469    }
470
471    #[test]
472    fn test_spdx_tag_value_writer_contains_required_fields() {
473        let output = Output::from(&sample_internal_output());
474        let mut bytes = Vec::new();
475        writer_for_format(OutputFormat::SpdxTv)
476            .write(
477                &output,
478                &mut bytes,
479                &OutputWriteConfig {
480                    format: OutputFormat::SpdxTv,
481                    custom_template: None,
482                    scanned_path: Some("scan".to_string()),
483                },
484            )
485            .expect("spdx tv write should succeed");
486
487        let rendered = String::from_utf8(bytes).expect("spdx should be utf-8");
488        assert!(rendered.contains("SPDXVersion: SPDX-2.2"));
489        assert!(rendered.contains("FileName: ./src/main.rs"));
490    }
491
492    #[test]
493    fn test_spdx_rdf_writer_outputs_xml() {
494        let output = Output::from(&sample_internal_output());
495        let mut bytes = Vec::new();
496        writer_for_format(OutputFormat::SpdxRdf)
497            .write(
498                &output,
499                &mut bytes,
500                &OutputWriteConfig {
501                    format: OutputFormat::SpdxRdf,
502                    custom_template: None,
503                    scanned_path: Some("scan".to_string()),
504                },
505            )
506            .expect("spdx rdf write should succeed");
507
508        let rendered = String::from_utf8(bytes).expect("rdf should be utf-8");
509        assert!(rendered.contains("<rdf:RDF"));
510        assert!(rendered.contains("<spdx:SpdxDocument"));
511        assert!(rendered.contains("<spdx:created>2026-01-01T00:00:00Z</spdx:created>"));
512    }
513
514    #[test]
515    fn test_cyclonedx_writers_keep_iso_timestamps_when_headers_use_scancode_format() {
516        let mut internal = sample_internal_output();
517        internal.packages.push(Package::from_package_data(
518            &PackageData {
519                name: Some("demo".to_string()),
520                version: Some("1.0.0".to_string()),
521                ..PackageData::default()
522            },
523            "scan/package.json".to_string(),
524        ));
525        let output = Output::from(&internal);
526
527        let mut json_bytes = Vec::new();
528        writer_for_format(OutputFormat::CycloneDxJson)
529            .write(
530                &output,
531                &mut json_bytes,
532                &OutputWriteConfig {
533                    format: OutputFormat::CycloneDxJson,
534                    custom_template: None,
535                    scanned_path: Some("scan".to_string()),
536                },
537            )
538            .expect("cyclonedx json write should succeed");
539        let json_value: Value =
540            serde_json::from_slice(&json_bytes).expect("cyclonedx json should parse");
541        assert_eq!(
542            json_value["metadata"]["timestamp"].as_str(),
543            Some("2026-01-01T00:00:01Z")
544        );
545
546        let mut xml_bytes = Vec::new();
547        writer_for_format(OutputFormat::CycloneDxXml)
548            .write(
549                &output,
550                &mut xml_bytes,
551                &OutputWriteConfig {
552                    format: OutputFormat::CycloneDxXml,
553                    custom_template: None,
554                    scanned_path: Some("scan".to_string()),
555                },
556            )
557            .expect("cyclonedx xml write should succeed");
558        let xml = String::from_utf8(xml_bytes).expect("cyclonedx xml should be utf-8");
559        assert!(xml.contains("<timestamp>2026-01-01T00:00:01Z</timestamp>"));
560    }
561
562    #[test]
563    fn test_spdx_writers_emit_real_file_and_package_license_info() {
564        let output = Output::from(&sample_internal_output());
565
566        let mut tv_bytes = Vec::new();
567        writer_for_format(OutputFormat::SpdxTv)
568            .write(
569                &output,
570                &mut tv_bytes,
571                &OutputWriteConfig {
572                    format: OutputFormat::SpdxTv,
573                    custom_template: None,
574                    scanned_path: Some("scan".to_string()),
575                },
576            )
577            .expect("spdx tv write should succeed");
578        let tv_rendered = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
579        assert!(tv_rendered.contains("PackageLicenseConcluded: NOASSERTION"));
580        assert!(tv_rendered.contains("PackageLicenseInfoFromFiles: MIT"));
581        assert!(tv_rendered.contains("LicenseConcluded: NOASSERTION"));
582        assert!(tv_rendered.contains("LicenseInfoInFile: MIT"));
583        assert!(tv_rendered.contains("PackageCopyrightText: Copyright (c) Example"));
584
585        let mut rdf_bytes = Vec::new();
586        writer_for_format(OutputFormat::SpdxRdf)
587            .write(
588                &output,
589                &mut rdf_bytes,
590                &OutputWriteConfig {
591                    format: OutputFormat::SpdxRdf,
592                    custom_template: None,
593                    scanned_path: Some("scan".to_string()),
594                },
595            )
596            .expect("spdx rdf write should succeed");
597        let rdf_rendered = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
598        assert!(rdf_rendered.contains(
599            "<spdx:licenseInfoFromFiles rdf:resource=\"http://spdx.org/licenses/MIT\"/>"
600        ));
601        assert!(
602            rdf_rendered.contains(
603                "<spdx:licenseInfoInFile rdf:resource=\"http://spdx.org/licenses/MIT\"/>"
604            )
605        );
606        assert!(rdf_rendered.contains(
607            "<spdx:licenseConcluded rdf:resource=\"http://spdx.org/rdf/terms#noassertion\"/>"
608        ));
609    }
610
611    #[test]
612    fn test_spdx_writers_emit_license_ref_metadata_and_matched_text() {
613        let mut internal = sample_internal_output();
614        internal.files[0].license_detections = vec![LicenseDetection {
615            license_expression: "unknown-license-reference".to_string(),
616            license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
617            matches: vec![Match {
618                license_expression: "unknown-license-reference".to_string(),
619                license_expression_spdx: "LicenseRef-scancode-unknown-license-reference"
620                    .to_string(),
621                from_file: Some("src/main.rs".to_string()),
622                start_line: LineNumber::ONE,
623                end_line: LineNumber::new(2).unwrap(),
624                matcher: MatcherKind::Aho,
625                score: MatchScore::MAX,
626                matched_length: Some(4),
627                match_coverage: Some(100.0),
628                rule_relevance: Some(100),
629                rule_identifier: "unknown-license-reference.RULE".to_string(),
630                rule_url: Some("https://example.com/unknown-license-reference.LICENSE".to_string()),
631                matched_text: Some("Custom license text".to_string()),
632                referenced_filenames: Some(vec!["LICENSE".to_string()]),
633                matched_text_diagnostics: None,
634            }],
635            detection_log: vec![],
636            identifier: "unknown-ref-id".to_string(),
637        }];
638        internal.license_references = vec![crate::models::LicenseReference {
639            key: Some("unknown-license-reference".to_string()),
640            language: Some("en".to_string()),
641            name: "Unknown License Reference".to_string(),
642            short_name: "Unknown License Reference".to_string(),
643            owner: None,
644            homepage_url: None,
645            spdx_license_key: "LicenseRef-scancode-unknown-license-reference".to_string(),
646            other_spdx_license_keys: vec![],
647            osi_license_key: None,
648            text_urls: vec![],
649            osi_url: None,
650            faq_url: None,
651            other_urls: vec![],
652            category: None,
653            is_exception: false,
654            is_unknown: true,
655            is_generic: false,
656            notes: None,
657            minimum_coverage: None,
658            standard_notice: None,
659            ignorable_copyrights: vec![],
660            ignorable_holders: vec![],
661            ignorable_authors: vec![],
662            ignorable_urls: vec![],
663            ignorable_emails: vec![],
664            scancode_url: None,
665            licensedb_url: None,
666            spdx_url: None,
667            text: "Unused fallback text".to_string(),
668        }];
669        let output = Output::from(&internal);
670
671        let mut tv_bytes = Vec::new();
672        writer_for_format(OutputFormat::SpdxTv)
673            .write(
674                &output,
675                &mut tv_bytes,
676                &OutputWriteConfig {
677                    format: OutputFormat::SpdxTv,
678                    custom_template: None,
679                    scanned_path: Some("scan".to_string()),
680                },
681            )
682            .expect("spdx tv write should succeed");
683        let tv_rendered = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
684        assert!(
685            tv_rendered
686                .contains("LicenseInfoInFile: LicenseRef-scancode-unknown-license-reference")
687        );
688        assert!(tv_rendered.contains(
689            "PackageLicenseInfoFromFiles: LicenseRef-scancode-unknown-license-reference"
690        ));
691        assert!(tv_rendered.contains("LicenseID: LicenseRef-scancode-unknown-license-reference"));
692        assert!(tv_rendered.contains("ExtractedText: <text>Custom license text"));
693        assert!(tv_rendered.contains("LicenseName: Unknown License Reference"));
694        assert!(tv_rendered.contains(
695            "LicenseComment: <text>See details at https://example.com/unknown-license-reference.LICENSE"
696        ));
697
698        let mut rdf_bytes = Vec::new();
699        writer_for_format(OutputFormat::SpdxRdf)
700            .write(
701                &output,
702                &mut rdf_bytes,
703                &OutputWriteConfig {
704                    format: OutputFormat::SpdxRdf,
705                    custom_template: None,
706                    scanned_path: Some("scan".to_string()),
707                },
708            )
709            .expect("spdx rdf write should succeed");
710        let rdf_rendered = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
711        assert!(rdf_rendered.contains(
712            "<spdx:licenseInfoInFile rdf:resource=\"http://spdx.org/licenses/LicenseRef-scancode-unknown-license-reference\"/>"
713        ));
714        assert!(rdf_rendered.contains(
715            "<spdx:hasExtractedLicensingInfo><spdx:ExtractedLicensingInfo rdf:about=\"#LicenseRef-scancode-unknown-license-reference\">"
716        ));
717        assert!(
718            rdf_rendered.contains("<spdx:extractedText>Custom license text</spdx:extractedText>")
719        );
720    }
721
722    #[test]
723    fn test_cyclonedx_json_writer_outputs_bom() {
724        let output = Output::from(&sample_internal_output());
725        let mut bytes = Vec::new();
726        writer_for_format(OutputFormat::CycloneDxJson)
727            .write(&output, &mut bytes, &OutputWriteConfig::default())
728            .expect("cyclonedx json write should succeed");
729
730        let rendered = String::from_utf8(bytes).expect("cyclonedx json should be utf-8");
731        let value: Value = serde_json::from_str(&rendered).expect("valid json");
732        assert_eq!(value["bomFormat"], "CycloneDX");
733        assert_eq!(value["specVersion"], "1.3");
734    }
735
736    #[test]
737    fn test_json_writer_includes_summary_and_key_file_flags() {
738        let mut internal = sample_internal_output();
739        internal.summary = Some(crate::models::Summary {
740            declared_license_expression: Some("apache-2.0".to_string()),
741            license_clarity_score: Some(crate::models::LicenseClarityScore {
742                score: 100,
743                declared_license: true,
744                identification_precision: true,
745                has_license_text: true,
746                declared_copyrights: true,
747                conflicting_license_categories: false,
748                ambiguous_compound_licensing: false,
749            }),
750            declared_holder: Some("Example Corp.".to_string()),
751            primary_language: Some("Ruby".to_string()),
752            other_license_expressions: vec![crate::models::TallyEntry {
753                value: Some("mit".to_string()),
754                count: 1,
755            }],
756            other_holders: vec![
757                crate::models::TallyEntry {
758                    value: None,
759                    count: 2,
760                },
761                crate::models::TallyEntry {
762                    value: Some("Other Corp.".to_string()),
763                    count: 1,
764                },
765            ],
766            other_languages: vec![crate::models::TallyEntry {
767                value: Some("Python".to_string()),
768                count: 2,
769            }],
770        });
771        internal.files[0].is_legal = true;
772        internal.files[0].is_top_level = true;
773        internal.files[0].is_key_file = true;
774        let output = Output::from(&internal);
775
776        let mut bytes = Vec::new();
777        writer_for_format(OutputFormat::Json)
778            .write(&output, &mut bytes, &OutputWriteConfig::default())
779            .expect("json write should succeed");
780
781        let rendered = String::from_utf8(bytes).expect("json should be utf-8");
782        let value: Value = serde_json::from_str(&rendered).expect("valid json");
783
784        assert_eq!(
785            value["summary"]["declared_license_expression"],
786            "apache-2.0"
787        );
788        assert_eq!(value["summary"]["license_clarity_score"]["score"], 100);
789        assert_eq!(value["summary"]["declared_holder"], "Example Corp.");
790        assert_eq!(value["summary"]["primary_language"], "Ruby");
791        assert_eq!(
792            value["summary"]["other_license_expressions"][0]["value"],
793            "mit"
794        );
795        assert!(value["summary"]["other_holders"][0]["value"].is_null());
796        assert_eq!(value["summary"]["other_holders"][1]["value"], "Other Corp.");
797        assert_eq!(value["summary"]["other_languages"][0]["value"], "Python");
798        assert_eq!(value["files"][0]["is_key_file"], true);
799    }
800
801    #[test]
802    fn test_json_and_json_lines_writers_include_top_level_tallies() {
803        let mut internal = sample_internal_output();
804        internal.tallies = Some(crate::models::Tallies {
805            detected_license_expression: vec![crate::models::TallyEntry {
806                value: Some("mit".to_string()),
807                count: 2,
808            }],
809            copyrights: vec![crate::models::TallyEntry {
810                value: Some("Copyright (c) Example Org".to_string()),
811                count: 1,
812            }],
813            holders: vec![crate::models::TallyEntry {
814                value: Some("Example Org".to_string()),
815                count: 1,
816            }],
817            authors: vec![crate::models::TallyEntry {
818                value: Some("Jane Doe".to_string()),
819                count: 1,
820            }],
821            programming_language: vec![crate::models::TallyEntry {
822                value: Some("Rust".to_string()),
823                count: 1,
824            }],
825        });
826        let output = Output::from(&internal);
827
828        let mut json_bytes = Vec::new();
829        writer_for_format(OutputFormat::Json)
830            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
831            .expect("json write should succeed");
832        let json_value: Value =
833            serde_json::from_slice(&json_bytes).expect("json output should parse");
834        assert_eq!(
835            json_value["tallies"]["detected_license_expression"][0]["value"],
836            "mit"
837        );
838        assert_eq!(
839            json_value["tallies"]["programming_language"][0]["value"],
840            "Rust"
841        );
842
843        let mut jsonl_bytes = Vec::new();
844        writer_for_format(OutputFormat::JsonLines)
845            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
846            .expect("json-lines write should succeed");
847        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
848        assert!(rendered.lines().any(|line| line.contains("\"tallies\"")));
849    }
850
851    #[test]
852    fn test_json_and_json_lines_writers_include_key_file_tallies() {
853        let mut internal = sample_internal_output();
854        internal.tallies_of_key_files = Some(crate::models::Tallies {
855            detected_license_expression: vec![crate::models::TallyEntry {
856                value: Some("apache-2.0".to_string()),
857                count: 1,
858            }],
859            copyrights: vec![],
860            holders: vec![],
861            authors: vec![],
862            programming_language: vec![crate::models::TallyEntry {
863                value: Some("Markdown".to_string()),
864                count: 1,
865            }],
866        });
867        let output = Output::from(&internal);
868
869        let mut json_bytes = Vec::new();
870        writer_for_format(OutputFormat::Json)
871            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
872            .expect("json write should succeed");
873        let json_value: Value =
874            serde_json::from_slice(&json_bytes).expect("json output should parse");
875        assert_eq!(
876            json_value["tallies_of_key_files"]["detected_license_expression"][0]["value"],
877            "apache-2.0"
878        );
879
880        let mut jsonl_bytes = Vec::new();
881        writer_for_format(OutputFormat::JsonLines)
882            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
883            .expect("json-lines write should succeed");
884        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
885        assert!(
886            rendered
887                .lines()
888                .any(|line| line.contains("\"tallies_of_key_files\""))
889        );
890    }
891
892    #[test]
893    fn test_json_and_json_lines_writers_include_file_tallies() {
894        let mut internal = sample_internal_output();
895        internal.files[0].tallies = Some(crate::models::Tallies {
896            detected_license_expression: vec![crate::models::TallyEntry {
897                value: Some("mit".to_string()),
898                count: 1,
899            }],
900            copyrights: vec![crate::models::TallyEntry {
901                value: None,
902                count: 1,
903            }],
904            holders: vec![],
905            authors: vec![],
906            programming_language: vec![crate::models::TallyEntry {
907                value: Some("Rust".to_string()),
908                count: 1,
909            }],
910        });
911        let output = Output::from(&internal);
912
913        let mut json_bytes = Vec::new();
914        writer_for_format(OutputFormat::Json)
915            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
916            .expect("json write should succeed");
917        let json_value: Value =
918            serde_json::from_slice(&json_bytes).expect("json output should parse");
919        assert_eq!(
920            json_value["files"][0]["tallies"]["detected_license_expression"][0]["value"],
921            "mit"
922        );
923
924        let mut jsonl_bytes = Vec::new();
925        writer_for_format(OutputFormat::JsonLines)
926            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
927            .expect("json-lines write should succeed");
928        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
929        assert!(rendered.lines().any(|line| line.contains("\"tallies\"")));
930    }
931
932    #[test]
933    fn test_json_and_json_lines_writers_include_facets_and_tallies_by_facet() {
934        let mut internal = sample_internal_output();
935        internal.files[0].facets = vec!["core".to_string(), "docs".to_string()];
936        internal.tallies_by_facet = Some(vec![crate::models::FacetTallies {
937            facet: "core".to_string(),
938            tallies: crate::models::Tallies {
939                detected_license_expression: vec![crate::models::TallyEntry {
940                    value: Some("mit".to_string()),
941                    count: 1,
942                }],
943                copyrights: vec![],
944                holders: vec![],
945                authors: vec![],
946                programming_language: vec![],
947            },
948        }]);
949        let output = Output::from(&internal);
950
951        let mut json_bytes = Vec::new();
952        writer_for_format(OutputFormat::Json)
953            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
954            .expect("json write should succeed");
955        let json_value: Value =
956            serde_json::from_slice(&json_bytes).expect("json output should parse");
957        assert_eq!(json_value["files"][0]["facets"][0], "core");
958        assert_eq!(json_value["tallies_by_facet"][0]["facet"], "core");
959
960        let mut jsonl_bytes = Vec::new();
961        writer_for_format(OutputFormat::JsonLines)
962            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
963            .expect("json-lines write should succeed");
964        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
965        assert!(
966            rendered
967                .lines()
968                .any(|line| line.contains("\"tallies_by_facet\""))
969        );
970    }
971
972    #[test]
973    fn test_json_and_json_lines_writers_include_top_level_license_references() {
974        let mut internal = sample_internal_output();
975        internal.license_references = vec![crate::models::LicenseReference {
976            key: Some("mit".to_string()),
977            language: Some("en".to_string()),
978            name: "MIT License".to_string(),
979            short_name: "MIT".to_string(),
980            owner: Some("Example Owner".to_string()),
981            homepage_url: Some("https://example.com/license".to_string()),
982            spdx_license_key: "MIT".to_string(),
983            other_spdx_license_keys: vec![],
984            osi_license_key: Some("MIT".to_string()),
985            text_urls: vec!["https://example.com/license.txt".to_string()],
986            osi_url: Some("https://opensource.org/licenses/MIT".to_string()),
987            faq_url: None,
988            other_urls: vec![],
989            category: None,
990            is_exception: false,
991            is_unknown: false,
992            is_generic: false,
993            notes: None,
994            minimum_coverage: None,
995            standard_notice: None,
996            ignorable_copyrights: vec![],
997            ignorable_holders: vec![],
998            ignorable_authors: vec![],
999            ignorable_urls: vec![],
1000            ignorable_emails: vec![],
1001            scancode_url: None,
1002            licensedb_url: None,
1003            spdx_url: None,
1004            text: "MIT text".to_string(),
1005        }];
1006        internal.license_rule_references = vec![crate::models::LicenseRuleReference {
1007            identifier: "license-clue_1.RULE".to_string(),
1008            license_expression: "unknown-license-reference".to_string(),
1009            is_license_text: false,
1010            is_license_notice: false,
1011            is_license_reference: false,
1012            is_license_tag: false,
1013            is_license_clue: true,
1014            is_license_intro: false,
1015            language: None,
1016            rule_url: None,
1017            is_required_phrase: false,
1018            skip_for_required_phrase_generation: false,
1019            replaced_by: vec![],
1020            is_continuous: false,
1021            is_synthetic: false,
1022            is_from_license: false,
1023            length: 0,
1024            relevance: None,
1025            minimum_coverage: None,
1026            referenced_filenames: vec![],
1027            notes: None,
1028            ignorable_copyrights: vec![],
1029            ignorable_holders: vec![],
1030            ignorable_authors: vec![],
1031            ignorable_urls: vec![],
1032            ignorable_emails: vec![],
1033            text: None,
1034        }];
1035        let output = Output::from(&internal);
1036
1037        let mut json_bytes = Vec::new();
1038        writer_for_format(OutputFormat::Json)
1039            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1040            .expect("json write should succeed");
1041        let json_value: Value =
1042            serde_json::from_slice(&json_bytes).expect("json output should parse");
1043        assert_eq!(
1044            json_value["license_references"][0]["spdx_license_key"],
1045            "MIT"
1046        );
1047        assert_eq!(json_value["license_references"][0]["key"], "mit");
1048        assert_eq!(json_value["license_references"][0]["language"], "en");
1049        assert_eq!(
1050            json_value["license_references"][0]["owner"],
1051            "Example Owner"
1052        );
1053        assert_eq!(
1054            json_value["license_references"][0]["homepage_url"],
1055            "https://example.com/license"
1056        );
1057        assert_eq!(
1058            json_value["license_references"][0]["osi_license_key"],
1059            "MIT"
1060        );
1061        assert_eq!(
1062            json_value["license_references"][0]["text_urls"][0],
1063            "https://example.com/license.txt"
1064        );
1065        assert_eq!(
1066            json_value["license_rule_references"][0]["identifier"],
1067            "license-clue_1.RULE"
1068        );
1069        assert_eq!(
1070            json_value["license_rule_references"][0]["relevance"],
1071            Value::Null
1072        );
1073        assert_eq!(
1074            json_value["license_rule_references"][0]["length"],
1075            Value::from(0)
1076        );
1077
1078        let mut jsonl_bytes = Vec::new();
1079        writer_for_format(OutputFormat::JsonLines)
1080            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1081            .expect("json-lines write should succeed");
1082        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1083        assert!(
1084            rendered
1085                .lines()
1086                .any(|line| line.contains("\"license_references\""))
1087        );
1088        assert!(
1089            rendered
1090                .lines()
1091                .any(|line| line.contains("\"license_rule_references\""))
1092        );
1093    }
1094
1095    #[test]
1096    fn test_json_and_json_lines_writers_include_top_level_license_detections() {
1097        let mut internal = sample_internal_output();
1098        internal.license_detections = vec![crate::models::TopLevelLicenseDetection {
1099            identifier: "mit-id".to_string(),
1100            license_expression: "mit".to_string(),
1101            license_expression_spdx: "MIT".to_string(),
1102            detection_count: 2,
1103            detection_log: vec![],
1104            reference_matches: vec![crate::models::Match {
1105                license_expression: "mit".to_string(),
1106                license_expression_spdx: "MIT".to_string(),
1107                from_file: Some("src/main.rs".to_string()),
1108                start_line: LineNumber::ONE,
1109                end_line: LineNumber::new(3).unwrap(),
1110                matcher: MatcherKind::Hash,
1111                score: MatchScore::MAX,
1112                matched_length: Some(10),
1113                match_coverage: Some(100.0),
1114                rule_relevance: Some(100),
1115                rule_identifier: "mit.LICENSE".to_string(),
1116                rule_url: None,
1117                matched_text: None,
1118                referenced_filenames: None,
1119                matched_text_diagnostics: None,
1120            }],
1121        }];
1122        let output = Output::from(&internal);
1123
1124        let mut json_bytes = Vec::new();
1125        writer_for_format(OutputFormat::Json)
1126            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1127            .expect("json write should succeed");
1128        let json_value: Value =
1129            serde_json::from_slice(&json_bytes).expect("json output should parse");
1130        assert_eq!(json_value["license_detections"][0]["identifier"], "mit-id");
1131        assert_eq!(json_value["license_detections"][0]["detection_count"], 2);
1132
1133        let mut jsonl_bytes = Vec::new();
1134        writer_for_format(OutputFormat::JsonLines)
1135            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1136            .expect("json-lines write should succeed");
1137        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1138        assert!(
1139            rendered
1140                .lines()
1141                .any(|line| line.contains("\"license_detections\""))
1142        );
1143    }
1144
1145    #[test]
1146    fn test_json_and_json_lines_writers_keep_empty_top_level_license_detections() {
1147        let output = Output::from(&sample_internal_output());
1148
1149        let mut json_bytes = Vec::new();
1150        writer_for_format(OutputFormat::Json)
1151            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1152            .expect("json write should succeed");
1153        let json_value: Value =
1154            serde_json::from_slice(&json_bytes).expect("json output should parse");
1155        assert_eq!(json_value["license_detections"], Value::Array(vec![]));
1156
1157        let mut jsonl_bytes = Vec::new();
1158        writer_for_format(OutputFormat::JsonLines)
1159            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1160            .expect("json-lines write should succeed");
1161        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1162        assert!(
1163            rendered
1164                .lines()
1165                .any(|line| line == r#"{"license_detections":[]}"#)
1166        );
1167    }
1168
1169    #[test]
1170    fn test_public_writer_normalizes_empty_package_maps_without_changing_schema_output() {
1171        let mut internal = sample_internal_output();
1172        internal.packages.push(Package::from_package_data(
1173            &PackageData {
1174                package_type: Some(crate::models::PackageType::Npm),
1175                name: Some("demo".to_string()),
1176                version: Some("1.0.0".to_string()),
1177                ..PackageData::default()
1178            },
1179            "scan/package.json".to_string(),
1180        ));
1181
1182        let output = Output::from(&internal);
1183        let raw_schema = serde_json::to_value(&output).expect("schema output should serialize");
1184        assert_eq!(
1185            raw_schema["packages"][0]["qualifiers"],
1186            serde_json::json!({})
1187        );
1188        assert_eq!(
1189            raw_schema["packages"][0]["extra_data"],
1190            serde_json::json!({})
1191        );
1192
1193        let mut bytes = Vec::new();
1194        writer_for_format(OutputFormat::Json)
1195            .write(&output, &mut bytes, &OutputWriteConfig::default())
1196            .expect("json write should succeed");
1197        let public_value: Value = serde_json::from_slice(&bytes).expect("public json should parse");
1198
1199        assert!(public_value["packages"][0]["qualifiers"].is_null());
1200        assert!(public_value["packages"][0]["extra_data"].is_null());
1201    }
1202
1203    #[test]
1204    fn test_cyclonedx_xml_writer_outputs_xml() {
1205        let output = Output::from(&sample_internal_output());
1206        let mut bytes = Vec::new();
1207        writer_for_format(OutputFormat::CycloneDxXml)
1208            .write(&output, &mut bytes, &OutputWriteConfig::default())
1209            .expect("cyclonedx xml write should succeed");
1210
1211        let rendered = String::from_utf8(bytes).expect("cyclonedx xml should be utf-8");
1212        assert!(rendered.contains("<bom xmlns=\"http://cyclonedx.org/schema/bom/1.3\""));
1213        assert!(rendered.contains("<components>"));
1214    }
1215
1216    #[test]
1217    fn test_cyclonedx_json_includes_component_license_expression() {
1218        let mut internal = sample_internal_output();
1219        internal.packages = vec![crate::models::Package {
1220            package_type: Some(crate::models::PackageType::Maven),
1221            namespace: Some("example".to_string()),
1222            name: Some("gradle-project".to_string()),
1223            version: Some("1.0.0".to_string()),
1224            qualifiers: None,
1225            subpath: None,
1226            primary_language: Some("Java".to_string()),
1227            description: None,
1228            release_date: None,
1229            parties: vec![],
1230            keywords: vec![],
1231            homepage_url: None,
1232            download_url: None,
1233            size: None,
1234            sha1: None,
1235            md5: None,
1236            sha256: None,
1237            sha512: None,
1238            bug_tracking_url: None,
1239            code_view_url: None,
1240            vcs_url: None,
1241            copyright: None,
1242            holder: None,
1243            declared_license_expression: Some("Apache-2.0".to_string()),
1244            declared_license_expression_spdx: Some("Apache-2.0".to_string()),
1245            license_detections: vec![],
1246            other_license_expression: None,
1247            other_license_expression_spdx: None,
1248            other_license_detections: vec![],
1249            extracted_license_statement: Some("Apache-2.0".to_string()),
1250            notice_text: None,
1251            source_packages: vec![],
1252            is_private: false,
1253            is_virtual: false,
1254            extra_data: None,
1255            repository_homepage_url: None,
1256            repository_download_url: None,
1257            api_data_url: None,
1258            datasource_ids: vec![],
1259            purl: Some("pkg:maven/example/gradle-project@1.0.0".to_string()),
1260            package_uid: PackageUid::from_raw(
1261                "pkg:maven/example/gradle-project@1.0.0?uuid=test".to_string(),
1262            ),
1263            datafile_paths: vec![],
1264        }];
1265        let output = Output::from(&internal);
1266
1267        let mut bytes = Vec::new();
1268        writer_for_format(OutputFormat::CycloneDxJson)
1269            .write(&output, &mut bytes, &OutputWriteConfig::default())
1270            .expect("cyclonedx json write should succeed");
1271
1272        let rendered = String::from_utf8(bytes).expect("cyclonedx json should be utf-8");
1273        let value: Value = serde_json::from_str(&rendered).expect("valid json");
1274
1275        assert_eq!(
1276            value["components"][0]["licenses"][0]["expression"],
1277            "Apache-2.0"
1278        );
1279    }
1280
1281    #[test]
1282    fn test_cyclonedx_external_references_are_deduplicated() {
1283        let mut internal = sample_internal_output();
1284        internal.packages = vec![Package::from_package_data(
1285            &PackageData {
1286                package_type: Some(crate::models::PackageType::Npm),
1287                name: Some("demo".to_string()),
1288                version: Some("1.0.0".to_string()),
1289                download_url: Some("https://example.com/download.tgz".to_string()),
1290                repository_download_url: Some("https://example.com/download.tgz".to_string()),
1291                homepage_url: Some("https://example.com".to_string()),
1292                repository_homepage_url: Some("https://example.com".to_string()),
1293                ..PackageData::default()
1294            },
1295            "scan/package.json".to_string(),
1296        )];
1297        let output = Output::from(&internal);
1298
1299        let mut json_bytes = Vec::new();
1300        writer_for_format(OutputFormat::CycloneDxJson)
1301            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1302            .expect("cyclonedx json write should succeed");
1303        let value: Value = serde_json::from_slice(&json_bytes).expect("valid cyclonedx json");
1304        let refs = value["components"][0]["externalReferences"]
1305            .as_array()
1306            .expect("external references should be an array");
1307        assert_eq!(refs.len(), 2);
1308
1309        let mut xml_bytes = Vec::new();
1310        writer_for_format(OutputFormat::CycloneDxXml)
1311            .write(&output, &mut xml_bytes, &OutputWriteConfig::default())
1312            .expect("cyclonedx xml write should succeed");
1313        let xml = String::from_utf8(xml_bytes).expect("cyclonedx xml should be utf-8");
1314        assert_eq!(xml.matches("https://example.com/download.tgz").count(), 1);
1315        assert_eq!(xml.matches("https://example.com</url>").count(), 1);
1316    }
1317
1318    #[test]
1319    fn test_spdx_prefers_single_detected_package_name_over_scan_root() {
1320        let mut internal = sample_internal_output();
1321        internal.packages = vec![Package::from_package_data(
1322            &PackageData {
1323                package_type: Some(crate::models::PackageType::Npm),
1324                name: Some("detected-package".to_string()),
1325                version: Some("1.0.0".to_string()),
1326                ..PackageData::default()
1327            },
1328            "scan/package.json".to_string(),
1329        )];
1330        let output = Output::from(&internal);
1331
1332        let mut tv_bytes = Vec::new();
1333        writer_for_format(OutputFormat::SpdxTv)
1334            .write(
1335                &output,
1336                &mut tv_bytes,
1337                &OutputWriteConfig {
1338                    format: OutputFormat::SpdxTv,
1339                    custom_template: None,
1340                    scanned_path: Some("scan-root".to_string()),
1341                },
1342            )
1343            .expect("spdx tv write should succeed");
1344        let tv = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
1345        assert!(tv.contains("PackageName: detected-package"));
1346        assert!(tv.contains("DocumentNamespace: http://spdx.org/spdxdocs/detected-package"));
1347
1348        let mut rdf_bytes = Vec::new();
1349        writer_for_format(OutputFormat::SpdxRdf)
1350            .write(
1351                &output,
1352                &mut rdf_bytes,
1353                &OutputWriteConfig {
1354                    format: OutputFormat::SpdxRdf,
1355                    custom_template: None,
1356                    scanned_path: Some("scan-root".to_string()),
1357                },
1358            )
1359            .expect("spdx rdf write should succeed");
1360        let rdf = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
1361        assert!(rdf.contains("<spdx:name>detected-package</spdx:name>"));
1362    }
1363
1364    #[test]
1365    fn test_spdx_empty_scan_tag_value_matches_python_sentinel() {
1366        let output = Output {
1367            summary: None,
1368            tallies: None,
1369            tallies_of_key_files: None,
1370            tallies_by_facet: None,
1371            headers: vec![],
1372            packages: vec![],
1373            dependencies: vec![],
1374            license_detections: vec![],
1375            files: vec![],
1376            license_references: vec![],
1377            license_rule_references: vec![],
1378        };
1379        let mut bytes = Vec::new();
1380        writer_for_format(OutputFormat::SpdxTv)
1381            .write(
1382                &output,
1383                &mut bytes,
1384                &OutputWriteConfig {
1385                    format: OutputFormat::SpdxTv,
1386                    custom_template: None,
1387                    scanned_path: Some("scan".to_string()),
1388                },
1389            )
1390            .expect("spdx tv write should succeed");
1391
1392        let rendered = String::from_utf8(bytes).expect("spdx should be utf-8");
1393        assert_eq!(rendered, "# No results for package 'scan'.\n");
1394    }
1395
1396    #[test]
1397    fn test_spdx_empty_scan_rdf_matches_python_sentinel() {
1398        let output = Output {
1399            summary: None,
1400            tallies: None,
1401            tallies_of_key_files: None,
1402            tallies_by_facet: None,
1403            headers: vec![],
1404            packages: vec![],
1405            dependencies: vec![],
1406            license_detections: vec![],
1407            files: vec![],
1408            license_references: vec![],
1409            license_rule_references: vec![],
1410        };
1411        let mut bytes = Vec::new();
1412        writer_for_format(OutputFormat::SpdxRdf)
1413            .write(
1414                &output,
1415                &mut bytes,
1416                &OutputWriteConfig {
1417                    format: OutputFormat::SpdxRdf,
1418                    custom_template: None,
1419                    scanned_path: Some("scan".to_string()),
1420                },
1421            )
1422            .expect("spdx rdf write should succeed");
1423
1424        let rendered = String::from_utf8(bytes).expect("rdf should be utf-8");
1425        assert_eq!(rendered, "<!-- No results for package 'scan'. -->\n");
1426    }
1427
1428    #[test]
1429    fn test_html_writer_outputs_html_document() {
1430        let output = Output::from(&sample_internal_output());
1431        let mut bytes = Vec::new();
1432        writer_for_format(OutputFormat::Html)
1433            .write(&output, &mut bytes, &OutputWriteConfig::default())
1434            .expect("html write should succeed");
1435        let rendered = String::from_utf8(bytes).expect("html should be utf-8");
1436        assert!(rendered.contains("<!doctype html>"));
1437        assert!(rendered.contains("Provenant HTML Report"));
1438    }
1439
1440    #[test]
1441    fn test_custom_template_writer_renders_output_context() {
1442        let output = Output::from(&sample_internal_output());
1443        let temp_dir = tempfile::tempdir().expect("tempdir should be created");
1444        let template_path = temp_dir.path().join("template.tera");
1445        fs::write(
1446            &template_path,
1447            "version={{ output.headers[0].output_format_version }} files={{ files | length }}",
1448        )
1449        .expect("template should be written");
1450
1451        let mut bytes = Vec::new();
1452        writer_for_format(OutputFormat::CustomTemplate)
1453            .write(
1454                &output,
1455                &mut bytes,
1456                &OutputWriteConfig {
1457                    format: OutputFormat::CustomTemplate,
1458                    custom_template: Some(template_path.to_string_lossy().to_string()),
1459                    scanned_path: None,
1460                },
1461            )
1462            .expect("custom template write should succeed");
1463
1464        let rendered = String::from_utf8(bytes).expect("template output should be utf-8");
1465        assert!(rendered.contains("version=4.1.0"));
1466        assert!(rendered.contains("files=1"));
1467    }
1468
1469    fn sample_internal_output() -> crate::models::Output {
1470        crate::models::Output {
1471            summary: None,
1472            tallies: None,
1473            tallies_of_key_files: None,
1474            tallies_by_facet: None,
1475            headers: vec![Header {
1476                tool_name: "provenant".to_string(),
1477                tool_version: crate::version::BUILD_VERSION.to_string(),
1478                options: serde_json::Map::new(),
1479                notice: crate::models::HEADER_NOTICE.to_string(),
1480                start_timestamp: "2026-01-01T000000.000000".to_string(),
1481                end_timestamp: "2026-01-01T000001.000000".to_string(),
1482                output_format_version: "4.1.0".to_string(),
1483                duration: 1.0,
1484                errors: vec![],
1485                warnings: vec![],
1486                extra_data: ExtraData {
1487                    system_environment: SystemEnvironment {
1488                        operating_system: "darwin".to_string(),
1489                        cpu_architecture: "aarch64".to_string(),
1490                        platform: "darwin".to_string(),
1491                        platform_version: "26.3.1".to_string(),
1492                        rust_version: "1.93.0".to_string(),
1493                    },
1494                    spdx_license_list_version: "3.27".to_string(),
1495                    files_count: 1,
1496                    directories_count: 1,
1497                    excluded_count: 0,
1498                    license_index_provenance: Some(crate::models::LicenseIndexProvenance {
1499                        source: "embedded-artifact".to_string(),
1500                        dataset_fingerprint: "test-fingerprint".to_string(),
1501                        ignored_rules: vec![
1502                            "gpl-2.0_and-unknown-license-reference_1.RULE".to_string(),
1503                        ],
1504                        ignored_licenses: vec![],
1505                        ignored_rules_due_to_licenses: vec![],
1506                        added_rules: vec![],
1507                        replaced_rules: vec![],
1508                        added_licenses: vec![],
1509                        replaced_licenses: vec![],
1510                    }),
1511                },
1512            }],
1513            packages: vec![],
1514            dependencies: vec![],
1515            license_detections: vec![],
1516            files: vec![FileInfo::new(
1517                "main.rs".to_string(),
1518                "main".to_string(),
1519                "rs".to_string(),
1520                "src/main.rs".to_string(),
1521                FileType::File,
1522                Some("text/plain".to_string()),
1523                None,
1524                42,
1525                None,
1526                Some(Sha1Digest::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap()),
1527                Some(Md5Digest::from_hex("d41d8cd98f00b204e9800998ecf8427e").unwrap()),
1528                Some(
1529                    Sha256Digest::from_hex(
1530                        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
1531                    )
1532                    .unwrap(),
1533                ),
1534                Some("Rust".to_string()),
1535                vec![PackageData::default()],
1536                None,
1537                vec![LicenseDetection {
1538                    license_expression: "mit".to_string(),
1539                    license_expression_spdx: "MIT".to_string(),
1540                    matches: vec![Match {
1541                        license_expression: "mit".to_string(),
1542                        license_expression_spdx: "MIT".to_string(),
1543                        from_file: None,
1544                        start_line: LineNumber::ONE,
1545                        end_line: LineNumber::ONE,
1546                        matcher: MatcherKind::Hash,
1547                        score: MatchScore::MAX,
1548                        matched_length: None,
1549                        match_coverage: None,
1550                        rule_relevance: None,
1551                        rule_identifier: "mit_rule".to_string(),
1552                        rule_url: None,
1553                        matched_text: None,
1554                        referenced_filenames: None,
1555                        matched_text_diagnostics: None,
1556                    }],
1557                    detection_log: vec![],
1558                    identifier: String::new(),
1559                }],
1560                vec![],
1561                vec![Copyright {
1562                    copyright: "Copyright (c) Example".to_string(),
1563                    normalized_copyright: None,
1564                    start_line: LineNumber::ONE,
1565                    end_line: LineNumber::ONE,
1566                }],
1567                vec![Holder {
1568                    holder: "Example Org".to_string(),
1569                    start_line: LineNumber::ONE,
1570                    end_line: LineNumber::ONE,
1571                }],
1572                vec![Author {
1573                    author: "Jane Doe".to_string(),
1574                    start_line: LineNumber::ONE,
1575                    end_line: LineNumber::ONE,
1576                }],
1577                vec![OutputEmail {
1578                    email: "jane@example.com".to_string(),
1579                    start_line: LineNumber::ONE,
1580                    end_line: LineNumber::ONE,
1581                }],
1582                vec![OutputURL {
1583                    url: "https://example.com".to_string(),
1584                    start_line: LineNumber::ONE,
1585                    end_line: LineNumber::ONE,
1586                }],
1587                vec![],
1588                vec![],
1589            )],
1590            license_references: vec![],
1591            license_rule_references: vec![],
1592        }
1593    }
1594}