Skip to main content

provenant/output/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::fs::File;
5use std::io::{self, BufWriter, Write};
6
7use crate::output_schema::Output;
8
9mod cyclonedx;
10mod debian;
11mod html;
12mod jsonl;
13mod public_serialize;
14mod shared;
15mod spdx;
16mod template;
17
/// Crate-visible notice text: an "AS IS" disclaimer, a pointer to the project
/// page, and the SPDX License List version, one sentence fragment per line.
///
/// NOTE(review): presumably embedded in generated documents by the format
/// writers — the consumers live outside this file.
pub(crate) const SPDX_DOCUMENT_NOTICE: &str = concat!(
    "Generated with Provenant and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\n",
    "OR CONDITIONS OF ANY KIND, either express or implied. No content created from\n",
    "Provenant should be considered or used as legal advice. Consult an attorney\n",
    "for legal advice.\n",
    "Provenant is a free software code scanning tool.\n",
    "Visit https://github.com/mstykow/provenant/ for support and download.\n",
    "SPDX License List: 3.27",
);

/// Capacity in bytes (1 MiB) of the `BufWriter` placed in front of the output sink.
const OUTPUT_BUFFER_SIZE: usize = 1 << 20;
20
/// Serialization formats that [`FormatWriter`] can dispatch to.
///
/// The default format is [`OutputFormat::Json`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// Compact JSON followed by a trailing newline.
    #[default]
    Json,
    /// Pretty-printed JSON followed by a trailing newline.
    JsonPretty,
    /// YAML followed by a trailing newline.
    Yaml,
    /// JSON Lines, produced by the `jsonl` submodule.
    JsonLines,
    /// Debian copyright file, produced by the `debian` submodule.
    Debian,
    /// HTML report, produced by the `html` submodule.
    Html,
    /// Custom template rendering, produced by the `template` submodule.
    CustomTemplate,
    /// SPDX tag/value document, produced by the `spdx` submodule.
    SpdxTv,
    /// SPDX RDF/XML document, produced by the `spdx` submodule.
    SpdxRdf,
    /// CycloneDX JSON BOM, produced by the `cyclonedx` submodule.
    CycloneDxJson,
    /// CycloneDX XML BOM, produced by the `cyclonedx` submodule.
    CycloneDxXml,
}
36
37#[derive(Debug, Clone, Default)]
38pub struct OutputWriteConfig {
39    pub format: OutputFormat,
40    pub custom_template: Option<String>,
41    pub scanned_path: Option<String>,
42}
43
44pub trait OutputWriter {
45    fn write(
46        &self,
47        output: &Output,
48        writer: &mut dyn Write,
49        config: &OutputWriteConfig,
50    ) -> io::Result<()>;
51}
52
53pub struct FormatWriter {
54    format: OutputFormat,
55}
56
57pub fn writer_for_format(format: OutputFormat) -> FormatWriter {
58    FormatWriter { format }
59}
60
61impl OutputWriter for FormatWriter {
62    fn write(
63        &self,
64        output: &Output,
65        writer: &mut dyn Write,
66        config: &OutputWriteConfig,
67    ) -> io::Result<()> {
68        match self.format {
69            OutputFormat::Json => {
70                serde_json::to_writer(&mut *writer, &public_serialize::PublicOutput(output))
71                    .map_err(shared::io_other)?;
72                writer.write_all(b"\n")
73            }
74            OutputFormat::JsonPretty => {
75                serde_json::to_writer_pretty(&mut *writer, &public_serialize::PublicOutput(output))
76                    .map_err(shared::io_other)?;
77                writer.write_all(b"\n")
78            }
79            OutputFormat::Yaml => write_yaml(output, writer),
80            OutputFormat::JsonLines => jsonl::write_json_lines(output, writer),
81            OutputFormat::Debian => debian::write_debian_copyright(output, writer),
82            OutputFormat::Html => html::write_html_report(output, writer),
83            OutputFormat::CustomTemplate => template::write_custom_template(output, writer, config),
84            OutputFormat::SpdxTv => spdx::write_spdx_tag_value(output, writer, config),
85            OutputFormat::SpdxRdf => spdx::write_spdx_rdf_xml(output, writer, config),
86            OutputFormat::CycloneDxJson => cyclonedx::write_cyclonedx_json(output, writer),
87            OutputFormat::CycloneDxXml => cyclonedx::write_cyclonedx_xml(output, writer),
88        }
89    }
90}
91
92pub fn write_output_file(
93    output_file: &str,
94    output: &Output,
95    config: &OutputWriteConfig,
96) -> io::Result<()> {
97    if output_file == "-" {
98        let stdout = io::stdout();
99        let handle = stdout.lock();
100        let mut writer = BufWriter::with_capacity(OUTPUT_BUFFER_SIZE, handle);
101        writer_for_format(config.format).write(output, &mut writer, config)?;
102        return writer.flush();
103    }
104
105    let file = File::create(output_file)?;
106    let mut writer = BufWriter::with_capacity(OUTPUT_BUFFER_SIZE, file);
107    writer_for_format(config.format).write(output, &mut writer, config)?;
108    writer.flush()
109}
110
111fn write_yaml(output: &Output, writer: &mut dyn Write) -> io::Result<()> {
112    yaml_serde::to_writer(&mut *writer, &public_serialize::PublicOutput(output))
113        .map_err(shared::io_other)?;
114    writer.write_all(b"\n")
115}
116
117#[cfg(test)]
118mod tests {
119    use super::*;
120    use serde_json::Value;
121    use std::fs;
122
123    use crate::models::{
124        Author, Copyright, ExtraData, FileInfo, FileType, GitSha1, Header, Holder,
125        LicenseDetection, LineNumber, Match, MatchScore, Md5Digest, OutputEmail, OutputURL,
126        Package, PackageData, PackageUid, Sha1Digest, Sha256Digest, SystemEnvironment,
127    };
128    use crate::output_schema::OutputFileInfo;
129
130    #[test]
131    fn test_yaml_writer_outputs_yaml() {
132        let output = Output::from(&sample_internal_output());
133        let mut bytes = Vec::new();
134        writer_for_format(OutputFormat::Yaml)
135            .write(&output, &mut bytes, &OutputWriteConfig::default())
136            .expect("yaml write should succeed");
137        let rendered = String::from_utf8(bytes).expect("yaml should be utf-8");
138        assert!(rendered.contains("headers:"));
139        assert!(rendered.contains("files:"));
140    }
141
142    #[test]
143    fn test_json_lines_writer_outputs_parseable_lines() {
144        let output = Output::from(&sample_internal_output());
145        let mut bytes = Vec::new();
146        writer_for_format(OutputFormat::JsonLines)
147            .write(&output, &mut bytes, &OutputWriteConfig::default())
148            .expect("json-lines write should succeed");
149
150        let rendered = String::from_utf8(bytes).expect("json-lines should be utf-8");
151        let lines = rendered.lines().collect::<Vec<_>>();
152        assert!(lines.len() >= 2);
153        for line in lines {
154            serde_json::from_str::<Value>(line).expect("each line should be valid json");
155        }
156    }
157
158    #[test]
159    fn test_yaml_writer_emits_license_index_provenance_in_headers() {
160        let output = Output::from(&sample_internal_output());
161        let mut bytes = Vec::new();
162        writer_for_format(OutputFormat::Yaml)
163            .write(&output, &mut bytes, &OutputWriteConfig::default())
164            .expect("yaml write should succeed");
165
166        let rendered = String::from_utf8(bytes).expect("yaml should be utf-8");
167        assert!(rendered.contains("license_index_provenance:"));
168        assert!(rendered.contains("dataset_fingerprint: test-fingerprint"));
169        assert!(rendered.contains("source: embedded-artifact"));
170    }
171
172    #[test]
173    fn test_debian_writer_outputs_dep5_style_document() {
174        let mut internal = sample_internal_output();
175        internal.files[0].license_expression = Some("mit".to_string());
176        internal.files[0].license_detections[0].matches[0].matched_text = Some(
177            "Permission is hereby granted, free of charge, to any person obtaining a copy"
178                .to_string(),
179        );
180        let output = Output::from(&internal);
181
182        let mut bytes = Vec::new();
183        writer_for_format(OutputFormat::Debian)
184            .write(&output, &mut bytes, &OutputWriteConfig::default())
185            .expect("debian write should succeed");
186
187        let rendered = String::from_utf8(bytes).expect("debian output should be utf-8");
188        assert!(rendered.contains(
189            "Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/"
190        ));
191        assert!(rendered.contains("Comment: Generated with Provenant"));
192        assert!(rendered.contains("Files: src/main.rs"));
193        assert!(rendered.contains("Copyright: Example Org"));
194        assert!(rendered.contains("License: mit"));
195        assert!(rendered.contains(" Permission is hereby granted, free of charge"));
196    }
197
198    #[test]
199    fn test_debian_writer_skips_directories_and_deduplicates_license_texts() {
200        let mut internal = sample_internal_output();
201        internal.files.insert(
202            0,
203            FileInfo::new(
204                "src".to_string(),
205                "src".to_string(),
206                String::new(),
207                "src".to_string(),
208                FileType::Directory,
209                None,
210                None,
211                0,
212                None,
213                None,
214                None,
215                None,
216                None,
217                vec![],
218                None,
219                vec![],
220                vec![],
221                vec![],
222                vec![],
223                vec![],
224                vec![],
225                vec![],
226                vec![],
227                vec![],
228            ),
229        );
230        internal.files[1].license_expression = Some("mit".to_string());
231        internal.files[1].license_detections[0].matches[0].matched_text =
232            Some("Same text".to_string());
233        internal.files[1].license_detections[0].matches.push(Match {
234            license_expression: "mit".to_string(),
235            license_expression_spdx: "MIT".to_string(),
236            from_file: Some("src/main.rs".to_string()),
237            start_line: LineNumber::ONE,
238            end_line: LineNumber::ONE,
239            matcher: Some("2-aho".to_string()),
240            score: MatchScore::MAX,
241            matched_length: Some(1),
242            match_coverage: Some(100.0),
243            rule_relevance: Some(100),
244            rule_identifier: Some("mit_rule".to_string()),
245            rule_url: None,
246            matched_text: Some("Same text again".to_string()),
247            referenced_filenames: None,
248            matched_text_diagnostics: None,
249        });
250        let output = Output::from(&internal);
251
252        let mut bytes = Vec::new();
253        writer_for_format(OutputFormat::Debian)
254            .write(&output, &mut bytes, &OutputWriteConfig::default())
255            .expect("debian write should succeed");
256
257        let rendered = String::from_utf8(bytes).expect("debian output should be utf-8");
258        assert!(!rendered.contains("Files: src\n"));
259        assert_eq!(rendered.matches(" Same text").count(), 1);
260    }
261
262    #[test]
263    fn test_file_info_serialization_omits_info_fields_when_unset() {
264        let file = FileInfo::new(
265            "main.rs".to_string(),
266            "main".to_string(),
267            "rs".to_string(),
268            "src/main.rs".to_string(),
269            FileType::File,
270            None,
271            None,
272            42,
273            None,
274            None,
275            None,
276            None,
277            None,
278            vec![],
279            None,
280            vec![],
281            vec![],
282            vec![],
283            vec![],
284            vec![],
285            vec![],
286            vec![],
287            vec![],
288            vec![],
289        );
290
291        let schema_file = OutputFileInfo::from(&file);
292        let value = serde_json::to_value(&schema_file).expect("file info serializes");
293        let object = value.as_object().expect("file info object");
294
295        assert!(!object.contains_key("date"));
296        assert!(!object.contains_key("sha1"));
297        assert!(!object.contains_key("md5"));
298        assert!(!object.contains_key("sha256"));
299        assert!(!object.contains_key("sha1_git"));
300        assert!(!object.contains_key("mime_type"));
301        assert!(!object.contains_key("file_type"));
302        assert!(!object.contains_key("programming_language"));
303        assert!(!object.contains_key("is_binary"));
304        assert!(!object.contains_key("is_text"));
305        assert!(!object.contains_key("is_archive"));
306        assert!(!object.contains_key("is_media"));
307        assert!(!object.contains_key("is_source"));
308        assert!(!object.contains_key("is_script"));
309        assert!(!object.contains_key("files_count"));
310        assert!(!object.contains_key("dirs_count"));
311        assert!(!object.contains_key("size_count"));
312        assert!(!object.contains_key("license_policy"));
313    }
314
315    #[test]
316    fn test_file_info_serialization_keeps_license_policy_when_enabled() {
317        let mut file = FileInfo::new(
318            "main.rs".to_string(),
319            "main".to_string(),
320            "rs".to_string(),
321            "src/main.rs".to_string(),
322            FileType::File,
323            Some("text/plain".to_string()),
324            Some("text".to_string()),
325            42,
326            Some("2026-01-01T00:00:00Z".to_string()),
327            Some(Sha1Digest::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap()),
328            Some(Md5Digest::from_hex("d41d8cd98f00b204e9800998ecf8427e").unwrap()),
329            Some(
330                Sha256Digest::from_hex(
331                    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
332                )
333                .unwrap(),
334            ),
335            Some("Rust".to_string()),
336            vec![],
337            None,
338            vec![],
339            vec![],
340            vec![],
341            vec![],
342            vec![],
343            vec![],
344            vec![],
345            vec![],
346            vec![],
347        );
348        file.license_policy = Some(vec![]);
349        file.sha1_git =
350            Some(GitSha1::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap());
351        file.is_binary = Some(false);
352        file.is_text = Some(true);
353        file.is_archive = Some(false);
354        file.is_media = Some(false);
355        file.is_source = Some(true);
356        file.is_script = Some(false);
357        file.files_count = Some(0);
358        file.dirs_count = Some(0);
359        file.size_count = Some(0);
360
361        let schema_file = OutputFileInfo::from(&file);
362        let value = serde_json::to_value(&schema_file).expect("file info serializes");
363        let object = value.as_object().expect("file info object");
364
365        assert_eq!(object.get("license_policy"), Some(&serde_json::json!([])));
366        assert_eq!(object.get("file_type"), Some(&serde_json::json!("text")));
367        assert_eq!(object.get("is_binary"), Some(&serde_json::json!(false)));
368        assert_eq!(object.get("is_text"), Some(&serde_json::json!(true)));
369        assert_eq!(object.get("files_count"), Some(&serde_json::json!(0)));
370        assert_eq!(object.get("dirs_count"), Some(&serde_json::json!(0)));
371        assert_eq!(object.get("size_count"), Some(&serde_json::json!(0)));
372    }
373
374    #[test]
375    fn test_detected_license_expression_spdx_prefers_detection_spdx_values() {
376        let mut internal = sample_internal_output();
377        internal.files[0].license_expression = Some("mit".to_string());
378
379        let schema_file = OutputFileInfo::from(&internal.files[0]);
380        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
381        assert_eq!(schema_value["detected_license_expression_spdx"], "MIT");
382
383        let output = Output::from(&internal);
384        let mut bytes = Vec::new();
385        writer_for_format(OutputFormat::Json)
386            .write(&output, &mut bytes, &OutputWriteConfig::default())
387            .expect("json write should succeed");
388
389        let rendered: Value = serde_json::from_slice(&bytes).expect("json output should parse");
390        assert_eq!(
391            rendered["files"][0]["detected_license_expression_spdx"],
392            "MIT"
393        );
394    }
395
396    #[test]
397    fn test_detected_license_expression_spdx_preserves_distinct_nested_operands() {
398        let mut internal = sample_internal_output();
399        internal.files[0].license_detections = vec![crate::models::LicenseDetection {
400            license_expression: "mit AND (apache-2.0 OR mit)".to_string(),
401            license_expression_spdx: "MIT AND (Apache-2.0 OR MIT)".to_string(),
402            matches: vec![],
403            detection_log: vec![],
404            identifier: None,
405        }];
406        internal.files[0].license_expression = None;
407
408        let schema_file = OutputFileInfo::from(&internal.files[0]);
409        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
410        assert_eq!(
411            schema_value["detected_license_expression_spdx"],
412            "MIT AND (Apache-2.0 OR MIT)"
413        );
414    }
415
416    #[test]
417    fn test_detected_license_expression_spdx_prefers_covering_joined_expression() {
418        let mut internal = sample_internal_output();
419        internal.files[0].license_detections = vec![
420            crate::models::LicenseDetection {
421                license_expression: "apache-2.0 OR mit".to_string(),
422                license_expression_spdx: "Apache-2.0 OR MIT".to_string(),
423                matches: vec![],
424                detection_log: vec![],
425                identifier: None,
426            },
427            crate::models::LicenseDetection {
428                license_expression: "apache-2.0".to_string(),
429                license_expression_spdx: "Apache-2.0".to_string(),
430                matches: vec![],
431                detection_log: vec![],
432                identifier: None,
433            },
434        ];
435        internal.files[0].license_expression = None;
436
437        let schema_file = OutputFileInfo::from(&internal.files[0]);
438        let schema_value = serde_json::to_value(&schema_file).expect("file info serializes");
439        assert_eq!(
440            schema_value["detected_license_expression_spdx"],
441            "Apache-2.0 OR MIT"
442        );
443    }
444
445    #[test]
446    fn test_json_lines_writer_sorts_files_by_path_for_reproducibility() {
447        let mut internal = sample_internal_output();
448        internal.files.reverse();
449        let output = Output::from(&internal);
450        let mut bytes = Vec::new();
451        writer_for_format(OutputFormat::JsonLines)
452            .write(&output, &mut bytes, &OutputWriteConfig::default())
453            .expect("json-lines write should succeed");
454
455        let rendered = String::from_utf8(bytes).expect("json-lines should be utf-8");
456        let file_lines = rendered
457            .lines()
458            .filter_map(|line| {
459                let value: Value = serde_json::from_str(line).ok()?;
460                let files = value.get("files")?.as_array()?;
461                files.first()?.get("path")?.as_str().map(str::to_string)
462            })
463            .collect::<Vec<_>>();
464
465        let mut sorted = file_lines.clone();
466        sorted.sort();
467        assert_eq!(file_lines, sorted);
468    }
469
470    #[test]
471    fn test_spdx_tag_value_writer_contains_required_fields() {
472        let output = Output::from(&sample_internal_output());
473        let mut bytes = Vec::new();
474        writer_for_format(OutputFormat::SpdxTv)
475            .write(
476                &output,
477                &mut bytes,
478                &OutputWriteConfig {
479                    format: OutputFormat::SpdxTv,
480                    custom_template: None,
481                    scanned_path: Some("scan".to_string()),
482                },
483            )
484            .expect("spdx tv write should succeed");
485
486        let rendered = String::from_utf8(bytes).expect("spdx should be utf-8");
487        assert!(rendered.contains("SPDXVersion: SPDX-2.2"));
488        assert!(rendered.contains("FileName: ./src/main.rs"));
489    }
490
491    #[test]
492    fn test_spdx_rdf_writer_outputs_xml() {
493        let output = Output::from(&sample_internal_output());
494        let mut bytes = Vec::new();
495        writer_for_format(OutputFormat::SpdxRdf)
496            .write(
497                &output,
498                &mut bytes,
499                &OutputWriteConfig {
500                    format: OutputFormat::SpdxRdf,
501                    custom_template: None,
502                    scanned_path: Some("scan".to_string()),
503                },
504            )
505            .expect("spdx rdf write should succeed");
506
507        let rendered = String::from_utf8(bytes).expect("rdf should be utf-8");
508        assert!(rendered.contains("<rdf:RDF"));
509        assert!(rendered.contains("<spdx:SpdxDocument"));
510        assert!(rendered.contains("<spdx:created>2026-01-01T00:00:00Z</spdx:created>"));
511    }
512
513    #[test]
514    fn test_cyclonedx_writers_keep_iso_timestamps_when_headers_use_scancode_format() {
515        let mut internal = sample_internal_output();
516        internal.packages.push(Package::from_package_data(
517            &PackageData {
518                name: Some("demo".to_string()),
519                version: Some("1.0.0".to_string()),
520                ..PackageData::default()
521            },
522            "scan/package.json".to_string(),
523        ));
524        let output = Output::from(&internal);
525
526        let mut json_bytes = Vec::new();
527        writer_for_format(OutputFormat::CycloneDxJson)
528            .write(
529                &output,
530                &mut json_bytes,
531                &OutputWriteConfig {
532                    format: OutputFormat::CycloneDxJson,
533                    custom_template: None,
534                    scanned_path: Some("scan".to_string()),
535                },
536            )
537            .expect("cyclonedx json write should succeed");
538        let json_value: Value =
539            serde_json::from_slice(&json_bytes).expect("cyclonedx json should parse");
540        assert_eq!(
541            json_value["metadata"]["timestamp"].as_str(),
542            Some("2026-01-01T00:00:01Z")
543        );
544
545        let mut xml_bytes = Vec::new();
546        writer_for_format(OutputFormat::CycloneDxXml)
547            .write(
548                &output,
549                &mut xml_bytes,
550                &OutputWriteConfig {
551                    format: OutputFormat::CycloneDxXml,
552                    custom_template: None,
553                    scanned_path: Some("scan".to_string()),
554                },
555            )
556            .expect("cyclonedx xml write should succeed");
557        let xml = String::from_utf8(xml_bytes).expect("cyclonedx xml should be utf-8");
558        assert!(xml.contains("<timestamp>2026-01-01T00:00:01Z</timestamp>"));
559    }
560
561    #[test]
562    fn test_spdx_writers_emit_real_file_and_package_license_info() {
563        let output = Output::from(&sample_internal_output());
564
565        let mut tv_bytes = Vec::new();
566        writer_for_format(OutputFormat::SpdxTv)
567            .write(
568                &output,
569                &mut tv_bytes,
570                &OutputWriteConfig {
571                    format: OutputFormat::SpdxTv,
572                    custom_template: None,
573                    scanned_path: Some("scan".to_string()),
574                },
575            )
576            .expect("spdx tv write should succeed");
577        let tv_rendered = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
578        assert!(tv_rendered.contains("PackageLicenseConcluded: NOASSERTION"));
579        assert!(tv_rendered.contains("PackageLicenseInfoFromFiles: MIT"));
580        assert!(tv_rendered.contains("LicenseConcluded: NOASSERTION"));
581        assert!(tv_rendered.contains("LicenseInfoInFile: MIT"));
582        assert!(tv_rendered.contains("PackageCopyrightText: Copyright (c) Example"));
583
584        let mut rdf_bytes = Vec::new();
585        writer_for_format(OutputFormat::SpdxRdf)
586            .write(
587                &output,
588                &mut rdf_bytes,
589                &OutputWriteConfig {
590                    format: OutputFormat::SpdxRdf,
591                    custom_template: None,
592                    scanned_path: Some("scan".to_string()),
593                },
594            )
595            .expect("spdx rdf write should succeed");
596        let rdf_rendered = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
597        assert!(rdf_rendered.contains(
598            "<spdx:licenseInfoFromFiles rdf:resource=\"http://spdx.org/licenses/MIT\"/>"
599        ));
600        assert!(
601            rdf_rendered.contains(
602                "<spdx:licenseInfoInFile rdf:resource=\"http://spdx.org/licenses/MIT\"/>"
603            )
604        );
605        assert!(rdf_rendered.contains(
606            "<spdx:licenseConcluded rdf:resource=\"http://spdx.org/rdf/terms#noassertion\"/>"
607        ));
608    }
609
610    #[test]
611    fn test_spdx_writers_emit_license_ref_metadata_and_matched_text() {
612        let mut internal = sample_internal_output();
613        internal.files[0].license_detections = vec![LicenseDetection {
614            license_expression: "unknown-license-reference".to_string(),
615            license_expression_spdx: "LicenseRef-scancode-unknown-license-reference".to_string(),
616            matches: vec![Match {
617                license_expression: "unknown-license-reference".to_string(),
618                license_expression_spdx: "LicenseRef-scancode-unknown-license-reference"
619                    .to_string(),
620                from_file: Some("src/main.rs".to_string()),
621                start_line: LineNumber::ONE,
622                end_line: LineNumber::new(2).unwrap(),
623                matcher: Some("2-aho".to_string()),
624                score: MatchScore::MAX,
625                matched_length: Some(4),
626                match_coverage: Some(100.0),
627                rule_relevance: Some(100),
628                rule_identifier: Some("unknown-license-reference.RULE".to_string()),
629                rule_url: Some("https://example.com/unknown-license-reference.LICENSE".to_string()),
630                matched_text: Some("Custom license text".to_string()),
631                referenced_filenames: Some(vec!["LICENSE".to_string()]),
632                matched_text_diagnostics: None,
633            }],
634            detection_log: vec![],
635            identifier: Some("unknown-ref-id".to_string()),
636        }];
637        internal.license_references = vec![crate::models::LicenseReference {
638            key: Some("unknown-license-reference".to_string()),
639            language: Some("en".to_string()),
640            name: "Unknown License Reference".to_string(),
641            short_name: "Unknown License Reference".to_string(),
642            owner: None,
643            homepage_url: None,
644            spdx_license_key: "LicenseRef-scancode-unknown-license-reference".to_string(),
645            other_spdx_license_keys: vec![],
646            osi_license_key: None,
647            text_urls: vec![],
648            osi_url: None,
649            faq_url: None,
650            other_urls: vec![],
651            category: None,
652            is_exception: false,
653            is_unknown: true,
654            is_generic: false,
655            notes: None,
656            minimum_coverage: None,
657            standard_notice: None,
658            ignorable_copyrights: vec![],
659            ignorable_holders: vec![],
660            ignorable_authors: vec![],
661            ignorable_urls: vec![],
662            ignorable_emails: vec![],
663            scancode_url: None,
664            licensedb_url: None,
665            spdx_url: None,
666            text: "Unused fallback text".to_string(),
667        }];
668        let output = Output::from(&internal);
669
670        let mut tv_bytes = Vec::new();
671        writer_for_format(OutputFormat::SpdxTv)
672            .write(
673                &output,
674                &mut tv_bytes,
675                &OutputWriteConfig {
676                    format: OutputFormat::SpdxTv,
677                    custom_template: None,
678                    scanned_path: Some("scan".to_string()),
679                },
680            )
681            .expect("spdx tv write should succeed");
682        let tv_rendered = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
683        assert!(
684            tv_rendered
685                .contains("LicenseInfoInFile: LicenseRef-scancode-unknown-license-reference")
686        );
687        assert!(tv_rendered.contains(
688            "PackageLicenseInfoFromFiles: LicenseRef-scancode-unknown-license-reference"
689        ));
690        assert!(tv_rendered.contains("LicenseID: LicenseRef-scancode-unknown-license-reference"));
691        assert!(tv_rendered.contains("ExtractedText: <text>Custom license text"));
692        assert!(tv_rendered.contains("LicenseName: Unknown License Reference"));
693        assert!(tv_rendered.contains(
694            "LicenseComment: <text>See details at https://example.com/unknown-license-reference.LICENSE"
695        ));
696
697        let mut rdf_bytes = Vec::new();
698        writer_for_format(OutputFormat::SpdxRdf)
699            .write(
700                &output,
701                &mut rdf_bytes,
702                &OutputWriteConfig {
703                    format: OutputFormat::SpdxRdf,
704                    custom_template: None,
705                    scanned_path: Some("scan".to_string()),
706                },
707            )
708            .expect("spdx rdf write should succeed");
709        let rdf_rendered = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
710        assert!(rdf_rendered.contains(
711            "<spdx:licenseInfoInFile rdf:resource=\"http://spdx.org/licenses/LicenseRef-scancode-unknown-license-reference\"/>"
712        ));
713        assert!(rdf_rendered.contains(
714            "<spdx:hasExtractedLicensingInfo><spdx:ExtractedLicensingInfo rdf:about=\"#LicenseRef-scancode-unknown-license-reference\">"
715        ));
716        assert!(
717            rdf_rendered.contains("<spdx:extractedText>Custom license text</spdx:extractedText>")
718        );
719    }
720
721    #[test]
722    fn test_cyclonedx_json_writer_outputs_bom() {
723        let output = Output::from(&sample_internal_output());
724        let mut bytes = Vec::new();
725        writer_for_format(OutputFormat::CycloneDxJson)
726            .write(&output, &mut bytes, &OutputWriteConfig::default())
727            .expect("cyclonedx json write should succeed");
728
729        let rendered = String::from_utf8(bytes).expect("cyclonedx json should be utf-8");
730        let value: Value = serde_json::from_str(&rendered).expect("valid json");
731        assert_eq!(value["bomFormat"], "CycloneDX");
732        assert_eq!(value["specVersion"], "1.3");
733    }
734
735    #[test]
736    fn test_json_writer_includes_summary_and_key_file_flags() {
737        let mut internal = sample_internal_output();
738        internal.summary = Some(crate::models::Summary {
739            declared_license_expression: Some("apache-2.0".to_string()),
740            license_clarity_score: Some(crate::models::LicenseClarityScore {
741                score: 100,
742                declared_license: true,
743                identification_precision: true,
744                has_license_text: true,
745                declared_copyrights: true,
746                conflicting_license_categories: false,
747                ambiguous_compound_licensing: false,
748            }),
749            declared_holder: Some("Example Corp.".to_string()),
750            primary_language: Some("Ruby".to_string()),
751            other_license_expressions: vec![crate::models::TallyEntry {
752                value: Some("mit".to_string()),
753                count: 1,
754            }],
755            other_holders: vec![
756                crate::models::TallyEntry {
757                    value: None,
758                    count: 2,
759                },
760                crate::models::TallyEntry {
761                    value: Some("Other Corp.".to_string()),
762                    count: 1,
763                },
764            ],
765            other_languages: vec![crate::models::TallyEntry {
766                value: Some("Python".to_string()),
767                count: 2,
768            }],
769        });
770        internal.files[0].is_legal = true;
771        internal.files[0].is_top_level = true;
772        internal.files[0].is_key_file = true;
773        let output = Output::from(&internal);
774
775        let mut bytes = Vec::new();
776        writer_for_format(OutputFormat::Json)
777            .write(&output, &mut bytes, &OutputWriteConfig::default())
778            .expect("json write should succeed");
779
780        let rendered = String::from_utf8(bytes).expect("json should be utf-8");
781        let value: Value = serde_json::from_str(&rendered).expect("valid json");
782
783        assert_eq!(
784            value["summary"]["declared_license_expression"],
785            "apache-2.0"
786        );
787        assert_eq!(value["summary"]["license_clarity_score"]["score"], 100);
788        assert_eq!(value["summary"]["declared_holder"], "Example Corp.");
789        assert_eq!(value["summary"]["primary_language"], "Ruby");
790        assert_eq!(
791            value["summary"]["other_license_expressions"][0]["value"],
792            "mit"
793        );
794        assert!(value["summary"]["other_holders"][0]["value"].is_null());
795        assert_eq!(value["summary"]["other_holders"][1]["value"], "Other Corp.");
796        assert_eq!(value["summary"]["other_languages"][0]["value"], "Python");
797        assert_eq!(value["files"][0]["is_key_file"], true);
798    }
799
800    #[test]
801    fn test_json_and_json_lines_writers_include_top_level_tallies() {
802        let mut internal = sample_internal_output();
803        internal.tallies = Some(crate::models::Tallies {
804            detected_license_expression: vec![crate::models::TallyEntry {
805                value: Some("mit".to_string()),
806                count: 2,
807            }],
808            copyrights: vec![crate::models::TallyEntry {
809                value: Some("Copyright (c) Example Org".to_string()),
810                count: 1,
811            }],
812            holders: vec![crate::models::TallyEntry {
813                value: Some("Example Org".to_string()),
814                count: 1,
815            }],
816            authors: vec![crate::models::TallyEntry {
817                value: Some("Jane Doe".to_string()),
818                count: 1,
819            }],
820            programming_language: vec![crate::models::TallyEntry {
821                value: Some("Rust".to_string()),
822                count: 1,
823            }],
824        });
825        let output = Output::from(&internal);
826
827        let mut json_bytes = Vec::new();
828        writer_for_format(OutputFormat::Json)
829            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
830            .expect("json write should succeed");
831        let json_value: Value =
832            serde_json::from_slice(&json_bytes).expect("json output should parse");
833        assert_eq!(
834            json_value["tallies"]["detected_license_expression"][0]["value"],
835            "mit"
836        );
837        assert_eq!(
838            json_value["tallies"]["programming_language"][0]["value"],
839            "Rust"
840        );
841
842        let mut jsonl_bytes = Vec::new();
843        writer_for_format(OutputFormat::JsonLines)
844            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
845            .expect("json-lines write should succeed");
846        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
847        assert!(rendered.lines().any(|line| line.contains("\"tallies\"")));
848    }
849
850    #[test]
851    fn test_json_and_json_lines_writers_include_key_file_tallies() {
852        let mut internal = sample_internal_output();
853        internal.tallies_of_key_files = Some(crate::models::Tallies {
854            detected_license_expression: vec![crate::models::TallyEntry {
855                value: Some("apache-2.0".to_string()),
856                count: 1,
857            }],
858            copyrights: vec![],
859            holders: vec![],
860            authors: vec![],
861            programming_language: vec![crate::models::TallyEntry {
862                value: Some("Markdown".to_string()),
863                count: 1,
864            }],
865        });
866        let output = Output::from(&internal);
867
868        let mut json_bytes = Vec::new();
869        writer_for_format(OutputFormat::Json)
870            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
871            .expect("json write should succeed");
872        let json_value: Value =
873            serde_json::from_slice(&json_bytes).expect("json output should parse");
874        assert_eq!(
875            json_value["tallies_of_key_files"]["detected_license_expression"][0]["value"],
876            "apache-2.0"
877        );
878
879        let mut jsonl_bytes = Vec::new();
880        writer_for_format(OutputFormat::JsonLines)
881            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
882            .expect("json-lines write should succeed");
883        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
884        assert!(
885            rendered
886                .lines()
887                .any(|line| line.contains("\"tallies_of_key_files\""))
888        );
889    }
890
891    #[test]
892    fn test_json_and_json_lines_writers_include_file_tallies() {
893        let mut internal = sample_internal_output();
894        internal.files[0].tallies = Some(crate::models::Tallies {
895            detected_license_expression: vec![crate::models::TallyEntry {
896                value: Some("mit".to_string()),
897                count: 1,
898            }],
899            copyrights: vec![crate::models::TallyEntry {
900                value: None,
901                count: 1,
902            }],
903            holders: vec![],
904            authors: vec![],
905            programming_language: vec![crate::models::TallyEntry {
906                value: Some("Rust".to_string()),
907                count: 1,
908            }],
909        });
910        let output = Output::from(&internal);
911
912        let mut json_bytes = Vec::new();
913        writer_for_format(OutputFormat::Json)
914            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
915            .expect("json write should succeed");
916        let json_value: Value =
917            serde_json::from_slice(&json_bytes).expect("json output should parse");
918        assert_eq!(
919            json_value["files"][0]["tallies"]["detected_license_expression"][0]["value"],
920            "mit"
921        );
922
923        let mut jsonl_bytes = Vec::new();
924        writer_for_format(OutputFormat::JsonLines)
925            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
926            .expect("json-lines write should succeed");
927        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
928        assert!(rendered.lines().any(|line| line.contains("\"tallies\"")));
929    }
930
931    #[test]
932    fn test_json_and_json_lines_writers_include_facets_and_tallies_by_facet() {
933        let mut internal = sample_internal_output();
934        internal.files[0].facets = vec!["core".to_string(), "docs".to_string()];
935        internal.tallies_by_facet = Some(vec![crate::models::FacetTallies {
936            facet: "core".to_string(),
937            tallies: crate::models::Tallies {
938                detected_license_expression: vec![crate::models::TallyEntry {
939                    value: Some("mit".to_string()),
940                    count: 1,
941                }],
942                copyrights: vec![],
943                holders: vec![],
944                authors: vec![],
945                programming_language: vec![],
946            },
947        }]);
948        let output = Output::from(&internal);
949
950        let mut json_bytes = Vec::new();
951        writer_for_format(OutputFormat::Json)
952            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
953            .expect("json write should succeed");
954        let json_value: Value =
955            serde_json::from_slice(&json_bytes).expect("json output should parse");
956        assert_eq!(json_value["files"][0]["facets"][0], "core");
957        assert_eq!(json_value["tallies_by_facet"][0]["facet"], "core");
958
959        let mut jsonl_bytes = Vec::new();
960        writer_for_format(OutputFormat::JsonLines)
961            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
962            .expect("json-lines write should succeed");
963        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
964        assert!(
965            rendered
966                .lines()
967                .any(|line| line.contains("\"tallies_by_facet\""))
968        );
969    }
970
971    #[test]
972    fn test_json_and_json_lines_writers_include_top_level_license_references() {
973        let mut internal = sample_internal_output();
974        internal.license_references = vec![crate::models::LicenseReference {
975            key: Some("mit".to_string()),
976            language: Some("en".to_string()),
977            name: "MIT License".to_string(),
978            short_name: "MIT".to_string(),
979            owner: Some("Example Owner".to_string()),
980            homepage_url: Some("https://example.com/license".to_string()),
981            spdx_license_key: "MIT".to_string(),
982            other_spdx_license_keys: vec![],
983            osi_license_key: Some("MIT".to_string()),
984            text_urls: vec!["https://example.com/license.txt".to_string()],
985            osi_url: Some("https://opensource.org/licenses/MIT".to_string()),
986            faq_url: None,
987            other_urls: vec![],
988            category: None,
989            is_exception: false,
990            is_unknown: false,
991            is_generic: false,
992            notes: None,
993            minimum_coverage: None,
994            standard_notice: None,
995            ignorable_copyrights: vec![],
996            ignorable_holders: vec![],
997            ignorable_authors: vec![],
998            ignorable_urls: vec![],
999            ignorable_emails: vec![],
1000            scancode_url: None,
1001            licensedb_url: None,
1002            spdx_url: None,
1003            text: "MIT text".to_string(),
1004        }];
1005        internal.license_rule_references = vec![crate::models::LicenseRuleReference {
1006            identifier: "license-clue_1.RULE".to_string(),
1007            license_expression: "unknown-license-reference".to_string(),
1008            is_license_text: false,
1009            is_license_notice: false,
1010            is_license_reference: false,
1011            is_license_tag: false,
1012            is_license_clue: true,
1013            is_license_intro: false,
1014            language: None,
1015            rule_url: None,
1016            is_required_phrase: false,
1017            skip_for_required_phrase_generation: false,
1018            replaced_by: vec![],
1019            is_continuous: false,
1020            is_synthetic: false,
1021            is_from_license: false,
1022            length: 0,
1023            relevance: None,
1024            minimum_coverage: None,
1025            referenced_filenames: vec![],
1026            notes: None,
1027            ignorable_copyrights: vec![],
1028            ignorable_holders: vec![],
1029            ignorable_authors: vec![],
1030            ignorable_urls: vec![],
1031            ignorable_emails: vec![],
1032            text: None,
1033        }];
1034        let output = Output::from(&internal);
1035
1036        let mut json_bytes = Vec::new();
1037        writer_for_format(OutputFormat::Json)
1038            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1039            .expect("json write should succeed");
1040        let json_value: Value =
1041            serde_json::from_slice(&json_bytes).expect("json output should parse");
1042        assert_eq!(
1043            json_value["license_references"][0]["spdx_license_key"],
1044            "MIT"
1045        );
1046        assert_eq!(json_value["license_references"][0]["key"], "mit");
1047        assert_eq!(json_value["license_references"][0]["language"], "en");
1048        assert_eq!(
1049            json_value["license_references"][0]["owner"],
1050            "Example Owner"
1051        );
1052        assert_eq!(
1053            json_value["license_references"][0]["homepage_url"],
1054            "https://example.com/license"
1055        );
1056        assert_eq!(
1057            json_value["license_references"][0]["osi_license_key"],
1058            "MIT"
1059        );
1060        assert_eq!(
1061            json_value["license_references"][0]["text_urls"][0],
1062            "https://example.com/license.txt"
1063        );
1064        assert_eq!(
1065            json_value["license_rule_references"][0]["identifier"],
1066            "license-clue_1.RULE"
1067        );
1068        assert_eq!(
1069            json_value["license_rule_references"][0]["relevance"],
1070            Value::Null
1071        );
1072        assert_eq!(
1073            json_value["license_rule_references"][0]["length"],
1074            Value::from(0)
1075        );
1076
1077        let mut jsonl_bytes = Vec::new();
1078        writer_for_format(OutputFormat::JsonLines)
1079            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1080            .expect("json-lines write should succeed");
1081        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1082        assert!(
1083            rendered
1084                .lines()
1085                .any(|line| line.contains("\"license_references\""))
1086        );
1087        assert!(
1088            rendered
1089                .lines()
1090                .any(|line| line.contains("\"license_rule_references\""))
1091        );
1092    }
1093
1094    #[test]
1095    fn test_json_and_json_lines_writers_include_top_level_license_detections() {
1096        let mut internal = sample_internal_output();
1097        internal.license_detections = vec![crate::models::TopLevelLicenseDetection {
1098            identifier: "mit-id".to_string(),
1099            license_expression: "mit".to_string(),
1100            license_expression_spdx: "MIT".to_string(),
1101            detection_count: 2,
1102            detection_log: vec![],
1103            reference_matches: vec![crate::models::Match {
1104                license_expression: "mit".to_string(),
1105                license_expression_spdx: "MIT".to_string(),
1106                from_file: Some("src/main.rs".to_string()),
1107                start_line: LineNumber::ONE,
1108                end_line: LineNumber::new(3).unwrap(),
1109                matcher: Some("1-hash".to_string()),
1110                score: MatchScore::MAX,
1111                matched_length: Some(10),
1112                match_coverage: Some(100.0),
1113                rule_relevance: Some(100),
1114                rule_identifier: Some("mit.LICENSE".to_string()),
1115                rule_url: None,
1116                matched_text: None,
1117                referenced_filenames: None,
1118                matched_text_diagnostics: None,
1119            }],
1120        }];
1121        let output = Output::from(&internal);
1122
1123        let mut json_bytes = Vec::new();
1124        writer_for_format(OutputFormat::Json)
1125            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1126            .expect("json write should succeed");
1127        let json_value: Value =
1128            serde_json::from_slice(&json_bytes).expect("json output should parse");
1129        assert_eq!(json_value["license_detections"][0]["identifier"], "mit-id");
1130        assert_eq!(json_value["license_detections"][0]["detection_count"], 2);
1131
1132        let mut jsonl_bytes = Vec::new();
1133        writer_for_format(OutputFormat::JsonLines)
1134            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1135            .expect("json-lines write should succeed");
1136        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1137        assert!(
1138            rendered
1139                .lines()
1140                .any(|line| line.contains("\"license_detections\""))
1141        );
1142    }
1143
1144    #[test]
1145    fn test_json_and_json_lines_writers_keep_empty_top_level_license_detections() {
1146        let output = Output::from(&sample_internal_output());
1147
1148        let mut json_bytes = Vec::new();
1149        writer_for_format(OutputFormat::Json)
1150            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1151            .expect("json write should succeed");
1152        let json_value: Value =
1153            serde_json::from_slice(&json_bytes).expect("json output should parse");
1154        assert_eq!(json_value["license_detections"], Value::Array(vec![]));
1155
1156        let mut jsonl_bytes = Vec::new();
1157        writer_for_format(OutputFormat::JsonLines)
1158            .write(&output, &mut jsonl_bytes, &OutputWriteConfig::default())
1159            .expect("json-lines write should succeed");
1160        let rendered = String::from_utf8(jsonl_bytes).expect("json-lines should be utf-8");
1161        assert!(
1162            rendered
1163                .lines()
1164                .any(|line| line == r#"{"license_detections":[]}"#)
1165        );
1166    }
1167
1168    #[test]
1169    fn test_public_writer_normalizes_empty_package_maps_without_changing_schema_output() {
1170        let mut internal = sample_internal_output();
1171        internal.packages.push(Package::from_package_data(
1172            &PackageData {
1173                package_type: Some(crate::models::PackageType::Npm),
1174                name: Some("demo".to_string()),
1175                version: Some("1.0.0".to_string()),
1176                ..PackageData::default()
1177            },
1178            "scan/package.json".to_string(),
1179        ));
1180
1181        let output = Output::from(&internal);
1182        let raw_schema = serde_json::to_value(&output).expect("schema output should serialize");
1183        assert_eq!(
1184            raw_schema["packages"][0]["qualifiers"],
1185            serde_json::json!({})
1186        );
1187        assert_eq!(
1188            raw_schema["packages"][0]["extra_data"],
1189            serde_json::json!({})
1190        );
1191
1192        let mut bytes = Vec::new();
1193        writer_for_format(OutputFormat::Json)
1194            .write(&output, &mut bytes, &OutputWriteConfig::default())
1195            .expect("json write should succeed");
1196        let public_value: Value = serde_json::from_slice(&bytes).expect("public json should parse");
1197
1198        assert!(public_value["packages"][0]["qualifiers"].is_null());
1199        assert!(public_value["packages"][0]["extra_data"].is_null());
1200    }
1201
1202    #[test]
1203    fn test_cyclonedx_xml_writer_outputs_xml() {
1204        let output = Output::from(&sample_internal_output());
1205        let mut bytes = Vec::new();
1206        writer_for_format(OutputFormat::CycloneDxXml)
1207            .write(&output, &mut bytes, &OutputWriteConfig::default())
1208            .expect("cyclonedx xml write should succeed");
1209
1210        let rendered = String::from_utf8(bytes).expect("cyclonedx xml should be utf-8");
1211        assert!(rendered.contains("<bom xmlns=\"http://cyclonedx.org/schema/bom/1.3\""));
1212        assert!(rendered.contains("<components>"));
1213    }
1214
1215    #[test]
1216    fn test_cyclonedx_json_includes_component_license_expression() {
1217        let mut internal = sample_internal_output();
1218        internal.packages = vec![crate::models::Package {
1219            package_type: Some(crate::models::PackageType::Maven),
1220            namespace: Some("example".to_string()),
1221            name: Some("gradle-project".to_string()),
1222            version: Some("1.0.0".to_string()),
1223            qualifiers: None,
1224            subpath: None,
1225            primary_language: Some("Java".to_string()),
1226            description: None,
1227            release_date: None,
1228            parties: vec![],
1229            keywords: vec![],
1230            homepage_url: None,
1231            download_url: None,
1232            size: None,
1233            sha1: None,
1234            md5: None,
1235            sha256: None,
1236            sha512: None,
1237            bug_tracking_url: None,
1238            code_view_url: None,
1239            vcs_url: None,
1240            copyright: None,
1241            holder: None,
1242            declared_license_expression: Some("Apache-2.0".to_string()),
1243            declared_license_expression_spdx: Some("Apache-2.0".to_string()),
1244            license_detections: vec![],
1245            other_license_expression: None,
1246            other_license_expression_spdx: None,
1247            other_license_detections: vec![],
1248            extracted_license_statement: Some("Apache-2.0".to_string()),
1249            notice_text: None,
1250            source_packages: vec![],
1251            is_private: false,
1252            is_virtual: false,
1253            extra_data: None,
1254            repository_homepage_url: None,
1255            repository_download_url: None,
1256            api_data_url: None,
1257            datasource_ids: vec![],
1258            purl: Some("pkg:maven/example/gradle-project@1.0.0".to_string()),
1259            package_uid: PackageUid::from_raw(
1260                "pkg:maven/example/gradle-project@1.0.0?uuid=test".to_string(),
1261            ),
1262            datafile_paths: vec![],
1263        }];
1264        let output = Output::from(&internal);
1265
1266        let mut bytes = Vec::new();
1267        writer_for_format(OutputFormat::CycloneDxJson)
1268            .write(&output, &mut bytes, &OutputWriteConfig::default())
1269            .expect("cyclonedx json write should succeed");
1270
1271        let rendered = String::from_utf8(bytes).expect("cyclonedx json should be utf-8");
1272        let value: Value = serde_json::from_str(&rendered).expect("valid json");
1273
1274        assert_eq!(
1275            value["components"][0]["licenses"][0]["expression"],
1276            "Apache-2.0"
1277        );
1278    }
1279
1280    #[test]
1281    fn test_cyclonedx_external_references_are_deduplicated() {
1282        let mut internal = sample_internal_output();
1283        internal.packages = vec![Package::from_package_data(
1284            &PackageData {
1285                package_type: Some(crate::models::PackageType::Npm),
1286                name: Some("demo".to_string()),
1287                version: Some("1.0.0".to_string()),
1288                download_url: Some("https://example.com/download.tgz".to_string()),
1289                repository_download_url: Some("https://example.com/download.tgz".to_string()),
1290                homepage_url: Some("https://example.com".to_string()),
1291                repository_homepage_url: Some("https://example.com".to_string()),
1292                ..PackageData::default()
1293            },
1294            "scan/package.json".to_string(),
1295        )];
1296        let output = Output::from(&internal);
1297
1298        let mut json_bytes = Vec::new();
1299        writer_for_format(OutputFormat::CycloneDxJson)
1300            .write(&output, &mut json_bytes, &OutputWriteConfig::default())
1301            .expect("cyclonedx json write should succeed");
1302        let value: Value = serde_json::from_slice(&json_bytes).expect("valid cyclonedx json");
1303        let refs = value["components"][0]["externalReferences"]
1304            .as_array()
1305            .expect("external references should be an array");
1306        assert_eq!(refs.len(), 2);
1307
1308        let mut xml_bytes = Vec::new();
1309        writer_for_format(OutputFormat::CycloneDxXml)
1310            .write(&output, &mut xml_bytes, &OutputWriteConfig::default())
1311            .expect("cyclonedx xml write should succeed");
1312        let xml = String::from_utf8(xml_bytes).expect("cyclonedx xml should be utf-8");
1313        assert_eq!(xml.matches("https://example.com/download.tgz").count(), 1);
1314        assert_eq!(xml.matches("https://example.com</url>").count(), 1);
1315    }
1316
1317    #[test]
1318    fn test_spdx_prefers_single_detected_package_name_over_scan_root() {
1319        let mut internal = sample_internal_output();
1320        internal.packages = vec![Package::from_package_data(
1321            &PackageData {
1322                package_type: Some(crate::models::PackageType::Npm),
1323                name: Some("detected-package".to_string()),
1324                version: Some("1.0.0".to_string()),
1325                ..PackageData::default()
1326            },
1327            "scan/package.json".to_string(),
1328        )];
1329        let output = Output::from(&internal);
1330
1331        let mut tv_bytes = Vec::new();
1332        writer_for_format(OutputFormat::SpdxTv)
1333            .write(
1334                &output,
1335                &mut tv_bytes,
1336                &OutputWriteConfig {
1337                    format: OutputFormat::SpdxTv,
1338                    custom_template: None,
1339                    scanned_path: Some("scan-root".to_string()),
1340                },
1341            )
1342            .expect("spdx tv write should succeed");
1343        let tv = String::from_utf8(tv_bytes).expect("spdx tv should be utf-8");
1344        assert!(tv.contains("PackageName: detected-package"));
1345        assert!(tv.contains("DocumentNamespace: http://spdx.org/spdxdocs/detected-package"));
1346
1347        let mut rdf_bytes = Vec::new();
1348        writer_for_format(OutputFormat::SpdxRdf)
1349            .write(
1350                &output,
1351                &mut rdf_bytes,
1352                &OutputWriteConfig {
1353                    format: OutputFormat::SpdxRdf,
1354                    custom_template: None,
1355                    scanned_path: Some("scan-root".to_string()),
1356                },
1357            )
1358            .expect("spdx rdf write should succeed");
1359        let rdf = String::from_utf8(rdf_bytes).expect("spdx rdf should be utf-8");
1360        assert!(rdf.contains("<spdx:name>detected-package</spdx:name>"));
1361    }
1362
1363    #[test]
1364    fn test_spdx_empty_scan_tag_value_matches_python_sentinel() {
1365        let output = Output {
1366            summary: None,
1367            tallies: None,
1368            tallies_of_key_files: None,
1369            tallies_by_facet: None,
1370            headers: vec![],
1371            packages: vec![],
1372            dependencies: vec![],
1373            license_detections: vec![],
1374            files: vec![],
1375            license_references: vec![],
1376            license_rule_references: vec![],
1377        };
1378        let mut bytes = Vec::new();
1379        writer_for_format(OutputFormat::SpdxTv)
1380            .write(
1381                &output,
1382                &mut bytes,
1383                &OutputWriteConfig {
1384                    format: OutputFormat::SpdxTv,
1385                    custom_template: None,
1386                    scanned_path: Some("scan".to_string()),
1387                },
1388            )
1389            .expect("spdx tv write should succeed");
1390
1391        let rendered = String::from_utf8(bytes).expect("spdx should be utf-8");
1392        assert_eq!(rendered, "# No results for package 'scan'.\n");
1393    }
1394
1395    #[test]
1396    fn test_spdx_empty_scan_rdf_matches_python_sentinel() {
1397        let output = Output {
1398            summary: None,
1399            tallies: None,
1400            tallies_of_key_files: None,
1401            tallies_by_facet: None,
1402            headers: vec![],
1403            packages: vec![],
1404            dependencies: vec![],
1405            license_detections: vec![],
1406            files: vec![],
1407            license_references: vec![],
1408            license_rule_references: vec![],
1409        };
1410        let mut bytes = Vec::new();
1411        writer_for_format(OutputFormat::SpdxRdf)
1412            .write(
1413                &output,
1414                &mut bytes,
1415                &OutputWriteConfig {
1416                    format: OutputFormat::SpdxRdf,
1417                    custom_template: None,
1418                    scanned_path: Some("scan".to_string()),
1419                },
1420            )
1421            .expect("spdx rdf write should succeed");
1422
1423        let rendered = String::from_utf8(bytes).expect("rdf should be utf-8");
1424        assert_eq!(rendered, "<!-- No results for package 'scan'. -->\n");
1425    }
1426
1427    #[test]
1428    fn test_html_writer_outputs_html_document() {
1429        let output = Output::from(&sample_internal_output());
1430        let mut bytes = Vec::new();
1431        writer_for_format(OutputFormat::Html)
1432            .write(&output, &mut bytes, &OutputWriteConfig::default())
1433            .expect("html write should succeed");
1434        let rendered = String::from_utf8(bytes).expect("html should be utf-8");
1435        assert!(rendered.contains("<!doctype html>"));
1436        assert!(rendered.contains("Provenant HTML Report"));
1437    }
1438
1439    #[test]
1440    fn test_custom_template_writer_renders_output_context() {
1441        let output = Output::from(&sample_internal_output());
1442        let temp_dir = tempfile::tempdir().expect("tempdir should be created");
1443        let template_path = temp_dir.path().join("template.tera");
1444        fs::write(
1445            &template_path,
1446            "version={{ output.headers[0].output_format_version }} files={{ files | length }}",
1447        )
1448        .expect("template should be written");
1449
1450        let mut bytes = Vec::new();
1451        writer_for_format(OutputFormat::CustomTemplate)
1452            .write(
1453                &output,
1454                &mut bytes,
1455                &OutputWriteConfig {
1456                    format: OutputFormat::CustomTemplate,
1457                    custom_template: Some(template_path.to_string_lossy().to_string()),
1458                    scanned_path: None,
1459                },
1460            )
1461            .expect("custom template write should succeed");
1462
1463        let rendered = String::from_utf8(bytes).expect("template output should be utf-8");
1464        assert!(rendered.contains("version=4.1.0"));
1465        assert!(rendered.contains("files=1"));
1466    }
1467
1468    fn sample_internal_output() -> crate::models::Output {
1469        crate::models::Output {
1470            summary: None,
1471            tallies: None,
1472            tallies_of_key_files: None,
1473            tallies_by_facet: None,
1474            headers: vec![Header {
1475                tool_name: "provenant".to_string(),
1476                tool_version: crate::version::BUILD_VERSION.to_string(),
1477                options: serde_json::Map::new(),
1478                notice: crate::models::HEADER_NOTICE.to_string(),
1479                start_timestamp: "2026-01-01T000000.000000".to_string(),
1480                end_timestamp: "2026-01-01T000001.000000".to_string(),
1481                output_format_version: "4.1.0".to_string(),
1482                duration: 1.0,
1483                errors: vec![],
1484                warnings: vec![],
1485                extra_data: ExtraData {
1486                    system_environment: SystemEnvironment {
1487                        operating_system: "darwin".to_string(),
1488                        cpu_architecture: "aarch64".to_string(),
1489                        platform: "darwin".to_string(),
1490                        platform_version: "26.3.1".to_string(),
1491                        rust_version: "1.93.0".to_string(),
1492                    },
1493                    spdx_license_list_version: "3.27".to_string(),
1494                    files_count: 1,
1495                    directories_count: 1,
1496                    excluded_count: 0,
1497                    license_index_provenance: Some(crate::models::LicenseIndexProvenance {
1498                        source: "embedded-artifact".to_string(),
1499                        dataset_fingerprint: "test-fingerprint".to_string(),
1500                        ignored_rules: vec![
1501                            "gpl-2.0_and-unknown-license-reference_1.RULE".to_string(),
1502                        ],
1503                        ignored_licenses: vec![],
1504                        ignored_rules_due_to_licenses: vec![],
1505                        added_rules: vec![],
1506                        replaced_rules: vec![],
1507                        added_licenses: vec![],
1508                        replaced_licenses: vec![],
1509                    }),
1510                },
1511            }],
1512            packages: vec![],
1513            dependencies: vec![],
1514            license_detections: vec![],
1515            files: vec![FileInfo::new(
1516                "main.rs".to_string(),
1517                "main".to_string(),
1518                "rs".to_string(),
1519                "src/main.rs".to_string(),
1520                FileType::File,
1521                Some("text/plain".to_string()),
1522                None,
1523                42,
1524                None,
1525                Some(Sha1Digest::from_hex("da39a3ee5e6b4b0d3255bfef95601890afd80709").unwrap()),
1526                Some(Md5Digest::from_hex("d41d8cd98f00b204e9800998ecf8427e").unwrap()),
1527                Some(
1528                    Sha256Digest::from_hex(
1529                        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
1530                    )
1531                    .unwrap(),
1532                ),
1533                Some("Rust".to_string()),
1534                vec![PackageData::default()],
1535                None,
1536                vec![LicenseDetection {
1537                    license_expression: "mit".to_string(),
1538                    license_expression_spdx: "MIT".to_string(),
1539                    matches: vec![Match {
1540                        license_expression: "mit".to_string(),
1541                        license_expression_spdx: "MIT".to_string(),
1542                        from_file: None,
1543                        start_line: LineNumber::ONE,
1544                        end_line: LineNumber::ONE,
1545                        matcher: None,
1546                        score: MatchScore::MAX,
1547                        matched_length: None,
1548                        match_coverage: None,
1549                        rule_relevance: None,
1550                        rule_identifier: Some("mit_rule".to_string()),
1551                        rule_url: None,
1552                        matched_text: None,
1553                        referenced_filenames: None,
1554                        matched_text_diagnostics: None,
1555                    }],
1556                    detection_log: vec![],
1557                    identifier: None,
1558                }],
1559                vec![],
1560                vec![Copyright {
1561                    copyright: "Copyright (c) Example".to_string(),
1562                    start_line: LineNumber::ONE,
1563                    end_line: LineNumber::ONE,
1564                }],
1565                vec![Holder {
1566                    holder: "Example Org".to_string(),
1567                    start_line: LineNumber::ONE,
1568                    end_line: LineNumber::ONE,
1569                }],
1570                vec![Author {
1571                    author: "Jane Doe".to_string(),
1572                    start_line: LineNumber::ONE,
1573                    end_line: LineNumber::ONE,
1574                }],
1575                vec![OutputEmail {
1576                    email: "jane@example.com".to_string(),
1577                    start_line: LineNumber::ONE,
1578                    end_line: LineNumber::ONE,
1579                }],
1580                vec![OutputURL {
1581                    url: "https://example.com".to_string(),
1582                    start_line: LineNumber::ONE,
1583                    end_line: LineNumber::ONE,
1584                }],
1585                vec![],
1586                vec![],
1587            )],
1588            license_references: vec![],
1589            license_rule_references: vec![],
1590        }
1591    }
1592}