jscpd-rs 0.1.6

50x+ faster duplicate-code detector for CI/CD; jscpd-compatible CLI, SARIF, JSON, HTML reports
Documentation
use std::collections::HashMap;

use anyhow::Result;
use serde::Serialize;

use super::file_output::write_file_report;
use super::source::source_location;
use crate::cli::Options;
use crate::detector::DetectionResult;

/// Renders `result` as a SARIF document and passes it on for writing.
///
/// The report is built with `SarifReport::from_detection`, serialized to a compact JSON
/// string, and handed to `write_file_report` together with the file name
/// `jscpd-sarif.json` and the label `SARIF report`.
///
/// # Errors
///
/// Returns an error if JSON serialization fails or if `write_file_report` reports one.
pub(super) fn write(result: &DetectionResult, options: &Options) -> Result<()> {
    let body = serde_json::to_string(&SarifReport::from_detection(result, options))?;
    write_file_report(options, "jscpd-sarif.json", "SARIF report", body)
}

/// Root object of the serialized SARIF document.
///
/// Built by `SarifReport::from_detection` and turned into JSON by `write`.
#[derive(Serialize)]
struct SarifReport {
    /// Schema URL; serialized under the JSON key `$schema`.
    #[serde(rename = "$schema")]
    schema: &'static str,
    /// SARIF format version string.
    version: &'static str,
    /// Runs contained in the report; `from_detection` emits exactly one.
    runs: Vec<SarifRun>,
}

/// A single run entry: the tool description plus everything it reported.
#[derive(Serialize)]
struct SarifRun {
    /// Description of the tool that produced the results.
    tool: SarifTool,
    /// Reported findings, in the order `from_detection` pushed them.
    results: Vec<SarifResult>,
    /// Files referenced by results; result locations point into this list by index.
    artifacts: Vec<SarifArtifact>,
}

/// Wrapper object that nests the driver description under the `driver` key.
#[derive(Serialize)]
struct SarifTool {
    /// The driver (name, version, rules) of the reporting tool.
    driver: SarifDriver,
}

/// Tool driver metadata; field names are serialized in camelCase
/// (e.g. `information_uri` becomes `informationUri`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifDriver {
    /// Tool name reported in the document.
    name: &'static str,
    /// Rules that results refer to through `ruleId` / `ruleIndex`.
    rules: Vec<SarifRule>,
    /// Tool version string reported in the document.
    version: &'static str,
    /// URL with more information about the tool.
    information_uri: &'static str,
}

/// Description of one rule; field names are serialized in camelCase
/// (`shortDescription`, `helpUri`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRule {
    /// Rule identifier that results reference through `ruleId`.
    id: &'static str,
    /// Short human-readable description of the rule.
    short_description: SarifMessage,
    /// URL of help material for the rule.
    help_uri: &'static str,
}

/// One reported finding; field names are serialized in camelCase
/// (`ruleId`, `ruleIndex`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifResult {
    /// Severity string; `from_detection` uses `warning` for clones and `error` for the
    /// threshold result.
    level: &'static str,
    /// Human-readable message for the finding.
    message: SarifMessage,
    /// Identifier of the rule this result belongs to.
    rule_id: &'static str,
    /// Locations of the finding; the key is left out of the JSON when the list is empty.
    #[serde(skip_serializing_if = "Vec::is_empty")]
    locations: Vec<SarifLocation>,
    /// Position of the rule inside the driver's `rules` list.
    rule_index: usize,
}

/// Text container used for both result messages and rule descriptions.
#[derive(Serialize)]
struct SarifMessage {
    /// The message text.
    text: String,
}

/// A result location; serialized with the single key `physicalLocation`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifLocation {
    /// File and region the result points at.
    physical_location: SarifPhysicalLocation,
}

/// A file reference plus the region inside that file; field names are serialized in
/// camelCase (`artifactLocation`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifPhysicalLocation {
    /// Which file the location refers to.
    artifact_location: SarifArtifactLocation,
    /// Line/column span inside that file.
    region: SarifRegion,
}

/// Reference to a file by URI, optionally paired with its position in the run's
/// `artifacts` list.
#[derive(Serialize)]
struct SarifArtifactLocation {
    /// URI of the file; `from_detection` fills it with a clone's `source_id`.
    uri: String,
    /// Index into the run's `artifacts` list; the key is omitted from the JSON when
    /// `None` (as it is for the artifact entries themselves).
    #[serde(skip_serializing_if = "Option::is_none")]
    index: Option<usize>,
}

/// Line/column span of a finding; field names are serialized in camelCase
/// (`startLine`, `startColumn`, `endLine`, `endColumn`).
///
/// Values are copied unchanged from the clone's start and end positions.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRegion {
    /// Line of the span's start position.
    start_line: usize,
    /// Column of the span's start position.
    start_column: usize,
    /// Line of the span's end position.
    end_line: usize,
    /// Column of the span's end position.
    end_column: usize,
}

/// One entry of the run's `artifacts` list; field names are serialized in camelCase
/// (`sourceLanguage`).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifArtifact {
    /// Display name of the file's language, as produced by `sarif_source_language`.
    source_language: String,
    /// Where the file lives; `from_detection` leaves its `index` unset.
    location: SarifArtifactLocation,
}

impl SarifReport {
    fn from_detection(result: &DetectionResult, options: &Options) -> Self {
        const URL: &str = "https://github.com/kucherenko/jscpd/";

        let mut artifacts = Vec::new();
        let mut artifact_indices = HashMap::new();
        let mut results = Vec::new();

        for clone in &result.clones {
            let uri = clone.duplication_a.source_id.clone();
            let artifact_index = *artifact_indices.entry(uri.clone()).or_insert_with(|| {
                let index = artifacts.len();
                artifacts.push(SarifArtifact {
                    source_language: sarif_source_language(&clone.format),
                    location: SarifArtifactLocation {
                        uri: uri.clone(),
                        index: None,
                    },
                });
                index
            });

            results.push(SarifResult {
                level: "warning",
                message: SarifMessage {
                    text: format!(
                        "Clone detected in {}, - {}[{}] and {}[{}]",
                        clone.format,
                        clone.duplication_a.source_id,
                        source_location(&clone.duplication_a.start, &clone.duplication_a.end),
                        clone.duplication_b.source_id,
                        source_location(&clone.duplication_b.start, &clone.duplication_b.end),
                    ),
                },
                rule_id: "duplication",
                locations: vec![SarifLocation {
                    physical_location: SarifPhysicalLocation {
                        artifact_location: SarifArtifactLocation {
                            uri,
                            index: Some(artifact_index),
                        },
                        region: SarifRegion {
                            start_line: clone.duplication_a.start.line,
                            start_column: clone.duplication_a.start.column,
                            end_line: clone.duplication_a.end.line,
                            end_column: clone.duplication_a.end.column,
                        },
                    },
                }],
                rule_index: 0,
            });
        }

        if result.statistics.total.percentage >= options.threshold.unwrap_or(100.0) {
            results.push(SarifResult {
                level: "error",
                message: SarifMessage {
                    text: format!(
                        "The duplication level ({}%) is bigger than threshold ({}%)",
                        result.statistics.total.percentage,
                        options
                            .threshold
                            .map(|threshold| threshold.to_string())
                            .unwrap_or_else(|| "undefined".to_string()),
                    ),
                },
                rule_id: "duplications-threshold",
                locations: Vec::new(),
                rule_index: 1,
            });
        }

        Self {
            schema: "http://json.schemastore.org/sarif-2.1.0.json",
            version: "2.1.0",
            runs: vec![SarifRun {
                tool: SarifTool {
                    driver: SarifDriver {
                        name: "jscpd",
                        rules: vec![
                            SarifRule {
                                id: "duplication",
                                short_description: SarifMessage {
                                    text: "Found code duplication".to_string(),
                                },
                                help_uri: URL,
                            },
                            SarifRule {
                                id: "duplications-threshold",
                                short_description: SarifMessage {
                                    text: "Level of duplication is too high".to_string(),
                                },
                                help_uri: URL,
                            },
                        ],
                        version: "4.2.4",
                        information_uri: URL,
                    },
                },
                results,
                artifacts,
            }],
        }
    }
}

/// Maps a detector format name to the display name used for `sourceLanguage`.
///
/// Names listed in the table get their fixed spelling. Any other name is returned with
/// its first character upper-cased and the rest unchanged; an empty name yields an
/// empty string.
fn sarif_source_language(format: &str) -> String {
    const KNOWN: &[(&str, &str)] = &[
        ("javascript", "JavaScript"),
        ("typescript", "TypeScript"),
        ("jsx", "JSX"),
        ("tsx", "TSX"),
        ("css", "CSS"),
        ("html", "HTML"),
        ("markup", "HTML"),
        ("json", "JSON"),
        ("markdown", "Markdown"),
        ("rust", "Rust"),
        ("python", "Python"),
        ("ruby", "Ruby"),
        ("go", "Go"),
        ("java", "Java"),
        ("csharp", "C#"),
        ("cpp", "C++"),
        ("c", "C"),
    ];

    if let Some(&(_, label)) = KNOWN.iter().find(|&&(key, _)| key == format) {
        return label.to_owned();
    }

    // Fallback: capitalize the first character. Upper-casing can expand to more than
    // one character, so the result is assembled from iterators rather than by index.
    let mut rest = format.chars();
    match rest.next() {
        Some(initial) => initial.to_uppercase().chain(rest).collect(),
        None => String::new(),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::report::test_support::{make_test_result_with_clone, write_test_report};

    /// Checks the top-level keys, driver metadata, first result and first artifact of a
    /// report built from a single clone with default options.
    #[test]
    fn sarif_report_matches_upstream_shape() {
        let detection = make_test_result_with_clone("src/a.js", "src/b.js");
        let default_options = Options::default();
        let document =
            serde_json::to_value(SarifReport::from_detection(&detection, &default_options))
                .unwrap();
        let run = &document["runs"][0];
        let driver = &run["tool"]["driver"];
        let first_result = &run["results"][0];

        assert_eq!(
            document["$schema"],
            "http://json.schemastore.org/sarif-2.1.0.json"
        );
        assert_eq!(document["version"], "2.1.0");
        assert_eq!(driver["name"], "jscpd");
        assert_eq!(driver["rules"][0]["id"], "duplication");
        assert_eq!(
            first_result["message"]["text"],
            "Clone detected in javascript, - src/a.js[2:3 - 5:1] and src/b.js[8:1 - 11:1]"
        );
        assert_eq!(
            first_result["locations"][0]["physicalLocation"]["artifactLocation"]["index"],
            0
        );
        assert_eq!(run["artifacts"][0]["sourceLanguage"], "JavaScript");
    }

    /// With 25% duplication and a threshold of 10, a second result must appear: level
    /// `error`, the threshold message, and no `locations` key.
    #[test]
    fn sarif_report_includes_threshold_result_like_upstream() {
        let mut detection = make_test_result_with_clone("src/a.js", "src/b.js");
        detection.statistics.total.percentage = 25.0;
        let options = Options {
            threshold: Some(10.0),
            ..Options::default()
        };
        let document =
            serde_json::to_value(SarifReport::from_detection(&detection, &options)).unwrap();
        let threshold_result = &document["runs"][0]["results"][1];

        assert_eq!(threshold_result["level"], "error");
        assert_eq!(
            threshold_result["message"]["text"],
            "The duplication level (25%) is bigger than threshold (10%)"
        );
        assert!(threshold_result["locations"].is_null());
    }

    /// The string returned by the shared `write_test_report` helper must parse as JSON
    /// and look like a SARIF report.
    #[test]
    fn write_reports_writes_sarif_report() {
        let contents = write_test_report("sarif", "sarif-report", &["jscpd-sarif.json"]);
        let document: serde_json::Value = serde_json::from_str(&contents).unwrap();

        assert_eq!(document["version"], "2.1.0");
        assert_eq!(document["runs"][0]["results"][0]["ruleId"], "duplication");
    }
}