trusty-analyze 0.7.0

Sidecar code-analysis daemon for trusty-search: complexity, smells, quality, facts
Documentation
//! Roslyn SARIF parser and file-matching helpers.
//!
//! Why: SARIF parsing, percent-decoding, file-path matching, and severity
//! normalization are cohesive parsing concerns that are independently testable.
//! Keeping them in a dedicated submodule keeps `mod.rs` focused on the
//! `StaticTool` implementation and the compiler invocation.
//! What: exposes `parse_roslyn_sarif`, `roslyn_file_matches`, and related
//! helpers used by both `run()` (single-file filter) and `run_project()`
//! (multi-file filter).
//! Test: all helpers are covered by unit tests in this module.

use serde_json::Value;

use crate::core::tools::{Severity, ToolDiagnostic};

/// Turn a Roslyn/MSBuild SARIF 2.1 document into a list of diagnostics.
///
/// Why: Roslyn analyzers report their findings as SARIF when the build is
/// run with `-p:ErrorLog`; callers want those findings in normalized form.
/// What: trims and parses `sarif` as JSON, walks to `runs[0].results`, and
/// keeps every result that `roslyn_result_to_diag` can convert. Input that is
/// not JSON, or that lacks that path, yields an empty vector instead of an
/// error.
/// Test: `parse_roslyn_sarif_extracts_result` (captured fixture),
/// `parse_roslyn_sarif_tolerates_garbage`, and
/// `parse_roslyn_sarif_tolerates_legacy_v1`.
pub fn parse_roslyn_sarif(sarif: &str) -> Vec<ToolDiagnostic> {
    // A parse failure is folded into `None` so it shares the empty-result path.
    let document: Option<Value> = serde_json::from_str(sarif.trim()).ok();
    let diagnostics = document
        .as_ref()
        .and_then(|doc| doc.get("runs"))
        .and_then(Value::as_array)
        .and_then(|runs| runs.first())
        .and_then(|first_run| first_run.get("results"))
        .and_then(Value::as_array)
        .map(|entries| {
            entries
                .iter()
                .filter_map(roslyn_result_to_diag)
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();
    diagnostics
}

/// Convert one SARIF result object into a `ToolDiagnostic`.
///
/// Why: centralises the field-extraction logic so `parse_roslyn_sarif` stays
/// readable and the normalization is independently testable.
/// What: extracts `ruleId`, `level`, `message.text`, and the first
/// `physicalLocation` (uri + region); normalizes the file:// URI. Returns
/// `None` when the result has no first `physicalLocation`. A missing message
/// or URI becomes the empty string, a missing or non-numeric
/// `startLine`/`startColumn` becomes 0, and a value larger than `u32::MAX`
/// saturates to `u32::MAX` instead of wrapping around.
/// Test: tested indirectly through `parse_roslyn_sarif` in the unit tests.
fn roslyn_result_to_diag(result: &Value) -> Option<ToolDiagnostic> {
    // Without a physical location the finding cannot be attributed to a file,
    // so the whole result is skipped.
    let physical = result
        .get("locations")
        .and_then(Value::as_array)
        .and_then(|l| l.first())
        .and_then(|loc| loc.get("physicalLocation"))?;

    let code = result
        .get("ruleId")
        .and_then(Value::as_str)
        .map(str::to_string);
    let message = result
        .get("message")
        .and_then(|m| m.get("text"))
        .and_then(Value::as_str)
        .unwrap_or("")
        .to_string();
    let raw_uri = physical
        .get("artifactLocation")
        .and_then(|a| a.get("uri"))
        .and_then(Value::as_str)
        .unwrap_or("");
    let file = strip_file_scheme(raw_uri);

    let region = physical.get("region");
    // Reads one numeric region field. The value is clamped to `u32::MAX`
    // before the cast so the narrowing is lossless: a plain `as u32` would
    // wrap an oversized value into a small, plausible-looking position.
    let region_coord = |key: &str| -> u32 {
        region
            .and_then(|r| r.get(key))
            .and_then(Value::as_u64)
            .map_or(0, |n| n.min(u64::from(u32::MAX)) as u32)
    };
    let line = region_coord("startLine");
    let col = region_coord("startColumn");

    let severity = severity_from_str(result.get("level").and_then(Value::as_str).unwrap_or(""));
    Some(ToolDiagnostic {
        tool: "roslyn".into(),
        file,
        line,
        col,
        severity,
        code,
        message,
    })
}

/// Turn a SARIF artifact URI into a plain, percent-decoded path string.
///
/// Why: Roslyn reports absolute, percent-encoded URIs such as
/// `file:///Users/.../My%20Proj/File.cs`, while the rest of the pipeline
/// compares against ordinary filesystem paths with literal spaces (real .NET
/// estates have directories like `RestAPI Test Harness/`). An undecoded URI
/// would never match such a path.
/// What: drops one leading `file://` when present — the third `/` stays and
/// becomes the absolute-path root — and runs the remainder through
/// `percent_decode`. Input without the scheme is only percent-decoded.
/// Test: `strip_file_scheme_decodes_and_removes_prefix` in unit tests.
fn strip_file_scheme(uri: &str) -> String {
    let path_part = match uri.strip_prefix("file://") {
        Some(rest) => rest,
        None => uri,
    };
    percent_decode(path_part)
}

/// Decode `%XX` percent-escapes in a path string.
///
/// Why: SARIF `artifactLocation.uri` values percent-encode characters such as
/// the space in `My%20Project`; comparing against on-disk paths requires the
/// decoded text.
/// What: every `%` followed by two hex digits (either case) becomes the byte
/// they encode; a `%` that is malformed or cut off by the end of the string is
/// kept as-is. The decoded bytes are converted to a `String` lossily, so byte
/// sequences that are not valid UTF-8 turn into U+FFFD.
/// Test: covered by `strip_file_scheme_decodes_and_removes_prefix`.
fn percent_decode(s: &str) -> String {
    /// Value of a single ASCII hex digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let mut decoded: Vec<u8> = Vec::with_capacity(s.len());
    let mut rest = s.as_bytes();
    while let Some((&head, tail)) = rest.split_first() {
        if head == b'%' && tail.len() >= 2 {
            if let (Some(hi), Some(lo)) = (hex_value(tail[0]), hex_value(tail[1])) {
                // Both nibbles are at most 15, so the sum always fits in a byte.
                decoded.push(hi * 16 + lo);
                rest = &tail[2..];
                continue;
            }
        }
        // Ordinary byte, or a `%` that does not start a complete escape.
        decoded.push(head);
        rest = tail;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}

/// Decide whether `diag_file` (scheme-stripped, decoded) names the same file
/// as `want`.
///
/// Why: Roslyn reports absolute paths, but the caller may hold a relative or
/// an absolute one, so the comparison has to work on whole path components.
/// A raw string suffix test would let `/abs/NotFoo.cs` match `Foo.cs`.
/// Windows paths arrive with backslashes, which would defeat every
/// `/`-anchored comparison unless both sides are normalised first.
/// What: rewrites `\` as `/` in both arguments, then accepts exact equality,
/// or either path being a suffix of the other that begins right after a `/`.
/// Test: `roslyn_file_matches_anchors_on_separator` and
/// `roslyn_file_matches_windows_backslash_paths` in unit tests.
pub fn roslyn_file_matches(diag_file: &str, want: &str) -> bool {
    /// True when `path` is `<something>/` immediately followed by `tail`.
    fn has_component_suffix(path: &str, tail: &str) -> bool {
        match path.strip_suffix(tail) {
            Some(head) => head.ends_with('/'),
            None => false,
        }
    }

    let reported = diag_file.replace('\\', "/");
    let wanted = want.replace('\\', "/");
    if reported == wanted {
        return true;
    }
    has_component_suffix(&reported, &wanted) || has_component_suffix(&wanted, &reported)
}

/// Translate a SARIF `level` string into the normalized `Severity` enum.
///
/// Why: the level arrives as free-form text, while downstream code works
/// with the closed `Severity` enum.
/// What: `"note"` and `"info"` become Info, `"warning"` becomes Warning,
/// `"error"` becomes Error; any other input — including the empty string and
/// `"none"` — becomes Hint. The comparison is exact and case-sensitive.
/// Test: `severity_from_str_maps_correctly` in unit tests.
pub fn severity_from_str(s: &str) -> Severity {
    match s {
        "note" | "info" => Severity::Info,
        "warning" => Severity::Warning,
        "error" => Severity::Error,
        // Unrecognised or absent levels fall back to the weakest severity.
        _ => Severity::Hint,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Captured single-result SARIF 2.1 document: one CA1052 warning reported
    // at line 15, column 18 of Crypto.cs.
    const FIXTURE: &str = r#"{"version":"2.1.0","$schema":"http://json.schemastore.org/sarif-2.1.0","runs":[{"tool":{"driver":{"name":"Microsoft (R) Visual C# Compiler"}},"results":[{"ruleId":"CA1052","level":"warning","message":{"text":"Type 'Crypto' is a static holder type but is neither static nor NotInheritable"},"locations":[{"physicalLocation":{"artifactLocation":{"uri":"file:///Users/maui/dve/experiments/hotstats/HotStatsGeoAPI/HotStats.Crypto/Crypto.cs"},"region":{"startLine":15,"startColumn":18,"endLine":15,"endColumn":24}}}]}]}]}"#;

    // Rule codes of the diagnostics whose file matches `want`, in input
    // order. Mirrors the per-file filter applied by `run`.
    fn codes_for<'a>(diags: &'a [ToolDiagnostic], want: &str) -> Vec<Option<&'a str>> {
        diags
            .iter()
            .filter(|d| roslyn_file_matches(&d.file, want))
            .map(|d| d.code.as_deref())
            .collect()
    }

    #[test]
    fn parse_roslyn_sarif_extracts_result() {
        let parsed = parse_roslyn_sarif(FIXTURE);
        assert_eq!(parsed.len(), 1);

        let only = &parsed[0];
        assert_eq!(only.code.as_deref(), Some("CA1052"));
        assert_eq!(only.severity, Severity::Warning);
        assert_eq!(only.line, 15);
        assert_eq!(only.col, 18);
        // The URI scheme must be gone while the file name survives.
        assert!(!only.file.starts_with("file://"));
        assert!(only.file.ends_with("Crypto.cs"));
    }

    #[test]
    fn parse_roslyn_sarif_tolerates_garbage() {
        for input in ["not json", "{}"].iter() {
            assert!(parse_roslyn_sarif(input).is_empty(), "input {:?}", input);
        }
    }

    #[test]
    fn parse_roslyn_sarif_tolerates_legacy_v1() {
        // SARIF v1 keeps `results` at the top level rather than under
        // `runs[].results`, so there is no `runs` key to follow. The parser
        // must not panic and is expected to report nothing.
        let legacy = r#"{"version":"1.0.0","results":[{"ruleId":"X","locations":[]}]}"#;
        assert!(parse_roslyn_sarif(legacy).is_empty());
    }

    #[test]
    fn file_filtering_keeps_matching_drops_other() {
        // One result for Crypto.cs (CA1) and one for Other.cs (CA2).
        let two = r#"{"version":"2.1.0","runs":[{"results":[
            {"ruleId":"CA1","level":"warning","message":{"text":"m1"},"locations":[{"physicalLocation":{"artifactLocation":{"uri":"file:///abs/Crypto.cs"},"region":{"startLine":1,"startColumn":1}}}]},
            {"ruleId":"CA2","level":"error","message":{"text":"m2"},"locations":[{"physicalLocation":{"artifactLocation":{"uri":"file:///abs/Other.cs"},"region":{"startLine":2,"startColumn":3}}}]}
        ]}]}"#;
        let everything = parse_roslyn_sarif(two);
        assert_eq!(everything.len(), 2);

        // Filtering by either path keeps exactly the result for that file.
        assert_eq!(codes_for(&everything, "/abs/Crypto.cs"), vec![Some("CA1")]);
        assert_eq!(codes_for(&everything, "/abs/Other.cs"), vec![Some("CA2")]);
    }

    #[test]
    fn severity_from_str_maps_correctly() {
        let cases = [
            ("error", Severity::Error),
            ("warning", Severity::Warning),
            ("note", Severity::Info),
            ("info", Severity::Info),
            ("none", Severity::Hint),
            ("", Severity::Hint),
        ];
        for (level, expected) in cases.iter() {
            assert_eq!(severity_from_str(level), *expected, "level {:?}", level);
        }
    }

    #[test]
    fn strip_file_scheme_decodes_and_removes_prefix() {
        let cases = [
            // Scheme removed, absolute-path root kept.
            ("file:///Users/x/Foo.cs", "/Users/x/Foo.cs"),
            // Percent-encoded spaces (real .NET estates: "RestAPI Test Harness/").
            ("file:///Users/x/My%20Proj/Foo.cs", "/Users/x/My Proj/Foo.cs"),
            // Inputs without a scheme pass through unchanged.
            ("/already/plain.cs", "/already/plain.cs"),
            ("relative/path.cs", "relative/path.cs"),
            // A malformed escape is kept verbatim and never panics.
            ("a%2zb.cs", "a%2zb.cs"),
        ];
        for &(uri, expected) in cases.iter() {
            assert_eq!(strip_file_scheme(uri), expected, "uri {:?}", uri);
        }
    }

    #[test]
    fn roslyn_file_matches_anchors_on_separator() {
        let matching = [
            // Exact match.
            ("/a/b/Foo.cs", "/a/b/Foo.cs"),
            // diag_file is absolute, want is just the file name.
            ("/a/b/Foo.cs", "Foo.cs"),
            // want is absolute, diag_file is a suffix of it.
            ("b/Foo.cs", "/a/b/Foo.cs"),
        ];
        for &(diag, want) in matching.iter() {
            assert!(roslyn_file_matches(diag, want), "{:?} vs {:?}", diag, want);
        }

        let rejected = [
            // Different file in the same directory.
            ("/a/b/Foo.cs", "/a/b/Bar.cs"),
            // A suffix that does not start at a path separator.
            ("/abs/NotFoo.cs", "Foo.cs"),
        ];
        for &(diag, want) in rejected.iter() {
            assert!(!roslyn_file_matches(diag, want), "{:?} vs {:?}", diag, want);
        }
    }

    #[test]
    fn roslyn_file_matches_windows_backslash_paths() {
        // Why: a Windows SARIF URI such as `file:///C:\Users\x\src\Foo.cs`
        // leaves a backslash path once the scheme is stripped. Unless both
        // sides are normalised to forward slashes, the `/`-anchored suffix
        // checks never succeed and every C# result is silently dropped on
        // the primary C# platform.
        // What: backslash paths must match file names and path suffixes,
        // and must not match unrelated files.
        let matching = [
            // Backslash absolute path against a bare file name.
            ("C:\\Users\\x\\src\\Foo.cs", "Foo.cs"),
            // Backslash absolute path against a forward-slash relative segment.
            ("C:\\Users\\x\\src\\Foo.cs", "src/Foo.cs"),
            // Mixed: want uses backslashes, diag_file uses forward slashes.
            ("/abs/src/Foo.cs", "src\\Foo.cs"),
        ];
        for &(diag, want) in matching.iter() {
            assert!(roslyn_file_matches(diag, want), "{:?} vs {:?}", diag, want);
        }

        let rejected = [
            // Different file name.
            ("C:\\Users\\x\\src\\Foo.cs", "Bar.cs"),
            // A name that merely ends with the wanted name (NotFoo.cs vs Foo.cs).
            ("C:\\Users\\x\\NotFoo.cs", "Foo.cs"),
        ];
        for &(diag, want) in rejected.iter() {
            assert!(!roslyn_file_matches(diag, want), "{:?} vs {:?}", diag, want);
        }
    }
}