use crate::parser::SarifResult as ParseResult;
use crate::types::{Artifact, ReportingDescriptor, Result as SarifResult, Run, SarifLog};
use std::collections::{HashMap, HashSet};
use std::path::Path;
/// In-memory lookup tables built from a SARIF log.
///
/// All maps own clones of the indexed data, so the index stays valid after
/// the source `SarifLog` is dropped.
#[derive(Debug, Clone)]
pub struct SarifIndex {
/// Rule descriptors keyed by rule id; a later rule with the same id replaces the earlier one.
pub rules: HashMap<String, ReportingDescriptor>,
/// Artifacts keyed by their location URI, paired with the artifact's position
/// in its run's `artifacts` array. Artifacts without a URI are not stored.
pub artifacts: HashMap<String, (Artifact, usize)>,
/// Results keyed by their GUID when present, otherwise by a synthetic
/// `"<run_index>:<result_index>"` key.
pub results: HashMap<String, (SarifResult, ResultLocation)>,
/// Rule id -> keys into `results` for every result that carries that rule id.
pub rule_to_results: HashMap<String, Vec<String>>,
/// Primary artifact URI -> keys into `results` for results located in that artifact.
pub artifact_to_results: HashMap<String, Vec<String>>,
/// Tool driver name -> indices of the runs produced by that tool.
pub tool_to_runs: HashMap<String, Vec<usize>>,
/// Counters accumulated while indexing.
pub stats: IndexStats,
/// Descriptive information about the indexed log.
pub metadata: IndexMetadata,
}
/// Where an indexed result came from, plus the identifiers used to index it.
#[derive(Debug, Clone, PartialEq)]
pub struct ResultLocation {
/// Position of the owning run in the log's `runs` array.
pub run_index: usize,
/// Position of the result in the owning run's `results` array.
pub result_index: usize,
/// Copy of the result's GUID, if it has one.
pub guid: Option<String>,
/// Copy of the result's rule id, if it has one.
pub rule_id: Option<String>,
/// URI of the artifact referenced by the result's first location, if any.
pub primary_artifact_uri: Option<String>,
}
/// Counters collected while a log is indexed.
///
/// The counters are incremented once per item visited, so they can exceed
/// the sizes of the corresponding maps when keys collide.
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
/// Number of runs in the most recently indexed log.
pub run_count: usize,
/// Number of results visited.
pub result_count: usize,
/// Number of rule descriptors visited.
pub rule_count: usize,
/// Number of artifacts visited that had a location URI.
pub artifact_count: usize,
/// Number of results that carry a GUID.
pub results_with_guids: usize,
/// Number of results that carry a rule id.
pub results_with_rule_ids: usize,
/// Number of results whose `locations` field is present.
pub results_with_locations: usize,
/// Result counts keyed by the `Debug` rendering of the level, or `"None"` when absent.
pub results_by_level: HashMap<String, usize>,
/// Result counts keyed by the `Debug` rendering of the kind, or `"None"` when absent.
pub results_by_kind: HashMap<String, usize>,
}
/// Descriptive information about an indexed log.
#[derive(Debug, Clone)]
pub struct IndexMetadata {
/// SARIF version string; starts as `"2.1.0"` and is replaced by the log's own version.
pub version: String,
/// Schema reference copied from the log, if present.
pub schema: Option<String>,
/// Names of the tools seen while indexing (the keys of `tool_to_runs`).
pub tools: Vec<String>,
/// Timestamp string for when the index was built.
/// NOTE(review): `SarifIndex::new` currently fills this with a fixed placeholder value.
pub indexed_at: String,
/// Path the log was loaded from; only set by `SarifIndex::from_file`.
pub source_path: Option<String>,
}
/// A set of optional filters for `SarifIndex::query`.
///
/// A field left as `None` places no constraint on the results.
#[derive(Debug, Clone, Default)]
pub struct SarifQuery {
/// Accept only results whose rule id is in this set.
pub rule_ids: Option<HashSet<String>>,
/// Accept only results whose primary artifact URI is in this set.
pub artifact_uris: Option<HashSet<String>>,
/// Accept only results whose level, rendered with `Debug` (or `"None"`), is in this set.
pub levels: Option<HashSet<String>>,
/// Accept only results whose kind, rendered with `Debug` (or `"None"`), is in this set.
pub kinds: Option<HashSet<String>>,
/// Tool names the caller wants to restrict the query to (set by `with_tool`).
pub tools: Option<HashSet<String>>,
/// Accept only results from runs whose index is in this set.
pub run_indices: Option<HashSet<usize>>,
/// Accept only results whose message text contains this string, ignoring case.
pub message_contains: Option<String>,
/// Require the result to have (`true`) or lack (`false`) a GUID.
pub has_guid: Option<bool>,
/// Require the result to have (`true`) or lack (`false`) fingerprints or partial fingerprints.
pub has_fingerprints: Option<bool>,
/// Maximum number of results to return.
pub limit: Option<usize>,
}
/// Outcome of running a `SarifQuery` against a `SarifIndex`.
#[derive(Debug, Clone)]
pub struct QueryResults {
/// Matching results (cloned), after the query limit has been applied.
pub results: Vec<(SarifResult, ResultLocation)>,
/// Number of matches found before the limit was applied.
pub total_matches: usize,
/// `true` when the limit removed at least one match from `results`.
pub limited: bool,
/// Timing and bookkeeping for the query.
pub execution_stats: QueryExecutionStats,
}
/// Bookkeeping recorded while a query runs.
#[derive(Debug, Clone)]
pub struct QueryExecutionStats {
/// Wall-clock duration of the query in whole milliseconds.
pub execution_time_ms: u64,
/// Number of indexed results that were tested against the query.
pub results_scanned: usize,
/// Names of the secondary indices consulted (`"rule_to_results"` or
/// `"artifact_to_results"`); empty when every result was scanned.
pub indices_used: Vec<String>,
}
impl SarifIndex {
/// Creates an empty index with zeroed statistics and default metadata.
///
/// The metadata starts with version `"2.1.0"`, no schema, no tools and no
/// source path.
/// NOTE(review): `indexed_at` is a fixed placeholder string, not the current time.
pub fn new() -> Self {
    let metadata = IndexMetadata {
        version: String::from("2.1.0"),
        schema: None,
        tools: Vec::new(),
        indexed_at: String::from("2024-01-01T00:00:00Z"),
        source_path: None,
    };

    Self {
        rules: HashMap::new(),
        artifacts: HashMap::new(),
        results: HashMap::new(),
        rule_to_results: HashMap::new(),
        artifact_to_results: HashMap::new(),
        tool_to_runs: HashMap::new(),
        stats: IndexStats::default(),
        metadata,
    }
}
/// Builds a fully populated index from an already parsed SARIF log.
///
/// The log's version and schema are copied into the metadata before the
/// runs are indexed.
pub fn from_sarif_log(sarif_log: &SarifLog) -> Self {
    let mut built = Self::new();
    built.metadata.schema = sarif_log.schema.clone();
    built.metadata.version = sarif_log.version.clone();
    built.index_sarif_log(sarif_log);
    built
}
/// Reads a SARIF file from disk, parses it and indexes it.
///
/// # Errors
///
/// Returns an error when the file cannot be read or when its contents
/// cannot be parsed by `crate::from_str`.
pub fn from_file<P: AsRef<Path>>(path: P) -> ParseResult<Self> {
    let path = path.as_ref();
    let raw = std::fs::read_to_string(path)?;
    let parsed: SarifLog = crate::from_str(&raw)?;

    let mut built = Self::from_sarif_log(&parsed);
    // Remember where the log came from; non-UTF-8 path bytes are replaced lossily.
    built.metadata.source_path = Some(path.to_string_lossy().into_owned());
    Ok(built)
}
/// Indexes every run of `sarif_log` into this index.
///
/// Sets `stats.run_count` to the number of runs in `sarif_log`, indexes each
/// run with its position in the `runs` array, and refreshes `metadata.tools`
/// from the tool names seen so far (sorted, so the order is deterministic).
///
/// A one-line timing diagnostic is written to standard error; standard
/// output is left untouched so callers that emit machine-readable data on
/// stdout are not disturbed.
///
/// NOTE(review): run indices always start at 0 and `run_count` is overwritten,
/// so calling this more than once on the same index can make synthetic
/// `"<run>:<result>"` keys from different logs collide.
pub fn index_sarif_log(&mut self, sarif_log: &SarifLog) {
    let start_time = std::time::Instant::now();
    self.stats.run_count = sarif_log.runs.len();
    for (run_index, run) in sarif_log.runs.iter().enumerate() {
        self.index_run(run, run_index);
    }

    // HashMap key order is arbitrary; sort so `metadata.tools` is reproducible.
    let mut tools: Vec<String> = self.tool_to_runs.keys().cloned().collect();
    tools.sort_unstable();
    self.metadata.tools = tools;

    eprintln!("Indexed SARIF log in {:?}", start_time.elapsed());
}
fn index_run(&mut self, run: &Run, run_index: usize) {
let tool_name = run.tool.driver.name.clone();
self.tool_to_runs
.entry(tool_name)
.or_default()
.push(run_index);
if let Some(ref rules) = run.tool.driver.rules {
for rule in rules {
self.rules.insert(rule.id.clone(), rule.clone());
self.stats.rule_count += 1;
}
}
if let Some(ref artifacts) = run.artifacts {
for (artifact_index, artifact) in artifacts.iter().enumerate() {
if let Some(ref location) = artifact.location
&& let Some(ref uri) = location.uri
{
self.artifacts
.insert(uri.clone(), (artifact.clone(), artifact_index));
self.stats.artifact_count += 1;
}
}
}
if let Some(ref results) = run.results {
for (result_index, result) in results.iter().enumerate() {
self.index_result(result, run_index, result_index);
}
}
}
/// Stores one result in the primary map, links it into the rule and artifact
/// indices, and updates the statistics.
///
/// The result is keyed by its GUID when it has one, otherwise by
/// `"<run_index>:<result_index>"`.
fn index_result(&mut self, result: &SarifResult, run_index: usize, result_index: usize) {
    let primary_artifact_uri = self.extract_primary_artifact_uri(result);
    let key = match result.guid {
        Some(ref guid) => guid.clone(),
        None => format!("{}:{}", run_index, result_index),
    };
    let location = ResultLocation {
        run_index,
        result_index,
        guid: result.guid.clone(),
        rule_id: result.rule_id.clone(),
        primary_artifact_uri,
    };

    // Plain counters.
    self.stats.result_count += 1;
    if result.guid.is_some() {
        self.stats.results_with_guids += 1;
    }
    if result.locations.is_some() {
        self.stats.results_with_locations += 1;
    }

    // Secondary indices point back at the primary key.
    if let Some(rule_id) = result.rule_id.as_ref() {
        self.rule_to_results
            .entry(rule_id.clone())
            .or_default()
            .push(key.clone());
        self.stats.results_with_rule_ids += 1;
    }
    if let Some(uri) = location.primary_artifact_uri.as_ref() {
        self.artifact_to_results
            .entry(uri.clone())
            .or_default()
            .push(key.clone());
    }

    // Histogram buckets use the Debug rendering, with "None" for absent values.
    let level_label = match result.level.as_ref() {
        Some(level) => format!("{:?}", level),
        None => "None".to_string(),
    };
    *self.stats.results_by_level.entry(level_label).or_default() += 1;

    let kind_label = match result.kind.as_ref() {
        Some(kind) => format!("{:?}", kind),
        None => "None".to_string(),
    };
    *self.stats.results_by_kind.entry(kind_label).or_default() += 1;

    self.results.insert(key, (result.clone(), location));
}
/// Returns the artifact URI of the result's first location.
///
/// Yields `None` when the result has no locations, the list is empty, or
/// any link in the chain (physical location, artifact location, URI) is absent.
fn extract_primary_artifact_uri(&self, result: &SarifResult) -> Option<String> {
    let first_location = result
        .locations
        .as_ref()
        .and_then(|locations| locations.first())?;
    let physical = first_location.physical_location.as_ref()?;
    let artifact = physical.artifact_location.as_ref()?;
    artifact.uri.clone()
}
pub fn query(&self, query: &SarifQuery) -> QueryResults {
let start_time = std::time::Instant::now();
let mut matching_results = Vec::new();
let mut results_scanned = 0;
let mut indices_used = Vec::new();
if let Some(ref rule_ids) = query.rule_ids {
indices_used.push("rule_to_results".to_string());
for rule_id in rule_ids {
if let Some(result_guids) = self.rule_to_results.get(rule_id) {
for guid in result_guids {
if let Some((result, location)) = self.results.get(guid) {
if self.matches_query(result, location, query) {
matching_results.push((result.clone(), location.clone()));
}
results_scanned += 1;
}
}
}
}
} else if let Some(ref artifact_uris) = query.artifact_uris {
indices_used.push("artifact_to_results".to_string());
for uri in artifact_uris {
if let Some(result_guids) = self.artifact_to_results.get(uri) {
for guid in result_guids {
if let Some((result, location)) = self.results.get(guid) {
if self.matches_query(result, location, query) {
matching_results.push((result.clone(), location.clone()));
}
results_scanned += 1;
}
}
}
}
} else {
for (result, location) in self.results.values() {
if self.matches_query(result, location, query) {
matching_results.push((result.clone(), location.clone()));
}
results_scanned += 1;
}
}
let total_matches = matching_results.len();
let limited = if let Some(limit) = query.limit {
if matching_results.len() > limit {
matching_results.truncate(limit);
true
} else {
false
}
} else {
false
};
let execution_time = start_time.elapsed();
QueryResults {
results: matching_results,
total_matches,
limited,
execution_stats: QueryExecutionStats {
execution_time_ms: execution_time.as_millis() as u64,
results_scanned,
indices_used,
},
}
}
/// Returns `true` when `result` satisfies every filter that is set on `query`.
///
/// Filters are combined with logical AND; a filter left as `None` is ignored.
/// `location` supplies the run index and primary artifact URI recorded when
/// the result was indexed.
///
/// The `tools` filter is honoured by resolving each requested tool name
/// through `tool_to_runs` and requiring the result's run to be one of the
/// runs produced by a requested tool.
fn matches_query(
    &self,
    result: &SarifResult,
    location: &ResultLocation,
    query: &SarifQuery,
) -> bool {
    // Rule id: a result without a rule id can never satisfy a rule filter.
    if let Some(ref rule_ids) = query.rule_ids {
        match result.rule_id {
            Some(ref rule_id) if rule_ids.contains(rule_id) => {}
            _ => return false,
        }
    }

    // Artifact URI: matched against the primary (first-location) artifact only.
    if let Some(ref artifact_uris) = query.artifact_uris {
        match location.primary_artifact_uri {
            Some(ref uri) if artifact_uris.contains(uri) => {}
            _ => return false,
        }
    }

    // Level and kind are compared by their Debug rendering, "None" when absent.
    if let Some(ref levels) = query.levels {
        let result_level = result
            .level
            .as_ref()
            .map(|l| format!("{:?}", l))
            .unwrap_or_else(|| "None".to_string());
        if !levels.contains(&result_level) {
            return false;
        }
    }
    if let Some(ref kinds) = query.kinds {
        let result_kind = result
            .kind
            .as_ref()
            .map(|k| format!("{:?}", k))
            .unwrap_or_else(|| "None".to_string());
        if !kinds.contains(&result_kind) {
            return false;
        }
    }

    if let Some(ref run_indices) = query.run_indices {
        if !run_indices.contains(&location.run_index) {
            return false;
        }
    }

    // Tool filter: the result's run must belong to at least one requested tool.
    if let Some(ref tools) = query.tools {
        let produced_by_requested_tool = tools.iter().any(|tool| {
            self.tool_to_runs
                .get(tool)
                .is_some_and(|runs| runs.contains(&location.run_index))
        });
        if !produced_by_requested_tool {
            return false;
        }
    }

    if let Some(has_guid) = query.has_guid {
        if has_guid != result.guid.is_some() {
            return false;
        }
    }

    // Either full or partial fingerprints count as "has fingerprints".
    if let Some(has_fingerprints) = query.has_fingerprints {
        let result_has_fingerprints =
            result.fingerprints.is_some() || result.partial_fingerprints.is_some();
        if has_fingerprints != result_has_fingerprints {
            return false;
        }
    }

    // Case-insensitive substring match; a result without message text never matches.
    if let Some(ref message_contains) = query.message_contains {
        let needle = message_contains.to_lowercase();
        match result.message.text {
            Some(ref text) if text.to_lowercase().contains(&needle) => {}
            _ => return false,
        }
    }

    true
}
pub fn get_results_for_rule(&self, rule_id: &str) -> Vec<(SarifResult, ResultLocation)> {
let mut query = SarifQuery::default();
query.rule_ids = Some([rule_id.to_string()].into_iter().collect());
self.query(&query).results
}
pub fn get_results_for_artifact(
&self,
artifact_uri: &str,
) -> Vec<(SarifResult, ResultLocation)> {
let mut query = SarifQuery::default();
query.artifact_uris = Some([artifact_uri.to_string()].into_iter().collect());
self.query(&query).results
}
/// Returns the ids of all indexed rules, in the map's (arbitrary) iteration order.
pub fn get_rule_ids(&self) -> Vec<String> {
    let mut ids = Vec::with_capacity(self.rules.len());
    ids.extend(self.rules.keys().cloned());
    ids
}
/// Returns the URIs of all indexed artifacts, in the map's (arbitrary) iteration order.
pub fn get_artifact_uris(&self) -> Vec<String> {
    let mut uris = Vec::with_capacity(self.artifacts.len());
    uris.extend(self.artifacts.keys().cloned());
    uris
}
/// Returns a shared reference to the statistics gathered while indexing.
pub fn get_stats(&self) -> &IndexStats {
&self.stats
}
/// Returns a shared reference to the metadata describing the indexed log.
pub fn get_metadata(&self) -> &IndexMetadata {
&self.metadata
}
/// Returns the stored array position of the artifact with the given URI,
/// or `None` when no such artifact was indexed.
pub fn find_artifact_by_uri(&self, uri: &str) -> Option<usize> {
    let (_, position) = self.artifacts.get(uri)?;
    Some(*position)
}
/// Returns the position of `rule_id` among the keys of the `rules` map,
/// or `None` when the rule is not indexed.
///
/// NOTE(review): the position reflects `HashMap` iteration order, which is
/// arbitrary. It is not the rule's index in the tool's `rules` array, so it
/// is only reliable as a presence check.
pub fn find_rule_by_id(&self, rule_id: &str) -> Option<usize> {
    self.rules
        .keys()
        .enumerate()
        .find_map(|(position, id)| (id == rule_id).then_some(position))
}
/// Looks up a result by its key in `results` and returns its position within
/// its run's `results` array.
///
/// Results without a GUID are stored under a `"<run_index>:<result_index>"`
/// key, so passing such a string also finds them.
pub fn find_result_by_guid(&self, guid: &str) -> Option<usize> {
    let (_, location) = self.results.get(guid)?;
    Some(location.result_index)
}
}
impl SarifQuery {
pub fn new() -> Self {
Self::default()
}
pub fn with_rule_id(mut self, rule_id: impl Into<String>) -> Self {
self.rule_ids = Some([rule_id.into()].into_iter().collect());
self
}
pub fn with_rule_ids(mut self, rule_ids: impl IntoIterator<Item = String>) -> Self {
self.rule_ids = Some(rule_ids.into_iter().collect());
self
}
pub fn with_artifact_uri(mut self, uri: impl Into<String>) -> Self {
self.artifact_uris = Some([uri.into()].into_iter().collect());
self
}
pub fn with_level(mut self, level: impl Into<String>) -> Self {
self.levels = Some([level.into()].into_iter().collect());
self
}
pub fn with_tool(mut self, tool: impl Into<String>) -> Self {
self.tools = Some([tool.into()].into_iter().collect());
self
}
pub fn with_message_containing(mut self, text: impl Into<String>) -> Self {
self.message_contains = Some(text.into());
self
}
pub fn with_limit(mut self, limit: usize) -> Self {
self.limit = Some(limit);
self
}
pub fn with_guid_presence(mut self, has_guid: bool) -> Self {
self.has_guid = Some(has_guid);
self
}
}
/// The default index is the empty index produced by `SarifIndex::new`.
impl Default for SarifIndex {
fn default() -> Self {
Self::new()
}
}
// Unit tests for index construction, querying and statistics.
// Fixtures are built with the crate's `SarifLogBuilder`.
#[cfg(test)]
mod tests {
use super::*;
use crate::builder::SarifLogBuilder;
// A freshly created index has zeroed counters and empty maps.
#[test]
fn test_index_creation() {
let index = SarifIndex::new();
assert_eq!(index.stats.run_count, 0);
assert_eq!(index.stats.result_count, 0);
assert!(index.rules.is_empty());
assert!(index.artifacts.is_empty());
}
// Indexing a single-result log records the run, the result, the version and the tool name.
#[test]
fn test_index_from_sarif_log() {
let sarif = SarifLogBuilder::single_error("test-tool", "Test error message", "test.rs", 42)
.build_unchecked();
let index = SarifIndex::from_sarif_log(&sarif);
assert_eq!(index.stats.run_count, 1);
assert_eq!(index.stats.result_count, 1);
assert_eq!(index.metadata.version, "2.1.0");
assert!(index.tool_to_runs.contains_key("test-tool"));
}
// A query filtered by rule id returns the single matching result and is not limited.
#[test]
fn test_query_by_rule_id() {
let sarif = SarifLogBuilder::error_finding(
"eslint",
"no-unused-vars",
"Variable 'x' is assigned but never used",
"src/app.js",
15,
5,
15,
6,
)
.build_unchecked();
let index = SarifIndex::from_sarif_log(&sarif);
let query = SarifQuery::new().with_rule_id("no-unused-vars");
let results = index.query(&query);
assert_eq!(results.results.len(), 1);
assert_eq!(results.total_matches, 1);
assert!(!results.limited);
}
// The convenience lookup by rule id returns the result together with its recorded rule id.
#[test]
fn test_get_results_for_rule() {
let sarif = SarifLogBuilder::error_finding(
"clippy",
"unused_variable",
"unused variable: `x`",
"src/main.rs",
10,
5,
10,
6,
)
.build_unchecked();
let index = SarifIndex::from_sarif_log(&sarif);
let results = index.get_results_for_rule("unused_variable");
assert_eq!(results.len(), 1);
assert_eq!(results[0].1.rule_id, Some("unused_variable".to_string()));
}
// Statistics count the run and the result, and bucket the result under the "Error" level.
#[test]
fn test_statistics() {
let sarif = SarifLogBuilder::error_finding(
"tool",
"RULE001",
"Error message",
"file.rs",
1,
1,
1,
10,
)
.build_unchecked();
let index = SarifIndex::from_sarif_log(&sarif);
let stats = index.get_stats();
assert_eq!(stats.run_count, 1);
assert_eq!(stats.result_count, 1);
assert_eq!(stats.results_by_level.get("Error"), Some(&1));
}
// Lookups by artifact URI and by rule id both find the same single result.
#[test]
fn test_legacy_compatibility() {
let sarif = SarifLogBuilder::error_finding(
"test-tool",
"TEST001",
"Test error message",
"test.rs",
42,
1,
42,
10,
)
.build_unchecked();
let index = SarifIndex::from_sarif_log(&sarif);
let results = index.get_results_for_artifact("test.rs");
assert_eq!(results.len(), 1);
let rule_results = index.get_results_for_rule("TEST001");
assert_eq!(rule_results.len(), 1);
assert_eq!(index.stats.result_count, 1);
assert_eq!(index.stats.results_with_rule_ids, 1);
}
}