Skip to main content

rma_common/
lib.rs

1//! Common types and utilities for Rust Monorepo Analyzer (RMA)
2//!
3//! This crate provides shared data structures, error types, and utilities
4//! used across all RMA components.
5
6pub mod config;
7pub mod suppression;
8
9pub use config::{
10    AllowConfig, AllowType, Baseline, BaselineConfig, BaselineEntry, BaselineMode,
11    CURRENT_CONFIG_VERSION, ConfigLoadResult, ConfigSource, ConfigWarning,
12    DEFAULT_EXAMPLE_IGNORE_PATHS, DEFAULT_TEST_IGNORE_PATHS, DEFAULT_VENDOR_IGNORE_PATHS,
13    EffectiveConfig, Fingerprint, GosecProviderConfig, InlineSuppression, OsvEcosystem,
14    OsvProviderConfig, OxcProviderConfig, OxlintProviderConfig, PmdProviderConfig, Profile,
15    ProfileThresholds, ProfilesConfig, ProviderType, ProvidersConfig, RULES_ALWAYS_ENABLED,
16    RmaTomlConfig, RulesConfig, RulesetsConfig, ScanConfig, SuppressionConfig, SuppressionEngine,
17    SuppressionResult, SuppressionSource, SuppressionType, ThresholdOverride, WarningLevel,
18    parse_expiration_days, parse_inline_suppressions,
19};
20
21use serde::{Deserialize, Serialize};
22use std::path::PathBuf;
23use thiserror::Error;
24
25/// Core error types for RMA operations
26#[derive(Error, Debug)]
27pub enum RmaError {
28    #[error("IO error: {0}")]
29    Io(#[from] std::io::Error),
30
31    #[error("Parse error in {file}: {message}")]
32    Parse { file: PathBuf, message: String },
33
34    #[error("Analysis error: {0}")]
35    Analysis(String),
36
37    #[error("Index error: {0}")]
38    Index(String),
39
40    #[error("Unsupported language: {0}")]
41    UnsupportedLanguage(String),
42
43    #[error("Configuration error: {0}")]
44    Config(String),
45}
46
47/// Supported programming languages (30+ tree-sitter grammars)
48#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
49#[serde(rename_all = "lowercase")]
50pub enum Language {
51    // Systems languages
52    Rust,
53    C,
54    Cpp,
55    Zig,
56
57    // JVM languages
58    Java,
59    Kotlin,
60    Scala,
61
62    // Web languages
63    JavaScript,
64    TypeScript,
65    Html,
66    Css,
67    Scss,
68    Vue,
69    Svelte,
70
71    // Scripting languages
72    Python,
73    Ruby,
74    Php,
75    Lua,
76    Perl,
77
78    // Functional languages
79    Haskell,
80    OCaml,
81    Elixir,
82    Erlang,
83
84    // Other compiled languages
85    Go,
86    Swift,
87    CSharp,
88    Dart,
89
90    // Data/Config languages
91    Json,
92    Yaml,
93    Toml,
94    Sql,
95    GraphQL,
96
97    // Infrastructure
98    Bash,
99    Dockerfile,
100    Hcl, // Terraform
101    Nix,
102
103    // Markup
104    Markdown,
105    Latex,
106
107    // Other
108    Solidity, // Smart contracts
109    Wasm,     // WebAssembly text format
110    Protobuf,
111
112    Unknown,
113}
114
115impl Language {
116    /// Detect language from file extension
117    #[inline]
118    pub fn from_extension(ext: &str) -> Self {
119        match ext.to_lowercase().as_str() {
120            // Systems
121            "rs" => Language::Rust,
122            "c" | "h" => Language::C,
123            "cc" | "cpp" | "cxx" | "hpp" | "hxx" | "hh" => Language::Cpp,
124            "zig" => Language::Zig,
125
126            // JVM
127            "java" => Language::Java,
128            "kt" | "kts" => Language::Kotlin,
129            "scala" | "sc" => Language::Scala,
130
131            // Web
132            "js" | "mjs" | "cjs" | "jsx" => Language::JavaScript,
133            "ts" | "tsx" | "mts" | "cts" => Language::TypeScript,
134            "html" | "htm" => Language::Html,
135            "css" => Language::Css,
136            "scss" | "sass" => Language::Scss,
137            "vue" => Language::Vue,
138            "svelte" => Language::Svelte,
139
140            // Scripting
141            "py" | "pyi" | "pyw" => Language::Python,
142            "rb" | "erb" | "rake" | "gemspec" => Language::Ruby,
143            "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Language::Php,
144            "lua" => Language::Lua,
145            "pl" | "pm" | "t" => Language::Perl,
146
147            // Functional
148            "hs" | "lhs" => Language::Haskell,
149            "ml" | "mli" => Language::OCaml,
150            "ex" | "exs" => Language::Elixir,
151            "erl" | "hrl" => Language::Erlang,
152
153            // Other compiled
154            "go" => Language::Go,
155            "swift" => Language::Swift,
156            "cs" | "csx" => Language::CSharp,
157            "dart" => Language::Dart,
158
159            // Data/Config
160            "json" | "jsonc" | "json5" => Language::Json,
161            "yaml" | "yml" => Language::Yaml,
162            "toml" => Language::Toml,
163            "sql" | "mysql" | "pgsql" | "plsql" => Language::Sql,
164            "graphql" | "gql" => Language::GraphQL,
165
166            // Infrastructure
167            "sh" | "bash" | "zsh" | "fish" => Language::Bash,
168            "dockerfile" => Language::Dockerfile,
169            "tf" | "tfvars" | "hcl" => Language::Hcl,
170            "nix" => Language::Nix,
171
172            // Markup
173            "md" | "markdown" | "mdx" => Language::Markdown,
174            "tex" | "latex" | "sty" | "cls" => Language::Latex,
175
176            // Other
177            "sol" => Language::Solidity,
178            "wat" | "wast" => Language::Wasm,
179            "proto" | "proto3" => Language::Protobuf,
180
181            _ => Language::Unknown,
182        }
183    }
184
185    /// Get file extensions for this language
186    #[inline]
187    pub fn extensions(&self) -> &'static [&'static str] {
188        match self {
189            Language::Rust => &["rs"],
190            Language::C => &["c", "h"],
191            Language::Cpp => &["cc", "cpp", "cxx", "hpp", "hxx", "hh"],
192            Language::Zig => &["zig"],
193            Language::Java => &["java"],
194            Language::Kotlin => &["kt", "kts"],
195            Language::Scala => &["scala", "sc"],
196            Language::JavaScript => &["js", "mjs", "cjs", "jsx"],
197            Language::TypeScript => &["ts", "tsx", "mts", "cts"],
198            Language::Html => &["html", "htm"],
199            Language::Css => &["css"],
200            Language::Scss => &["scss", "sass"],
201            Language::Vue => &["vue"],
202            Language::Svelte => &["svelte"],
203            Language::Python => &["py", "pyi", "pyw"],
204            Language::Ruby => &["rb", "erb", "rake", "gemspec"],
205            Language::Php => &["php", "phtml"],
206            Language::Lua => &["lua"],
207            Language::Perl => &["pl", "pm", "t"],
208            Language::Haskell => &["hs", "lhs"],
209            Language::OCaml => &["ml", "mli"],
210            Language::Elixir => &["ex", "exs"],
211            Language::Erlang => &["erl", "hrl"],
212            Language::Go => &["go"],
213            Language::Swift => &["swift"],
214            Language::CSharp => &["cs", "csx"],
215            Language::Dart => &["dart"],
216            Language::Json => &["json", "jsonc", "json5"],
217            Language::Yaml => &["yaml", "yml"],
218            Language::Toml => &["toml"],
219            Language::Sql => &["sql", "mysql", "pgsql"],
220            Language::GraphQL => &["graphql", "gql"],
221            Language::Bash => &["sh", "bash", "zsh", "fish"],
222            Language::Dockerfile => &["dockerfile"],
223            Language::Hcl => &["tf", "tfvars", "hcl"],
224            Language::Nix => &["nix"],
225            Language::Markdown => &["md", "markdown", "mdx"],
226            Language::Latex => &["tex", "latex", "sty", "cls"],
227            Language::Solidity => &["sol"],
228            Language::Wasm => &["wat", "wast"],
229            Language::Protobuf => &["proto", "proto3"],
230            Language::Unknown => &[],
231        }
232    }
233
234    /// Check if this language is a systems language (for memory safety analysis)
235    #[inline]
236    pub fn is_systems_language(&self) -> bool {
237        matches!(
238            self,
239            Language::Rust | Language::C | Language::Cpp | Language::Zig
240        )
241    }
242
243    /// Check if this language is a scripting language
244    #[inline]
245    pub fn is_scripting_language(&self) -> bool {
246        matches!(
247            self,
248            Language::JavaScript
249                | Language::TypeScript
250                | Language::Python
251                | Language::Ruby
252                | Language::Php
253                | Language::Lua
254                | Language::Perl
255        )
256    }
257
258    /// Check if this language is a JVM language
259    #[inline]
260    pub fn is_jvm_language(&self) -> bool {
261        matches!(self, Language::Java | Language::Kotlin | Language::Scala)
262    }
263
264    /// Check if this language is a functional language
265    #[inline]
266    pub fn is_functional_language(&self) -> bool {
267        matches!(
268            self,
269            Language::Haskell | Language::OCaml | Language::Elixir | Language::Erlang
270        )
271    }
272
273    /// Check if this language is a data/config language
274    #[inline]
275    pub fn is_data_language(&self) -> bool {
276        matches!(
277            self,
278            Language::Json | Language::Yaml | Language::Toml | Language::Sql | Language::GraphQL
279        )
280    }
281
282    /// Check if this language supports security scanning (has security-relevant constructs)
283    #[inline]
284    pub fn supports_security_scanning(&self) -> bool {
285        !matches!(
286            self,
287            Language::Unknown | Language::Markdown | Language::Latex | Language::Wasm
288        )
289    }
290}
291
292impl std::fmt::Display for Language {
293    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
294        match self {
295            Language::Rust => write!(f, "rust"),
296            Language::C => write!(f, "c"),
297            Language::Cpp => write!(f, "cpp"),
298            Language::Zig => write!(f, "zig"),
299            Language::Java => write!(f, "java"),
300            Language::Kotlin => write!(f, "kotlin"),
301            Language::Scala => write!(f, "scala"),
302            Language::JavaScript => write!(f, "javascript"),
303            Language::TypeScript => write!(f, "typescript"),
304            Language::Html => write!(f, "html"),
305            Language::Css => write!(f, "css"),
306            Language::Scss => write!(f, "scss"),
307            Language::Vue => write!(f, "vue"),
308            Language::Svelte => write!(f, "svelte"),
309            Language::Python => write!(f, "python"),
310            Language::Ruby => write!(f, "ruby"),
311            Language::Php => write!(f, "php"),
312            Language::Lua => write!(f, "lua"),
313            Language::Perl => write!(f, "perl"),
314            Language::Haskell => write!(f, "haskell"),
315            Language::OCaml => write!(f, "ocaml"),
316            Language::Elixir => write!(f, "elixir"),
317            Language::Erlang => write!(f, "erlang"),
318            Language::Go => write!(f, "go"),
319            Language::Swift => write!(f, "swift"),
320            Language::CSharp => write!(f, "csharp"),
321            Language::Dart => write!(f, "dart"),
322            Language::Json => write!(f, "json"),
323            Language::Yaml => write!(f, "yaml"),
324            Language::Toml => write!(f, "toml"),
325            Language::Sql => write!(f, "sql"),
326            Language::GraphQL => write!(f, "graphql"),
327            Language::Bash => write!(f, "bash"),
328            Language::Dockerfile => write!(f, "dockerfile"),
329            Language::Hcl => write!(f, "hcl"),
330            Language::Nix => write!(f, "nix"),
331            Language::Markdown => write!(f, "markdown"),
332            Language::Latex => write!(f, "latex"),
333            Language::Solidity => write!(f, "solidity"),
334            Language::Wasm => write!(f, "wasm"),
335            Language::Protobuf => write!(f, "protobuf"),
336            Language::Unknown => write!(f, "unknown"),
337        }
338    }
339}
340
341/// Severity levels for findings
342#[derive(
343    Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Default,
344)]
345#[serde(rename_all = "lowercase")]
346pub enum Severity {
347    Info,
348    #[default]
349    Warning,
350    Error,
351    Critical,
352}
353
354impl std::fmt::Display for Severity {
355    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
356        match self {
357            Severity::Info => write!(f, "info"),
358            Severity::Warning => write!(f, "warning"),
359            Severity::Error => write!(f, "error"),
360            Severity::Critical => write!(f, "critical"),
361        }
362    }
363}
364
365/// Confidence level for findings (how certain we are this is a real issue)
366#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
367#[serde(rename_all = "lowercase")]
368pub enum Confidence {
369    /// Low confidence - may be a false positive, requires manual review
370    Low,
371    /// Medium confidence - likely an issue but context-dependent
372    #[default]
373    Medium,
374    /// High confidence - almost certainly a real issue
375    High,
376}
377
378impl std::fmt::Display for Confidence {
379    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380        match self {
381            Confidence::Low => write!(f, "low"),
382            Confidence::Medium => write!(f, "medium"),
383            Confidence::High => write!(f, "high"),
384        }
385    }
386}
387
388/// Category of finding
389#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
390#[serde(rename_all = "lowercase")]
391pub enum FindingCategory {
392    /// Security vulnerabilities
393    #[default]
394    Security,
395    /// Code quality and maintainability
396    Quality,
397    /// Performance issues
398    Performance,
399    /// Style and formatting
400    Style,
401}
402
403impl std::fmt::Display for FindingCategory {
404    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
405        match self {
406            FindingCategory::Security => write!(f, "security"),
407            FindingCategory::Quality => write!(f, "quality"),
408            FindingCategory::Performance => write!(f, "performance"),
409            FindingCategory::Style => write!(f, "style"),
410        }
411    }
412}
413
414/// A source code location
415#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
416pub struct SourceLocation {
417    pub file: PathBuf,
418    pub start_line: usize,
419    pub start_column: usize,
420    pub end_line: usize,
421    pub end_column: usize,
422}
423
424impl SourceLocation {
425    pub fn new(
426        file: PathBuf,
427        start_line: usize,
428        start_column: usize,
429        end_line: usize,
430        end_column: usize,
431    ) -> Self {
432        Self {
433            file,
434            start_line,
435            start_column,
436            end_line,
437            end_column,
438        }
439    }
440}
441
442impl std::fmt::Display for SourceLocation {
443    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
444        write!(
445            f,
446            "{}:{}:{}-{}:{}",
447            self.file.display(),
448            self.start_line,
449            self.start_column,
450            self.end_line,
451            self.end_column
452        )
453    }
454}
455
456/// A suggested fix for a finding with precise byte offsets for auto-fix.
457#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
458pub struct Fix {
459    /// Human-readable description of the fix (e.g., "Replace yaml.load with yaml.safe_load")
460    pub description: String,
461    /// The replacement text to apply
462    pub replacement: String,
463    /// Start byte offset in the source
464    pub start_byte: usize,
465    /// End byte offset in the source (exclusive)
466    pub end_byte: usize,
467}
468
469impl Fix {
470    /// Create a new Fix with the given parameters
471    pub fn new(
472        description: impl Into<String>,
473        replacement: impl Into<String>,
474        start_byte: usize,
475        end_byte: usize,
476    ) -> Self {
477        Self {
478            description: description.into(),
479            replacement: replacement.into(),
480            start_byte,
481            end_byte,
482        }
483    }
484}
485
486/// A security or code quality finding
487#[derive(Debug, Clone, Serialize, Deserialize)]
488pub struct Finding {
489    pub id: String,
490    pub rule_id: String,
491    pub message: String,
492    pub severity: Severity,
493    pub location: SourceLocation,
494    pub language: Language,
495    #[serde(skip_serializing_if = "Option::is_none")]
496    pub snippet: Option<String>,
497    #[serde(skip_serializing_if = "Option::is_none")]
498    pub suggestion: Option<String>,
499    /// Structured fix for auto-fix with precise byte offsets
500    #[serde(skip_serializing_if = "Option::is_none")]
501    pub fix: Option<Fix>,
502    /// Confidence level (how certain we are this is a real issue)
503    #[serde(default)]
504    pub confidence: Confidence,
505    /// Category of finding (security, quality, performance, style)
506    #[serde(default)]
507    pub category: FindingCategory,
508    /// Stable fingerprint for baseline comparison (sha256 hash)
509    #[serde(skip_serializing_if = "Option::is_none")]
510    pub fingerprint: Option<String>,
511    /// Additional properties (e.g., import_hits, import_files_sample for OSV findings)
512    #[serde(skip_serializing_if = "Option::is_none", default)]
513    pub properties: Option<std::collections::HashMap<String, serde_json::Value>>,
514    /// Number of occurrences when deduplicated (same rule in same file)
515    /// None or 1 means single occurrence, >1 means multiple occurrences consolidated
516    #[serde(skip_serializing_if = "Option::is_none", default)]
517    pub occurrence_count: Option<usize>,
518    /// Additional line numbers when occurrence_count > 1
519    #[serde(skip_serializing_if = "Option::is_none", default)]
520    pub additional_locations: Option<Vec<usize>>,
521}
522
523impl Finding {
524    /// Compute a stable fingerprint for this finding
525    /// Based on: rule_id + relative path + normalized snippet
526    pub fn compute_fingerprint(&mut self) {
527        use sha2::{Digest, Sha256};
528
529        let mut hasher = Sha256::new();
530        hasher.update(self.rule_id.as_bytes());
531        hasher.update(self.location.file.to_string_lossy().as_bytes());
532
533        // Normalize snippet by removing whitespace
534        if let Some(snippet) = &self.snippet {
535            let normalized: String = snippet.split_whitespace().collect::<Vec<_>>().join(" ");
536            hasher.update(normalized.as_bytes());
537        }
538
539        let hash = hasher.finalize();
540        self.fingerprint = Some(format!("sha256:{:x}", hash)[..23].to_string());
541    }
542}
543
544/// Deduplicate findings by grouping same rule in same file
545///
546/// When the same rule fires multiple times in the same file, consolidates them
547/// into a single finding with `occurrence_count` set to the total count.
548/// The first occurrence is kept as the representative, with additional line
549/// numbers stored in `additional_locations`.
550///
551/// # Arguments
552/// * `findings` - Vector of findings to deduplicate
553///
554/// # Returns
555/// * Deduplicated vector of findings with occurrence counts
556pub fn deduplicate_findings(findings: Vec<Finding>) -> Vec<Finding> {
557    use std::collections::HashMap;
558
559    // Group by (file, rule_id)
560    let mut grouped: HashMap<(String, String), Vec<Finding>> = HashMap::new();
561
562    for finding in findings {
563        let key = (
564            finding.location.file.to_string_lossy().to_string(),
565            finding.rule_id.clone(),
566        );
567        grouped.entry(key).or_default().push(finding);
568    }
569
570    // Consolidate each group
571    let mut result = Vec::new();
572    for ((_file, _rule_id), mut group) in grouped {
573        if group.len() == 1 {
574            // Single occurrence - no deduplication needed
575            result.push(group.remove(0));
576        } else {
577            // Multiple occurrences - consolidate
578            let count = group.len();
579
580            // Sort by line number to get the first occurrence
581            group.sort_by_key(|f| f.location.start_line);
582
583            // Take the first as representative
584            let mut representative = group.remove(0);
585
586            // Collect additional line numbers
587            let additional_lines: Vec<usize> =
588                group.iter().map(|f| f.location.start_line).collect();
589
590            representative.occurrence_count = Some(count);
591            representative.additional_locations = Some(additional_lines);
592
593            // Update message to indicate deduplication
594            representative.message = format!(
595                "{} ({} occurrences in this file)",
596                representative.message, count
597            );
598
599            result.push(representative);
600        }
601    }
602
603    // Sort by file and line for consistent output
604    result.sort_by(|a, b| {
605        let file_cmp = a.location.file.cmp(&b.location.file);
606        if file_cmp == std::cmp::Ordering::Equal {
607            a.location.start_line.cmp(&b.location.start_line)
608        } else {
609            file_cmp
610        }
611    });
612
613    result
614}
615
616/// Code metrics for a file or function
617#[derive(Debug, Clone, Default, Serialize, Deserialize)]
618pub struct CodeMetrics {
619    pub lines_of_code: usize,
620    pub lines_of_comments: usize,
621    pub blank_lines: usize,
622    pub cyclomatic_complexity: usize,
623    pub cognitive_complexity: usize,
624    pub function_count: usize,
625    pub class_count: usize,
626    pub import_count: usize,
627}
628
629/// Summary of a scan operation
630#[derive(Debug, Clone, Default, Serialize, Deserialize)]
631pub struct ScanSummary {
632    pub files_scanned: usize,
633    pub files_skipped: usize,
634    pub total_lines: usize,
635    pub findings_by_severity: std::collections::HashMap<String, usize>,
636    pub languages: std::collections::HashMap<String, usize>,
637    pub duration_ms: u64,
638}
639
640/// Configuration for RMA operations
641#[derive(Debug, Clone, Serialize, Deserialize)]
642pub struct RmaConfig {
643    /// Paths to exclude from scanning
644    #[serde(default)]
645    pub exclude_patterns: Vec<String>,
646
647    /// Languages to scan (empty = all supported)
648    #[serde(default)]
649    pub languages: Vec<Language>,
650
651    /// Minimum severity to report
652    #[serde(default = "default_min_severity")]
653    pub min_severity: Severity,
654
655    /// Maximum file size in bytes
656    #[serde(default = "default_max_file_size")]
657    pub max_file_size: usize,
658
659    /// Number of parallel workers (0 = auto)
660    #[serde(default)]
661    pub parallelism: usize,
662
663    /// Enable incremental mode
664    #[serde(default)]
665    pub incremental: bool,
666}
667
668fn default_min_severity() -> Severity {
669    Severity::Warning
670}
671
/// Serde default for `RmaConfig::max_file_size`: 10 MiB, expressed in bytes.
fn default_max_file_size() -> usize {
    const BYTES_PER_MIB: usize = 1024 * 1024;
    10 * BYTES_PER_MIB
}
675
676impl Default for RmaConfig {
677    fn default() -> Self {
678        Self {
679            exclude_patterns: vec![
680                "**/node_modules/**".into(),
681                "**/target/**".into(),
682                "**/vendor/**".into(),
683                "**/.git/**".into(),
684                "**/dist/**".into(),
685                "**/build/**".into(),
686            ],
687            languages: vec![],
688            min_severity: default_min_severity(),
689            max_file_size: default_max_file_size(),
690            parallelism: 0,
691            incremental: false,
692        }
693    }
694}
695
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions resolve to their language; anything else is `Unknown`.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Language::Rust),
            ("js", Language::JavaScript),
            ("py", Language::Python),
            ("unknown", Language::Unknown),
        ];
        for (ext, expected) in cases.iter() {
            assert_eq!(Language::from_extension(ext), *expected);
        }
    }

    /// Each severity ranks strictly below the next one in declaration order.
    #[test]
    fn test_severity_ordering() {
        let ascending = [
            Severity::Info,
            Severity::Warning,
            Severity::Error,
            Severity::Critical,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }

    /// A location renders as `file:line:col-line:col`.
    #[test]
    fn test_source_location_display() {
        let loc = SourceLocation::new(PathBuf::from("test.rs"), 10, 5, 10, 15);
        let rendered = loc.to_string();
        assert_eq!(rendered, "test.rs:10:5-10:15");
    }
}