agentshield/ir/
mod.rs

1//! Unified Intermediate Representation for agent extension analysis.
2//!
3//! All adapters produce a `ScanTarget`. All detectors consume a `ScanTarget`.
4//! This decouples framework-specific parsing from security analysis.
5
6pub mod data_surface;
7pub mod dependency_surface;
8pub mod execution_surface;
9pub mod provenance_surface;
10pub mod taint_builder;
11pub mod tool_surface;
12
13use serde::{Deserialize, Serialize};
14use std::path::PathBuf;
15
16pub use data_surface::DataSurface;
17pub use dependency_surface::DependencySurface;
18pub use execution_surface::ExecutionSurface;
19pub use provenance_surface::ProvenanceSurface;
20pub use tool_surface::ToolSurface;
21
/// Complete scan target — the unified IR that all analysis operates on.
///
/// Bundles the extension's identity (`name`, `framework`, `root_path`) with
/// one field per analysis surface and the raw source files. Adapters produce
/// this value and detectors consume it, so framework-specific parsing never
/// leaks into security analysis.
///
/// Derives `Serialize`/`Deserialize`, so a target can be written out and read
/// back with serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanTarget {
    /// Human-readable name of the extension.
    pub name: String,
    /// Framework that produced this target.
    pub framework: Framework,
    /// Root directory of the extension.
    pub root_path: PathBuf,
    /// Tool definitions declared by the extension.
    pub tools: Vec<ToolSurface>,
    /// Execution capabilities discovered in source code.
    pub execution: ExecutionSurface,
    /// Data flow surfaces (inputs, outputs, sources, sinks).
    pub data: DataSurface,
    /// Dependency information.
    pub dependencies: DependencySurface,
    /// Provenance metadata (author, repo, signatures).
    pub provenance: ProvenanceSurface,
    /// Raw source files included in the scan.
    pub source_files: Vec<SourceFile>,
}
44
/// Which agent framework this extension targets.
///
/// Serialized with serde's `snake_case` renaming, so the wire names are
/// `mcp`, `open_claw`, `hermes_agent`, `lang_chain`, `crew_ai`,
/// `gpt_actions`, `cursor_rules` and `unknown`. The `Display` impl yields the
/// human-readable product names instead (e.g. `MCP`, `CrewAI`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Framework {
    Mcp,
    OpenClaw,
    HermesAgent,
    LangChain,
    CrewAi,
    GptActions,
    CursorRules,
    // NOTE(review): presumably the fallback when no framework is recognized —
    // confirm against the adapters.
    Unknown,
}
58
59impl std::fmt::Display for Framework {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        match self {
62            Self::Mcp => write!(f, "MCP"),
63            Self::OpenClaw => write!(f, "OpenClaw"),
64            Self::HermesAgent => write!(f, "Hermes Agent"),
65            Self::LangChain => write!(f, "LangChain"),
66            Self::CrewAi => write!(f, "CrewAI"),
67            Self::GptActions => write!(f, "GPT Actions"),
68            Self::CursorRules => write!(f, "Cursor Rules"),
69            Self::Unknown => write!(f, "Unknown"),
70        }
71    }
72}
73
/// A source file included in the scan.
///
/// Carries the file's text together with identifying metadata so detectors
/// can work from the IR without re-reading the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceFile {
    /// Path of the file.
    // NOTE(review): not visible here whether this is absolute or relative to
    // `ScanTarget::root_path` — confirm against the adapters.
    pub path: PathBuf,
    /// Language of the file (see [`Language`]).
    pub language: Language,
    /// Full text of the file.
    pub content: String,
    /// Size of the file in bytes.
    // NOTE(review): presumably the on-disk size rather than `content.len()` —
    // confirm where this is populated.
    pub size_bytes: u64,
    /// Hash of the file content, stored as a string.
    // NOTE(review): hash algorithm and encoding are not specified here —
    // confirm where this is populated.
    pub content_hash: String,
}
83
/// Programming language of a source file.
///
/// Serialized with serde's `lowercase` renaming, so variants appear on the
/// wire as e.g. `python`, `typescript`, `javascript`. Use
/// [`Language::from_extension`] to derive a value from a file extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Language {
    Python,
    TypeScript,
    JavaScript,
    Shell,
    Json,
    Toml,
    Yaml,
    Markdown,
    /// Any extension that `from_extension` does not recognize.
    Unknown,
}
98
99impl Language {
100    pub fn from_extension(ext: &str) -> Self {
101        match ext.to_lowercase().as_str() {
102            "py" => Self::Python,
103            "ts" | "tsx" => Self::TypeScript,
104            "js" | "jsx" | "mjs" | "cjs" => Self::JavaScript,
105            "sh" | "bash" | "zsh" => Self::Shell,
106            "json" => Self::Json,
107            "toml" => Self::Toml,
108            "yml" | "yaml" => Self::Yaml,
109            "md" | "markdown" => Self::Markdown,
110            _ => Self::Unknown,
111        }
112    }
113}
114
/// Location in source code.
///
/// Identifies a start position in a file and, optionally, where the span
/// ends.
// NOTE(review): whether `line`/`column` are 0- or 1-based is not established
// in this module — confirm against the parsers that build these values.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct SourceLocation {
    /// File the location refers to.
    pub file: PathBuf,
    /// Line of the start position.
    pub line: usize,
    /// Column of the start position.
    pub column: usize,
    /// Line of the end position, when known.
    pub end_line: Option<usize>,
    /// Column of the end position, when known.
    pub end_column: Option<usize>,
}
124
/// Where a function argument originates — the key taint abstraction.
///
/// Detectors don't need full taint analysis. They just need to know
/// where a function argument came from.
///
/// Variant names are serialized in `snake_case` (`literal`, `parameter`,
/// `env_var`, `interpolated`, `unknown`, `sanitized`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ArgumentSource {
    /// Hardcoded literal string — generally safe. Carries the literal text.
    Literal(String),
    /// Comes from function parameter — potentially user/LLM-controlled.
    Parameter { name: String },
    /// Comes from environment variable.
    EnvVar { name: String },
    /// Constructed via string formatting/concatenation — dangerous.
    Interpolated,
    /// Unable to determine statically.
    Unknown,
    /// Parameter was sanitized before being passed (e.g., via `validatePath`).
    /// `sanitizer` names the sanitizing function.
    Sanitized { sanitizer: String },
}
145
/// The family of sink an argument flows into.
///
/// A sanitizer only neutralizes taint for the sink family it actually protects:
/// a path validator makes a value safe for a file sink but not for a network
/// sink, and a type coercion (`str()`/`Number()`) does not sanitize any
/// injection sink. Detectors pass the sink they guard so a `Sanitized` argument
/// is only treated as safe when its sanitizer category matches.
///
/// Unlike the other IR types in this module, this enum does not derive
/// `Serialize`/`Deserialize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SinkClass {
    /// Shell/command execution.
    Command,
    /// Filesystem path.
    FilePath,
    /// Network URL/host.
    NetworkUrl,
    /// Dynamic code execution (eval and friends).
    DynamicExec,
}
164
165impl ArgumentSource {
166    /// Whether this source is potentially attacker-controlled, ignoring sink
167    /// category. Treats any `Sanitized` value as safe.
168    ///
169    /// Prefer [`ArgumentSource::is_tainted_for_sink`] in sink detectors: a
170    /// sanitizer of the wrong category (e.g. a URL validator guarding a file
171    /// path) must not suppress the finding.
172    pub fn is_tainted(&self) -> bool {
173        !matches!(self, Self::Literal(_) | Self::Sanitized { .. })
174    }
175
176    /// Whether this source is tainted for a specific sink family.
177    ///
178    /// A `Sanitized` value is safe only when its sanitizer category protects
179    /// `sink`; otherwise it stays tainted. `Literal` is always safe; every
180    /// other source is always tainted.
181    pub fn is_tainted_for_sink(&self, sink: SinkClass) -> bool {
182        match self {
183            Self::Literal(_) => false,
184            Self::Sanitized { sanitizer } => {
185                !crate::analysis::cross_file::sanitizer_allows_sink(sanitizer, sink)
186            }
187            _ => true,
188        }
189    }
190}
agentshield/ir/mod.rs

agentshield/ir/
mod.rs