Skip to main content

agentshield/ir/
mod.rs

1//! Unified Intermediate Representation for agent extension analysis.
2//!
3//! All adapters produce a `ScanTarget`. All detectors consume a `ScanTarget`.
4//! This decouples framework-specific parsing from security analysis.
5
6pub mod data_surface;
7pub mod dependency_surface;
8pub mod execution_surface;
9pub mod provenance_surface;
10pub mod taint_builder;
11pub mod tool_surface;
12
13use serde::{Deserialize, Serialize};
14use std::path::PathBuf;
15
16pub use data_surface::DataSurface;
17pub use dependency_surface::DependencySurface;
18pub use execution_surface::ExecutionSurface;
19pub use provenance_surface::ProvenanceSurface;
20pub use tool_surface::ToolSurface;
21
22/// Complete scan target — the unified IR that all analysis operates on.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct ScanTarget {
25    /// Human-readable name of the extension.
26    pub name: String,
27    /// Framework that produced this target.
28    pub framework: Framework,
29    /// Root directory of the extension.
30    pub root_path: PathBuf,
31    /// Tool definitions declared by the extension.
32    pub tools: Vec<ToolSurface>,
33    /// Execution capabilities discovered in source code.
34    pub execution: ExecutionSurface,
35    /// Data flow surfaces (inputs, outputs, sources, sinks).
36    pub data: DataSurface,
37    /// Dependency information.
38    pub dependencies: DependencySurface,
39    /// Provenance metadata (author, repo, signatures).
40    pub provenance: ProvenanceSurface,
41    /// Raw source files included in the scan.
42    pub source_files: Vec<SourceFile>,
43}
44
45/// Which agent framework this extension targets.
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Framework {
49    Mcp,
50    OpenClaw,
51    LangChain,
52    CrewAi,
53    GptActions,
54    CursorRules,
55    Unknown,
56}
57
58impl std::fmt::Display for Framework {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        match self {
61            Self::Mcp => write!(f, "MCP"),
62            Self::OpenClaw => write!(f, "OpenClaw"),
63            Self::LangChain => write!(f, "LangChain"),
64            Self::CrewAi => write!(f, "CrewAI"),
65            Self::GptActions => write!(f, "GPT Actions"),
66            Self::CursorRules => write!(f, "Cursor Rules"),
67            Self::Unknown => write!(f, "Unknown"),
68        }
69    }
70}
71
72/// A source file included in the scan.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct SourceFile {
75    pub path: PathBuf,
76    pub language: Language,
77    pub content: String,
78    pub size_bytes: u64,
79    pub content_hash: String,
80}
81
82/// Programming language of a source file.
83#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
84#[serde(rename_all = "lowercase")]
85pub enum Language {
86    Python,
87    TypeScript,
88    JavaScript,
89    Shell,
90    Json,
91    Toml,
92    Yaml,
93    Markdown,
94    Unknown,
95}
96
97impl Language {
98    pub fn from_extension(ext: &str) -> Self {
99        match ext.to_lowercase().as_str() {
100            "py" => Self::Python,
101            "ts" | "tsx" => Self::TypeScript,
102            "js" | "jsx" | "mjs" | "cjs" => Self::JavaScript,
103            "sh" | "bash" | "zsh" => Self::Shell,
104            "json" => Self::Json,
105            "toml" => Self::Toml,
106            "yml" | "yaml" => Self::Yaml,
107            "md" | "markdown" => Self::Markdown,
108            _ => Self::Unknown,
109        }
110    }
111}
112
113/// Location in source code.
114#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
115pub struct SourceLocation {
116    pub file: PathBuf,
117    pub line: usize,
118    pub column: usize,
119    pub end_line: Option<usize>,
120    pub end_column: Option<usize>,
121}
122
123/// Where a function argument originates — the key taint abstraction.
124///
125/// Detectors don't need full taint analysis. They just need to know
126/// where a function argument came from.
127#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
128#[serde(rename_all = "snake_case")]
129pub enum ArgumentSource {
130    /// Hardcoded literal string — generally safe.
131    Literal(String),
132    /// Comes from function parameter — potentially user/LLM-controlled.
133    Parameter { name: String },
134    /// Comes from environment variable.
135    EnvVar { name: String },
136    /// Constructed via string formatting/concatenation — dangerous.
137    Interpolated,
138    /// Unable to determine statically.
139    Unknown,
140    /// Parameter was sanitized before being passed (e.g., via `validatePath`).
141    Sanitized { sanitizer: String },
142}
143
144impl ArgumentSource {
145    /// Whether this source is potentially attacker-controlled.
146    pub fn is_tainted(&self) -> bool {
147        !matches!(self, Self::Literal(_) | Self::Sanitized { .. })
148    }
149}