Skip to main content

agentshield/ir/
mod.rs

1//! Unified Intermediate Representation for agent extension analysis.
2//!
3//! All adapters produce a `ScanTarget`. All detectors consume a `ScanTarget`.
4//! This decouples framework-specific parsing from security analysis.
5
6pub mod data_surface;
7pub mod dependency_surface;
8pub mod execution_surface;
9pub mod provenance_surface;
10pub mod taint_builder;
11pub mod tool_surface;
12
13use serde::{Deserialize, Serialize};
14use std::path::PathBuf;
15
16pub use data_surface::DataSurface;
17pub use dependency_surface::DependencySurface;
18pub use execution_surface::ExecutionSurface;
19pub use provenance_surface::ProvenanceSurface;
20pub use tool_surface::ToolSurface;
21
22/// Complete scan target — the unified IR that all analysis operates on.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct ScanTarget {
25    /// Human-readable name of the extension.
26    pub name: String,
27    /// Framework that produced this target.
28    pub framework: Framework,
29    /// Root directory of the extension.
30    pub root_path: PathBuf,
31    /// Tool definitions declared by the extension.
32    pub tools: Vec<ToolSurface>,
33    /// Execution capabilities discovered in source code.
34    pub execution: ExecutionSurface,
35    /// Data flow surfaces (inputs, outputs, sources, sinks).
36    pub data: DataSurface,
37    /// Dependency information.
38    pub dependencies: DependencySurface,
39    /// Provenance metadata (author, repo, signatures).
40    pub provenance: ProvenanceSurface,
41    /// Raw source files included in the scan.
42    pub source_files: Vec<SourceFile>,
43}
44
45/// Which agent framework this extension targets.
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Framework {
49    Mcp,
50    OpenClaw,
51    HermesAgent,
52    LangChain,
53    CrewAi,
54    GptActions,
55    CursorRules,
56    Unknown,
57}
58
59impl std::fmt::Display for Framework {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        match self {
62            Self::Mcp => write!(f, "MCP"),
63            Self::OpenClaw => write!(f, "OpenClaw"),
64            Self::HermesAgent => write!(f, "Hermes Agent"),
65            Self::LangChain => write!(f, "LangChain"),
66            Self::CrewAi => write!(f, "CrewAI"),
67            Self::GptActions => write!(f, "GPT Actions"),
68            Self::CursorRules => write!(f, "Cursor Rules"),
69            Self::Unknown => write!(f, "Unknown"),
70        }
71    }
72}
73
74/// A source file included in the scan.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct SourceFile {
77    pub path: PathBuf,
78    pub language: Language,
79    pub content: String,
80    pub size_bytes: u64,
81    pub content_hash: String,
82}
83
84/// Programming language of a source file.
85#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
86#[serde(rename_all = "lowercase")]
87pub enum Language {
88    Python,
89    TypeScript,
90    JavaScript,
91    Shell,
92    Json,
93    Toml,
94    Yaml,
95    Markdown,
96    Unknown,
97}
98
99impl Language {
100    pub fn from_extension(ext: &str) -> Self {
101        match ext.to_lowercase().as_str() {
102            "py" => Self::Python,
103            "ts" | "tsx" => Self::TypeScript,
104            "js" | "jsx" | "mjs" | "cjs" => Self::JavaScript,
105            "sh" | "bash" | "zsh" => Self::Shell,
106            "json" => Self::Json,
107            "toml" => Self::Toml,
108            "yml" | "yaml" => Self::Yaml,
109            "md" | "markdown" => Self::Markdown,
110            _ => Self::Unknown,
111        }
112    }
113}
114
115/// Location in source code.
116#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
117pub struct SourceLocation {
118    pub file: PathBuf,
119    pub line: usize,
120    pub column: usize,
121    pub end_line: Option<usize>,
122    pub end_column: Option<usize>,
123}
124
125/// Where a function argument originates — the key taint abstraction.
126///
127/// Detectors don't need full taint analysis. They just need to know
128/// where a function argument came from.
129#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
130#[serde(rename_all = "snake_case")]
131pub enum ArgumentSource {
132    /// Hardcoded literal string — generally safe.
133    Literal(String),
134    /// Comes from function parameter — potentially user/LLM-controlled.
135    Parameter { name: String },
136    /// Comes from environment variable.
137    EnvVar { name: String },
138    /// Constructed via string formatting/concatenation — dangerous.
139    Interpolated,
140    /// Unable to determine statically.
141    Unknown,
142    /// Parameter was sanitized before being passed (e.g., via `validatePath`).
143    Sanitized { sanitizer: String },
144}
145
146impl ArgumentSource {
147    /// Whether this source is potentially attacker-controlled.
148    pub fn is_tainted(&self) -> bool {
149        !matches!(self, Self::Literal(_) | Self::Sanitized { .. })
150    }
151}