Skip to main content

agentshield/ir/
mod.rs

1//! Unified Intermediate Representation for agent extension analysis.
2//!
3//! All adapters produce a `ScanTarget`. All detectors consume a `ScanTarget`.
4//! This decouples framework-specific parsing from security analysis.
5
6pub mod data_surface;
7pub mod dependency_surface;
8pub mod execution_surface;
9pub mod provenance_surface;
10pub mod tool_surface;
11
12use serde::{Deserialize, Serialize};
13use std::path::PathBuf;
14
15pub use data_surface::DataSurface;
16pub use dependency_surface::DependencySurface;
17pub use execution_surface::ExecutionSurface;
18pub use provenance_surface::ProvenanceSurface;
19pub use tool_surface::ToolSurface;
20
21/// Complete scan target — the unified IR that all analysis operates on.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct ScanTarget {
24    /// Human-readable name of the extension.
25    pub name: String,
26    /// Framework that produced this target.
27    pub framework: Framework,
28    /// Root directory of the extension.
29    pub root_path: PathBuf,
30    /// Tool definitions declared by the extension.
31    pub tools: Vec<ToolSurface>,
32    /// Execution capabilities discovered in source code.
33    pub execution: ExecutionSurface,
34    /// Data flow surfaces (inputs, outputs, sources, sinks).
35    pub data: DataSurface,
36    /// Dependency information.
37    pub dependencies: DependencySurface,
38    /// Provenance metadata (author, repo, signatures).
39    pub provenance: ProvenanceSurface,
40    /// Raw source files included in the scan.
41    pub source_files: Vec<SourceFile>,
42}
43
44/// Which agent framework this extension targets.
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
46#[serde(rename_all = "snake_case")]
47pub enum Framework {
48    Mcp,
49    OpenClaw,
50    LangChain,
51    CrewAi,
52    GptActions,
53    CursorRules,
54    Unknown,
55}
56
57impl std::fmt::Display for Framework {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match self {
60            Self::Mcp => write!(f, "MCP"),
61            Self::OpenClaw => write!(f, "OpenClaw"),
62            Self::LangChain => write!(f, "LangChain"),
63            Self::CrewAi => write!(f, "CrewAI"),
64            Self::GptActions => write!(f, "GPT Actions"),
65            Self::CursorRules => write!(f, "Cursor Rules"),
66            Self::Unknown => write!(f, "Unknown"),
67        }
68    }
69}
70
71/// A source file included in the scan.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct SourceFile {
74    pub path: PathBuf,
75    pub language: Language,
76    pub content: String,
77    pub size_bytes: u64,
78    pub content_hash: String,
79}
80
81/// Programming language of a source file.
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
83#[serde(rename_all = "lowercase")]
84pub enum Language {
85    Python,
86    TypeScript,
87    JavaScript,
88    Shell,
89    Json,
90    Toml,
91    Yaml,
92    Markdown,
93    Unknown,
94}
95
96impl Language {
97    pub fn from_extension(ext: &str) -> Self {
98        match ext.to_lowercase().as_str() {
99            "py" => Self::Python,
100            "ts" | "tsx" => Self::TypeScript,
101            "js" | "jsx" | "mjs" | "cjs" => Self::JavaScript,
102            "sh" | "bash" | "zsh" => Self::Shell,
103            "json" => Self::Json,
104            "toml" => Self::Toml,
105            "yml" | "yaml" => Self::Yaml,
106            "md" | "markdown" => Self::Markdown,
107            _ => Self::Unknown,
108        }
109    }
110}
111
112/// Location in source code.
113#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
114pub struct SourceLocation {
115    pub file: PathBuf,
116    pub line: usize,
117    pub column: usize,
118    pub end_line: Option<usize>,
119    pub end_column: Option<usize>,
120}
121
122/// Where a function argument originates — the key taint abstraction.
123///
124/// Detectors don't need full taint analysis. They just need to know
125/// where a function argument came from.
126#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
127#[serde(rename_all = "snake_case")]
128pub enum ArgumentSource {
129    /// Hardcoded literal string — generally safe.
130    Literal(String),
131    /// Comes from function parameter — potentially user/LLM-controlled.
132    Parameter { name: String },
133    /// Comes from environment variable.
134    EnvVar { name: String },
135    /// Constructed via string formatting/concatenation — dangerous.
136    Interpolated,
137    /// Unable to determine statically.
138    Unknown,
139}
140
141impl ArgumentSource {
142    /// Whether this source is potentially attacker-controlled.
143    pub fn is_tainted(&self) -> bool {
144        !matches!(self, Self::Literal(_))
145    }
146}