Skip to main content

raxit_core/
schema.rs

1//! Agent Assets Schema - Data structures for RAXIT scan results
2//!
3//! Based on Agent Assets Schema v0.1.0 from SPEC.md
4
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// Complete scan result including all discovered assets
9#[derive(Debug, Clone, Serialize, Deserialize)]
10pub struct ScanResult {
11    /// Manifest section (SPEC.md v0.1.0)
12    pub manifest: Manifest,
13
14    /// Discovered agents
15    pub agents: Vec<Agent>,
16
17    /// Discovered tools
18    pub tools: Vec<Tool>,
19
20    /// Discovered models
21    pub models: Vec<Model>,
22
23    /// Memory configurations
24    pub memory: Vec<Memory>,
25
26    /// Trust boundaries
27    #[serde(rename = "trustBoundaries")]
28    pub trust_boundaries: Vec<TrustBoundary>,
29
30    /// Secret findings
31    #[serde(rename = "secretFindings", skip_serializing_if = "Vec::is_empty")]
32    pub secret_findings: Vec<SecretFinding>,
33
34    /// Memory findings
35    #[serde(rename = "memoryFindings", skip_serializing_if = "Vec::is_empty")]
36    pub memory_findings: Vec<MemoryFinding>,
37
38    /// Network findings
39    #[serde(rename = "networkFindings", skip_serializing_if = "Vec::is_empty")]
40    pub network_findings: Vec<NetworkFinding>,
41
42    /// Provenance findings
43    #[serde(rename = "provenanceFindings", skip_serializing_if = "Vec::is_empty")]
44    pub provenance_findings: Vec<ProvenanceFinding>,
45}
46
47impl ScanResult {
48    pub fn new() -> Self {
49        Self {
50            manifest: Manifest::default(),
51            agents: Vec::new(),
52            tools: Vec::new(),
53            models: Vec::new(),
54            memory: Vec::new(),
55            trust_boundaries: Vec::new(),
56            secret_findings: Vec::new(),
57            memory_findings: Vec::new(),
58            network_findings: Vec::new(),
59            provenance_findings: Vec::new(),
60        }
61    }
62
63    pub fn to_yaml(&self) -> crate::Result<String> {
64        serde_yaml::to_string(self).map_err(Into::into)
65    }
66
67    pub fn to_json(&self) -> crate::Result<String> {
68        serde_json::to_string_pretty(self).map_err(Into::into)
69    }
70}
71
72impl Default for ScanResult {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
/// Alias for backward compatibility.
///
/// `AgentAssets` and [`ScanResult`] are the same type; code written against
/// either name is interchangeable.
pub type AgentAssets = ScanResult;
80
/// Manifest section - metadata about the scan and project (SPEC.md v0.1.0).
///
/// Unlike the asset and finding types in this file, no field here carries a
/// serde rename, so the keys are serialized exactly as written (snake_case).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Schema version (`"0.1.0"` in the `Default` implementation)
    pub schema_version: String,

    /// Subject - project being scanned
    pub subject: Subject,

    /// Unique scan ID (the `Default` implementation fills this from
    /// `generate_scan_id`)
    pub scan_id: String,

    /// Timestamp when scan was performed, stored as text (the `Default`
    /// implementation uses the current UTC time in RFC 3339 form)
    pub scanned_at: String,

    /// Tool that performed the scan (the `Default` implementation uses
    /// `raxit-cli/<crate version>`)
    pub scanned_by: String,

    /// Files included in scan
    pub files: Vec<String>,

    /// Scan configuration
    pub scan_config: ScanConfigMetadata,

    /// Signature (optional, when signed via RAXIT API); the key is omitted
    /// from the output when this is `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<Signature>,
}
109
110impl Default for Manifest {
111    fn default() -> Self {
112        Self {
113            schema_version: "0.1.0".to_string(),
114            subject: Subject::default(),
115            scan_id: generate_scan_id(),
116            scanned_at: chrono::Utc::now().to_rfc3339(),
117            scanned_by: format!("raxit-cli/{}", env!("CARGO_PKG_VERSION")),
118            files: Vec::new(),
119            scan_config: ScanConfigMetadata::default(),
120            signature: None,
121        }
122    }
123}
124
/// Subject - project being scanned.
///
/// Only `name` is always serialized; `version` and `source` are left out of
/// the output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Subject {
    /// Project name (`"unknown"` in the `Default` implementation)
    pub name: String,

    /// Project version (from pyproject.toml if available)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,

    /// Source repository URL
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
}
139
140impl Default for Subject {
141    fn default() -> Self {
142        Self {
143            name: "unknown".to_string(),
144            version: None,
145            source: None,
146        }
147    }
148}
149
/// Scan configuration metadata for reproducibility.
///
/// Records the settings and counters of a scan. All fields are always
/// serialized, under their snake_case names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanConfigMetadata {
    /// Exclude patterns used
    pub exclude_patterns: Vec<String>,

    /// Frameworks detected
    pub frameworks_detected: Vec<String>,

    /// Number of parallel workers used (`1` in the `Default` implementation)
    pub parallel_workers: usize,

    /// Whether incremental scanning was enabled
    pub incremental: bool,

    /// Number of files scanned
    pub files_scanned: usize,

    /// Number of files skipped (incremental mode)
    pub files_skipped: usize,
}
171
172impl Default for ScanConfigMetadata {
173    fn default() -> Self {
174        Self {
175            exclude_patterns: Vec::new(),
176            frameworks_detected: Vec::new(),
177            parallel_workers: 1,
178            incremental: false,
179            files_scanned: 0,
180            files_skipped: 0,
181        }
182    }
183}
184
/// Signature from RAXIT API (optional).
///
/// This type only carries the values; nothing in this file computes or
/// verifies a signature. The two optional sections are left out of the
/// serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Signature {
    /// Digest of the schema
    pub digest: String,

    /// Signing algorithm
    pub algorithm: String,

    /// Base64-encoded signature value
    pub signature_value: String,

    /// Timestamp when signed, stored as text
    pub signed_at: String,

    /// Key metadata
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_metadata: Option<KeyMetadata>,

    /// Attestation information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub attestation: Option<Attestation>,
}
208
/// Key metadata from RAXIT API.
///
/// Carried inside [`Signature::key_metadata`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct KeyMetadata {
    // NOTE(review): presumably the identifier of the signing key - confirm
    // against the RAXIT API contract.
    pub key_id: String,
    // NOTE(review): presumably the version of that key; stored as free-form
    // text - confirm the expected format.
    pub key_version: String,
}
215
/// Attestation information from RAXIT API.
///
/// Carried inside [`Signature::attestation`]. All four values are plain
/// strings supplied by the API.
// NOTE(review): field meanings below are inferred from their names only -
// confirm against the RAXIT API contract before relying on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Attestation {
    // Presumably the API-side identifier of the project.
    pub project_id: String,
    // Presumably the display name of the project.
    pub project_name: String,
    // Presumably the organization that owns the project.
    pub organization: String,
    // Presumably the identity that produced the signature.
    pub signed_by: String,
}
224
225/// Generate a unique scan ID
226fn generate_scan_id() -> String {
227    let now = chrono::Utc::now();
228    let random_suffix: String = (0..6)
229        .map(|_| format!("{:x}", rand::random::<u8>() % 16))
230        .collect();
231    format!("scan-{}-{}", now.format("%Y%m%d"), random_suffix)
232}
233
/// Metadata about the scan (legacy compatibility).
///
/// NOTE(review): the serialized key style is mixed - `frameworkVersion` and
/// `raxitVersion` are renamed to camelCase while `source_path` keeps its
/// snake_case name. This is left as-is because changing it would alter the
/// wire format; confirm whether legacy consumers depend on it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanMetadata {
    /// Framework name (e.g., "pydantic-ai", "langgraph")
    pub framework: String,

    /// Framework version (serialized as `frameworkVersion`)
    #[serde(rename = "frameworkVersion")]
    pub framework_version: Option<String>,

    /// Scan timestamp, stored as text (the `Default` implementation uses the
    /// current UTC time in RFC 3339 form)
    pub timestamp: String,

    /// RAXIT SDK version (serialized as `raxitVersion`)
    #[serde(rename = "raxitVersion")]
    pub raxit_version: String,

    /// Source path scanned (serialized under its snake_case name)
    pub source_path: String,
}
254
255impl Default for ScanMetadata {
256    fn default() -> Self {
257        Self {
258            framework: "unknown".to_string(),
259            framework_version: None,
260            timestamp: chrono::Utc::now().to_rfc3339(),
261            raxit_version: env!("CARGO_PKG_VERSION").to_string(),
262            source_path: ".".to_string(),
263        }
264    }
265}
266
267/// Agent definition
268#[derive(Debug, Clone, Serialize, Deserialize)]
269pub struct Agent {
270    /// Unique agent identifier
271    pub id: String,
272
273    /// Agent name
274    pub name: String,
275
276    /// Source location
277    pub location: SourceLocation,
278
279    /// Model used by agent
280    #[serde(rename = "modelId")]
281    pub model_id: Option<String>,
282
283    /// Tools available to agent
284    #[serde(rename = "toolIds")]
285    pub tool_ids: Vec<String>,
286
287    /// Memory configuration
288    #[serde(rename = "memoryId")]
289    pub memory_id: Option<String>,
290
291    /// System prompt
292    #[serde(rename = "systemPrompt")]
293    pub system_prompt: Option<String>,
294
295    /// Result type schema
296    #[serde(rename = "resultType")]
297    pub result_type: Option<String>,
298
299    /// Dependencies type
300    #[serde(rename = "depsType")]
301    pub deps_type: Option<String>,
302}
303
304/// Tool definition
305#[derive(Debug, Clone, Serialize, Deserialize)]
306pub struct Tool {
307    /// Unique tool identifier
308    pub id: String,
309
310    /// Tool name
311    pub name: String,
312
313    /// Source location
314    pub location: SourceLocation,
315
316    /// Tool description
317    pub description: Option<String>,
318
319    /// Parameters schema
320    pub parameters: Option<HashMap<String, String>>,
321
322    /// Whether tool requires context
323    #[serde(rename = "requiresContext")]
324    pub requires_context: bool,
325
326    /// Tool type (plain or context-aware)
327    #[serde(rename = "toolType")]
328    pub tool_type: String,
329
330    /// Data flows (CaMeL-style provenance)
331    #[serde(rename = "dataFlows")]
332    pub data_flows: Vec<DataFlow>,
333}
334
335/// Model configuration
336#[derive(Debug, Clone, Serialize, Deserialize)]
337pub struct Model {
338    /// Unique model identifier
339    pub id: String,
340
341    /// Provider (e.g., "openai", "anthropic")
342    pub provider: String,
343
344    /// Model name
345    #[serde(rename = "modelName")]
346    pub model_name: String,
347
348    /// Source location
349    pub location: SourceLocation,
350
351    /// Configuration parameters
352    pub config: HashMap<String, String>,
353}
354
355/// Memory configuration
356#[derive(Debug, Clone, Serialize, Deserialize)]
357pub struct Memory {
358    /// Unique memory identifier
359    pub id: String,
360
361    /// Memory type
362    #[serde(rename = "memoryType")]
363    pub memory_type: String,
364
365    /// Source location
366    pub location: SourceLocation,
367
368    /// Configuration
369    pub config: HashMap<String, String>,
370}
371
372/// Trust boundary definition
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct TrustBoundary {
375    /// Boundary identifier
376    pub id: String,
377
378    /// Component ID (agent or tool)
379    #[serde(rename = "componentId")]
380    pub component_id: String,
381
382    /// Component type
383    #[serde(rename = "componentType")]
384    pub component_type: String,
385
386    /// Untrusted input (A)
387    #[serde(rename = "hasUntrustedInput")]
388    pub has_untrusted_input: bool,
389
390    /// Sensitive access (B)
391    #[serde(rename = "hasSensitiveAccess")]
392    pub has_sensitive_access: bool,
393
394    /// External actions (C)
395    #[serde(rename = "hasExternalActions")]
396    pub has_external_actions: bool,
397
398    /// Compliance status
399    pub compliant: bool,
400
401    /// Violations (if not compliant)
402    pub violations: Vec<String>,
403
404    /// Source location
405    pub location: SourceLocation,
406}
407
408/// Secret finding from secret detection analyzer
409#[derive(Debug, Clone, Serialize, Deserialize)]
410pub struct SecretFinding {
411    /// Unique finding identifier
412    pub id: String,
413
414    /// Type of secret detected
415    #[serde(rename = "secretType")]
416    pub secret_type: String,
417
418    /// Source location
419    pub location: SourceLocation,
420
421    /// Severity level (critical, high, medium, low)
422    pub severity: String,
423
424    /// Description of the finding
425    pub message: String,
426
427    /// Matched pattern (masked for security)
428    #[serde(rename = "matchedPattern", skip_serializing_if = "Option::is_none")]
429    pub matched_pattern: Option<String>,
430}
431
432/// Memory finding from memory detection analyzer
433#[derive(Debug, Clone, Serialize, Deserialize)]
434pub struct MemoryFinding {
435    /// Unique finding identifier
436    pub id: String,
437
438    /// Type of memory usage (vector_store, database, file_persistence, etc.)
439    #[serde(rename = "memoryType")]
440    pub memory_type: String,
441
442    /// Technology used (chroma, pinecone, sqlite, redis, etc.)
443    pub technology: String,
444
445    /// Source location
446    pub location: SourceLocation,
447
448    /// Configuration details (connection string, file path, etc.)
449    #[serde(skip_serializing_if = "Option::is_none")]
450    pub configuration: Option<String>,
451
452    /// Description of the finding
453    pub message: String,
454}
455
456/// Network finding from network detection analyzer
457#[derive(Debug, Clone, Serialize, Deserialize)]
458pub struct NetworkFinding {
459    /// Unique finding identifier
460    pub id: String,
461
462    /// Type of network usage (http_call, api_client, socket_connection, etc.)
463    #[serde(rename = "networkType")]
464    pub network_type: String,
465
466    /// Technology used (requests, httpx, openai, etc.)
467    pub technology: String,
468
469    /// Source location
470    pub location: SourceLocation,
471
472    /// Endpoint URL (if available)
473    #[serde(skip_serializing_if = "Option::is_none")]
474    pub endpoint: Option<String>,
475
476    /// HTTP method (GET, POST, etc.)
477    #[serde(skip_serializing_if = "Option::is_none")]
478    pub method: Option<String>,
479
480    /// Description of the finding
481    pub message: String,
482}
483
484/// Provenance finding from data provenance analyzer
485#[derive(Debug, Clone, Serialize, Deserialize)]
486pub struct ProvenanceFinding {
487    /// Unique finding identifier
488    pub id: String,
489
490    /// Type of finding (tainted_sink, unsafe_flow, etc.)
491    #[serde(rename = "findingType")]
492    pub finding_type: String,
493
494    /// Source type (untrusted_input, external_data, etc.)
495    #[serde(rename = "sourceType")]
496    pub source_type: String,
497
498    /// Sink type (print, file_write, database_insert, etc.)
499    #[serde(rename = "sinkType")]
500    pub sink_type: String,
501
502    /// Tainted variables involved
503    #[serde(rename = "taintedVariables")]
504    pub tainted_variables: Vec<String>,
505
506    /// Source location
507    pub location: SourceLocation,
508
509    /// Severity level (critical, high, medium, low)
510    pub severity: String,
511
512    /// Description of the finding
513    pub message: String,
514
515    /// Data flow path (optional)
516    #[serde(rename = "dataFlow", skip_serializing_if = "Option::is_none")]
517    pub data_flow: Option<String>,
518}
519
520/// Data flow for CaMeL-style provenance
521#[derive(Debug, Clone, Serialize, Deserialize)]
522pub struct DataFlow {
523    /// Variable or data identifier
524    pub variable: String,
525
526    /// Data source
527    pub source: String,
528
529    /// Readers
530    pub readers: Vec<String>,
531
532    /// Writers
533    pub writers: Vec<String>,
534
535    /// Taint level
536    #[serde(rename = "taintLevel")]
537    pub taint_level: String,
538}
539
540/// Source code location
541#[derive(Debug, Clone, Serialize, Deserialize)]
542pub struct SourceLocation {
543    /// File path
544    pub file: String,
545
546    /// Start line
547    pub line: u32,
548
549    /// End line
550    #[serde(rename = "endLine")]
551    pub end_line: Option<u32>,
552
553    /// Function or class name
554    pub function: Option<String>,
555}
556
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty result must serialize to both YAML and JSON without error.
    #[test]
    fn test_scan_result_serialization() {
        let scan = ScanResult::new();

        assert!(scan.to_yaml().is_ok());
        assert!(scan.to_json().is_ok());
    }

    /// The default manifest carries schema version 0.1.0 and names this
    /// crate's version as the scanning tool.
    #[test]
    fn test_default_metadata() {
        let manifest = ScanResult::default().manifest;
        let crate_version = env!("CARGO_PKG_VERSION");

        assert_eq!(manifest.schema_version, "0.1.0");
        assert!(manifest.scanned_by.contains(crate_version));
    }
}