cpg_rs/lib.rs
1//! # cpg-rs
2//!
3//! A Rust library for working with Code Property Graphs (CPGs).
4//!
5//! This crate provides a complete set of data structures for representing Code Property Graphs,
6//! a language-agnostic intermediate representation for static code analysis. CPGs combine
7//! abstract syntax trees, control flow graphs, and data flow graphs into a unified structure
8//! that enables sophisticated program analysis.
9//!
10//! ## Features
11//!
12//! - Complete implementation of the CPG specification
13//! - Serialization/deserialization support via serde
14//! - Strongly-typed enums for node types, edge types, and properties
15//! - Support for CPG overlays and diff graphs for incremental analysis
16//!
17//! ## Usage
18//!
19//! The main data structures in this crate are:
20//!
21//! - [`Cpg`]: The root structure containing nodes and edges
22//! - [`Node`]: Represents program entities like methods, variables, and expressions
23//! - [`Edge`]: Represents relationships between nodes
24//! - [`PropertyValue`]: Represents typed property values attached to nodes and edges
25//!
26//! CPGs can be created programmatically or deserialized from JSON.
27//!
28//! ## Examples
29//!
30//! ### Creating and modifying a CPG
31//!
32//! ```rust
33//! use cpg_rs::{Cpg, Node, Edge, NodeType, EdgeType, NodeProperty, NodePropertyName, PropertyValue, PropertyValueEnum};
34//! use std::fs::File;
35//! use std::io::{BufWriter, BufReader};
36//!
37//! // Create a new CPG
38//! let cpg = Cpg {
39//! node: vec![
40//! Node {
41//! key: 1,
42//! r#type: NodeType::Method,
43//! property: vec![
44//! NodeProperty {
45//! name: NodePropertyName::Name,
46//! value: Some(PropertyValue {
47//! value: Some(PropertyValueEnum::StringValue("main".to_string())),
48//! }),
49//! },
50//! ],
51//! },
52//! ],
53//! edge: vec![],
54//! };
55//!
56//! // Serialize to JSON
57//! let file = File::create("cpg.json").unwrap();
58//! let writer = BufWriter::new(file);
59//! serde_json::to_writer_pretty(writer, &cpg).unwrap();
60//!
61//! // Deserialize from JSON
62//! let file = File::open("cpg.json").unwrap();
63//! let reader = BufReader::new(file);
64//! let cpg: Cpg = serde_json::from_reader(reader).unwrap();
65//! ```
66//!
67//! See the [examples directory](https://github.com/gbrigandi/cpg-rs/tree/main/examples) for more examples:
68//!
69//! - `modify_cpg.rs`: Creating, modifying, and saving a CPG
70//! - `find_methods.rs`: Finding methods and parameters in a CPG
71//! - `diff_graph.rs`: Working with diff graphs to represent changes to a CPG
72//!
73//! See the documentation for individual types for more details.
74
75
76use serde::{Deserialize, Serialize};
77
78/// Represents a typed property value that can be attached to nodes and edges in a CPG.
79///
80/// Property values can be of various types like strings, numbers, booleans, or lists.
81/// The actual value is stored in the `value` field as a `PropertyValueEnum`.
82#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
83pub struct PropertyValue {
84 #[serde(flatten)]
85 pub value: Option<PropertyValueEnum>,
86}
87
88/// Represents the different types of values that a property can have in a CPG.
89///
90/// This enum uses serde's tagged representation with "type" and "value" fields
91/// to support proper serialization and deserialization to/from JSON.
92#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
93#[serde(tag = "type", content = "value")]
94pub enum PropertyValueEnum {
95 #[serde(rename = "string_value")]
96 StringValue(String),
97 #[serde(rename = "bool_value")]
98 BoolValue(bool),
99 #[serde(rename = "int_value")]
100 IntValue(i32),
101 #[serde(rename = "long_value")]
102 LongValue(i64),
103 #[serde(rename = "float_value")]
104 FloatValue(f32),
105 #[serde(rename = "double_value")]
106 DoubleValue(f64),
107 #[serde(rename = "string_list")]
108 StringList(StringList),
109 #[serde(rename = "bool_list")]
110 BoolList(BoolList),
111 #[serde(rename = "int_list")]
112 IntList(IntList),
113 #[serde(rename = "long_list")]
114 LongList(LongList),
115 #[serde(rename = "float_list")]
116 FloatList(FloatList),
117 #[serde(rename = "double_list")]
118 DoubleList(DoubleList),
119 #[serde(rename = "contained_refs")]
120 ContainedRefs(ContainedRefs),
121}
122
123/// Represents references to other nodes in the CPG.
124///
125/// This is used to establish relationships between nodes without using edges.
126#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
127pub struct ContainedRefs {
128 pub local_name: String,
129 pub refs: Vec<i64>,
130}
131
132/// A list of string values that can be used as a property value.
133#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
134pub struct StringList {
135 pub values: Vec<String>,
136}
137
138/// A list of boolean values that can be used as a property value.
139#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
140pub struct BoolList {
141 pub values: Vec<bool>,
142}
143
144/// A list of 32-bit integer values that can be used as a property value.
145#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
146pub struct IntList {
147 pub values: Vec<i32>,
148}
149
150/// A list of 64-bit integer values that can be used as a property value.
151#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
152pub struct LongList {
153 pub values: Vec<i64>,
154}
155
156/// A list of 32-bit floating point values that can be used as a property value.
157#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
158pub struct FloatList {
159 pub values: Vec<f32>,
160}
161
162/// A list of 64-bit floating point values that can be used as a property value.
163#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
164pub struct DoubleList {
165 pub values: Vec<f64>,
166}
167
168/// The root structure of a Code Property Graph (CPG).
169///
170/// A CPG consists of nodes representing program entities (methods, variables, etc.)
171/// and edges representing relationships between those entities.
172///
173/// # Examples
174///
175/// ```
176/// use cpg_rs::{Cpg, Node, Edge, NodeType, EdgeType};
177///
178/// // Create a new CPG with nodes and edges
179/// let cpg = Cpg {
180/// node: vec![], // Add nodes here
181/// edge: vec![], // Add edges here
182/// };
183/// ```
184#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
185pub struct Cpg {
186 pub node: Vec<Node>,
187 pub edge: Vec<Edge>,
188}
189
190/// Represents a node in a Code Property Graph.
191///
192/// Nodes represent program entities like methods, variables, expressions, etc.
193/// Each node has a unique key, a type, and a list of properties.
194///
195/// # Examples
196///
197/// ```
198/// use cpg_rs::{Node, NodeType, NodeProperty, NodePropertyName, PropertyValue, PropertyValueEnum};
199///
200/// // Create a method node
201/// let method_node = Node {
202/// key: 1,
203/// r#type: NodeType::Method,
204/// property: vec![
205/// NodeProperty {
206/// name: NodePropertyName::Name,
207/// value: Some(PropertyValue {
208/// value: Some(PropertyValueEnum::StringValue("main".to_string())),
209/// }),
210/// },
211/// ],
212/// };
213/// ```
214#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
215pub struct Node {
216 pub key: i64,
217 pub r#type: NodeType,
218 pub property: Vec<NodeProperty>,
219}
220
221/// Represents a property of a node in a Code Property Graph.
222///
223/// Each property has a name and an optional value.
224#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
225pub struct NodeProperty {
226 pub name: NodePropertyName,
227 pub value: Option<PropertyValue>,
228}
229
/// The kinds of nodes that can appear in a Code Property Graph.
///
/// Each variant is (de)serialized under the upper-case name given in its
/// `#[serde(rename = "...")]` attribute and carries an explicit numeric
/// discriminant.
// NOTE(review): the discriminants presumably mirror the ids of the CPG
// protobuf schema; confirm before changing any of them.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum NodeType {
    /// The zero-valued variant, serialized as `UNKNOWN_NODE_TYPE`.
    #[serde(rename = "UNKNOWN_NODE_TYPE")]
    UnknownNodeType = 0,
    /// Programming languages offer many closely-related concepts for describing blocks
    /// of code that can be executed with input parameters and return output parameters,
    /// possibly causing side effects. In the CPG specification, we refer to all of these
    /// concepts (procedures, functions, methods, etc.) as methods. A single METHOD node
    /// must exist for each method found in the source program.
    ///
    /// The `FULL_NAME` field specifies the method's fully-qualified name, including
    /// information about the namespace it is contained in if applicable, the name field
    /// is the function's short name. The field `IS_EXTERNAL` indicates whether it was
    /// possible to identify a method body for the method. This is true for methods that
    /// are defined in the source program, and false for methods that are dynamically
    /// linked to the program, that is, methods that exist in an external dependency.
    ///
    /// Line and column number information is specified in the optional fields
    /// `LINE_NUMBER`, `COLUMN_NUMBER`, `LINE_NUMBER_END`, and `COLUMN_NUMBER_END` and
    /// the name of the source file is specified in `FILENAME`. An optional hash value
    /// MAY be calculated over the function contents and included in the `HASH` field.
    ///
    /// Finally, the fully qualified name of the program constructs that the method
    /// is immediately contained in is stored in the `AST_PARENT_FULL_NAME` field
    /// and its type is indicated in the `AST_PARENT_TYPE` field to be one of
    /// `METHOD`, `TYPE_DECL` or `NAMESPACE_BLOCK`.
    // NOTE(review): in the `IS_EXTERNAL` paragraph above, "This is true" seems to
    // refer to "a method body could be identified", not to the value of
    // `IS_EXTERNAL` itself (compare the `TYPE_DECL` docs); confirm against the
    // CPG specification.
    #[serde(rename = "METHOD")]
    Method = 1,
    /// This node represents an (unnamed) formal method return parameter. It carries its
    /// fully qualified type name in `TYPE_FULL_NAME`. The `CODE` field MAY be set freely,
    /// e.g., to the constant `RET`, however, subsequent layer creators MUST NOT depend
    /// on this value.
    #[serde(rename = "METHOD_RETURN")]
    MethodReturn = 3,
    /// A method annotation.
    /// The semantics of the FULL_NAME property on this node differ from the usual FULL_NAME
    /// semantics in the sense that FULL_NAME describes the represented annotation class/interface
    /// itself and not the ANNOTATION node.
    #[serde(rename = "ANNOTATION")]
    Annotation = 5,
    /// Assignment of annotation argument to annotation parameter
    #[serde(rename = "ANNOTATION_PARAMETER_ASSIGN")]
    AnnotationParameterAssign = 6,
    /// Formal annotation parameter
    #[serde(rename = "ANNOTATION_PARAMETER")]
    AnnotationParameter = 7,
    /// This node represents a literal such as an integer or string constant. Literals
    /// are symbols included in the code in verbatim form and which are immutable.
    /// The `TYPE_FULL_NAME` field stores the literal's fully-qualified type name,
    /// e.g., `java.lang.Integer`.
    #[serde(rename = "LITERAL")]
    Literal = 8,
    /// This node represents a type member of a class, struct or union, e.g., for the
    /// type declaration `class Foo{ int i ; }`, it represents the declaration of the
    /// variable `i`.
    #[serde(rename = "MEMBER")]
    Member = 9,
    /// Initialization construct for arrays
    #[serde(rename = "ARRAY_INITIALIZER")]
    ArrayInitializer = 14,
    /// A (function/method/procedure) call. The `METHOD_FULL_NAME` property is the name of the
    /// invoked method (the callee) while the `TYPE_FULL_NAME` is its return type, and
    /// therefore, the return type of the call when viewing it as an expression. For
    /// languages like Javascript, it is common that we may know the (short-) name
    /// of the invoked method, but we do not know at compile time which method
    /// will actually be invoked, e.g., because it depends on a dynamic import.
    /// In this case, we leave `METHOD_FULL_NAME` blank but at least fill out `NAME`,
    /// which contains the method's (short-) name and `SIGNATURE`, which contains
    /// any information we may have about the types of arguments and return value.
    #[serde(rename = "CALL")]
    Call = 15,
    /// This node represents a local variable. Its fully qualified type name is stored
    /// in the `TYPE_FULL_NAME` field and its name in the `NAME` field. The `CODE` field
    /// contains the entire local variable declaration without initialization, e.g., for
    /// `int x = 10;`, it contains `int x`.
    #[serde(rename = "LOCAL")]
    Local = 23,
    /// This node represents a tag.
    #[serde(rename = "TAG")]
    Tag = 24,
    /// A location node summarizes a source code location.
    #[serde(rename = "LOCATION")]
    Location = 25,
    /// This node represents an identifier as used when referring to a variable by name.
    /// It holds the identifier's name in the `NAME` field and its fully-qualified type
    /// name in `TYPE_FULL_NAME`.
    #[serde(rename = "IDENTIFIER")]
    Identifier = 27,
    /// This node represents a return instruction, e.g., `return x`. Note that it does
    /// NOT represent a formal return parameter as formal return parameters are
    /// represented via `METHOD_RETURN` nodes.
    #[serde(rename = "RETURN")]
    Return = 30,
    /// This node represents a compound statement. Compound statements are used in many languages to allow
    /// grouping a sequence of statements. For example, in C and Java, compound statements
    /// are statements enclosed by curly braces. Function/Method bodies are compound
    /// statements. We do not use the term "compound statement" because "statement" would
    /// imply that the block does not yield a value upon evaluation, that is, that it is
    /// not an expression. This is true in languages such as C and Java, but not for languages
    /// such as Scala where the value of the block is given by that of the last expression it
    /// contains. In fact, the Scala grammar uses the term "BlockExpr" (short for
    /// "block expression") to describe what in the CPG we call "Block".
    #[serde(rename = "BLOCK")]
    Block = 31,
    /// This node represents a formal output parameter. Corresponding output parameters
    /// for input parameters MUST NOT be created by the frontend as they are automatically
    /// created upon first loading the CPG.
    #[serde(rename = "METHOD_PARAMETER_OUT")]
    MethodParameterOut = 33,
    /// This node represents a formal input parameter. The field `NAME` contains its
    /// name, while the field `TYPE_FULL_NAME` contains the fully qualified type name.
    #[serde(rename = "METHOD_PARAMETER_IN")]
    MethodParameterIn = 34,
    /// This node represents a dependency
    #[serde(rename = "DEPENDENCY")]
    Dependency = 35,
    /// File nodes represent source files or shared objects from which the CPG
    /// was generated. File nodes serve as indices, that is, they allow looking up all
    /// elements of the code by file.
    ///
    /// For each file, the graph CAN contain exactly one File node; if not, File nodes
    /// are created as indicated by the `FILENAME` property of other nodes.
    /// As file nodes are root nodes of abstract syntax trees, they are AstNodes and
    /// their order field is set to 0. This is because they have no sibling nodes,
    /// not because they are the first node of the AST.
    #[serde(rename = "FILE")]
    File = 38,
    /// This node contains the CPG meta data. Exactly one node of this type
    /// MUST exist per CPG. The `HASH` property MAY contain a hash value calculated
    /// over the source files this CPG was generated from. The `VERSION` MUST be
    /// set to the version of the specification ("1.1"). The language field indicates
    /// which language frontend was used to generate the CPG and the list property
    /// `OVERLAYS` specifies which overlays have been applied to the CPG.
    #[serde(rename = "META_DATA")]
    MetaData = 39,
    /// This node represents a namespace. Similar to FILE nodes, NAMESPACE nodes
    /// serve as indices that allow all definitions inside a namespace to be
    /// obtained by following outgoing edges from a NAMESPACE node.
    ///
    /// NAMESPACE nodes MUST NOT be created by language frontends. Instead,
    /// they are generated from NAMESPACE_BLOCK nodes automatically upon
    /// first loading of the CPG.
    #[serde(rename = "NAMESPACE")]
    Namespace = 40,
    /// A reference to a namespace.
    /// We borrow the concept of a "namespace block" from C++, that is, a namespace block
    /// is a block of code that has been placed in the same namespace by a programmer.
    /// This block may be introduced via a `package` statement in Java or
    /// a `namespace{ }` statement in C++.
    ///
    /// The `FULL_NAME` field contains a unique identifier to represent the namespace block
    /// itself not just the namespace it references. So in addition to the namespace name
    /// it can be useful to use the containing file name to derive a unique identifier.
    ///
    /// The `NAME` field contains the namespace name in a human-readable format.
    /// The name should be given in dot-separated form where a dot indicates
    /// that the right hand side is a sub namespace of the left hand side, e.g.,
    /// `foo.bar` denotes the namespace `bar` contained in the namespace `foo`.
    #[serde(rename = "NAMESPACE_BLOCK")]
    NamespaceBlock = 41,
    /// Any AST node that the frontend would like to include in the AST but for
    /// which no suitable AST node is specified in the CPG specification may be
    /// included using a node of type `UNKNOWN`.
    #[serde(rename = "UNKNOWN")]
    Unknown = 44,
    /// This node represents a type instance, that is, a concrete instantiation
    /// of a type declaration.
    #[serde(rename = "TYPE")]
    Type = 45,
    /// This node represents a type declaration as for example given by a class-, struct-,
    /// or union declaration. In contrast to a `TYPE` node, this node does not represent a
    /// concrete instantiation of a type, e.g., for the parametrized type `List\[T\]`, it represents
    /// `List\[T\]`, but not `List\[Integer\]` where `Integer` is a concrete type.
    ///
    /// The language frontend MUST create type declarations for all types declared in the
    /// source program and MAY provide type declarations for types that are not declared
    /// but referenced by the source program. If a declaration is present in the source
    /// program, the field `IS_EXTERNAL` is set to `false`. Otherwise, it is set to `true`.
    ///
    /// The `FULL_NAME` field specifies the type's fully-qualified name, including
    /// information about the namespace it is contained in if applicable, the name field
    /// is the type's short name. Line and column number information is specified in the
    /// optional fields `LINE_NUMBER`, `COLUMN_NUMBER`, `LINE_NUMBER_END`, and
    /// `COLUMN_NUMBER_END` and the name of the source file is specified in `FILENAME`.
    ///
    /// Base types can be specified via the `INHERITS_FROM_TYPE_FULL_NAME` list, where
    /// each entry contains the fully-qualified name of a base type. If the type is
    /// known to be an alias of another type (as for example introduced via the C
    /// `typedef` statement), the name of the alias is stored in `ALIAS_TYPE_FULL_NAME`.
    ///
    /// Finally, the fully qualified name of the program constructs that the type declaration
    /// is immediately contained in is stored in the `AST_PARENT_FULL_NAME` field
    /// and its type is indicated in the `AST_PARENT_TYPE` field to be one of
    /// `METHOD`, `TYPE_DECL` or `NAMESPACE_BLOCK`.
    #[serde(rename = "TYPE_DECL")]
    TypeDecl = 46,
    /// This node represents a formal type parameter, that is, the type parameter
    /// as given in a type-parametrized method or type declaration. Examples for
    /// languages that support type parameters are Java (via Generics) and C++
    /// (via templates). Apart from the standard fields of AST nodes, the type
    /// parameter carries only a `NAME` field that holds the parameter's name.
    #[serde(rename = "TYPE_PARAMETER")]
    TypeParameter = 47,
    /// An (actual) type argument as used to instantiate a parametrized type, in the
    /// same way an (actual) argument provides concrete values for a parameter
    /// at method call sites. As is true for arguments, the method is not expected
    /// to interpret the type argument. It MUST however store its code in the
    /// `CODE` field.
    #[serde(rename = "TYPE_ARGUMENT")]
    TypeArgument = 48,
    /// A literal value assigned to an ANNOTATION_PARAMETER
    #[serde(rename = "ANNOTATION_LITERAL")]
    AnnotationLiteral = 49,
    /// This node type represents a configuration file, where `NAME` is the name
    /// of the file and `content` is its content. The exact representation of the
    /// name is left undefined and can be chosen as required by consumers of
    /// the corresponding configuration files.
    #[serde(rename = "CONFIG_FILE")]
    ConfigFile = 50,
    /// `BINDING` nodes represent name-signature pairs that can be resolved at a
    /// type declaration (`TYPE_DECL`). They are connected to `TYPE_DECL` nodes via
    /// incoming `BINDS` edges. The bound method is either associated with an outgoing
    /// `REF` edge to a `METHOD` or with the `METHOD_FULL_NAME` property. The `REF` edge
    /// if present has priority.
    #[serde(rename = "BINDING")]
    Binding = 146,
    /// This node contains an arbitrary node and an associated tag node.
    #[serde(rename = "TAG_NODE_PAIR")]
    TagNodePair = 208,
    /// Finding nodes may be used to store analysis results in the graph
    /// that are to be exposed to an end-user, e.g., information about
    /// potential vulnerabilities or dangerous programming practices.
    /// A Finding node may contain an arbitrary list of key value pairs
    /// that characterize the finding, as well as a list of nodes that
    /// serve as evidence for the finding.
    #[serde(rename = "FINDING")]
    Finding = 214,
    /// This node represents a key value pair, where both the key and the value are strings.
    #[serde(rename = "KEY_VALUE_PAIR")]
    KeyValuePair = 217,
    /// This node represents a (language-dependent) modifier such as `static`, `private`
    /// or `public`. Unlike most other AST nodes, it is NOT an expression, that is, it
    /// cannot be evaluated and cannot be passed as an argument in function calls.
    #[serde(rename = "MODIFIER")]
    Modifier = 300,
    /// This node represents a reference to a method/function/procedure as it
    /// appears when a method is passed as an argument in a call. The `METHOD_FULL_NAME`
    /// field holds the fully-qualified name of the referenced method and the
    /// `TYPE_FULL_NAME` holds its fully-qualified type name.
    #[serde(rename = "METHOD_REF")]
    MethodRef = 333,
    /// Represents the binding of a LOCAL or METHOD_PARAMETER_IN into the closure of a method
    #[serde(rename = "CLOSURE_BINDING")]
    ClosureBinding = 334,
    /// Reference to a type/class
    #[serde(rename = "TYPE_REF")]
    TypeRef = 335,
    /// This node represents a control structure as introduced by control structure
    /// statements as well as conditional and unconditional jumps. Its type is stored in the
    /// `CONTROL_STRUCTURE_TYPE` field to be one of several pre-defined types. These types
    /// are used in the construction of the control flow layer, making it possible to
    /// generate the control flow layer from the abstract syntax tree layer automatically.
    ///
    /// In addition to the `CONTROL_STRUCTURE_TYPE` field, the `PARSER_TYPE_NAME` field
    /// MAY be used by frontends to store the name of the control structure as emitted by
    /// the parser or disassembler, however, the value of this field is not relevant
    /// for construction of the control flow layer.
    #[serde(rename = "CONTROL_STRUCTURE")]
    ControlStructure = 339,
    /// A jump target is any location in the code that has been specifically marked
    /// as the target of a jump, e.g., via a label. The `NAME` field holds the name of
    /// the label while the `PARSER_TYPE_NAME` field holds the name of the language construct
    /// that this jump target is created from, e.g., "Label".
    #[serde(rename = "JUMP_TARGET")]
    JumpTarget = 340,
    /// A jump label specifies the label and thus the JUMP_TARGET of control structures
    /// BREAK and CONTINUE. The `NAME` field holds the name of the label while the
    /// `PARSER_TYPE_NAME` field holds the name of the language construct that this jump
    /// label is created from, e.g., "Label".
    #[serde(rename = "JUMP_LABEL")]
    JumpLabel = 341,
    /// This node represents a DOM node used in template languages, e.g., JSX/TSX
    #[serde(rename = "TEMPLATE_DOM")]
    TemplateDom = 417,
    /// A source code comment
    #[serde(rename = "COMMENT")]
    Comment = 511,
    /// This node represents the field accessed in a field access, e.g., in
    /// `a.b`, it represents `b`. The field name as it occurs in the code is
    /// stored in the `CODE` field. This may mean that the `CODE` field holds
    /// an expression. The `CANONICAL_NAME` field MAY contain the same value as
    /// the `CODE` field but SHOULD contain the normalized name that results
    /// from evaluating `CODE` as an expression if such an evaluation is
    /// possible for the language frontend. The objective is to store an identifier
    /// in `CANONICAL_NAME` that is the same for two nodes iff they refer to the
    /// same field, regardless of whether they use the same expression to reference
    /// it.
    #[serde(rename = "FIELD_IDENTIFIER")]
    FieldIdentifier = 2001081,
}
530
531/// Represents an edge in a Code Property Graph.
532///
533/// Edges represent relationships between nodes, such as method calls,
534/// control flow, data flow, etc. Each edge has a source node, a destination node,
535/// a type, and optional properties.
536///
537/// # Examples
538///
539/// ```
540/// use cpg_rs::{Edge, EdgeType};
541///
542/// // Create an edge from node 1 to node 2
543/// let edge = Edge {
544/// src: 1,
545/// dst: 2,
546/// r#type: EdgeType::Ast,
547/// property: vec![],
548/// };
549/// ```
550#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
551pub struct Edge {
552 /// Source node.
553 pub src: i64,
554 /// Destination node.
555 pub dst: i64,
556 pub r#type: EdgeType,
557 pub property: Vec<EdgeProperty>,
558}
559
560/// Represents a property of an edge in a Code Property Graph.
561///
562/// Each property has a name and an optional value.
563#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
564pub struct EdgeProperty {
565 pub name: EdgePropertyName,
566 pub value: Option<PropertyValue>,
567}
568
/// The kinds of edges that can appear in a Code Property Graph.
///
/// Each variant is (de)serialized under the upper-case name given in its
/// `#[serde(rename = "...")]` attribute and carries an explicit numeric
/// discriminant.
// NOTE(review): the discriminants presumably mirror the ids of the CPG
// protobuf schema; confirm before changing any of them.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum EdgeType {
    /// The zero-valued variant, serialized as `UNKNOWN_EDGE_TYPE`.
    #[serde(rename = "UNKNOWN_EDGE_TYPE")]
    UnknownEdgeType = 0,
    /// This edge connects a parent node to its child in the syntax tree.
    #[serde(rename = "AST")]
    Ast = 3,
    /// This edge connects call sites, i.e., nodes with the type `CALL`, to the
    /// method node that represents the method they invoke. The frontend MAY create
    /// `CALL` edges but is not required to do so. Instead, if the `METHOD_FULL_NAME`
    /// field of the `CALL` node is set correctly, `CALL` edges are created
    /// automatically as the CPG is first loaded.
    #[serde(rename = "CALL")]
    Call = 6,
    /// This edge indicates that the source node is an identifier that denotes
    /// access to the destination node. For example, an identifier may reference
    /// a local variable.
    #[serde(rename = "REF")]
    Ref = 10,
    /// Edges from nodes to the tags they are tagged by.
    #[serde(rename = "TAGGED_BY")]
    TaggedBy = 11,
    /// This edge connects a method input parameter to the corresponding
    /// method output parameter.
    #[serde(rename = "PARAMETER_LINK")]
    ParameterLink = 12,
    /// This edge indicates control flow from the source to the destination node.
    #[serde(rename = "CFG")]
    Cfg = 19,
    /// This edge connects a node to its evaluation type.
    #[serde(rename = "EVAL_TYPE")]
    EvalType = 21,
    /// This edge connects type arguments to type parameters to indicate
    /// that the type argument is used to instantiate the type parameter.
    #[serde(rename = "BINDS_TO")]
    BindsTo = 22,
    /// Inheritance relation between a type declaration and a type. This edge MUST NOT
    /// be created by the language frontend as it is automatically created from
    /// `INHERITS_FROM_TYPE_FULL_NAME` fields when the CPG is first loaded.
    #[serde(rename = "INHERITS_FROM")]
    InheritsFrom = 23,
    /// This edge connects a node to the method that contains it.
    #[serde(rename = "CONTAINS")]
    Contains = 28,
    /// Represents the capturing of a variable into a closure
    #[serde(rename = "CAPTURE")]
    Capture = 40,
    /// Connection between a captured LOCAL and the corresponding CLOSURE_BINDING
    #[serde(rename = "CAPTURED_BY")]
    CapturedBy = 41,
    /// Similar to `ARGUMENT` edges, `RECEIVER` edges connect call sites
    /// to their receiver arguments. A receiver argument is the object on
    /// which a method operates, that is, it is the expression that is
    /// assigned to the `this` pointer as control is transferred to the method.
    #[serde(rename = "RECEIVER")]
    Receiver = 55,
    /// The edge connects control structure nodes to the expressions that hold their conditions.
    #[serde(rename = "CONDITION")]
    Condition = 56,
    /// A reaching definition edge indicates that a variable produced at the source node reaches
    /// the destination node without being reassigned on the way. The `VARIABLE` property indicates
    /// which variable is propagated.
    #[serde(rename = "REACHING_DEF")]
    ReachingDef = 137,
    /// This edge represents an alias relation between a type declaration and a type.
    /// The language frontend MUST NOT create `ALIAS_OF` edges as they are created
    /// automatically based on `ALIAS_TYPE_FULL_NAME` fields when the CPG is first loaded.
    #[serde(rename = "ALIAS_OF")]
    AliasOf = 138,
    /// This edge connects a type declaration (`TYPE_DECL`) with a binding node (`BINDING`) and
    /// indicates that the type declaration has the binding represented by the binding node, in
    /// other words, there is a (name, signature) pair that can be resolved for the type
    /// declaration as stored in the binding node.
    #[serde(rename = "BINDS")]
    Binds = 155,
    /// Argument edges connect call sites (node type `CALL`) to their arguments
    /// (node type `EXPRESSION`) as well as `RETURN` nodes to the expressions
    /// that return.
    #[serde(rename = "ARGUMENT")]
    Argument = 156,
    /// This edge connects a node to the node that represents its source file. These
    /// edges MUST NOT be created by the language frontend but are automatically
    /// created based on `FILENAME` fields.
    #[serde(rename = "SOURCE_FILE")]
    SourceFile = 157,
    /// This edge indicates that the source node immediately dominates the destination node.
    #[serde(rename = "DOMINATE")]
    Dominate = 181,
    /// This edge indicates that the source node immediately post dominates the destination node.
    #[serde(rename = "POST_DOMINATE")]
    PostDominate = 182,
    /// A CDG edge expresses that the destination node is control dependent on the source node.
    #[serde(rename = "CDG")]
    Cdg = 183,
    /// Edge from imports to dependencies
    #[serde(rename = "IMPORTS")]
    Imports = 23663,
    /// Edge from CALL statement in the AST to the IMPORT.
    /// We use this edge to traverse from the logical representation of the IMPORT
    /// to the corresponding import statement in the AST.
    #[serde(rename = "IS_CALL_FOR_IMPORT")]
    IsCallForImport = 23664,
}
673
674#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
675pub struct AdditionalNodeProperty {
676 pub node_id: i64,
677 pub property: Option<NodeProperty>,
678}
679
680#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
681pub struct AdditionalEdgeProperty {
682 pub edge_id: i64,
683 pub property: Option<EdgeProperty>,
684 pub out_node_key: i64,
685 pub in_node_key: i64,
686 pub edge_type: EdgeType,
687}
688
689/// Overlays can be stacked onto each other, therefore their node ids must be globally unique.
690#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
691pub struct CpgOverlay {
692 pub node: Vec<Node>,
693 pub edge: Vec<Edge>,
694 pub node_property: Vec<AdditionalNodeProperty>,
695 pub edge_property: Vec<AdditionalEdgeProperty>,
696}
697
698/// Represents a set of changes to be applied to a Code Property Graph.
699///
700/// DiffGraphs can be created independently of each other and therefor when _adding_ nodes|edges,
701/// each DiffGraph has its own ID space. However, when removing nodes|edges, the nodeIds refer to the
702/// globally unique graph id space.
703#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
704pub struct DiffGraph {
705 pub entries: Vec<DiffGraphEntry>,
706}
707
708#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
709#[serde(tag = "type", content = "content")]
710pub enum DiffGraphEntry {
711 #[serde(rename = "node")]
712 Node(Node),
713 #[serde(rename = "edge")]
714 Edge(Edge),
715 #[serde(rename = "node_property")]
716 NodeProperty(AdditionalNodeProperty),
717 #[serde(rename = "edge_property")]
718 EdgeProperty(AdditionalEdgeProperty),
719 #[serde(rename = "remove_node")]
720 RemoveNode(RemoveNode),
721 #[serde(rename = "remove_node_property")]
722 RemoveNodeProperty(RemoveNodeProperty),
723 #[serde(rename = "remove_edge")]
724 RemoveEdge(RemoveEdge),
725 #[serde(rename = "remove_edge_property")]
726 RemoveEdgeProperty(RemoveEdgeProperty),
727}
728
729#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
730pub struct RemoveNode {
731 pub key: i64,
732}
733
734#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
735pub struct RemoveNodeProperty {
736 pub key: i64,
737 pub name: NodePropertyName,
738 pub local_name: String,
739}
740
741#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
742pub struct RemoveEdge {
743 pub out_node_key: i64,
744 pub in_node_key: i64,
745 pub edge_type: EdgeType,
746 /// used to identify edges (since our edges don't have ids)
747 pub properties_hash: Vec<u8>,
748}
749
750#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
751pub struct RemoveEdgeProperty {
752 pub out_node_key: i64,
753 pub in_node_key: i64,
754 pub edge_type: EdgeType,
755 /// used to identify edges (since our edges don't have ids)
756 pub properties_hash: Vec<u8>,
757 pub property_name: EdgePropertyName,
758}
759
760#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
761pub enum NodePropertyName {
762 #[serde(rename = "UNKNOWN_NODE_PROPERTY")]
763 UnknownNodeProperty = 0,
764 /// This optional field provides the line number of the program construct
765 /// represented by the node.
766 #[serde(rename = "LINE_NUMBER")]
767 LineNumber = 2,
768 /// AST node type name emitted by parser.
769 #[serde(rename = "PARSER_TYPE_NAME")]
770 ParserTypeName = 3,
771 /// This integer indicates the position of the node among
772 /// its siblings in the AST. The left-most child has an
773 /// order of 0.
774 #[serde(rename = "ORDER")]
775 Order = 4,
776 /// Name of represented object, e.g., method name (e.g. "run")
777 #[serde(rename = "NAME")]
778 Name = 5,
779 /// This is the fully-qualified name of an entity, e.g., the fully-qualified
780 /// name of a method or type. The details of what constitutes a fully-qualified
781 /// name are language specific. This field SHOULD be human readable.
782 #[serde(rename = "FULL_NAME")]
783 FullName = 6,
784 /// Indicates that the construct (METHOD or TYPE_DECL) is external, that is,
785 /// it is referenced but not defined in the code (applies both to insular
786 /// parsing and to library functions where we have header files only)
787 #[serde(rename = "IS_EXTERNAL")]
788 IsExternal = 7,
789 /// This property denotes a string value as used in a key-value pair.
790 #[serde(rename = "VALUE")]
791 Value = 8,
792 /// This optional fields provides the column number of the program construct
793 /// represented by the node.
794 #[serde(rename = "COLUMN_NUMBER")]
795 ColumnNumber = 11,
796 /// This optional fields provides the line number at which the program construct
797 /// represented by the node ends.
798 #[serde(rename = "LINE_NUMBER_END")]
799 LineNumberEnd = 12,
800 /// A version, given as a string. Used, for example, in the META_DATA node to
801 /// indicate which version of the CPG spec this CPG conforms to
802 #[serde(rename = "VERSION")]
803 Version = 13,
804 /// For formal method input parameters, output parameters, and return parameters,
805 /// this field holds the evaluation strategy, which is one of the following:
806 /// 1) `BY_REFERENCE` indicates that the parameter is passed by reference, 2)
807 /// `BY_VALUE` indicates that it is passed by value, that is, a copy is made,
808 /// 3) `BY_SHARING` the parameter is a pointer/reference and it is shared with
809 /// the caller/callee. While a copy of the pointer is made, a copy of the object
810 /// that it points to is not made.
811 #[serde(rename = "EVALUATION_STRATEGY")]
812 EvaluationStrategy = 15,
813 /// This optional fields provides the column number at which the program construct
814 /// represented by the node ends.
815 #[serde(rename = "COLUMN_NUMBER_END")]
816 ColumnNumberEnd = 16,
817 /// This field indicates which CPG language frontend generated the CPG.
818 /// Frontend developers may freely choose a value that describes their frontend
819 /// so long as it is not used by an existing frontend. Reserved values are to date:
820 /// C, LLVM, GHIDRA, PHP.
821 #[serde(rename = "LANGUAGE")]
822 Language = 19,
823 /// Certain files, e.g., configuration files, may be included in the CPG as-is.
824 /// For such files, the `CONTENT` field contains the files content.
825 #[serde(rename = "CONTENT")]
826 Content = 20,
827 /// This field holds the code snippet that the node represents.
828 #[serde(rename = "CODE")]
829 Code = 21,
830 /// The method signature encodes the types of parameters in a string.
831 /// The string SHOULD be human readable and suitable for differentiating methods
832 /// with different parameter types sufficiently to allow for resolving of
833 /// function overloading. The present specification does not enforce a strict
834 /// format for the signature, that is, it can be chosen by the frontend
835 /// implementor to fit the source language.
836 #[serde(rename = "SIGNATURE")]
837 Signature = 22,
838 /// This field holds the dispatch type of a call, which is either `STATIC_DISPATCH` or
839 /// `DYNAMIC_DISPATCH`. For statically dispatched method calls, the call target is known
840 /// at compile time while for dynamically dispatched calls, it can only be determined at
841 /// runtime as it may depend on the type of an object (as is the case for virtual method
842 /// calls) or calculation of an offset.
843 #[serde(rename = "DISPATCH_TYPE")]
844 DispatchType = 25,
845 /// The modifier type is a free-form string. The following are known modifier types:
846 /// `STATIC`, `PUBLIC`, `PROTECTED`, `PRIVATE`, `ABSTRACT`, `NATIVE`, `CONSTRUCTOR`, `VIRTUAL`.
847 #[serde(rename = "MODIFIER_TYPE")]
848 ModifierType = 26,
849 /// The `CONTROL_STRUCTURE_TYPE` field indicates which kind of control structure
850 /// a `CONTROL_STRUCTURE` node represents. The available types are the following:
851 /// BREAK, CONTINUE, DO, WHILE, FOR, GOTO, IF, ELSE, TRY, THROW and SWITCH.
852 #[serde(rename = "CONTROL_STRUCTURE_TYPE")]
853 ControlStructureType = 27,
854 /// AST-children of CALL nodes have an argument index, that is used to match
855 /// call-site arguments with callee parameters. Explicit parameters are numbered
856 /// from 1 to N, while index 0 is reserved for implicit self / this parameter.
857 /// CALLs without implicit parameter therefore have arguments starting with index 1.
858 /// AST-children of BLOCK nodes may have an argument index as well; in this case,
859 /// the last argument index determines the return expression of a BLOCK expression.
860 /// If the `PARAMETER_NAME` field is set, then the `ARGUMENT_INDEX` field is
861 /// ignored. It is suggested to set it to -1.
862 #[serde(rename = "ARGUMENT_INDEX")]
863 ArgumentIndex = 40,
864 /// Identifier which uniquely describes a CLOSURE_BINDING. This property is used to match captured LOCAL nodes with the corresponding CLOSURE_BINDING nodes
865 #[serde(rename = "CLOSURE_BINDING_ID")]
866 ClosureBindingId = 50,
867 /// This field contains the fully-qualified static type name of the program
868 /// construct represented by a node. It is the name of an instantiated type, e.g.,
869 /// `java.util.List<Integer>`, rather than `java.util.List\[T\]`. If the type
870 /// cannot be determined, this field should be set to the empty string.
871 #[serde(rename = "TYPE_FULL_NAME")]
872 TypeFullName = 51,
873 /// The static type decl of a TYPE. This property is matched against the FULL_NAME
874 /// of TYPE_DECL nodes. It is required to have exactly one TYPE_DECL for each
875 /// different TYPE_DECL_FULL_NAME
876 #[serde(rename = "TYPE_DECL_FULL_NAME")]
877 TypeDeclFullName = 52,
878 /// The static types a TYPE_DECL inherits from. This property is matched against the
879 /// FULL_NAME of TYPE nodes and thus it is required to have at least one TYPE node
880 /// for each TYPE_FULL_NAME
881 #[serde(rename = "INHERITS_FROM_TYPE_FULL_NAME")]
882 InheritsFromTypeFullName = 53,
883 /// The FULL_NAME of a method. Used to link CALL and METHOD nodes. It is required
884 /// to have exactly one METHOD node for each METHOD_FULL_NAME
885 #[serde(rename = "METHOD_FULL_NAME")]
886 MethodFullName = 54,
887 /// The type of the AST parent. Since this is only used in some parts of the graph,
888 /// the list does not include all possible parents by intention.
889 /// Possible parents: METHOD, TYPE_DECL, NAMESPACE_BLOCK.
890 #[serde(rename = "AST_PARENT_TYPE")]
891 AstParentType = 56,
892 /// This field holds the FULL_NAME of the AST parent of an entity.
893 #[serde(rename = "AST_PARENT_FULL_NAME")]
894 AstParentFullName = 57,
895 /// The group ID for a dependency
896 #[serde(rename = "DEPENDENCY_GROUP_ID")]
897 DependencyGroupId = 58,
898 #[serde(rename = "SYMBOL")]
899 Symbol = 100,
900 #[serde(rename = "METHOD_SHORT_NAME")]
901 MethodShortName = 102,
902 #[serde(rename = "PACKAGE_NAME")]
903 PackageName = 103,
904 #[serde(rename = "CLASS_NAME")]
905 ClassName = 104,
906 #[serde(rename = "NODE_LABEL")]
907 NodeLabel = 105,
908 /// The path of the source file this node was generated from, relative to the root
909 /// path in the meta data node. This field must be set but may be set to the value `<unknown>` to
910 /// indicate that no source file can be associated with the node, e.g., because the node represents
911 /// an entity known to exist because it is referenced, but for which the file that is is declared in
912 /// is unknown.
913 #[serde(rename = "FILENAME")]
914 Filename = 106,
915 /// The field contains the names of the overlays applied to this CPG, in order of their
916 /// application. Names are free-form strings, that is, this specification does not
917 /// dictate them but rather requires tool producers and consumers to communicate them
918 /// between each other.
919 #[serde(rename = "OVERLAYS")]
920 Overlays = 118,
921 /// This property contains a hash value in the form of a string.
922 /// Hashes can be used to summarize data, e.g., to summarize the
923 /// contents of source files or sub graphs. Such summaries are useful
924 /// to determine whether code has already been analyzed in incremental
925 /// analysis pipelines. This property is optional to allow its calculation
926 /// to be deferred or skipped if the hash is not needed.
927 #[serde(rename = "HASH")]
928 Hash = 120,
929 /// For calls involving named parameters, the `ARGUMENT_NAME` field holds the
930 /// name of the parameter initialized by the expression. For all other calls,
931 /// this field is unset.
932 #[serde(rename = "ARGUMENT_NAME")]
933 ArgumentName = 130,
934 /// This property denotes a key of a key-value pair.
935 #[serde(rename = "KEY")]
936 Key = 131,
937 #[serde(rename = "CLASS_SHORT_NAME")]
938 ClassShortName = 132,
939 /// This property holds the fully qualified name of the type that the node is
940 /// a type alias of.
941 #[serde(rename = "ALIAS_TYPE_FULL_NAME")]
942 AliasTypeFullName = 158,
943 /// The original name of the (potentially mangled) captured variable
944 #[serde(rename = "CLOSURE_ORIGINAL_NAME")]
945 ClosureOriginalName = 159,
946 /// Specifies whether a parameter is the variadic argument handling parameter of
947 /// a variadic method. Only one parameter of a method is allowed to have this
948 /// property set to true.
949 #[serde(rename = "IS_VARIADIC")]
950 IsVariadic = 221,
951 /// The path to the root directory of the source/binary this CPG is generated from.
952 #[serde(rename = "ROOT")]
953 Root = 1199,
954 /// Type hint for the dynamic type. These are observed to be verifiable at runtime.
955 #[serde(rename = "DYNAMIC_TYPE_HINT_FULL_NAME")]
956 DynamicTypeHintFullName = 1591,
957 /// Similar to `DYNAMIC_TYPE_HINT_FULL_NAME`, but that this makes no guarantee that types within this property are correct. This property is used to capture observations between node interactions during a 'may-analysis'.
958 #[serde(rename = "POSSIBLE_TYPES")]
959 PossibleTypes = 1592,
960 /// Specifies an index, e.g., for a parameter or argument.
961 /// Explicit parameters are numbered from 1 to N, while index 0 is reserved for implicit
962 /// self / this parameter.
963 #[serde(rename = "INDEX")]
964 Index = 2223,
965 /// This field is experimental. It will likely be removed in the future without any notice.
966 /// It stores type information for generic types and methods as well as type information
967 /// for members and locals where the type either contains a type parameter reference or
968 /// an instantiated type reference.
969 #[serde(rename = "GENERIC_SIGNATURE")]
970 GenericSignature = 3000,
971 /// Start offset into the CONTENT property of the corresponding FILE node.
972 /// The offset is such that parts of the content can easily
973 /// be accessed via `content.substring(offset, offsetEnd)`.
974 /// This means that the offset must be measured in utf16 encoding (i.e. neither in
975 /// characters/codeunits nor in byte-offsets into a utf8 encoding).
976 /// E.g. for METHOD nodes this start offset points to the start of the methods
977 /// source code in the string holding the source code of the entire file.
978 #[serde(rename = "OFFSET")]
979 Offset = 3812,
980 /// End offset (exclusive) into the CONTENT property of the corresponding FILE node.
981 /// See OFFSET documentation for finer details.
982 /// E.g. for METHOD nodes this end offset points to the first code position which is
983 /// not part of the method.
984 #[serde(rename = "OFFSET_END")]
985 OffsetEnd = 3813,
986 /// This field holds the canonical name of a `FIELD_IDENTIFIER`. It is typically
987 /// identical to the CODE field, but canonicalized according to source language
988 /// semantics. Human readable names are preferable. `FIELD_IDENTIFIER` nodes must
989 /// share identical `CANONICAL_NAME` if and
990 /// only if they alias, e.g., in C-style unions (if the aliasing relationship is
991 /// unknown or there are partial overlaps, then one must make a reasonable guess,
992 /// and trade off between false negatives and false positives).
993 #[serde(rename = "CANONICAL_NAME")]
994 CanonicalName = 2001092,
995 /// References to other nodes. This is not a real property; it exists here for the sake of serialization only. valueType and cardinality are meaningless.
996 #[serde(rename = "CONTAINED_REF")]
997 ContainedRef = 2007161,
998}
999
1000#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1001pub enum EdgePropertyName {
1002 #[serde(rename = "UNKNOWN_EDGE_PROPERTY")]
1003 UnknownEdgeProperty = 0,
1004 /// This edge property represents the variable propagated by a reaching definition edge.
1005 #[serde(rename = "VARIABLE")]
1006 Variable = 11,
1007}
1008
1009#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1010pub enum ModifierTypes {
1011 #[serde(rename = "UNKNOWN_MODIFIER_TYPE")]
1012 UnknownModifierType = 0,
1013 /// The static modifier
1014 #[serde(rename = "STATIC")]
1015 Static = 1,
1016 /// The public modifier
1017 #[serde(rename = "PUBLIC")]
1018 Public = 2,
1019 /// The protected modifier
1020 #[serde(rename = "PROTECTED")]
1021 Protected = 3,
1022 /// The private modifier
1023 #[serde(rename = "PRIVATE")]
1024 Private = 4,
1025 /// The abstract modifier
1026 #[serde(rename = "ABSTRACT")]
1027 Abstract = 5,
1028 /// The native modifier
1029 #[serde(rename = "NATIVE")]
1030 Native = 6,
1031 /// The constructor modifier
1032 #[serde(rename = "CONSTRUCTOR")]
1033 Constructor = 7,
1034 /// The virtual modifier
1035 #[serde(rename = "VIRTUAL")]
1036 Virtual = 8,
1037 /// The internal modifier
1038 #[serde(rename = "INTERNAL")]
1039 Internal = 9,
1040 /// The final modifier
1041 #[serde(rename = "FINAL")]
1042 Final = 10,
1043 /// The readonly modifier
1044 #[serde(rename = "READONLY")]
1045 Readonly = 11,
1046 /// Indicate that a method defines a module in the sense e.g. a python module does with the creation of a module object
1047 #[serde(rename = "MODULE")]
1048 Module = 12,
1049 /// Indicate that a method is an anonymous function, lambda, or closure
1050 #[serde(rename = "LAMBDA")]
1051 Lambda = 13,
1052}
1053
1054#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1055pub enum Languages {
1056 #[serde(rename = "UNKNOWN_LANGUAGE")]
1057 UnknownLanguage = 0,
1058 #[serde(rename = "JAVA")]
1059 Java = 1,
1060 #[serde(rename = "JAVASCRIPT")]
1061 Javascript = 2,
1062 #[serde(rename = "GOLANG")]
1063 Golang = 3,
1064 #[serde(rename = "CSHARP")]
1065 Csharp = 4,
1066 #[serde(rename = "C")]
1067 C = 5,
1068 #[serde(rename = "PYTHON")]
1069 Python = 6,
1070 #[serde(rename = "LLVM")]
1071 Llvm = 7,
1072 #[serde(rename = "PHP")]
1073 Php = 8,
1074 #[serde(rename = "FUZZY_TEST_LANG")]
1075 FuzzyTestLang = 9,
1076 /// generic reverse engineering framework
1077 #[serde(rename = "GHIDRA")]
1078 Ghidra = 10,
1079 #[serde(rename = "KOTLIN")]
1080 Kotlin = 11,
1081 /// Eclipse CDT based parser for C/C++
1082 #[serde(rename = "NEWC")]
1083 Newc = 12,
1084 /// Source-based front-end for Java
1085 #[serde(rename = "JAVASRC")]
1086 Javasrc = 13,
1087 /// Source-based front-end for Python
1088 #[serde(rename = "PYTHONSRC")]
1089 Pythonsrc = 14,
1090 /// Source-based JS frontend based on Babel
1091 #[serde(rename = "JSSRC")]
1092 Jssrc = 15,
1093 /// Source-based frontend for Ruby
1094 #[serde(rename = "RUBYSRC")]
1095 Rubysrc = 17,
1096 /// Source-based frontend for Swift
1097 #[serde(rename = "SWIFTSRC")]
1098 Swiftsrc = 18,
1099 /// Source-based frontend for C# and .NET
1100 #[serde(rename = "CSHARPSRC")]
1101 Csharpsrc = 19,
1102}
1103
1104#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1105pub enum EvaluationStrategies {
1106 #[serde(rename = "UNKNOWN_EVALUATION_STRATEGY")]
1107 UnknownEvaluationStrategy = 0,
1108 /// A parameter or return of a function is passed by reference which means an address is used behind the scenes
1109 #[serde(rename = "BY_REFERENCE")]
1110 ByReference = 1,
1111 /// Only applicable to object parameter or return values. The pointer to the object is passed by value but the object itself is not copied and changes to it are thus propagated out of the method context
1112 #[serde(rename = "BY_SHARING")]
1113 BySharing = 2,
1114 /// A parameter or return of a function passed by value which means a flat copy is used
1115 #[serde(rename = "BY_VALUE")]
1116 ByValue = 3,
1117}
1118
1119#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1120pub enum DispatchTypes {
1121 #[serde(rename = "UNKNOWN_DISPATCH_TYPE")]
1122 UnknownDispatchType = 0,
1123 /// For statically dispatched calls the call target is known before program execution
1124 #[serde(rename = "STATIC_DISPATCH")]
1125 StaticDispatch = 1,
1126 /// For dynamically dispatched calls the target is determined during runtime
1127 #[serde(rename = "DYNAMIC_DISPATCH")]
1128 DynamicDispatch = 2,
1129 /// For macro expansions, code is inlined.
1130 #[serde(rename = "INLINED")]
1131 Inlined = 3,
1132}
1133
1134#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
1135pub enum ControlStructureTypes {
1136 #[serde(rename = "UNKNOWN_CONTROL_STRUCTURE_TYPE")]
1137 UnknownControlStructureType = 0,
1138 /// Represents a break statement. Labeled breaks are expected to have a JUMP_LABEL
1139 /// node AST child with ORDER 1
1140 #[serde(rename = "BREAK")]
1141 Break = 1,
1142 /// Represents a continue statement. Labeled continues are expected to have a JUMP_LABEL
1143 /// node AST child with ORDER 1
1144 #[serde(rename = "CONTINUE")]
1145 Continue = 2,
1146 /// Represents a while statement
1147 #[serde(rename = "WHILE")]
1148 While = 3,
1149 /// Represents a do statement
1150 #[serde(rename = "DO")]
1151 Do = 4,
1152 /// Represents a for statement
1153 #[serde(rename = "FOR")]
1154 For = 5,
1155 /// Represents a goto statement
1156 #[serde(rename = "GOTO")]
1157 Goto = 6,
1158 /// Represents an if statement
1159 #[serde(rename = "IF")]
1160 If = 7,
1161 /// Represents an else statement
1162 #[serde(rename = "ELSE")]
1163 Else = 8,
1164 /// Represents a switch statement
1165 #[serde(rename = "SWITCH")]
1166 Switch = 9,
1167 /// Represents a try statement
1168 #[serde(rename = "TRY")]
1169 Try = 10,
1170 /// Represents a throw statement
1171 #[serde(rename = "THROW")]
1172 Throw = 11,
1173 /// Represents a match expression
1174 #[serde(rename = "MATCH")]
1175 Match = 12,
1176 /// Represents a yield expression
1177 #[serde(rename = "YIELD")]
1178 Yield = 13,
1179 /// Represents a catch clause
1180 #[serde(rename = "CATCH")]
1181 Catch = 14,
1182 /// Represents a finally clause
1183 #[serde(rename = "FINALLY")]
1184 Finally = 15,
1185}
1186
1187
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `value` serializes to exactly `expected_json` and that parsing
    /// `expected_json` yields a value equal to `value`.
    fn assert_exact_json<T>(value: &T, expected_json: &str)
    where
        T: serde::Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug,
    {
        let serialized = serde_json::to_string(value).unwrap();
        assert_eq!(serialized, expected_json);

        let deserialized: T = serde_json::from_str(expected_json).unwrap();
        assert_eq!(&deserialized, value);
    }

    /// Asserts that `value` is unchanged after being serialized to JSON and parsed back.
    fn assert_round_trip<T>(value: &T)
    where
        T: serde::Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug,
    {
        let serialized = serde_json::to_string(value).unwrap();
        let deserialized: T = serde_json::from_str(&serialized).unwrap();
        assert_eq!(&deserialized, value);
    }

    /// Wraps `inner` into a `PropertyValue` that holds it.
    fn property_value(inner: PropertyValueEnum) -> PropertyValue {
        PropertyValue { value: Some(inner) }
    }

    /// Builds a `PropertyValue` holding the string `text`.
    fn string_property_value(text: &str) -> PropertyValue {
        property_value(PropertyValueEnum::StringValue(text.to_string()))
    }

    #[test]
    fn test_property_value_serialization() {
        assert_exact_json(
            &string_property_value("test string"),
            r#"{"type":"string_value","value":"test string"}"#,
        );
    }

    #[test]
    fn test_property_value_bool() {
        assert_exact_json(
            &property_value(PropertyValueEnum::BoolValue(true)),
            r#"{"type":"bool_value","value":true}"#,
        );
    }

    #[test]
    fn test_property_value_int() {
        assert_exact_json(
            &property_value(PropertyValueEnum::IntValue(42)),
            r#"{"type":"int_value","value":42}"#,
        );
    }

    #[test]
    fn test_property_value_long() {
        assert_exact_json(
            &property_value(PropertyValueEnum::LongValue(9223372036854775807)),
            r#"{"type":"long_value","value":9223372036854775807}"#,
        );
    }

    #[test]
    fn test_property_value_float() {
        // 1.5 is exactly representable in binary floating point and, unlike a literal
        // such as 3.14, does not trip clippy's `approx_constant` lint.
        assert_exact_json(
            &property_value(PropertyValueEnum::FloatValue(1.5)),
            r#"{"type":"float_value","value":1.5}"#,
        );
    }

    #[test]
    fn test_property_value_double() {
        // 2.25 is exactly representable in binary floating point and, unlike a literal
        // such as 2.71828, does not trip clippy's `approx_constant` lint.
        assert_exact_json(
            &property_value(PropertyValueEnum::DoubleValue(2.25)),
            r#"{"type":"double_value","value":2.25}"#,
        );
    }

    #[test]
    fn test_property_value_string_list() {
        let string_list = property_value(PropertyValueEnum::StringList(StringList {
            values: vec!["one".to_string(), "two".to_string(), "three".to_string()],
        }));
        assert_exact_json(
            &string_list,
            r#"{"type":"string_list","value":{"values":["one","two","three"]}}"#,
        );
    }

    #[test]
    fn test_property_value_contained_refs() {
        let contained_refs = property_value(PropertyValueEnum::ContainedRefs(ContainedRefs {
            local_name: "test_refs".to_string(),
            refs: vec![1, 2, 3, 4],
        }));
        assert_exact_json(
            &contained_refs,
            r#"{"type":"contained_refs","value":{"local_name":"test_refs","refs":[1,2,3,4]}}"#,
        );
    }

    #[test]
    fn test_node_serialization() {
        let node = Node {
            key: 123,
            r#type: NodeType::Method,
            property: vec![
                NodeProperty {
                    name: NodePropertyName::Name,
                    value: Some(string_property_value("testMethod")),
                },
                NodeProperty {
                    name: NodePropertyName::FullName,
                    value: Some(string_property_value("com.example.TestClass.testMethod")),
                },
            ],
        };
        assert_round_trip(&node);
    }

    #[test]
    fn test_edge_serialization() {
        let edge = Edge {
            src: 123,
            dst: 456,
            r#type: EdgeType::Ast,
            property: vec![EdgeProperty {
                name: EdgePropertyName::Variable,
                value: Some(string_property_value("testVar")),
            }],
        };
        assert_round_trip(&edge);
    }

    #[test]
    fn test_cpg_struct_serialization() {
        let cpg = Cpg {
            node: vec![
                Node {
                    key: 1,
                    r#type: NodeType::Method,
                    property: vec![NodeProperty {
                        name: NodePropertyName::Name,
                        value: Some(string_property_value("main")),
                    }],
                },
                Node {
                    key: 2,
                    r#type: NodeType::MethodReturn,
                    property: vec![],
                },
            ],
            edge: vec![Edge {
                src: 1,
                dst: 2,
                r#type: EdgeType::Ast,
                property: vec![],
            }],
        };
        assert_round_trip(&cpg);
    }

    #[test]
    fn test_diff_graph_serialization() {
        let diff_graph = DiffGraph {
            entries: vec![
                DiffGraphEntry::Node(Node {
                    key: 1,
                    r#type: NodeType::Method,
                    property: vec![],
                }),
                DiffGraphEntry::Edge(Edge {
                    src: 1,
                    dst: 2,
                    r#type: EdgeType::Ast,
                    property: vec![],
                }),
                DiffGraphEntry::RemoveNode(RemoveNode { key: 3 }),
            ],
        };
        assert_round_trip(&diff_graph);
    }

    #[test]
    fn test_node_type_serialization() {
        assert_exact_json(&NodeType::Method, r#""METHOD""#);
    }

    #[test]
    fn test_edge_type_serialization() {
        assert_exact_json(&EdgeType::Call, r#""CALL""#);
    }

    #[test]
    fn test_property_value_none() {
        // A property value without content is expected to serialize to an empty object.
        assert_exact_json(&PropertyValue { value: None }, r#"{}"#);
    }
}