cha_core/model.rs
/// Where a referenced type is defined, from the perspective of the file that
/// uses it. Used by abstraction-boundary analyses to distinguish "own domain"
/// types from "pulled in from a library" types.
///
/// Serialized adjacently tagged: the snake_case variant name is stored under
/// the `kind` key and the `External` payload under `module` (in JSON, e.g.
/// `{"kind": "external", "module": "serde"}`; unit variants carry only
/// `kind`).
#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case", tag = "kind", content = "module")]
pub enum TypeOrigin {
    /// Declared inside the project (resolved via project-wide type registry
    /// or an import pointing at a project-local path).
    Local,
    /// Imported from an external module / crate / package. Carries the module
    /// name if known (Rust crate path root, Go module path, npm package name,
    /// C header filename without extension). May be empty if only structure
    /// says "external" (e.g. `#include <...>` without the header name).
    External(String),
    /// Built-in primitive / standard library scalar (int, bool, &str, char…).
    Primitive,
    /// Could not be resolved. Detection treats this as potentially external
    /// but with lower confidence.
    ///
    /// This is the variant produced by `TypeOrigin::default()`.
    #[default]
    Unknown,
}
22
/// A single arm value of a `switch`/`match` construct. Recorded so
/// signature-based analyses can notice "switch on string constants" or
/// "switch on integer magic numbers" dispatch patterns. Non-literal
/// patterns (Rust enum variants, Python capture patterns, `default`)
/// collapse to `Other`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ArmValue {
    /// Arm whose value is a string literal.
    // NOTE(review): whether the stored text keeps its quotes/escapes is
    // decided by the language parsers — confirm there.
    Str(String),
    /// Arm whose value is an integer literal, stored as `i64`.
    Int(i64),
    /// Arm whose value is a character literal.
    Char(char),
    /// Non-literal pattern — enum variants, destructuring, guards,
    /// `default`, `_`. Kept counted so callers can tell "dispatch with
    /// N arms total" from "dispatch with N literal arms".
    Other,
}
38
/// A function parameter's (or return value's) type, with resolved origin.
/// Produced by parsers after combining AST type text with the file's imports.
///
/// `Default` yields empty `name` / `raw` strings and `TypeOrigin::Unknown`.
#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
pub struct TypeRef {
    /// Innermost identifier after stripping references, generics, containers.
    /// e.g. `&mut Vec<tree_sitter::Node>` → `"Node"`.
    pub name: String,
    /// Original source text as written, for messages and debugging.
    /// e.g. `"&mut Vec<tree_sitter::Node>"`.
    pub raw: String,
    /// Where the type is declared.
    pub origin: TypeOrigin,
}
52
/// Extracted function info from AST.
///
/// Carries the per-function metrics and signature data consumed by the
/// analyses named in the field comments below. All fields have a `Default`,
/// so partially populated values can be built with struct-update syntax.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct FunctionInfo {
    /// Function name as extracted by the parser.
    pub name: String,
    /// Line on which the function starts.
    // NOTE(review): line-number base (0 or 1) is not stated here — confirm
    // against the parsers.
    pub start_line: usize,
    /// Line on which the function ends (same base as `start_line`).
    pub end_line: usize,
    /// 0-based column of the function name identifier.
    pub name_col: usize,
    /// 0-based end column of the function name identifier.
    pub name_end_col: usize,
    /// Number of source lines attributed to the function.
    // NOTE(review): presumably derived from `start_line`/`end_line`; the
    // exact formula lives in the parsers.
    pub line_count: usize,
    /// Cyclomatic complexity (1 + number of branch points).
    pub complexity: usize,
    /// Hash of the function body AST structure for duplicate detection.
    pub body_hash: Option<u64>,
    /// Whether this function is exported (pub/export).
    pub is_exported: bool,
    /// Number of parameters.
    pub parameter_count: usize,
    /// Names of external identifiers referenced in the body (for Feature Envy).
    pub external_refs: Vec<String>,
    /// Max method chain depth in the body (for Message Chains).
    pub chain_depth: usize,
    /// Number of switch/match arms (for Switch Statements).
    pub switch_arms: usize,
    /// Arm values of each `switch`/`match` arm in the function body,
    /// in source order, across *all* dispatch constructs. Enables
    /// `stringly_typed_dispatch` to notice "≥ 3 arms are string
    /// literals" without re-walking the AST. Empty for functions
    /// with no switch/match.
    pub switch_arm_values: Vec<ArmValue>,
    /// Whether this function only delegates to another object's method (for Middle Man).
    pub is_delegating: bool,
    /// Parameter types **in declaration order**, each resolved to a TypeRef.
    /// Preserves position (first param = index 0) so positional analyses work.
    pub parameter_types: Vec<TypeRef>,
    /// Parameter identifier names, parallel to `parameter_types`. Empty
    /// string for anonymous parameters (C `void foo(int);`). Drives
    /// name-semantic analyses like `primitive_representation`.
    pub parameter_names: Vec<String>,
    /// Number of comment lines in the function body.
    pub comment_lines: usize,
    /// Field names referenced in this function body (for Temporary Field).
    pub referenced_fields: Vec<String>,
    /// Field names checked for null/None in this function (for Null Object pattern).
    pub null_check_fields: Vec<String>,
    /// The field/variable name being dispatched on in switch/match (for Strategy/State).
    pub switch_dispatch_target: Option<String>,
    /// Number of optional parameters (for Builder pattern).
    pub optional_param_count: usize,
    /// Names of functions/methods called in this function body (for call graph).
    pub called_functions: Vec<String>,
    /// Cognitive complexity score [SonarSource 2017] — nesting-aware understandability metric.
    pub cognitive_complexity: usize,
    /// Declared return type (None if not annotated or inferred), resolved the
    /// same way as parameter types. Drives return_type_leak detection.
    pub return_type: Option<TypeRef>,
}
111
/// Extracted class/struct info from AST.
///
/// Holds the per-class counts, field lists and flags consumed by the
/// analyses named in the field comments below.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ClassInfo {
    /// Class/struct name as extracted by the parser.
    pub name: String,
    /// Line on which the class starts.
    // NOTE(review): line-number base (0 or 1) is not stated here — confirm
    // against the parsers.
    pub start_line: usize,
    /// Line on which the class ends (same base as `start_line`).
    pub end_line: usize,
    /// 0-based column of the class/struct name identifier.
    pub name_col: usize,
    /// 0-based end column of the class/struct name identifier.
    pub name_end_col: usize,
    /// Number of methods attributed to the class.
    pub method_count: usize,
    /// Number of source lines attributed to the class.
    pub line_count: usize,
    /// Whether this class is exported.
    pub is_exported: bool,
    /// Number of methods that only delegate to another object.
    pub delegating_method_count: usize,
    /// Number of fields/properties.
    pub field_count: usize,
    /// Field names declared in this class.
    pub field_names: Vec<String>,
    /// Field types (parallel to field_names).
    pub field_types: Vec<String>,
    /// Whether the class has non-accessor methods (business logic).
    pub has_behavior: bool,
    /// Whether this is an interface or abstract class.
    pub is_interface: bool,
    /// Parent class/trait name (for Refused Bequest).
    pub parent_name: Option<String>,
    /// Number of overridden methods (for Refused Bequest).
    pub override_count: usize,
    /// Number of self-method calls in the longest method (for Template Method).
    pub self_call_count: usize,
    /// Whether the class has a listener/callback collection field.
    pub has_listener_field: bool,
    /// Whether the class has a notify/emit method.
    pub has_notify_method: bool,
}
149
/// Extracted import info.
///
/// One entry per import statement or module declaration found in a file.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct ImportInfo {
    /// What is being imported.
    // NOTE(review): presumably the module/path text of the statement; the
    // exact normalisation is decided by the language parsers — confirm there.
    pub source: String,
    /// Line of the import statement (base not stated here).
    pub line: usize,
    /// 0-based column of the import statement.
    pub col: usize,
    /// True for module declarations (e.g. Rust `mod foo;`).
    pub is_module_decl: bool,
}
160
/// A comment extracted from source code by the language parser.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CommentInfo {
    /// Text of the comment.
    // NOTE(review): whether comment delimiters are kept is up to the
    // parser — confirm there.
    pub text: String,
    /// Line the comment was found on (base not stated here).
    pub line: usize,
}
167
/// Unified source model produced by parsing.
///
/// Aggregates everything extracted from one source file: functions, classes,
/// imports, comments and type aliases, plus the language tag and line total.
#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
pub struct SourceModel {
    /// Language identifier of the parsed file.
    pub language: String,
    /// Total number of lines in the file.
    pub total_lines: usize,
    /// Functions extracted from the file.
    pub functions: Vec<FunctionInfo>,
    /// Classes/structs extracted from the file.
    pub classes: Vec<ClassInfo>,
    /// Imports and module declarations extracted from the file.
    pub imports: Vec<ImportInfo>,
    /// Comments extracted from the file.
    pub comments: Vec<CommentInfo>,
    /// Type aliases: (alias, original). e.g. typedef, using, type =
    pub type_aliases: Vec<(String, String)>,
}
180
/// Compact structural summary of a file — symbol-level view without the
/// per-function-body detail that analyze plugins need. Serves `cha deps`,
/// future LSP workspace-symbols, and anywhere a reader needs "what
/// classes/functions live here and how are they related" without caring
/// about complexity metrics or TypeRef origin resolution.
///
/// One-way derivable from `SourceModel`; cached separately so light
/// consumers don't pay `SourceModel`'s deserialise cost.
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
pub struct SymbolIndex {
    /// Language identifier, as in `SourceModel.language`.
    pub language: String,
    /// Total number of lines, as in `SourceModel.total_lines`.
    pub total_lines: usize,
    /// Imports of the file, in the same form as `SourceModel.imports`.
    pub imports: Vec<ImportInfo>,
    /// Symbol-level view of each class.
    pub classes: Vec<ClassSymbol>,
    /// Symbol-level view of each function.
    pub functions: Vec<FunctionSymbol>,
    /// `(alias, original)`. Mirrors `SourceModel.type_aliases`.
    pub type_aliases: Vec<(String, String)>,
}
199
/// Symbol-level view of a class — everything deps/LSP/hotspot need to
/// reason about a class without parsing method bodies. Fields intentionally
/// track the subset of `ClassInfo` that survives cross-file consumption.
///
/// Every field below has the same name and meaning as the corresponding
/// `ClassInfo` field.
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
pub struct ClassSymbol {
    /// Class/struct name.
    pub name: String,
    /// Parent class/trait name, if any.
    pub parent_name: Option<String>,
    /// Whether this is an interface or abstract class.
    pub is_interface: bool,
    /// Whether this class is exported.
    pub is_exported: bool,
    /// Number of methods.
    pub method_count: usize,
    /// Whether the class has non-accessor methods (business logic).
    pub has_behavior: bool,
    /// Field names declared in this class.
    pub field_names: Vec<String>,
    /// Field types (parallel to `field_names`).
    pub field_types: Vec<String>,
    /// Line on which the class starts.
    pub start_line: usize,
    /// Line on which the class ends.
    pub end_line: usize,
    /// 0-based column of the class/struct name identifier.
    pub name_col: usize,
    /// 0-based end column of the class/struct name identifier.
    pub name_end_col: usize,
}
218
/// Symbol-level view of a function — name + signature + call-graph input.
/// Omits body_hash, complexity, cognitive, external_refs, chain_depth,
/// parameter_types (TypeRef), return_type — those live in `FunctionInfo`
/// for analyze plugins.
#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
pub struct FunctionSymbol {
    /// Function name.
    pub name: String,
    /// Whether this function is exported (pub/export).
    pub is_exported: bool,
    /// Number of parameters.
    pub parameter_count: usize,
    /// Names of functions/methods called in the body (call-graph input).
    pub called_functions: Vec<String>,
    /// Line on which the function starts.
    pub start_line: usize,
    /// Line on which the function ends.
    pub end_line: usize,
    /// 0-based column of the function name identifier.
    pub name_col: usize,
    /// 0-based end column of the function name identifier.
    pub name_end_col: usize,
    /// Bare type names (no module prefix, no origin info) for each
    /// parameter in declaration order. Sufficient for signature-based
    /// clustering (C OOP attribution, call-graph refinement) without
    /// pulling in TypeRef's origin resolution, which is analyze-only.
    pub parameter_type_names: Vec<String>,
    /// Parameter identifier names, parallel to `parameter_type_names`.
    /// Empty string for anonymous params (C `void foo(int);`). Enables
    /// name-semantic views (e.g. LSP hover "email: String") without
    /// loading the full `SourceModel`.
    pub parameter_names: Vec<String>,
    /// Bare return type name (same conventions as parameter_type_names);
    /// `None` if the function has no declared return type.
    pub return_type_name: Option<String>,
    /// Mirror of `FunctionInfo.switch_arm_values`. Enables LSP / summary
    /// tools to recognise stringly-typed dispatchers without loading
    /// the full source model.
    pub switch_arm_values: Vec<ArmValue>,
}
251
252impl SymbolIndex {
253 /// Project a `SourceModel` onto the symbol-level view. Cheap —
254 /// clones strings but no heavy structures.
255 pub fn from_source_model(m: &SourceModel) -> Self {
256 Self {
257 language: m.language.clone(),
258 total_lines: m.total_lines,
259 imports: m.imports.clone(),
260 classes: m.classes.iter().map(ClassSymbol::from_class_info).collect(),
261 functions: m
262 .functions
263 .iter()
264 .map(FunctionSymbol::from_function_info)
265 .collect(),
266 type_aliases: m.type_aliases.clone(),
267 }
268 }
269}
270
271impl ClassSymbol {
272 pub fn from_class_info(c: &ClassInfo) -> Self {
273 Self {
274 name: c.name.clone(),
275 parent_name: c.parent_name.clone(),
276 is_interface: c.is_interface,
277 is_exported: c.is_exported,
278 method_count: c.method_count,
279 has_behavior: c.has_behavior,
280 field_names: c.field_names.clone(),
281 field_types: c.field_types.clone(),
282 start_line: c.start_line,
283 end_line: c.end_line,
284 name_col: c.name_col,
285 name_end_col: c.name_end_col,
286 }
287 }
288}
289
290impl FunctionSymbol {
291 pub fn from_function_info(f: &FunctionInfo) -> Self {
292 Self {
293 name: f.name.clone(),
294 is_exported: f.is_exported,
295 parameter_count: f.parameter_count,
296 called_functions: f.called_functions.clone(),
297 start_line: f.start_line,
298 end_line: f.end_line,
299 name_col: f.name_col,
300 name_end_col: f.name_end_col,
301 parameter_type_names: f.parameter_types.iter().map(|t| t.raw.clone()).collect(),
302 parameter_names: f.parameter_names.clone(),
303 return_type_name: f.return_type.as_ref().map(|t| t.raw.clone()),
304 switch_arm_values: f.switch_arm_values.clone(),
305 }
306 }
307}