// cha_core/model.rs

1/// Where a referenced type is defined, from the perspective of the file that
2/// uses it. Used by abstraction-boundary analyses to distinguish "own domain"
3/// types from "pulled in from a library" types.
4#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
5#[serde(rename_all = "snake_case", tag = "kind", content = "module")]
6pub enum TypeOrigin {
7    /// Declared inside the project (resolved via project-wide type registry
8    /// or an import pointing at a project-local path).
9    Local,
10    /// Imported from an external module / crate / package. Carries the module
11    /// name if known (Rust crate path root, Go module path, npm package name,
12    /// C header filename without extension). May be empty if only structure
13    /// says "external" (e.g. `#include <...>` without the header name).
14    External(String),
15    /// Built-in primitive / standard library scalar (int, bool, &str, char…).
16    Primitive,
17    /// Could not be resolved. Detection treats this as potentially external
18    /// but with lower confidence.
19    #[default]
20    Unknown,
21}
22
23/// A single arm value of a `switch`/`match` construct. Recorded so
24/// signature-based analyses can notice "switch on string constants" or
25/// "switch on integer magic numbers" dispatch patterns. Non-literal
26/// patterns (Rust enum variants, Python capture patterns, `default`)
27/// collapse to `Other`.
28#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
29pub enum ArmValue {
30    Str(String),
31    Int(i64),
32    Char(char),
33    /// Non-literal pattern — enum variants, destructuring, guards,
34    /// `default`, `_`. Kept counted so callers can tell "dispatch with
35    /// N arms total" from "dispatch with N literal arms".
36    Other,
37}
38
39/// A function parameter's (or return value's) type, with resolved origin.
40/// Produced by parsers after combining AST type text with the file's imports.
41#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)]
42pub struct TypeRef {
43    /// Innermost identifier after stripping references, generics, containers.
44    /// e.g. `&mut Vec<tree_sitter::Node>` → `"Node"`.
45    pub name: String,
46    /// Original source text as written, for messages and debugging.
47    /// e.g. `"&mut Vec<tree_sitter::Node>"`.
48    pub raw: String,
49    /// Where the type is declared.
50    pub origin: TypeOrigin,
51}
52
53/// Extracted function info from AST.
54#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
55pub struct FunctionInfo {
56    pub name: String,
57    pub start_line: usize,
58    pub end_line: usize,
59    /// 0-based column of the function name identifier.
60    pub name_col: usize,
61    /// 0-based end column of the function name identifier.
62    pub name_end_col: usize,
63    pub line_count: usize,
64    /// Cyclomatic complexity (1 + number of branch points).
65    pub complexity: usize,
66    /// Hash of the function body AST structure for duplicate detection.
67    pub body_hash: Option<u64>,
68    /// Whether this function is exported (pub/export).
69    pub is_exported: bool,
70    /// Number of parameters.
71    pub parameter_count: usize,
72    /// Names of external identifiers referenced in the body (for Feature Envy).
73    pub external_refs: Vec<String>,
74    /// Max method chain depth in the body (for Message Chains).
75    pub chain_depth: usize,
76    /// Number of switch/match arms (for Switch Statements).
77    pub switch_arms: usize,
78    /// Arm values of each `switch`/`match` arm in the function body,
79    /// in source order, across *all* dispatch constructs. Enables
80    /// `stringly_typed_dispatch` to notice "≥ 3 arms are string
81    /// literals" without re-walking the AST. Empty for functions
82    /// with no switch/match.
83    pub switch_arm_values: Vec<ArmValue>,
84    /// Whether this function only delegates to another object's method (for Middle Man).
85    pub is_delegating: bool,
86    /// Parameter types **in declaration order**, each resolved to a TypeRef.
87    /// Preserves position (first param = index 0) so positional analyses work.
88    pub parameter_types: Vec<TypeRef>,
89    /// Parameter identifier names, parallel to `parameter_types`. Empty
90    /// string for anonymous parameters (C `void foo(int);`). Drives
91    /// name-semantic analyses like `primitive_representation`.
92    pub parameter_names: Vec<String>,
93    /// Number of comment lines in the function body.
94    pub comment_lines: usize,
95    /// Field names referenced in this function body (for Temporary Field).
96    pub referenced_fields: Vec<String>,
97    /// Field names checked for null/None in this function (for Null Object pattern).
98    pub null_check_fields: Vec<String>,
99    /// The field/variable name being dispatched on in switch/match (for Strategy/State).
100    pub switch_dispatch_target: Option<String>,
101    /// Number of optional parameters (for Builder pattern).
102    pub optional_param_count: usize,
103    /// Names of functions/methods called in this function body (for call graph).
104    pub called_functions: Vec<String>,
105    /// Cognitive complexity score [SonarSource 2017] — nesting-aware understandability metric.
106    pub cognitive_complexity: usize,
107    /// Declared return type (None if not annotated or inferred), resolved the
108    /// same way as parameter types. Drives return_type_leak detection.
109    pub return_type: Option<TypeRef>,
110}
111
112/// Extracted class/struct info from AST.
113#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
114pub struct ClassInfo {
115    pub name: String,
116    pub start_line: usize,
117    pub end_line: usize,
118    /// 0-based column of the class/struct name identifier.
119    pub name_col: usize,
120    /// 0-based end column of the class/struct name identifier.
121    pub name_end_col: usize,
122    pub method_count: usize,
123    pub line_count: usize,
124    /// Whether this class is exported.
125    pub is_exported: bool,
126    /// Number of methods that only delegate to another object.
127    pub delegating_method_count: usize,
128    /// Number of fields/properties.
129    pub field_count: usize,
130    /// Field names declared in this class.
131    pub field_names: Vec<String>,
132    /// Field types (parallel to field_names).
133    pub field_types: Vec<String>,
134    /// Whether the class has non-accessor methods (business logic).
135    pub has_behavior: bool,
136    /// Whether this is an interface or abstract class.
137    pub is_interface: bool,
138    /// Parent class/trait name (for Refused Bequest).
139    pub parent_name: Option<String>,
140    /// Number of overridden methods (for Refused Bequest).
141    pub override_count: usize,
142    /// Number of self-method calls in the longest method (for Template Method).
143    pub self_call_count: usize,
144    /// Whether the class has a listener/callback collection field.
145    pub has_listener_field: bool,
146    /// Whether the class has a notify/emit method.
147    pub has_notify_method: bool,
148}
149
150/// Extracted import info.
151#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
152pub struct ImportInfo {
153    pub source: String,
154    pub line: usize,
155    /// 0-based column of the import statement.
156    pub col: usize,
157    /// True for module declarations (e.g. Rust `mod foo;`).
158    pub is_module_decl: bool,
159}
160
161/// A comment extracted from source code by the language parser.
162#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
163pub struct CommentInfo {
164    pub text: String,
165    pub line: usize,
166}
167
168/// Unified source model produced by parsing.
169#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)]
170pub struct SourceModel {
171    pub language: String,
172    pub total_lines: usize,
173    pub functions: Vec<FunctionInfo>,
174    pub classes: Vec<ClassInfo>,
175    pub imports: Vec<ImportInfo>,
176    pub comments: Vec<CommentInfo>,
177    /// Type aliases: (alias, original). e.g. typedef, using, type =
178    pub type_aliases: Vec<(String, String)>,
179}
180
181/// Compact structural summary of a file — symbol-level view without the
182/// per-function-body detail that analyze plugins need. Serves `cha deps`,
183/// future LSP workspace-symbols, and anywhere a reader needs "what
184/// classes/functions live here and how are they related" without caring
185/// about complexity metrics or TypeRef origin resolution.
186///
187/// One-way derivable from `SourceModel`; cached separately so light
188/// consumers don't pay `SourceModel`'s deserialise cost.
189#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
190pub struct SymbolIndex {
191    pub language: String,
192    pub total_lines: usize,
193    pub imports: Vec<ImportInfo>,
194    pub classes: Vec<ClassSymbol>,
195    pub functions: Vec<FunctionSymbol>,
196    /// `(alias, original)`. Mirrors `SourceModel.type_aliases`.
197    pub type_aliases: Vec<(String, String)>,
198}
199
200/// Symbol-level view of a class — everything deps/LSP/hotspot need to
201/// reason about a class without parsing method bodies. Fields intentionally
202/// track the subset of `ClassInfo` that survives cross-file consumption.
203#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
204pub struct ClassSymbol {
205    pub name: String,
206    pub parent_name: Option<String>,
207    pub is_interface: bool,
208    pub is_exported: bool,
209    pub method_count: usize,
210    pub has_behavior: bool,
211    pub field_names: Vec<String>,
212    pub field_types: Vec<String>,
213    pub start_line: usize,
214    pub end_line: usize,
215    pub name_col: usize,
216    pub name_end_col: usize,
217}
218
219/// Symbol-level view of a function — name + signature + call-graph input.
220/// Omits body_hash, complexity, cognitive, external_refs, chain_depth,
221/// parameter_types (TypeRef), return_type — those live in `FunctionInfo`
222/// for analyze plugins.
223#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)]
224pub struct FunctionSymbol {
225    pub name: String,
226    pub is_exported: bool,
227    pub parameter_count: usize,
228    pub called_functions: Vec<String>,
229    pub start_line: usize,
230    pub end_line: usize,
231    pub name_col: usize,
232    pub name_end_col: usize,
233    /// Bare type names (no module prefix, no origin info) for each
234    /// parameter in declaration order. Sufficient for signature-based
235    /// clustering (C OOP attribution, call-graph refinement) without
236    /// pulling in TypeRef's origin resolution, which is analyze-only.
237    pub parameter_type_names: Vec<String>,
238    /// Parameter identifier names, parallel to `parameter_type_names`.
239    /// Empty string for anonymous params (C `void foo(int);`). Enables
240    /// name-semantic views (e.g. LSP hover "email: String") without
241    /// loading the full `SourceModel`.
242    pub parameter_names: Vec<String>,
243    /// Bare return type name (same conventions as parameter_type_names);
244    /// `None` if the function has no declared return type.
245    pub return_type_name: Option<String>,
246    /// Mirror of `FunctionInfo.switch_arm_values`. Enables LSP / summary
247    /// tools to recognise stringly-typed dispatchers without loading
248    /// the full source model.
249    pub switch_arm_values: Vec<ArmValue>,
250}
251
252impl SymbolIndex {
253    /// Project a `SourceModel` onto the symbol-level view. Cheap —
254    /// clones strings but no heavy structures.
255    pub fn from_source_model(m: &SourceModel) -> Self {
256        Self {
257            language: m.language.clone(),
258            total_lines: m.total_lines,
259            imports: m.imports.clone(),
260            classes: m.classes.iter().map(ClassSymbol::from_class_info).collect(),
261            functions: m
262                .functions
263                .iter()
264                .map(FunctionSymbol::from_function_info)
265                .collect(),
266            type_aliases: m.type_aliases.clone(),
267        }
268    }
269}
270
271impl ClassSymbol {
272    pub fn from_class_info(c: &ClassInfo) -> Self {
273        Self {
274            name: c.name.clone(),
275            parent_name: c.parent_name.clone(),
276            is_interface: c.is_interface,
277            is_exported: c.is_exported,
278            method_count: c.method_count,
279            has_behavior: c.has_behavior,
280            field_names: c.field_names.clone(),
281            field_types: c.field_types.clone(),
282            start_line: c.start_line,
283            end_line: c.end_line,
284            name_col: c.name_col,
285            name_end_col: c.name_end_col,
286        }
287    }
288}
289
290impl FunctionSymbol {
291    pub fn from_function_info(f: &FunctionInfo) -> Self {
292        Self {
293            name: f.name.clone(),
294            is_exported: f.is_exported,
295            parameter_count: f.parameter_count,
296            called_functions: f.called_functions.clone(),
297            start_line: f.start_line,
298            end_line: f.end_line,
299            name_col: f.name_col,
300            name_end_col: f.name_end_col,
301            parameter_type_names: f.parameter_types.iter().map(|t| t.raw.clone()).collect(),
302            parameter_names: f.parameter_names.clone(),
303            return_type_name: f.return_type.as_ref().map(|t| t.raw.clone()),
304            switch_arm_values: f.switch_arm_values.clone(),
305        }
306    }
307}