context-footprint 0.1.0

A static analysis tool for measuring architectural context exposure in codebases.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
use crate::domain::edge::EdgeKind;
use crate::domain::graph::ContextGraph;
use crate::domain::node::{
    FunctionNode, Mutability, Node, NodeCore, SourceSpan, TypeKind, TypeNode, VariableKind,
    VariableNode, Visibility,
};
use crate::domain::policy::{DocumentationScorer, NodeInfo, NodeType, SizeFunction};
use crate::domain::ports::SourceReader;
use crate::domain::semantic::{ReferenceRole, SemanticData, SymbolKind, SymbolMetadata};
use anyhow::Result;
use std::collections::HashMap;
use std::path::Path;

/// Graph builder - Domain Service for constructing ContextGraph
///
/// Holds the two pluggable policies that `build` consults for every node it creates.
pub struct GraphBuilder {
    /// Policy that computes a node's `context_size` from the file's source text, the
    /// definition's span and its documentation strings.
    size_function: Box<dyn SizeFunction>,
    /// Policy that scores a node's (first) documentation string.
    doc_scorer: Box<dyn DocumentationScorer>,
}

impl GraphBuilder {
    pub fn new(
        size_function: Box<dyn SizeFunction>,
        doc_scorer: Box<dyn DocumentationScorer>,
    ) -> Self {
        Self {
            size_function,
            doc_scorer,
        }
    }

    /// Three-pass build strategy
    pub fn build(
        &self,
        semantic_data: SemanticData,
        source_reader: &dyn SourceReader,
    ) -> Result<ContextGraph> {
        let mut graph = ContextGraph::new();

        // 1. Pre-collect kinds and parentage for all definitions
        let mut symbol_to_kind: HashMap<String, SymbolKind> = HashMap::new();
        let mut symbol_to_parent: HashMap<String, String> = HashMap::new();

        for document in &semantic_data.documents {
            for definition in &document.definitions {
                symbol_to_kind.insert(definition.symbol.clone(), definition.metadata.kind.clone());
                if let Some(parent) = &definition.metadata.enclosing_symbol {
                    symbol_to_parent.insert(definition.symbol.clone(), parent.clone());
                }
            }
        }

        // Pass 1: Node Allocation
        for document in &semantic_data.documents {
            let source_path = Path::new(&semantic_data.project_root).join(&document.relative_path);
            let source_code = source_reader.read(&source_path)?;

            for definition in &document.definitions {
                let kind = &definition.metadata.kind;

                // Determine if this symbol should be an independent node
                let should_be_node = match kind {
                    // Always nodes
                    SymbolKind::Function
                    | SymbolKind::Method
                    | SymbolKind::Constructor
                    | SymbolKind::StaticMethod
                    | SymbolKind::AbstractMethod
                    | SymbolKind::Class
                    | SymbolKind::Interface
                    | SymbolKind::Struct
                    | SymbolKind::Enum
                    | SymbolKind::TypeAlias
                    | SymbolKind::Trait
                    | SymbolKind::Protocol => true,

                    // Variable-like nodes: only if they are not parameters or local-like
                    SymbolKind::Variable | SymbolKind::Field | SymbolKind::Constant => definition
                        .metadata
                        .enclosing_symbol
                        .as_ref()
                        .and_then(|parent_sym| symbol_to_kind.get(parent_sym))
                        .is_none_or(|parent_kind| {
                            !matches!(
                                parent_kind,
                                SymbolKind::Function
                                    | SymbolKind::Method
                                    | SymbolKind::Constructor
                                    | SymbolKind::StaticMethod
                                    | SymbolKind::AbstractMethod
                            )
                        }),
                    _ => false, // Parameters, Modules, etc. are not independent nodes
                };

                if !should_be_node {
                    continue;
                }

                let node_id = graph.graph.node_count() as u32;

                // Extract documentation strings
                let doc_texts: Vec<String> = definition.metadata.documentation.clone();

                // Compute context_size
                let span = SourceSpan {
                    start_line: definition.enclosing_range.start_line,
                    start_column: definition.enclosing_range.start_column,
                    end_line: definition.enclosing_range.end_line,
                    end_column: definition.enclosing_range.end_column,
                };
                let context_size = self.size_function.compute(&source_code, &span, &doc_texts);

                // Compute doc_score
                let doc_text = doc_texts.first().map(|s| s.as_str());
                let language = document
                    .relative_path
                    .split('.')
                    .next_back()
                    .map(|ext| ext.to_lowercase());
                let node_info = NodeInfo {
                    node_type: infer_node_type_from_kind(kind),
                    name: definition.metadata.display_name.clone(),
                    signature: definition.metadata.signature.clone(),
                    language,
                };
                let doc_score = self.doc_scorer.score(&node_info, doc_text);

                // Create NodeCore
                let core = NodeCore::new(
                    node_id,
                    definition.metadata.display_name.clone(),
                    definition.metadata.enclosing_symbol.clone(),
                    context_size,
                    span,
                    doc_score,
                    definition.metadata.is_external,
                    document.relative_path.clone(),
                );

                // Create specific node type
                let node = create_node_from_definition(core, &definition.metadata)?;
                graph.add_node(definition.symbol.clone(), node);
            }
        }

        // Helper to resolve a symbol to the nearest ancestor that IS a node
        let resolve_to_node_symbol = |mut sym: String,
                                      graph: &ContextGraph,
                                      symbol_to_parent: &HashMap<String, String>|
         -> Option<String> {
            while !graph.symbol_to_node.contains_key(&sym) {
                if let Some(parent) = symbol_to_parent.get(&sym) {
                    sym = parent.clone();
                } else {
                    return None;
                }
            }
            Some(sym)
        };

        // Pass 2: Edge Wiring
        let mut state_writers: HashMap<String, Vec<petgraph::graph::NodeIndex>> = HashMap::new();
        let mut callers: HashMap<String, Vec<petgraph::graph::NodeIndex>> = HashMap::new();
        let mut readers: Vec<(petgraph::graph::NodeIndex, String)> = Vec::new();

        for document in &semantic_data.documents {
            for reference in &document.references {
                let resolved_source_sym = resolve_to_node_symbol(
                    reference.enclosing_symbol.clone(),
                    &graph,
                    &symbol_to_parent,
                );
                let resolved_target_sym =
                    resolve_to_node_symbol(reference.symbol.clone(), &graph, &symbol_to_parent);

                if let (Some(source_sym), Some(target_sym)) =
                    (resolved_source_sym, resolved_target_sym)
                {
                    let source_idx = *graph.symbol_to_node.get(&source_sym).unwrap();
                    let target_idx = *graph.symbol_to_node.get(&target_sym).unwrap();

                    if source_idx == target_idx {
                        continue;
                    }

                    let edge_kind = infer_edge_kind(&reference.role, source_idx, target_idx);

                    if matches!(edge_kind, EdgeKind::Write) {
                        state_writers
                            .entry(target_sym.clone())
                            .or_default()
                            .push(source_idx);
                    }
                    if matches!(edge_kind, EdgeKind::Read) {
                        readers.push((source_idx, target_sym.clone()));
                    }
                    if matches!(edge_kind, EdgeKind::Call) {
                        callers
                            .entry(target_sym.clone())
                            .or_default()
                            .push(source_idx);
                    }

                    graph.add_edge(source_idx, target_idx, edge_kind);
                }
            }
        }

        // Pass 2.5: Process relationships from definitions (e.g., return types, implements, inherits)
        for document in &semantic_data.documents {
            for definition in &document.definitions {
                if let Some(&source_idx) = graph.symbol_to_node.get(&definition.symbol) {
                    for relationship in &definition.metadata.relationships {
                        // Resolve target symbol to a node symbol
                        if let Some(target_idx) = resolve_to_node_symbol(
                            relationship.target_symbol.clone(),
                            &graph,
                            &symbol_to_parent,
                        )
                        .and_then(|resolved_target| {
                            graph.symbol_to_node.get(&resolved_target).copied()
                        }) {
                            if source_idx == target_idx {
                                continue;
                            }

                            // Convert relationship kind to edge kind based on source node type
                            let edge_kind = match relationship.kind {
                                crate::domain::semantic::RelationshipKind::TypeDefinition => {
                                    // TypeDefinition means "source uses target as a type"
                                    // The specific edge depends on what the source is
                                    match graph.node(source_idx) {
                                        crate::domain::node::Node::Function(_) => {
                                            EdgeKind::ReturnType
                                        }
                                        crate::domain::node::Node::Variable(_) => {
                                            EdgeKind::VariableType
                                        }
                                        crate::domain::node::Node::Type(_) => EdgeKind::FieldType,
                                    }
                                }
                                crate::domain::semantic::RelationshipKind::Implements => {
                                    EdgeKind::Implements
                                }
                                crate::domain::semantic::RelationshipKind::Inherits => {
                                    EdgeKind::Inherits
                                }
                                crate::domain::semantic::RelationshipKind::References => {
                                    // Generic references - skip, handled by occurrences
                                    continue;
                                }
                            };

                            graph.add_edge(source_idx, target_idx, edge_kind);
                        }
                    }
                }
            }
        }

        // Pass 3: Dynamic Expansion Edges
        // 1. SharedStateWrite edges: Reader -> Writer
        for (reader_idx, state_symbol) in readers {
            if let Some(writers) = state_writers.get(&state_symbol) {
                for &writer_idx in writers {
                    if reader_idx != writer_idx {
                        graph.add_edge(reader_idx, writer_idx, EdgeKind::SharedStateWrite);
                    }
                }
            }
        }

        // 2. CallIn edges: Callee -> Caller
        let symbols: Vec<String> = graph.symbol_to_node.keys().cloned().collect();
        for callee_symbol in symbols {
            if let (Some(callee_idx), Some(caller_indices)) = (
                graph.get_node_by_symbol(&callee_symbol),
                callers.get(&callee_symbol),
            ) {
                for &caller_idx in caller_indices {
                    if callee_idx != caller_idx {
                        graph.add_edge(callee_idx, caller_idx, EdgeKind::CallIn);
                    }
                }
            }
        }

        Ok(graph)
    }
}

/// Maps a symbol kind onto the coarse node category used by the graph.
///
/// Callable kinds become `NodeType::Function`, type-declaring kinds become
/// `NodeType::Type`, and every other kind falls back to `NodeType::Variable`.
fn infer_node_type_from_kind(kind: &SymbolKind) -> NodeType {
    let is_callable = matches!(
        kind,
        SymbolKind::Function
            | SymbolKind::Method
            | SymbolKind::Constructor
            | SymbolKind::StaticMethod
            | SymbolKind::AbstractMethod
    );
    if is_callable {
        return NodeType::Function;
    }

    let is_type_declaration = matches!(
        kind,
        SymbolKind::Class
            | SymbolKind::Interface
            | SymbolKind::Struct
            | SymbolKind::Enum
            | SymbolKind::TypeAlias
            | SymbolKind::Trait
            | SymbolKind::Protocol
    );
    if is_type_declaration {
        return NodeType::Type;
    }

    // Variable, Field, Constant and Parameter are variable-like by nature; Module,
    // Namespace, Package and Macro are treated as declaration-like; any unknown kind
    // also lands here (safer than classifying it as a function).
    NodeType::Variable
}

/// Wraps `core` in the concrete node variant that matches the definition's kind.
///
/// The variant is chosen via `infer_node_type_from_kind`. Most detail fields are
/// placeholders for now; only the signature presence and the type kind / abstractness
/// are derived from `metadata`.
fn create_node_from_definition(core: NodeCore, metadata: &SymbolMetadata) -> Result<Node> {
    let has_signature = metadata.signature.is_some();

    let node = match infer_node_type_from_kind(&metadata.kind) {
        // Signature information is simplified - would need actual parsing
        NodeType::Function => Node::Function(FunctionNode {
            core,
            param_count: 0,                 // TODO: extract from signature
            typed_param_count: 0,           // TODO: extract from signature
            has_return_type: has_signature, // Simplified
            is_async: false,                // TODO: extract from signature
            is_generator: false,            // TODO: extract from signature
            visibility: Visibility::Public, // TODO: extract from metadata
        }),
        NodeType::Variable => Node::Variable(VariableNode {
            core,
            has_type_annotation: has_signature,
            mutability: Mutability::Mutable,     // TODO: infer from context
            variable_kind: VariableKind::Global, // TODO: infer from context
        }),
        NodeType::Type => {
            let is_abstract = match metadata.kind {
                // Python Protocol detection: SCIP-python marks Protocols as Class but
                // with an Implements relationship to typing.Protocol
                SymbolKind::Class => metadata.relationships.iter().any(|rel| {
                    matches!(
                        rel.kind,
                        crate::domain::semantic::RelationshipKind::Implements
                    ) && rel.target_symbol.contains("typing/Protocol#")
                }),
                // These kinds are abstract by definition
                SymbolKind::Interface | SymbolKind::Trait | SymbolKind::Protocol => true,
                _ => false,
            };

            let type_kind = match metadata.kind {
                SymbolKind::Class if is_abstract => TypeKind::Protocol, // Python Protocol
                SymbolKind::Class => TypeKind::Class,
                SymbolKind::Interface => TypeKind::Interface,
                SymbolKind::Struct => TypeKind::Struct,
                SymbolKind::Enum => TypeKind::Enum,
                SymbolKind::TypeAlias => TypeKind::TypeAlias,
                // Trait is similar to Protocol
                SymbolKind::Trait | SymbolKind::Protocol => TypeKind::Protocol,
                _ => TypeKind::Class, // Default
            };

            Node::Type(TypeNode {
                core,
                type_kind,
                is_abstract,
                type_param_count: 0, // TODO: extract from signature
            })
        }
    };

    Ok(node)
}

/// Translates the role of a reference into the kind of edge it produces.
///
/// The source and target node indices are accepted but currently unused; the mapping
/// depends on `role` alone.
fn infer_edge_kind(
    role: &ReferenceRole,
    _source: petgraph::graph::NodeIndex,
    _target: petgraph::graph::NodeIndex,
) -> EdgeKind {
    match role {
        // Imports and unknown roles are (for now) treated like calls.
        ReferenceRole::Call | ReferenceRole::Import | ReferenceRole::Unknown => EdgeKind::Call,
        ReferenceRole::Read => EdgeKind::Read,
        ReferenceRole::Write => EdgeKind::Write,
        // Simplified: every type usage is recorded as a parameter type.
        ReferenceRole::TypeUsage => EdgeKind::ParamType,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::semantic::SymbolMetadata;

    /// Spot-checks one kind per node category plus the unknown-kind fallback.
    #[test]
    fn test_infer_node_type_from_kind() {
        let expectations = [
            (SymbolKind::Function, NodeType::Function),
            (SymbolKind::Class, NodeType::Type),
            (SymbolKind::Variable, NodeType::Variable),
            (SymbolKind::Unknown, NodeType::Variable),
        ];

        for (kind, expected) in expectations {
            assert_eq!(infer_node_type_from_kind(&kind), expected);
        }
    }

    /// A plain class yields `TypeKind::Class`; the same class with an `Implements`
    /// relationship to `typing/Protocol#` yields `TypeKind::Protocol`.
    #[test]
    fn test_create_node_from_definition_class_vs_protocol() {
        let span = SourceSpan {
            start_line: 0,
            start_column: 0,
            end_line: 1,
            end_column: 0,
        };
        let core = NodeCore::new(
            0,
            "MyClass".into(),
            None,
            10,
            span,
            1.0,
            false,
            "file.py".into(),
        );

        let mut metadata = SymbolMetadata {
            symbol: "MyClass#".into(),
            kind: SymbolKind::Class,
            display_name: "MyClass".into(),
            documentation: vec![],
            signature: None,
            relationships: vec![],
            enclosing_symbol: None,
            is_external: false,
        };

        // Without any relationship the class stays a plain class.
        let Node::Type(plain) = create_node_from_definition(core.clone(), &metadata).unwrap()
        else {
            panic!("Expected Type node");
        };
        assert_eq!(plain.type_kind, TypeKind::Class);

        // Add Protocol relationship
        let implements_protocol = crate::domain::semantic::Relationship {
            target_symbol: "typing/Protocol#".into(),
            kind: crate::domain::semantic::RelationshipKind::Implements,
        };
        metadata.relationships.push(implements_protocol);

        let Node::Type(protocol) = create_node_from_definition(core.clone(), &metadata).unwrap()
        else {
            panic!("Expected Protocol node");
        };
        assert_eq!(protocol.type_kind, TypeKind::Protocol);
    }
}