pedant-core 0.13.0

Analysis engine for pedant: IR extraction, style checks, and capability detection
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
use std::fmt;
use std::rc::Rc;
use std::sync::Arc;

use pedant_types::Capability;

/// Source position extracted from `syn` spans.
///
/// Identifies a single point (line and column) in a source file; no end
/// position is recorded. `line` is 1-based. `column` is 0-based as reported
/// by `syn` and is adjusted to 1-based at report time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IrSpan {
    /// Line number, 1-based.
    pub line: usize,
    /// Column offset, 0-based from `syn`; adjusted to 1-based at report time.
    pub column: usize,
}

/// Discriminant for data flow findings.
///
/// Each variant maps to a kebab-case identifier via `DataFlowKind::code`,
/// which is also what the `Display` impl prints.
///
/// `PartialOrd`/`Ord` are derived, so comparison follows the declaration
/// order of the variants below; reordering them changes sort results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DataFlowKind {
    /// Tainted data flows from a capability source to a capability sink.
    TaintFlow,
    /// Value assigned then overwritten before read.
    DeadStore,
    /// Function returning `Result` called without binding the return.
    DiscardedResult,
    /// `Result` handled on some paths, dropped on others.
    PartialErrorHandling,
    /// Same function called with identical arguments within a single scope.
    RepeatedCall,
    /// `.clone()` called but the original is never used afterward.
    UnnecessaryClone,
    /// `Vec::new()`, `String::new()`, or `format!()` inside a loop body.
    AllocationInLoop,
    /// `.collect()` followed immediately by `.iter()` or `.into_iter()`.
    RedundantCollect,
    /// Lock guard held across an `.await` point (potential deadlock or task starvation).
    LockAcrossAwait,
    /// Same locks acquired in different orders across functions (potential deadlock).
    InconsistentLockOrder,
    /// `Vec` or `String` binding never mutated after construction.
    ImmutableGrowable,
    /// `.ok()` called on a `Result` where the resulting `Option` is discarded.
    SwallowedOk,
    /// Thread or task spawned with the `JoinHandle` dropped or unbound.
    UnobservedSpawn,
}

impl DataFlowKind {
    /// Kebab-case identifier for this data flow kind.
    pub fn code(self) -> &'static str {
        match self {
            Self::TaintFlow => "taint-flow",
            Self::DeadStore => "dead-store",
            Self::DiscardedResult => "discarded-result",
            Self::PartialErrorHandling => "partial-error-handling",
            Self::RepeatedCall => "repeated-call",
            Self::UnnecessaryClone => "unnecessary-clone",
            Self::AllocationInLoop => "allocation-in-loop",
            Self::RedundantCollect => "redundant-collect",
            Self::LockAcrossAwait => "lock-across-await",
            Self::InconsistentLockOrder => "inconsistent-lock-order",
            Self::ImmutableGrowable => "immutable-growable",
            Self::SwallowedOk => "swallowed-ok",
            Self::UnobservedSpawn => "unobserved-spawn",
        }
    }
}

impl fmt::Display for DataFlowKind {
    /// Writes the kebab-case identifier returned by `DataFlowKind::code`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write_str` emits the text verbatim: width, fill, and alignment
        // flags on the formatter are not applied to the output.
        let label = self.code();
        formatter.write_str(label)
    }
}

/// Data flow finding: taint edge, quality issue, or concurrency hazard.
///
/// Stored in `FileIr::data_flows`. The `kind` field says which finding this
/// is; the two capability fields are documented as relevant to taint flows
/// only.
#[derive(Debug, Clone)]
pub struct DataFlowFact {
    /// What kind of data flow issue this represents.
    pub kind: DataFlowKind,
    /// Where the tainted data originates (taint flows only).
    pub source_capability: Option<Capability>,
    /// Location of the source expression.
    pub source_span: IrSpan,
    /// Where the tainted data is consumed (taint flows only).
    pub sink_capability: Option<Capability>,
    /// Location of the sink expression.
    pub sink_span: IrSpan,
    /// Intermediate function names the data passes through.
    pub call_chain: Box<[Box<str>]>,
    /// Human-readable description of the finding.
    pub message: Box<str>,
}

/// All facts extracted from a single source file's AST in one pass.
///
/// Each field is a flat, fixed-size collection of one fact kind. Facts that
/// occur inside a function refer back to their enclosing entry in
/// `functions` through a `containing_fn` index.
#[derive(Debug)]
pub struct FileIr {
    /// Absolute path used for violation reporting.
    pub file_path: Arc<str>,
    /// Function and method definitions with body metadata.
    pub functions: Box<[FnFact]>,
    /// Struct, enum, and trait definitions with type-relationship edges.
    pub type_defs: Box<[TypeDefFact]>,
    /// Inherent and trait impl blocks.
    pub impl_blocks: Box<[ImplFact]>,
    /// Flattened `use` paths for capability detection.
    pub use_paths: Box<[UsePathFact]>,
    /// Nesting-tracked control flow constructs.
    pub control_flow: Box<[ControlFlowFact]>,
    /// Let bindings with ownership and type metadata.
    pub bindings: Box<[BindingFact]>,
    /// Type references classified by position (return, param, field, body).
    pub type_refs: Box<[TypeRefFact]>,
    /// Method calls with receiver tracking for clone-in-loop analysis.
    pub method_calls: Box<[MethodCallFact]>,
    /// Macro invocations for forbidden-macro checks.
    pub macro_invocations: Box<[MacroFact]>,
    /// Item attributes for forbidden-attribute checks.
    pub attributes: Box<[AttributeFact]>,
    /// String literals for credential/endpoint detection.
    pub string_literals: Box<[StringLitFact]>,
    /// Unsafe blocks, functions, and impls.
    pub unsafe_sites: Box<[UnsafeFact]>,
    /// Extern block declarations for FFI detection.
    pub extern_blocks: Box<[ExternBlockFact]>,
    /// Module declarations for inline-test detection.
    pub modules: Box<[ModuleFact]>,
    /// Populated only by semantic enrichment; empty otherwise.
    /// `Arc<[T]>` because semantic enrichment shares the cached analysis's
    /// flow slice — no deep copy. Non-semantic paths use an empty Arc.
    pub data_flows: Arc<[DataFlowFact]>,
}

/// Extracted metadata for a function or method definition.
///
/// Stored in `FileIr::functions`; other fact types refer to an entry here by
/// index through their `containing_fn` field.
#[derive(Debug)]
pub struct FnFact {
    /// Identifier of the function.
    pub name: Box<str>,
    /// Location of the `fn` keyword.
    pub span: IrSpan,
    /// Marked `unsafe fn`.
    pub is_unsafe: bool,
    /// Declared parameters.
    pub params: Box<[ParamFact]>,
    /// Explicit return type, if present (excludes implicit `()`).
    pub return_type: Option<TypeInfo>,
    /// Unique type names from parameters and return type, for mixed-concerns edges.
    pub signature_type_names: Box<[Rc<str>]>,
    /// Nesting depth of the item in the module tree.
    pub item_depth: usize,
    /// Whether the body contains arithmetic operators.
    pub has_arithmetic: bool,
    /// Pairwise edges from body-referenced types (for mixed-concerns analysis).
    pub body_type_edges: Box<[(Rc<str>, Rc<str>)]>,
}

/// Extracted metadata for a function parameter.
///
/// Stored in `FnFact::params`.
#[derive(Debug)]
pub struct ParamFact {
    /// Parameter identifier, or `self` for a receiver.
    pub name: Box<str>,
    /// Rendered type text for pattern matching.
    pub type_text: Box<str>,
}

/// Rendered type text with dispatch classification.
///
/// Used for `FnFact::return_type`.
#[derive(Debug)]
pub struct TypeInfo {
    /// Normalized type text for pattern matching.
    pub text: Box<str>,
    /// `true` when the type contains `dyn Trait` at any depth.
    pub involves_dyn: bool,
}

/// Else-branch metadata attached to `If` control flow nodes.
///
/// Stored in `ControlFlowFact::else_info`.
#[derive(Debug, Clone, Copy)]
pub struct ElseInfo {
    /// Total branches in the if/else-if chain, when chained.
    pub chain_len: Option<usize>,
    /// Location of the `else` keyword, for `forbid_else` reporting.
    pub span: Option<IrSpan>,
}

/// A control flow construct with nesting context.
///
/// Stored in `FileIr::control_flow`.
#[derive(Debug)]
pub struct ControlFlowFact {
    /// Discriminant: if, match, loop variant, or closure.
    pub kind: ControlFlowKind,
    /// Location of the keyword.
    pub span: IrSpan,
    /// Nesting depth within the function body (for max-depth check).
    pub depth: usize,
    /// Enclosing loop count (for clone-in-loop suppression).
    pub loop_depth: usize,
    /// Set when nested inside an if or match arm.
    pub parent_branch: Option<BranchContext>,
    /// Present only for `If` nodes.
    pub else_info: Option<ElseInfo>,
    /// Index of the function containing this construct.
    /// NOTE(review): presumably an index into `FileIr::functions`, like the
    /// other `containing_fn` fields in this file — confirm against the extractor.
    pub containing_fn: Option<usize>,
}

/// Discriminant for control flow constructs.
///
/// Used as `ControlFlowFact::kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlFlowKind {
    /// `if` expression.
    If,
    /// `match` expression.
    Match,
    /// `for .. in` loop.
    ForLoop,
    /// `while` loop.
    WhileLoop,
    /// Bare `loop` (infinite).
    Loop,
    /// Closure expression (counts as nesting).
    Closure,
}

/// Which branch kind encloses a nested control flow node.
///
/// Used as `ControlFlowFact::parent_branch`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchContext {
    /// Nested inside an `if` branch.
    If,
    /// Nested inside a `match` arm.
    Match,
}

/// A `let` binding with ownership and context metadata.
///
/// Stored in `FileIr::bindings`. `resolved_type` is filled in by semantic
/// enrichment; the remaining fields describe the binding as written.
#[derive(Debug)]
pub struct BindingFact {
    /// Identifier (or `_` for wildcard).
    pub name: Box<str>,
    /// `None` for compiler-desugared bindings without source spans.
    pub span: Option<IrSpan>,
    /// Enclosing loop count for clone-in-loop analysis.
    pub loop_depth: usize,
    /// `true` when the declared type is `Rc<_>` or `Arc<_>`.
    pub is_refcounted: bool,
    /// `true` when the pattern is `_` (wildcard discard).
    pub is_wildcard: bool,
    /// `true` when an initializer expression is present.
    pub has_init: bool,
    /// `true` when the initializer is `write!`/`writeln!` into a `String` (infallible).
    pub init_is_write_macro: bool,
    /// Index into `FileIr::functions`; links binding to its enclosing function.
    pub containing_fn: Option<usize>,
    /// Present when the binding has an explicit `: Type` annotation.
    pub type_annotation_span: Option<IrSpan>,
    /// Filled by semantic enrichment; canonical type after alias resolution.
    pub resolved_type: Option<Box<str>>,
}

/// A type reference with dispatch and hasher classification.
///
/// Stored in `FileIr::type_refs`. `context` records where the type appears,
/// which determines the checks that apply to it.
#[derive(Debug)]
pub struct TypeRefFact {
    /// Normalized type text for pattern matching.
    pub text: Box<str>,
    /// Location of the type in source.
    pub span: IrSpan,
    /// `true` when the type contains `dyn Trait` at any depth.
    pub involves_dyn: bool,
    /// `true` when the type matches the `Vec<Box<dyn ...>>` pattern.
    pub is_vec_box_dyn: bool,
    /// `true` for `HashMap`/`HashSet` without an explicit hasher parameter.
    pub is_default_hasher: bool,
    /// Index into `FileIr::functions`; links to enclosing function.
    pub containing_fn: Option<usize>,
    /// Positional context: return, param, field, or body.
    pub context: TypeRefContext,
}

/// Positional context of a type reference, determining which checks apply.
///
/// Used as `TypeRefFact::context`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeRefContext {
    /// In a function return type.
    Return,
    /// In a function parameter.
    Param,
    /// In a struct or enum field.
    Field,
    /// Inside a function body.
    Body,
}

/// Struct, enum, or trait definition with type-relationship edges.
///
/// Stored in `FileIr::type_defs`.
#[derive(Debug)]
pub struct TypeDefFact {
    /// Identifier of the defined type.
    pub name: Rc<str>,
    /// Location of the definition keyword.
    pub span: IrSpan,
    /// Which kind of definition this is: struct, enum, or trait.
    pub kind: TypeDefKind,
    /// Pairwise type-relationship edges for mixed-concerns graph analysis.
    pub edges: Box<[(Rc<str>, Rc<str>)]>,
}

/// Discriminant for type definitions.
///
/// Used as `TypeDefFact::kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TypeDefKind {
    /// `struct` definition.
    Struct,
    /// `enum` definition.
    Enum,
    /// `trait` definition.
    Trait,
}

/// An inherent or trait impl block with type-relationship edges.
///
/// Stored in `FileIr::impl_blocks`.
#[derive(Debug)]
pub struct ImplFact {
    /// The type being implemented on.
    pub self_type: Rc<str>,
    /// `Some` for `impl Trait for Type`, `None` for inherent impls.
    pub trait_name: Option<Box<str>>,
    /// Location of the `impl` keyword.
    pub span: IrSpan,
    /// Pairwise type-relationship edges for mixed-concerns graph analysis.
    pub edges: Box<[(Rc<str>, Rc<str>)]>,
}

/// A flattened `use` import path for capability detection.
///
/// Stored in `FileIr::use_paths`.
#[derive(Debug)]
pub struct UsePathFact {
    /// Fully qualified path (e.g., `std::collections::HashMap`).
    pub path: Box<str>,
    /// Location of the `use` statement.
    pub span: IrSpan,
}

/// A method call expression with receiver and loop context.
///
/// Stored in `FileIr::method_calls`. `receiver_type` and `is_copy_receiver`
/// are filled in by semantic enrichment; the remaining fields describe the
/// call as written.
#[derive(Debug)]
pub struct MethodCallFact {
    /// Method identifier (e.g., `clone`, `unwrap`).
    pub method_name: Box<str>,
    /// Full rendered expression for pattern matching.
    pub text: Box<str>,
    /// Location of the method call.
    pub span: IrSpan,
    /// Simple identifier receiver, when not a complex expression.
    pub receiver_ident: Option<Box<str>>,
    /// Location of the receiver for diagnostic pointing.
    pub receiver_span: IrSpan,
    /// Enclosing loop count for clone-in-loop analysis.
    pub loop_depth: usize,
    /// Index into `FileIr::functions`; links to enclosing function.
    pub containing_fn: Option<usize>,
    /// Filled by semantic enrichment; canonical receiver type.
    /// `Arc<str>` so multiple calls on the same binding share one allocation.
    pub receiver_type: Option<Arc<str>>,
    /// Filled by semantic enrichment; suppresses clone-in-loop for `Copy` types.
    pub is_copy_receiver: bool,
}

/// A macro invocation for forbidden-macro checks.
///
/// Stored in `FileIr::macro_invocations`.
#[derive(Debug)]
pub struct MacroFact {
    /// Rendered macro text (e.g., `println!`) for pattern matching.
    pub text: Box<str>,
    /// Location of the macro call.
    pub span: IrSpan,
}

/// An item attribute for forbidden-attribute and capability checks.
///
/// Stored in `FileIr::attributes`.
#[derive(Debug)]
pub struct AttributeFact {
    /// Rendered inner text (e.g., `allow(dead_code)`) for pattern matching.
    pub text: Box<str>,
    /// Location of the `#[` token.
    pub span: IrSpan,
    /// Top-level attribute name (e.g., `derive`, `cfg`, `link`).
    pub name: Box<str>,
}

/// A string literal for credential and endpoint detection.
///
/// Stored in `FileIr::string_literals`.
#[derive(Debug)]
pub struct StringLitFact {
    /// Unescaped content of the literal.
    pub value: Box<str>,
    /// Location of the opening quote.
    pub span: IrSpan,
}

/// An unsafe block, function, or impl for safety auditing.
///
/// Stored in `FileIr::unsafe_sites`.
#[derive(Debug)]
pub struct UnsafeFact {
    /// Which unsafe construct this is: block, function, or impl.
    pub kind: UnsafeKind,
    /// Location of the `unsafe` keyword.
    pub span: IrSpan,
    /// Snippet of the unsafe code for evidence reporting.
    pub evidence: Box<str>,
}

/// Discriminant for unsafe constructs.
///
/// Used as `UnsafeFact::kind`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnsafeKind {
    /// `unsafe { }` block.
    Block,
    /// `unsafe fn` declaration.
    Fn,
    /// `unsafe impl` block.
    Impl,
}

/// An `extern` block declaration for FFI capability detection.
///
/// Stored in `FileIr::extern_blocks`. Only the position is recorded; the
/// items declared inside the block are not part of this fact.
#[derive(Debug)]
pub struct ExternBlockFact {
    /// Location of the `extern` keyword.
    pub span: IrSpan,
}

/// Structural fingerprint for a function, used for duplicate detection.
///
/// Two functions with identical structure (same control flow, same method call
/// count, same binding count) but different names produce the same `skeleton_hash`.
/// `exact_hash` additionally includes method and type reference names.
///
/// `fn_index` ties the fingerprint back to the corresponding entry in
/// `FileIr::functions`.
#[derive(Debug)]
pub struct FnFingerprint {
    /// Index into `FileIr::functions`.
    pub fn_index: usize,
    /// Function name.
    pub name: Box<str>,
    /// Location of the function definition.
    pub span: IrSpan,
    /// Hash of structural shape only (param count, control flow sequence, counts).
    pub skeleton_hash: u64,
    /// Hash of skeleton components plus method names and type reference texts.
    pub exact_hash: u64,
    /// Total number of facts (method calls + bindings + type refs + control flow).
    pub fact_count: usize,
}

/// A `mod` declaration for inline-test detection.
///
/// Stored in `FileIr::modules`.
#[derive(Debug)]
pub struct ModuleFact {
    /// Module identifier.
    pub name: Box<str>,
    /// Location of the `mod` keyword.
    pub span: IrSpan,
    /// `true` when annotated with `#[cfg(test)]`.
    pub is_cfg_test: bool,
}