Skip to main content

codedash_schemas/
lib.rs

//! Stable public schema for codedash code metrics output.
//!
//! This crate defines the **external contract** for codedash's analysis JSON.
//! It is decoupled from codedash's internal domain model via an Anti-Corruption
//! Layer (ACL), so internal refactoring does not break external consumers.
//!
//! # Design: Anti-Corruption Layer
//!
//! codedash maintains **two separate type hierarchies**:
//!
//! 1. **Internal domain model** (`codedash::domain::ast`) — optimized for
//!    parsing and enrichment logic. May change across codedash versions.
//! 2. **Public schema** (this crate) — stable, versioned contract for
//!    external consumers.
//!
//! The ACL boundary (`codedash::port::schema`) converts domain types into
//! these schema types via `From` implementations. This is the **only** place
//! where the two models touch. As a result:
//!
//! - Internal refactoring in codedash never breaks consumers of this crate.
//! - This crate carries minimal dependencies (`serde` only, unless the
//!   optional `schema` feature is enabled).
//! - Breaking changes to the schema follow semver.
//!
//! # For Rust consumers
//!
//! ```rust
//! use codedash_schemas::AstData;
//!
//! # let json = r#"{"files":[],"edges":[]}"#;
//! let data: AstData = serde_json::from_str(json).unwrap();
//! ```
//!
//! # For non-Rust consumers (JSON Schema)
//!
//! Enable the `schema` feature and generate a JSON Schema file:
//!
//! ```rust,ignore
//! let schema = schemars::schema_for!(codedash_schemas::AstData);
//! println!("{}", serde_json::to_string_pretty(&schema).unwrap());
//! ```
//!
//! The generated schema can then be used by TypeScript, Python, Go, etc.
//! to validate or generate types for codedash output.
//!
//! # Optional features
//!
//! - **`schema`** — derives [`schemars::JsonSchema`] on all types, enabling
//!   JSON Schema generation via `schemars::schema_for!`.
49
50pub mod analyze;
51
52pub use analyze::{AnalyzeResult, Binding, EvalEntry, Group, PerceptValues};
53
54use std::collections::HashMap;
55
56use serde::{Deserialize, Serialize};
57
58/// Top-level AST output from a codedash analysis run.
59#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
60#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
61#[non_exhaustive]
62pub struct AstData {
63    /// Analyzed source files.
64    pub files: Vec<FileData>,
65    /// Dependency edges between files.
66    #[serde(default)]
67    pub edges: Vec<Edge>,
68}
69
70impl AstData {
71    /// Create a new [`AstData`].
72    pub fn new(files: Vec<FileData>, edges: Vec<Edge>) -> Self {
73        Self { files, edges }
74    }
75}
76
77/// Per-file AST data.
78#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
79#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
80#[non_exhaustive]
81pub struct FileData {
82    /// Original file path (relative to the analysis root).
83    pub path: String,
84    /// Normalized module name (e.g. `"app/analyze"`).
85    pub name: String,
86    /// AST nodes (functions, structs, enums, etc.) found in this file.
87    pub nodes: Vec<NodeData>,
88    /// Internal imports (`use crate::...`, `use super::...`).
89    #[serde(default)]
90    pub imports: Vec<ImportInfo>,
91    /// Git commit count within the churn period. Injected by enrichment.
92    #[serde(default, skip_serializing_if = "Option::is_none")]
93    pub git_churn_30d: Option<u32>,
94}
95
96impl FileData {
97    /// Create a new [`FileData`] with required fields.
98    ///
99    /// `nodes`, `imports`, and `git_churn_30d` default to empty/`None`.
100    pub fn new(path: String, name: String) -> Self {
101        Self {
102            path,
103            name,
104            nodes: Vec::new(),
105            imports: Vec::new(),
106            git_churn_30d: None,
107        }
108    }
109}
110
111/// A single AST node (function, struct, enum, impl, etc.).
112#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
113#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
114#[non_exhaustive]
115pub struct NodeData {
116    /// Node kind: `"function"`, `"struct"`, `"enum"`, `"impl"`, `"method"`, etc.
117    pub kind: String,
118    /// Node name (identifier).
119    pub name: String,
120
121    /// Whether this node is exported / publicly visible.
122    #[serde(default)]
123    pub exported: bool,
124    /// Visibility qualifier (e.g. `"pub"`, `"pub(crate)"`).
125    #[serde(default, skip_serializing_if = "Option::is_none")]
126    pub visibility: Option<String>,
127
128    /// Whether this function is `async`.
129    #[serde(default, skip_serializing_if = "Option::is_none")]
130    pub is_async: Option<bool>,
131    /// Whether this function/trait is `unsafe`.
132    #[serde(default, skip_serializing_if = "Option::is_none")]
133    pub is_unsafe: Option<bool>,
134
135    /// First line of the node (1-based).
136    pub start_line: u32,
137    /// Last line of the node (1-based, inclusive).
138    pub end_line: u32,
139    /// Total line count (`end_line - start_line + 1`).
140    pub lines: u32,
141
142    /// Number of parameters (functions/methods only).
143    #[serde(default, skip_serializing_if = "Option::is_none")]
144    pub params: Option<u32>,
145    /// Number of fields (structs/enums only).
146    #[serde(default, skip_serializing_if = "Option::is_none")]
147    pub field_count: Option<u32>,
148    /// Maximum nesting depth.
149    #[serde(default, skip_serializing_if = "Option::is_none")]
150    pub depth: Option<u32>,
151
152    /// Cyclomatic complexity.
153    #[serde(default, skip_serializing_if = "Option::is_none")]
154    pub cyclomatic: Option<u32>,
155
156    /// Trait name for impl blocks (e.g. `"Display"`).
157    #[serde(default, skip_serializing_if = "Option::is_none")]
158    pub trait_name: Option<String>,
159
160    /// Git commit count within the churn period. Injected by enrichment.
161    #[serde(default, skip_serializing_if = "Option::is_none")]
162    pub git_churn_30d: Option<u32>,
163    /// Region coverage ratio (0.0–1.0). Injected by enrichment.
164    #[serde(default, skip_serializing_if = "Option::is_none")]
165    pub coverage: Option<f64>,
166    /// Co-change counts: `"partner_file::*" → commit_count`.
167    /// Injected by enrichment.
168    #[serde(default, skip_serializing_if = "Option::is_none")]
169    pub co_changes: Option<HashMap<String, u32>>,
170
171    /// Internal function calls detected in the body.
172    #[serde(default, skip_serializing_if = "Option::is_none")]
173    pub calls: Option<Vec<CallInfo>>,
174}
175
176impl NodeData {
177    /// Create a new [`NodeData`] with required fields.
178    ///
179    /// All optional fields default to `None` / `false`.
180    pub fn new(kind: String, name: String, start_line: u32, end_line: u32, lines: u32) -> Self {
181        Self {
182            kind,
183            name,
184            exported: false,
185            visibility: None,
186            is_async: None,
187            is_unsafe: None,
188            start_line,
189            end_line,
190            lines,
191            params: None,
192            field_count: None,
193            depth: None,
194            cyclomatic: None,
195            trait_name: None,
196            git_churn_30d: None,
197            coverage: None,
198            co_changes: None,
199            calls: None,
200        }
201    }
202}
203
204/// An internal import (e.g. `use crate::domain::ast::AstData`).
205#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
206#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
207#[non_exhaustive]
208pub struct ImportInfo {
209    /// Source module path.
210    pub from: String,
211    /// Imported symbol names.
212    pub names: Vec<String>,
213}
214
215impl ImportInfo {
216    /// Create a new [`ImportInfo`].
217    pub fn new(from: String, names: Vec<String>) -> Self {
218        Self { from, names }
219    }
220}
221
222/// A call reference from a function body to an imported symbol.
223#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
224#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
225#[non_exhaustive]
226pub struct CallInfo {
227    /// Called symbol name.
228    pub symbol: String,
229    /// Module the symbol belongs to.
230    pub module: String,
231    /// Number of call sites within the function body.
232    pub count: u32,
233}
234
235impl CallInfo {
236    /// Create a new [`CallInfo`].
237    pub fn new(symbol: String, module: String, count: u32) -> Self {
238        Self {
239            symbol,
240            module,
241            count,
242        }
243    }
244}
245
246/// A dependency edge between two files.
247#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
248#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
249#[non_exhaustive]
250pub struct Edge {
251    /// Source file (the file that imports).
252    pub from_file: String,
253    /// Target file (the file being imported from).
254    pub to_file: String,
255    /// Imported symbol name.
256    pub symbol: String,
257    /// Edge type (currently always `"import"`).
258    #[serde(rename = "type")]
259    pub edge_type: String,
260}
261
262impl Edge {
263    /// Create a new [`Edge`].
264    pub fn new(from_file: String, to_file: String, symbol: String, edge_type: String) -> Self {
265        Self {
266            from_file,
267            to_file,
268            symbol,
269            edge_type,
270        }
271    }
272}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated document survives a serialize → deserialize cycle.
    #[test]
    fn ast_data_roundtrip() {
        let data = AstData {
            files: vec![FileData {
                path: "src/main.rs".to_string(),
                name: "main".to_string(),
                nodes: vec![NodeData {
                    kind: "function".to_string(),
                    name: "main".to_string(),
                    exported: true,
                    visibility: Some("pub".to_string()),
                    is_async: None,
                    is_unsafe: None,
                    start_line: 1,
                    end_line: 5,
                    lines: 5,
                    params: Some(0),
                    field_count: None,
                    depth: Some(1),
                    cyclomatic: Some(1),
                    trait_name: None,
                    git_churn_30d: Some(3),
                    coverage: Some(0.85),
                    co_changes: Some(HashMap::from([("utils::*".to_string(), 5)])),
                    calls: Some(vec![CallInfo {
                        symbol: "helper".to_string(),
                        module: "utils".to_string(),
                        count: 1,
                    }]),
                }],
                imports: vec![ImportInfo {
                    from: "utils".to_string(),
                    names: vec!["helper".to_string()],
                }],
                git_churn_30d: Some(3),
            }],
            edges: vec![Edge {
                from_file: "main".to_string(),
                to_file: "utils".to_string(),
                symbol: "helper".to_string(),
                edge_type: "import".to_string(),
            }],
        };

        let json = serde_json::to_string(&data).unwrap();
        let parsed: AstData = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.files[0].nodes[0].name, "main");
        assert_eq!(
            parsed.files[0].nodes[0].co_changes.as_ref().unwrap()["utils::*"],
            5
        );
        assert_eq!(parsed.edges[0].edge_type, "import");
    }

    /// Input that omits every optional key still parses, with defaults applied.
    #[test]
    fn deserialize_with_missing_optional_fields() {
        let json = r#"{
            "files": [{
                "path": "src/lib.rs",
                "name": "lib",
                "nodes": [{
                    "kind": "struct",
                    "name": "Foo",
                    "start_line": 1,
                    "end_line": 5,
                    "lines": 5
                }]
            }],
            "edges": []
        }"#;

        let parsed: AstData = serde_json::from_str(json).unwrap();
        let node = &parsed.files[0].nodes[0];
        assert!(!node.exported);
        assert!(node.co_changes.is_none());
        assert!(node.cyclomatic.is_none());
    }

    /// `co_changes` is emitted as a JSON object keyed by partner, not an array.
    #[test]
    fn co_changes_serializes_as_object() {
        let mut co = HashMap::new();
        co.insert("auth::*".to_string(), 3);
        co.insert("db::*".to_string(), 7);

        let node = NodeData {
            kind: "function".to_string(),
            name: "handle".to_string(),
            exported: true,
            visibility: None,
            is_async: None,
            is_unsafe: None,
            start_line: 1,
            end_line: 10,
            lines: 10,
            params: None,
            field_count: None,
            depth: None,
            cyclomatic: None,
            trait_name: None,
            git_churn_30d: None,
            coverage: None,
            co_changes: Some(co),
            calls: None,
        };

        let json = serde_json::to_value(&node).unwrap();
        let co_obj = json["co_changes"].as_object().unwrap();
        assert_eq!(co_obj["auth::*"], 3);
        assert_eq!(co_obj["db::*"], 7);
    }

    /// The Rust field `edge_type` appears as `type` on the wire.
    #[test]
    fn edge_type_renames_to_type_in_json() {
        let edge = Edge {
            from_file: "a".to_string(),
            to_file: "b".to_string(),
            symbol: "Foo".to_string(),
            edge_type: "import".to_string(),
        };

        let json = serde_json::to_value(&edge).unwrap();
        assert!(json.get("type").is_some());
        assert!(json.get("edge_type").is_none());
    }

    /// Equality and inequality hold for every schema type, not just `NodeData`.
    #[test]
    fn partial_eq_works_for_all_types() {
        let a = NodeData::new("function".into(), "foo".into(), 1, 10, 10);
        let b = NodeData::new("function".into(), "foo".into(), 1, 10, 10);
        assert_eq!(a, b);

        let c = NodeData::new("function".into(), "bar".into(), 1, 10, 10);
        assert_ne!(a, c);

        // The remaining types, so the test covers what its name promises.
        let import = ImportInfo::new("utils".to_string(), vec!["helper".to_string()]);
        assert_eq!(import, import.clone());
        assert_ne!(
            import,
            ImportInfo::new("utils".to_string(), vec!["other".to_string()])
        );

        let call = CallInfo::new("helper".to_string(), "utils".to_string(), 1);
        assert_eq!(call, call.clone());
        assert_ne!(
            call,
            CallInfo::new("helper".to_string(), "utils".to_string(), 2)
        );

        let edge = Edge::new(
            "a".to_string(),
            "b".to_string(),
            "Foo".to_string(),
            "import".to_string(),
        );
        assert_eq!(edge, edge.clone());
        assert_ne!(
            edge,
            Edge::new(
                "a".to_string(),
                "c".to_string(),
                "Foo".to_string(),
                "import".to_string(),
            )
        );

        let file = FileData::new("src/lib.rs".to_string(), "lib".to_string());
        assert_eq!(file, file.clone());
        assert_ne!(
            file,
            FileData::new("src/main.rs".to_string(), "main".to_string())
        );

        let empty = AstData::new(Vec::new(), Vec::new());
        assert_eq!(empty, empty.clone());
        assert_ne!(empty, AstData::new(vec![file], Vec::new()));
    }

    /// `new` constructors leave every non-required field at its default.
    #[test]
    fn constructors_produce_correct_defaults() {
        let node = NodeData::new("struct".into(), "Foo".into(), 1, 5, 5);
        assert!(!node.exported);
        assert!(node.visibility.is_none());
        assert!(node.cyclomatic.is_none());
        assert!(node.calls.is_none());

        let file = FileData::new("src/lib.rs".into(), "lib".into());
        assert!(file.nodes.is_empty());
        assert!(file.imports.is_empty());
        assert!(file.git_churn_30d.is_none());
    }
}
429
// Compiled only for test builds with the `schema` feature, since it needs
// the `schemars::JsonSchema` derive on the schema types.
#[cfg(all(test, feature = "schema"))]
mod schema_snapshot {
    use super::*;

    /// Pins the JSON Schema generated for [`AstData`] against the stored
    /// `ast-data-schema` snapshot, so any change to the generated schema
    /// shows up as a snapshot diff that has to be reviewed.
    #[test]
    fn ast_data_json_schema() {
        let schema = schemars::schema_for!(AstData);
        insta::assert_json_snapshot!("ast-data-schema", schema);
    }
}
440
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    // Strategy: a `CallInfo` with short lowercase names and a small count.
    fn arb_call_info() -> impl Strategy<Value = CallInfo> {
        ("[a-z]{1,8}", "[a-z]{1,8}", 0u32..100).prop_map(|(s, m, c)| CallInfo {
            symbol: s,
            module: m,
            count: c,
        })
    }

    // Strategy: an `ImportInfo` with one to three imported names.
    fn arb_import_info() -> impl Strategy<Value = ImportInfo> {
        ("[a-z]{1,8}", proptest::collection::vec("[a-z]{1,8}", 1..4))
            .prop_map(|(f, n)| ImportInfo { from: f, names: n })
    }

    // Strategy: a `NodeData` whose line fields are mutually consistent
    // (`end_line = start_line + delta`, `lines = delta + 1`). Only `exported`,
    // `params`, `cyclomatic` and `depth` vary; the other optional fields stay
    // `None`.
    //
    // Note: coverage (f64) is excluded — arbitrary f64 values can lose
    // a ULP during JSON roundtrip. Deterministic tests cover f64 fields.
    fn arb_node_data() -> impl Strategy<Value = NodeData> {
        (
            "[a-z]{1,8}",
            "[a-z]{1,8}",
            1u32..10000,
            1u32..500,
            any::<bool>(),
            proptest::option::of(0u32..20),
            proptest::option::of(0u32..50),
            proptest::option::of(0u32..10),
        )
            .prop_map(
                |(kind, name, start, delta, exported, params, cyclomatic, depth)| {
                    // Ranges above cap `start + delta` far below `u32::MAX`.
                    let end = start + delta;
                    let lines = delta + 1;
                    NodeData {
                        kind,
                        name,
                        exported,
                        visibility: None,
                        is_async: None,
                        is_unsafe: None,
                        start_line: start,
                        end_line: end,
                        lines,
                        params,
                        field_count: None,
                        depth,
                        cyclomatic,
                        trait_name: None,
                        git_churn_30d: None,
                        coverage: None,
                        co_changes: None,
                        calls: None,
                    }
                },
            )
    }

    // Strategy: an `Edge` whose type is always `"import"`.
    fn arb_edge() -> impl Strategy<Value = Edge> {
        ("[a-z]{1,8}", "[a-z]{1,8}", "[A-Z][a-z]{1,8}").prop_map(|(f, t, s)| Edge {
            from_file: f,
            to_file: t,
            symbol: s,
            edge_type: "import".to_string(),
        })
    }

    // Strategy: `arb_node_data` plus an optional list of zero to two calls.
    fn arb_node_data_with_calls() -> impl Strategy<Value = NodeData> {
        (
            arb_node_data(),
            proptest::option::of(proptest::collection::vec(arb_call_info(), 0..3)),
        )
            .prop_map(|(mut node, calls)| {
                node.calls = calls;
                node
            })
    }

    // Strategy: a `FileData` with up to three nodes and up to two imports.
    fn arb_file_data() -> impl Strategy<Value = FileData> {
        (
            "[a-z/]{1,20}",
            "[a-z]{1,8}",
            proptest::collection::vec(arb_node_data_with_calls(), 0..4),
            proptest::collection::vec(arb_import_info(), 0..3),
        )
            .prop_map(|(path, name, nodes, imports)| FileData {
                path,
                name,
                nodes,
                imports,
                git_churn_30d: None,
            })
    }

    // Strategy: a whole `AstData` document with up to three files and edges.
    fn arb_ast_data() -> impl Strategy<Value = AstData> {
        (
            proptest::collection::vec(arb_file_data(), 0..4),
            proptest::collection::vec(arb_edge(), 0..4),
        )
            .prop_map(|(files, edges)| AstData { files, edges })
    }

    proptest! {
        // Serializing then deserializing a document yields an equal value.
        #[test]
        fn ast_data_serde_roundtrip(data in arb_ast_data()) {
            let json = serde_json::to_string(&data).unwrap();
            let parsed: AstData = serde_json::from_str(&json).unwrap();
            prop_assert_eq!(data, parsed);
        }

        // Same property for a single node. Uses the `with_calls` strategy so
        // the `calls` field is exercised at node level too.
        #[test]
        fn node_data_serde_roundtrip(node in arb_node_data_with_calls()) {
            let json = serde_json::to_string(&node).unwrap();
            let parsed: NodeData = serde_json::from_str(&json).unwrap();
            prop_assert_eq!(node, parsed);
        }
    }
}