Skip to main content

codedash_schemas/
lib.rs

//! Stable public schema for codedash code metrics output.
//!
//! This crate defines the **external contract** for codedash's analysis JSON.
//! It is decoupled from codedash's internal domain model via an Anti-Corruption
//! Layer (ACL), so internal refactoring does not break external consumers.
//!
//! # Design: Anti-Corruption Layer
//!
//! codedash maintains **two separate type hierarchies**:
//!
//! 1. **Internal domain model** (`codedash::domain::ast`) — optimized for
//!    parsing and enrichment logic. May change across codedash versions.
//! 2. **Public schema** (this crate) — stable, versioned contract for
//!    external consumers.
//!
//! The ACL boundary (`codedash::port::schema`) converts domain types into
//! these schema types via `From` implementations. This is the **only** place
//! where the two models touch. As a result:
//!
//! - Internal refactoring in codedash never breaks consumers of this crate.
//! - This crate carries minimal dependencies (`serde` only; `schemars` is
//!   pulled in solely by the optional `schema` feature).
//! - Breaking changes to the schema follow semver.
//!
//! # For Rust consumers
//!
//! ```rust
//! use codedash_schemas::AstData;
//!
//! # let json = r#"{"files":[],"edges":[]}"#;
//! let data: AstData = serde_json::from_str(json).unwrap();
//! ```
//!
//! # For non-Rust consumers (JSON Schema)
//!
//! Enable the `schema` feature and generate a JSON Schema file:
//!
//! ```rust,ignore
//! let schema = schemars::schema_for!(codedash_schemas::AstData);
//! println!("{}", serde_json::to_string_pretty(&schema).unwrap());
//! ```
//!
//! The generated schema can then be used by TypeScript, Python, Go, etc.
//! to validate or generate types for codedash output.
//!
//! # Optional features
//!
//! - **`schema`** — derives [`schemars::JsonSchema`] on all types, enabling
//!   JSON Schema generation via `schemars::schema_for!`.
49
50use std::collections::HashMap;
51
52use serde::{Deserialize, Serialize};
53
54/// Top-level AST output from a codedash analysis run.
55#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
56#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
57#[non_exhaustive]
58pub struct AstData {
59    /// Analyzed source files.
60    pub files: Vec<FileData>,
61    /// Dependency edges between files.
62    #[serde(default)]
63    pub edges: Vec<Edge>,
64}
65
66impl AstData {
67    /// Create a new [`AstData`].
68    pub fn new(files: Vec<FileData>, edges: Vec<Edge>) -> Self {
69        Self { files, edges }
70    }
71}
72
73/// Per-file AST data.
74#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
75#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
76#[non_exhaustive]
77pub struct FileData {
78    /// Original file path (relative to the analysis root).
79    pub path: String,
80    /// Normalized module name (e.g. `"app/analyze"`).
81    pub name: String,
82    /// AST nodes (functions, structs, enums, etc.) found in this file.
83    pub nodes: Vec<NodeData>,
84    /// Internal imports (`use crate::...`, `use super::...`).
85    #[serde(default)]
86    pub imports: Vec<ImportInfo>,
87    /// Git commit count within the churn period. Injected by enrichment.
88    #[serde(default, skip_serializing_if = "Option::is_none")]
89    pub git_churn_30d: Option<u32>,
90}
91
92impl FileData {
93    /// Create a new [`FileData`] with required fields.
94    ///
95    /// `nodes`, `imports`, and `git_churn_30d` default to empty/`None`.
96    pub fn new(path: String, name: String) -> Self {
97        Self {
98            path,
99            name,
100            nodes: Vec::new(),
101            imports: Vec::new(),
102            git_churn_30d: None,
103        }
104    }
105}
106
107/// A single AST node (function, struct, enum, impl, etc.).
108#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
109#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
110#[non_exhaustive]
111pub struct NodeData {
112    /// Node kind: `"function"`, `"struct"`, `"enum"`, `"impl"`, `"method"`, etc.
113    pub kind: String,
114    /// Node name (identifier).
115    pub name: String,
116
117    /// Whether this node is exported / publicly visible.
118    #[serde(default)]
119    pub exported: bool,
120    /// Visibility qualifier (e.g. `"pub"`, `"pub(crate)"`).
121    #[serde(default, skip_serializing_if = "Option::is_none")]
122    pub visibility: Option<String>,
123
124    /// Whether this function is `async`.
125    #[serde(default, skip_serializing_if = "Option::is_none")]
126    pub is_async: Option<bool>,
127    /// Whether this function/trait is `unsafe`.
128    #[serde(default, skip_serializing_if = "Option::is_none")]
129    pub is_unsafe: Option<bool>,
130
131    /// First line of the node (1-based).
132    pub start_line: u32,
133    /// Last line of the node (1-based, inclusive).
134    pub end_line: u32,
135    /// Total line count (`end_line - start_line + 1`).
136    pub lines: u32,
137
138    /// Number of parameters (functions/methods only).
139    #[serde(default, skip_serializing_if = "Option::is_none")]
140    pub params: Option<u32>,
141    /// Number of fields (structs/enums only).
142    #[serde(default, skip_serializing_if = "Option::is_none")]
143    pub field_count: Option<u32>,
144    /// Maximum nesting depth.
145    #[serde(default, skip_serializing_if = "Option::is_none")]
146    pub depth: Option<u32>,
147
148    /// Cyclomatic complexity.
149    #[serde(default, skip_serializing_if = "Option::is_none")]
150    pub cyclomatic: Option<u32>,
151
152    /// Trait name for impl blocks (e.g. `"Display"`).
153    #[serde(default, skip_serializing_if = "Option::is_none")]
154    pub trait_name: Option<String>,
155
156    /// Git commit count within the churn period. Injected by enrichment.
157    #[serde(default, skip_serializing_if = "Option::is_none")]
158    pub git_churn_30d: Option<u32>,
159    /// Region coverage ratio (0.0–1.0). Injected by enrichment.
160    #[serde(default, skip_serializing_if = "Option::is_none")]
161    pub coverage: Option<f64>,
162    /// Co-change counts: `"partner_file::*" → commit_count`.
163    /// Injected by enrichment.
164    #[serde(default, skip_serializing_if = "Option::is_none")]
165    pub co_changes: Option<HashMap<String, u32>>,
166
167    /// Internal function calls detected in the body.
168    #[serde(default, skip_serializing_if = "Option::is_none")]
169    pub calls: Option<Vec<CallInfo>>,
170}
171
172impl NodeData {
173    /// Create a new [`NodeData`] with required fields.
174    ///
175    /// All optional fields default to `None` / `false`.
176    pub fn new(kind: String, name: String, start_line: u32, end_line: u32, lines: u32) -> Self {
177        Self {
178            kind,
179            name,
180            exported: false,
181            visibility: None,
182            is_async: None,
183            is_unsafe: None,
184            start_line,
185            end_line,
186            lines,
187            params: None,
188            field_count: None,
189            depth: None,
190            cyclomatic: None,
191            trait_name: None,
192            git_churn_30d: None,
193            coverage: None,
194            co_changes: None,
195            calls: None,
196        }
197    }
198}
199
200/// An internal import (e.g. `use crate::domain::ast::AstData`).
201#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
202#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
203#[non_exhaustive]
204pub struct ImportInfo {
205    /// Source module path.
206    pub from: String,
207    /// Imported symbol names.
208    pub names: Vec<String>,
209}
210
211impl ImportInfo {
212    /// Create a new [`ImportInfo`].
213    pub fn new(from: String, names: Vec<String>) -> Self {
214        Self { from, names }
215    }
216}
217
218/// A call reference from a function body to an imported symbol.
219#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
220#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
221#[non_exhaustive]
222pub struct CallInfo {
223    /// Called symbol name.
224    pub symbol: String,
225    /// Module the symbol belongs to.
226    pub module: String,
227    /// Number of call sites within the function body.
228    pub count: u32,
229}
230
231impl CallInfo {
232    /// Create a new [`CallInfo`].
233    pub fn new(symbol: String, module: String, count: u32) -> Self {
234        Self {
235            symbol,
236            module,
237            count,
238        }
239    }
240}
241
242/// A dependency edge between two files.
243#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
244#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
245#[non_exhaustive]
246pub struct Edge {
247    /// Source file (the file that imports).
248    pub from_file: String,
249    /// Target file (the file being imported from).
250    pub to_file: String,
251    /// Imported symbol name.
252    pub symbol: String,
253    /// Edge type (currently always `"import"`).
254    #[serde(rename = "type")]
255    pub edge_type: String,
256}
257
258impl Edge {
259    /// Create a new [`Edge`].
260    pub fn new(from_file: String, to_file: String, symbol: String, edge_type: String) -> Self {
261        Self {
262            from_file,
263            to_file,
264            symbol,
265            edge_type,
266        }
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    /// A fully populated document survives a JSON serialize/deserialize cycle.
    ///
    /// Individual fields are checked rather than whole-value equality because
    /// the node carries an `f64` coverage value.
    #[test]
    fn ast_data_roundtrip() {
        let data = AstData {
            files: vec![FileData {
                path: "src/main.rs".to_string(),
                name: "main".to_string(),
                nodes: vec![NodeData {
                    kind: "function".to_string(),
                    name: "main".to_string(),
                    exported: true,
                    visibility: Some("pub".to_string()),
                    is_async: None,
                    is_unsafe: None,
                    start_line: 1,
                    end_line: 5,
                    lines: 5,
                    params: Some(0),
                    field_count: None,
                    depth: Some(1),
                    cyclomatic: Some(1),
                    trait_name: None,
                    git_churn_30d: Some(3),
                    coverage: Some(0.85),
                    co_changes: Some(HashMap::from([("utils::*".to_string(), 5)])),
                    calls: Some(vec![CallInfo {
                        symbol: "helper".to_string(),
                        module: "utils".to_string(),
                        count: 1,
                    }]),
                }],
                imports: vec![ImportInfo {
                    from: "utils".to_string(),
                    names: vec!["helper".to_string()],
                }],
                git_churn_30d: Some(3),
            }],
            edges: vec![Edge {
                from_file: "main".to_string(),
                to_file: "utils".to_string(),
                symbol: "helper".to_string(),
                edge_type: "import".to_string(),
            }],
        };

        let json = serde_json::to_string(&data).unwrap();
        let parsed: AstData = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.files.len(), 1);
        assert_eq!(parsed.files[0].nodes[0].name, "main");
        assert_eq!(
            parsed.files[0].nodes[0].co_changes.as_ref().unwrap()["utils::*"],
            5
        );
        assert_eq!(parsed.edges[0].edge_type, "import");
    }

    /// Optional node fields may be absent from the input JSON.
    #[test]
    fn deserialize_with_missing_optional_fields() {
        let json = r#"{
            "files": [{
                "path": "src/lib.rs",
                "name": "lib",
                "nodes": [{
                    "kind": "struct",
                    "name": "Foo",
                    "start_line": 1,
                    "end_line": 5,
                    "lines": 5
                }]
            }],
            "edges": []
        }"#;

        let parsed: AstData = serde_json::from_str(json).unwrap();
        let node = &parsed.files[0].nodes[0];
        assert!(!node.exported);
        assert!(node.co_changes.is_none());
        assert!(node.cyclomatic.is_none());
    }

    /// `edges`, `imports` and `git_churn_30d` carry `#[serde(default)]`, so a
    /// document that omits them still deserializes.
    #[test]
    fn deserialize_defaults_missing_edges_and_imports() {
        let json = r#"{"files":[{"path":"src/lib.rs","name":"lib","nodes":[]}]}"#;

        let parsed: AstData = serde_json::from_str(json).unwrap();
        assert!(parsed.edges.is_empty());
        assert!(parsed.files[0].imports.is_empty());
        assert!(parsed.files[0].git_churn_30d.is_none());
    }

    /// `co_changes` is emitted as a JSON object keyed by partner name.
    #[test]
    fn co_changes_serializes_as_object() {
        let mut co = HashMap::new();
        co.insert("auth::*".to_string(), 3);
        co.insert("db::*".to_string(), 7);

        let node = NodeData {
            kind: "function".to_string(),
            name: "handle".to_string(),
            exported: true,
            visibility: None,
            is_async: None,
            is_unsafe: None,
            start_line: 1,
            end_line: 10,
            lines: 10,
            params: None,
            field_count: None,
            depth: None,
            cyclomatic: None,
            trait_name: None,
            git_churn_30d: None,
            coverage: None,
            co_changes: Some(co),
            calls: None,
        };

        let json = serde_json::to_value(&node).unwrap();
        let co_obj = json["co_changes"].as_object().unwrap();
        assert_eq!(co_obj["auth::*"], 3);
        assert_eq!(co_obj["db::*"], 7);
    }

    /// Optional node fields that are `None` are left out of the output, while
    /// the plain `exported` flag is always written.
    #[test]
    fn none_optionals_are_omitted_from_json() {
        let node = NodeData::new("struct".into(), "Foo".into(), 1, 5, 5);
        let json = serde_json::to_value(&node).unwrap();
        let obj = json.as_object().unwrap();

        for key in [
            "visibility",
            "is_async",
            "is_unsafe",
            "params",
            "field_count",
            "depth",
            "cyclomatic",
            "trait_name",
            "git_churn_30d",
            "coverage",
            "co_changes",
            "calls",
        ] {
            assert!(!obj.contains_key(key), "`{}` should be skipped when None", key);
        }
        assert_eq!(json["exported"], false);
    }

    /// The Rust field `edge_type` is written as `"type"` in JSON.
    #[test]
    fn edge_type_renames_to_type_in_json() {
        let edge = Edge {
            from_file: "a".to_string(),
            to_file: "b".to_string(),
            symbol: "Foo".to_string(),
            edge_type: "import".to_string(),
        };

        let json = serde_json::to_value(&edge).unwrap();
        assert!(json.get("type").is_some());
        assert!(json.get("edge_type").is_none());
    }

    /// Equality and inequality behave as expected for every schema type.
    #[test]
    fn partial_eq_works_for_all_types() {
        // NodeData
        let a = NodeData::new("function".into(), "foo".into(), 1, 10, 10);
        let b = NodeData::new("function".into(), "foo".into(), 1, 10, 10);
        assert_eq!(a, b);

        let c = NodeData::new("function".into(), "bar".into(), 1, 10, 10);
        assert_ne!(a, c);

        // CallInfo
        let call = CallInfo::new("helper".into(), "utils".into(), 1);
        assert_eq!(call, call.clone());
        assert_ne!(call, CallInfo::new("helper".into(), "utils".into(), 2));

        // ImportInfo
        let import = ImportInfo::new("utils".into(), vec!["helper".into()]);
        assert_eq!(import, import.clone());
        assert_ne!(import, ImportInfo::new("utils".into(), Vec::new()));

        // Edge
        let edge = Edge::new("a".into(), "b".into(), "Foo".into(), "import".into());
        assert_eq!(edge, edge.clone());
        assert_ne!(
            edge,
            Edge::new("a".into(), "c".into(), "Foo".into(), "import".into())
        );

        // FileData
        let file = FileData::new("src/lib.rs".into(), "lib".into());
        let mut file_with_node = file.clone();
        assert_eq!(file, file_with_node);
        file_with_node.nodes.push(a.clone());
        assert_ne!(file, file_with_node);

        // AstData
        let ast = AstData::new(vec![file.clone()], vec![edge.clone()]);
        assert_eq!(ast, ast.clone());
        assert_ne!(ast, AstData::new(vec![file], Vec::new()));
    }

    /// Every constructor stores its arguments and leaves the remaining fields
    /// at their documented defaults.
    #[test]
    fn constructors_produce_correct_defaults() {
        let node = NodeData::new("struct".into(), "Foo".into(), 1, 5, 5);
        assert_eq!(node.kind, "struct");
        assert_eq!(node.name, "Foo");
        assert_eq!((node.start_line, node.end_line, node.lines), (1, 5, 5));
        assert!(!node.exported);
        assert!(node.visibility.is_none());
        assert!(node.is_async.is_none());
        assert!(node.is_unsafe.is_none());
        assert!(node.params.is_none());
        assert!(node.field_count.is_none());
        assert!(node.depth.is_none());
        assert!(node.cyclomatic.is_none());
        assert!(node.trait_name.is_none());
        assert!(node.git_churn_30d.is_none());
        assert!(node.coverage.is_none());
        assert!(node.co_changes.is_none());
        assert!(node.calls.is_none());

        let file = FileData::new("src/lib.rs".into(), "lib".into());
        assert_eq!(file.path, "src/lib.rs");
        assert_eq!(file.name, "lib");
        assert!(file.nodes.is_empty());
        assert!(file.imports.is_empty());
        assert!(file.git_churn_30d.is_none());

        let import = ImportInfo::new("utils".into(), vec!["helper".into()]);
        assert_eq!(import.from, "utils");
        assert_eq!(import.names, ["helper"]);

        let call = CallInfo::new("helper".into(), "utils".into(), 2);
        assert_eq!(call.symbol, "helper");
        assert_eq!(call.module, "utils");
        assert_eq!(call.count, 2);

        let edge = Edge::new("a".into(), "b".into(), "Foo".into(), "import".into());
        assert_eq!(edge.from_file, "a");
        assert_eq!(edge.to_file, "b");
        assert_eq!(edge.symbol, "Foo");
        assert_eq!(edge.edge_type, "import");

        let ast = AstData::new(vec![file], vec![edge]);
        assert_eq!(ast.files.len(), 1);
        assert_eq!(ast.edges.len(), 1);
    }
}
425
// Compiled only for test builds with the `schema` feature enabled.
#[cfg(all(test, feature = "schema"))]
mod schema_snapshot {
    use super::AstData;

    /// Compare the JSON Schema generated for [`AstData`] with the stored
    /// `ast-data-schema` snapshot.
    #[test]
    fn ast_data_json_schema() {
        // The binding keeps its original name: insta stores the asserted
        // expression text alongside the snapshot.
        let schema = schemars::schema_for!(AstData);
        insta::assert_json_snapshot!("ast-data-schema", schema);
    }
}
436
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Regex strategy for the short lowercase identifiers used by most
    /// string fields.
    const IDENT: &str = "[a-z]{1,8}";

    /// Strategy yielding a [`CallInfo`] with identifier-like names and a
    /// count below 100.
    fn arb_call_info() -> impl Strategy<Value = CallInfo> {
        (IDENT, IDENT, 0u32..100)
            .prop_map(|(symbol, module, count)| CallInfo::new(symbol, module, count))
    }

    /// Strategy yielding an [`ImportInfo`] with one to three imported names.
    fn arb_import_info() -> impl Strategy<Value = ImportInfo> {
        let names = proptest::collection::vec(IDENT, 1..4);
        (IDENT, names).prop_map(|(from, names)| ImportInfo::new(from, names))
    }

    /// Strategy yielding a [`NodeData`] whose line span is internally
    /// consistent (`lines == end_line - start_line + 1`).
    ///
    /// `coverage` is deliberately left as `None`: an arbitrary `f64` can lose
    /// a ULP in a JSON roundtrip, and the deterministic tests already cover
    /// the float field.
    fn arb_node_data() -> impl Strategy<Value = NodeData> {
        (
            IDENT,
            IDENT,
            1u32..10000,
            1u32..500,
            any::<bool>(),
            proptest::option::of(0u32..20),
            proptest::option::of(0u32..50),
            proptest::option::of(0u32..10),
        )
            .prop_map(
                |(kind, name, first_line, span, exported, params, cyclomatic, depth)| {
                    // Start from the constructor defaults and fill in only
                    // the generated fields.
                    let mut node =
                        NodeData::new(kind, name, first_line, first_line + span, span + 1);
                    node.exported = exported;
                    node.params = params;
                    node.cyclomatic = cyclomatic;
                    node.depth = depth;
                    node
                },
            )
    }

    /// Strategy yielding an `"import"` [`Edge`] with a capitalized symbol.
    fn arb_edge() -> impl Strategy<Value = Edge> {
        (IDENT, IDENT, "[A-Z][a-z]{1,8}").prop_map(|(from_file, to_file, symbol)| {
            Edge::new(from_file, to_file, symbol, "import".to_string())
        })
    }

    /// Like [`arb_node_data`], additionally populating `calls` with an
    /// optional list of up to two call entries.
    fn arb_node_data_with_calls() -> impl Strategy<Value = NodeData> {
        let maybe_calls = proptest::option::of(proptest::collection::vec(arb_call_info(), 0..3));
        (arb_node_data(), maybe_calls).prop_map(|(node, calls)| NodeData { calls, ..node })
    }

    /// Strategy yielding a [`FileData`] with up to three nodes and up to two
    /// imports; `git_churn_30d` stays `None`.
    fn arb_file_data() -> impl Strategy<Value = FileData> {
        (
            "[a-z/]{1,20}",
            IDENT,
            proptest::collection::vec(arb_node_data_with_calls(), 0..4),
            proptest::collection::vec(arb_import_info(), 0..3),
        )
            .prop_map(|(path, name, nodes, imports)| {
                let mut file = FileData::new(path, name);
                file.nodes = nodes;
                file.imports = imports;
                file
            })
    }

    /// Strategy yielding an [`AstData`] with up to three files and up to
    /// three edges.
    fn arb_ast_data() -> impl Strategy<Value = AstData> {
        let files = proptest::collection::vec(arb_file_data(), 0..4);
        let edges = proptest::collection::vec(arb_edge(), 0..4);
        (files, edges).prop_map(|(files, edges)| AstData::new(files, edges))
    }

    proptest! {
        // A whole document is unchanged by a JSON roundtrip.
        #[test]
        fn ast_data_serde_roundtrip(data in arb_ast_data()) {
            let encoded = serde_json::to_string(&data).unwrap();
            let decoded: AstData = serde_json::from_str(&encoded).unwrap();
            prop_assert_eq!(data, decoded);
        }

        // A single node (without calls) is unchanged by a JSON roundtrip.
        #[test]
        fn node_data_serde_roundtrip(node in arb_node_data()) {
            let encoded = serde_json::to_string(&node).unwrap();
            let decoded: NodeData = serde_json::from_str(&encoded).unwrap();
            prop_assert_eq!(node, decoded);
        }
    }
}