Skip to main content

tokmd_substrate/
lib.rs

1//! # tokmd-substrate
2//!
3//! **Tier 0 (Pure Data)**
4//!
5//! Shared context that eliminates redundant I/O across sensors.
6//! The substrate is built once (scan + git diff) and shared with
7//! all sensors that run against the same repository.
8//!
9//! ## What belongs here
10//! * `RepoSubstrate`, `SubstrateFile`, `LangSummary`, `DiffRange`
11//! * Pure data types with Serde derive
12//!
13//! ## What does NOT belong here
14//! * I/O operations (substrate building is in tokmd-sensor)
15//! * Business logic or analysis
16
17use std::collections::BTreeMap;
18
19use serde::{Deserialize, Serialize};
20
21/// Shared context for a scanned repository.
22///
23/// Built once from a tokei scan (and optionally git diff), then
24/// passed to every sensor that needs file-level context.
25#[derive(Debug, Clone, Serialize, Deserialize)]
26pub struct RepoSubstrate {
27    /// Normalized repo root path (forward slashes).
28    pub repo_root: String,
29    /// All scanned files, sorted by path.
30    pub files: Vec<SubstrateFile>,
31    /// Per-language aggregates.
32    pub lang_summary: BTreeMap<String, LangSummary>,
33    /// Git diff context (if available).
34    #[serde(skip_serializing_if = "Option::is_none")]
35    pub diff_range: Option<DiffRange>,
36    /// Total estimated tokens across all files.
37    pub total_tokens: usize,
38    /// Total bytes across all files.
39    pub total_bytes: usize,
40    /// Total lines of code across all files.
41    pub total_code_lines: usize,
42}
43
44/// A single file in the substrate.
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct SubstrateFile {
47    /// Repo-relative path (forward slashes).
48    pub path: String,
49    /// Detected language.
50    pub lang: String,
51    /// Lines of code.
52    pub code: usize,
53    /// Total lines.
54    pub lines: usize,
55    /// File size in bytes.
56    pub bytes: usize,
57    /// Estimated token count.
58    pub tokens: usize,
59    /// Pre-computed module key.
60    pub module: String,
61    /// Whether this file was modified in the current diff range.
62    pub in_diff: bool,
63}
64
65/// Per-language summary in the substrate.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct LangSummary {
68    /// Number of files.
69    pub files: usize,
70    /// Lines of code.
71    pub code: usize,
72    /// Total lines.
73    pub lines: usize,
74    /// Total bytes.
75    pub bytes: usize,
76    /// Estimated tokens.
77    pub tokens: usize,
78}
79
80/// Git diff range context.
81#[derive(Debug, Clone, Serialize, Deserialize)]
82pub struct DiffRange {
83    /// Base ref (e.g., "main", "v1.0.0").
84    pub base: String,
85    /// Head ref (e.g., "HEAD", "feature-branch").
86    pub head: String,
87    /// Files changed in the diff.
88    pub changed_files: Vec<String>,
89    /// Number of commits in the range.
90    pub commit_count: usize,
91    /// Total insertions.
92    pub insertions: usize,
93    /// Total deletions.
94    pub deletions: usize,
95}
96
97impl RepoSubstrate {
98    /// Get files modified in the current diff range.
99    pub fn diff_files(&self) -> impl Iterator<Item = &SubstrateFile> {
100        self.files.iter().filter(|f| f.in_diff)
101    }
102
103    /// Get files for a specific language.
104    pub fn files_for_lang(&self, lang: &str) -> impl Iterator<Item = &SubstrateFile> {
105        self.files.iter().filter(move |f| f.lang == lang)
106    }
107}
108
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a small substrate with two Rust files and a populated diff range.
    ///
    /// The `Rust` language summary and the `total_*` fields are the sums of
    /// the two files; only `src/lib.rs` is flagged as part of the diff.
    fn sample_substrate() -> RepoSubstrate {
        RepoSubstrate {
            repo_root: "/repo".to_string(),
            files: vec![
                SubstrateFile {
                    path: "src/lib.rs".to_string(),
                    lang: "Rust".to_string(),
                    code: 100,
                    lines: 120,
                    bytes: 3000,
                    tokens: 750,
                    module: "src".to_string(),
                    in_diff: true,
                },
                SubstrateFile {
                    path: "src/main.rs".to_string(),
                    lang: "Rust".to_string(),
                    code: 50,
                    lines: 60,
                    bytes: 1500,
                    tokens: 375,
                    module: "src".to_string(),
                    in_diff: false,
                },
            ],
            lang_summary: BTreeMap::from([(
                "Rust".to_string(),
                LangSummary {
                    files: 2,
                    code: 150,
                    lines: 180,
                    bytes: 4500,
                    tokens: 1125,
                },
            )]),
            diff_range: Some(DiffRange {
                base: "main".to_string(),
                head: "HEAD".to_string(),
                changed_files: vec!["src/lib.rs".to_string()],
                commit_count: 3,
                insertions: 10,
                deletions: 5,
            }),
            total_tokens: 1125,
            total_bytes: 4500,
            total_code_lines: 150,
        }
    }

    #[test]
    fn serde_roundtrip() {
        let sub = sample_substrate();
        let json = serde_json::to_string(&sub).unwrap();
        let back: RepoSubstrate = serde_json::from_str(&json).unwrap();

        // Top-level scalars survive the roundtrip.
        assert_eq!(back.repo_root, "/repo");
        assert_eq!(back.total_tokens, 1125);
        assert_eq!(back.total_bytes, 4500);
        assert_eq!(back.total_code_lines, 150);

        // File order and per-file flags are preserved.
        assert_eq!(back.files.len(), 2);
        assert_eq!(back.files[0].path, "src/lib.rs");
        assert!(back.files[0].in_diff);
        assert_eq!(back.files[1].path, "src/main.rs");
        assert!(!back.files[1].in_diff);

        // Language aggregates are preserved.
        assert_eq!(back.lang_summary.len(), 1);
        assert_eq!(back.lang_summary["Rust"].files, 2);
        assert_eq!(back.lang_summary["Rust"].code, 150);

        // The diff range is preserved field by field.
        let diff = back
            .diff_range
            .as_ref()
            .expect("diff_range should survive the roundtrip");
        assert_eq!(diff.base, "main");
        assert_eq!(diff.head, "HEAD");
        assert_eq!(diff.changed_files, vec!["src/lib.rs"]);
        assert_eq!(diff.commit_count, 3);
        assert_eq!(diff.insertions, 10);
        assert_eq!(diff.deletions, 5);
    }

    #[test]
    fn serde_omits_missing_diff_range() {
        let mut sub = sample_substrate();
        sub.diff_range = None;

        // `skip_serializing_if` must drop the key rather than emit `null`.
        let json = serde_json::to_string(&sub).unwrap();
        assert!(!json.contains("\"diff_range\""));

        // A document without the key must still deserialize, yielding `None`.
        let back: RepoSubstrate = serde_json::from_str(&json).unwrap();
        assert!(back.diff_range.is_none());
        assert_eq!(back.files.len(), 2);
    }

    #[test]
    fn diff_files_filter() {
        let sub = sample_substrate();
        let diff: Vec<_> = sub.diff_files().collect();
        assert_eq!(diff.len(), 1);
        assert_eq!(diff[0].path, "src/lib.rs");
    }

    #[test]
    fn diff_files_empty_when_nothing_flagged() {
        let mut sub = sample_substrate();
        for file in &mut sub.files {
            file.in_diff = false;
        }
        assert_eq!(sub.diff_files().count(), 0);
    }

    #[test]
    fn files_for_lang_filter() {
        let sub = sample_substrate();
        let rust_files: Vec<_> = sub.files_for_lang("Rust").collect();
        assert_eq!(rust_files.len(), 2);
        assert_eq!(rust_files[0].path, "src/lib.rs");
        assert_eq!(rust_files[1].path, "src/main.rs");
        assert_eq!(sub.files_for_lang("Go").count(), 0);
        // Matching is exact string equality, so case matters.
        assert_eq!(sub.files_for_lang("rust").count(), 0);
    }

    #[test]
    fn btreemap_ordering() {
        let mut sub = sample_substrate();
        // Insert out of alphabetical order: with a single key the assertion
        // below could not tell a sorted map from an unsorted one.
        for lang in ["TOML", "Go"] {
            sub.lang_summary.insert(
                lang.to_string(),
                LangSummary {
                    files: 1,
                    code: 1,
                    lines: 1,
                    bytes: 1,
                    tokens: 1,
                },
            );
        }
        let keys: Vec<&str> = sub.lang_summary.keys().map(String::as_str).collect();
        // BTreeMap ensures deterministic (sorted) ordering
        assert_eq!(keys, vec!["Go", "Rust", "TOML"]);
    }
}