pmcp_workbook_runtime/bundle_source.rs
1//! The dumb-byte [`BundleSource`] trait + its local-dir and embedded impls
2//! (Phase 92, Plan 01 — WBSV-09, WBSV-08 boundary).
3//!
4//! A [`BundleSource`] exposes ONLY raw-byte access to a bundle's members:
5//! [`BundleSource::read_artifact`] (one member's exact bytes) and
6//! [`BundleSource::list_artifacts`] (the sorted member set). It deliberately
7//! CANNOT return a parsed bundle — no source impl can pre-parse and thereby
8//! skip the integrity gate. The single shared
9//! [`crate::bundle_loader::load`] is the ONLY parse+verify path (WBSV-08, the
10//! type-level bypass impossibility, threat T-92-03).
11//!
12//! Two impls ship:
13//!
14//! - [`LocalDirSource`] — reads a bundle from a directory tree on disk. One
15//! source = one bundle@version (D-08); the constructor takes the bundle root.
16//! - [`EmbeddedSource`] (behind the `embedded` feature) — reads the SAME bundle
17//! baked into the binary via an [`include_dir::Dir`] (WBSV-09). The two return
18//! identical bytes for the same member, so the loader verifies them identically.
19//!
20//! The trait is SYNC (no `async_trait`, D-07): a byte accessor has no I/O
21//! concurrency need on the boot path, and a sync trait stays object-safe +
22//! `Send + Sync` without an executor.
23
24use std::path::{Path, PathBuf};
25
26use thiserror::Error;
27
28/// Raw-byte access to a single bundle's members.
29///
30/// This trait is the WBSV-08 boundary (threat T-92-03): it exposes ONLY bytes,
31/// never a parsed bundle, so no impl can bypass the shared
32/// [`crate::bundle_loader::load`] integrity gate. It is `Send + Sync` so the
33/// served binary can hold an `Arc<dyn BundleSource>` across handler tasks, and
34/// it is object-safe (both methods take `&self`, no generics) so `Box<dyn
35/// BundleSource>` works.
36///
37/// # Example
38///
39/// A minimal in-memory source — the doctest defines a LOCAL dummy impl, never a
40/// downstream crate, so there is no circular doctest dependency.
41///
42/// ```
43/// use pmcp_workbook_runtime::{BundleSource, BundleSourceError};
44///
45/// struct OneMember;
46///
47/// impl BundleSource for OneMember {
48/// fn read_artifact(&self, name: &str) -> Result<Vec<u8>, BundleSourceError> {
49/// if name == "manifest.json" {
50/// Ok(b"{}".to_vec())
51/// } else {
52/// Err(BundleSourceError::NotFound { member: name.to_string() })
53/// }
54/// }
55/// fn list_artifacts(&self) -> Result<Vec<String>, BundleSourceError> {
56/// Ok(vec!["manifest.json".to_string()])
57/// }
58/// }
59///
60/// let src = OneMember;
61/// assert_eq!(src.read_artifact("manifest.json").unwrap(), b"{}");
62/// assert!(src.read_artifact("missing.json").is_err());
63/// ```
64pub trait BundleSource: Send + Sync {
65 /// Return the EXACT bytes of the member named `name` (a bundle-relative
66 /// path such as `"manifest.json"` or `"evidence/changelog.json"`).
67 ///
68 /// # Errors
69 ///
70 /// Returns [`BundleSourceError::NotFound`] when no such member exists, or
71 /// [`BundleSourceError::Io`] when the underlying read fails.
72 fn read_artifact(&self, name: &str) -> Result<Vec<u8>, BundleSourceError>;
73
74 /// Return the SORTED list of every member's bundle-relative path
75 /// (including nested members like `"evidence/changelog.json"`).
76 ///
77 /// The loader uses this to enforce its fail-closed membership policy, so
78 /// the list MUST be complete and sorted for a stable diagnostic.
79 ///
80 /// # Errors
81 ///
82 /// Returns [`BundleSourceError::Io`] when the member set cannot be
83 /// enumerated.
84 fn list_artifacts(&self) -> Result<Vec<String>, BundleSourceError>;
85}
86
87/// Errors a [`BundleSource`] may surface.
88///
89/// `#[non_exhaustive]` so future source kinds (S3, registry — the documented
90/// extension seam) can add failure modes additively without a semver break.
91#[derive(Debug, Error)]
92#[non_exhaustive]
93pub enum BundleSourceError {
94 /// The underlying byte read or directory walk failed.
95 #[error("bundle source I/O error: {0}")]
96 Io(String),
97
98 /// The requested member does not exist in this bundle.
99 #[error("bundle member not found: {member}")]
100 NotFound {
101 /// The bundle-relative member path that was requested.
102 member: String,
103 },
104}
105
/// On-disk [`BundleSource`]: serves members out of a directory tree.
///
/// Each value wraps a single bundle root, i.e. one bundle@version (D-08).
/// Member bytes are read from paths formed by joining the member name onto
/// that root, and `list_artifacts` walks the tree recursively to produce the
/// sorted, bundle-relative member list.
#[derive(Debug, Clone)]
pub struct LocalDirSource {
    /// Directory that holds the bundle's members.
    root: PathBuf,
}
115
116impl LocalDirSource {
117 /// Wrap the bundle directory rooted at `path`.
118 pub fn new(path: impl Into<PathBuf>) -> Self {
119 Self { root: path.into() }
120 }
121
122 /// Recursively collect bundle-relative member paths under `dir`, pushing
123 /// each into `out` with `/`-normalized separators relative to the root.
124 fn collect_members(&self, dir: &Path, out: &mut Vec<String>) -> Result<(), BundleSourceError> {
125 let entries = std::fs::read_dir(dir).map_err(|e| BundleSourceError::Io(e.to_string()))?;
126 for entry in entries {
127 let entry = entry.map_err(|e| BundleSourceError::Io(e.to_string()))?;
128 let path = entry.path();
129 let file_type = entry
130 .file_type()
131 .map_err(|e| BundleSourceError::Io(e.to_string()))?;
132 if file_type.is_dir() {
133 self.collect_members(&path, out)?;
134 } else {
135 let rel = path.strip_prefix(&self.root).map_err(|_| {
136 BundleSourceError::Io(format!(
137 "member {} is not under bundle root {}",
138 path.display(),
139 self.root.display()
140 ))
141 })?;
142 // Normalize to forward slashes so the member path matches the
143 // loader's allow-set regardless of host path separator.
144 let normalized = rel
145 .components()
146 .map(|c| c.as_os_str().to_string_lossy().into_owned())
147 .collect::<Vec<_>>()
148 .join("/");
149 out.push(normalized);
150 }
151 }
152 Ok(())
153 }
154}
155
156impl BundleSource for LocalDirSource {
157 fn read_artifact(&self, name: &str) -> Result<Vec<u8>, BundleSourceError> {
158 let path = self.root.join(name);
159 match std::fs::read(&path) {
160 Ok(bytes) => Ok(bytes),
161 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
162 Err(BundleSourceError::NotFound {
163 member: name.to_string(),
164 })
165 },
166 Err(e) => Err(BundleSourceError::Io(e.to_string())),
167 }
168 }
169
170 fn list_artifacts(&self) -> Result<Vec<String>, BundleSourceError> {
171 let mut out = Vec::new();
172 self.collect_members(&self.root, &mut out)?;
173 out.sort();
174 Ok(out)
175 }
176}
177
/// In-binary [`BundleSource`] backed by an [`include_dir::Dir`] (WBSV-09).
/// Only compiled when the `embedded` feature is enabled.
///
/// Downstream callers run the `include_dir!` macro over a committed bundle
/// directory and wrap the resulting static `Dir`:
///
/// ```ignore
/// use include_dir::{include_dir, Dir};
/// use pmcp_workbook_runtime::EmbeddedSource;
///
/// static BUNDLE: Dir = include_dir!("$CARGO_MANIFEST_DIR/bundle");
/// let source = EmbeddedSource::new(&BUNDLE);
/// ```
///
/// For any given member it yields the SAME bytes as [`LocalDirSource`], so the
/// shared loader applies identical integrity checks to an embedded bundle and
/// an on-disk one.
#[cfg(feature = "embedded")]
#[derive(Debug, Clone)]
pub struct EmbeddedSource {
    /// The embedded directory tree that holds the bundle's members.
    dir: &'static include_dir::Dir<'static>,
}
200
#[cfg(feature = "embedded")]
impl EmbeddedSource {
    /// Create a source over a `'static` [`include_dir::Dir`], as produced by
    /// the `include_dir!` macro for a committed bundle directory.
    pub fn new(dir: &'static include_dir::Dir<'static>) -> Self {
        Self { dir }
    }

    /// Append the path of every file in `dir` to `out`, then recurse into
    /// each sub-directory.
    ///
    /// Paths are taken as `include_dir` reports them, with any backslash
    /// rewritten to `/`.
    fn collect(dir: &include_dir::Dir<'static>, out: &mut Vec<String>) {
        out.extend(
            dir.files()
                .map(|file| file.path().to_string_lossy().replace('\\', "/")),
        );
        dir.dirs().for_each(|child| Self::collect(child, out));
    }
}
219
#[cfg(feature = "embedded")]
impl BundleSource for EmbeddedSource {
    /// Copy out the embedded bytes of member `name`.
    ///
    /// # Errors
    ///
    /// [`BundleSourceError::NotFound`] if the embedded tree has no file at
    /// `name`.
    fn read_artifact(&self, name: &str) -> Result<Vec<u8>, BundleSourceError> {
        match self.dir.get_file(name) {
            Some(file) => Ok(file.contents().to_vec()),
            None => Err(BundleSourceError::NotFound {
                member: name.to_string(),
            }),
        }
    }

    /// List every embedded member as a sorted list of `/`-separated paths.
    /// This implementation always returns `Ok`.
    fn list_artifacts(&self) -> Result<Vec<String>, BundleSourceError> {
        let mut members = Vec::new();
        Self::collect(self.dir, &mut members);
        members.sort();
        Ok(members)
    }
}
240
#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time probe: instantiating this with `T` only type-checks when
    /// `T` is `Send + Sync`.
    fn assert_send_sync<T: Send + Sync>() {}

    /// `Box<dyn BundleSource>` must be `Send + Sync` (and the trait therefore
    /// object-safe) so the served binary can share an `Arc<dyn BundleSource>`.
    #[test]
    fn bundle_source_trait_object_is_send_sync() {
        assert_send_sync::<Box<dyn BundleSource>>();
    }

    /// A uniquely named temp directory holding a three-member bundle. `Drop`
    /// removes the tree. Hand-rolled so the runtime crate needs no `tempfile`
    /// dependency.
    struct TempBundle {
        path: PathBuf,
    }

    impl TempBundle {
        /// Create and populate a fresh bundle directory. `tag`, the process
        /// id and a per-process counter make the directory name unique.
        fn new(tag: &str) -> Self {
            use std::sync::atomic::{AtomicU64, Ordering};
            static COUNTER: AtomicU64 = AtomicU64::new(0);

            let n = COUNTER.fetch_add(1, Ordering::Relaxed);
            let pid = std::process::id();
            // Build the guard BEFORE touching the filesystem: if any of the
            // `unwrap()`s below panics, unwinding drops `bundle` and its
            // `Drop` impl removes the half-populated directory instead of
            // leaking it.
            let bundle = Self {
                path: std::env::temp_dir().join(format!("pwr-bundle-src-{tag}-{pid}-{n}")),
            };

            std::fs::create_dir_all(bundle.path.join("evidence")).unwrap();
            std::fs::write(bundle.path.join("manifest.json"), b"{\"manifest\":true}").unwrap();
            std::fs::write(bundle.path.join("BUNDLE.lock"), b"{\"lock\":1}").unwrap();
            std::fs::write(
                bundle.path.join("evidence/changelog.json"),
                b"{\"changelog\":[]}",
            )
            .unwrap();
            bundle
        }
    }

    impl Drop for TempBundle {
        fn drop(&mut self) {
            // Best-effort cleanup: a failure to remove the tree must not
            // panic inside `drop`.
            let _ = std::fs::remove_dir_all(&self.path);
        }
    }

    /// Top-level and nested members are read back byte-for-byte.
    #[test]
    fn local_dir_source_reads_exact_bytes() {
        let bundle = TempBundle::new("read");
        let src = LocalDirSource::new(&bundle.path);
        assert_eq!(
            src.read_artifact("manifest.json").unwrap(),
            b"{\"manifest\":true}"
        );
        assert_eq!(
            src.read_artifact("evidence/changelog.json").unwrap(),
            b"{\"changelog\":[]}"
        );
    }

    /// The listing is sorted, root-relative and descends into sub-directories.
    #[test]
    fn local_dir_source_lists_sorted_relative_paths_including_nested() {
        let bundle = TempBundle::new("list");
        let src = LocalDirSource::new(&bundle.path);
        let members = src.list_artifacts().unwrap();
        assert_eq!(
            members,
            vec![
                "BUNDLE.lock".to_string(),
                "evidence/changelog.json".to_string(),
                "manifest.json".to_string(),
            ],
            "members are sorted and include the nested evidence path"
        );
    }

    /// A missing member is reported as `NotFound` carrying the requested name.
    #[test]
    fn local_dir_source_missing_member_returns_not_found_not_panic() {
        let bundle = TempBundle::new("missing");
        let src = LocalDirSource::new(&bundle.path);
        match src.read_artifact("does_not_exist.json") {
            Err(BundleSourceError::NotFound { member }) => {
                assert_eq!(member, "does_not_exist.json");
            },
            other => panic!("expected NotFound, got {other:?}"),
        }
    }

    /// The `Display` text of `NotFound` includes the member name.
    #[test]
    fn not_found_display_names_the_member() {
        let err = BundleSourceError::NotFound {
            member: "layout.json".to_string(),
        };
        assert!(format!("{err}").contains("layout.json"));
    }

    /// `EmbeddedSource` over a baked-in tree returns the SAME bytes and member
    /// list as `LocalDirSource` over the same directory (WBSV-09 parity). The
    /// tree is the committed fixture under `tests/fixtures/embedded_bundle`.
    #[cfg(feature = "embedded")]
    #[test]
    fn embedded_source_matches_local_dir_bytes() {
        use include_dir::{include_dir, Dir};
        static FIXTURE: Dir = include_dir!("$CARGO_MANIFEST_DIR/tests/fixtures/embedded_bundle");
        let embedded = EmbeddedSource::new(&FIXTURE);

        let manifest_root = concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/tests/fixtures/embedded_bundle"
        );
        let local = LocalDirSource::new(manifest_root);

        for member in ["manifest.json", "evidence/changelog.json"] {
            assert_eq!(
                embedded.read_artifact(member).unwrap(),
                local.read_artifact(member).unwrap(),
                "embedded and local-dir bytes must match for {member}"
            );
        }
        // list_artifacts agrees on the member set.
        assert_eq!(
            embedded.list_artifacts().unwrap(),
            local.list_artifacts().unwrap(),
            "embedded and local-dir member sets must match"
        );
    }
}
361}