1use std::collections::{BTreeMap, BTreeSet};
29use std::fmt::Write as _;
30use std::io::Read as _;
31use std::path::{Component, Path};
32
33use serde::{Deserialize, Serialize};
34use serde_norway::Value;
35use sha2::{Digest, Sha256};
36
37use crate::parser;
38use crate::store::{self, Store};
39use crate::write_atomic;
40
/// File name of the asset manifest, resolved relative to the store root.
///
/// The manifest holds one JSON-serialized [`AssetRecord`] per line.
pub const MANIFEST_FILE: &str = "assets.jsonl";
43
/// One line of the asset manifest: a cataloged file and the facts recorded
/// about it by [`scan`].
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AssetRecord {
    /// Store-relative path with `/` separators, as produced by
    /// [`normalize_asset_path`].
    pub path: String,
    /// Lowercase hexadecimal SHA-256 digest of the file contents.
    pub sha256: String,
    /// Size of the file in bytes, as counted while hashing.
    pub bytes: u64,
    /// Media type guessed from the file extension.
    pub media_type: String,
    /// Sorted, de-duplicated store-relative paths of the content files that
    /// declare this asset.
    pub wrappers: Vec<String>,
    /// `true` when at least one declaring file marks the asset as required.
    pub required: bool,
}
69
/// An asset reference exactly as it appears in a file's frontmatter, before
/// any path normalization.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Declaration {
    /// Path string as written in the declaration.
    pub path: String,
    /// Whether the asset is required; declarations that do not say otherwise
    /// are treated as required.
    pub required: bool,
}
79
/// Summary returned by [`scan`].
#[derive(Debug, Serialize)]
pub struct ScanReport {
    /// Name of the manifest file (always [`MANIFEST_FILE`]).
    pub manifest: String,
    /// Number of records in the computed manifest.
    pub cataloged: usize,
    /// Number of assets found on disk and hashed during this scan.
    pub hashed: usize,
    /// Number of assets absent from disk whose previous record was carried over.
    pub preserved: usize,
    /// Sum of `bytes` over all cataloged records.
    pub bytes: u64,
    /// Whether the manifest file was rewritten.
    pub wrote: bool,
    /// Echo of the `dry_run` argument passed to [`scan`].
    pub dry_run: bool,
    /// Human-readable notes about declarations that were skipped.
    pub warnings: Vec<String>,
    /// Files under `sources/` that no declaration references; empty unless
    /// the untracked listing was requested.
    pub untracked: Vec<String>,
}
97
/// Presence information for a single manifest record, as reported by [`status`].
#[derive(Debug, Serialize)]
pub struct AssetState {
    /// Store-relative path copied from the manifest record.
    pub path: String,
    /// SHA-256 digest copied from the manifest record (not recomputed).
    pub sha256: String,
    /// Size in bytes copied from the manifest record.
    pub bytes: u64,
    /// Required flag copied from the manifest record.
    pub required: bool,
    /// Either `"present"` or `"missing"`.
    pub state: String,
}
108
/// Aggregate presence summary returned by [`status`].
#[derive(Debug, Serialize)]
pub struct StatusReport {
    /// Number of records in the manifest.
    pub total: usize,
    /// Number of records whose file exists on disk.
    pub present: usize,
    /// Number of records whose file does not exist on disk.
    pub missing: usize,
    /// Missing records that are flagged as required.
    pub required_missing: usize,
    /// Missing records that are not flagged as required.
    pub optional_missing: usize,
    /// Sum of the recorded sizes of all records.
    pub bytes_total: u64,
    /// Sum of the recorded sizes of the missing records.
    pub bytes_missing: u64,
    /// Per-record detail, in manifest order.
    pub assets: Vec<AssetState>,
}
121
/// Outcome of an integrity check performed by [`verify`].
#[derive(Debug, Serialize)]
pub struct VerifyReport {
    /// `"quick"` when only file sizes were compared, `"deep"` when contents
    /// were hashed.
    pub mode: String,
    /// Number of records that were examined.
    pub checked: usize,
    /// Number of examined records that were neither missing nor corrupt.
    pub ok: usize,
    /// Paths of examined records whose file is not present.
    pub missing: Vec<String>,
    /// Paths of examined records that failed the size/hash comparison or
    /// whose path could not be resolved inside the store.
    pub corrupt: Vec<String>,
    /// `true` when both `missing` and `corrupt` are empty.
    pub complete: bool,
}
132
133pub fn read_manifest(store: &Store) -> crate::Result<Vec<AssetRecord>> {
142 let abs = store.root.join(MANIFEST_FILE);
143 if !abs.exists() {
144 return Ok(Vec::new());
145 }
146 let text = std::fs::read_to_string(&abs)?;
147 let mut by_path: BTreeMap<String, AssetRecord> = BTreeMap::new();
148 for (i, line) in text.lines().enumerate() {
149 if line.trim().is_empty() {
150 continue;
151 }
152 let rec: AssetRecord = serde_json::from_str(line).map_err(|e| {
153 std::io::Error::new(
154 std::io::ErrorKind::InvalidData,
155 format!("{MANIFEST_FILE} line {}: {e}", i + 1),
156 )
157 })?;
158 by_path.insert(rec.path.clone(), rec);
159 }
160 Ok(by_path.into_values().collect())
161}
162
163pub fn write_manifest(store: &Store, records: &[AssetRecord]) -> crate::Result<()> {
166 let abs = store.root.join(MANIFEST_FILE);
167 if records.is_empty() {
168 if abs.exists() {
169 std::fs::remove_file(&abs)?;
170 }
171 return Ok(());
172 }
173 let mut sorted = records.to_vec();
174 sorted.sort_by(|a, b| a.path.cmp(&b.path));
175 let mut out = String::new();
176 for rec in &sorted {
177 let line = serde_json::to_string(rec).expect("AssetRecord serializes");
178 out.push_str(&line);
179 out.push('\n');
180 }
181 write_atomic(&abs, out.as_bytes())?;
182 Ok(())
183}
184
185pub fn scan(store: &Store, dry_run: bool, untracked: bool) -> crate::Result<ScanReport> {
198 let existing_by_path: BTreeMap<String, AssetRecord> = read_manifest(store)
202 .unwrap_or_default()
203 .into_iter()
204 .map(|r| (r.path.clone(), r))
205 .collect();
206
207 let mut wrappers_by_path: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
209 let mut required_by_path: BTreeMap<String, bool> = BTreeMap::new();
210 let mut declared_paths: BTreeSet<String> = BTreeSet::new();
211 let mut warnings: Vec<String> = Vec::new();
212
213 for rel in store.walk()? {
214 let abs = store.abs_path(&rel);
215 let (fm, _body) = match parser::read_file(&abs) {
216 Ok(v) => v,
217 Err(_) => continue, };
219 let wrapper = rel_to_string(&rel);
220 for decl in declared_assets(&fm) {
221 let norm = match normalize_asset_path(&decl.path) {
222 Ok(n) => n,
223 Err(e) => {
224 warnings.push(format!("{wrapper}: {e}"));
225 continue;
226 }
227 };
228 if is_markdown(&norm) {
229 warnings.push(format!(
230 "{wrapper}: asset path points at a markdown content file ({norm}); skipped"
231 ));
232 continue;
233 }
234 wrappers_by_path
235 .entry(norm.clone())
236 .or_default()
237 .insert(wrapper.clone());
238 let req = required_by_path.entry(norm.clone()).or_insert(false);
239 *req = *req || decl.required;
240 declared_paths.insert(norm);
241 }
242 }
243
244 let mut records: Vec<AssetRecord> = Vec::new();
246 let mut hashed = 0usize;
247 let mut preserved = 0usize;
248 for (path, wrappers) in &wrappers_by_path {
249 let required = *required_by_path.get(path).unwrap_or(&true);
250 let wrappers: Vec<String> = wrappers.iter().cloned().collect();
251
252 let abs = match store::ensure_path_within_store(&store.root, &store.root.join(path)) {
254 Ok(p) => p,
255 Err(_) => {
256 warnings.push(format!("{path}: escapes the store root; skipped"));
257 continue;
258 }
259 };
260
261 if abs.is_dir() {
262 warnings.push(format!("{path}: is a directory, not a file; skipped"));
263 continue;
264 }
265 if abs.is_file() {
266 let (sha256, bytes) = sha256_file(&abs)?;
267 records.push(AssetRecord {
268 path: path.clone(),
269 sha256,
270 bytes,
271 media_type: media_type_for(path),
272 wrappers,
273 required,
274 });
275 hashed += 1;
276 } else if let Some(prev) = existing_by_path.get(path) {
277 records.push(AssetRecord {
280 path: path.clone(),
281 sha256: prev.sha256.clone(),
282 bytes: prev.bytes,
283 media_type: media_type_for(path),
284 wrappers,
285 required,
286 });
287 preserved += 1;
288 } else {
289 warnings.push(format!(
290 "{path}: declared but absent and never cataloged; cannot hash (skipped)"
291 ));
292 }
293 }
294 records.sort_by(|a, b| a.path.cmp(&b.path));
295
296 let bytes: u64 = records.iter().map(|r| r.bytes).sum();
297 let cataloged = records.len();
298
299 let untracked_list = if untracked {
300 find_untracked(store, &declared_paths)?
301 } else {
302 Vec::new()
303 };
304
305 let mut wrote = false;
307 if !dry_run {
308 let current = read_manifest(store).unwrap_or_default();
309 if current != records {
310 write_manifest(store, &records)?;
311 wrote = true;
312 }
313 }
314
315 Ok(ScanReport {
316 manifest: MANIFEST_FILE.to_string(),
317 cataloged,
318 hashed,
319 preserved,
320 bytes,
321 wrote,
322 dry_run,
323 warnings,
324 untracked: untracked_list,
325 })
326}
327
328pub fn verify(store: &Store, include_optional: bool, quick: bool) -> crate::Result<VerifyReport> {
338 let records = read_manifest(store)?;
339 let mut missing = Vec::new();
340 let mut corrupt = Vec::new();
341 let mut checked = 0usize;
342
343 for rec in &records {
344 if !rec.required && !include_optional {
345 continue;
346 }
347 checked += 1;
348 let abs = match store::ensure_path_within_store(&store.root, &store.root.join(&rec.path)) {
349 Ok(p) => p,
350 Err(_) => {
351 corrupt.push(rec.path.clone());
353 continue;
354 }
355 };
356 if !abs.is_file() {
357 missing.push(rec.path.clone());
358 continue;
359 }
360 if quick {
361 let len = std::fs::metadata(&abs)?.len();
362 if len != rec.bytes {
363 corrupt.push(rec.path.clone());
364 }
365 } else {
366 let (sha, bytes) = sha256_file(&abs)?;
367 if sha != rec.sha256 || bytes != rec.bytes {
368 corrupt.push(rec.path.clone());
369 }
370 }
371 }
372
373 let ok = checked - missing.len() - corrupt.len();
374 let complete = missing.is_empty() && corrupt.is_empty();
375 Ok(VerifyReport {
376 mode: if quick { "quick" } else { "deep" }.to_string(),
377 checked,
378 ok,
379 missing,
380 corrupt,
381 complete,
382 })
383}
384
385pub fn status(store: &Store) -> crate::Result<StatusReport> {
393 let records = read_manifest(store)?;
394 let mut present = 0usize;
395 let mut missing = 0usize;
396 let mut required_missing = 0usize;
397 let mut optional_missing = 0usize;
398 let mut bytes_total = 0u64;
399 let mut bytes_missing = 0u64;
400 let mut assets = Vec::with_capacity(records.len());
401
402 for rec in &records {
403 bytes_total += rec.bytes;
404 let is_present = store.root.join(&rec.path).is_file();
405 let state = if is_present {
406 present += 1;
407 "present"
408 } else {
409 missing += 1;
410 bytes_missing += rec.bytes;
411 if rec.required {
412 required_missing += 1;
413 } else {
414 optional_missing += 1;
415 }
416 "missing"
417 };
418 assets.push(AssetState {
419 path: rec.path.clone(),
420 sha256: rec.sha256.clone(),
421 bytes: rec.bytes,
422 required: rec.required,
423 state: state.to_string(),
424 });
425 }
426
427 Ok(StatusReport {
428 total: records.len(),
429 present,
430 missing,
431 required_missing,
432 optional_missing,
433 bytes_total,
434 bytes_missing,
435 assets,
436 })
437}
438
439pub fn paths(store: &Store) -> crate::Result<Vec<String>> {
447 Ok(read_manifest(store)?.into_iter().map(|r| r.path).collect())
448}
449
450pub fn declared_assets(fm: &parser::Frontmatter) -> Vec<Declaration> {
460 let mut out = Vec::new();
461 if let Some(v) = fm.get("asset") {
462 collect_declarations(&v, &mut out);
463 }
464 if let Some(v) = fm.get("assets") {
465 collect_declarations(&v, &mut out);
466 }
467 out
468}
469
470pub fn declarations_from_yaml_map(map: &BTreeMap<String, Value>) -> Vec<Declaration> {
474 let mut out = Vec::new();
475 if let Some(v) = map.get("asset") {
476 collect_declarations(v, &mut out);
477 }
478 if let Some(v) = map.get("assets") {
479 collect_declarations(v, &mut out);
480 }
481 out
482}
483
484fn collect_declarations(v: &Value, out: &mut Vec<Declaration>) {
485 match v {
486 Value::String(s) => out.push(Declaration {
487 path: s.clone(),
488 required: true,
489 }),
490 Value::Sequence(items) => {
491 for item in items {
492 match item {
493 Value::String(s) => out.push(Declaration {
494 path: s.clone(),
495 required: true,
496 }),
497 Value::Mapping(m) => {
498 let path = m
499 .get(Value::String("path".to_string()))
500 .and_then(|x| x.as_str())
501 .map(|s| s.to_string());
502 if let Some(path) = path {
503 let required = m
504 .get(Value::String("required".to_string()))
505 .and_then(|x| x.as_bool())
506 .unwrap_or(true);
507 out.push(Declaration { path, required });
508 }
509 }
510 _ => {}
511 }
512 }
513 }
514 _ => {}
515 }
516}
517
/// Validates a declared asset path and returns it in canonical manifest form.
///
/// The input is trimmed, backslashes are converted to `/`, and trailing
/// slashes are removed.
///
/// # Errors
///
/// Returns a human-readable message when the path is empty, absolute,
/// rooted, carries a platform path prefix, or contains a `..` segment.
///
/// Because the result always treats `\` as a separator, the `..` and
/// leading-separator checks are applied to the separator-normalized string
/// as well as to the platform's own parse of the path. Without this, on
/// platforms where `\` is an ordinary file-name character, an input such as
/// `..\secret` would pass validation and come out as `../secret`.
pub fn normalize_asset_path(raw: &str) -> Result<String, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err("empty asset path".to_string());
    }

    // Platform-aware checks (drive prefixes, UNC paths, native separators).
    let native = Path::new(trimmed);
    if native.is_absolute() {
        return Err(format!("absolute asset path not allowed: {raw}"));
    }
    for component in native.components() {
        match component {
            Component::ParentDir => {
                return Err(format!("`..` not allowed in asset path: {raw}"))
            }
            Component::Prefix(_) | Component::RootDir => {
                return Err(format!("asset path escapes the store: {raw}"))
            }
            _ => {}
        }
    }

    // Platform-independent re-check on the form that is actually returned.
    let unified = trimmed.replace('\\', "/");
    if unified.starts_with('/') {
        return Err(format!("asset path escapes the store: {raw}"));
    }
    if unified.split('/').any(|segment| segment == "..") {
        return Err(format!("`..` not allowed in asset path: {raw}"));
    }

    Ok(unified.trim_end_matches('/').to_string())
}
546
/// Returns `true` when `path` has the extension `md`, compared
/// case-insensitively.
fn is_markdown(path: &str) -> bool {
    match Path::new(path).extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("md"),
        None => false,
    }
}
554
/// Renders a path as text with every backslash replaced by `/`.
///
/// Non-UTF-8 sequences are converted lossily.
fn rel_to_string(p: &Path) -> String {
    p.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
558
559fn sha256_file(abs: &Path) -> std::io::Result<(String, u64)> {
562 let mut f = std::fs::File::open(abs)?;
563 let mut hasher = Sha256::new();
564 let mut buf = [0u8; 65536];
565 let mut total: u64 = 0;
566 loop {
567 let n = f.read(&mut buf)?;
568 if n == 0 {
569 break;
570 }
571 hasher.update(&buf[..n]);
572 total += n as u64;
573 }
574 let digest = hasher.finalize();
575 let mut hex = String::with_capacity(64);
576 for b in digest.iter() {
577 let _ = write!(hex, "{b:02x}");
578 }
579 Ok((hex, total))
580}
581
/// Guesses a media type from the extension of `path`.
///
/// The extension is matched case-insensitively against a fixed table;
/// unknown or missing extensions yield `application/octet-stream`.
fn media_type_for(path: &str) -> String {
    /// Lowercase extension → media type.
    const KNOWN: &[(&str, &str)] = &[
        ("pdf", "application/pdf"),
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("gif", "image/gif"),
        ("webp", "image/webp"),
        ("svg", "image/svg+xml"),
        ("tiff", "image/tiff"),
        ("tif", "image/tiff"),
        ("mp4", "video/mp4"),
        ("mov", "video/quicktime"),
        ("webm", "video/webm"),
        ("mkv", "video/x-matroska"),
        ("mp3", "audio/mpeg"),
        ("wav", "audio/wav"),
        ("m4a", "audio/mp4"),
        ("flac", "audio/flac"),
        ("zip", "application/zip"),
        ("gz", "application/gzip"),
        ("tgz", "application/gzip"),
        ("tar", "application/x-tar"),
        ("csv", "text/csv"),
        ("tsv", "text/tab-separated-values"),
        ("json", "application/json"),
        ("xml", "application/xml"),
        ("txt", "text/plain"),
        ("vtt", "text/vtt"),
        ("srt", "application/x-subrip"),
        ("html", "text/html"),
        ("htm", "text/html"),
        ("epub", "application/epub+zip"),
        (
            "docx",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ),
        (
            "xlsx",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ),
        (
            "pptx",
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ),
        ("doc", "application/msword"),
        ("xls", "application/vnd.ms-excel"),
        ("ppt", "application/vnd.ms-powerpoint"),
    ];

    let wanted = Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(|ext| ext.to_ascii_lowercase())
        .unwrap_or_default();

    KNOWN
        .iter()
        .find(|entry| entry.0 == wanted.as_str())
        .map_or("application/octet-stream", |entry| entry.1)
        .to_string()
}
629
630fn find_untracked(store: &Store, declared: &BTreeSet<String>) -> crate::Result<Vec<String>> {
634 let sources = store.root.join("sources");
635 if !sources.is_dir() {
636 return Ok(Vec::new());
637 }
638 let mut out = Vec::new();
639 for entry in walkdir::WalkDir::new(&sources)
640 .into_iter()
641 .filter_entry(|e| !is_hidden(e.file_name().to_str().unwrap_or("")))
642 {
643 let entry = match entry {
644 Ok(e) => e,
645 Err(_) => continue,
646 };
647 if !entry.file_type().is_file() {
648 continue;
649 }
650 let name = entry.file_name().to_str().unwrap_or("");
651 if is_markdown(name) || name == "index.jsonl" {
652 continue;
653 }
654 let rel = match entry.path().strip_prefix(&store.root) {
655 Ok(r) => rel_to_string(r),
656 Err(_) => continue,
657 };
658 if !declared.contains(&rel) {
659 out.push(rel);
660 }
661 }
662 out.sort();
663 Ok(out)
664}
665
/// Returns `true` for dot-prefixed names, except the special entries `.` and
/// `..`.
fn is_hidden(name: &str) -> bool {
    match name {
        "." | ".." => false,
        other => other.starts_with('.'),
    }
}