1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use musefs_db::convert::usize_from;
5use musefs_db::{Db, Format, NewArt, NewTrack, Tag, TrackArt};
6use musefs_format::{EmbeddedBinaryTag, EmbeddedPicture, Extent, flac, mp3, mp4, ogg, wav};
7
8use crate::byte_budget::ByteBudget;
9use crate::error::Result;
10use crate::freshness::BackingStamp;
11use std::sync::mpsc::sync_channel;
12
/// Maximum number of probed files collected into one batch before it is flushed.
const BATCH_FILES: usize = 256;
/// Default value of `ScanOptions::batch_bytes` (64 MiB).
const BATCH_BYTES: u64 = 64 * 1024 * 1024;
/// Default value of `ScanOptions::window`: the size of the first prefix read (1 MiB).
const WINDOW: usize = 1024 * 1024;
/// Upper bound on how many times a probe asks for a wider prefix.
const MAX_WIDEN_RETRIES: usize = 8;
/// Largest prefix, in bytes, a probe reads from a single file (64 MiB).
pub(crate) const MAX_PROBE_BYTES: u64 = 64 * 1024 * 1024;
/// Largest embedded picture accepted, in bytes: 16 MiB minus 64 KiB.
pub(crate) const MAX_ART_BYTES: usize = (16 << 20) - (64 << 10);

/// Largest binary tag payload accepted; deliberately the same limit as embedded art.
const MAX_BINARY_TAG_BYTES: usize = MAX_ART_BYTES;
37
38#[derive(Debug)]
43enum ProbeOutcome {
44 Probed(Probed, BackingStamp),
45 Unparseable,
46 Raced,
47}
48
#[cfg(test)]
thread_local! {
    /// Test-only, per-thread callback slot. When set, the callback is invoked
    /// by `fire_after_s1`; it starts out empty.
    static AFTER_S1_HOOK: std::cell::RefCell<Option<Box<dyn FnMut()>>> = const {
        std::cell::RefCell::new(None)
    };
}
/// Test-only: runs the hook installed on this thread, if there is one.
///
/// The slot stays mutably borrowed while the hook runs, so the hook must not
/// install or clear hooks itself.
#[cfg(test)]
fn fire_after_s1() {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        if let Some(hook) = guard.as_mut() {
            hook();
        }
    });
}
/// Test-only: installs `f` as this thread's hook, replacing any previous one.
#[cfg(test)]
fn set_after_s1_hook(f: impl FnMut() + 'static) {
    AFTER_S1_HOOK.with(|slot| {
        let hook: Box<dyn FnMut()> = Box::new(f);
        *slot.borrow_mut() = Some(hook);
    });
}
/// Test-only: removes this thread's hook, if any.
#[cfg(test)]
fn clear_after_s1_hook() {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        *guard = None;
    });
}
70
/// Counters reported by a scan.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanStats {
    /// Files whose metadata was probed and written to the database.
    pub scanned: u64,
    /// Files passed over because their extension is not a supported audio type.
    pub skipped: u64,
    /// Files that could not be opened, parsed, or canonicalized.
    pub failed: u64,
    /// Files whose backing stamp changed while they were being probed.
    pub raced: u64,
}
78
/// Counters reported by a revalidation pass.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RevalidateStats {
    /// Files that were re-probed and written back to the database.
    pub updated: u64,
    /// Files whose stored stamp still matched the file on disk.
    pub unchanged: u64,
    /// Tracks deleted because their backing file no longer exists.
    pub pruned: u64,
    /// Files that could not be stat'ed, canonicalized, opened, or parsed.
    pub failed: u64,
    /// Files whose backing stamp changed while they were being probed.
    pub raced: u64,
}
87
/// Returns `true` when `path` has an extension equal to `ext`, ignoring ASCII
/// case. Paths with no extension, or a non-UTF-8 one, never match.
fn has_ext(path: &Path, ext: &str) -> bool {
    match path.extension().and_then(|raw| raw.to_str()) {
        Some(found) => found.eq_ignore_ascii_case(ext),
        None => false,
    }
}
93
94fn is_supported_audio(path: &Path) -> bool {
96 has_ext(path, "flac")
97 || has_ext(path, "mp3")
98 || has_ext(path, "m4a")
99 || has_ext(path, "m4b")
100 || has_ext(path, "ogg")
101 || has_ext(path, "oga")
102 || has_ext(path, "opus")
103 || has_ext(path, "wav")
104}
105
106fn collect_audio(
107 root: &Path,
108 out: &mut Vec<PathBuf>,
109 follow_symlinks: bool,
110) -> std::io::Result<u64> {
111 let mut visited = HashSet::new();
112 let mut files_visited = HashSet::new();
113 let mut skipped = 0u64;
114 if follow_symlinks {
115 if let Ok(meta) = std::fs::metadata(root) {
118 visited.insert(dir_key(&meta));
119 }
120 }
121 collect_audio_inner(
122 root,
123 out,
124 follow_symlinks,
125 &mut visited,
126 &mut files_visited,
127 &mut skipped,
128 )?;
129 Ok(skipped)
130}
131
132fn collect_audio_inner(
133 root: &Path,
134 out: &mut Vec<PathBuf>,
135 follow_symlinks: bool,
136 visited: &mut HashSet<(u64, u64)>,
137 files_visited: &mut HashSet<(u64, u64)>,
138 skipped: &mut u64,
139) -> std::io::Result<()> {
140 for entry in std::fs::read_dir(root)? {
141 let entry = entry?;
142 let path = entry.path();
143 let ftype = entry.file_type()?;
144 if ftype.is_dir() {
145 descend(&path, out, follow_symlinks, visited, files_visited, skipped)?;
146 } else if ftype.is_file() {
147 if is_supported_audio(&path) {
148 push_file(&path, out, follow_symlinks, files_visited, None);
149 } else {
150 *skipped += 1;
151 }
152 } else if ftype.is_symlink() {
153 if !follow_symlinks {
154 log::warn!(
155 "skipping symlink {} (pass --follow-symlinks to scan it)",
156 path.display()
157 );
158 continue;
159 }
160 match std::fs::metadata(&path) {
161 Ok(meta) if meta.is_dir() => {
162 descend(&path, out, follow_symlinks, visited, files_visited, skipped)?;
163 }
164 Ok(meta) if meta.is_file() => {
165 if is_supported_audio(&path) {
166 push_file(&path, out, follow_symlinks, files_visited, Some(&meta));
167 } else {
168 *skipped += 1;
169 }
170 }
171 Ok(_) => {}
172 Err(e) => {
173 log::warn!("skipping broken symlink {}: {e}", path.display());
174 }
175 }
176 }
177 }
178 Ok(())
179}
180
181fn descend(
182 path: &Path,
183 out: &mut Vec<PathBuf>,
184 follow_symlinks: bool,
185 visited: &mut HashSet<(u64, u64)>,
186 files_visited: &mut HashSet<(u64, u64)>,
187 skipped: &mut u64,
188) -> std::io::Result<()> {
189 if !follow_symlinks {
190 return collect_audio_inner(path, out, follow_symlinks, visited, files_visited, skipped);
191 }
192 let meta = match std::fs::metadata(path) {
193 Ok(m) => m,
194 Err(e) => {
195 log::warn!("skipping directory {}: {e}", path.display());
196 return Ok(());
197 }
198 };
199 if !visited.insert(dir_key(&meta)) {
200 log::warn!("skipping symlink cycle at {}", path.display());
201 return Ok(());
202 }
203 collect_audio_inner(path, out, follow_symlinks, visited, files_visited, skipped)
204}
205
/// Identity key for a filesystem object: its `(device, inode)` pair.
fn dir_key(meta: &std::fs::Metadata) -> (u64, u64) {
    use std::os::unix::fs::MetadataExt;
    let device = meta.dev();
    let inode = meta.ino();
    (device, inode)
}
210
211fn push_file(
219 path: &Path,
220 out: &mut Vec<PathBuf>,
221 follow_symlinks: bool,
222 files_visited: &mut HashSet<(u64, u64)>,
223 known_meta: Option<&std::fs::Metadata>,
224) {
225 if !follow_symlinks {
226 out.push(path.to_path_buf());
227 return;
228 }
229 let key = match known_meta {
230 Some(m) => Some(dir_key(m)),
231 None => std::fs::metadata(path).ok().map(|m| dir_key(&m)),
232 };
233 match key {
234 Some(k) if !files_visited.insert(k) => {
235 log::debug!("skipping duplicate backing target {}", path.display());
236 }
237 _ => out.push(path.to_path_buf()),
238 }
239}
240
241#[derive(Debug)]
244pub(crate) struct Probed {
245 format: Format,
246 audio_offset: u64,
247 audio_length: u64,
248 tags: Vec<(String, String)>,
249 pictures: Vec<EmbeddedPicture>,
250 binary_tags: Vec<EmbeddedBinaryTag>,
251 structural_blocks: Vec<(String, Vec<u8>)>,
253}
254
255fn wav_probed(prefix: &[u8], bounds: &wav::WavBounds) -> Probed {
258 let (binary_tags, promoted) = wav::read_binary_tags(prefix);
259 let mut tags = wav::read_tags(prefix);
260 tags.extend(promoted);
261 Probed {
262 format: Format::Wav,
263 audio_offset: bounds.audio_offset,
264 audio_length: bounds.audio_length,
265 tags,
266 pictures: wav::read_pictures(prefix),
267 binary_tags,
268 structural_blocks: Vec::new(),
269 }
270}
271
272pub(crate) fn probe_full(path: &Path, bytes: &[u8]) -> Option<Probed> {
275 if has_ext(path, "flac") {
276 let scan = flac::locate_audio(bytes).ok()?;
277 let (structural_blocks, binary_tags) = flac::split_preserved(&scan.preserved);
278 Some(Probed {
279 format: Format::Flac,
280 audio_offset: scan.audio_offset,
281 audio_length: scan.audio_length,
282 tags: flac::read_vorbis_comments(bytes).unwrap_or_default(),
283 pictures: flac::read_pictures(bytes).unwrap_or_default(),
284 binary_tags,
285 structural_blocks,
286 })
287 } else if has_ext(path, "mp3") {
288 let bounds = mp3::locate_audio(bytes).ok()?;
289 let (binary_tags, promoted) = mp3::read_binary_tags(bytes);
290 let mut tags = mp3::read_tags(bytes);
291 tags.extend(promoted);
292 Some(Probed {
293 format: Format::Mp3,
294 audio_offset: bounds.audio_offset,
295 audio_length: bounds.audio_length,
296 tags,
297 pictures: mp3::read_pictures(bytes),
298 binary_tags,
299 structural_blocks: Vec::new(),
300 })
301 } else if has_ext(path, "m4a") || has_ext(path, "m4b") {
302 let bounds = mp4::locate_audio(bytes).ok()?;
303 Some(Probed {
304 format: Format::M4a,
305 audio_offset: bounds.audio_offset,
306 audio_length: bounds.audio_length,
307 tags: mp4::read_tags(bytes),
308 pictures: mp4::read_pictures(bytes, MAX_ART_BYTES),
309 binary_tags: mp4::read_binary_tags(bytes, MAX_BINARY_TAG_BYTES),
310 structural_blocks: Vec::new(),
311 })
312 } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
313 let scan = ogg::locate_audio(bytes).ok()?;
314 let format = match scan.codec {
315 ogg::Codec::Opus => Format::Opus,
316 ogg::Codec::Vorbis => Format::Vorbis,
317 ogg::Codec::OggFlac => Format::OggFlac,
318 };
319 Some(Probed {
320 format,
321 audio_offset: scan.audio_offset,
322 audio_length: scan.audio_length,
323 tags: ogg::read_tags(bytes).unwrap_or_default(),
324 pictures: ogg::read_pictures(bytes).unwrap_or_default(),
325 binary_tags: Vec::new(),
326 structural_blocks: Vec::new(),
327 })
328 } else if has_ext(path, "wav") {
329 let bounds = wav::locate_audio(bytes).ok()?;
330 Some(wav_probed(bytes, &bounds))
331 } else {
332 None
333 }
334}
335
336fn read_window(file: &std::fs::File, len: usize) -> std::io::Result<Vec<u8>> {
339 use std::os::unix::fs::FileExt;
340 let mut buf = vec![0u8; len];
341 let n = file.read_at(&mut buf, 0)?;
342 buf.truncate(n);
343 crate::metrics::on_scan_read(n as u64);
344 Ok(buf)
345}
346
347fn read_tail_128(file: &std::fs::File, file_len: u64) -> std::io::Result<Option<[u8; 128]>> {
350 if file_len < 128 {
351 return Ok(None);
352 }
353 use std::os::unix::fs::FileExt;
354 let mut buf = [0u8; 128];
355 file.read_exact_at(&mut buf, file_len - 128)?;
356 crate::metrics::on_scan_read(128);
357 Ok(Some(buf))
358}
359
360fn probe_file(path: &Path, window: usize) -> std::io::Result<ProbeOutcome> {
370 let file = std::fs::File::open(path)?;
371 crate::metrics::on_scan_open();
372 let s1 = BackingStamp::from_metadata(&file.metadata()?);
373 #[cfg(test)]
374 fire_after_s1();
375
376 let probed = probe_body(path, &file, s1.size, window)?;
377
378 let s2 = BackingStamp::from_metadata(&file.metadata()?);
379 if s1 != s2 {
380 log::warn!("skipping {}: changed during probe", path.display());
381 return Ok(ProbeOutcome::Raced);
382 }
383 Ok(match probed {
384 Some(p) => ProbeOutcome::Probed(p, s1),
385 None => ProbeOutcome::Unparseable,
386 })
387}
388
389fn probe_body(
395 path: &Path,
396 file: &std::fs::File,
397 file_len: u64,
398 window: usize,
399) -> std::io::Result<Option<Probed>> {
400 if has_ext(path, "m4a") || has_ext(path, "m4b") {
402 let mut f = file;
403 let scan = match mp4::read_structure_from(&mut f, file_len) {
404 Ok(s) => s,
405 Err(e) => {
406 log::warn!("skipping {}: {e}", path.display());
407 return Ok(None);
408 }
409 };
410 return Ok(Some(Probed {
411 format: Format::M4a,
412 audio_offset: scan.mdat_payload_offset,
413 audio_length: scan.mdat_payload_len,
414 tags: mp4::read_tags(&scan.moov),
415 pictures: mp4::read_pictures(&scan.moov, MAX_ART_BYTES),
416 binary_tags: mp4::read_binary_tags(&scan.moov, MAX_BINARY_TAG_BYTES),
417 structural_blocks: Vec::new(),
418 }));
419 }
420
421 let tail = if has_ext(path, "mp3") {
425 read_tail_128(file, file_len)?
426 } else {
427 None
428 };
429 let probe_cap = file_len.min(MAX_PROBE_BYTES);
432 let mut want = usize_from((window as u64).min(probe_cap));
433 let mut prefix = read_window(file, want)?;
434 for _ in 0..MAX_WIDEN_RETRIES {
435 match probe_prefix(path, &prefix, file_len, tail.as_ref()) {
436 Probe::Done(p) => return Ok(Some(p)),
437 Probe::Skip => {
438 log::warn!("skipping {}: no parseable audio metadata", path.display());
439 return Ok(None);
440 }
441 Probe::NeedMore(up_to) => {
442 if want as u64 >= probe_cap {
444 break;
445 }
446 want = usize_from(up_to.min(probe_cap))
449 .max(want + 1)
450 .min(usize_from(probe_cap));
451 prefix = read_window(file, want)?;
452 }
453 }
454 }
455 if (prefix.len() as u64) < probe_cap {
457 prefix = read_window(file, usize_from(probe_cap))?;
458 }
459 if let Some(p) = probe_full(path, &prefix) {
460 return Ok(Some(p));
461 }
462 if has_ext(path, "wav")
467 && file_len > MAX_PROBE_BYTES
468 && let Ok(bounds) = wav::locate_audio_at_ceiling(&prefix, file_len)
469 {
470 return Ok(Some(wav_probed(&prefix, &bounds)));
471 }
472 if file_len > MAX_PROBE_BYTES {
473 log::warn!(
474 "skipping {}: no parseable metadata within first {MAX_PROBE_BYTES} bytes",
475 path.display()
476 );
477 } else {
478 log::warn!("skipping {}: no parseable audio metadata", path.display());
479 }
480 Ok(None)
481}
482
483enum Probe {
485 Done(Probed),
486 NeedMore(u64),
487 Skip,
488}
489
490fn probe_prefix(path: &Path, prefix: &[u8], file_len: u64, tail: Option<&[u8; 128]>) -> Probe {
492 if has_ext(path, "flac") {
493 match flac::read_metadata_bounded(prefix) {
494 Ok(Extent::Complete(meta)) => {
495 let (structural_blocks, binary_tags) = flac::split_preserved(&meta.preserved);
496 Probe::Done(Probed {
497 format: Format::Flac,
498 audio_offset: meta.audio_offset,
499 audio_length: file_len - meta.audio_offset,
500 tags: flac::read_vorbis_comments(prefix).unwrap_or_default(),
501 pictures: flac::read_pictures(prefix).unwrap_or_default(),
502 binary_tags,
503 structural_blocks,
504 })
505 }
506 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
507 Err(_) => Probe::Skip,
508 }
509 } else if has_ext(path, "mp3") {
510 match mp3::locate_audio_bounded(prefix, file_len, tail) {
511 Ok(Extent::Complete(b)) => {
512 let (binary_tags, promoted) = mp3::read_binary_tags(prefix);
513 let mut tags = mp3::read_tags(prefix);
514 tags.extend(promoted);
515 Probe::Done(Probed {
516 format: Format::Mp3,
517 audio_offset: b.audio_offset,
518 audio_length: b.audio_length,
519 tags,
520 pictures: mp3::read_pictures(prefix),
521 binary_tags,
522 structural_blocks: Vec::new(),
523 })
524 }
525 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
526 Err(_) => Probe::Skip,
527 }
528 } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
529 match ogg::read_metadata_bounded(prefix, file_len) {
530 Ok(Extent::Complete(header)) => {
531 let format = match header.codec {
532 ogg::Codec::Opus => Format::Opus,
533 ogg::Codec::Vorbis => Format::Vorbis,
534 ogg::Codec::OggFlac => Format::OggFlac,
535 };
536 Probe::Done(Probed {
537 format,
538 audio_offset: header.audio_offset,
539 audio_length: file_len - header.audio_offset,
540 tags: ogg::read_tags(prefix).unwrap_or_default(),
541 pictures: ogg::read_pictures(prefix).unwrap_or_default(),
542 binary_tags: Vec::new(),
543 structural_blocks: Vec::new(),
544 })
545 }
546 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
547 Err(_) => Probe::Skip,
548 }
549 } else if has_ext(path, "wav") {
550 match wav::locate_audio_bounded(prefix, file_len) {
551 Ok(Extent::Complete(b)) => Probe::Done(wav_probed(prefix, &b)),
552 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
553 Err(_) => Probe::Skip,
554 }
555 } else {
556 Probe::Skip
557 }
558}
559
/// Tunables for a scan or revalidation run.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// Number of probe worker threads; `0` means "use the available
    /// parallelism".
    pub jobs: usize,
    /// Size in bytes of the first prefix read when probing a file.
    pub window: usize,
    /// Byte budget for probed payloads held in memory, and the payload size at
    /// which a batch is flushed to the database.
    pub batch_bytes: u64,
    /// Whether symlinks found while walking the tree are followed.
    pub follow_symlinks: bool,
}
572
573impl Default for ScanOptions {
574 fn default() -> Self {
575 Self {
576 jobs: 0,
577 window: WINDOW,
578 batch_bytes: BATCH_BYTES,
579 follow_symlinks: false,
580 }
581 }
582}
583
/// Resolves the requested worker count: a non-zero `jobs` is used as given,
/// while `0` resolves to the machine's available parallelism (or `1` when
/// that cannot be determined).
fn effective_jobs(jobs: usize) -> usize {
    match jobs {
        0 => std::thread::available_parallelism().map_or(1, |n| n.get()),
        explicit => explicit,
    }
}
590
591struct Unit {
593 abs_path: String,
594 stamp: BackingStamp,
595 probed: Probed,
596 weight: u64,
597}
598
599fn payload_weight(p: &Probed) -> u64 {
604 let pictures: u64 = p.pictures.iter().map(|pic| pic.data.len() as u64).sum();
605 let binary: u64 = p.binary_tags.iter().map(|t| t.payload.len() as u64).sum();
606 let structural: u64 = p
607 .structural_blocks
608 .iter()
609 .map(|(_, body)| body.len() as u64)
610 .sum();
611 pictures + binary + structural
612}
613
/// Minimal sanity check for a tag key: it must be non-empty and contain no
/// byte below `0x20` (the ASCII control range).
fn key_passes_floor(key: &str) -> bool {
    if key.is_empty() {
        return false;
    }
    !key.bytes().any(|b| b < 0x20)
}
624
625fn accept_pictures(abs_path: &str, pictures: Vec<EmbeddedPicture>) -> Vec<EmbeddedPicture> {
631 pictures
632 .into_iter()
633 .filter(|p| {
634 if p.data.len() > MAX_ART_BYTES {
635 log::warn!(
636 "{abs_path}: dropping embedded {} art ({} bytes), over the {MAX_ART_BYTES}-byte cap",
637 p.mime,
638 p.data.len(),
639 );
640 return false;
641 }
642 true
643 })
644 .collect()
645}
646
647fn accept_binary_tags(abs_path: &str, tags: Vec<EmbeddedBinaryTag>) -> Vec<musefs_db::BinaryTag> {
651 tags.into_iter()
652 .filter(|b| {
653 if b.payload.len() > MAX_BINARY_TAG_BYTES {
654 log::warn!(
655 "{abs_path}: dropping binary tag {} ({} bytes), over the {MAX_BINARY_TAG_BYTES}-byte cap",
656 b.key,
657 b.payload.len(),
658 );
659 return false;
660 }
661 !b.payload.is_empty()
662 })
663 .enumerate()
664 .map(|(ordinal, b)| musefs_db::BinaryTag {
665 key: b.key,
666 payload: b.payload,
667 ordinal: ordinal as u64,
668 })
669 .collect()
670}
671
672fn ingest(db: &Db, abs_path: &str, meta: &std::fs::Metadata, probed: Probed) -> Result<()> {
675 let stamp = BackingStamp::from_metadata(meta);
676 let track_id = db.upsert_track(&NewTrack {
677 backing_path: abs_path.to_string(),
678 format: probed.format,
679 audio_offset: probed.audio_offset,
680 audio_length: probed.audio_length,
681 backing_size: meta.len(),
682 backing_mtime_ns: stamp.mtime_ns,
683 backing_ctime_ns: stamp.ctime_ns,
684 })?;
685
686 let mut tags = Vec::new();
687 let mut ordinals: HashMap<String, u64> = HashMap::new();
688 for (key, value) in probed.tags {
689 if !key_passes_floor(&key) {
690 continue;
691 }
692 let ord = ordinals.entry(key.clone()).or_insert(0);
693 tags.push(Tag::new(&key, &value, *ord));
694 *ord += 1;
695 }
696 db.replace_tags(track_id, &tags)?;
697
698 let binary_tags = accept_binary_tags(abs_path, probed.binary_tags);
699 db.set_binary_tags(track_id, &binary_tags)?;
700
701 let mut sb_ordinals: HashMap<String, u64> = HashMap::new();
702 let structural_blocks: Vec<musefs_db::StructuralBlock> = probed
703 .structural_blocks
704 .into_iter()
705 .map(|(kind, body)| {
706 let ord = sb_ordinals.entry(kind.clone()).or_insert(0);
707 let sb = musefs_db::StructuralBlock {
708 kind,
709 ordinal: *ord,
710 body,
711 };
712 *ord += 1;
713 sb
714 })
715 .collect();
716 db.set_structural_blocks(track_id, &structural_blocks)?;
717
718 let mut track_arts = Vec::new();
719 for (ordinal, pic) in accept_pictures(abs_path, probed.pictures)
720 .into_iter()
721 .enumerate()
722 {
723 let art_id = db.upsert_art(&NewArt {
724 mime: pic.mime,
725 width: (pic.width != 0).then_some(pic.width),
726 height: (pic.height != 0).then_some(pic.height),
727 data: pic.data,
728 })?;
729 let picture_type = pic.picture_type.get();
730 track_arts.push(TrackArt {
731 art_id,
732 picture_type,
733 description: pic.description,
734 ordinal: ordinal as u64,
735 });
736 }
737 db.set_track_art(track_id, &track_arts)?;
738 Ok(())
739}
740
741fn ingest_bulk(
744 bw: &mut musefs_db::BulkWriter<'_>,
745 abs_path: &str,
746 stamp: BackingStamp,
747 probed: Probed,
748) -> Result<()> {
749 let track_id = bw.upsert_track(&NewTrack {
750 backing_path: abs_path.to_string(),
751 format: probed.format,
752 audio_offset: probed.audio_offset,
753 audio_length: probed.audio_length,
754 backing_size: stamp.size,
755 backing_mtime_ns: stamp.mtime_ns,
756 backing_ctime_ns: stamp.ctime_ns,
757 })?;
758
759 let mut tags = Vec::new();
760 let mut ordinals: HashMap<String, u64> = HashMap::new();
761 for (key, value) in &probed.tags {
762 if !key_passes_floor(key) {
763 continue;
764 }
765 let ord = ordinals.entry(key.clone()).or_insert(0);
766 tags.push(Tag::new(key, value, *ord));
767 *ord += 1;
768 }
769 bw.replace_tags(track_id, &tags)?;
770
771 let binary_tags = accept_binary_tags(abs_path, probed.binary_tags);
772 bw.set_binary_tags(track_id, &binary_tags)?;
773
774 let mut sb_ordinals: HashMap<String, u64> = HashMap::new();
775 let structural_blocks: Vec<musefs_db::StructuralBlock> = probed
776 .structural_blocks
777 .into_iter()
778 .map(|(kind, body)| {
779 let ord = sb_ordinals.entry(kind.clone()).or_insert(0);
780 let sb = musefs_db::StructuralBlock {
781 kind,
782 ordinal: *ord,
783 body,
784 };
785 *ord += 1;
786 sb
787 })
788 .collect();
789 bw.set_structural_blocks(track_id, &structural_blocks)?;
790
791 let mut track_arts = Vec::new();
792 for (ordinal, pic) in accept_pictures(abs_path, probed.pictures)
793 .into_iter()
794 .enumerate()
795 {
796 let art_id = bw.upsert_art(&NewArt {
797 mime: pic.mime,
798 width: (pic.width != 0).then_some(pic.width),
799 height: (pic.height != 0).then_some(pic.height),
800 data: pic.data,
801 })?;
802 let picture_type = pic.picture_type.get();
803 track_arts.push(TrackArt {
804 art_id,
805 picture_type,
806 description: pic.description,
807 ordinal: ordinal as u64,
808 });
809 }
810 bw.set_track_art(track_id, &track_arts)?;
811 Ok(())
812}
813
814pub fn scan_directory_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<ScanStats> {
824 let mut files = Vec::new();
825 let mut skipped = 0u64;
826 if root.is_file() {
827 if is_supported_audio(root) {
828 files.push(root.to_path_buf());
829 } else {
830 skipped += 1;
831 }
832 } else {
833 skipped += collect_audio(root, &mut files, opts.follow_symlinks)?;
834 }
835 db.apply_bulk_pragmas_self()?; let mut stats = run_pipeline(db, files, opts)?;
837 stats.skipped = skipped;
839 Ok(stats)
840}
841
842pub fn scan_directory(db: &Db, root: &Path) -> Result<ScanStats> {
844 scan_directory_with(db, root, &ScanOptions::default())
845}
846
847fn run_pipeline(db: &Db, files: Vec<PathBuf>, opts: &ScanOptions) -> Result<ScanStats> {
851 use std::sync::Arc;
852 use std::sync::atomic::{AtomicU64, Ordering};
853
854 let jobs = effective_jobs(opts.jobs);
855 let window = opts.window;
856 let cap = opts.batch_bytes;
857 let budget = Arc::new(ByteBudget::new(cap));
858 let failed = Arc::new(AtomicU64::new(0));
859 let raced = Arc::new(AtomicU64::new(0));
860
861 let work = Arc::new(std::sync::Mutex::new(files.into_iter()));
863 let (tx, rx) = sync_channel::<Unit>(jobs * 2);
864
865 let mut workers = Vec::with_capacity(jobs);
866 for _ in 0..jobs {
867 let work = Arc::clone(&work);
868 let tx = tx.clone();
869 let budget = Arc::clone(&budget);
870 let failed = Arc::clone(&failed);
871 let raced = Arc::clone(&raced);
872 workers.push(std::thread::spawn(move || {
873 loop {
874 let next = { work.lock().unwrap().next() };
875 let Some(path) = next else { break };
876 match probe_file(&path, window) {
877 Ok(ProbeOutcome::Probed(probed, stamp)) => {
878 let abs = match std::fs::canonicalize(&path) {
879 Ok(abs) => abs,
880 Err(e) => {
881 log::warn!("skipping {}: {e}", path.display());
882 failed.fetch_add(1, Ordering::Relaxed);
883 continue;
884 }
885 };
886 let weight = payload_weight(&probed);
887 budget.acquire(weight); let unit = Unit {
889 abs_path: abs.to_string_lossy().into_owned(),
890 stamp,
891 probed,
892 weight,
893 };
894 if tx.send(unit).is_err() {
895 budget.release(weight);
896 break;
897 }
898 }
899 Ok(ProbeOutcome::Unparseable) => {
900 failed.fetch_add(1, Ordering::Relaxed);
901 }
902 Err(e) => {
903 log::warn!("skipping {}: {e}", path.display());
904 failed.fetch_add(1, Ordering::Relaxed);
905 }
906 Ok(ProbeOutcome::Raced) => {
907 raced.fetch_add(1, Ordering::Relaxed);
908 }
909 }
910 }
911 }));
912 }
913 drop(tx); let mut scanned = 0u64;
917 let mut batch: Vec<Unit> = Vec::new();
918 let mut batch_bytes = 0u64;
919 let flush = |batch: &mut Vec<Unit>, batch_bytes: &mut u64, scanned: &mut u64| -> Result<()> {
920 if batch.is_empty() {
921 return Ok(());
922 }
923 let mut bw = db.bulk_writer()?;
924 let mut weights = Vec::with_capacity(batch.len());
927 for Unit {
928 abs_path,
929 stamp,
930 probed,
931 weight,
932 } in batch.drain(..)
933 {
934 weights.push(weight);
935 ingest_bulk(&mut bw, &abs_path, stamp, probed)?;
936 *scanned += 1;
937 }
938 bw.commit()?;
939 for w in weights {
940 budget.release(w);
941 }
942 *batch_bytes = 0;
943 Ok(())
944 };
945
946 loop {
956 match rx.try_recv() {
957 Ok(unit) => {
958 batch_bytes += unit.weight;
959 batch.push(unit);
960 if batch.len() >= BATCH_FILES || batch_bytes >= cap {
961 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
962 }
963 }
964 Err(std::sync::mpsc::TryRecvError::Empty) => {
965 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
966 match rx.recv() {
967 Ok(unit) => {
968 batch_bytes += unit.weight;
969 batch.push(unit);
970 if batch.len() >= BATCH_FILES || batch_bytes >= cap {
971 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
972 }
973 }
974 Err(_) => break, }
976 }
977 Err(std::sync::mpsc::TryRecvError::Disconnected) => break,
978 }
979 }
980 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
981 for w in workers {
986 let _ = w.join();
987 }
988
989 Ok(ScanStats {
990 scanned,
991 skipped: 0, failed: failed.load(Ordering::Relaxed),
993 raced: raced.load(Ordering::Relaxed),
994 })
995}
996
997#[doc(hidden)]
1000pub fn scan_directory_full_oracle(db: &Db, root: &Path) -> Result<ScanStats> {
1001 let mut files = Vec::new();
1002 let mut skipped = 0u64;
1003 if root.is_file() {
1004 if is_supported_audio(root) {
1005 files.push(root.to_path_buf());
1006 } else {
1007 skipped += 1;
1008 }
1009 } else {
1010 skipped += collect_audio(root, &mut files, false)?;
1011 }
1012 let mut stats = ScanStats {
1013 scanned: 0,
1014 skipped,
1015 failed: 0,
1016 raced: 0,
1017 };
1018 for path in files {
1019 let bytes = std::fs::read(&path)?;
1020 let Some(probed) = probe_full(&path, &bytes) else {
1021 stats.failed += 1;
1022 continue;
1023 };
1024 let meta = std::fs::metadata(&path)?;
1025 let abs = std::fs::canonicalize(&path)?;
1026 ingest(db, &abs.to_string_lossy(), &meta, probed)?;
1027 stats.scanned += 1;
1028 }
1029 Ok(stats)
1030}
1031
1032pub fn revalidate_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<RevalidateStats> {
1047 let mut files = Vec::new();
1048 if root.is_file() {
1049 if is_supported_audio(root) {
1050 files.push(root.to_path_buf());
1051 }
1052 } else {
1053 collect_audio(root, &mut files, opts.follow_symlinks)?;
1054 }
1055 db.apply_bulk_pragmas_self()?;
1056
1057 let existing: HashMap<String, (crate::freshness::BackingStamp, i64, Format)> = db
1060 .list_tracks()?
1061 .into_iter()
1062 .map(|t| {
1063 (
1064 t.backing_path.clone(),
1065 (
1066 crate::freshness::BackingStamp::from_track(&t),
1067 t.id,
1068 t.format,
1069 ),
1070 )
1071 })
1072 .collect();
1073 let have_structural = db.track_ids_with_structural_blocks()?;
1077
1078 let mut unchanged = 0u64;
1079 let mut skip_failed = 0u64;
1080 let mut changed: Vec<PathBuf> = Vec::new();
1081 for path in files {
1082 let meta = match std::fs::metadata(&path) {
1083 Ok(meta) => meta,
1084 Err(e) => {
1085 log::warn!("skipping {}: {e}", path.display());
1086 skip_failed += 1;
1087 continue;
1088 }
1089 };
1090 let abs = match std::fs::canonicalize(&path) {
1091 Ok(abs) => abs,
1092 Err(e) => {
1093 log::warn!("skipping {}: {e}", path.display());
1094 skip_failed += 1;
1095 continue;
1096 }
1097 };
1098 let key = abs.to_string_lossy().into_owned();
1099 if let Some((stamp, id, format)) = existing.get(&key).copied() {
1100 let needs_backfill = format == Format::Flac && !have_structural.contains(&id);
1101 if crate::freshness::BackingStamp::from_metadata(&meta) == stamp && !needs_backfill {
1102 unchanged += 1;
1103 continue;
1104 }
1105 }
1106 changed.push(path);
1107 }
1108
1109 let scan = run_pipeline(db, changed, opts)?;
1110
1111 let canon_root = std::fs::canonicalize(root)?;
1113 let mut pruned = 0u64;
1114 for track in db.list_tracks()? {
1115 if !Path::new(&track.backing_path).starts_with(&canon_root) {
1116 continue;
1117 }
1118 if let Err(e) = std::fs::metadata(&track.backing_path)
1119 && e.kind() == std::io::ErrorKind::NotFound
1120 {
1121 db.delete_track(track.id)?;
1122 pruned += 1;
1123 }
1124 }
1125 db.gc_orphan_art()?;
1126
1127 Ok(RevalidateStats {
1128 updated: scan.scanned,
1129 unchanged,
1130 pruned,
1131 failed: scan.failed + skip_failed,
1132 raced: scan.raced,
1133 })
1134}
1135
1136pub fn revalidate(db: &Db, root: &Path) -> Result<RevalidateStats> {
1138 revalidate_with(db, root, &ScanOptions::default())
1139}
1140
1141#[cfg(test)]
1142mod scan_unit_tests {
1143 use super::*;
1144 use musefs_format::PictureType;
1145 use std::io::Write;
1146
1147 #[test]
1153 fn scan_options_defaults() {
1154 let d = ScanOptions::default();
1155 assert_eq!(d.jobs, 0, "jobs default = use available parallelism");
1156 assert_eq!(d.window, 1_048_576, "window default = 1 MiB");
1157 assert_eq!(d.batch_bytes, 67_108_864, "batch_bytes default = 64 MiB");
1158 }
1159
1160 fn write_temp(name: &str, bytes: &[u8]) -> (tempfile::TempDir, std::fs::File) {
1163 let dir = tempfile::tempdir().unwrap();
1164 let path = dir.path().join(name);
1165 std::fs::File::create(&path)
1166 .unwrap()
1167 .write_all(bytes)
1168 .unwrap();
1169 let file = std::fs::File::open(&path).unwrap();
1170 (dir, file)
1171 }
1172
1173 #[test]
1178 fn read_tail_128_exact_128_bytes() {
1179 let pattern: Vec<u8> = (0u8..128).collect();
1181 let (_dir, file) = write_temp("tail128.bin", &pattern);
1182
1183 let tail = read_tail_128(&file, 128).unwrap();
1184 let expected: [u8; 128] = pattern.clone().try_into().unwrap();
1185 assert_eq!(tail, Some(expected));
1191 }
1192
1193 #[test]
1195 fn read_tail_128_short_file_is_none() {
1196 let (_dir, file) = write_temp("tail127.bin", &[0xABu8; 127]);
1197 assert_eq!(read_tail_128(&file, 127).unwrap(), None);
1198 }
1199
1200 #[test]
1204 fn effective_jobs_zero_uses_parallelism_and_nonzero_passes_through() {
1205 let par = std::thread::available_parallelism().map_or(1, std::num::NonZero::get);
1206 assert_eq!(effective_jobs(0), par);
1207 assert_eq!(effective_jobs(4), 4);
1208 assert_eq!(effective_jobs(1), 1);
1209 }
1210
1211 #[test]
1215 fn payload_weight_sums_all_buffered_payloads() {
1216 let pic = |n: usize| EmbeddedPicture {
1217 mime: "image/png".to_string(),
1218 picture_type: PictureType::new(3).unwrap(),
1219 description: String::new(),
1220 width: 0,
1221 height: 0,
1222 data: vec![0u8; n],
1223 };
1224 let probed = Probed {
1225 format: Format::Flac,
1226 audio_offset: 0,
1227 audio_length: 0,
1228 tags: Vec::new(),
1229 pictures: vec![pic(3), pic(5)],
1230 binary_tags: vec![EmbeddedBinaryTag {
1231 key: "APPLICATION".into(),
1232 payload: vec![0u8; 4],
1233 }],
1234 structural_blocks: vec![("SEEKTABLE".into(), vec![0u8; 2])],
1235 };
1236 assert_eq!(payload_weight(&probed), 14);
1238
1239 let empty = Probed {
1241 format: Format::Flac,
1242 audio_offset: 0,
1243 audio_length: 0,
1244 tags: Vec::new(),
1245 pictures: Vec::new(),
1246 binary_tags: Vec::new(),
1247 structural_blocks: Vec::new(),
1248 };
1249 assert_eq!(payload_weight(&empty), 0);
1250 }
1251
1252 fn mp4_with_binary_freeform(mean: &str, name: &str, value: &[u8]) -> Vec<u8> {
1257 fn bx(kind: &[u8; 4], body: &[u8]) -> Vec<u8> {
1258 let mut v = u32::try_from(8 + body.len())
1259 .unwrap()
1260 .to_be_bytes()
1261 .to_vec();
1262 v.extend_from_slice(kind);
1263 v.extend_from_slice(body);
1264 v
1265 }
1266 let mut hdlr_body = vec![0u8; 8];
1269 hdlr_body.extend_from_slice(b"soun");
1270 hdlr_body.extend_from_slice(&[0u8; 12]); let trak = bx(b"trak", &bx(b"mdia", &bx(b"hdlr", &hdlr_body)));
1272
1273 let mut mean_body = 0u32.to_be_bytes().to_vec();
1275 mean_body.extend_from_slice(mean.as_bytes());
1276 let mut name_body = 0u32.to_be_bytes().to_vec();
1277 name_body.extend_from_slice(name.as_bytes());
1278 let mut data_body = 0u32.to_be_bytes().to_vec(); data_body.extend_from_slice(&0u32.to_be_bytes()); data_body.extend_from_slice(value);
1281 let mut free = bx(b"mean", &mean_body);
1282 free.extend(bx(b"name", &name_body));
1283 free.extend(bx(b"data", &data_body));
1284 let ilst = bx(b"ilst", &bx(b"----", &free));
1285 let mut meta = 0u32.to_be_bytes().to_vec();
1286 meta.extend(bx(b"hdlr", &[0u8; 25]));
1287 meta.extend(ilst);
1288 let udta = bx(b"udta", &bx(b"meta", &meta));
1289
1290 let moov = bx(b"moov", &[trak, udta].concat());
1291 [bx(b"ftyp", b"M4A "), moov, bx(b"mdat", b"AUDIODATA")].concat()
1292 }
1293
1294 #[test]
1295 fn probe_full_surfaces_mp4_binary_freeform() {
1296 use musefs_format::mp4;
1297 let bytes = mp4_with_binary_freeform("com.serato.dj", "analysis", &[0x00, 0xAB, 0xCD]);
1298 let probed = probe_full(std::path::Path::new("/x.m4a"), &bytes).expect("probed");
1299 assert_eq!(probed.format, Format::M4a);
1300 let keys: Vec<&str> = probed.binary_tags.iter().map(|b| b.key.as_str()).collect();
1301 assert!(
1302 keys.contains(&"----:com.serato.dj:analysis"),
1303 "binary freeform not surfaced: {keys:?}"
1304 );
1305 let bt = probed
1306 .binary_tags
1307 .iter()
1308 .find(|b| b.key == "----:com.serato.dj:analysis")
1309 .unwrap();
1310 assert_eq!(bt.payload, vec![0x00, 0xAB, 0xCD]);
1311 let scan = mp4::read_structure(&bytes).unwrap();
1312 assert_eq!(probed.audio_offset, scan.mdat_payload_offset);
1313 }
1314
1315 fn mp4_with_covr(type_code: u32, value: &[u8]) -> Vec<u8> {
1316 fn bx(kind: &[u8; 4], body: &[u8]) -> Vec<u8> {
1317 let mut v = u32::try_from(8 + body.len())
1318 .unwrap()
1319 .to_be_bytes()
1320 .to_vec();
1321 v.extend_from_slice(kind);
1322 v.extend_from_slice(body);
1323 v
1324 }
1325 let mut hdlr_body = vec![0u8; 8];
1326 hdlr_body.extend_from_slice(b"soun");
1327 hdlr_body.extend_from_slice(&[0u8; 12]);
1328 let trak = bx(b"trak", &bx(b"mdia", &bx(b"hdlr", &hdlr_body)));
1329
1330 let mut data_body = type_code.to_be_bytes().to_vec();
1331 data_body.extend_from_slice(&0u32.to_be_bytes());
1332 data_body.extend_from_slice(value);
1333 let ilst = bx(b"ilst", &bx(b"covr", &bx(b"data", &data_body)));
1334 let mut meta = 0u32.to_be_bytes().to_vec();
1335 meta.extend(bx(b"hdlr", &[0u8; 25]));
1336 meta.extend(ilst);
1337 let udta = bx(b"udta", &bx(b"meta", &meta));
1338
1339 let moov = bx(b"moov", &[trak, udta].concat());
1340 [bx(b"ftyp", b"M4A "), moov, bx(b"mdat", b"AUDIODATA")].concat()
1341 }
1342
1343 #[test]
1344 fn probe_file_skips_oversized_mp4_covr() {
1345 let oversized = vec![0xFFu8; MAX_ART_BYTES + 1];
1346 let bytes = mp4_with_covr(13, &oversized);
1347 let dir = tempfile::tempdir().unwrap();
1348 let path = dir.path().join("oversized_art.m4a");
1349 std::fs::write(&path, &bytes).unwrap();
1350 let probed = match probe_file(&path, 0).unwrap() {
1351 ProbeOutcome::Probed(p, _) => p,
1352 other => panic!("expected Probed, got {other:?}"),
1353 };
1354 assert_eq!(probed.format, Format::M4a);
1355 assert!(
1356 probed.pictures.is_empty(),
1357 "oversized covr must be skipped at extraction, not materialized"
1358 );
1359 }
1360
1361 #[test]
1362 fn probe_file_skips_oversized_mp4_binary_freeform() {
1363 let oversized = vec![0xABu8; MAX_BINARY_TAG_BYTES + 1];
1366 let bytes = mp4_with_binary_freeform("com.serato.dj", "analysis", &oversized);
1367 let dir = tempfile::tempdir().unwrap();
1368 let path = dir.path().join("oversized_bin.m4a");
1369 std::fs::write(&path, &bytes).unwrap();
1370 let probed = match probe_file(&path, 0).unwrap() {
1371 ProbeOutcome::Probed(p, _) => p,
1372 other => panic!("expected Probed, got {other:?}"),
1373 };
1374 assert_eq!(probed.format, Format::M4a);
1375 assert!(
1376 probed.binary_tags.is_empty(),
1377 "oversized binary freeform must be skipped at extraction, not materialized"
1378 );
1379 }
1380}
1381
#[cfg(test)]
mod ogg_probe_tests {
    use super::*;
    use musefs_format::ogg::page_test_support::{
        build_header_pub, lace_packet_pub, vorbis_body_empty,
    };

    /// Builds the Opus stream shared by every test in this module: header
    /// pages carrying an `OpusHead` packet and an `OpusTags` packet with an
    /// empty comment body, followed by one audio page with a 100-byte packet.
    ///
    /// Returns the whole stream and the byte length of the trailing audio page.
    fn opus_stream() -> (Vec<u8>, usize) {
        let head = b"OpusHead\x01\x02\x38\x01\x80\xbb\x00\x00\x00\x00\x00".to_vec();
        let mut tags = b"OpusTags".to_vec();
        tags.extend_from_slice(&vorbis_body_empty());

        let (mut stream, _) = build_header_pub(0x1234, &[&head, &tags]);
        let (audio_page, _) = lace_packet_pub(0x1234, 2, false, 960, &[0u8; 100]);
        stream.extend_from_slice(&audio_page);
        (stream, audio_page.len())
    }

    /// A `.opus` file probes as Opus with the audio offset at the first audio page.
    #[test]
    fn probe_detects_opus_and_seeds_tags() {
        let (stream, audio_len) = opus_stream();

        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("song.opus");
        std::fs::write(&target, &stream).unwrap();

        let probed = probe_full(&target, &stream).expect("opus should probe");
        assert_eq!(probed.format, Format::Opus);
        assert_eq!(probed.audio_offset, (stream.len() - audio_len) as u64);
    }

    /// Pointing `scan_directory` at a single `.opus` file ingests exactly that file.
    #[test]
    fn scan_single_opus_file_ingests_it() {
        let (stream, _) = opus_stream();

        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("single.opus");
        std::fs::write(&target, &stream).unwrap();

        let db = musefs_db::Db::open_in_memory().unwrap();
        let stats = crate::scan_directory(&db, &target).unwrap();
        assert_eq!(stats.scanned, 1);
        assert_eq!(stats.skipped, 0);
    }

    /// The `.oga` extension is probed like any other Ogg container.
    #[test]
    fn probe_recognizes_oga_alias() {
        let (stream, _) = opus_stream();

        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("song.oga");
        std::fs::write(&target, &stream).unwrap();

        let probed = probe_full(&target, &stream).expect("oga should probe");
        assert_eq!(probed.format, Format::Opus);
    }
}
1451
#[cfg(test)]
mod wav_probe_tests {
    use super::*;

    /// Builds a tiny RIFF/WAVE file: a 16-byte `fmt ` chunk followed by a
    /// `data` chunk of 16 zero bytes.
    fn build_wav() -> Vec<u8> {
        // One RIFF chunk: four-byte id, little-endian u32 payload size, payload.
        fn chunk(id: &[u8; 4], payload: &[u8]) -> Vec<u8> {
            let size = u32::try_from(payload.len()).unwrap().to_le_bytes();
            [id.as_slice(), size.as_slice(), payload].concat()
        }

        // Six little-endian fields in the order the `fmt ` chunk stores them.
        let fmt = [
            1u16.to_le_bytes().as_slice(),
            1u16.to_le_bytes().as_slice(),
            44_100u32.to_le_bytes().as_slice(),
            88_200u32.to_le_bytes().as_slice(),
            2u16.to_le_bytes().as_slice(),
            16u16.to_le_bytes().as_slice(),
        ]
        .concat();

        let body = [chunk(b"fmt ", &fmt), chunk(b"data", &[0u8; 16])].concat();
        // The RIFF size covers the `WAVE` tag (4 bytes) plus every chunk.
        let riff_size = u32::try_from(body.len() + 4).unwrap().to_le_bytes();
        [
            b"RIFF".as_slice(),
            riff_size.as_slice(),
            b"WAVE".as_slice(),
            body.as_slice(),
        ]
        .concat()
    }

    /// A `.wav` file probes as WAV and reports the 16-byte `data` payload length.
    #[test]
    fn probe_detects_wav() {
        let wav = build_wav();
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("song.wav");
        std::fs::write(&target, &wav).unwrap();

        let probed = probe_full(&target, &wav).expect("wav should probe");
        assert_eq!(probed.format, Format::Wav);
        assert_eq!(probed.audio_length, 16);
    }

    /// Pointing `scan_directory` at a single `.wav` file ingests exactly that file.
    #[test]
    fn scan_single_wav_file_ingests_it() {
        let wav = build_wav();
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("single.wav");
        std::fs::write(&target, &wav).unwrap();

        let db = musefs_db::Db::open_in_memory().unwrap();
        let stats = crate::scan_directory(&db, &target).unwrap();
        assert_eq!(stats.scanned, 1);
        assert_eq!(stats.skipped, 0);
    }
}
1511
1512#[cfg(test)]
1513mod hardening_tests {
1514 use super::*;
1515
1516 #[test]
1517 fn max_art_bytes_is_16_mib_minus_64_kib() {
1518 assert_eq!(MAX_ART_BYTES, 16_711_680);
1519 }
1520
1521 #[test]
1522 fn scan_caps_match_db_limits() {
1523 assert_eq!(
1524 i64::try_from(MAX_ART_BYTES).unwrap(),
1525 musefs_db::limits::MAX_ART_BYTES
1526 );
1527 assert_eq!(
1528 i64::try_from(MAX_BINARY_TAG_BYTES).unwrap(),
1529 musefs_db::limits::MAX_BINARY_TAG_BYTES
1530 );
1531 }
1532
1533 #[test]
1534 fn is_supported_audio_accepts_known_and_rejects_unknown() {
1535 for ok in [
1536 "a.flac", "a.mp3", "a.m4a", "a.m4b", "a.ogg", "a.oga", "a.opus", "a.wav",
1537 ] {
1538 assert!(
1539 is_supported_audio(std::path::Path::new(ok)),
1540 "{ok} should be supported"
1541 );
1542 }
1543 for bad in ["a.txt", "a.png", "a", "a.flacx"] {
1544 assert!(
1545 !is_supported_audio(std::path::Path::new(bad)),
1546 "{bad} must be rejected"
1547 );
1548 }
1549 }
1550
1551 #[test]
1552 fn collect_audio_skips_unsupported_files() {
1553 let dir = tempfile::tempdir().unwrap();
1554 std::fs::write(dir.path().join("keep.flac"), b"x").unwrap();
1555 std::fs::write(dir.path().join("skip.txt"), b"x").unwrap();
1556 let mut out = Vec::new();
1557 collect_audio(dir.path(), &mut out, false).unwrap();
1558 assert_eq!(out.len(), 1);
1559 assert!(out[0].ends_with("keep.flac"));
1560 }
1561
1562 #[test]
1563 fn scan_options_default_does_not_follow_symlinks() {
1564 assert!(!ScanOptions::default().follow_symlinks);
1565 }
1566
1567 #[test]
1568 fn collect_audio_follows_symlinked_file_when_enabled() {
1569 let dir = tempfile::tempdir().unwrap();
1570 let real = dir.path().join("real.flac");
1571 std::fs::write(&real, b"x").unwrap();
1572 let lib = dir.path().join("lib");
1573 std::fs::create_dir(&lib).unwrap();
1574 std::os::unix::fs::symlink(&real, lib.join("link.flac")).unwrap();
1575
1576 let mut on = Vec::new();
1577 collect_audio(&lib, &mut on, true).unwrap();
1578 assert_eq!(
1579 on.len(),
1580 1,
1581 "symlinked file should be collected when following"
1582 );
1583
1584 let mut off = Vec::new();
1585 collect_audio(&lib, &mut off, false).unwrap();
1586 assert!(
1587 off.is_empty(),
1588 "symlinked file should be skipped by default"
1589 );
1590 }
1591
1592 #[test]
1593 fn collect_audio_follows_symlinked_dir_when_enabled() {
1594 let dir = tempfile::tempdir().unwrap();
1595 let real_dir = dir.path().join("music");
1596 std::fs::create_dir(&real_dir).unwrap();
1597 std::fs::write(real_dir.join("song.flac"), b"x").unwrap();
1598 let root = dir.path().join("root");
1599 std::fs::create_dir(&root).unwrap();
1600 std::os::unix::fs::symlink(&real_dir, root.join("linkdir")).unwrap();
1601
1602 let mut on = Vec::new();
1603 collect_audio(&root, &mut on, true).unwrap();
1604 assert_eq!(
1605 on.len(),
1606 1,
1607 "files under a symlinked dir should be collected"
1608 );
1609
1610 let mut off = Vec::new();
1611 collect_audio(&root, &mut off, false).unwrap();
1612 assert!(off.is_empty(), "symlinked dir should be skipped by default");
1613 }
1614
1615 #[test]
1616 fn collect_audio_terminates_on_symlink_cycle() {
1617 let dir = tempfile::tempdir().unwrap();
1618 let a = dir.path().join("a");
1619 std::fs::create_dir(&a).unwrap();
1620 std::fs::write(a.join("song.flac"), b"x").unwrap();
1621 std::os::unix::fs::symlink(dir.path(), a.join("loop")).unwrap();
1622
1623 let mut out = Vec::new();
1624 collect_audio(dir.path(), &mut out, true).unwrap();
1625 assert_eq!(
1626 out.iter().filter(|p| p.ends_with("song.flac")).count(),
1627 1,
1628 "each real file collected at most once despite the cycle"
1629 );
1630 }
1631
1632 #[test]
1633 fn collect_audio_skips_broken_symlink_when_following() {
1634 let dir = tempfile::tempdir().unwrap();
1635 std::fs::write(dir.path().join("real.flac"), b"x").unwrap();
1636 std::os::unix::fs::symlink(dir.path().join("nonexistent"), dir.path().join("dangling"))
1637 .unwrap();
1638
1639 let mut out = Vec::new();
1640 let result = collect_audio(dir.path(), &mut out, true);
1641 assert!(
1642 result.is_ok(),
1643 "a dangling symlink must not abort collection"
1644 );
1645 assert_eq!(out.len(), 1);
1646 assert!(out[0].ends_with("real.flac"));
1647 }
1648
1649 #[test]
1650 fn collect_audio_does_not_follow_symlinks_by_default() {
1651 let dir = tempfile::tempdir().unwrap();
1652 std::fs::write(dir.path().join("real.flac"), b"x").unwrap();
1653 let other = dir.path().join("other.flac");
1654 std::fs::write(&other, b"x").unwrap();
1655 std::os::unix::fs::symlink(&other, dir.path().join("link.flac")).unwrap();
1656
1657 let mut out = Vec::new();
1658 collect_audio(dir.path(), &mut out, false).unwrap();
1659 assert_eq!(out.len(), 2);
1660 }
1661
1662 #[test]
1663 fn collect_audio_ignores_symlink_to_non_file_target_when_following() {
1664 use std::os::unix::ffi::OsStrExt;
1665
1666 let dir = tempfile::tempdir().unwrap();
1667 let fifo = dir.path().join("fifo");
1670 let c_path = std::ffi::CString::new(fifo.as_os_str().as_bytes()).unwrap();
1671 #[expect(unsafe_code, reason = "libc::mkfifo FFI; no std equivalent")]
1672 let rc = unsafe { libc::mkfifo(c_path.as_ptr(), 0o644) };
1673 assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error());
1674
1675 std::os::unix::fs::symlink(&fifo, dir.path().join("link.flac")).unwrap();
1678
1679 let mut out = Vec::new();
1680 collect_audio(dir.path(), &mut out, true).unwrap();
1681 assert!(
1682 out.is_empty(),
1683 "a symlink to a non-file, non-dir target must not be collected"
1684 );
1685 }
1686
1687 #[test]
1688 fn probe_returns_none_for_supported_ext_with_garbage_contents() {
1689 let dir = tempfile::tempdir().unwrap();
1690 for name in ["bad.flac", "bad.mp3", "bad.m4a", "bad.wav", "bad.opus"] {
1691 let path = dir.path().join(name);
1692 std::fs::write(&path, b"not a real audio file").unwrap();
1693 assert!(
1694 probe_full(&path, b"not a real audio file").is_none(),
1695 "{name} must skip"
1696 );
1697 }
1698 }
1699
1700 fn flac_block(bt: u8, body: &[u8], last: bool) -> Vec<u8> {
1701 let mut v = vec![(if last { 0x80 } else { 0 }) | (bt & 0x7F)];
1702 let n: u32 = u32::try_from(body.len()).unwrap();
1703 v.extend_from_slice(&[
1704 u8::try_from(n >> 16).unwrap(),
1705 u8::try_from(n >> 8).unwrap(),
1706 u8::try_from(n).unwrap(),
1707 ]);
1708 v.extend_from_slice(body);
1709 v
1710 }
1711 fn streaminfo() -> Vec<u8> {
1712 let mut si = vec![
1713 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0xC4, 0x42, 0xF0,
1714 0x00, 0x00, 0x00, 0x00,
1715 ];
1716 si.extend_from_slice(&[0u8; 16]);
1717 si
1718 }
1719 fn vorbis_comment(entries: &[&str]) -> Vec<u8> {
1720 let mut vc = Vec::new();
1721 let vendor = b"x";
1722 vc.extend_from_slice(&u32::try_from(vendor.len()).unwrap().to_le_bytes());
1723 vc.extend_from_slice(vendor);
1724 vc.extend_from_slice(&u32::try_from(entries.len()).unwrap().to_le_bytes());
1725 for e in entries {
1726 vc.extend_from_slice(&u32::try_from(e.len()).unwrap().to_le_bytes());
1727 vc.extend_from_slice(e.as_bytes());
1728 }
1729 vc
1730 }
1731 fn picture(width: u32, height: u32, data: &[u8]) -> Vec<u8> {
1732 let mut b = Vec::new();
1733 b.extend_from_slice(&3u32.to_be_bytes());
1734 let mime = "image/png";
1735 b.extend_from_slice(&u32::try_from(mime.len()).unwrap().to_be_bytes());
1736 b.extend_from_slice(mime.as_bytes());
1737 b.extend_from_slice(&0u32.to_be_bytes());
1738 b.extend_from_slice(&width.to_be_bytes());
1739 b.extend_from_slice(&height.to_be_bytes());
1740 b.extend_from_slice(&0u32.to_be_bytes());
1741 b.extend_from_slice(&0u32.to_be_bytes());
1742 b.extend_from_slice(&u32::try_from(data.len()).unwrap().to_be_bytes());
1743 b.extend_from_slice(data);
1744 b
1745 }
1746 fn write_flac(path: &std::path::Path, entries: &[&str], pic: Option<(u32, u32)>) {
1747 let mut out = b"fLaC".to_vec();
1748 out.extend(flac_block(0, &streaminfo(), false));
1749 let last_is_vc = pic.is_none();
1750 out.extend(flac_block(4, &vorbis_comment(entries), last_is_vc));
1751 if let Some((w, h)) = pic {
1752 out.extend(flac_block(6, &picture(w, h, &[0xAB; 64]), true));
1753 }
1754 out.extend_from_slice(&[0xCD; 128]);
1755 std::fs::write(path, &out).unwrap();
1756 }
1757
1758 #[test]
1759 fn ingest_assigns_sequential_ordinals_per_key() {
1760 let dir = tempfile::tempdir().unwrap();
1761 let path = dir.path().join("multi.flac");
1762 write_flac(&path, &["ARTIST=A1", "ARTIST=A2"], None);
1763 let db = musefs_db::Db::open_in_memory().unwrap();
1764 crate::scan_directory(&db, &path).unwrap();
1765 let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1766 let mut artists: Vec<(u64, String)> = db
1767 .get_tags(track.id)
1768 .unwrap()
1769 .into_iter()
1770 .filter(|t| t.key.eq_ignore_ascii_case("artist"))
1771 .map(|t| (t.ordinal, t.value))
1772 .collect();
1773 artists.sort();
1774 assert_eq!(artists, vec![(0, "A1".to_string()), (1, "A2".to_string())]);
1775 }
1776
1777 #[test]
1778 fn ingest_stores_nonzero_art_dimensions() {
1779 let dir = tempfile::tempdir().unwrap();
1780 let path = dir.path().join("art.flac");
1781 write_flac(&path, &["ARTIST=A", "TITLE=T"], Some((10, 20)));
1782 let db = musefs_db::Db::open_in_memory().unwrap();
1783 crate::scan_directory(&db, &path).unwrap();
1784 let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1785 let ta = db.get_track_art(track.id).unwrap();
1786 assert_eq!(ta.len(), 1);
1787 let meta = db.get_art_meta(ta[0].art_id).unwrap().unwrap();
1788 assert_eq!(meta.width, Some(10));
1789 assert_eq!(meta.height, Some(20));
1790 }
1791
1792 #[test]
1793 fn ingest_oracle_path_stores_nonzero_art_dimensions() {
1794 let dir = tempfile::tempdir().unwrap();
1797 let path = dir.path().join("art.flac");
1798 write_flac(&path, &["ARTIST=A", "TITLE=T"], Some((10, 20)));
1799 let db = musefs_db::Db::open_in_memory().unwrap();
1800 crate::scan_directory_full_oracle(&db, &path).unwrap();
1801 let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1802 let ta = db.get_track_art(track.id).unwrap();
1803 assert_eq!(ta.len(), 1);
1804 let meta = db.get_art_meta(ta[0].art_id).unwrap().unwrap();
1805 assert_eq!(meta.width, Some(10));
1806 assert_eq!(meta.height, Some(20));
1807 }
1808
1809 #[test]
1810 fn scan_directory_counts_scanned_failed_and_skipped() {
1811 let dir = tempfile::tempdir().unwrap();
1812 write_flac(
1813 &dir.path().join("ok1.flac"),
1814 &["ARTIST=A", "TITLE=T1"],
1815 None,
1816 );
1817 write_flac(
1818 &dir.path().join("ok2.flac"),
1819 &["ARTIST=A", "TITLE=T2"],
1820 None,
1821 );
1822 std::fs::write(dir.path().join("bad.flac"), b"garbage").unwrap();
1824 std::fs::write(dir.path().join("notes.txt"), b"hello").unwrap();
1826 let db = musefs_db::Db::open_in_memory().unwrap();
1827 let stats = crate::scan_directory(&db, dir.path()).unwrap();
1828 assert_eq!(stats.scanned, 2);
1829 assert_eq!(stats.failed, 1);
1830 assert_eq!(stats.skipped, 1);
1831 }
1832
1833 #[test]
1834 fn revalidate_buckets_unchanged_and_prunes_missing() {
1835 let dir = tempfile::tempdir().unwrap();
1836 let keep = dir.path().join("keep.flac");
1837 write_flac(&keep, &["ARTIST=A", "TITLE=T"], None);
1838 let db = musefs_db::Db::open_in_memory().unwrap();
1839 crate::scan_directory(&db, dir.path()).unwrap();
1840
1841 let s1 = crate::revalidate(&db, dir.path()).unwrap();
1842 assert_eq!(s1.unchanged, 1);
1843 assert_eq!(s1.updated, 0);
1844 assert_eq!(s1.pruned, 0);
1845
1846 std::fs::remove_file(&keep).unwrap();
1847 let s2 = crate::revalidate(&db, dir.path()).unwrap();
1848 assert_eq!(s2.pruned, 1);
1849 assert!(db.list_tracks().unwrap().is_empty());
1850 }
1851
1852 #[test]
1853 fn revalidate_does_not_prune_on_non_notfound_error() {
1854 let dir = tempfile::tempdir().unwrap();
1855 let file = dir.path().join("real.flac");
1856 write_flac(&file, &["ARTIST=A", "TITLE=T"], None);
1857 let db = musefs_db::Db::open_in_memory().unwrap();
1858 crate::scan_directory(&db, dir.path()).unwrap();
1859
1860 use musefs_db::{Format, NewTrack};
1861 let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1862 db.delete_track(track.id).unwrap();
1863 let canon = std::fs::canonicalize(dir.path()).unwrap();
1864 let ghost = canon.join("real.flac").join("ghost.flac");
1865 db.upsert_track(&NewTrack {
1866 backing_path: ghost.to_string_lossy().into_owned(),
1867 format: Format::Flac,
1868 audio_offset: 0,
1869 audio_length: 0,
1870 backing_size: 0,
1871 backing_mtime_ns: 0,
1872 backing_ctime_ns: 0,
1873 })
1874 .unwrap();
1875
1876 let stats = crate::revalidate(&db, dir.path()).unwrap();
1877 assert_eq!(stats.pruned, 0, "ENOTDIR is not NotFound → must not prune");
1878 assert!(
1879 db.list_tracks()
1880 .unwrap()
1881 .iter()
1882 .any(|t| t.backing_path == ghost.to_string_lossy()),
1883 "ghost track must still exist"
1884 );
1885 }
1886
1887 #[test]
1888 fn scan_ingests_binary_tags_and_promotes() {
1889 use id3::frame::{Content, Popularimeter, Unknown};
1890 use id3::{Encoder, Frame, Tag, TagLike, Version};
1891
1892 let dir = tempfile::tempdir().unwrap();
1893
1894 let mut tag = Tag::new();
1896 tag.add_frame(Popularimeter {
1897 user: "u".into(),
1898 rating: 128,
1899 counter: 3,
1900 });
1901 tag.add_frame(Frame::with_content(
1902 "PRIV",
1903 Content::Unknown(Unknown {
1904 data: vec![1, 1, 2, 3, 5],
1905 version: Version::Id3v24,
1906 }),
1907 ));
1908 let mut bytes = Vec::new();
1909 Encoder::new()
1910 .version(Version::Id3v24)
1911 .encode(&tag, &mut bytes)
1912 .unwrap();
1913 bytes.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00]);
1915 std::fs::write(dir.path().join("a.mp3"), &bytes).unwrap();
1916
1917 let db = musefs_db::Db::open_in_memory().unwrap();
1918 crate::scan::scan_directory(&db, dir.path()).unwrap();
1919 let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1920 let tid = track.id;
1921
1922 let bin = db.get_binary_tags(tid).unwrap();
1924 assert!(
1925 bin.iter().any(|r| r.key == "PRIV" && r.byte_len == 5),
1926 "PRIV not ingested as binary row; got: {bin:?}"
1927 );
1928
1929 let texts = db.get_tags(tid).unwrap();
1931 assert!(
1932 texts.iter().any(|t| t.key == "rating" && t.value == "128"),
1933 "rating not promoted; got: {texts:?}"
1934 );
1935 assert!(
1936 texts.iter().any(|t| t.key == "playcount" && t.value == "3"),
1937 "playcount not promoted; got: {texts:?}"
1938 );
1939 }
1940
1941 fn probed_with_mixed_binary_tags() -> Probed {
1946 Probed {
1947 format: musefs_db::Format::Mp3,
1948 audio_offset: 0,
1949 audio_length: 0,
1950 tags: Vec::new(),
1951 pictures: Vec::new(),
1952 binary_tags: vec![
1953 EmbeddedBinaryTag {
1954 key: "PRIV".into(),
1955 payload: vec![1, 2, 3],
1956 },
1957 EmbeddedBinaryTag {
1958 key: "GEOB".into(),
1959 payload: Vec::new(),
1960 },
1961 EmbeddedBinaryTag {
1962 key: "SYLT".into(),
1963 payload: vec![0u8; MAX_BINARY_TAG_BYTES + 1],
1964 },
1965 ],
1966 structural_blocks: Vec::new(),
1967 }
1968 }
1969
1970 #[test]
1971 fn ingest_filters_empty_and_oversize_binary_tags() {
1972 let dir = tempfile::tempdir().unwrap();
1973 let path = dir.path().join("a.mp3");
1974 std::fs::write(&path, b"x").unwrap();
1975 let meta = std::fs::metadata(&path).unwrap();
1976 let db = Db::open_in_memory().unwrap();
1977
1978 ingest(
1979 &db,
1980 &path.to_string_lossy(),
1981 &meta,
1982 probed_with_mixed_binary_tags(),
1983 )
1984 .unwrap();
1985
1986 let tid = db.list_tracks().unwrap()[0].id;
1987 let rows = db.get_binary_tags(tid).unwrap();
1988 assert_eq!(
1989 rows.len(),
1990 1,
1991 "only the valid binary tag survives: {rows:?}"
1992 );
1993 assert_eq!(rows[0].key, "PRIV");
1994 assert_eq!(rows[0].byte_len, 3);
1995 }
1996
1997 #[test]
1998 fn ingest_bulk_filters_empty_and_oversize_binary_tags() {
1999 let db = Db::open_in_memory().unwrap();
2000 {
2001 let mut bw = db.bulk_writer().unwrap();
2002 ingest_bulk(
2003 &mut bw,
2004 "/a.mp3",
2005 BackingStamp {
2006 size: 1,
2007 mtime_ns: 0,
2008 ctime_ns: 0,
2009 },
2010 probed_with_mixed_binary_tags(),
2011 )
2012 .unwrap();
2013 bw.commit().unwrap();
2014 }
2015 let tid = db.list_tracks().unwrap()[0].id;
2016 let rows = db.get_binary_tags(tid).unwrap();
2017 assert_eq!(
2018 rows.len(),
2019 1,
2020 "only the valid binary tag survives: {rows:?}"
2021 );
2022 assert_eq!(rows[0].key, "PRIV");
2023 assert_eq!(rows[0].byte_len, 3);
2024 }
2025
2026 #[test]
2027 fn accept_pictures_keeps_at_cap_and_drops_over_cap() {
2028 let mk = |len: usize| EmbeddedPicture {
2029 mime: "image/jpeg".to_string(),
2030 picture_type: musefs_format::PictureType::new(3).unwrap(),
2031 description: String::new(),
2032 width: 0,
2033 height: 0,
2034 data: vec![0u8; len],
2035 };
2036 let kept = accept_pictures("/x.flac", vec![mk(MAX_ART_BYTES), mk(MAX_ART_BYTES + 1)]);
2039 assert_eq!(kept.len(), 1, "exactly the at-cap picture survives");
2040 assert_eq!(kept[0].data.len(), MAX_ART_BYTES);
2041 }
2042
2043 #[test]
2044 fn accept_binary_tags_keeps_at_cap_and_drops_over_cap() {
2045 let mk = |len: usize| EmbeddedBinaryTag {
2046 key: "PRIV".to_string(),
2047 payload: vec![0u8; len],
2048 };
2049 let kept = accept_binary_tags(
2050 "/x.mp3",
2051 vec![mk(MAX_BINARY_TAG_BYTES), mk(MAX_BINARY_TAG_BYTES + 1)],
2052 );
2053 assert_eq!(kept.len(), 1, "exactly the at-cap binary tag survives");
2054 assert_eq!(kept[0].payload.len(), MAX_BINARY_TAG_BYTES);
2055 }
2056
2057 fn probed_with_text_tags(tags: &[(&str, &str)]) -> Probed {
2058 Probed {
2059 format: musefs_db::Format::Mp3,
2060 audio_offset: 0,
2061 audio_length: 0,
2062 tags: tags
2063 .iter()
2064 .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
2065 .collect(),
2066 pictures: Vec::new(),
2067 binary_tags: Vec::new(),
2068 structural_blocks: Vec::new(),
2069 }
2070 }
2071
2072 #[test]
2073 fn ingest_skips_empty_and_control_char_keys() {
2074 let dir = tempfile::tempdir().unwrap();
2075 let path = dir.path().join("a.mp3");
2076 std::fs::write(&path, b"x").unwrap();
2077 let meta = std::fs::metadata(&path).unwrap();
2078 let db = Db::open_in_memory().unwrap();
2079
2080 ingest(
2081 &db,
2082 &path.to_string_lossy(),
2083 &meta,
2084 probed_with_text_tags(&[
2085 ("artist", "Alice"),
2086 ("", "dropped"), ("a\u{7}b", "dropped"), ("a\u{0}b", "dropped"), ("a=b", "kept"), ]),
2091 )
2092 .unwrap();
2093
2094 let tid = db.list_tracks().unwrap()[0].id;
2095 let keys: Vec<String> = db
2096 .get_tags(tid)
2097 .unwrap()
2098 .into_iter()
2099 .map(|t| t.key)
2100 .collect();
2101 assert_eq!(keys, vec!["a=b".to_string(), "artist".to_string()]);
2103 }
2104
2105 #[test]
2106 fn ingest_bulk_skips_empty_and_control_char_keys() {
2107 let db = Db::open_in_memory().unwrap();
2108 {
2109 let mut bw = db.bulk_writer().unwrap();
2110 ingest_bulk(
2111 &mut bw,
2112 "/a.mp3",
2113 BackingStamp {
2114 size: 1,
2115 mtime_ns: 0,
2116 ctime_ns: 0,
2117 },
2118 probed_with_text_tags(&[
2119 ("artist", "Alice"),
2120 ("", "dropped"),
2121 ("a\u{7}b", "dropped"),
2122 ("a\u{0}b", "dropped"), ("a=b", "kept"),
2124 ]),
2125 )
2126 .unwrap();
2127 bw.commit().unwrap();
2128 }
2129 let tid = db.list_tracks().unwrap()[0].id;
2130 let keys: Vec<String> = db
2131 .get_tags(tid)
2132 .unwrap()
2133 .into_iter()
2134 .map(|t| t.key)
2135 .collect();
2136 assert_eq!(keys, vec!["a=b".to_string(), "artist".to_string()]);
2137 }
2138
2139 fn probed_with_duplicate_structural_kind() -> Probed {
2145 Probed {
2146 format: musefs_db::Format::Flac,
2147 audio_offset: 0,
2148 audio_length: 0,
2149 tags: Vec::new(),
2150 pictures: Vec::new(),
2151 binary_tags: Vec::new(),
2152 structural_blocks: vec![
2153 ("SEEKTABLE".to_string(), vec![0xA1]),
2154 ("SEEKTABLE".to_string(), vec![0xB2]),
2155 ],
2156 }
2157 }
2158
2159 #[test]
2160 fn ingest_assigns_sequential_structural_ordinals_per_kind() {
2161 let dir = tempfile::tempdir().unwrap();
2162 let path = dir.path().join("a.flac");
2163 std::fs::write(&path, b"x").unwrap();
2164 let meta = std::fs::metadata(&path).unwrap();
2165 let db = Db::open_in_memory().unwrap();
2166
2167 ingest(
2168 &db,
2169 &path.to_string_lossy(),
2170 &meta,
2171 probed_with_duplicate_structural_kind(),
2172 )
2173 .unwrap();
2174
2175 let tid = db.list_tracks().unwrap()[0].id;
2176 let got = db.get_structural_blocks(tid).unwrap();
2177 assert_eq!(got.len(), 2);
2180 assert_eq!(got[0].ordinal, 0);
2181 assert_eq!(got[0].body, vec![0xA1]);
2182 assert_eq!(got[1].ordinal, 1);
2183 assert_eq!(got[1].body, vec![0xB2]);
2184 }
2185
2186 fn probed_with_duplicate_tag_key() -> Probed {
2194 Probed {
2195 format: musefs_db::Format::Flac,
2196 audio_offset: 0,
2197 audio_length: 0,
2198 tags: vec![
2199 ("ARTIST".to_string(), "A".to_string()),
2200 ("ARTIST".to_string(), "B".to_string()),
2201 ],
2202 pictures: Vec::new(),
2203 binary_tags: Vec::new(),
2204 structural_blocks: Vec::new(),
2205 }
2206 }
2207
2208 #[test]
2209 fn ingest_assigns_sequential_tag_ordinals_per_key() {
2210 let dir = tempfile::tempdir().unwrap();
2211 let path = dir.path().join("a.flac");
2212 std::fs::write(&path, b"x").unwrap();
2213 let meta = std::fs::metadata(&path).unwrap();
2214 let db = Db::open_in_memory().unwrap();
2215
2216 ingest(
2217 &db,
2218 &path.to_string_lossy(),
2219 &meta,
2220 probed_with_duplicate_tag_key(),
2221 )
2222 .unwrap();
2223
2224 let tid = db.list_tracks().unwrap()[0].id;
2225 let got = db.get_tags(tid).unwrap();
2226 assert_eq!(got.len(), 2);
2229 assert_eq!(got[0].ordinal, 0);
2230 assert_eq!(got[0].value, "A");
2231 assert_eq!(got[1].ordinal, 1);
2232 assert_eq!(got[1].value, "B");
2233 }
2234
2235 #[test]
2236 fn ingest_bulk_assigns_sequential_structural_ordinals_per_kind() {
2237 let db = Db::open_in_memory().unwrap();
2238 {
2239 let mut bw = db.bulk_writer().unwrap();
2240 ingest_bulk(
2241 &mut bw,
2242 "/a.flac",
2243 BackingStamp {
2244 size: 1,
2245 mtime_ns: 0,
2246 ctime_ns: 0,
2247 },
2248 probed_with_duplicate_structural_kind(),
2249 )
2250 .unwrap();
2251 bw.commit().unwrap();
2252 }
2253 let tid = db.list_tracks().unwrap()[0].id;
2254 let got = db.get_structural_blocks(tid).unwrap();
2255 assert_eq!(got.len(), 2);
2256 assert_eq!(got[0].ordinal, 0);
2257 assert_eq!(got[0].body, vec![0xA1]);
2258 assert_eq!(got[1].ordinal, 1);
2259 assert_eq!(got[1].body, vec![0xB2]);
2260 }
2261}
2262
2263#[cfg(test)]
2264mod bounded_probe_tests {
2265 use super::*;
2266 use musefs_db::Db;
2267
2268 fn flac_fixture() -> Vec<u8> {
2271 let mut bytes = b"fLaC".to_vec();
2272 bytes.push(0x80); bytes.extend_from_slice(&[0, 0, 34]); bytes.extend(std::iter::repeat_n(0u8, 34));
2275 bytes.extend_from_slice(b"AUDIOPAYLOAD");
2276 bytes
2277 }
2278
2279 #[test]
2280 fn scan_counts_unreadable_file_as_failed_and_continues() {
2281 let dir = tempfile::tempdir().unwrap();
2282 let good = dir.path().join("good.flac");
2284 let mut bytes = b"fLaC".to_vec();
2285 bytes.push(0x80);
2286 bytes.extend_from_slice(&[0, 0, 34]);
2287 bytes.extend(std::iter::repeat_n(0u8, 34));
2288 bytes.extend_from_slice(b"AUDIO");
2289 std::fs::write(&good, &bytes).unwrap();
2290 std::fs::write(dir.path().join("bad.flac"), b"").unwrap();
2291
2292 let db = Db::open_in_memory().unwrap();
2293 let stats = scan_directory(&db, dir.path()).unwrap();
2294 assert_eq!(stats.scanned, 1);
2295 assert_eq!(stats.skipped + stats.failed, 1);
2296 }
2297
2298 #[test]
2299 fn scan_directory_bounded_matches_full_for_flac() {
2300 let dir = tempfile::tempdir().unwrap();
2303 let path = dir.path().join("a.flac");
2304 let bytes = flac_fixture();
2305 std::fs::write(&path, &bytes).unwrap();
2306
2307 let full = probe_full(&path, &bytes).expect("full probe");
2308
2309 let db = Db::open_in_memory().unwrap();
2310 let stats = scan_directory(&db, dir.path()).unwrap();
2311 assert_eq!(stats.scanned, 1);
2312 let track = db
2313 .get_track_by_path(&std::fs::canonicalize(&path).unwrap().to_string_lossy())
2314 .unwrap()
2315 .unwrap();
2316 assert_eq!(track.bounds.audio_offset(), full.audio_offset);
2317 assert_eq!(track.bounds.audio_length(), full.audio_length);
2318 }
2319
/// `revalidate_with` must report an untouched file as unchanged, and after
/// the file is rewritten with a different payload it must report it as
/// updated and store the new audio length.
#[test]
fn revalidate_skips_unchanged_and_reprobes_changed() {
    /// Builds a tiny FLAC-shaped file: the `fLaC` magic, one metadata block
    /// header (byte `0x80`, 24-bit length 34 — in the FLAC format that is a
    /// final block of type 0), 34 zero bytes, then `audio`.
    fn minimal_flac(audio: &[u8]) -> Vec<u8> {
        let mut out = Vec::new();
        out.extend_from_slice(b"fLaC");
        out.extend_from_slice(&[0x80, 0, 0, 34]);
        out.resize(out.len() + 34, 0u8);
        out.extend_from_slice(audio);
        out
    }

    const BEFORE: &[u8] = b"AUDIO";
    const AFTER: &[u8] = b"DIFFERENT-AUDIO";

    let tmp = tempfile::tempdir().unwrap();
    let track_path = tmp.path().join("x.flac");
    std::fs::write(&track_path, minimal_flac(BEFORE)).unwrap();

    let db = Db::open_in_memory().unwrap();
    scan_directory(&db, tmp.path()).unwrap();
    let opts = ScanOptions::default();

    // First pass: nothing on disk changed since the scan.
    let first = revalidate_with(&db, tmp.path(), &opts).unwrap();
    assert_eq!(first.unchanged, 1);
    assert_eq!(first.updated, 0);

    // Second pass: the file now carries a longer payload.
    std::fs::write(&track_path, minimal_flac(AFTER)).unwrap();
    let second = revalidate_with(&db, tmp.path(), &opts).unwrap();
    assert_eq!(second.updated, 1);
    assert_eq!(second.unchanged, 0);

    let canonical = std::fs::canonicalize(&track_path).unwrap();
    let stored = db
        .get_track_by_path(&canonical.to_string_lossy())
        .unwrap()
        .unwrap();
    assert_eq!(usize_from(stored.bounds.audio_length()), AFTER.len());
}
2356
/// `revalidate_with` may be pointed at one file instead of a directory: the
/// already-scanned track is reported unchanged, nothing is pruned, and the
/// database still lists exactly one track.
#[test]
fn revalidate_accepts_a_single_file_target() {
    let tmp = tempfile::tempdir().unwrap();
    let target = tmp.path().join("x.flac");

    // `fLaC` magic, a block header (0x80, 24-bit length 34), 34 zero bytes,
    // then the audio payload.
    let contents: Vec<u8> = b"fLaC"
        .iter()
        .copied()
        .chain([0x80, 0, 0, 34])
        .chain(std::iter::repeat_n(0u8, 34))
        .chain(b"AUDIO".iter().copied())
        .collect();
    std::fs::write(&target, &contents).unwrap();

    let db = Db::open_in_memory().unwrap();
    scan_directory(&db, tmp.path()).unwrap();

    // Revalidate the file itself rather than its parent directory.
    let outcome = revalidate_with(&db, &target, &ScanOptions::default()).unwrap();
    assert_eq!(outcome.unchanged, 1);
    assert_eq!(outcome.pruned, 0);

    let remaining = db.list_tracks().unwrap();
    assert_eq!(remaining.len(), 1);
}
2379
/// Scanning the same directory with `jobs = 1` and `jobs = 4` must leave the
/// database with the same set of `(backing_path, audio_offset, audio_length)`
/// rows, and that set must contain all 12 generated files.
#[test]
fn jobs1_and_jobs_n_produce_equivalent_state() {
    let dir = tempfile::tempdir().unwrap();
    for i in 0..12 {
        // `fLaC` magic, a block header (0x80, 24-bit length 34), 34 zero
        // bytes, then a per-file payload so every track has distinct audio.
        let mut bytes = b"fLaC".to_vec();
        bytes.push(0x80);
        bytes.extend_from_slice(&[0, 0, 34]);
        bytes.extend(std::iter::repeat_n(0u8, 34));
        bytes.extend_from_slice(format!("AUDIO-{i}").as_bytes());
        std::fs::write(dir.path().join(format!("t{i}.flac")), &bytes).unwrap();
    }

    // Scans into a fresh in-memory database with the given job count and
    // returns the resulting rows in sorted order so they can be compared.
    let norm = |jobs: usize| {
        let db = Db::open_in_memory().unwrap();
        scan_directory_with(
            &db,
            dir.path(),
            &ScanOptions {
                jobs,
                ..Default::default()
            },
        )
        .unwrap();
        let mut rows: Vec<(String, u64, u64)> = db
            .list_tracks()
            .unwrap()
            .into_iter()
            .map(|t| {
                (
                    t.backing_path,
                    t.bounds.audio_offset(),
                    t.bounds.audio_length(),
                )
            })
            .collect();
        rows.sort();
        rows
    };

    // Each configuration is scanned exactly once; the serial snapshot is
    // reused for the length check instead of rescanning the directory.
    let serial = norm(1);
    let parallel = norm(4);
    assert_eq!(serial, parallel);
    assert_eq!(serial.len(), 12);
}
2421
/// A file longer than `MAX_PROBE_BYTES` whose FLAC metadata does not parse
/// must make `probe_file` return `ProbeOutcome::Unparseable`.
#[test]
fn oversize_unparseable_file_is_skipped_not_read_whole() {
    use std::io::Write;

    let tmp = tempfile::tempdir().unwrap();
    let huge = tmp.path().join("huge.flac");
    {
        let mut file = std::fs::File::create(&huge).unwrap();
        file.write_all(b"fLaC").unwrap();
        // Block header: byte 0x04 (last-block flag clear) followed by the
        // largest possible 24-bit length.
        file.write_all(&[0x04, 0xFF, 0xFF, 0xFF]).unwrap();
        // Grow the file past the probe limit; `set_len` zero-fills the gap.
        file.set_len(MAX_PROBE_BYTES + 4096).unwrap();
        // The handle is closed at the end of this scope, before probing.
    }

    let outcome = probe_file(&huge, WINDOW).unwrap();
    assert!(matches!(outcome, ProbeOutcome::Unparseable));
}
2444
/// A WAV file whose `data` chunk alone exceeds `MAX_PROBE_BYTES` must still
/// be probed successfully: the reported audio offset is the end of the
/// header and the reported audio length is the size declared by the `data`
/// chunk header.
#[test]
fn oversize_wav_is_served_via_data_header() {
    use std::io::Write;

    let tmp = tempfile::tempdir().unwrap();
    let wav_path = tmp.path().join("long.wav");

    // Declared payload size: 16 MiB beyond the probe limit.
    let data_len: u64 = MAX_PROBE_BYTES + (16 << 20);
    let data_len_le = u32::try_from(data_len).unwrap();

    // 16-byte `fmt ` payload: u16 1, u16 1, u32 44100, u32 88200, u16 2,
    // u16 16, all little-endian.
    let fmt: Vec<u8> = [
        &1u16.to_le_bytes()[..],
        &1u16.to_le_bytes()[..],
        &44_100u32.to_le_bytes()[..],
        &88_200u32.to_le_bytes()[..],
        &2u16.to_le_bytes()[..],
        &16u16.to_le_bytes()[..],
    ]
    .concat();
    let fmt_len_le = u32::try_from(fmt.len()).unwrap();

    // RIFF header, `fmt ` chunk, and the `data` chunk header only.
    let riff_size = 36u32 + data_len_le;
    let mut header = Vec::new();
    header.extend_from_slice(b"RIFF");
    header.extend_from_slice(&riff_size.to_le_bytes());
    header.extend_from_slice(b"WAVE");
    header.extend_from_slice(b"fmt ");
    header.extend_from_slice(&fmt_len_le.to_le_bytes());
    header.extend_from_slice(&fmt);
    header.extend_from_slice(b"data");
    header.extend_from_slice(&data_len_le.to_le_bytes());

    let audio_offset = u64::try_from(header.len()).unwrap();
    let file_len = audio_offset + data_len;

    {
        let mut file = std::fs::File::create(&wav_path).unwrap();
        file.write_all(&header).unwrap();
        // Extend to the full declared size without writing the payload;
        // `set_len` zero-fills the remainder.
        file.set_len(file_len).unwrap();
    }

    let probed = match probe_file(&wav_path, WINDOW).unwrap() {
        ProbeOutcome::Probed(p, _) => p,
        other => panic!("expected Probed, got {other:?}"),
    };
    assert_eq!(probed.format, Format::Wav);
    assert_eq!(probed.audio_offset, audio_offset);
    assert_eq!(probed.audio_length, data_len);
}
2491
/// If the file grows while `probe_file` is running, the probe must return
/// `Ok(ProbeOutcome::Raced)`.
///
/// The growth is injected through the test-only thread-local hook installed
/// with `set_after_s1_hook`; the hook appends 4096 bytes to the file.
/// NOTE(review): the point at which `probe_file` fires the hook is not
/// visible here — presumably after its first stamp; confirm against
/// `probe_file`.
#[test]
fn probe_file_reports_raced_on_mid_probe_mutation() {
    use std::io::Write;

    /// Removes the thread-local hook when dropped, so the hook cannot stay
    /// installed on this thread if `probe_file` or the hook itself panics.
    struct HookReset;
    impl Drop for HookReset {
        fn drop(&mut self) {
            clear_after_s1_hook();
        }
    }

    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("a.wav");

    // `fmt ` payload: six little-endian u16 values.
    let fmt: Vec<u8> = [1u16, 1, 0, 0, 0, 16]
        .iter()
        .flat_map(|v| v.to_le_bytes())
        .collect();
    let fmt_len = u32::try_from(fmt.len()).unwrap();

    // RIFF size = "WAVE" (4) + fmt chunk header (8) + fmt payload
    //           + data chunk header (8) + 64 payload bytes.
    let riff_size = 4 + 8 + fmt_len + 8 + 64;
    let mut front = b"RIFF".to_vec();
    front.extend_from_slice(&riff_size.to_le_bytes());
    front.extend_from_slice(b"WAVE");
    front.extend_from_slice(b"fmt ");
    front.extend_from_slice(&fmt_len.to_le_bytes());
    front.extend_from_slice(&fmt);
    front.extend_from_slice(b"data");
    front.extend_from_slice(&64u32.to_le_bytes());

    let mut f = std::fs::File::create(&path).unwrap();
    f.write_all(&front).unwrap();
    // Zero-filled 64-byte payload matching the declared data size.
    f.set_len(front.len() as u64 + 64).unwrap();
    drop(f);

    let pc = path.clone();
    let out = {
        // Guard first, then install: the hook is cleared on every exit path,
        // and before the assertion below runs.
        let _reset = HookReset;
        set_after_s1_hook(move || {
            // Mutate the file mid-probe by appending to it.
            let mut g = std::fs::OpenOptions::new().append(true).open(&pc).unwrap();
            g.write_all(&[0u8; 4096]).unwrap();
        });
        probe_file(&path, WINDOW)
    };
    assert!(matches!(out, Ok(ProbeOutcome::Raced)), "got {out:?}");
}
2527}