1use std::collections::{BTreeMap, HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use musefs_db::convert::usize_from;
5use musefs_db::{Db, Format, NewArt, NewTrack, Tag, TrackArt};
6use musefs_format::{EmbeddedBinaryTag, EmbeddedPicture, Extent, flac, mp3, mp4, ogg, wav};
7
8use crate::byte_budget::ByteBudget;
9use crate::error::Result;
10use crate::freshness::BackingStamp;
11use std::fmt;
12use std::sync::Arc;
13use std::sync::mpsc::sync_channel;
14
/// File-count batch size (256).
// NOTE(review): not referenced in the visible part of this file — presumably the
// per-batch file cap of the bulk writer; confirm at the use site.
const BATCH_FILES: usize = 256;
/// Default value of `ScanOptions::batch_bytes` (64 MiB).
const BATCH_BYTES: u64 = 64 << 20;
/// Default value of `ScanOptions::window` (64 KiB): the size of the first
/// prefix `probe_body` reads from a file.
const WINDOW: usize = 1 << 16;
/// Maximum number of probe attempts `probe_body` makes on a widening prefix
/// before it falls back to reading the whole capped prefix.
const MAX_WIDEN_RETRIES: usize = 8;
/// Largest prefix (64 MiB) `probe_body` will read from a single file.
pub(crate) const MAX_PROBE_BYTES: u64 = 64 << 20;
/// Size cap for one embedded picture (16 MiB minus 64 KiB); larger pictures
/// are dropped with a warning.
pub(crate) const MAX_ART_BYTES: usize = 16 * 1024 * 1024 - 64 * 1024;

/// Size cap for one binary tag payload; deliberately identical to `MAX_ART_BYTES`.
const MAX_BINARY_TAG_BYTES: usize = MAX_ART_BYTES;
39
/// Result of probing one backing file (see `probe_file`).
#[derive(Debug)]
enum ProbeOutcome {
    /// Metadata was parsed; carries the parsed data and the stamp taken before
    /// the probe (which matched the stamp taken after it).
    Probed(Probed, BackingStamp),
    /// No parseable audio metadata was found (also used when a probe panicked).
    Unparseable,
    /// The file's stamp changed between the start and the end of the probe.
    Raced,
}
50
// Test-only, per-thread slot holding an optional callback. `fire_after_s1`
// runs it; `probe_file` calls that right after taking its first stamp —
// presumably so tests can simulate a file changing mid-probe.
#[cfg(test)]
thread_local! {
    static AFTER_S1_HOOK: std::cell::RefCell<Option<Box<dyn FnMut()>>> =
        const { std::cell::RefCell::new(None) };
}
/// Test-only: invoke the hook installed on this thread, if there is one.
#[cfg(test)]
fn fire_after_s1() {
    AFTER_S1_HOOK.with(|slot| {
        // The mutable borrow is held for the duration of the callback.
        let mut guard = slot.borrow_mut();
        if let Some(hook) = guard.as_mut() {
            hook();
        }
    });
}
/// Test-only: install `f` as this thread's hook, replacing any previous one.
#[cfg(test)]
fn set_after_s1_hook(f: impl FnMut() + 'static) {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        *guard = Some(Box::new(f));
    });
}
/// Test-only: remove this thread's hook so later probes run undisturbed.
#[cfg(test)]
fn clear_after_s1_hook() {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        *guard = None;
    });
}
72
/// Progress event delivered to a `ProgressSink` during a scan.
#[derive(Debug, Clone, Copy)]
pub enum ScanProgress<'a> {
    /// Emitted by `push_file` each time a file is appended to the work list;
    /// `found` is the list length after the append.
    Discovered { found: u64 },
    /// Emitted by `scan_directory_with` once the walk is done; `total` is the
    /// number of files left in the work list at that point.
    Walked { total: u64 },
    // NOTE(review): the emit site for this variant is outside the visible part
    // of the file — presumably one event per processed file; confirm.
    Ingested {
        done: u64,
        total: u64,
        path: &'a str,
    },
}
92
/// Cloneable handle to a shared progress callback.
///
/// The callback is stored behind an `Arc` and must be `Send + Sync`; cloning
/// the sink clones the `Arc`, not the callback.
#[derive(Clone)]
pub struct ProgressSink(Arc<dyn for<'a> Fn(ScanProgress<'a>) + Send + Sync>);
99
100impl ProgressSink {
101 pub fn new(f: impl for<'a> Fn(ScanProgress<'a>) + Send + Sync + 'static) -> Self {
102 ProgressSink(Arc::new(f))
103 }
104
105 fn emit(&self, ev: ScanProgress<'_>) {
106 (self.0)(ev);
107 }
108}
109
110impl fmt::Debug for ProgressSink {
111 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
112 f.write_str("ProgressSink")
113 }
114}
115
/// Counters reported by a directory scan.
// NOTE(review): the code that fills these in is outside the visible part of
// this file; the field meanings are implied by their names only — confirm
// against the scan loop before relying on them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ScanStats {
    pub scanned: u64,
    pub skipped: u64,
    pub already_present: u64,
    pub failed: u64,
    pub raced: u64,
}
125
/// Running count of skipped paths, bucketed by lower-cased file extension.
#[derive(Debug, Default)]
struct SkipTally {
    total: u64,
    by_ext: BTreeMap<String, u64>,
}

impl SkipTally {
    /// Count `path` as skipped. Paths with no extension, or with a non-UTF-8
    /// extension, are counted in the `<none>` bucket.
    fn record(&mut self, path: &Path) {
        let bucket = match path.extension().and_then(|e| e.to_str()) {
            Some(ext) => ext.to_ascii_lowercase(),
            None => "<none>".to_string(),
        };
        *self.by_ext.entry(bucket).or_insert(0) += 1;
        self.total += 1;
    }

    /// One-line summary, or `None` when nothing was skipped.
    ///
    /// Buckets are listed by descending count, ties broken by extension name.
    fn summary(&self) -> Option<String> {
        if self.total == 0 {
            return None;
        }
        let mut ranked: Vec<(&String, &u64)> = self.by_ext.iter().collect();
        ranked.sort_by(|(ext_a, n_a), (ext_b, n_b)| n_b.cmp(n_a).then_with(|| ext_a.cmp(ext_b)));

        let mut breakdown = String::new();
        for (i, (ext, n)) in ranked.iter().enumerate() {
            if i > 0 {
                breakdown.push_str(", ");
            }
            breakdown.push_str(&format!("{ext}={n}"));
        }
        Some(format!("skipped {}: {breakdown}", self.total))
    }
}
167
/// Counters reported by a revalidation pass.
// NOTE(review): the code that fills these in is outside the visible part of
// this file; the field meanings are implied by their names only — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RevalidateStats {
    pub updated: u64,
    pub unchanged: u64,
    pub pruned: u64,
    pub failed: u64,
    pub raced: u64,
}
176
/// True when `path` has a UTF-8 extension equal to `ext`, ignoring ASCII case.
fn has_ext(path: &Path, ext: &str) -> bool {
    match path.extension().and_then(|e| e.to_str()) {
        Some(actual) => actual.eq_ignore_ascii_case(ext),
        None => false,
    }
}
182
183fn is_supported_audio(path: &Path) -> bool {
185 has_ext(path, "flac")
186 || has_ext(path, "mp3")
187 || has_ext(path, "m4a")
188 || has_ext(path, "m4b")
189 || has_ext(path, "ogg")
190 || has_ext(path, "oga")
191 || has_ext(path, "opus")
192 || has_ext(path, "wav")
193}
194
/// Walk `root` and append every supported audio file to `out`, without
/// progress reporting. Thin wrapper over `collect_audio_with`.
///
/// Returns the tally of paths that were skipped.
fn collect_audio(
    root: &Path,
    out: &mut Vec<PathBuf>,
    follow_symlinks: bool,
) -> std::io::Result<SkipTally> {
    collect_audio_with(root, out, follow_symlinks, None)
}
202
203fn collect_audio_with(
204 root: &Path,
205 out: &mut Vec<PathBuf>,
206 follow_symlinks: bool,
207 progress: Option<&ProgressSink>,
208) -> std::io::Result<SkipTally> {
209 let mut visited = HashSet::new();
210 let mut files_visited = HashSet::new();
211 let mut tally = SkipTally::default();
212 if follow_symlinks && let Ok(meta) = std::fs::metadata(root) {
213 visited.insert(dir_key(&meta));
214 }
215 collect_audio_inner(
216 root,
217 out,
218 follow_symlinks,
219 &mut visited,
220 &mut files_visited,
221 &mut tally,
222 progress,
223 )?;
224 Ok(tally)
225}
226
227fn collect_audio_inner(
228 root: &Path,
229 out: &mut Vec<PathBuf>,
230 follow_symlinks: bool,
231 visited: &mut HashSet<(u64, u64)>,
232 files_visited: &mut HashSet<(u64, u64)>,
233 tally: &mut SkipTally,
234 progress: Option<&ProgressSink>,
235) -> std::io::Result<()> {
236 let entries = match std::fs::read_dir(root) {
242 Ok(entries) => entries,
243 Err(e) => {
244 log::warn!("skipping directory {}: {e}", root.display());
245 return Ok(());
246 }
247 };
248 for entry in entries {
249 let entry = match entry {
250 Ok(entry) => entry,
251 Err(e) => {
252 log::warn!("skipping unreadable entry in {}: {e}", root.display());
253 continue;
254 }
255 };
256 let path = entry.path();
257 let ftype = match entry.file_type() {
258 Ok(ftype) => ftype,
259 Err(e) => {
260 log::warn!("skipping {}: {e}", path.display());
261 continue;
262 }
263 };
264 if ftype.is_dir() {
265 descend(
266 &path,
267 out,
268 follow_symlinks,
269 visited,
270 files_visited,
271 tally,
272 progress,
273 )?;
274 } else if ftype.is_file() {
275 if is_supported_audio(&path) {
276 push_file(&path, out, follow_symlinks, files_visited, None, progress);
277 } else {
278 tally.record(&path);
279 }
280 } else if ftype.is_symlink() {
281 if !follow_symlinks {
282 log::debug!(
287 "skipping symlink {} (pass --follow-symlinks to scan it)",
288 path.display()
289 );
290 continue;
291 }
292 match std::fs::metadata(&path) {
293 Ok(meta) if meta.is_dir() => {
294 descend(
295 &path,
296 out,
297 follow_symlinks,
298 visited,
299 files_visited,
300 tally,
301 progress,
302 )?;
303 }
304 Ok(meta) if meta.is_file() => {
305 if is_supported_audio(&path) {
306 push_file(
307 &path,
308 out,
309 follow_symlinks,
310 files_visited,
311 Some(&meta),
312 progress,
313 );
314 } else {
315 tally.record(&path);
316 }
317 }
318 Ok(_) => {}
319 Err(e) => {
320 log::warn!("skipping broken symlink {}: {e}", path.display());
321 }
322 }
323 } else {
324 tally.record(&path);
330 }
331 }
332 Ok(())
333}
334
335fn descend(
336 path: &Path,
337 out: &mut Vec<PathBuf>,
338 follow_symlinks: bool,
339 visited: &mut HashSet<(u64, u64)>,
340 files_visited: &mut HashSet<(u64, u64)>,
341 tally: &mut SkipTally,
342 progress: Option<&ProgressSink>,
343) -> std::io::Result<()> {
344 if !follow_symlinks {
345 return collect_audio_inner(
346 path,
347 out,
348 follow_symlinks,
349 visited,
350 files_visited,
351 tally,
352 progress,
353 );
354 }
355 let meta = match std::fs::metadata(path) {
356 Ok(m) => m,
357 Err(e) => {
358 log::warn!("skipping directory {}: {e}", path.display());
359 return Ok(());
360 }
361 };
362 if !visited.insert(dir_key(&meta)) {
363 log::warn!("skipping symlink cycle at {}", path.display());
364 return Ok(());
365 }
366 collect_audio_inner(
367 path,
368 out,
369 follow_symlinks,
370 visited,
371 files_visited,
372 tally,
373 progress,
374 )
375}
376
/// Identity of a filesystem object as its (device id, inode number) pair.
fn dir_key(meta: &std::fs::Metadata) -> (u64, u64) {
    let device = std::os::unix::fs::MetadataExt::dev(meta);
    let inode = std::os::unix::fs::MetadataExt::ino(meta);
    (device, inode)
}
381
382fn push_file(
390 path: &Path,
391 out: &mut Vec<PathBuf>,
392 follow_symlinks: bool,
393 files_visited: &mut HashSet<(u64, u64)>,
394 known_meta: Option<&std::fs::Metadata>,
395 progress: Option<&ProgressSink>,
396) {
397 if !follow_symlinks {
398 out.push(path.to_path_buf());
399 if let Some(p) = progress {
400 p.emit(ScanProgress::Discovered {
401 found: out.len() as u64,
402 });
403 }
404 return;
405 }
406 let key = match known_meta {
407 Some(m) => Some(dir_key(m)),
408 None => std::fs::metadata(path).ok().map(|m| dir_key(&m)),
409 };
410 match key {
411 Some(k) if !files_visited.insert(k) => {
412 log::debug!("skipping duplicate backing target {}", path.display());
413 }
414 _ => {
415 out.push(path.to_path_buf());
416 if let Some(p) = progress {
417 p.emit(ScanProgress::Discovered {
418 found: out.len() as u64,
419 });
420 }
421 }
422 }
423}
424
/// Everything extracted from one audio file by a probe.
#[derive(Debug)]
pub(crate) struct Probed {
    /// Detected container/codec.
    format: Format,
    /// Byte offset of the audio payload within the file.
    audio_offset: u64,
    /// Length in bytes of the audio payload.
    audio_length: u64,
    /// Text tags as (key, value) pairs; keys may repeat.
    tags: Vec<(String, String)>,
    /// Embedded pictures.
    pictures: Vec<EmbeddedPicture>,
    /// Binary (non-text) tags.
    binary_tags: Vec<EmbeddedBinaryTag>,
    /// (kind, body) pairs; only the FLAC probes fill this in, every other
    /// format leaves it empty.
    structural_blocks: Vec<(String, Vec<u8>)>,
}
438
439fn wav_probed(prefix: &[u8], bounds: &wav::WavBounds) -> Probed {
442 let (binary_tags, promoted) = wav::read_binary_tags(prefix);
443 let mut tags = wav::read_tags(prefix);
444 tags.extend(promoted);
445 Probed {
446 format: Format::Wav,
447 audio_offset: bounds.audio_offset,
448 audio_length: bounds.audio_length,
449 tags,
450 pictures: wav::read_pictures(prefix),
451 binary_tags,
452 structural_blocks: Vec::new(),
453 }
454}
455
456pub(crate) fn probe_full(path: &Path, bytes: &[u8]) -> Option<Probed> {
459 if has_ext(path, "flac") {
460 let scan = flac::locate_audio(bytes).ok()?;
461 let (structural_blocks, binary_tags) = flac::split_preserved(&scan.preserved);
462 Some(Probed {
463 format: Format::Flac,
464 audio_offset: scan.audio_offset,
465 audio_length: scan.audio_length,
466 tags: flac::read_vorbis_comments(bytes).unwrap_or_default(),
467 pictures: flac::read_pictures(bytes).unwrap_or_default(),
468 binary_tags,
469 structural_blocks,
470 })
471 } else if has_ext(path, "mp3") {
472 let bounds = mp3::locate_audio(bytes).ok()?;
473 let (binary_tags, promoted) = mp3::read_binary_tags(bytes);
474 let mut tags = mp3::read_tags(bytes);
475 tags.extend(promoted);
476 Some(Probed {
477 format: Format::Mp3,
478 audio_offset: bounds.audio_offset,
479 audio_length: bounds.audio_length,
480 tags,
481 pictures: mp3::read_pictures(bytes),
482 binary_tags,
483 structural_blocks: Vec::new(),
484 })
485 } else if has_ext(path, "m4a") || has_ext(path, "m4b") {
486 let bounds = mp4::locate_audio(bytes).ok()?;
487 let (pictures, art_drops) = mp4::read_pictures_reporting(bytes, MAX_ART_BYTES);
488 let (binary_tags, bin_drops) = mp4::read_binary_tags_reporting(bytes, MAX_BINARY_TAG_BYTES);
489 log_mp4_oversize_drops(path, &art_drops, &bin_drops);
490 Some(Probed {
491 format: Format::M4a,
492 audio_offset: bounds.audio_offset,
493 audio_length: bounds.audio_length,
494 tags: mp4::read_tags(bytes),
495 pictures,
496 binary_tags,
497 structural_blocks: Vec::new(),
498 })
499 } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
500 let scan = ogg::locate_audio(bytes).ok()?;
501 let format = match scan.codec {
502 ogg::Codec::Opus => Format::Opus,
503 ogg::Codec::Vorbis => Format::Vorbis,
504 ogg::Codec::OggFlac => Format::OggFlac,
505 };
506 Some(Probed {
507 format,
508 audio_offset: scan.audio_offset,
509 audio_length: scan.audio_length,
510 tags: ogg::read_tags(bytes).unwrap_or_default(),
511 pictures: ogg::read_pictures(bytes).unwrap_or_default(),
512 binary_tags: Vec::new(),
513 structural_blocks: Vec::new(),
514 })
515 } else if has_ext(path, "wav") {
516 let bounds = wav::locate_audio(bytes).ok()?;
517 Some(wav_probed(bytes, &bounds))
518 } else {
519 None
520 }
521}
522
523fn read_window(file: &std::fs::File, len: usize) -> std::io::Result<Vec<u8>> {
526 use std::os::unix::fs::FileExt;
527 let mut buf = vec![0u8; len];
528 let n = file.read_at(&mut buf, 0)?;
529 buf.truncate(n);
530 crate::metrics::on_scan_read(n as u64);
531 Ok(buf)
532}
533
534fn read_tail_128(file: &std::fs::File, file_len: u64) -> std::io::Result<Option<[u8; 128]>> {
537 if file_len < 128 {
538 return Ok(None);
539 }
540 use std::os::unix::fs::FileExt;
541 let mut buf = [0u8; 128];
542 file.read_exact_at(&mut buf, file_len - 128)?;
543 crate::metrics::on_scan_read(128);
544 Ok(Some(buf))
545}
546
547fn probe_file(path: &Path, window: usize) -> std::io::Result<ProbeOutcome> {
557 let file = std::fs::File::open(path)?;
558 crate::metrics::on_scan_open();
559 let s1 = BackingStamp::from_metadata(&file.metadata()?);
560 #[cfg(test)]
561 fire_after_s1();
562
563 let probed = probe_body(path, &file, s1.size, window)?;
564
565 let s2 = BackingStamp::from_metadata(&file.metadata()?);
566 if s1 != s2 {
567 log::warn!("skipping {}: changed during probe", path.display());
568 return Ok(ProbeOutcome::Raced);
569 }
570 Ok(match probed {
571 Some(p) => ProbeOutcome::Probed(p, s1),
572 None => ProbeOutcome::Unparseable,
573 })
574}
575
576fn probe_file_caught(path: &Path, window: usize) -> std::io::Result<ProbeOutcome> {
585 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| probe_file(path, window))) {
586 Ok(res) => res,
587 Err(payload) => {
588 let msg = payload
589 .downcast_ref::<&str>()
590 .copied()
591 .or_else(|| payload.downcast_ref::<String>().map(String::as_str))
592 .unwrap_or("<non-string panic>");
593 log::error!(
594 "scan worker panicked probing {}: {msg}; counting as failed",
595 path.display()
596 );
597 Ok(ProbeOutcome::Unparseable)
598 }
599 }
600}
601
/// Parse the metadata of an already-open file.
///
/// * `path` — used for extension dispatch and log messages.
/// * `file_len` — file size as observed by the caller before the probe.
/// * `window` — size of the first prefix read for the non-MP4 formats.
///
/// Returns `Ok(None)` (after logging a warning) when nothing parseable is
/// found, and `Err` only for I/O failures.
fn probe_body(
    path: &Path,
    file: &std::fs::File,
    file_len: u64,
    window: usize,
) -> std::io::Result<Option<Probed>> {
    // MP4 takes its own path: the structure is read straight from the file and
    // tags/pictures are parsed from the `moov` data rather than from a prefix.
    if has_ext(path, "m4a") || has_ext(path, "m4b") {
        let mut f = file;
        let scan = match mp4::read_structure_from(&mut f, file_len) {
            Ok(s) => s,
            Err(e) => {
                log::warn!("skipping {}: {e}", path.display());
                return Ok(None);
            }
        };
        let (pictures, art_drops) = mp4::read_pictures_reporting(&scan.moov, MAX_ART_BYTES);
        let (binary_tags, bin_drops) =
            mp4::read_binary_tags_reporting(&scan.moov, MAX_BINARY_TAG_BYTES);
        log_mp4_oversize_drops(path, &art_drops, &bin_drops);
        return Ok(Some(Probed {
            format: Format::M4a,
            audio_offset: scan.mdat_payload_offset,
            audio_length: scan.mdat_payload_len,
            tags: mp4::read_tags(&scan.moov),
            pictures,
            binary_tags,
            structural_blocks: Vec::new(),
        }));
    }

    // Only MP3 gets the last 128 bytes of the file, which are handed to the
    // bounded MP3 locator (presumably for an ID3v1 trailer — confirm in `mp3`).
    let tail = if has_ext(path, "mp3") {
        read_tail_128(file, file_len)?
    } else {
        None
    };
    // Never read more than the file holds or than the global probe cap.
    let probe_cap = file_len.min(MAX_PROBE_BYTES);
    let mut want = usize_from((window as u64).min(probe_cap));
    let mut prefix = read_window(file, want)?;
    // Bounded probing: start small and widen only when the parser asks for more.
    for _ in 0..MAX_WIDEN_RETRIES {
        match probe_prefix(path, &prefix, file_len, tail.as_ref()) {
            Probe::Done(p) => return Ok(Some(p)),
            Probe::Skip => {
                log::warn!("skipping {}: no parseable audio metadata", path.display());
                return Ok(None);
            }
            Probe::NeedMore(up_to) => {
                // Already at the cap: widening cannot help, go to the fallback.
                if want as u64 >= probe_cap {
                    break;
                }
                // Grow to the requested size, by at least one byte, and never
                // beyond the cap.
                want = usize_from(up_to.min(probe_cap))
                    .max(want + 1)
                    .min(usize_from(probe_cap));
                prefix = read_window(file, want)?;
            }
        }
    }
    // Fallback: make sure the whole capped prefix is in memory, then run the
    // unbounded parser over it.
    if (prefix.len() as u64) < probe_cap {
        prefix = read_window(file, usize_from(probe_cap))?;
    }
    if let Some(p) = probe_full(path, &prefix) {
        return Ok(Some(p));
    }
    // WAV files larger than the cap get one more chance via the ceiling-aware
    // locator, which is given the real file length.
    if has_ext(path, "wav")
        && file_len > MAX_PROBE_BYTES
        && let Ok(bounds) = wav::locate_audio_at_ceiling(&prefix, file_len)
    {
        return Ok(Some(wav_probed(&prefix, &bounds)));
    }
    if file_len > MAX_PROBE_BYTES {
        log::warn!(
            "skipping {}: no parseable metadata within first {MAX_PROBE_BYTES} bytes",
            path.display()
        );
    } else {
        log::warn!("skipping {}: no parseable audio metadata", path.display());
    }
    Ok(None)
}
699
/// Outcome of one bounded probe attempt over a file prefix (see `probe_prefix`).
enum Probe {
    /// The prefix contained everything needed.
    Done(Probed),
    /// The prefix was too short; the payload is the `up_to` value reported by
    /// the format parser, which `probe_body` uses as the next prefix size.
    NeedMore(u64),
    /// The data is not parseable (or the extension is unsupported); give up.
    Skip,
}
706
707fn probe_prefix(path: &Path, prefix: &[u8], file_len: u64, tail: Option<&[u8; 128]>) -> Probe {
709 if has_ext(path, "flac") {
710 match flac::read_metadata_bounded(prefix) {
711 Ok(Extent::Complete(meta)) => {
712 let (structural_blocks, binary_tags) = flac::split_preserved(&meta.preserved);
713 Probe::Done(Probed {
714 format: Format::Flac,
715 audio_offset: meta.audio_offset,
716 audio_length: file_len - meta.audio_offset,
717 tags: flac::read_vorbis_comments(prefix).unwrap_or_default(),
718 pictures: flac::read_pictures(prefix).unwrap_or_default(),
719 binary_tags,
720 structural_blocks,
721 })
722 }
723 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
724 Err(_) => Probe::Skip,
725 }
726 } else if has_ext(path, "mp3") {
727 match mp3::locate_audio_bounded(prefix, file_len, tail) {
728 Ok(Extent::Complete(b)) => {
729 let (binary_tags, promoted) = mp3::read_binary_tags(prefix);
730 let mut tags = mp3::read_tags(prefix);
731 tags.extend(promoted);
732 Probe::Done(Probed {
733 format: Format::Mp3,
734 audio_offset: b.audio_offset,
735 audio_length: b.audio_length,
736 tags,
737 pictures: mp3::read_pictures(prefix),
738 binary_tags,
739 structural_blocks: Vec::new(),
740 })
741 }
742 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
743 Err(_) => Probe::Skip,
744 }
745 } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
746 match ogg::read_metadata_bounded(prefix, file_len) {
747 Ok(Extent::Complete(header)) => {
748 let format = match header.codec {
749 ogg::Codec::Opus => Format::Opus,
750 ogg::Codec::Vorbis => Format::Vorbis,
751 ogg::Codec::OggFlac => Format::OggFlac,
752 };
753 Probe::Done(Probed {
754 format,
755 audio_offset: header.audio_offset,
756 audio_length: file_len - header.audio_offset,
757 tags: ogg::read_tags(prefix).unwrap_or_default(),
758 pictures: ogg::read_pictures(prefix).unwrap_or_default(),
759 binary_tags: Vec::new(),
760 structural_blocks: Vec::new(),
761 })
762 }
763 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
764 Err(_) => Probe::Skip,
765 }
766 } else if has_ext(path, "wav") {
767 match wav::locate_audio_bounded(prefix, file_len) {
768 Ok(Extent::Complete(b)) => Probe::Done(wav_probed(prefix, &b)),
769 Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
770 Err(_) => Probe::Skip,
771 }
772 } else {
773 Probe::Skip
774 }
775}
776
/// How much checksumming a scan performs (`ScanOptions::checksum`; the default
/// is `Fingerprint`).
// NOTE(review): the code that acts on the tier is outside the visible part of
// this file. Presumably `None` computes nothing, `Fingerprint` fills the
// track's fingerprint and `Full` also fills its content hash — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChecksumTier {
    None,
    Fingerprint,
    Full,
}
787
/// How strictly `ingest_unit` confirms a fingerprint match before retargeting
/// an existing track to a new path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchStrictness {
    /// Confirm via content hash when the candidate has one stored; accept the
    /// fingerprint match alone when it does not.
    Auto,
    /// Accept the fingerprint match without any content-hash confirmation.
    Fast,
    /// Require the candidate's stored content hash to match; a candidate with
    /// no stored hash is never confirmed.
    Strict,
}
800
/// What `ingest_unit` writes for a probed file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum WritePolicy {
    /// Normal ingest: may retarget an existing track, otherwise writes the
    /// track row, checksums, tags, binary tags, structural blocks and art.
    Full,
    /// Only the track row, checksums and structural blocks are written
    /// (see `refresh_structural_into`).
    StructuralOnly,
}
811
/// Tunables for a directory scan; see the `Default` impl for default values.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    // NOTE(review): presumably the worker count, with 0 resolved through
    // `effective_jobs` — the call site is not visible here; confirm.
    pub jobs: usize,
    // NOTE(review): presumably the initial probe prefix size handed to
    // `probe_file` — the call site is not visible here; confirm.
    pub window: usize,
    // NOTE(review): use site not visible here — presumably a per-batch byte
    // budget for the writer; confirm.
    pub batch_bytes: u64,
    /// Whether the directory walk follows symlinks.
    pub follow_symlinks: bool,
    /// Optional sink that receives `ScanProgress` events.
    pub progress: Option<ProgressSink>,
    // NOTE(review): use site not visible here; see `ChecksumTier`.
    pub checksum: ChecksumTier,
    // NOTE(review): presumably forwarded to `ingest_unit`; confirm.
    pub strictness: MatchStrictness,
    /// When false, files whose path is already in the database are dropped
    /// from the work list before probing.
    pub force: bool,
    // NOTE(review): use site not visible here; confirm semantics.
    pub prune: bool,
}
836
/// Defaults: `jobs` 0, the `WINDOW` / `BATCH_BYTES` constants, no symlink
/// following, no progress sink, fingerprint checksums, automatic match
/// strictness, and neither `force` nor `prune`.
impl Default for ScanOptions {
    fn default() -> Self {
        Self {
            jobs: 0,
            window: WINDOW,
            batch_bytes: BATCH_BYTES,
            follow_symlinks: false,
            progress: None,
            checksum: ChecksumTier::Fingerprint,
            strictness: MatchStrictness::Auto,
            force: false,
            prune: false,
        }
    }
}
852
/// Resolve a requested job count: a non-zero value is used as-is, zero means
/// "use the machine's available parallelism" (falling back to 1 when that
/// cannot be determined).
fn effective_jobs(jobs: usize) -> usize {
    match jobs {
        0 => std::thread::available_parallelism().map_or(1, |n| n.get()),
        explicit => explicit,
    }
}
859
/// One probed file, ready to be written by `ingest_unit`.
struct Unit {
    /// Path of the backing file; used as the track's backing path.
    abs_path: String,
    /// Backing-file stamp stored alongside the track.
    stamp: BackingStamp,
    /// Parsed metadata.
    probed: Probed,
    // NOTE(review): not read in the visible part of this file — presumably
    // the `payload_weight` of `probed`, used for batch budgeting; confirm.
    weight: u64,
    /// Fingerprint used to look up retarget candidates, when computed.
    fingerprint: Option<String>,
    /// Full content hash, when already computed.
    content_hash: Option<String>,
}
869
870fn payload_weight(p: &Probed) -> u64 {
875 let pictures: u64 = p.pictures.iter().map(|pic| pic.data.len() as u64).sum();
876 let binary: u64 = p.binary_tags.iter().map(|t| t.payload.len() as u64).sum();
877 let structural: u64 = p
878 .structural_blocks
879 .iter()
880 .map(|(_, body)| body.len() as u64)
881 .sum();
882 pictures + binary + structural
883}
884
/// Minimal sanity check for a tag key: it must be non-empty and contain no
/// byte below 0x20 (ASCII control characters).
fn key_passes_floor(key: &str) -> bool {
    if key.is_empty() {
        return false;
    }
    !key.bytes().any(|b| b < 0x20)
}
895
896fn accept_pictures(abs_path: &str, pictures: Vec<EmbeddedPicture>) -> Vec<EmbeddedPicture> {
902 pictures
903 .into_iter()
904 .filter(|p| {
905 if p.data.len() > MAX_ART_BYTES {
906 log::warn!(
907 "{abs_path}: dropping embedded {} art ({} bytes), over the {MAX_ART_BYTES}-byte cap",
908 p.mime,
909 p.data.len(),
910 );
911 return false;
912 }
913 true
914 })
915 .collect()
916}
917
918fn accept_binary_tags(abs_path: &str, tags: Vec<EmbeddedBinaryTag>) -> Vec<musefs_db::BinaryTag> {
922 tags.into_iter()
923 .filter(|b| {
924 if b.payload.len() > MAX_BINARY_TAG_BYTES {
925 log::warn!(
926 "{abs_path}: dropping binary tag {} ({} bytes), over the {MAX_BINARY_TAG_BYTES}-byte cap",
927 b.key,
928 b.payload.len(),
929 );
930 return false;
931 }
932 !b.payload.is_empty()
933 })
934 .enumerate()
935 .map(|(ordinal, b)| musefs_db::BinaryTag {
936 key: b.key,
937 payload: b.payload,
938 ordinal: ordinal as u64,
939 })
940 .collect()
941}
942
943fn log_mp4_oversize_drops(path: &Path, art: &[mp4::OversizeDrop], binary: &[mp4::OversizeDrop]) {
950 for d in art {
951 log::warn!(
952 "{}: dropping embedded {} art ({} bytes), over the {MAX_ART_BYTES}-byte cap",
953 path.display(),
954 d.descriptor,
955 d.bytes,
956 );
957 }
958 for d in binary {
959 log::warn!(
960 "{}: dropping binary tag {} ({} bytes), over the {MAX_BINARY_TAG_BYTES}-byte cap",
961 path.display(),
962 d.descriptor,
963 d.bytes,
964 );
965 }
966}
967
968fn structural_blocks_from(blocks: Vec<(String, Vec<u8>)>) -> Vec<musefs_db::StructuralBlock> {
969 let mut ordinals: HashMap<String, u64> = HashMap::new();
970 blocks
971 .into_iter()
972 .map(|(kind, body)| {
973 let ord = ordinals.entry(kind.clone()).or_insert(0);
974 let block = musefs_db::StructuralBlock {
975 kind,
976 ordinal: *ord,
977 body,
978 };
979 *ord += 1;
980 block
981 })
982 .collect()
983}
984
/// The set of database write/lookup operations the ingest functions need.
///
/// Implemented in this file for `&Db` and for `&mut musefs_db::BulkWriter`, so
/// the same ingest code can run against either.
trait TrackSink {
    /// Insert or update a track row; returns the track id.
    fn upsert_track(&mut self, t: &NewTrack) -> musefs_db::Result<i64>;
    /// Replace the text tags of a track.
    fn replace_tags(&mut self, track_id: i64, tags: &[Tag]) -> musefs_db::Result<()>;
    /// Set the binary tags of a track.
    fn set_binary_tags(
        &mut self,
        track_id: i64,
        tags: &[musefs_db::BinaryTag],
    ) -> musefs_db::Result<()>;
    /// Set the structural blocks of a track.
    fn set_structural_blocks(
        &mut self,
        track_id: i64,
        blocks: &[musefs_db::StructuralBlock],
    ) -> musefs_db::Result<()>;
    /// Insert or update an art row; returns the art id.
    fn upsert_art(&mut self, a: &NewArt) -> musefs_db::Result<i64>;
    /// Set the art attached to a track.
    fn set_track_art(&mut self, track_id: i64, items: &[TrackArt]) -> musefs_db::Result<()>;
    /// Store the fingerprint and content hash of a track.
    fn set_track_checksums(
        &mut self,
        track_id: i64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()>;
    /// Whether a track with this backing path exists.
    fn track_exists_at(&mut self, path: &str) -> musefs_db::Result<bool>;
    /// All tracks stored with the given fingerprint.
    fn tracks_by_fingerprint(&mut self, fp: &str) -> musefs_db::Result<Vec<musefs_db::Track>>;
    /// Point an existing track at a new backing path, updating its stamp,
    /// audio bounds and checksums.
    #[allow(clippy::too_many_arguments)]
    fn retarget_track(
        &mut self,
        id: i64,
        new_backing_path: &str,
        stamp: BackingStamp,
        audio_offset: u64,
        audio_length: u64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()>;
}
1025
/// `TrackSink` over a plain database handle: every method forwards to the
/// `Db` method of the same name, except where noted.
impl TrackSink for &Db {
    fn upsert_track(&mut self, t: &NewTrack) -> musefs_db::Result<i64> {
        Db::upsert_track(self, t)
    }
    fn replace_tags(&mut self, track_id: i64, tags: &[Tag]) -> musefs_db::Result<()> {
        Db::replace_tags(self, track_id, tags)
    }
    fn set_binary_tags(
        &mut self,
        track_id: i64,
        tags: &[musefs_db::BinaryTag],
    ) -> musefs_db::Result<()> {
        Db::set_binary_tags(self, track_id, tags)
    }
    fn set_structural_blocks(
        &mut self,
        track_id: i64,
        blocks: &[musefs_db::StructuralBlock],
    ) -> musefs_db::Result<()> {
        Db::set_structural_blocks(self, track_id, blocks)
    }
    fn upsert_art(&mut self, a: &NewArt) -> musefs_db::Result<i64> {
        Db::upsert_art(self, a)
    }
    fn set_track_art(&mut self, track_id: i64, items: &[TrackArt]) -> musefs_db::Result<()> {
        Db::set_track_art(self, track_id, items)
    }
    fn set_track_checksums(
        &mut self,
        track_id: i64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()> {
        Db::set_track_checksums(self, track_id, fingerprint, content_hash)
    }
    // Existence is answered by looking the track up by path.
    fn track_exists_at(&mut self, path: &str) -> musefs_db::Result<bool> {
        Ok(Db::get_track_by_path(self, path)?.is_some())
    }
    fn tracks_by_fingerprint(&mut self, fp: &str) -> musefs_db::Result<Vec<musefs_db::Track>> {
        Db::tracks_by_fingerprint(self, fp)
    }
    // The stamp is unpacked into the size / mtime / ctime arguments the
    // database call takes.
    fn retarget_track(
        &mut self,
        id: i64,
        new_backing_path: &str,
        stamp: BackingStamp,
        audio_offset: u64,
        audio_length: u64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()> {
        Db::retarget_track(
            self,
            id,
            new_backing_path,
            stamp.size,
            stamp.mtime_ns,
            stamp.ctime_ns,
            audio_offset,
            audio_length,
            fingerprint,
            content_hash,
        )
    }
}
1091
/// `TrackSink` over a bulk writer: every method forwards to the `BulkWriter`
/// method of the same name, except where noted.
impl TrackSink for &mut musefs_db::BulkWriter<'_> {
    fn upsert_track(&mut self, t: &NewTrack) -> musefs_db::Result<i64> {
        musefs_db::BulkWriter::upsert_track(self, t)
    }
    fn replace_tags(&mut self, track_id: i64, tags: &[Tag]) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::replace_tags(self, track_id, tags)
    }
    fn set_binary_tags(
        &mut self,
        track_id: i64,
        tags: &[musefs_db::BinaryTag],
    ) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::set_binary_tags(self, track_id, tags)
    }
    fn set_structural_blocks(
        &mut self,
        track_id: i64,
        blocks: &[musefs_db::StructuralBlock],
    ) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::set_structural_blocks(self, track_id, blocks)
    }
    fn upsert_art(&mut self, a: &NewArt) -> musefs_db::Result<i64> {
        musefs_db::BulkWriter::upsert_art(self, a)
    }
    fn set_track_art(&mut self, track_id: i64, items: &[TrackArt]) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::set_track_art(self, track_id, items)
    }
    fn set_track_checksums(
        &mut self,
        track_id: i64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::set_track_checksums(self, track_id, fingerprint, content_hash)
    }
    // Existence is answered by looking the track up by path.
    fn track_exists_at(&mut self, path: &str) -> musefs_db::Result<bool> {
        Ok(musefs_db::BulkWriter::get_track_by_path(self, path)?.is_some())
    }
    fn tracks_by_fingerprint(&mut self, fp: &str) -> musefs_db::Result<Vec<musefs_db::Track>> {
        musefs_db::BulkWriter::tracks_by_fingerprint(self, fp)
    }
    // The stamp is unpacked into the size / mtime / ctime arguments the
    // writer call takes.
    fn retarget_track(
        &mut self,
        id: i64,
        new_backing_path: &str,
        stamp: BackingStamp,
        audio_offset: u64,
        audio_length: u64,
        fingerprint: Option<&str>,
        content_hash: Option<&str>,
    ) -> musefs_db::Result<()> {
        musefs_db::BulkWriter::retarget_track(
            self,
            id,
            new_backing_path,
            stamp.size,
            stamp.mtime_ns,
            stamp.ctime_ns,
            audio_offset,
            audio_length,
            fingerprint,
            content_hash,
        )
    }
}
1157
1158fn ingest_into(
1165 mut w: impl TrackSink,
1166 abs_path: &str,
1167 stamp: BackingStamp,
1168 probed: Probed,
1169 fingerprint: Option<&str>,
1170 content_hash: Option<&str>,
1171) -> Result<()> {
1172 let track_id = w.upsert_track(&NewTrack {
1173 backing_path: abs_path.to_string(),
1174 format: probed.format,
1175 audio_offset: probed.audio_offset,
1176 audio_length: probed.audio_length,
1177 backing_size: stamp.size,
1178 backing_mtime_ns: stamp.mtime_ns,
1179 backing_ctime_ns: stamp.ctime_ns,
1180 })?;
1181 w.set_track_checksums(track_id, fingerprint, content_hash)?;
1182
1183 let mut tags = Vec::new();
1184 let mut ordinals: HashMap<String, u64> = HashMap::new();
1185 for (key, value) in probed.tags {
1186 if !key_passes_floor(&key) {
1187 continue;
1188 }
1189 let ord = ordinals.entry(key.clone()).or_insert(0);
1190 tags.push(Tag::new(&key, &value, *ord));
1191 *ord += 1;
1192 }
1193 w.replace_tags(track_id, &tags)?;
1194
1195 let binary_tags = accept_binary_tags(abs_path, probed.binary_tags);
1196 w.set_binary_tags(track_id, &binary_tags)?;
1197
1198 let structural_blocks = structural_blocks_from(probed.structural_blocks);
1199 w.set_structural_blocks(track_id, &structural_blocks)?;
1200
1201 let mut track_arts = Vec::new();
1202 for (ordinal, pic) in accept_pictures(abs_path, probed.pictures)
1203 .into_iter()
1204 .enumerate()
1205 {
1206 let art_id = w.upsert_art(&NewArt {
1207 mime: pic.mime,
1208 width: (pic.width != 0).then_some(pic.width),
1209 height: (pic.height != 0).then_some(pic.height),
1210 data: pic.data,
1211 })?;
1212 let picture_type = pic.picture_type.get();
1213 track_arts.push(TrackArt {
1214 art_id,
1215 picture_type,
1216 description: pic.description,
1217 ordinal: ordinal as u64,
1218 });
1219 }
1220 w.set_track_art(track_id, &track_arts)?;
1221 Ok(())
1222}
1223
1224fn refresh_structural_into(
1227 mut w: impl TrackSink,
1228 abs_path: &str,
1229 stamp: BackingStamp,
1230 probed: Probed,
1231 fingerprint: Option<&str>,
1232 content_hash: Option<&str>,
1233) -> Result<()> {
1234 let track_id = w.upsert_track(&NewTrack {
1235 backing_path: abs_path.to_string(),
1236 format: probed.format,
1237 audio_offset: probed.audio_offset,
1238 audio_length: probed.audio_length,
1239 backing_size: stamp.size,
1240 backing_mtime_ns: stamp.mtime_ns,
1241 backing_ctime_ns: stamp.ctime_ns,
1242 })?;
1243 w.set_track_checksums(track_id, fingerprint, content_hash)?;
1244 let structural_blocks = structural_blocks_from(probed.structural_blocks);
1245 w.set_structural_blocks(track_id, &structural_blocks)?;
1246 Ok(())
1247}
1248
/// Write one probed `unit` through `w`, retargeting an existing track instead
/// of inserting a new one when the file looks like a moved copy.
///
/// Decision order:
/// 1. `WritePolicy::StructuralOnly` → reduced write via `refresh_structural_into`.
/// 2. A track already exists at this path → normal ingest over it.
/// 3. The unit has a fingerprint and exactly one stored track with the same
///    fingerprint has a missing backing file → confirm according to
///    `strictness` and, if confirmed, retarget that track to the new path.
/// 4. Otherwise → normal ingest as a fresh track.
fn ingest_unit(
    mut w: impl TrackSink,
    unit: Unit,
    strictness: MatchStrictness,
    policy: WritePolicy,
) -> Result<()> {
    if policy == WritePolicy::StructuralOnly {
        return refresh_structural_into(
            w,
            &unit.abs_path,
            unit.stamp,
            unit.probed,
            unit.fingerprint.as_deref(),
            unit.content_hash.as_deref(),
        );
    }
    if w.track_exists_at(&unit.abs_path)? {
        return ingest_into(
            w,
            &unit.abs_path,
            unit.stamp,
            unit.probed,
            unit.fingerprint.as_deref(),
            unit.content_hash.as_deref(),
        );
    }
    if let Some(fp) = unit.fingerprint.as_deref() {
        // Retarget candidates are tracks with the same fingerprint whose
        // backing file is definitely gone (NotFound). A candidate that still
        // exists, or that cannot be stat'ed for another reason, is excluded.
        let candidates: Vec<musefs_db::Track> = w
            .tracks_by_fingerprint(fp)?
            .into_iter()
            .filter(|t| match std::fs::metadata(&t.backing_path) {
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => true,
                Ok(_) => false,
                Err(e) => {
                    log::warn!(
                        "skipping retarget candidate {}: cannot stat backing path ({e})",
                        t.backing_path
                    );
                    false
                }
            })
            .collect();
        if candidates.len() == 1 {
            let cand = &candidates[0];
            // A full-file hash is only needed when there is a stored hash to
            // compare against and the strictness asks for confirmation.
            let needs_full = match strictness {
                MatchStrictness::Fast => false,
                MatchStrictness::Auto | MatchStrictness::Strict => cand.content_hash.is_some(),
            };
            // Reuse the unit's hash when it has one; otherwise compute it on
            // demand. A hashing failure leaves `new_hash` empty, which makes
            // the confirmation below fail.
            let new_hash: Option<String> = match (&unit.content_hash, needs_full) {
                (Some(h), _) => Some(h.clone()),
                (None, true) => match full_file_hash(std::path::Path::new(&unit.abs_path)) {
                    Ok(h) => Some(h),
                    Err(e) => {
                        log::warn!(
                            "hash confirm failed for {}: {e}; inserting fresh",
                            unit.abs_path
                        );
                        None
                    }
                },
                (None, false) => None,
            };
            let confirmed = match strictness {
                MatchStrictness::Fast => true,
                MatchStrictness::Auto | MatchStrictness::Strict => match &cand.content_hash {
                    // No stored hash: only `Auto` accepts the match.
                    None => matches!(strictness, MatchStrictness::Auto),
                    Some(stored) => new_hash.as_deref() == Some(stored.as_str()),
                },
            };
            // The path is re-checked immediately before the retarget.
            if confirmed && !w.track_exists_at(&unit.abs_path)? {
                w.retarget_track(
                    cand.id,
                    &unit.abs_path,
                    unit.stamp,
                    unit.probed.audio_offset,
                    unit.probed.audio_length,
                    unit.fingerprint.as_deref(),
                    new_hash.as_deref(),
                )?;
                return Ok(());
            }
            if !confirmed {
                log::warn!(
                    "fingerprint match for {} not confirmed (strictness {:?}); inserting fresh",
                    unit.abs_path,
                    strictness,
                );
            }
        } else if candidates.len() > 1 {
            log::warn!(
                "ambiguous fingerprint match for {} ({} missing candidates); inserting fresh",
                unit.abs_path,
                candidates.len(),
            );
        }
    }
    // No usable retarget: write the file as its own track.
    ingest_into(
        w,
        &unit.abs_path,
        unit.stamp,
        unit.probed,
        unit.fingerprint.as_deref(),
        unit.content_hash.as_deref(),
    )
}
1365
1366fn ingest(db: &Db, abs_path: &str, meta: &std::fs::Metadata, probed: Probed) -> Result<()> {
1369 ingest_into(
1370 db,
1371 abs_path,
1372 BackingStamp::from_metadata(meta),
1373 probed,
1374 None,
1375 None,
1376 )
1377}
1378
/// Test-only shim: ingests one probed file through a bulk writer with neither
/// a fingerprint nor a content hash attached.
#[cfg(test)]
fn ingest_bulk(
    bw: &mut musefs_db::BulkWriter<'_>,
    abs_path: &str,
    stamp: BackingStamp,
    probed: Probed,
) -> Result<()> {
    let (fingerprint, content_hash) = (None, None);
    ingest_into(bw, abs_path, stamp, probed, fingerprint, content_hash)
}
1392
1393pub fn scan_directory_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<ScanStats> {
1405 let canon = std::fs::canonicalize(root)?;
1409 let root = canon.as_path();
1410 let mut files = Vec::new();
1411 let mut tally = SkipTally::default();
1412 if root.is_file() {
1413 if is_supported_audio(root) {
1414 files.push(root.to_path_buf());
1415 } else {
1416 tally.record(root);
1417 }
1418 } else {
1419 tally = collect_audio_with(
1420 root,
1421 &mut files,
1422 opts.follow_symlinks,
1423 opts.progress.as_ref(),
1424 )?;
1425 }
1426 let mut already_present = 0u64;
1427 if !opts.force {
1428 let existing: HashSet<String> = db
1429 .list_tracks()?
1430 .into_iter()
1431 .map(|t| t.backing_path)
1432 .collect();
1433 let before = files.len();
1434 files.retain(|path| {
1435 let key = if opts.follow_symlinks {
1436 match std::fs::canonicalize(path) {
1437 Ok(abs) => abs.to_string_lossy().into_owned(),
1438 Err(_) => return true,
1439 }
1440 } else {
1441 path.to_string_lossy().into_owned()
1442 };
1443 !existing.contains(&key)
1444 });
1445 already_present = (before - files.len()) as u64;
1446 }
1447 if let Some(p) = &opts.progress {
1448 p.emit(ScanProgress::Walked {
1449 total: files.len() as u64,
1450 });
1451 }
1452 db.apply_bulk_pragmas_self()?; let mut stats = run_pipeline(db, files, opts, WritePolicy::Full)?;
1454 stats.skipped = tally.total;
1456 stats.already_present = already_present;
1457 if let Some(summary) = tally.summary() {
1460 log::warn!("{summary}");
1461 }
1462 Ok(stats)
1463}
1464
1465pub fn scan_directory(db: &Db, root: &Path) -> Result<ScanStats> {
1467 scan_directory_with(db, root, &ScanOptions::default())
1468}
1469
1470fn run_pipeline(
1474 db: &Db,
1475 files: Vec<PathBuf>,
1476 opts: &ScanOptions,
1477 policy: WritePolicy,
1478) -> Result<ScanStats> {
1479 use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
1480
1481 let jobs = effective_jobs(opts.jobs);
1482 let total = files.len() as u64;
1483 let progress = opts.progress.as_ref();
1484 let window = opts.window;
1485 let follow_symlinks = opts.follow_symlinks;
1486 let tier = opts.checksum;
1487 let strictness = opts.strictness;
1488 let cap = opts.batch_bytes;
1489 let budget = Arc::new(ByteBudget::new(cap));
1490 let failed = Arc::new(AtomicU64::new(0));
1491 let raced = Arc::new(AtomicU64::new(0));
1492
1493 let files = Arc::new(files);
1496 let cursor = Arc::new(AtomicUsize::new(0));
1497 let (tx, rx) = sync_channel::<Unit>(jobs * 2);
1498
1499 let mut workers = Vec::with_capacity(jobs);
1500 for _ in 0..jobs {
1501 let files = Arc::clone(&files);
1502 let cursor = Arc::clone(&cursor);
1503 let tx = tx.clone();
1504 let budget = Arc::clone(&budget);
1505 let failed = Arc::clone(&failed);
1506 let raced = Arc::clone(&raced);
1507 workers.push(std::thread::spawn(move || {
1508 loop {
1509 let i = cursor.fetch_add(1, Ordering::Relaxed);
1510 let Some(path) = files.get(i) else { break };
1511 match probe_file_caught(path, window) {
1512 Ok(ProbeOutcome::Probed(probed, stamp)) => {
1513 let abs_path = if follow_symlinks {
1517 match std::fs::canonicalize(path) {
1518 Ok(abs) => abs.to_string_lossy().into_owned(),
1519 Err(e) => {
1520 log::warn!("skipping {}: {e}", path.display());
1521 failed.fetch_add(1, Ordering::Relaxed);
1522 continue;
1523 }
1524 }
1525 } else {
1526 path.to_string_lossy().into_owned()
1527 };
1528 let weight = payload_weight(&probed);
1529 budget.acquire(weight); let fingerprint = match tier {
1531 ChecksumTier::None => None,
1532 ChecksumTier::Fingerprint | ChecksumTier::Full => {
1533 Some(fingerprint_of(&probed))
1534 }
1535 };
1536 let content_hash = match tier {
1537 ChecksumTier::Full => {
1538 match full_file_hash(std::path::Path::new(&abs_path)) {
1539 Ok(h) => Some(h),
1540 Err(e) => {
1541 log::warn!("content hash failed for {abs_path}: {e}");
1542 None
1543 }
1544 }
1545 }
1546 _ => None,
1547 };
1548 let unit = Unit {
1549 abs_path,
1550 stamp,
1551 probed,
1552 weight,
1553 fingerprint,
1554 content_hash,
1555 };
1556 if tx.send(unit).is_err() {
1557 budget.release(weight);
1558 break;
1559 }
1560 }
1561 Ok(ProbeOutcome::Unparseable) => {
1562 failed.fetch_add(1, Ordering::Relaxed);
1563 }
1564 Err(e) => {
1565 log::warn!("skipping {}: {e}", path.display());
1566 failed.fetch_add(1, Ordering::Relaxed);
1567 }
1568 Ok(ProbeOutcome::Raced) => {
1569 raced.fetch_add(1, Ordering::Relaxed);
1570 }
1571 }
1572 }
1573 }));
1574 }
1575 drop(tx); let mut scanned = 0u64;
1579 let mut batch: Vec<Unit> = Vec::new();
1580 let mut batch_bytes = 0u64;
1581 let flush = |batch: &mut Vec<Unit>, batch_bytes: &mut u64, scanned: &mut u64| -> Result<()> {
1582 if batch.is_empty() {
1583 return Ok(());
1584 }
1585 let mut bw = db.bulk_writer()?;
1586 let mut released = 0u64;
1589 let mut committed: Vec<String> = Vec::new();
1593 for unit in batch.drain(..) {
1594 released += unit.weight;
1595 committed.push(unit.abs_path.clone());
1596 ingest_unit(&mut bw, unit, strictness, policy)?;
1597 }
1598 bw.commit()?;
1599 for abs_path in committed {
1600 *scanned += 1;
1601 if let Some(p) = progress {
1602 p.emit(ScanProgress::Ingested {
1603 done: *scanned,
1604 total,
1605 path: &abs_path,
1606 });
1607 }
1608 }
1609 budget.release(released);
1612 *batch_bytes = 0;
1613 Ok(())
1614 };
1615
1616 loop {
1626 match rx.try_recv() {
1627 Ok(unit) => {
1628 batch_bytes += unit.weight;
1629 batch.push(unit);
1630 if batch.len() >= BATCH_FILES || batch_bytes >= cap {
1631 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
1632 }
1633 }
1634 Err(std::sync::mpsc::TryRecvError::Empty) => {
1635 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
1636 match rx.recv() {
1637 Ok(unit) => {
1638 batch_bytes += unit.weight;
1639 batch.push(unit);
1640 if batch.len() >= BATCH_FILES || batch_bytes >= cap {
1641 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
1642 }
1643 }
1644 Err(_) => break, }
1646 }
1647 Err(std::sync::mpsc::TryRecvError::Disconnected) => break,
1648 }
1649 }
1650 flush(&mut batch, &mut batch_bytes, &mut scanned)?;
1651 for w in workers {
1656 let _ = w.join();
1657 }
1658
1659 Ok(ScanStats {
1660 scanned,
1661 skipped: 0, already_present: 0,
1663 failed: failed.load(Ordering::Relaxed),
1664 raced: raced.load(Ordering::Relaxed),
1665 })
1666}
1667
1668#[doc(hidden)]
1671pub fn scan_directory_full_oracle(db: &Db, root: &Path) -> Result<ScanStats> {
1672 let mut files = Vec::new();
1673 let mut skipped = 0u64;
1674 if root.is_file() {
1675 if is_supported_audio(root) {
1676 files.push(root.to_path_buf());
1677 } else {
1678 skipped += 1;
1679 }
1680 } else {
1681 skipped += collect_audio(root, &mut files, false)?.total;
1682 }
1683 let mut stats = ScanStats {
1684 scanned: 0,
1685 skipped,
1686 already_present: 0,
1687 failed: 0,
1688 raced: 0,
1689 };
1690 for path in files {
1691 let bytes = std::fs::read(&path)?;
1692 let Some(probed) = probe_full(&path, &bytes) else {
1693 stats.failed += 1;
1694 continue;
1695 };
1696 let meta = std::fs::metadata(&path)?;
1697 let abs = std::fs::canonicalize(&path)?;
1698 ingest(db, &abs.to_string_lossy(), &meta, probed)?;
1699 stats.scanned += 1;
1700 }
1701 Ok(stats)
1702}
1703
1704pub fn revalidate_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<RevalidateStats> {
1719 let canon = std::fs::canonicalize(root)?;
1722 let root = canon.as_path();
1723 let mut files = Vec::new();
1724 if root.is_file() {
1725 if is_supported_audio(root) {
1726 files.push(root.to_path_buf());
1727 }
1728 } else {
1729 collect_audio_with(
1730 root,
1731 &mut files,
1732 opts.follow_symlinks,
1733 opts.progress.as_ref(),
1734 )?;
1735 }
1736 db.apply_bulk_pragmas_self()?;
1737
1738 let existing: HashMap<String, (crate::freshness::BackingStamp, i64, Format, bool, bool)> = db
1742 .list_tracks()?
1743 .into_iter()
1744 .map(|t| {
1745 (
1746 t.backing_path.clone(),
1747 (
1748 crate::freshness::BackingStamp::from_track(&t),
1749 t.id,
1750 t.format,
1751 t.fingerprint.is_some(),
1752 t.content_hash.is_some(),
1753 ),
1754 )
1755 })
1756 .collect();
1757 let have_structural = db.track_ids_with_structural_blocks()?;
1761
1762 let mut unchanged = 0u64;
1763 let mut skip_failed = 0u64;
1764 let mut changed: Vec<PathBuf> = Vec::new();
1765 for path in files {
1766 let meta = match std::fs::metadata(&path) {
1767 Ok(meta) => meta,
1768 Err(e) => {
1769 log::warn!("skipping {}: {e}", path.display());
1770 skip_failed += 1;
1771 continue;
1772 }
1773 };
1774 let key = if opts.follow_symlinks {
1775 match std::fs::canonicalize(&path) {
1776 Ok(abs) => abs.to_string_lossy().into_owned(),
1777 Err(e) => {
1778 log::warn!("skipping {}: {e}", path.display());
1779 skip_failed += 1;
1780 continue;
1781 }
1782 }
1783 } else {
1784 path.to_string_lossy().into_owned()
1785 };
1786 if let Some((stamp, id, format, has_fingerprint, has_content_hash)) =
1787 existing.get(&key).copied()
1788 {
1789 let needs_backfill = format == Format::Flac && !have_structural.contains(&id);
1790 let needs_checksum = match opts.checksum {
1791 ChecksumTier::None => false,
1792 ChecksumTier::Fingerprint => !has_fingerprint,
1793 ChecksumTier::Full => !has_fingerprint || !has_content_hash,
1794 };
1795 if crate::freshness::BackingStamp::from_metadata(&meta) == stamp
1796 && !needs_backfill
1797 && !needs_checksum
1798 {
1799 unchanged += 1;
1800 continue;
1801 }
1802 changed.push(path);
1803 }
1804 }
1805
1806 if let Some(p) = &opts.progress {
1807 p.emit(ScanProgress::Walked {
1808 total: changed.len() as u64,
1809 });
1810 }
1811
1812 let mut pruned = 0u64;
1813 let scan = run_pipeline(db, changed, opts, WritePolicy::StructuralOnly)?;
1814
1815 if opts.prune {
1816 let canon_root = root;
1817 for track in db.list_tracks()? {
1818 if !Path::new(&track.backing_path).starts_with(canon_root) {
1819 continue;
1820 }
1821 if let Err(e) = std::fs::metadata(&track.backing_path)
1822 && e.kind() == std::io::ErrorKind::NotFound
1823 {
1824 db.delete_track(track.id)?;
1825 pruned += 1;
1826 }
1827 }
1828 db.gc_orphan_art()?;
1829 }
1830
1831 Ok(RevalidateStats {
1832 updated: scan.scanned,
1833 unchanged,
1834 pruned,
1835 failed: scan.failed + skip_failed,
1836 raced: scan.raced,
1837 })
1838}
1839
1840pub fn revalidate(db: &Db, root: &Path) -> Result<RevalidateStats> {
1842 revalidate_with(db, root, &ScanOptions::default())
1843}
1844
1845pub(crate) fn fingerprint_of(p: &Probed) -> String {
1850 use sha2::{Digest, Sha256};
1851 fn feed(h: &mut Sha256, bytes: &[u8]) {
1854 h.update((bytes.len() as u64).to_le_bytes());
1855 h.update(bytes);
1856 }
1857 let mut h = Sha256::new();
1858 feed(&mut h, p.format.as_str().as_bytes());
1859 h.update(p.audio_offset.to_le_bytes());
1860 h.update(p.audio_length.to_le_bytes());
1861 h.update((p.tags.len() as u64).to_le_bytes());
1862 for (k, v) in &p.tags {
1863 feed(&mut h, k.as_bytes());
1864 feed(&mut h, v.as_bytes());
1865 }
1866 h.update((p.pictures.len() as u64).to_le_bytes());
1867 for pic in &p.pictures {
1868 feed(&mut h, pic.mime.as_bytes());
1869 h.update(u64::from(pic.picture_type.get()).to_le_bytes());
1870 feed(&mut h, pic.description.as_bytes());
1871 h.update(u64::from(pic.width).to_le_bytes());
1872 h.update(u64::from(pic.height).to_le_bytes());
1873 feed(&mut h, &pic.data);
1874 }
1875 h.update((p.binary_tags.len() as u64).to_le_bytes());
1876 for bt in &p.binary_tags {
1877 feed(&mut h, bt.key.as_bytes());
1878 feed(&mut h, &bt.payload);
1879 }
1880 h.update((p.structural_blocks.len() as u64).to_le_bytes());
1881 for (kind, body) in &p.structural_blocks {
1882 feed(&mut h, kind.as_bytes());
1883 feed(&mut h, body);
1884 }
1885 format!("{:x}", base16ct::HexDisplay(&h.finalize()))
1886}
1887
1888pub(crate) fn full_file_hash(path: &std::path::Path) -> std::io::Result<String> {
1892 use sha2::{Digest, Sha256};
1893 let mut f = std::fs::File::open(path)?;
1894 let mut h = Sha256::new();
1895 let mut buf = vec![0u8; 1 << 16];
1896 loop {
1897 let n = std::io::Read::read(&mut f, &mut buf)?;
1898 if n == 0 {
1899 break;
1900 }
1901 h.update(&buf[..n]);
1902 }
1903 Ok(format!("{:x}", base16ct::HexDisplay(&h.finalize())))
1904}
1905
1906#[cfg(test)]
1907mod bounded_probe_tests;
1908#[cfg(test)]
1909mod hardening_tests;
1910#[cfg(test)]
1911mod ogg_probe_tests;
1912#[cfg(test)]
1913mod scan_unit_tests;
1914#[cfg(test)]
1915mod wav_probe_tests;