Skip to main content

musefs_core/
scan.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use musefs_db::convert::usize_from;
5use musefs_db::{Db, Format, NewArt, NewTrack, Tag, TrackArt};
6use musefs_format::{EmbeddedBinaryTag, EmbeddedPicture, Extent, flac, mp3, mp4, ogg, wav};
7
8use crate::byte_budget::ByteBudget;
9use crate::error::Result;
10use crate::freshness::BackingStamp;
11use std::sync::mpsc::sync_channel;
12
/// File-count batching threshold.
/// NOTE(review): the use site is outside this view — presumably the maximum
/// number of probed files written per batch transaction; confirm against
/// `run_pipeline`.
const BATCH_FILES: usize = 256;
/// Default for `ScanOptions::batch_bytes`: the in-flight art-byte budget and
/// per-batch byte-flush threshold.
const BATCH_BYTES: u64 = 64 << 20; // 64 MiB
15
/// Initial bounded-read window, and the default for `ScanOptions::window`.
/// Covers typical metadata + cover art; a larger metadata region triggers a
/// `NeedMore` widen.
const WINDOW: usize = 1 << 20; // 1 MiB
/// Cap on widen iterations before falling back to a full-buffer probe over the
/// front of the file (at most `MAX_PROBE_BYTES`, never the whole file).
const MAX_WIDEN_RETRIES: usize = 8;
/// Hard ceiling on bytes read to probe one file. Real audio metadata fits far
/// below this, so a file still unparsed past the cap is treated as malformed
/// rather than read whole into RAM. Guards against a multi-GB file misnamed with
/// an audio extension, and against a corrupt header whose length field demands a
/// giant `NeedMore` widen.
pub(crate) const MAX_PROBE_BYTES: u64 = 64 << 20; // 64 MiB

/// The artwork-size ceiling. Enforced here at ingest (oversize scanned art is
/// dropped) and at resolve in `mapping::track_art_to_inputs` (oversize art from
/// any writer is rejected). Sized to clear FLAC's 24-bit block length with
/// headroom for the picture-block framing: 16 MiB minus 64 KiB.
pub(crate) const MAX_ART_BYTES: usize = 16 * 1024 * 1024 - 64 * 1024;

/// Per-frame cap for opaque binary tags, mirroring `MAX_ART_BYTES`. Oversize
/// payloads (e.g. a GEOB embedding a multi-MB file) are logged-and-skipped.
const MAX_BINARY_TAG_BYTES: usize = MAX_ART_BYTES;
37
/// Outcome of probing one backing file. `Unparseable` is a supported-extension
/// file whose bytes did not parse (counted as a scan `failed`). `Raced` means
/// the file changed under us between the pre- and post-probe `fstat` — the probe
/// may be torn, so nothing is committed for it (#276).
#[derive(Debug)]
enum ProbeOutcome {
    /// The file parsed and its stamp was unchanged across the probe; carries
    /// the parsed fields and the pre-probe stamp (equal to the post-probe one).
    Probed(Probed, BackingStamp),
    /// The per-format dispatch produced no result for this file.
    Unparseable,
    /// The pre- and post-probe stamps differed, so the probe result is discarded.
    Raced,
}
48
// Test-only, per-thread hook slot. `probe_file` fires it right after taking the
// pre-probe stamp, letting a test mutate the file in the middle of a probe.
#[cfg(test)]
thread_local! {
    static AFTER_S1_HOOK: std::cell::RefCell<Option<Box<dyn FnMut()>>> =
        const { std::cell::RefCell::new(None) };
}

/// Runs the installed hook, if any. The slot stays mutably borrowed while the
/// hook runs, so the hook itself must not install or clear a hook.
#[cfg(test)]
fn fire_after_s1() {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        if let Some(hook) = guard.as_mut() {
            hook();
        }
    });
}

/// Installs `f` as this thread's hook, replacing any previous one.
#[cfg(test)]
fn set_after_s1_hook(f: impl FnMut() + 'static) {
    AFTER_S1_HOOK.with(|slot| {
        let mut guard = slot.borrow_mut();
        *guard = Some(Box::new(f));
    });
}

/// Removes this thread's hook so later probes run undisturbed.
#[cfg(test)]
fn clear_after_s1_hook() {
    AFTER_S1_HOOK.with(|slot| {
        slot.borrow_mut().take();
    });
}
70
/// Counters reported by a scan.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ScanStats {
    /// NOTE(review): presumably files probed and written successfully — the
    /// tally happens in the pipeline; confirm there.
    pub scanned: u64,
    /// Files whose extension is not a supported audio format (tallied during
    /// the directory walk).
    pub skipped: u64,
    /// Supported-extension files that hit a per-file I/O or parse error.
    pub failed: u64,
    /// NOTE(review): presumably files whose probe detected a change mid-probe
    /// and were therefore not committed; confirm against the pipeline.
    pub raced: u64,
}
78
/// Counters reported by a revalidation pass.
/// NOTE(review): the code that fills these in is outside this view; the field
/// notes below are inferred from the names and should be confirmed there.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RevalidateStats {
    /// Presumably tracks that were re-probed and rewritten.
    pub updated: u64,
    /// Presumably tracks whose backing file still matched its stored stamp.
    pub unchanged: u64,
    /// Presumably tracks removed because their backing file is gone.
    pub pruned: u64,
    /// Presumably tracks that hit a per-file I/O or parse error.
    pub failed: u64,
    /// Presumably tracks whose backing file changed during the probe.
    pub raced: u64,
}
87
/// True if `path` has an extension equal to `ext`, compared ASCII
/// case-insensitively. A missing or non-UTF-8 extension never matches.
fn has_ext(path: &Path, ext: &str) -> bool {
    match path.extension().and_then(std::ffi::OsStr::to_str) {
        Some(actual) => actual.eq_ignore_ascii_case(ext),
        None => false,
    }
}
93
/// True if `path` has an extension for a format the scanner can probe.
///
/// The comparison is ASCII case-insensitive; a path with no extension, or with
/// a non-UTF-8 extension, is not supported. The extension is extracted once and
/// checked against a single table, so adding a format is a one-line change.
fn is_supported_audio(path: &Path) -> bool {
    const SUPPORTED_EXTENSIONS: [&str; 8] =
        ["flac", "mp3", "m4a", "m4b", "ogg", "oga", "opus", "wav"];
    path.extension()
        .and_then(|e| e.to_str())
        .is_some_and(|e| {
            SUPPORTED_EXTENSIONS
                .iter()
                .any(|known| e.eq_ignore_ascii_case(known))
        })
}
105
106fn collect_audio(
107    root: &Path,
108    out: &mut Vec<PathBuf>,
109    follow_symlinks: bool,
110) -> std::io::Result<u64> {
111    let mut visited = HashSet::new();
112    let mut files_visited = HashSet::new();
113    let mut skipped = 0u64;
114    if follow_symlinks {
115        // Seed with the root's identity so a symlink pointing back to it is
116        // caught as a cycle on the first descent.
117        if let Ok(meta) = std::fs::metadata(root) {
118            visited.insert(dir_key(&meta));
119        }
120    }
121    collect_audio_inner(
122        root,
123        out,
124        follow_symlinks,
125        &mut visited,
126        &mut files_visited,
127        &mut skipped,
128    )?;
129    Ok(skipped)
130}
131
132fn collect_audio_inner(
133    root: &Path,
134    out: &mut Vec<PathBuf>,
135    follow_symlinks: bool,
136    visited: &mut HashSet<(u64, u64)>,
137    files_visited: &mut HashSet<(u64, u64)>,
138    skipped: &mut u64,
139) -> std::io::Result<()> {
140    for entry in std::fs::read_dir(root)? {
141        let entry = entry?;
142        let path = entry.path();
143        let ftype = entry.file_type()?;
144        if ftype.is_dir() {
145            descend(&path, out, follow_symlinks, visited, files_visited, skipped)?;
146        } else if ftype.is_file() {
147            if is_supported_audio(&path) {
148                push_file(&path, out, follow_symlinks, files_visited, None);
149            } else {
150                *skipped += 1;
151            }
152        } else if ftype.is_symlink() {
153            if !follow_symlinks {
154                log::warn!(
155                    "skipping symlink {} (pass --follow-symlinks to scan it)",
156                    path.display()
157                );
158                continue;
159            }
160            match std::fs::metadata(&path) {
161                Ok(meta) if meta.is_dir() => {
162                    descend(&path, out, follow_symlinks, visited, files_visited, skipped)?;
163                }
164                Ok(meta) if meta.is_file() => {
165                    if is_supported_audio(&path) {
166                        push_file(&path, out, follow_symlinks, files_visited, Some(&meta));
167                    } else {
168                        *skipped += 1;
169                    }
170                }
171                Ok(_) => {}
172                Err(e) => {
173                    log::warn!("skipping broken symlink {}: {e}", path.display());
174                }
175            }
176        }
177    }
178    Ok(())
179}
180
181fn descend(
182    path: &Path,
183    out: &mut Vec<PathBuf>,
184    follow_symlinks: bool,
185    visited: &mut HashSet<(u64, u64)>,
186    files_visited: &mut HashSet<(u64, u64)>,
187    skipped: &mut u64,
188) -> std::io::Result<()> {
189    if !follow_symlinks {
190        return collect_audio_inner(path, out, follow_symlinks, visited, files_visited, skipped);
191    }
192    let meta = match std::fs::metadata(path) {
193        Ok(m) => m,
194        Err(e) => {
195            log::warn!("skipping directory {}: {e}", path.display());
196            return Ok(());
197        }
198    };
199    if !visited.insert(dir_key(&meta)) {
200        log::warn!("skipping symlink cycle at {}", path.display());
201        return Ok(());
202    }
203    collect_audio_inner(path, out, follow_symlinks, visited, files_visited, skipped)
204}
205
/// Identity of a filesystem object as its `(device, inode)` pair. Used for both
/// directories (cycle detection) and files (duplicate-target detection).
fn dir_key(meta: &std::fs::Metadata) -> (u64, u64) {
    use std::os::unix::fs::MetadataExt as _;
    let device = meta.dev();
    let inode = meta.ino();
    (device, inode)
}
210
211/// Collect one supported-extension file into `out`, deduplicating by target
212/// identity when following symlinks so a real file and a symlink to it (or a
213/// file reached via two symlink paths) are ingested once. `known_meta` is the
214/// already-resolved target metadata when the caller has it (the symlink arm),
215/// avoiding a second `stat`. Dedup is best-effort: if the target cannot be
216/// `stat`ed we push it and let the probe pipeline count it rather than dropping
217/// it silently.
218fn push_file(
219    path: &Path,
220    out: &mut Vec<PathBuf>,
221    follow_symlinks: bool,
222    files_visited: &mut HashSet<(u64, u64)>,
223    known_meta: Option<&std::fs::Metadata>,
224) {
225    if !follow_symlinks {
226        out.push(path.to_path_buf());
227        return;
228    }
229    let key = match known_meta {
230        Some(m) => Some(dir_key(m)),
231        None => std::fs::metadata(path).ok().map(|m| dir_key(&m)),
232    };
233    match key {
234        Some(k) if !files_visited.insert(k) => {
235            log::debug!("skipping duplicate backing target {}", path.display());
236        }
237        _ => out.push(path.to_path_buf()),
238    }
239}
240
/// A backing file parsed into the fields a track row needs, plus its raw
/// `(key, value)` tags to seed.
#[derive(Debug)]
pub(crate) struct Probed {
    /// Detected container/codec, stored as the track's format.
    format: Format,
    /// Byte offset of the audio payload within the backing file.
    audio_offset: u64,
    /// Length in bytes of the audio payload.
    audio_length: u64,
    /// Raw `(key, value)` text tags in the order the format reader returned
    /// them; keys are filtered and given per-key ordinals at ingest.
    tags: Vec<(String, String)>,
    /// Embedded pictures; oversize ones are dropped at ingest.
    pictures: Vec<EmbeddedPicture>,
    /// Opaque binary tags; empty and oversize payloads are dropped at ingest.
    binary_tags: Vec<EmbeddedBinaryTag>,
    /// FLAC STREAMINFO/SEEKTABLE as (kind, body) pairs; empty for other formats.
    structural_blocks: Vec<(String, Vec<u8>)>,
}
254
255/// Assemble a WAV [`Probed`] from located audio bounds, reading tags and pictures
256/// from `prefix`. Shared by the bounded, full-buffer, and ceiling probe paths.
257fn wav_probed(prefix: &[u8], bounds: &wav::WavBounds) -> Probed {
258    let (binary_tags, promoted) = wav::read_binary_tags(prefix);
259    let mut tags = wav::read_tags(prefix);
260    tags.extend(promoted);
261    Probed {
262        format: Format::Wav,
263        audio_offset: bounds.audio_offset,
264        audio_length: bounds.audio_length,
265        tags,
266        pictures: wav::read_pictures(prefix),
267        binary_tags,
268        structural_blocks: Vec::new(),
269    }
270}
271
272/// Full-buffer probe (legacy path). Retained as the reference implementation the
273/// bounded path is checked against (see the equivalence property test).
274pub(crate) fn probe_full(path: &Path, bytes: &[u8]) -> Option<Probed> {
275    if has_ext(path, "flac") {
276        let scan = flac::locate_audio(bytes).ok()?;
277        let (structural_blocks, binary_tags) = flac::split_preserved(&scan.preserved);
278        Some(Probed {
279            format: Format::Flac,
280            audio_offset: scan.audio_offset,
281            audio_length: scan.audio_length,
282            tags: flac::read_vorbis_comments(bytes).unwrap_or_default(),
283            pictures: flac::read_pictures(bytes).unwrap_or_default(),
284            binary_tags,
285            structural_blocks,
286        })
287    } else if has_ext(path, "mp3") {
288        let bounds = mp3::locate_audio(bytes).ok()?;
289        let (binary_tags, promoted) = mp3::read_binary_tags(bytes);
290        let mut tags = mp3::read_tags(bytes);
291        tags.extend(promoted);
292        Some(Probed {
293            format: Format::Mp3,
294            audio_offset: bounds.audio_offset,
295            audio_length: bounds.audio_length,
296            tags,
297            pictures: mp3::read_pictures(bytes),
298            binary_tags,
299            structural_blocks: Vec::new(),
300        })
301    } else if has_ext(path, "m4a") || has_ext(path, "m4b") {
302        let bounds = mp4::locate_audio(bytes).ok()?;
303        Some(Probed {
304            format: Format::M4a,
305            audio_offset: bounds.audio_offset,
306            audio_length: bounds.audio_length,
307            tags: mp4::read_tags(bytes),
308            pictures: mp4::read_pictures(bytes, MAX_ART_BYTES),
309            binary_tags: mp4::read_binary_tags(bytes, MAX_BINARY_TAG_BYTES),
310            structural_blocks: Vec::new(),
311        })
312    } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
313        let scan = ogg::locate_audio(bytes).ok()?;
314        let format = match scan.codec {
315            ogg::Codec::Opus => Format::Opus,
316            ogg::Codec::Vorbis => Format::Vorbis,
317            ogg::Codec::OggFlac => Format::OggFlac,
318        };
319        Some(Probed {
320            format,
321            audio_offset: scan.audio_offset,
322            audio_length: scan.audio_length,
323            tags: ogg::read_tags(bytes).unwrap_or_default(),
324            pictures: ogg::read_pictures(bytes).unwrap_or_default(),
325            binary_tags: Vec::new(),
326            structural_blocks: Vec::new(),
327        })
328    } else if has_ext(path, "wav") {
329        let bounds = wav::locate_audio(bytes).ok()?;
330        Some(wav_probed(bytes, &bounds))
331    } else {
332        None
333    }
334}
335
336/// Read `[0, len)` of `path` into a buffer, counting the read. A short read at
337/// EOF is fine (`len` may exceed the file size).
338fn read_window(file: &std::fs::File, len: usize) -> std::io::Result<Vec<u8>> {
339    use std::os::unix::fs::FileExt;
340    let mut buf = vec![0u8; len];
341    let n = file.read_at(&mut buf, 0)?;
342    buf.truncate(n);
343    crate::metrics::on_scan_read(n as u64);
344    Ok(buf)
345}
346
347/// Read the file's last 128 bytes (for the MP3 ID3v1 trailer check), or `None`
348/// if the file is shorter than 128 bytes.
349fn read_tail_128(file: &std::fs::File, file_len: u64) -> std::io::Result<Option<[u8; 128]>> {
350    if file_len < 128 {
351        return Ok(None);
352    }
353    use std::os::unix::fs::FileExt;
354    let mut buf = [0u8; 128];
355    file.read_exact_at(&mut buf, file_len - 128)?;
356    crate::metrics::on_scan_read(128);
357    Ok(Some(buf))
358}
359
360/// Bounded probe of one backing file: open once, fstat before and after the
361/// probe, and report `Raced` when the file moved mid-probe — so the stored
362/// stamp and the probed bytes provably share one inode held still across the
363/// probe. Never reads the audio payload (M4A uses the seek reader;
364/// front-anchored formats read only the metadata extent).
365///
366/// Returns `ProbeOutcome::Unparseable` for a supported-extension file that does
367/// not parse (counted as `failed`) and `ProbeOutcome::Raced` if the file
368/// changed under us.
369fn probe_file(path: &Path, window: usize) -> std::io::Result<ProbeOutcome> {
370    let file = std::fs::File::open(path)?;
371    crate::metrics::on_scan_open();
372    let s1 = BackingStamp::from_metadata(&file.metadata()?);
373    #[cfg(test)]
374    fire_after_s1();
375
376    let probed = probe_body(path, &file, s1.size, window)?;
377
378    let s2 = BackingStamp::from_metadata(&file.metadata()?);
379    if s1 != s2 {
380        log::warn!("skipping {}: changed during probe", path.display());
381        return Ok(ProbeOutcome::Raced);
382    }
383    Ok(match probed {
384        Some(p) => ProbeOutcome::Probed(p, s1),
385        None => ProbeOutcome::Unparseable,
386    })
387}
388
/// The per-format metadata dispatch for one already-opened backing file, over
/// its first `file_len` bytes. Split out of `probe_file` so the fstat-sandwich
/// wrapper stays legible.
///
/// M4A goes through the seek reader. Front-anchored formats start with a
/// `window`-byte read and widen on `NeedMore`, at most `MAX_WIDEN_RETRIES`
/// times and never past `MAX_PROBE_BYTES`. If that does not settle it, the
/// fallback re-probes with `probe_full` over the front of the file up to the
/// same ceiling — so the fallback, unlike the bounded path, may read bytes of
/// the audio payload.
///
/// Returns `Ok(None)` for an unsupported/unparseable file (after logging a
/// warning); I/O errors from the reads propagate.
fn probe_body(
    path: &Path,
    file: &std::fs::File,
    file_len: u64,
    window: usize,
) -> std::io::Result<Option<Probed>> {
    // M4A: seek reader, never touches mdat.
    if has_ext(path, "m4a") || has_ext(path, "m4b") {
        // A mutable binding of the shared `&File`, passed as `&mut &File`.
        // NOTE(review): presumably `read_structure_from` wants a reader/seeker,
        // which `&File` implements — confirm against its signature.
        let mut f = file;
        let scan = match mp4::read_structure_from(&mut f, file_len) {
            Ok(s) => s,
            Err(e) => {
                log::warn!("skipping {}: {e}", path.display());
                return Ok(None);
            }
        };
        // Tags, pictures, and binary tags are all read from the buffered `moov`.
        return Ok(Some(Probed {
            format: Format::M4a,
            audio_offset: scan.mdat_payload_offset,
            audio_length: scan.mdat_payload_len,
            tags: mp4::read_tags(&scan.moov),
            pictures: mp4::read_pictures(&scan.moov, MAX_ART_BYTES),
            binary_tags: mp4::read_binary_tags(&scan.moov, MAX_BINARY_TAG_BYTES),
            structural_blocks: Vec::new(),
        }));
    }

    // Front-anchored formats: read a window, widen on NeedMore. Only the MP3
    // arm of probe_prefix consumes the ID3v1 tail, and dispatch is by
    // extension — so only .mp3 pays the tail read (#67).
    let tail = if has_ext(path, "mp3") {
        read_tail_128(file, file_len)?
    } else {
        None
    };
    // Never read past the probe ceiling, however large the file or whatever a
    // (possibly corrupt) header asks for via `NeedMore`.
    let probe_cap = file_len.min(MAX_PROBE_BYTES);
    // `want` is the number of leading bytes requested for the current attempt.
    let mut want = usize_from((window as u64).min(probe_cap));
    let mut prefix = read_window(file, want)?;
    for _ in 0..MAX_WIDEN_RETRIES {
        match probe_prefix(path, &prefix, file_len, tail.as_ref()) {
            Probe::Done(p) => return Ok(Some(p)),
            Probe::Skip => {
                log::warn!("skipping {}: no parseable audio metadata", path.display());
                return Ok(None);
            }
            Probe::NeedMore(up_to) => {
                // Read everything we're willing to probe? Widening can't help.
                if want as u64 >= probe_cap {
                    break;
                }
                // Grow to at least `up_to` (capped at `probe_cap`), always making
                // progress (`+1`), then retry. Each widen re-reads from offset 0.
                want = usize_from(up_to.min(probe_cap))
                    .max(want + 1)
                    .min(usize_from(probe_cap));
                prefix = read_window(file, want)?;
            }
        }
    }
    // Reached when the retries ran out or the window already spans `probe_cap`.
    // A prefix widened on the final retry is first examined here, not by
    // `probe_prefix`.
    // Fallback: full-buffer probe over the bytes we were willing to read.
    if (prefix.len() as u64) < probe_cap {
        prefix = read_window(file, usize_from(probe_cap))?;
    }
    if let Some(p) = probe_full(path, &prefix) {
        return Ok(Some(p));
    }
    // A WAV whose `data` payload runs past the probe ceiling fails the strict
    // full-buffer parse (the payload isn't present to bound), yet its `fmt `/`data`
    // headers sit at the front: trust the declared bounds and serve the audio,
    // accepting the loss of any tag chunks trailing the payload.
    if has_ext(path, "wav")
        && file_len > MAX_PROBE_BYTES
        && let Ok(bounds) = wav::locate_audio_at_ceiling(&prefix, file_len)
    {
        return Ok(Some(wav_probed(&prefix, &bounds)));
    }
    // Pick the warning that says whether the probe ceiling was the limiting factor.
    if file_len > MAX_PROBE_BYTES {
        log::warn!(
            "skipping {}: no parseable metadata within first {MAX_PROBE_BYTES} bytes",
            path.display()
        );
    } else {
        log::warn!("skipping {}: no parseable audio metadata", path.display());
    }
    Ok(None)
}
482
/// Outcome of a single bounded dispatch attempt against the current `prefix`.
enum Probe {
    /// The metadata was fully contained in the prefix and parsed.
    Done(Probed),
    /// The prefix is too short; carries the byte count (`up_to`) the format
    /// reader asked for. The caller caps it at the probe ceiling before
    /// re-reading.
    NeedMore(u64),
    /// The extension is not a front-anchored format, or the format reader
    /// returned an error; the caller gives up on the file.
    Skip,
}
489
490/// Dispatch the front-anchored formats against `prefix` + `file_len`.
491fn probe_prefix(path: &Path, prefix: &[u8], file_len: u64, tail: Option<&[u8; 128]>) -> Probe {
492    if has_ext(path, "flac") {
493        match flac::read_metadata_bounded(prefix) {
494            Ok(Extent::Complete(meta)) => {
495                let (structural_blocks, binary_tags) = flac::split_preserved(&meta.preserved);
496                Probe::Done(Probed {
497                    format: Format::Flac,
498                    audio_offset: meta.audio_offset,
499                    audio_length: file_len - meta.audio_offset,
500                    tags: flac::read_vorbis_comments(prefix).unwrap_or_default(),
501                    pictures: flac::read_pictures(prefix).unwrap_or_default(),
502                    binary_tags,
503                    structural_blocks,
504                })
505            }
506            Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
507            Err(_) => Probe::Skip,
508        }
509    } else if has_ext(path, "mp3") {
510        match mp3::locate_audio_bounded(prefix, file_len, tail) {
511            Ok(Extent::Complete(b)) => {
512                let (binary_tags, promoted) = mp3::read_binary_tags(prefix);
513                let mut tags = mp3::read_tags(prefix);
514                tags.extend(promoted);
515                Probe::Done(Probed {
516                    format: Format::Mp3,
517                    audio_offset: b.audio_offset,
518                    audio_length: b.audio_length,
519                    tags,
520                    pictures: mp3::read_pictures(prefix),
521                    binary_tags,
522                    structural_blocks: Vec::new(),
523                })
524            }
525            Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
526            Err(_) => Probe::Skip,
527        }
528    } else if has_ext(path, "ogg") || has_ext(path, "oga") || has_ext(path, "opus") {
529        match ogg::read_metadata_bounded(prefix, file_len) {
530            Ok(Extent::Complete(header)) => {
531                let format = match header.codec {
532                    ogg::Codec::Opus => Format::Opus,
533                    ogg::Codec::Vorbis => Format::Vorbis,
534                    ogg::Codec::OggFlac => Format::OggFlac,
535                };
536                Probe::Done(Probed {
537                    format,
538                    audio_offset: header.audio_offset,
539                    audio_length: file_len - header.audio_offset,
540                    tags: ogg::read_tags(prefix).unwrap_or_default(),
541                    pictures: ogg::read_pictures(prefix).unwrap_or_default(),
542                    binary_tags: Vec::new(),
543                    structural_blocks: Vec::new(),
544                })
545            }
546            Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
547            Err(_) => Probe::Skip,
548        }
549    } else if has_ext(path, "wav") {
550        match wav::locate_audio_bounded(prefix, file_len) {
551            Ok(Extent::Complete(b)) => Probe::Done(wav_probed(prefix, &b)),
552            Ok(Extent::NeedMore { up_to }) => Probe::NeedMore(up_to),
553            Err(_) => Probe::Skip,
554        }
555    } else {
556        Probe::Skip
557    }
558}
559
/// Tunables for one scan run.
#[derive(Clone, Debug)]
pub struct ScanOptions {
    /// Number of probe workers; `0` means "use available parallelism".
    pub jobs: usize,
    /// Size in bytes of the first probe read; the probe widens it on `NeedMore`.
    pub window: usize,
    /// In-flight art-byte budget, which is also the per-batch byte-flush
    /// threshold.
    pub batch_bytes: u64,
    /// Whether collection follows symlinks. Off by default: symlinks are logged
    /// and skipped, which keeps the walk immune to directory-symlink cycles.
    pub follow_symlinks: bool,
}
572
573impl Default for ScanOptions {
574    fn default() -> Self {
575        Self {
576            jobs: 0,
577            window: WINDOW,
578            batch_bytes: BATCH_BYTES,
579            follow_symlinks: false,
580        }
581    }
582}
583
/// Resolves the requested worker count: a non-zero `jobs` is used as-is, while
/// `0` becomes the machine's available parallelism (or 1 if that is unknown).
fn effective_jobs(jobs: usize) -> usize {
    match jobs {
        0 => std::thread::available_parallelism().map_or(1, |n| n.get()),
        explicit => explicit,
    }
}
590
/// One probed file ready to write, plus its art-byte weight for backpressure.
struct Unit {
    /// Absolute path of the backing file, as stored in the track row.
    abs_path: String,
    /// Stamp of the backing file taken around the probe.
    stamp: BackingStamp,
    /// The parsed fields to ingest.
    probed: Probed,
    /// Byte weight charged against the batch budget.
    /// NOTE(review): presumably the value of `payload_weight(&probed)` —
    /// confirm where `Unit` is constructed.
    weight: u64,
}
598
599/// In-memory byte weight of a `Probed`, used for batch backpressure
600/// (`ScanOptions::batch_bytes`). Counts every buffered payload — pictures plus FLAC
601/// structural blocks and binary tags — so large preserved blocks can't slip the
602/// budget the way picture-only accounting did.
603fn payload_weight(p: &Probed) -> u64 {
604    let pictures: u64 = p.pictures.iter().map(|pic| pic.data.len() as u64).sum();
605    let binary: u64 = p.binary_tags.iter().map(|t| t.payload.len() as u64).sum();
606    let structural: u64 = p
607        .structural_blocks
608        .iter()
609        .map(|(_, body)| body.len() as u64)
610        .sum();
611    pictures + binary + structural
612}
613
/// The universal `tags.key` floor, mirrored from the DB `CHECK` exactly: a key
/// is accepted when it is non-empty and has no byte below 0x20 (the control
/// chars the DB rejects via its GLOB range; NUL also fails here, the DB's
/// documented blind spot). DEL (0x7F) and high/non-ASCII bytes pass, matching
/// the DB. This is deliberately looser than the strict Vorbis `is_valid_key`
/// (which also bars `=`, 0x7E, 0x7F, and non-ASCII) — applying that here would
/// wrongly drop legal MP3/M4A custom keys containing `=`/`:`/space.
fn key_passes_floor(key: &str) -> bool {
    if key.is_empty() {
        return false;
    }
    let has_control_byte = key.bytes().any(|byte| byte < 0x20);
    !has_control_byte
}
624
625/// Drops embedded pictures over [`MAX_ART_BYTES`], logging each so a cover that
626/// vanishes from the synthesized view is explained rather than silent (#284).
627/// Filtering here, before the caller enumerates, keeps stored art ordinals
628/// gap-free. Note: the mp4 `covr` path caps oversize art earlier, inside
629/// `mp4::read_pictures`, so those drops never reach this filter.
630fn accept_pictures(abs_path: &str, pictures: Vec<EmbeddedPicture>) -> Vec<EmbeddedPicture> {
631    pictures
632        .into_iter()
633        .filter(|p| {
634            if p.data.len() > MAX_ART_BYTES {
635                log::warn!(
636                    "{abs_path}: dropping embedded {} art ({} bytes), over the {MAX_ART_BYTES}-byte cap",
637                    p.mime,
638                    p.data.len(),
639                );
640                return false;
641            }
642            true
643        })
644        .collect()
645}
646
647/// Filters embedded binary tags to those worth storing, logging oversize drops
648/// (#284). Empty payloads carry nothing to serve, so they are dropped silently;
649/// payloads over [`MAX_BINARY_TAG_BYTES`] are a lossy drop and get a warning.
650fn accept_binary_tags(abs_path: &str, tags: Vec<EmbeddedBinaryTag>) -> Vec<musefs_db::BinaryTag> {
651    tags.into_iter()
652        .filter(|b| {
653            if b.payload.len() > MAX_BINARY_TAG_BYTES {
654                log::warn!(
655                    "{abs_path}: dropping binary tag {} ({} bytes), over the {MAX_BINARY_TAG_BYTES}-byte cap",
656                    b.key,
657                    b.payload.len(),
658                );
659                return false;
660            }
661            !b.payload.is_empty()
662        })
663        .enumerate()
664        .map(|(ordinal, b)| musefs_db::BinaryTag {
665            key: b.key,
666            payload: b.payload,
667            ordinal: ordinal as u64,
668        })
669        .collect()
670}
671
672/// Upsert a track from a probed backing file: write the track row, replace its
673/// seeded tags, and ingest its embedded art (capped, deduped, clamped).
674fn ingest(db: &Db, abs_path: &str, meta: &std::fs::Metadata, probed: Probed) -> Result<()> {
675    let stamp = BackingStamp::from_metadata(meta);
676    let track_id = db.upsert_track(&NewTrack {
677        backing_path: abs_path.to_string(),
678        format: probed.format,
679        audio_offset: probed.audio_offset,
680        audio_length: probed.audio_length,
681        backing_size: meta.len(),
682        backing_mtime_ns: stamp.mtime_ns,
683        backing_ctime_ns: stamp.ctime_ns,
684    })?;
685
686    let mut tags = Vec::new();
687    let mut ordinals: HashMap<String, u64> = HashMap::new();
688    for (key, value) in probed.tags {
689        if !key_passes_floor(&key) {
690            continue;
691        }
692        let ord = ordinals.entry(key.clone()).or_insert(0);
693        tags.push(Tag::new(&key, &value, *ord));
694        *ord += 1;
695    }
696    db.replace_tags(track_id, &tags)?;
697
698    let binary_tags = accept_binary_tags(abs_path, probed.binary_tags);
699    db.set_binary_tags(track_id, &binary_tags)?;
700
701    let mut sb_ordinals: HashMap<String, u64> = HashMap::new();
702    let structural_blocks: Vec<musefs_db::StructuralBlock> = probed
703        .structural_blocks
704        .into_iter()
705        .map(|(kind, body)| {
706            let ord = sb_ordinals.entry(kind.clone()).or_insert(0);
707            let sb = musefs_db::StructuralBlock {
708                kind,
709                ordinal: *ord,
710                body,
711            };
712            *ord += 1;
713            sb
714        })
715        .collect();
716    db.set_structural_blocks(track_id, &structural_blocks)?;
717
718    let mut track_arts = Vec::new();
719    for (ordinal, pic) in accept_pictures(abs_path, probed.pictures)
720        .into_iter()
721        .enumerate()
722    {
723        let art_id = db.upsert_art(&NewArt {
724            mime: pic.mime,
725            width: (pic.width != 0).then_some(pic.width),
726            height: (pic.height != 0).then_some(pic.height),
727            data: pic.data,
728        })?;
729        let picture_type = pic.picture_type.get();
730        track_arts.push(TrackArt {
731            art_id,
732            picture_type,
733            description: pic.description,
734            ordinal: ordinal as u64,
735        });
736    }
737    db.set_track_art(track_id, &track_arts)?;
738    Ok(())
739}
740
741/// Like `ingest`, but writes through a batch `BulkWriter`. Takes `probed` by
742/// value so picture/binary-tag/structural-block bytes are moved, not cloned (#68).
743fn ingest_bulk(
744    bw: &mut musefs_db::BulkWriter<'_>,
745    abs_path: &str,
746    stamp: BackingStamp,
747    probed: Probed,
748) -> Result<()> {
749    let track_id = bw.upsert_track(&NewTrack {
750        backing_path: abs_path.to_string(),
751        format: probed.format,
752        audio_offset: probed.audio_offset,
753        audio_length: probed.audio_length,
754        backing_size: stamp.size,
755        backing_mtime_ns: stamp.mtime_ns,
756        backing_ctime_ns: stamp.ctime_ns,
757    })?;
758
759    let mut tags = Vec::new();
760    let mut ordinals: HashMap<String, u64> = HashMap::new();
761    for (key, value) in &probed.tags {
762        if !key_passes_floor(key) {
763            continue;
764        }
765        let ord = ordinals.entry(key.clone()).or_insert(0);
766        tags.push(Tag::new(key, value, *ord));
767        *ord += 1;
768    }
769    bw.replace_tags(track_id, &tags)?;
770
771    let binary_tags = accept_binary_tags(abs_path, probed.binary_tags);
772    bw.set_binary_tags(track_id, &binary_tags)?;
773
774    let mut sb_ordinals: HashMap<String, u64> = HashMap::new();
775    let structural_blocks: Vec<musefs_db::StructuralBlock> = probed
776        .structural_blocks
777        .into_iter()
778        .map(|(kind, body)| {
779            let ord = sb_ordinals.entry(kind.clone()).or_insert(0);
780            let sb = musefs_db::StructuralBlock {
781                kind,
782                ordinal: *ord,
783                body,
784            };
785            *ord += 1;
786            sb
787        })
788        .collect();
789    bw.set_structural_blocks(track_id, &structural_blocks)?;
790
791    let mut track_arts = Vec::new();
792    for (ordinal, pic) in accept_pictures(abs_path, probed.pictures)
793        .into_iter()
794        .enumerate()
795    {
796        let art_id = bw.upsert_art(&NewArt {
797            mime: pic.mime,
798            width: (pic.width != 0).then_some(pic.width),
799            height: (pic.height != 0).then_some(pic.height),
800            data: pic.data,
801        })?;
802        let picture_type = pic.picture_type.get();
803        track_arts.push(TrackArt {
804            art_id,
805            picture_type,
806            description: pic.description,
807            ordinal: ordinal as u64,
808        });
809    }
810    bw.set_track_art(track_id, &track_arts)?;
811    Ok(())
812}
813
/// Public entry: parallel-probe / single-writer scan of `root`.
///
/// Insert/update a track row for each supported audio file (FLAC, MP3, M4A,
/// Opus, Vorbis, FLAC-in-Ogg, WAV) under `root` (with audio bounds and
/// validation stamps), seeding its tags from the file's existing metadata.
/// `root` may be a single audio file (only that file is scanned) or a
/// directory (walked recursively; symlinks are followed only when
/// `opts.follow_symlinks` is set). Files whose extension is not a supported
/// audio format increment `ScanStats::skipped`; supported-extension files with
/// a per-file I/O or parse error increment `ScanStats::failed` and do not
/// abort the scan.
///
/// # Errors
/// Fails if the directory walk hits an I/O error, if applying the bulk pragmas
/// fails, or if the pipeline itself returns an error.
pub fn scan_directory_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<ScanStats> {
    let mut files = Vec::new();
    let mut skipped = 0u64;
    if root.is_file() {
        // Single-file scan: no walk, so the extension check happens here.
        if is_supported_audio(root) {
            files.push(root.to_path_buf());
        } else {
            skipped += 1;
        }
    } else {
        skipped += collect_audio(root, &mut files, opts.follow_symlinks)?;
    }
    db.apply_bulk_pragmas_self()?; // scan-scoped tuning on the caller's connection
    let mut stats = run_pipeline(db, files, opts)?;
    // skipped is tallied during the walk, not the pipeline
    stats.skipped = skipped;
    Ok(stats)
}
841
842/// Back-compat shim used by the CLI and existing tests.
843pub fn scan_directory(db: &Db, root: &Path) -> Result<ScanStats> {
844    scan_directory_with(db, root, &ScanOptions::default())
845}
846
847/// Probe `files` across `jobs` workers (no DB access) and write the results from a
848/// single writer (this thread) in batched transactions. Per-file errors are
849/// counted, not fatal.
850fn run_pipeline(db: &Db, files: Vec<PathBuf>, opts: &ScanOptions) -> Result<ScanStats> {
851    use std::sync::Arc;
852    use std::sync::atomic::{AtomicU64, Ordering};
853
854    let jobs = effective_jobs(opts.jobs);
855    let window = opts.window;
856    let cap = opts.batch_bytes;
857    let budget = Arc::new(ByteBudget::new(cap));
858    let failed = Arc::new(AtomicU64::new(0));
859    let raced = Arc::new(AtomicU64::new(0));
860
861    // Work queue: a shared iterator behind a mutex (cheap; probing dominates).
862    let work = Arc::new(std::sync::Mutex::new(files.into_iter()));
863    let (tx, rx) = sync_channel::<Unit>(jobs * 2);
864
865    let mut workers = Vec::with_capacity(jobs);
866    for _ in 0..jobs {
867        let work = Arc::clone(&work);
868        let tx = tx.clone();
869        let budget = Arc::clone(&budget);
870        let failed = Arc::clone(&failed);
871        let raced = Arc::clone(&raced);
872        workers.push(std::thread::spawn(move || {
873            loop {
874                let next = { work.lock().unwrap().next() };
875                let Some(path) = next else { break };
876                match probe_file(&path, window) {
877                    Ok(ProbeOutcome::Probed(probed, stamp)) => {
878                        let abs = match std::fs::canonicalize(&path) {
879                            Ok(abs) => abs,
880                            Err(e) => {
881                                log::warn!("skipping {}: {e}", path.display());
882                                failed.fetch_add(1, Ordering::Relaxed);
883                                continue;
884                            }
885                        };
886                        let weight = payload_weight(&probed);
887                        budget.acquire(weight); // backpressure on in-flight art bytes
888                        let unit = Unit {
889                            abs_path: abs.to_string_lossy().into_owned(),
890                            stamp,
891                            probed,
892                            weight,
893                        };
894                        if tx.send(unit).is_err() {
895                            budget.release(weight);
896                            break;
897                        }
898                    }
899                    Ok(ProbeOutcome::Unparseable) => {
900                        failed.fetch_add(1, Ordering::Relaxed);
901                    }
902                    Err(e) => {
903                        log::warn!("skipping {}: {e}", path.display());
904                        failed.fetch_add(1, Ordering::Relaxed);
905                    }
906                    Ok(ProbeOutcome::Raced) => {
907                        raced.fetch_add(1, Ordering::Relaxed);
908                    }
909                }
910            }
911        }));
912    }
913    drop(tx); // close the channel once all clones (workers) finish
914
915    // Writer: this thread. Batch by file count and accumulated art bytes.
916    let mut scanned = 0u64;
917    let mut batch: Vec<Unit> = Vec::new();
918    let mut batch_bytes = 0u64;
919    let flush = |batch: &mut Vec<Unit>, batch_bytes: &mut u64, scanned: &mut u64| -> Result<()> {
920        if batch.is_empty() {
921            return Ok(());
922        }
923        let mut bw = db.bulk_writer()?;
924        // Budget weights are released only after commit, and ingest_bulk consumes
925        // the Probed — capture each unit's weight before the move (#68).
926        let mut weights = Vec::with_capacity(batch.len());
927        for Unit {
928            abs_path,
929            stamp,
930            probed,
931            weight,
932        } in batch.drain(..)
933        {
934            weights.push(weight);
935            ingest_bulk(&mut bw, &abs_path, stamp, probed)?;
936            *scanned += 1;
937        }
938        bw.commit()?;
939        for w in weights {
940            budget.release(w);
941        }
942        *batch_bytes = 0;
943        Ok(())
944    };
945
946    // Drain the channel, batching by file count and accumulated art bytes. The
947    // budget cap equals the byte-flush threshold, so a worker calling
948    // `budget.acquire` (which it does *before* `send`) could block while the
949    // writer's pending batch sits just below the threshold — if the writer then
950    // parked on a blocking `recv`, neither side could make progress (the held
951    // budget is never released, the batch never reaches the threshold). To avoid
952    // that, whenever the channel momentarily drains we flush the pending batch —
953    // releasing the budget so blocked producers proceed — *before* blocking on the
954    // next item.
955    loop {
956        match rx.try_recv() {
957            Ok(unit) => {
958                batch_bytes += unit.weight;
959                batch.push(unit);
960                if batch.len() >= BATCH_FILES || batch_bytes >= cap {
961                    flush(&mut batch, &mut batch_bytes, &mut scanned)?;
962                }
963            }
964            Err(std::sync::mpsc::TryRecvError::Empty) => {
965                flush(&mut batch, &mut batch_bytes, &mut scanned)?;
966                match rx.recv() {
967                    Ok(unit) => {
968                        batch_bytes += unit.weight;
969                        batch.push(unit);
970                        if batch.len() >= BATCH_FILES || batch_bytes >= cap {
971                            flush(&mut batch, &mut batch_bytes, &mut scanned)?;
972                        }
973                    }
974                    Err(_) => break, // all workers finished; channel closed
975                }
976            }
977            Err(std::sync::mpsc::TryRecvError::Disconnected) => break,
978        }
979    }
980    flush(&mut batch, &mut batch_bytes, &mut scanned)?;
981    // A fatal flush error above returns via `?` *before* this join, abandoning the
982    // worker threads — acceptable because a DB-write failure aborts the whole scan.
983    // On the success path every worker has already exited (the work queue drained
984    // and `drop(tx)` closed the channel), so these joins return promptly.
985    for w in workers {
986        let _ = w.join();
987    }
988
989    Ok(ScanStats {
990        scanned,
991        skipped: 0, // counted at walk time; filled in by scan_directory_with
992        failed: failed.load(Ordering::Relaxed),
993        raced: raced.load(Ordering::Relaxed),
994    })
995}
996
997/// Test/oracle only: scan using the legacy whole-file probe (`probe_full`). The
998/// equivalence property compares this against the bounded `scan_directory`.
999#[doc(hidden)]
1000pub fn scan_directory_full_oracle(db: &Db, root: &Path) -> Result<ScanStats> {
1001    let mut files = Vec::new();
1002    let mut skipped = 0u64;
1003    if root.is_file() {
1004        if is_supported_audio(root) {
1005            files.push(root.to_path_buf());
1006        } else {
1007            skipped += 1;
1008        }
1009    } else {
1010        skipped += collect_audio(root, &mut files, false)?;
1011    }
1012    let mut stats = ScanStats {
1013        scanned: 0,
1014        skipped,
1015        failed: 0,
1016        raced: 0,
1017    };
1018    for path in files {
1019        let bytes = std::fs::read(&path)?;
1020        let Some(probed) = probe_full(&path, &bytes) else {
1021            stats.failed += 1;
1022            continue;
1023        };
1024        let meta = std::fs::metadata(&path)?;
1025        let abs = std::fs::canonicalize(&path)?;
1026        ingest(db, &abs.to_string_lossy(), &meta, probed)?;
1027        stats.scanned += 1;
1028    }
1029    Ok(stats)
1030}
1031
1032/// Re-validate an already-scanned library root: re-probe only files whose
1033/// size/mtime/ctime changed since the last scan (skipping unchanged ones so external
1034/// tag edits in the DB are preserved), then delete tracks **under `root`** whose
1035/// backing file is gone (cascading tags/art links) and garbage-collect
1036/// now-unreferenced art. `root` may be a single audio file (only that file is
1037/// revalidated) or a directory (walked recursively). Pruning is scoped to
1038/// `root`, so revalidating one library root never removes tracks belonging to
1039/// another.
1040///
1041/// Uses `opts` to configure the probe pipeline (e.g. `jobs` for parallelism).
1042/// The skip-unchanged decision runs on the calling thread before workers are
1043/// dispatched, so workers remain DB-free. A `stat`/`canonicalize` failure on a
1044/// candidate during the skip pass is counted in `failed` (and the file is left
1045/// for the next revalidation) rather than re-probed or pruned.
1046pub fn revalidate_with(db: &Db, root: &Path, opts: &ScanOptions) -> Result<RevalidateStats> {
1047    let mut files = Vec::new();
1048    if root.is_file() {
1049        if is_supported_audio(root) {
1050            files.push(root.to_path_buf());
1051        }
1052    } else {
1053        collect_audio(root, &mut files, opts.follow_symlinks)?;
1054    }
1055    db.apply_bulk_pragmas_self()?;
1056
1057    // Main-thread pre-dispatch skip pass: load existing (path -> stamp,id,format) once,
1058    // stat each candidate, keep only changed files. Workers stay DB-free.
1059    let existing: HashMap<String, (crate::freshness::BackingStamp, i64, Format)> = db
1060        .list_tracks()?
1061        .into_iter()
1062        .map(|t| {
1063            (
1064                t.backing_path.clone(),
1065                (
1066                    crate::freshness::BackingStamp::from_track(&t),
1067                    t.id,
1068                    t.format,
1069                ),
1070            )
1071        })
1072        .collect();
1073    // Legacy backfill (spec §1): FLAC tracks scanned under V1 have no structural
1074    // blocks. Re-scan them even when the backing file is unchanged so the V2
1075    // structural store + binary tags get populated by the ingest path.
1076    let have_structural = db.track_ids_with_structural_blocks()?;
1077
1078    let mut unchanged = 0u64;
1079    let mut skip_failed = 0u64;
1080    let mut changed: Vec<PathBuf> = Vec::new();
1081    for path in files {
1082        let meta = match std::fs::metadata(&path) {
1083            Ok(meta) => meta,
1084            Err(e) => {
1085                log::warn!("skipping {}: {e}", path.display());
1086                skip_failed += 1;
1087                continue;
1088            }
1089        };
1090        let abs = match std::fs::canonicalize(&path) {
1091            Ok(abs) => abs,
1092            Err(e) => {
1093                log::warn!("skipping {}: {e}", path.display());
1094                skip_failed += 1;
1095                continue;
1096            }
1097        };
1098        let key = abs.to_string_lossy().into_owned();
1099        if let Some((stamp, id, format)) = existing.get(&key).copied() {
1100            let needs_backfill = format == Format::Flac && !have_structural.contains(&id);
1101            if crate::freshness::BackingStamp::from_metadata(&meta) == stamp && !needs_backfill {
1102                unchanged += 1;
1103                continue;
1104            }
1105        }
1106        changed.push(path);
1107    }
1108
1109    let scan = run_pipeline(db, changed, opts)?;
1110
1111    // Prune + GC on the writer connection (single-threaded), unchanged from before.
1112    let canon_root = std::fs::canonicalize(root)?;
1113    let mut pruned = 0u64;
1114    for track in db.list_tracks()? {
1115        if !Path::new(&track.backing_path).starts_with(&canon_root) {
1116            continue;
1117        }
1118        if let Err(e) = std::fs::metadata(&track.backing_path)
1119            && e.kind() == std::io::ErrorKind::NotFound
1120        {
1121            db.delete_track(track.id)?;
1122            pruned += 1;
1123        }
1124    }
1125    db.gc_orphan_art()?;
1126
1127    Ok(RevalidateStats {
1128        updated: scan.scanned,
1129        unchanged,
1130        pruned,
1131        failed: scan.failed + skip_failed,
1132        raced: scan.raced,
1133    })
1134}
1135
1136/// Back-compat shim used by the CLI and existing tests.
1137pub fn revalidate(db: &Db, root: &Path) -> Result<RevalidateStats> {
1138    revalidate_with(db, root, &ScanOptions::default())
1139}
1140
#[cfg(test)]
mod scan_unit_tests {
    use super::*;
    use musefs_format::PictureType;
    use std::io::Write;

    // --- shared fixtures ---

    // Create `name` in a fresh temp dir holding `bytes`, then reopen it for
    // reading. The TempDir is returned so the file outlives the call.
    fn write_temp(name: &str, bytes: &[u8]) -> (tempfile::TempDir, std::fs::File) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(name);
        let mut writer = std::fs::File::create(&path).unwrap();
        writer.write_all(bytes).unwrap();
        drop(writer);
        (dir, std::fs::File::open(&path).unwrap())
    }

    // Write `bytes` as `file_name` in a temp dir and run the real `probe_file`
    // on it (window 0), insisting on a `Probed` outcome.
    fn probe_written(file_name: &str, bytes: &[u8]) -> Probed {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(file_name);
        std::fs::write(&path, bytes).unwrap();
        match probe_file(&path, 0).unwrap() {
            ProbeOutcome::Probed(p, _) => p,
            other => panic!("expected Probed, got {other:?}"),
        }
    }

    // One MP4 box: 32-bit big-endian total size, four-character type, body.
    fn bx(kind: &[u8; 4], body: &[u8]) -> Vec<u8> {
        let size = u32::try_from(8 + body.len()).unwrap();
        let mut out = Vec::with_capacity(8 + body.len());
        out.extend_from_slice(&size.to_be_bytes());
        out.extend_from_slice(kind);
        out.extend_from_slice(body);
        out
    }

    // A `data` box: type code, zero locale, then the raw value.
    fn data_atom(type_code: u32, value: &[u8]) -> Vec<u8> {
        let mut body = type_code.to_be_bytes().to_vec();
        body.extend_from_slice(&0u32.to_be_bytes()); // locale
        body.extend_from_slice(value);
        bx(b"data", &body)
    }

    // Minimal m4a skeleton (ftyp + moov + mdat) with one `soun` trak and a
    // `udta/meta/ilst` whose only child is `item`. Not synthesis-grade (no
    // stco); these tests only locate audio and read tags.
    fn m4a_with_ilst_item(item: &[u8]) -> Vec<u8> {
        // mdia/hdlr with handler type `soun` at payload offset 8..12 (FullBox
        // version/flags [0..4], pre_defined [4..8], handler_type [8..12]).
        let mut hdlr_body = vec![0u8; 8];
        hdlr_body.extend_from_slice(b"soun");
        hdlr_body.extend_from_slice(&[0u8; 12]); // reserved(12) + empty name
        let trak = bx(b"trak", &bx(b"mdia", &bx(b"hdlr", &hdlr_body)));

        let mut meta = 0u32.to_be_bytes().to_vec();
        meta.extend(bx(b"hdlr", &[0u8; 25]));
        meta.extend(bx(b"ilst", item));
        let udta = bx(b"udta", &bx(b"meta", &meta));

        let moov = bx(b"moov", &[trak, udta].concat());
        [bx(b"ftyp", b"M4A "), moov, bx(b"mdat", b"AUDIODATA")].concat()
    }

    // m4a carrying one binary `----` freeform atom; `value` is the raw binary
    // `data` payload (type code 0).
    fn mp4_with_binary_freeform(mean: &str, name: &str, value: &[u8]) -> Vec<u8> {
        let text_atom = |kind: &[u8; 4], text: &str| {
            let mut body = 0u32.to_be_bytes().to_vec();
            body.extend_from_slice(text.as_bytes());
            bx(kind, &body)
        };
        let freeform = [
            text_atom(b"mean", mean),
            text_atom(b"name", name),
            data_atom(0, value), // type 0 = binary
        ]
        .concat();
        m4a_with_ilst_item(&bx(b"----", &freeform))
    }

    // m4a carrying one `covr` atom whose `data` box has the given type code.
    fn mp4_with_covr(type_code: u32, value: &[u8]) -> Vec<u8> {
        m4a_with_ilst_item(&bx(b"covr", &data_atom(type_code, value)))
    }

    // --- ScanOptions defaults ---

    // Expected values are decimal literals, not the WINDOW / BATCH_BYTES
    // constants, so a mutated const or Default initializer cannot flow to both
    // sides of an assertion.
    #[test]
    fn scan_options_defaults() {
        let opts = ScanOptions::default();
        assert_eq!(opts.jobs, 0, "jobs default = use available parallelism");
        assert_eq!(opts.window, 1_048_576, "window default = 1 MiB");
        assert_eq!(opts.batch_bytes, 67_108_864, "batch_bytes default = 64 MiB");
    }

    // --- read_tail_128() ---

    // A file of exactly 128 bytes must yield its full contents. The pattern is
    // position-sensitive (byte[i] = i), so a wrong read offset, a constant
    // buffer, or a `None` result all fail the exact-equality check.
    #[test]
    fn read_tail_128_exact_128_bytes() {
        let pattern: Vec<u8> = (0u8..128).collect();
        let (_dir, file) = write_temp("tail128.bin", &pattern);

        let expected = <[u8; 128]>::try_from(pattern.as_slice()).unwrap();
        assert_eq!(read_tail_128(&file, 128).unwrap(), Some(expected));
    }

    // The other side of the boundary: one byte short of 128 yields None.
    #[test]
    fn read_tail_128_short_file_is_none() {
        let (_dir, file) = write_temp("tail127.bin", &[0xABu8; 127]);
        assert_eq!(read_tail_128(&file, 127).unwrap(), None);
    }

    // --- effective_jobs() ---

    // 0 means "use the machine's parallelism"; any other value passes through.
    #[test]
    fn effective_jobs_zero_uses_parallelism_and_nonzero_passes_through() {
        let machine = std::thread::available_parallelism().map_or(1, std::num::NonZero::get);
        assert_eq!(effective_jobs(0), machine);
        assert_eq!(effective_jobs(4), 4);
        assert_eq!(effective_jobs(1), 1);
    }

    // --- payload_weight() ---

    // Sums picture + binary-tag + structural-block byte lengths (batch backpressure).
    #[test]
    fn payload_weight_sums_all_buffered_payloads() {
        let picture = |len: usize| EmbeddedPicture {
            mime: "image/png".to_string(),
            picture_type: PictureType::new(3).unwrap(),
            description: String::new(),
            width: 0,
            height: 0,
            data: vec![0u8; len],
        };
        let probed_with = |pictures, binary_tags, structural_blocks| Probed {
            format: Format::Flac,
            audio_offset: 0,
            audio_length: 0,
            tags: Vec::new(),
            pictures,
            binary_tags,
            structural_blocks,
        };

        let loaded = probed_with(
            vec![picture(3), picture(5)],
            vec![EmbeddedBinaryTag {
                key: "APPLICATION".into(),
                payload: vec![0u8; 4],
            }],
            vec![("SEEKTABLE".into(), vec![0u8; 2])],
        );
        // 3 + 5 (pictures) + 4 (binary) + 2 (structural) = 14.
        assert_eq!(payload_weight(&loaded), 14);

        // No payloads at all → 0, which a constant non-zero result would fail.
        let empty = probed_with(Vec::new(), Vec::new(), Vec::new());
        assert_eq!(payload_weight(&empty), 0);
    }

    // --- MP4 probing ---

    #[test]
    fn probe_full_surfaces_mp4_binary_freeform() {
        use musefs_format::mp4;
        const KEY: &str = "----:com.serato.dj:analysis";

        let bytes = mp4_with_binary_freeform("com.serato.dj", "analysis", &[0x00, 0xAB, 0xCD]);
        let probed = probe_full(std::path::Path::new("/x.m4a"), &bytes).expect("probed");
        assert_eq!(probed.format, Format::M4a);

        let keys: Vec<&str> = probed.binary_tags.iter().map(|b| b.key.as_str()).collect();
        let hit = probed.binary_tags.iter().find(|b| b.key == KEY);
        assert!(hit.is_some(), "binary freeform not surfaced: {keys:?}");
        assert_eq!(hit.unwrap().payload, vec![0x00, 0xAB, 0xCD]);

        // The reported audio offset must agree with the structure reader.
        let scan = mp4::read_structure(&bytes).unwrap();
        assert_eq!(probed.audio_offset, scan.mdat_payload_offset);
    }

    #[test]
    fn probe_file_skips_oversized_mp4_covr() {
        let oversized = vec![0xFFu8; MAX_ART_BYTES + 1];
        let probed = probe_written("oversized_art.m4a", &mp4_with_covr(13, &oversized));
        assert_eq!(probed.format, Format::M4a);
        assert!(
            probed.pictures.is_empty(),
            "oversized covr must be skipped at extraction, not materialized"
        );
    }

    // A `----` value larger than MAX_BINARY_TAG_BYTES must be skipped at
    // extraction by the real seek-path scanner, so it is absent from Probed.
    #[test]
    fn probe_file_skips_oversized_mp4_binary_freeform() {
        let oversized = vec![0xABu8; MAX_BINARY_TAG_BYTES + 1];
        let bytes = mp4_with_binary_freeform("com.serato.dj", "analysis", &oversized);
        let probed = probe_written("oversized_bin.m4a", &bytes);
        assert_eq!(probed.format, Format::M4a);
        assert!(
            probed.binary_tags.is_empty(),
            "oversized binary freeform must be skipped at extraction, not materialized"
        );
    }
}
1381
#[cfg(test)]
mod ogg_probe_tests {
    use super::*;
    use musefs_format::ogg::page_test_support::{
        build_header_pub, lace_packet_pub, vorbis_body_empty,
    };
    use std::io::Write;

    // Small Ogg Opus stream: the header section built from an `OpusHead` and an
    // empty-comment `OpusTags` packet, followed by one audio packet. Returns
    // the full byte stream and the length of the trailing audio section.
    fn opus_fixture() -> (Vec<u8>, usize) {
        let head = b"OpusHead\x01\x02\x38\x01\x80\xbb\x00\x00\x00\x00\x00".to_vec();
        let mut tags = b"OpusTags".to_vec();
        tags.extend_from_slice(&vorbis_body_empty());
        let (mut stream, _) = build_header_pub(0x1234, &[&head, &tags]);
        let (audio, _) = lace_packet_pub(0x1234, 2, false, 960, &[0u8; 100]);
        stream.extend_from_slice(&audio);
        (stream, audio.len())
    }

    // Create `path` and fill it with `bytes`.
    fn write_fixture(path: &Path, bytes: &[u8]) {
        std::fs::File::create(path)
            .unwrap()
            .write_all(bytes)
            .unwrap();
    }

    #[test]
    fn probe_detects_opus_and_seeds_tags() {
        let (bytes, audio_len) = opus_fixture();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("song.opus");
        write_fixture(&path, &bytes);

        let probed = probe_full(&path, &bytes).expect("opus should probe");
        assert_eq!(probed.format, Format::Opus);
        // Audio starts exactly where the header section ends.
        assert_eq!(probed.audio_offset, (bytes.len() - audio_len) as u64);
    }

    #[test]
    fn scan_single_opus_file_ingests_it() {
        let (bytes, _) = opus_fixture();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("single.opus");
        write_fixture(&path, &bytes);

        let db = musefs_db::Db::open_in_memory().unwrap();
        // Pass the FILE path directly (not the directory).
        let stats = crate::scan_directory(&db, &path).unwrap();
        assert_eq!(stats.scanned, 1);
        assert_eq!(stats.skipped, 0);
    }

    #[test]
    fn probe_recognizes_oga_alias() {
        let (bytes, _) = opus_fixture();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("song.oga");
        write_fixture(&path, &bytes);

        let probed = probe_full(&path, &bytes).expect("oga should probe");
        assert_eq!(probed.format, Format::Opus);
    }
}
1451
#[cfg(test)]
mod wav_probe_tests {
    use super::*;
    use std::io::Write;

    // One RIFF chunk: four-character id, 32-bit little-endian length, payload.
    fn chunk(id: &[u8; 4], payload: &[u8]) -> Vec<u8> {
        let mut out = id.to_vec();
        out.extend_from_slice(&u32::try_from(payload.len()).unwrap().to_le_bytes());
        out.extend_from_slice(payload);
        out
    }

    // Tiny RIFF/WAVE file: a 16-byte `fmt ` chunk followed by a `data` chunk
    // holding 16 zero bytes.
    fn build_wav() -> Vec<u8> {
        let fmt: Vec<u8> = [
            &1u16.to_le_bytes()[..],
            &1u16.to_le_bytes()[..],
            &44_100u32.to_le_bytes()[..],
            &88_200u32.to_le_bytes()[..],
            &2u16.to_le_bytes()[..],
            &16u16.to_le_bytes()[..],
        ]
        .concat();

        let body = [chunk(b"fmt ", &fmt), chunk(b"data", &[0u8; 16])].concat();

        let mut wav = b"RIFF".to_vec();
        // The RIFF size covers the `WAVE` tag (4 bytes) plus every chunk.
        wav.extend_from_slice(&u32::try_from(body.len() + 4).unwrap().to_le_bytes());
        wav.extend_from_slice(b"WAVE");
        wav.extend_from_slice(&body);
        wav
    }

    // Write the fixture as `name` in a fresh temp dir; the TempDir is returned
    // so the file outlives the call.
    fn wav_on_disk(name: &str) -> (tempfile::TempDir, PathBuf, Vec<u8>) {
        let bytes = build_wav();
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(name);
        std::fs::File::create(&path)
            .unwrap()
            .write_all(&bytes)
            .unwrap();
        (dir, path, bytes)
    }

    #[test]
    fn probe_detects_wav() {
        let (_dir, path, bytes) = wav_on_disk("song.wav");

        let probed = probe_full(&path, &bytes).expect("wav should probe");
        assert_eq!(probed.format, Format::Wav);
        assert_eq!(probed.audio_length, 16);
    }

    #[test]
    fn scan_single_wav_file_ingests_it() {
        let (_dir, path, _bytes) = wav_on_disk("single.wav");

        let db = musefs_db::Db::open_in_memory().unwrap();
        let stats = crate::scan_directory(&db, &path).unwrap();
        assert_eq!(stats.scanned, 1);
        assert_eq!(stats.skipped, 0);
    }
}
1511
1512#[cfg(test)]
1513mod hardening_tests {
1514    use super::*;
1515
1516    #[test]
1517    fn max_art_bytes_is_16_mib_minus_64_kib() {
1518        assert_eq!(MAX_ART_BYTES, 16_711_680);
1519    }
1520
1521    #[test]
1522    fn scan_caps_match_db_limits() {
1523        assert_eq!(
1524            i64::try_from(MAX_ART_BYTES).unwrap(),
1525            musefs_db::limits::MAX_ART_BYTES
1526        );
1527        assert_eq!(
1528            i64::try_from(MAX_BINARY_TAG_BYTES).unwrap(),
1529            musefs_db::limits::MAX_BINARY_TAG_BYTES
1530        );
1531    }
1532
1533    #[test]
1534    fn is_supported_audio_accepts_known_and_rejects_unknown() {
1535        for ok in [
1536            "a.flac", "a.mp3", "a.m4a", "a.m4b", "a.ogg", "a.oga", "a.opus", "a.wav",
1537        ] {
1538            assert!(
1539                is_supported_audio(std::path::Path::new(ok)),
1540                "{ok} should be supported"
1541            );
1542        }
1543        for bad in ["a.txt", "a.png", "a", "a.flacx"] {
1544            assert!(
1545                !is_supported_audio(std::path::Path::new(bad)),
1546                "{bad} must be rejected"
1547            );
1548        }
1549    }
1550
1551    #[test]
1552    fn collect_audio_skips_unsupported_files() {
1553        let dir = tempfile::tempdir().unwrap();
1554        std::fs::write(dir.path().join("keep.flac"), b"x").unwrap();
1555        std::fs::write(dir.path().join("skip.txt"), b"x").unwrap();
1556        let mut out = Vec::new();
1557        collect_audio(dir.path(), &mut out, false).unwrap();
1558        assert_eq!(out.len(), 1);
1559        assert!(out[0].ends_with("keep.flac"));
1560    }
1561
1562    #[test]
1563    fn scan_options_default_does_not_follow_symlinks() {
1564        assert!(!ScanOptions::default().follow_symlinks);
1565    }
1566
1567    #[test]
1568    fn collect_audio_follows_symlinked_file_when_enabled() {
1569        let dir = tempfile::tempdir().unwrap();
1570        let real = dir.path().join("real.flac");
1571        std::fs::write(&real, b"x").unwrap();
1572        let lib = dir.path().join("lib");
1573        std::fs::create_dir(&lib).unwrap();
1574        std::os::unix::fs::symlink(&real, lib.join("link.flac")).unwrap();
1575
1576        let mut on = Vec::new();
1577        collect_audio(&lib, &mut on, true).unwrap();
1578        assert_eq!(
1579            on.len(),
1580            1,
1581            "symlinked file should be collected when following"
1582        );
1583
1584        let mut off = Vec::new();
1585        collect_audio(&lib, &mut off, false).unwrap();
1586        assert!(
1587            off.is_empty(),
1588            "symlinked file should be skipped by default"
1589        );
1590    }
1591
1592    #[test]
1593    fn collect_audio_follows_symlinked_dir_when_enabled() {
1594        let dir = tempfile::tempdir().unwrap();
1595        let real_dir = dir.path().join("music");
1596        std::fs::create_dir(&real_dir).unwrap();
1597        std::fs::write(real_dir.join("song.flac"), b"x").unwrap();
1598        let root = dir.path().join("root");
1599        std::fs::create_dir(&root).unwrap();
1600        std::os::unix::fs::symlink(&real_dir, root.join("linkdir")).unwrap();
1601
1602        let mut on = Vec::new();
1603        collect_audio(&root, &mut on, true).unwrap();
1604        assert_eq!(
1605            on.len(),
1606            1,
1607            "files under a symlinked dir should be collected"
1608        );
1609
1610        let mut off = Vec::new();
1611        collect_audio(&root, &mut off, false).unwrap();
1612        assert!(off.is_empty(), "symlinked dir should be skipped by default");
1613    }
1614
1615    #[test]
1616    fn collect_audio_terminates_on_symlink_cycle() {
1617        let dir = tempfile::tempdir().unwrap();
1618        let a = dir.path().join("a");
1619        std::fs::create_dir(&a).unwrap();
1620        std::fs::write(a.join("song.flac"), b"x").unwrap();
1621        std::os::unix::fs::symlink(dir.path(), a.join("loop")).unwrap();
1622
1623        let mut out = Vec::new();
1624        collect_audio(dir.path(), &mut out, true).unwrap();
1625        assert_eq!(
1626            out.iter().filter(|p| p.ends_with("song.flac")).count(),
1627            1,
1628            "each real file collected at most once despite the cycle"
1629        );
1630    }
1631
1632    #[test]
1633    fn collect_audio_skips_broken_symlink_when_following() {
1634        let dir = tempfile::tempdir().unwrap();
1635        std::fs::write(dir.path().join("real.flac"), b"x").unwrap();
1636        std::os::unix::fs::symlink(dir.path().join("nonexistent"), dir.path().join("dangling"))
1637            .unwrap();
1638
1639        let mut out = Vec::new();
1640        let result = collect_audio(dir.path(), &mut out, true);
1641        assert!(
1642            result.is_ok(),
1643            "a dangling symlink must not abort collection"
1644        );
1645        assert_eq!(out.len(), 1);
1646        assert!(out[0].ends_with("real.flac"));
1647    }
1648
1649    #[test]
1650    fn collect_audio_does_not_follow_symlinks_by_default() {
1651        let dir = tempfile::tempdir().unwrap();
1652        std::fs::write(dir.path().join("real.flac"), b"x").unwrap();
1653        let other = dir.path().join("other.flac");
1654        std::fs::write(&other, b"x").unwrap();
1655        std::os::unix::fs::symlink(&other, dir.path().join("link.flac")).unwrap();
1656
1657        let mut out = Vec::new();
1658        collect_audio(dir.path(), &mut out, false).unwrap();
1659        assert_eq!(out.len(), 2);
1660    }
1661
1662    #[test]
1663    fn collect_audio_ignores_symlink_to_non_file_target_when_following() {
1664        use std::os::unix::ffi::OsStrExt;
1665
1666        let dir = tempfile::tempdir().unwrap();
1667        // A FIFO is neither a regular file nor a directory, and mkfifo works in
1668        // restricted sandboxes that deny Unix-socket bind (issue #277).
1669        let fifo = dir.path().join("fifo");
1670        let c_path = std::ffi::CString::new(fifo.as_os_str().as_bytes()).unwrap();
1671        #[expect(unsafe_code, reason = "libc::mkfifo FFI; no std equivalent")]
1672        let rc = unsafe { libc::mkfifo(c_path.as_ptr(), 0o644) };
1673        assert_eq!(rc, 0, "mkfifo failed: {}", std::io::Error::last_os_error());
1674
1675        // Name the link with a supported audio extension so the only thing
1676        // keeping it out of `out` is the resolved target's is_file() check.
1677        std::os::unix::fs::symlink(&fifo, dir.path().join("link.flac")).unwrap();
1678
1679        let mut out = Vec::new();
1680        collect_audio(dir.path(), &mut out, true).unwrap();
1681        assert!(
1682            out.is_empty(),
1683            "a symlink to a non-file, non-dir target must not be collected"
1684        );
1685    }
1686
1687    #[test]
1688    fn probe_returns_none_for_supported_ext_with_garbage_contents() {
1689        let dir = tempfile::tempdir().unwrap();
1690        for name in ["bad.flac", "bad.mp3", "bad.m4a", "bad.wav", "bad.opus"] {
1691            let path = dir.path().join(name);
1692            std::fs::write(&path, b"not a real audio file").unwrap();
1693            assert!(
1694                probe_full(&path, b"not a real audio file").is_none(),
1695                "{name} must skip"
1696            );
1697        }
1698    }
1699
1700    fn flac_block(bt: u8, body: &[u8], last: bool) -> Vec<u8> {
1701        let mut v = vec![(if last { 0x80 } else { 0 }) | (bt & 0x7F)];
1702        let n: u32 = u32::try_from(body.len()).unwrap();
1703        v.extend_from_slice(&[
1704            u8::try_from(n >> 16).unwrap(),
1705            u8::try_from(n >> 8).unwrap(),
1706            u8::try_from(n).unwrap(),
1707        ]);
1708        v.extend_from_slice(body);
1709        v
1710    }
1711    fn streaminfo() -> Vec<u8> {
1712        let mut si = vec![
1713            0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0xC4, 0x42, 0xF0,
1714            0x00, 0x00, 0x00, 0x00,
1715        ];
1716        si.extend_from_slice(&[0u8; 16]);
1717        si
1718    }
1719    fn vorbis_comment(entries: &[&str]) -> Vec<u8> {
1720        let mut vc = Vec::new();
1721        let vendor = b"x";
1722        vc.extend_from_slice(&u32::try_from(vendor.len()).unwrap().to_le_bytes());
1723        vc.extend_from_slice(vendor);
1724        vc.extend_from_slice(&u32::try_from(entries.len()).unwrap().to_le_bytes());
1725        for e in entries {
1726            vc.extend_from_slice(&u32::try_from(e.len()).unwrap().to_le_bytes());
1727            vc.extend_from_slice(e.as_bytes());
1728        }
1729        vc
1730    }
1731    fn picture(width: u32, height: u32, data: &[u8]) -> Vec<u8> {
1732        let mut b = Vec::new();
1733        b.extend_from_slice(&3u32.to_be_bytes());
1734        let mime = "image/png";
1735        b.extend_from_slice(&u32::try_from(mime.len()).unwrap().to_be_bytes());
1736        b.extend_from_slice(mime.as_bytes());
1737        b.extend_from_slice(&0u32.to_be_bytes());
1738        b.extend_from_slice(&width.to_be_bytes());
1739        b.extend_from_slice(&height.to_be_bytes());
1740        b.extend_from_slice(&0u32.to_be_bytes());
1741        b.extend_from_slice(&0u32.to_be_bytes());
1742        b.extend_from_slice(&u32::try_from(data.len()).unwrap().to_be_bytes());
1743        b.extend_from_slice(data);
1744        b
1745    }
1746    fn write_flac(path: &std::path::Path, entries: &[&str], pic: Option<(u32, u32)>) {
1747        let mut out = b"fLaC".to_vec();
1748        out.extend(flac_block(0, &streaminfo(), false));
1749        let last_is_vc = pic.is_none();
1750        out.extend(flac_block(4, &vorbis_comment(entries), last_is_vc));
1751        if let Some((w, h)) = pic {
1752            out.extend(flac_block(6, &picture(w, h, &[0xAB; 64]), true));
1753        }
1754        out.extend_from_slice(&[0xCD; 128]);
1755        std::fs::write(path, &out).unwrap();
1756    }
1757
1758    #[test]
1759    fn ingest_assigns_sequential_ordinals_per_key() {
1760        let dir = tempfile::tempdir().unwrap();
1761        let path = dir.path().join("multi.flac");
1762        write_flac(&path, &["ARTIST=A1", "ARTIST=A2"], None);
1763        let db = musefs_db::Db::open_in_memory().unwrap();
1764        crate::scan_directory(&db, &path).unwrap();
1765        let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1766        let mut artists: Vec<(u64, String)> = db
1767            .get_tags(track.id)
1768            .unwrap()
1769            .into_iter()
1770            .filter(|t| t.key.eq_ignore_ascii_case("artist"))
1771            .map(|t| (t.ordinal, t.value))
1772            .collect();
1773        artists.sort();
1774        assert_eq!(artists, vec![(0, "A1".to_string()), (1, "A2".to_string())]);
1775    }
1776
1777    #[test]
1778    fn ingest_stores_nonzero_art_dimensions() {
1779        let dir = tempfile::tempdir().unwrap();
1780        let path = dir.path().join("art.flac");
1781        write_flac(&path, &["ARTIST=A", "TITLE=T"], Some((10, 20)));
1782        let db = musefs_db::Db::open_in_memory().unwrap();
1783        crate::scan_directory(&db, &path).unwrap();
1784        let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1785        let ta = db.get_track_art(track.id).unwrap();
1786        assert_eq!(ta.len(), 1);
1787        let meta = db.get_art_meta(ta[0].art_id).unwrap().unwrap();
1788        assert_eq!(meta.width, Some(10));
1789        assert_eq!(meta.height, Some(20));
1790    }
1791
1792    #[test]
1793    fn ingest_oracle_path_stores_nonzero_art_dimensions() {
1794        // Drives the single-file `ingest` (not `ingest_bulk`) so the
1795        // `(pic.width != 0).then_some(..)` dimension guards there are pinned.
1796        let dir = tempfile::tempdir().unwrap();
1797        let path = dir.path().join("art.flac");
1798        write_flac(&path, &["ARTIST=A", "TITLE=T"], Some((10, 20)));
1799        let db = musefs_db::Db::open_in_memory().unwrap();
1800        crate::scan_directory_full_oracle(&db, &path).unwrap();
1801        let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1802        let ta = db.get_track_art(track.id).unwrap();
1803        assert_eq!(ta.len(), 1);
1804        let meta = db.get_art_meta(ta[0].art_id).unwrap().unwrap();
1805        assert_eq!(meta.width, Some(10));
1806        assert_eq!(meta.height, Some(20));
1807    }
1808
1809    #[test]
1810    fn scan_directory_counts_scanned_failed_and_skipped() {
1811        let dir = tempfile::tempdir().unwrap();
1812        write_flac(
1813            &dir.path().join("ok1.flac"),
1814            &["ARTIST=A", "TITLE=T1"],
1815            None,
1816        );
1817        write_flac(
1818            &dir.path().join("ok2.flac"),
1819            &["ARTIST=A", "TITLE=T2"],
1820            None,
1821        );
1822        // Supported extension, unparseable bytes → a scan failure.
1823        std::fs::write(dir.path().join("bad.flac"), b"garbage").unwrap();
1824        // Unsupported extension → skipped at collection, never probed.
1825        std::fs::write(dir.path().join("notes.txt"), b"hello").unwrap();
1826        let db = musefs_db::Db::open_in_memory().unwrap();
1827        let stats = crate::scan_directory(&db, dir.path()).unwrap();
1828        assert_eq!(stats.scanned, 2);
1829        assert_eq!(stats.failed, 1);
1830        assert_eq!(stats.skipped, 1);
1831    }
1832
1833    #[test]
1834    fn revalidate_buckets_unchanged_and_prunes_missing() {
1835        let dir = tempfile::tempdir().unwrap();
1836        let keep = dir.path().join("keep.flac");
1837        write_flac(&keep, &["ARTIST=A", "TITLE=T"], None);
1838        let db = musefs_db::Db::open_in_memory().unwrap();
1839        crate::scan_directory(&db, dir.path()).unwrap();
1840
1841        let s1 = crate::revalidate(&db, dir.path()).unwrap();
1842        assert_eq!(s1.unchanged, 1);
1843        assert_eq!(s1.updated, 0);
1844        assert_eq!(s1.pruned, 0);
1845
1846        std::fs::remove_file(&keep).unwrap();
1847        let s2 = crate::revalidate(&db, dir.path()).unwrap();
1848        assert_eq!(s2.pruned, 1);
1849        assert!(db.list_tracks().unwrap().is_empty());
1850    }
1851
1852    #[test]
1853    fn revalidate_does_not_prune_on_non_notfound_error() {
1854        let dir = tempfile::tempdir().unwrap();
1855        let file = dir.path().join("real.flac");
1856        write_flac(&file, &["ARTIST=A", "TITLE=T"], None);
1857        let db = musefs_db::Db::open_in_memory().unwrap();
1858        crate::scan_directory(&db, dir.path()).unwrap();
1859
1860        use musefs_db::{Format, NewTrack};
1861        let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1862        db.delete_track(track.id).unwrap();
1863        let canon = std::fs::canonicalize(dir.path()).unwrap();
1864        let ghost = canon.join("real.flac").join("ghost.flac");
1865        db.upsert_track(&NewTrack {
1866            backing_path: ghost.to_string_lossy().into_owned(),
1867            format: Format::Flac,
1868            audio_offset: 0,
1869            audio_length: 0,
1870            backing_size: 0,
1871            backing_mtime_ns: 0,
1872            backing_ctime_ns: 0,
1873        })
1874        .unwrap();
1875
1876        let stats = crate::revalidate(&db, dir.path()).unwrap();
1877        assert_eq!(stats.pruned, 0, "ENOTDIR is not NotFound → must not prune");
1878        assert!(
1879            db.list_tracks()
1880                .unwrap()
1881                .iter()
1882                .any(|t| t.backing_path == ghost.to_string_lossy()),
1883            "ghost track must still exist"
1884        );
1885    }
1886
1887    #[test]
1888    fn scan_ingests_binary_tags_and_promotes() {
1889        use id3::frame::{Content, Popularimeter, Unknown};
1890        use id3::{Encoder, Frame, Tag, TagLike, Version};
1891
1892        let dir = tempfile::tempdir().unwrap();
1893
1894        // Build an MP3 with a PRIV (opaque) + POPM (promoted) tag.
1895        let mut tag = Tag::new();
1896        tag.add_frame(Popularimeter {
1897            user: "u".into(),
1898            rating: 128,
1899            counter: 3,
1900        });
1901        tag.add_frame(Frame::with_content(
1902            "PRIV",
1903            Content::Unknown(Unknown {
1904                data: vec![1, 1, 2, 3, 5],
1905                version: Version::Id3v24,
1906            }),
1907        ));
1908        let mut bytes = Vec::new();
1909        Encoder::new()
1910            .version(Version::Id3v24)
1911            .encode(&tag, &mut bytes)
1912            .unwrap();
1913        // A real MP3 frame header is enough for locate_audio_bounded to find audio.
1914        bytes.extend_from_slice(&[0xFF, 0xFB, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00]);
1915        std::fs::write(dir.path().join("a.mp3"), &bytes).unwrap();
1916
1917        let db = musefs_db::Db::open_in_memory().unwrap();
1918        crate::scan::scan_directory(&db, dir.path()).unwrap();
1919        let track = db.list_tracks().unwrap().into_iter().next().unwrap();
1920        let tid = track.id;
1921
1922        // Opaque PRIV survives as a binary row.
1923        let bin = db.get_binary_tags(tid).unwrap();
1924        assert!(
1925            bin.iter().any(|r| r.key == "PRIV" && r.byte_len == 5),
1926            "PRIV not ingested as binary row; got: {bin:?}"
1927        );
1928
1929        // POPM promoted into editable text tags.
1930        let texts = db.get_tags(tid).unwrap();
1931        assert!(
1932            texts.iter().any(|t| t.key == "rating" && t.value == "128"),
1933            "rating not promoted; got: {texts:?}"
1934        );
1935        assert!(
1936            texts.iter().any(|t| t.key == "playcount" && t.value == "3"),
1937            "playcount not promoted; got: {texts:?}"
1938        );
1939    }
1940
1941    /// Probed carrying a valid, an empty, and an oversize binary tag. Only the
1942    /// valid one is stored: the filter drops empty (`EmptySegment` would fail
1943    /// layout validation) and oversize (`> MAX_BINARY_TAG_BYTES`) payloads, with
1944    /// gap-free ordinals.
1945    fn probed_with_mixed_binary_tags() -> Probed {
1946        Probed {
1947            format: musefs_db::Format::Mp3,
1948            audio_offset: 0,
1949            audio_length: 0,
1950            tags: Vec::new(),
1951            pictures: Vec::new(),
1952            binary_tags: vec![
1953                EmbeddedBinaryTag {
1954                    key: "PRIV".into(),
1955                    payload: vec![1, 2, 3],
1956                },
1957                EmbeddedBinaryTag {
1958                    key: "GEOB".into(),
1959                    payload: Vec::new(),
1960                },
1961                EmbeddedBinaryTag {
1962                    key: "SYLT".into(),
1963                    payload: vec![0u8; MAX_BINARY_TAG_BYTES + 1],
1964                },
1965            ],
1966            structural_blocks: Vec::new(),
1967        }
1968    }
1969
1970    #[test]
1971    fn ingest_filters_empty_and_oversize_binary_tags() {
1972        let dir = tempfile::tempdir().unwrap();
1973        let path = dir.path().join("a.mp3");
1974        std::fs::write(&path, b"x").unwrap();
1975        let meta = std::fs::metadata(&path).unwrap();
1976        let db = Db::open_in_memory().unwrap();
1977
1978        ingest(
1979            &db,
1980            &path.to_string_lossy(),
1981            &meta,
1982            probed_with_mixed_binary_tags(),
1983        )
1984        .unwrap();
1985
1986        let tid = db.list_tracks().unwrap()[0].id;
1987        let rows = db.get_binary_tags(tid).unwrap();
1988        assert_eq!(
1989            rows.len(),
1990            1,
1991            "only the valid binary tag survives: {rows:?}"
1992        );
1993        assert_eq!(rows[0].key, "PRIV");
1994        assert_eq!(rows[0].byte_len, 3);
1995    }
1996
1997    #[test]
1998    fn ingest_bulk_filters_empty_and_oversize_binary_tags() {
1999        let db = Db::open_in_memory().unwrap();
2000        {
2001            let mut bw = db.bulk_writer().unwrap();
2002            ingest_bulk(
2003                &mut bw,
2004                "/a.mp3",
2005                BackingStamp {
2006                    size: 1,
2007                    mtime_ns: 0,
2008                    ctime_ns: 0,
2009                },
2010                probed_with_mixed_binary_tags(),
2011            )
2012            .unwrap();
2013            bw.commit().unwrap();
2014        }
2015        let tid = db.list_tracks().unwrap()[0].id;
2016        let rows = db.get_binary_tags(tid).unwrap();
2017        assert_eq!(
2018            rows.len(),
2019            1,
2020            "only the valid binary tag survives: {rows:?}"
2021        );
2022        assert_eq!(rows[0].key, "PRIV");
2023        assert_eq!(rows[0].byte_len, 3);
2024    }
2025
2026    #[test]
2027    fn accept_pictures_keeps_at_cap_and_drops_over_cap() {
2028        let mk = |len: usize| EmbeddedPicture {
2029            mime: "image/jpeg".to_string(),
2030            picture_type: musefs_format::PictureType::new(3).unwrap(),
2031            description: String::new(),
2032            width: 0,
2033            height: 0,
2034            data: vec![0u8; len],
2035        };
2036        // A picture exactly at the cap is kept; one byte over is dropped. The
2037        // boundary pins `>` against `>=` (an at-cap drop would be silent loss).
2038        let kept = accept_pictures("/x.flac", vec![mk(MAX_ART_BYTES), mk(MAX_ART_BYTES + 1)]);
2039        assert_eq!(kept.len(), 1, "exactly the at-cap picture survives");
2040        assert_eq!(kept[0].data.len(), MAX_ART_BYTES);
2041    }
2042
2043    #[test]
2044    fn accept_binary_tags_keeps_at_cap_and_drops_over_cap() {
2045        let mk = |len: usize| EmbeddedBinaryTag {
2046            key: "PRIV".to_string(),
2047            payload: vec![0u8; len],
2048        };
2049        let kept = accept_binary_tags(
2050            "/x.mp3",
2051            vec![mk(MAX_BINARY_TAG_BYTES), mk(MAX_BINARY_TAG_BYTES + 1)],
2052        );
2053        assert_eq!(kept.len(), 1, "exactly the at-cap binary tag survives");
2054        assert_eq!(kept[0].payload.len(), MAX_BINARY_TAG_BYTES);
2055    }
2056
2057    fn probed_with_text_tags(tags: &[(&str, &str)]) -> Probed {
2058        Probed {
2059            format: musefs_db::Format::Mp3,
2060            audio_offset: 0,
2061            audio_length: 0,
2062            tags: tags
2063                .iter()
2064                .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
2065                .collect(),
2066            pictures: Vec::new(),
2067            binary_tags: Vec::new(),
2068            structural_blocks: Vec::new(),
2069        }
2070    }
2071
2072    #[test]
2073    fn ingest_skips_empty_and_control_char_keys() {
2074        let dir = tempfile::tempdir().unwrap();
2075        let path = dir.path().join("a.mp3");
2076        std::fs::write(&path, b"x").unwrap();
2077        let meta = std::fs::metadata(&path).unwrap();
2078        let db = Db::open_in_memory().unwrap();
2079
2080        ingest(
2081            &db,
2082            &path.to_string_lossy(),
2083            &meta,
2084            probed_with_text_tags(&[
2085                ("artist", "Alice"),
2086                ("", "dropped"),        // empty key
2087                ("a\u{7}b", "dropped"), // control char
2088                ("a\u{0}b", "dropped"), // embedded NUL — DB CHECK can't see it, the floor can
2089                ("a=b", "kept"),        // '=' is NOT a floor violation
2090            ]),
2091        )
2092        .unwrap();
2093
2094        let tid = db.list_tracks().unwrap()[0].id;
2095        let keys: Vec<String> = db
2096            .get_tags(tid)
2097            .unwrap()
2098            .into_iter()
2099            .map(|t| t.key)
2100            .collect();
2101        // get_tags is ORDER BY key, ordinal: '=' (0x3D) sorts before 'a' (0x61).
2102        assert_eq!(keys, vec!["a=b".to_string(), "artist".to_string()]);
2103    }
2104
2105    #[test]
2106    fn ingest_bulk_skips_empty_and_control_char_keys() {
2107        let db = Db::open_in_memory().unwrap();
2108        {
2109            let mut bw = db.bulk_writer().unwrap();
2110            ingest_bulk(
2111                &mut bw,
2112                "/a.mp3",
2113                BackingStamp {
2114                    size: 1,
2115                    mtime_ns: 0,
2116                    ctime_ns: 0,
2117                },
2118                probed_with_text_tags(&[
2119                    ("artist", "Alice"),
2120                    ("", "dropped"),
2121                    ("a\u{7}b", "dropped"),
2122                    ("a\u{0}b", "dropped"), // embedded NUL — floor drops it
2123                    ("a=b", "kept"),
2124                ]),
2125            )
2126            .unwrap();
2127            bw.commit().unwrap();
2128        }
2129        let tid = db.list_tracks().unwrap()[0].id;
2130        let keys: Vec<String> = db
2131            .get_tags(tid)
2132            .unwrap()
2133            .into_iter()
2134            .map(|t| t.key)
2135            .collect();
2136        assert_eq!(keys, vec!["a=b".to_string(), "artist".to_string()]);
2137    }
2138
2139    /// Probed with two structural blocks of the SAME kind, to make the per-kind
2140    /// ordinal increment (`*ord += 1`) observable. A real FLAC carries only one
2141    /// STREAMINFO/SEEKTABLE, so a duplicate kind is the only input under which the
2142    /// second block's ordinal differs from the first; without it the increment's
2143    /// mutants survive.
2144    fn probed_with_duplicate_structural_kind() -> Probed {
2145        Probed {
2146            format: musefs_db::Format::Flac,
2147            audio_offset: 0,
2148            audio_length: 0,
2149            tags: Vec::new(),
2150            pictures: Vec::new(),
2151            binary_tags: Vec::new(),
2152            structural_blocks: vec![
2153                ("SEEKTABLE".to_string(), vec![0xA1]),
2154                ("SEEKTABLE".to_string(), vec![0xB2]),
2155            ],
2156        }
2157    }
2158
2159    #[test]
2160    fn ingest_assigns_sequential_structural_ordinals_per_kind() {
2161        let dir = tempfile::tempdir().unwrap();
2162        let path = dir.path().join("a.flac");
2163        std::fs::write(&path, b"x").unwrap();
2164        let meta = std::fs::metadata(&path).unwrap();
2165        let db = Db::open_in_memory().unwrap();
2166
2167        ingest(
2168            &db,
2169            &path.to_string_lossy(),
2170            &meta,
2171            probed_with_duplicate_structural_kind(),
2172        )
2173        .unwrap();
2174
2175        let tid = db.list_tracks().unwrap()[0].id;
2176        let got = db.get_structural_blocks(tid).unwrap();
2177        // Rows come back ORDER BY kind, ordinal: the two same-kind blocks must hold
2178        // ordinals 0 then 1 (the `-=`/`*=` mutants collapse or invert this).
2179        assert_eq!(got.len(), 2);
2180        assert_eq!(got[0].ordinal, 0);
2181        assert_eq!(got[0].body, vec![0xA1]);
2182        assert_eq!(got[1].ordinal, 1);
2183        assert_eq!(got[1].body, vec![0xB2]);
2184    }
2185
2186    /// Probed with two tags of the SAME key, to make the per-key ordinal
2187    /// increment (`*ord += 1` in the tag loop) observable. The production
2188    /// `ingest_bulk` path is exercised with a multi-value tag elsewhere, but the
2189    /// oracle-only `ingest` is not, so without this its tag-ordinal mutants
2190    /// survive. Distinct values under one key: a collapsed ordinal (the `-=`/`*=`
2191    /// mutants) either underflows or duplicates the `(track_id, key, ordinal)`
2192    /// primary key — both observable.
2193    fn probed_with_duplicate_tag_key() -> Probed {
2194        Probed {
2195            format: musefs_db::Format::Flac,
2196            audio_offset: 0,
2197            audio_length: 0,
2198            tags: vec![
2199                ("ARTIST".to_string(), "A".to_string()),
2200                ("ARTIST".to_string(), "B".to_string()),
2201            ],
2202            pictures: Vec::new(),
2203            binary_tags: Vec::new(),
2204            structural_blocks: Vec::new(),
2205        }
2206    }
2207
2208    #[test]
2209    fn ingest_assigns_sequential_tag_ordinals_per_key() {
2210        let dir = tempfile::tempdir().unwrap();
2211        let path = dir.path().join("a.flac");
2212        std::fs::write(&path, b"x").unwrap();
2213        let meta = std::fs::metadata(&path).unwrap();
2214        let db = Db::open_in_memory().unwrap();
2215
2216        ingest(
2217            &db,
2218            &path.to_string_lossy(),
2219            &meta,
2220            probed_with_duplicate_tag_key(),
2221        )
2222        .unwrap();
2223
2224        let tid = db.list_tracks().unwrap()[0].id;
2225        let got = db.get_tags(tid).unwrap();
2226        // get_tags is ORDER BY key, ordinal: the two same-key tags must hold
2227        // ordinals 0 then 1 (the `-=`/`*=` mutants collapse or invert this).
2228        assert_eq!(got.len(), 2);
2229        assert_eq!(got[0].ordinal, 0);
2230        assert_eq!(got[0].value, "A");
2231        assert_eq!(got[1].ordinal, 1);
2232        assert_eq!(got[1].value, "B");
2233    }
2234
2235    #[test]
2236    fn ingest_bulk_assigns_sequential_structural_ordinals_per_kind() {
2237        let db = Db::open_in_memory().unwrap();
2238        {
2239            let mut bw = db.bulk_writer().unwrap();
2240            ingest_bulk(
2241                &mut bw,
2242                "/a.flac",
2243                BackingStamp {
2244                    size: 1,
2245                    mtime_ns: 0,
2246                    ctime_ns: 0,
2247                },
2248                probed_with_duplicate_structural_kind(),
2249            )
2250            .unwrap();
2251            bw.commit().unwrap();
2252        }
2253        let tid = db.list_tracks().unwrap()[0].id;
2254        let got = db.get_structural_blocks(tid).unwrap();
2255        assert_eq!(got.len(), 2);
2256        assert_eq!(got[0].ordinal, 0);
2257        assert_eq!(got[0].body, vec![0xA1]);
2258        assert_eq!(got[1].ordinal, 1);
2259        assert_eq!(got[1].body, vec![0xB2]);
2260    }
2261}
2262
2263#[cfg(test)]
2264mod bounded_probe_tests {
2265    use super::*;
2266    use musefs_db::Db;
2267
2268    /// Minimal FLAC: marker + a single last STREAMINFO (34-byte body) + audio.
2269    /// FLAC has no frame-sync check at the audio offset, so any payload works.
2270    fn flac_fixture() -> Vec<u8> {
2271        let mut bytes = b"fLaC".to_vec();
2272        bytes.push(0x80); // last-block flag set, type 0 (STREAMINFO)
2273        bytes.extend_from_slice(&[0, 0, 34]); // 24-bit length = 34
2274        bytes.extend(std::iter::repeat_n(0u8, 34));
2275        bytes.extend_from_slice(b"AUDIOPAYLOAD");
2276        bytes
2277    }
2278
2279    #[test]
2280    fn scan_counts_unreadable_file_as_failed_and_continues() {
2281        let dir = tempfile::tempdir().unwrap();
2282        // One good FLAC + one zero-byte ".flac" that cannot parse.
2283        let good = dir.path().join("good.flac");
2284        let mut bytes = b"fLaC".to_vec();
2285        bytes.push(0x80);
2286        bytes.extend_from_slice(&[0, 0, 34]);
2287        bytes.extend(std::iter::repeat_n(0u8, 34));
2288        bytes.extend_from_slice(b"AUDIO");
2289        std::fs::write(&good, &bytes).unwrap();
2290        std::fs::write(dir.path().join("bad.flac"), b"").unwrap();
2291
2292        let db = Db::open_in_memory().unwrap();
2293        let stats = scan_directory(&db, dir.path()).unwrap();
2294        assert_eq!(stats.scanned, 1);
2295        assert_eq!(stats.skipped + stats.failed, 1);
2296    }
2297
2298    #[test]
2299    fn scan_directory_bounded_matches_full_for_flac() {
2300        // A FLAC fixture written to a temp dir, scanned with the (default) bounded
2301        // path, yields a track with the same audio bounds as a full-file probe.
2302        let dir = tempfile::tempdir().unwrap();
2303        let path = dir.path().join("a.flac");
2304        let bytes = flac_fixture();
2305        std::fs::write(&path, &bytes).unwrap();
2306
2307        let full = probe_full(&path, &bytes).expect("full probe");
2308
2309        let db = Db::open_in_memory().unwrap();
2310        let stats = scan_directory(&db, dir.path()).unwrap();
2311        assert_eq!(stats.scanned, 1);
2312        let track = db
2313            .get_track_by_path(&std::fs::canonicalize(&path).unwrap().to_string_lossy())
2314            .unwrap()
2315            .unwrap();
2316        assert_eq!(track.bounds.audio_offset(), full.audio_offset);
2317        assert_eq!(track.bounds.audio_length(), full.audio_length);
2318    }
2319
#[test]
fn revalidate_skips_unchanged_and_reprobes_changed() {
    /// Builds a tiny FLAC stream: the `fLaC` marker, one metadata block header
    /// (`0x80` = last-block flag with block type 0, then a 24-bit length of
    /// 34), 34 zero bytes of block body, and finally `payload` as the audio.
    fn tiny_flac(payload: &[u8]) -> Vec<u8> {
        let mut out = Vec::with_capacity(4 + 4 + 34 + payload.len());
        out.extend_from_slice(b"fLaC");
        out.extend_from_slice(&[0x80, 0, 0, 34]);
        out.resize(out.len() + 34, 0);
        out.extend_from_slice(payload);
        out
    }

    const REPLACEMENT_AUDIO: &[u8] = b"DIFFERENT-AUDIO";

    let tmp = tempfile::tempdir().unwrap();
    let flac = tmp.path().join("x.flac");
    std::fs::write(&flac, tiny_flac(b"AUDIO")).unwrap();

    let db = Db::open_in_memory().unwrap();
    scan_directory(&db, tmp.path()).unwrap();
    let opts = ScanOptions::default();

    // First pass: nothing on disk moved, so the file is counted as unchanged.
    let first = revalidate_with(&db, tmp.path(), &opts).unwrap();
    assert_eq!(first.unchanged, 1);
    assert_eq!(first.updated, 0);

    // Second pass: the file was rewritten with a different size, so it must
    // be detected as changed and probed again.
    std::fs::write(&flac, tiny_flac(REPLACEMENT_AUDIO)).unwrap();
    let second = revalidate_with(&db, tmp.path(), &opts).unwrap();
    assert_eq!(second.updated, 1);
    assert_eq!(second.unchanged, 0);

    // The stored row now carries the new (longer) audio length.
    let canonical = std::fs::canonicalize(&flac).unwrap();
    let key = canonical.to_string_lossy();
    let track = db.get_track_by_path(&key).unwrap().unwrap();
    assert_eq!(
        usize_from(track.bounds.audio_length()),
        REPLACEMENT_AUDIO.len()
    );
}
2356
#[test]
fn revalidate_accepts_a_single_file_target() {
    // File targets are advertised by the CLI for every scan mode, --revalidate
    // included, so `revalidate_with` has to cope with a root that is a plain
    // file rather than a directory.
    let tmp = tempfile::tempdir().unwrap();
    let file = tmp.path().join("x.flac");

    // `fLaC` marker followed by a single block header (0x80, 24-bit length 34),
    // then the 34-byte zeroed block body and a short audio payload.
    let header: [u8; 8] = [b'f', b'L', b'a', b'C', 0x80, 0, 0, 34];
    let mut contents = header.to_vec();
    contents.resize(header.len() + 34, 0);
    contents.extend_from_slice(b"AUDIO");
    std::fs::write(&file, &contents).unwrap();

    let db = Db::open_in_memory().unwrap();
    scan_directory(&db, tmp.path()).unwrap();

    // Point revalidation straight at the file: it must not fail trying to
    // read_dir it, and the untouched file lands in `unchanged`, not `pruned`.
    let outcome = revalidate_with(&db, &file, &ScanOptions::default()).unwrap();
    assert_eq!(outcome.unchanged, 1);
    assert_eq!(outcome.pruned, 0);

    let remaining = db.list_tracks().unwrap();
    assert_eq!(remaining.len(), 1);
}
2379
#[test]
fn jobs1_and_jobs_n_produce_equivalent_state() {
    // Number of distinct FLAC files written to the scan root.
    const TRACKS: usize = 12;

    let dir = tempfile::tempdir().unwrap();
    for i in 0..TRACKS {
        // Marker, one block header (0x80, 24-bit length 34), a 34-byte zeroed
        // block body, then a per-file audio payload so every track differs.
        let mut bytes = b"fLaC".to_vec();
        bytes.extend_from_slice(&[0x80, 0, 0, 34]);
        bytes.resize(bytes.len() + 34, 0);
        bytes.extend_from_slice(format!("AUDIO-{i}").as_bytes());
        std::fs::write(dir.path().join(format!("t{i}.flac")), &bytes).unwrap();
    }

    // Scans the directory into a fresh in-memory DB with the given worker
    // count and returns the resulting rows in a sorted, comparable form.
    let snapshot = |jobs: usize| {
        let db = Db::open_in_memory().unwrap();
        let opts = ScanOptions {
            jobs,
            ..Default::default()
        };
        scan_directory_with(&db, dir.path(), &opts).unwrap();

        let mut rows: Vec<(String, u64, u64)> = db
            .list_tracks()
            .unwrap()
            .into_iter()
            .map(|t| {
                (
                    t.backing_path,
                    t.bounds.audio_offset(),
                    t.bounds.audio_length(),
                )
            })
            .collect();
        rows.sort();
        rows
    };

    // Each configuration is scanned exactly once; the snapshots are reused for
    // both the count check and the equivalence check.
    let serial = snapshot(1);
    let parallel = snapshot(4);
    assert_eq!(serial.len(), TRACKS);
    assert_eq!(parallel.len(), TRACKS);
    assert_eq!(serial, parallel);
}
2421
#[test]
fn oversize_unparseable_file_is_skipped_not_read_whole() {
    // OOM guard for a misnamed multi-GB file: a file well past the probe
    // ceiling that starts with a valid FLAC marker but whose metadata never
    // terminates must come back as unparseable instead of being read whole.
    use std::io::Write;

    let tmp = tempfile::tempdir().unwrap();
    let huge = tmp.path().join("huge.flac");
    {
        let mut file = std::fs::File::create(&huge).unwrap();
        // The marker, then a non-last VORBIS_COMMENT block header declaring
        // the maximum 24-bit length, so the bounded reader keeps widening.
        let pieces: [&[u8]; 2] = [&b"fLaC"[..], &[0x04, 0xFF, 0xFF, 0xFF][..]];
        for piece in pieces {
            file.write_all(piece).unwrap();
        }
        // Extend the file to just beyond the probe ceiling without writing
        // the remaining bytes.
        file.set_len(MAX_PROBE_BYTES + 4096).unwrap();
    } // file handle closed here, before probing

    let outcome = probe_file(&huge, WINDOW).unwrap();
    assert!(matches!(outcome, ProbeOutcome::Unparseable));
}
2444
#[test]
fn oversize_wav_is_served_via_data_header() {
    // A well-formed WAV whose `data` payload is larger than the probe ceiling
    // (true of any recording longer than a few minutes) still has to be
    // ingested. The `data` chunk header is at the front of the file, so the
    // declared audio bounds are available without touching the payload;
    // rejecting such files would drop every long WAV in a library.
    use std::io::Write;

    let tmp = tempfile::tempdir().unwrap();
    let wav_path = tmp.path().join("long.wav");

    // Declared payload: probe ceiling plus 16 MiB (80 MiB in total).
    let data_len: u64 = MAX_PROBE_BYTES + (16 << 20);
    let data_len_le = u32::try_from(data_len).unwrap().to_le_bytes();

    // 16-byte PCM `fmt ` body, little-endian throughout.
    let fmt: Vec<u8> = [
        &1u16.to_le_bytes()[..],      // format tag
        &1u16.to_le_bytes()[..],      // channels
        &44_100u32.to_le_bytes()[..], // sample rate
        &88_200u32.to_le_bytes()[..], // byte rate
        &2u16.to_le_bytes()[..],      // block align
        &16u16.to_le_bytes()[..],     // bits per sample
    ]
    .concat();
    let fmt_len_le = u32::try_from(fmt.len()).unwrap().to_le_bytes();

    // form: WAVE(4) + fmt chunk(24) + data header(8) + data payload
    let riff_size_le = (36u32 + u32::try_from(data_len).unwrap()).to_le_bytes();

    let front: Vec<u8> = [
        &b"RIFF"[..],
        &riff_size_le[..],
        &b"WAVE"[..],
        &b"fmt "[..],
        &fmt_len_le[..],
        &fmt[..],
        &b"data"[..],
        &data_len_le[..],
    ]
    .concat();

    // Audio begins right after the headers assembled above.
    let audio_offset = u64::try_from(front.len()).unwrap();
    let file_len = audio_offset + data_len;

    {
        let mut file = std::fs::File::create(&wav_path).unwrap();
        file.write_all(&front).unwrap();
        // Only the headers are written; the payload region is created by
        // extending the file length.
        file.set_len(file_len).unwrap();
    }

    let probed = match probe_file(&wav_path, WINDOW).unwrap() {
        ProbeOutcome::Probed(p, _) => p,
        other => panic!("expected Probed, got {other:?}"),
    };
    assert_eq!(probed.format, Format::Wav);
    assert_eq!(probed.audio_offset, audio_offset);
    assert_eq!(probed.audio_length, data_len);
}
2491
#[test]
fn probe_file_reports_raced_on_mid_probe_mutation() {
    // Grows the file from inside the after-S1 hook while a probe is in
    // flight and expects the probe to report `Raced`.
    use std::io::Write;

    // Size of the (unwritten) `data` payload declared in the header.
    const PAYLOAD_LEN: u32 = 64;

    let tmp = tempfile::tempdir().unwrap();
    let wav_path = tmp.path().join("a.wav");

    // Small WAV the probe accepts: a `fmt ` chunk plus a tiny `data` chunk.
    // NOTE(review): six u16 fields make this `fmt ` body 12 bytes, not the 16
    // a PCM chunk normally has; presumably the probe only walks chunk headers
    // and does not validate the body; confirm before reusing this fixture.
    let fmt: Vec<u8> = [1u16, 1, 0, 0, 0, 16]
        .iter()
        .flat_map(|field| field.to_le_bytes())
        .collect();
    let fmt_len = u32::try_from(fmt.len()).unwrap();

    // form: WAVE(4) + fmt chunk(8+len) + data header(8) + data payload(64)
    let riff_size = 4 + 8 + fmt_len + 8 + PAYLOAD_LEN;

    let front: Vec<u8> = [
        &b"RIFF"[..],
        &riff_size.to_le_bytes()[..],
        &b"WAVE"[..],
        &b"fmt "[..],
        &fmt_len.to_le_bytes()[..],
        &fmt[..],
        &b"data"[..],
        &PAYLOAD_LEN.to_le_bytes()[..],
    ]
    .concat();

    {
        let mut file = std::fs::File::create(&wav_path).unwrap();
        file.write_all(&front).unwrap();
        let total_len = u64::try_from(front.len()).unwrap() + u64::from(PAYLOAD_LEN);
        file.set_len(total_len).unwrap();
    }

    // The hook appends to the file, so its size differs between the two stats.
    let hook_path = wav_path.clone();
    set_after_s1_hook(move || {
        let mut appender = std::fs::OpenOptions::new()
            .append(true)
            .open(&hook_path)
            .unwrap();
        appender.write_all(&[0u8; 4096]).unwrap(); // size moves -> S2 != S1
    });
    let out = probe_file(&wav_path, WINDOW);
    // Remove the hook before asserting so a failure here cannot leave it set.
    clear_after_s1_hook();

    assert!(matches!(out, Ok(ProbeOutcome::Raced)), "got {out:?}");
}
2527}