// musefs_db/tags.rs

1use crate::error::{check_field_len, check_tag_count};
2use crate::limits::{MAX_TAG_KEY_LEN, MAX_TAG_VALUE_LEN};
3use crate::models::{BinaryTag, BinaryTagRow, Tag};
4use crate::{Db, ReadWrite, Result};
5use rusqlite::params;
6
7/// Reject an over-cap text-tag row from its `length(key)` /
8/// `length(CAST(value AS BLOB))` columns *before* the strings are
9/// materialized. `value` is measured in bytes (not characters, #505) so the
10/// materialized-memory bound is exact. Routes through the shared
11/// `check_field_len`, so the allocation-free guarantee is the same one its
12/// unit test pins (spec N13).
13fn check_tag_lengths(key_len: i64, value_len: i64) -> Result<()> {
14    check_field_len("tags", "key", key_len, MAX_TAG_KEY_LEN)?;
15    check_field_len("tags", "value", value_len, MAX_TAG_VALUE_LEN)?;
16    Ok(())
17}
18
/// Column list projected by the grouped tag readers: `track_id` at index 0,
/// followed by the five columns `read_tag_row` consumes (key length, value
/// length, key, value, ordinal) starting at index 1. Must stay in lockstep
/// with `read_tag_row`'s offset arithmetic.
///
/// The value length is taken over `CAST(value AS BLOB)`, i.e. in bytes.
/// NOTE(review): `length(key)` is applied to the text column directly, which
/// in SQLite counts characters up to the first NUL rather than bytes —
/// confirm that is the intended unit for the key cap.
const GROUPED_TAG_COLS: &str =
    "track_id, length(key), length(CAST(value AS BLOB)), key, value, ordinal";
24
25/// Read one text-tag row laid out as `length(key), length(value), key, value,
26/// ordinal` starting at column `base`; length-guards the row before its strings
27/// are materialized (spec N13).
28fn read_tag_row(r: &rusqlite::Row, base: usize) -> Result<Tag> {
29    check_tag_lengths(r.get(base)?, r.get(base + 1)?)?;
30    Ok(Tag {
31        key: r.get(base + 2)?,
32        value: r.get(base + 3)?,
33        ordinal: r.get(base + 4)?,
34    })
35}
36
37/// Drain grouped tag rows (`GROUPED_TAG_COLS`: `track_id` then `read_tag_row`'s
38/// five columns at base 1) into `out`, enforcing the per-track count cap.
39fn collect_grouped_tags(
40    rows: &mut rusqlite::Rows,
41    out: &mut std::collections::HashMap<i64, Vec<Tag>>,
42) -> Result<()> {
43    while let Some(r) = rows.next()? {
44        let track_id: i64 = r.get(0)?;
45        let entry = out.entry(track_id).or_default();
46        entry.push(read_tag_row(r, 1)?);
47        check_tag_count(track_id, entry.len())?;
48    }
49    Ok(())
50}
51
52impl<M> Db<M> {
53    pub fn get_tags(&self, track_id: i64) -> Result<Vec<Tag>> {
54        let mut stmt = self.conn.prepare_cached(
55            "SELECT length(key), length(CAST(value AS BLOB)), key, value, ordinal FROM tags \
56             WHERE track_id = ?1 AND value_blob IS NULL ORDER BY key, ordinal",
57        )?;
58        let mut rows = stmt.query(params![track_id])?;
59        let mut out = Vec::new();
60        while let Some(r) = rows.next()? {
61            out.push(read_tag_row(r, 0)?);
62            check_tag_count(track_id, out.len())?;
63        }
64        Ok(out)
65    }
66
67    pub fn tags_for_tracks(
68        &self,
69        track_ids: &[i64],
70    ) -> Result<std::collections::HashMap<i64, Vec<Tag>>> {
71        let mut out = std::collections::HashMap::new();
72        crate::query_in_chunks(
73            &self.conn,
74            track_ids,
75            |ph| {
76                format!(
77                    "SELECT {GROUPED_TAG_COLS} FROM tags \
78                     WHERE track_id IN ({ph}) AND value_blob IS NULL \
79                     ORDER BY track_id, key, ordinal"
80                )
81            },
82            |rows| collect_grouped_tags(rows, &mut out),
83        )?;
84        Ok(out)
85    }
86
87    pub fn tags_grouped(&self) -> Result<std::collections::HashMap<i64, Vec<Tag>>> {
88        let sql = format!(
89            "SELECT {GROUPED_TAG_COLS} FROM tags \
90             WHERE value_blob IS NULL ORDER BY track_id, key, ordinal"
91        );
92        let mut stmt = self.conn.prepare(&sql)?;
93        let mut rows = stmt.query([])?;
94        let mut out = std::collections::HashMap::new();
95        collect_grouped_tags(&mut rows, &mut out)?;
96        Ok(out)
97    }
98
99    pub fn tags_grouped_for_keys(
100        &self,
101        keys: &[&str],
102    ) -> Result<std::collections::HashMap<i64, Vec<Tag>>> {
103        let lowered: Vec<String> = keys.iter().map(|k| k.to_ascii_lowercase()).collect();
104        let mut out = std::collections::HashMap::new();
105        crate::query_in_chunks(
106            &self.conn,
107            &lowered,
108            |ph| {
109                format!(
110                    "SELECT {GROUPED_TAG_COLS} FROM tags \
111                     WHERE value_blob IS NULL AND lower(key) IN ({ph}) \
112                     ORDER BY track_id, key, ordinal"
113                )
114            },
115            |rows| collect_grouped_tags(rows, &mut out),
116        )?;
117        Ok(out)
118    }
119
120    /// Binary tag rows for a track: streaming handle (rowid), key, and payload
121    /// length. Ordered by (key, ordinal) to match the layout builder's emission
122    /// order. The blob bytes stream at read time; only `key` (materialized here)
123    /// is length-guarded, plus the per-track row count.
124    pub fn get_binary_tags(&self, track_id: i64) -> Result<Vec<BinaryTagRow>> {
125        let mut stmt = self.conn.prepare_cached(
126            "SELECT length(key), rowid, key, length(value_blob) FROM tags \
127             WHERE track_id = ?1 AND value_blob IS NOT NULL ORDER BY key, ordinal",
128        )?;
129        let mut rows = stmt.query(params![track_id])?;
130        let mut out = Vec::new();
131        while let Some(r) = rows.next()? {
132            check_field_len("tags", "key", r.get(0)?, MAX_TAG_KEY_LEN)?;
133            out.push(BinaryTagRow {
134                rowid: r.get(1)?,
135                key: r.get(2)?,
136                byte_len: r.get(3)?,
137            });
138            check_tag_count(track_id, out.len())?;
139        }
140        Ok(out)
141    }
142
143    /// Stream binary-tag bytes at `offset` directly into `buf` via incremental blob
144    /// I/O — no intermediate allocation (#70). A short read means the row changed
145    /// underneath the resolved layout; `read_at_exact` surfaces it as an error rather
146    /// than zero-filling. (`payload_id` is the `tags` rowid; see the spec's
147    /// "payload_id validity invariant".)
148    pub fn read_binary_tag_chunk_into(
149        &self,
150        payload_id: i64,
151        offset: u64,
152        buf: &mut [u8],
153    ) -> Result<()> {
154        let blob = self
155            .conn
156            .blob_open("main", "tags", "value_blob", payload_id, true)?;
157        blob.read_at_exact(buf, crate::convert::usize_from(offset))?;
158        Ok(())
159    }
160
161    /// Allocating convenience form of `read_binary_tag_chunk_into` (non-hot-path
162    /// callers).
163    pub fn read_binary_tag_chunk(
164        &self,
165        payload_id: i64,
166        offset: u64,
167        len: usize,
168    ) -> Result<Vec<u8>> {
169        let mut buf = vec![0u8; len];
170        self.read_binary_tag_chunk_into(payload_id, offset, &mut buf)?;
171        Ok(buf)
172    }
173}
174
175/// Replace a track's text-tag rows (`value_blob IS NULL`); binary rows are
176/// untouched. Runs on `conn` so both `Db<ReadWrite>` (own transaction) and
177/// `BulkWriter` (caller-held transaction) share one implementation.
178pub(crate) fn replace_tags_in(
179    conn: &rusqlite::Connection,
180    track_id: i64,
181    tags: &[Tag],
182) -> Result<()> {
183    conn.execute(
184        "DELETE FROM tags WHERE track_id = ?1 AND value_blob IS NULL",
185        params![track_id],
186    )?;
187    let mut stmt = conn.prepare_cached(
188        "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, ?2, ?3, ?4)",
189    )?;
190    for t in tags {
191        stmt.execute(params![track_id, t.key, t.value, t.ordinal])?;
192    }
193    Ok(())
194}
195
196/// Replace a track's binary-tag rows (`value_blob IS NOT NULL`); text rows are
197/// untouched. Binary rows store `''` in `value`. See `replace_tags_in` for the
198/// shared-`conn` rationale.
199pub(crate) fn set_binary_tags_in(
200    conn: &rusqlite::Connection,
201    track_id: i64,
202    tags: &[BinaryTag],
203) -> Result<()> {
204    conn.execute(
205        "DELETE FROM tags WHERE track_id = ?1 AND value_blob IS NOT NULL",
206        params![track_id],
207    )?;
208    let mut stmt = conn.prepare_cached(
209        "INSERT INTO tags (track_id, key, value, value_blob, ordinal) \
210         VALUES (?1, ?2, '', ?3, ?4)",
211    )?;
212    for t in tags {
213        stmt.execute(params![track_id, t.key, t.payload, t.ordinal])?;
214    }
215    Ok(())
216}
217
218impl Db<ReadWrite> {
219    pub fn replace_tags(&self, track_id: i64, tags: &[Tag]) -> Result<()> {
220        let tx = self.conn.unchecked_transaction()?;
221        replace_tags_in(&tx, track_id, tags)?;
222        tx.commit()?;
223        Ok(())
224    }
225
226    /// Replace the track's binary tag rows (value_blob IS NOT NULL); text rows
227    /// (managed by `replace_tags`) are untouched. Binary rows store '' in `value`.
228    pub fn set_binary_tags(&self, track_id: i64, tags: &[BinaryTag]) -> Result<()> {
229        let tx = self.conn.unchecked_transaction()?;
230        set_binary_tags_in(&tx, track_id, tags)?;
231        tx.commit()?;
232        Ok(())
233    }
234}
235
#[cfg(test)]
mod tags_for_tracks_tests {
    use super::*;
    use crate::{Format, NewTrack, Tag};

    /// Fresh in-memory database for a single test.
    fn open_mem() -> Db {
        Db::open_in_memory().unwrap()
    }

    /// Minimal FLAC track record for `path`; only the path varies between tests.
    fn new_track(path: &str) -> NewTrack {
        NewTrack {
            backing_path: path.into(),
            format: Format::Flac,
            audio_offset: 0,
            audio_length: 1,
            backing_size: 1,
            backing_mtime_ns: 0,
            backing_ctime_ns: 0,
        }
    }

    /// Only requested tracks are returned, and each list is in (key, ordinal) order.
    #[test]
    fn tags_for_tracks_returns_only_requested_ordered_by_key_ordinal() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        let b = db.upsert_track(&new_track("/b.flac")).unwrap();
        let c = db.upsert_track(&new_track("/c.flac")).unwrap();
        db.replace_tags(
            a,
            &[
                Tag::new("ARTIST", "second", 1),
                Tag::new("ARTIST", "first", 0),
            ],
        )
        .unwrap();
        db.replace_tags(b, &[Tag::new("ARTIST", "bee", 0)]).unwrap();
        db.replace_tags(c, &[Tag::new("ARTIST", "cee", 0)]).unwrap();

        let got = db.tags_for_tracks(&[a, b]).unwrap();
        assert_eq!(got.len(), 2, "c was not requested");
        assert!(!got.contains_key(&c));
        let a_tags = &got[&a];
        assert_eq!(a_tags[0].value, "first");
        assert_eq!(a_tags[1].value, "second");
    }

    /// 1500 ids forces the reader to split the IN-list across several queries.
    #[test]
    fn tags_for_tracks_chunks_beyond_sqlite_variable_limit() {
        let db = open_mem();
        let mut ids = Vec::new();
        for i in 0..1500 {
            let id = db.upsert_track(&new_track(&format!("/t{i}.flac"))).unwrap();
            db.replace_tags(id, &[Tag::new("TITLE", &format!("t{i}"), 0)])
                .unwrap();
            ids.push(id);
        }
        let got = db.tags_for_tracks(&ids).unwrap();
        assert_eq!(got.len(), 1500, "all chunks fetched");
    }

    #[test]
    fn tags_for_tracks_empty_input_is_empty_map() {
        let db = open_mem();
        assert!(db.tags_for_tracks(&[]).unwrap().is_empty());
    }

    /// A row with a blob payload must not surface through any text reader.
    #[test]
    fn text_queries_exclude_binary_rows() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.replace_tags(a, &[Tag::new("artist", "Alice", 0)])
            .unwrap();
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, value_blob, ordinal) \
                 VALUES (?1, 'PRIV', '', X'DEADBEEF', 0)",
                rusqlite::params![a],
            )
            .unwrap();

        let got = db.get_tags(a).unwrap();
        assert_eq!(got, vec![Tag::new("artist", "Alice", 0)]);
        let grouped = db.tags_grouped().unwrap();
        assert_eq!(grouped[&a], vec![Tag::new("artist", "Alice", 0)]);
        let for_tracks = db.tags_for_tracks(&[a]).unwrap();
        assert_eq!(for_tracks[&a], vec![Tag::new("artist", "Alice", 0)]);
    }

    /// Binary rows round-trip, stream by rowid, and can be cleared without
    /// disturbing the text rows.
    #[test]
    fn binary_tags_round_trip_and_are_independent_of_text() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.replace_tags(a, &[Tag::new("artist", "Alice", 0)])
            .unwrap();
        db.set_binary_tags(
            a,
            &[
                crate::BinaryTag {
                    key: "PRIV".into(),
                    payload: vec![1, 2, 3],
                    ordinal: 0,
                },
                crate::BinaryTag {
                    key: "PRIV".into(),
                    payload: vec![9, 9],
                    ordinal: 1,
                },
                crate::BinaryTag {
                    key: "GEOB".into(),
                    payload: vec![7],
                    ordinal: 0,
                },
            ],
        )
        .unwrap();

        assert_eq!(
            db.get_tags(a).unwrap(),
            vec![Tag::new("artist", "Alice", 0)]
        );

        // Rows come back in (key, ordinal) order: GEOB/0, PRIV/0, PRIV/1.
        let rows = db.get_binary_tags(a).unwrap();
        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0].key, "GEOB");
        assert_eq!(rows[0].byte_len, 1);
        assert_eq!(rows[1].key, "PRIV");
        assert_eq!(rows[1].byte_len, 3);
        assert_eq!(rows[2].byte_len, 2);

        let full = db.read_binary_tag_chunk(rows[1].rowid, 0, 3).unwrap();
        assert_eq!(full, vec![1, 2, 3]);
        let mid = db.read_binary_tag_chunk(rows[1].rowid, 1, 2).unwrap();
        assert_eq!(mid, vec![2, 3]);

        db.set_binary_tags(a, &[]).unwrap();
        assert!(db.get_binary_tags(a).unwrap().is_empty());
        assert_eq!(
            db.get_tags(a).unwrap(),
            vec![Tag::new("artist", "Alice", 0)]
        );
    }

    #[test]
    fn tags_grouped_for_keys_filters_case_insensitively() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.replace_tags(
            a,
            &[
                Tag::new("ARTIST", "Pix", 0),
                Tag::new("Title", "Song", 0),
                Tag::new("LYRICS", "la la", 0),
            ],
        )
        .unwrap();
        let got = db.tags_grouped_for_keys(&["artist", "title"]).unwrap();
        let tags = &got[&a];
        assert!(tags.iter().any(|t| t.value == "Pix"), "ARTIST matched");
        assert!(tags.iter().any(|t| t.value == "Song"), "Title matched");
        assert!(!tags.iter().any(|t| t.value == "la la"), "LYRICS excluded");
    }

    /// One byte over the value cap, inserted past the schema CHECK, is refused on read.
    #[test]
    fn get_tags_rejects_oversize_value() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.conn
            .execute_batch("PRAGMA ignore_check_constraints=ON")
            .unwrap();
        let big = "v".repeat(262_145);
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', ?2, 0)",
                rusqlite::params![a, big],
            )
            .unwrap();
        let err = db.get_tags(a).unwrap_err();
        assert!(
            matches!(err, crate::DbError::FieldTooLarge { field: "value", .. }),
            "{err:?}"
        );
    }

    /// Exactly at the cap is accepted by both the schema and the reader.
    #[test]
    fn get_tags_accepts_value_at_cap() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        let at = "v".repeat(262_144);
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', ?2, 0)",
                rusqlite::params![a, at],
            )
            .unwrap();
        assert_eq!(db.get_tags(a).unwrap()[0].value.len(), 262_144);
    }

    #[test]
    fn multibyte_value_over_byte_cap_is_rejected_at_write_and_read() {
        // Regression for #505: the cap counts bytes, not characters. 150_000
        // two-byte chars is 150_000 chars (under the old char-counting CHECK)
        // but 300_000 bytes (over the 256 KiB materialized-memory bound).
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        let multibyte = "é".repeat(150_000);
        assert!(multibyte.chars().count() < 262_144, "under the char count");
        assert!(multibyte.len() > 262_144, "over the byte cap");

        // Write path: the byte-accurate schema CHECK rejects the honest insert.
        let write_err = db.conn.execute(
            "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', ?2, 0)",
            rusqlite::params![a, multibyte],
        );
        assert!(
            write_err.is_err(),
            "byte-accurate CHECK must reject the write"
        );

        // Read path (defense-in-depth): a crafted DB that bypasses the CHECK is
        // still rejected by the byte-counting reader guard.
        db.conn
            .execute_batch("PRAGMA ignore_check_constraints=ON")
            .unwrap();
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', ?2, 0)",
                rusqlite::params![a, multibyte],
            )
            .unwrap();
        let err = db.get_tags(a).unwrap_err();
        assert!(
            matches!(err, crate::DbError::FieldTooLarge { field: "value", .. }),
            "{err:?}"
        );
    }

    /// A 257-character key, inserted past the schema CHECK, is refused by the binary reader.
    #[test]
    fn get_binary_tags_rejects_oversize_key() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.conn
            .execute_batch("PRAGMA ignore_check_constraints=ON")
            .unwrap();
        let key = "k".repeat(257);
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, value_blob, ordinal) VALUES (?1, ?2, '', X'00', 0)",
                rusqlite::params![a, key],
            )
            .unwrap();
        let err = db.get_binary_tags(a).unwrap_err();
        assert!(
            matches!(
                err,
                crate::DbError::FieldTooLarge {
                    table: "tags",
                    field: "key",
                    ..
                }
            ),
            "{err:?}"
        );
    }

    /// The per-track row-count cap is enforced by both the text reader and the
    /// binary reader.
    #[test]
    fn per_track_count_cap_text_and_binary() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        // 4097 text rows -> TooManyValues on get_tags.
        {
            let tx = db.conn.unchecked_transaction().unwrap();
            let mut stmt = tx
                .prepare(
                    "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', 'v', ?2)",
                )
                .unwrap();
            for i in 0..4097 {
                stmt.execute(rusqlite::params![a, i]).unwrap();
            }
            drop(stmt);
            tx.commit().unwrap();
        }
        let err = db.get_tags(a).unwrap_err();
        assert!(
            matches!(err, crate::DbError::TooManyValues { .. }),
            "{err:?}"
        );

        // 4097 binary rows -> TooManyValues on get_binary_tags. A separate
        // track keeps the two halves of the test independent.
        let b = db.upsert_track(&new_track("/b.flac")).unwrap();
        {
            let tx = db.conn.unchecked_transaction().unwrap();
            let mut stmt = tx
                .prepare(
                    "INSERT INTO tags (track_id, key, value, value_blob, ordinal) \
                     VALUES (?1, 'PRIV', '', X'DEADBEEF', ?2)",
                )
                .unwrap();
            for i in 0..4097 {
                stmt.execute(rusqlite::params![b, i]).unwrap();
            }
            drop(stmt);
            tx.commit().unwrap();
        }
        let err = db.get_binary_tags(b).unwrap_err();
        assert!(
            matches!(err, crate::DbError::TooManyValues { .. }),
            "{err:?}"
        );
    }

    /// One bad row in a batch fails the whole bulk read, not just its track.
    #[test]
    fn bulk_reader_rejects_one_oversized_track_in_batch() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        let b = db.upsert_track(&new_track("/b.flac")).unwrap();
        db.replace_tags(b, &[Tag::new("ok", "fine", 0)]).unwrap();
        db.conn
            .execute_batch("PRAGMA ignore_check_constraints=ON")
            .unwrap();
        let big = "v".repeat(262_145);
        db.conn
            .execute(
                "INSERT INTO tags (track_id, key, value, ordinal) VALUES (?1, 'k', ?2, 0)",
                rusqlite::params![a, big],
            )
            .unwrap();
        let err = db.tags_for_tracks(&[a, b]).unwrap_err();
        assert!(
            matches!(err, crate::DbError::FieldTooLarge { field: "value", .. }),
            "{err:?}"
        );
    }

    #[test]
    fn tags_grouped_for_keys_empty_keys_is_empty_map() {
        let db = open_mem();
        let a = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.replace_tags(a, &[Tag::new("ARTIST", "Pix", 0)]).unwrap();
        let got = db.tags_grouped_for_keys(&[]).unwrap();
        assert!(got.is_empty());
    }

    #[test]
    fn replace_tags_rejects_floor_violating_keys() {
        let db = open_mem();
        let t = db.upsert_track(&new_track("/a.flac")).unwrap();
        // A row violating the floor aborts the whole row-by-row transactional insert.
        assert!(db.replace_tags(t, &[Tag::new("", "v", 0)]).is_err());
        assert!(db.replace_tags(t, &[Tag::new("\u{7}", "v", 0)]).is_err());
        // '=' passes the DB floor (only the Vorbis path bars it).
        db.replace_tags(t, &[Tag::new("a=b", "c", 0)]).unwrap();
        let got = db.get_tags(t).unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].key, "a=b");
    }

    #[test]
    fn replace_tags_rolls_back_a_mixed_valid_invalid_batch() {
        let db = open_mem();
        let t = db.upsert_track(&new_track("/a.flac")).unwrap();
        db.replace_tags(t, &[Tag::new("artist", "Alice", 0)])
            .unwrap();
        // replace_tags DELETEs the existing text rows before re-inserting; a CHECK
        // violation later in the batch must roll the whole transaction back —
        // including the DELETE — so the original rows survive rather than the batch
        // half-applying.
        assert!(
            db.replace_tags(t, &[Tag::new("title", "ok", 0), Tag::new("", "bad", 0)])
                .is_err()
        );
        let got = db.get_tags(t).unwrap();
        assert_eq!(got.len(), 1);
        assert_eq!(got[0].key, "artist");
        assert_eq!(got[0].value, "Alice");
    }
}