Skip to main content

musefs_core/
template.rs

1use std::collections::BTreeMap;
2use std::collections::BTreeSet;
3use std::iter::Peekable;
4use std::str::Chars;
5use thiserror::Error;
6
/// Upper bound on how many surviving segments one `$!{}` path field may
/// contribute to a rendered path. Without the cap, a hostile 256 KiB tag of the
/// form `a/a/a/...` would expand into tens of thousands of directory levels
/// (depth amplification across the DB trust boundary, #303).
const MAX_PATH_FIELD_SEGMENTS: usize = 64;
11
/// Deepest `[...]` section nesting that [`Template::parse`] accepts. A template
/// nested any deeper is rejected instead of being recursed into (#304). Real
/// templates nest 2–3 levels, so 64 leaves generous headroom while still
/// bounding the adversarial `[[[…` input.
const MAX_SECTION_DEPTH: usize = 64;
17
18/// Why a template was rejected at parse time. Surfaced to the operator via
19/// [`crate::CoreError::InvalidTemplate`] when `Musefs::open` parses a bad
20/// `--template`.
21#[derive(Debug, Clone, PartialEq, Eq, Error)]
22pub enum TemplateError {
23    /// `[...]` sections nested deeper than `limit`.
24    #[error("template nesting exceeds the maximum depth of {limit}")]
25    NestingTooDeep { limit: usize },
26    /// A literal run contains a control byte (`< 0x20`, includes NUL), which is
27    /// not a valid POSIX path-component byte.
28    #[error("template literal contains control byte {byte:#04x}")]
29    ControlByte { byte: u8 },
30    /// A `${`/`$!{` field was opened but never closed by `}` before the end of
31    /// the template, e.g. `${albumartist`.
32    #[error("template has an unterminated '${{' field (missing '}}')")]
33    UnterminatedField,
34    /// A `[...]` section was opened but never closed by `]` before the end of
35    /// the template, e.g. `$album[ - $disc`.
36    #[error("template has an unclosed '[' section (missing ']')")]
37    UnclosedSection,
38}
39
40/// A parsed path template: literal runs, `$field` / `${field}` substitutions
41/// (with optional `${a|b}` fallback chains and `$!{field}` slash-preserving path
42/// fields), and `[...]` conditional sections. Parse once per mount; `render`
43/// then costs one output `String` per call, with no re-parse.
44#[derive(Debug, Clone)]
45pub struct Template {
46    parts: Vec<Part>,
47}
48
/// One node of a parsed template.
#[derive(Debug, Clone)]
enum Part {
    /// Text copied to the output verbatim.
    Literal(String),
    /// A field substitution. `names` holds the `|`-separated fallback chain
    /// (a single entry for a plain field). `raw` is set for a `$!{…}` path
    /// field, whose '/' characters are kept as separators.
    Field {
        names: Vec<String>,
        raw: bool,
    },
    /// A `[...]` conditional section. It is emitted only when at least one
    /// field referenced inside it (at any nesting level) is present.
    Section(Vec<Part>),
}
62
63impl Template {
64    /// Parse a beets-style template. Returns `Err` for a template that cannot
65    /// produce valid path components: control/NUL bytes in literal text
66    /// (#275), `[...]` nesting deeper than [`MAX_SECTION_DEPTH`] (#304), an
67    /// unterminated `${`/`$!{` field (no closing `}`), or an unclosed `[`
68    /// section (no closing `]`).
69    ///
70    /// - `$field` / `${field}` substitute a tag field; `${a|b|c}` is a fallback
71    ///   chain (first present wins). Names are matched case-insensitively.
72    /// - `$!{field}` is a path field: the value's '/' are kept as directory
73    ///   separators (each segment sanitized; empty / `.` / `..` dropped).
74    /// - `[...]` is a conditional section, suppressed when every field it
75    ///   references is empty. `$[` and `$]` emit literal brackets.
76    /// - A `$` not followed by a recognized form stays literal. An unterminated
77    ///   `${`/`$!{` (missing `}`) or an unclosed `[` section (missing `]`) is a
78    ///   parse error.
79    pub fn parse(template: &str) -> Result<Template, TemplateError> {
80        let mut chars = template.chars().peekable();
81        let parts = parse_parts(&mut chars, 0)?;
82        Ok(Template { parts })
83    }
84
85    /// The set of field names this template references, across plain fields,
86    /// `$!{}` path fields, `|` fallback chains, and `[...]` sections. Names
87    /// are already ASCII-lowercased at parse time, matching `tags_to_fields`'s
88    /// key folding, so a key-filtered tag load (`Db::tags_grouped_for_keys`)
89    /// fetches exactly what rendering consumes.
90    pub fn referenced_fields(&self) -> BTreeSet<String> {
91        let mut out = BTreeSet::new();
92        collect_field_names(&self.parts, &mut out);
93        out
94    }
95
96    /// Render one track's path. Outside a section a missing field resolves
97    /// through `fallbacks` then `default_fallback`; inside a section a missing
98    /// field renders blank and drives suppression. The extension follows a '.'.
99    pub fn render(
100        &self,
101        fields: &BTreeMap<String, &str>,
102        fallbacks: &BTreeMap<String, String>,
103        default_fallback: &str,
104        ext: &str,
105    ) -> String {
106        let (mut out, _, _) = render_parts(&self.parts, fields, fallbacks, default_fallback, false);
107        out.push('.');
108        out.push_str(ext);
109        out
110    }
111
112    /// Like [`render`](Self::render), but returns `None` when any *top-level*
113    /// (non-section) field is unresolved — the caller's signal to skip the track
114    /// rather than substitute `default_fallback`. Per-field fallback chains and
115    /// `[...]` sections behave exactly as in `render`; only the top-level default
116    /// substitution is replaced by the skip. Backs `--skip-on-missing`.
117    pub fn render_checked(
118        &self,
119        fields: &BTreeMap<String, &str>,
120        fallbacks: &BTreeMap<String, String>,
121        ext: &str,
122    ) -> Option<String> {
123        let (mut out, _, top_complete) = render_parts(&self.parts, fields, fallbacks, "", false);
124        if !top_complete {
125            return None;
126        }
127        out.push('.');
128        out.push_str(ext);
129        Some(out)
130    }
131}
132
133/// Parse parts until a closing `]` (when `depth > 0`) or end of input. `depth`
134/// is the current `[...]` nesting level (0 = top level). A section opened at
135/// `depth > 0` that reaches end of input without its `]` is rejected as
136/// [`TemplateError::UnclosedSection`].
137fn parse_parts(chars: &mut Peekable<Chars>, depth: usize) -> Result<Vec<Part>, TemplateError> {
138    let mut parts = Vec::new();
139    let mut literal = String::new();
140    let mut closed = false;
141    while let Some(&c) = chars.peek() {
142        match c {
143            ']' if depth > 0 => {
144                chars.next(); // consume the closing ']'
145                closed = true;
146                break;
147            }
148            '[' => {
149                chars.next();
150                push_literal(&mut parts, &mut literal);
151                if depth + 1 > MAX_SECTION_DEPTH {
152                    return Err(TemplateError::NestingTooDeep {
153                        limit: MAX_SECTION_DEPTH,
154                    });
155                }
156                let inner = parse_parts(chars, depth + 1)?;
157                parts.push(Part::Section(inner));
158            }
159            '$' => {
160                chars.next(); // consume '$'
161                match chars.peek() {
162                    Some('[') => {
163                        chars.next();
164                        literal.push('[');
165                    }
166                    Some(']') => {
167                        chars.next();
168                        literal.push(']');
169                    }
170                    Some('{') => {
171                        chars.next();
172                        let names = parse_braced_names(chars)?;
173                        push_literal(&mut parts, &mut literal);
174                        parts.push(Part::Field { names, raw: false });
175                    }
176                    Some('!') => {
177                        chars.next(); // consume '!'
178                        if chars.peek() == Some(&'{') {
179                            chars.next(); // consume '{'
180                            let names = parse_braced_names(chars)?;
181                            push_literal(&mut parts, &mut literal);
182                            parts.push(Part::Field { names, raw: true });
183                        } else {
184                            literal.push('$');
185                            literal.push('!');
186                        }
187                    }
188                    Some(&nc) if is_field_char(nc) => {
189                        let name = parse_unbraced_name(chars);
190                        push_literal(&mut parts, &mut literal);
191                        parts.push(Part::Field {
192                            names: vec![name],
193                            raw: false,
194                        });
195                    }
196                    _ => literal.push('$'),
197                }
198            }
199            _ => {
200                if (c as u32) < 0x20 {
201                    return Err(TemplateError::ControlByte { byte: c as u8 });
202                }
203                literal.push(c);
204                chars.next();
205            }
206        }
207    }
208    push_literal(&mut parts, &mut literal);
209    if depth > 0 && !closed {
210        return Err(TemplateError::UnclosedSection);
211    }
212    Ok(parts)
213}
214
215fn push_literal(parts: &mut Vec<Part>, literal: &mut String) {
216    if !literal.is_empty() {
217        parts.push(Part::Literal(std::mem::take(literal)));
218    }
219}
220
221/// Consume up to the next `}` and split on `|` into the candidate name list,
222/// lowercased for case-insensitive lookup. Reaching end of input before the
223/// closing `}` is a [`TemplateError::UnterminatedField`].
224fn parse_braced_names(chars: &mut Peekable<Chars>) -> Result<Vec<String>, TemplateError> {
225    let mut content = String::new();
226    let mut closed = false;
227    for nc in chars.by_ref() {
228        if nc == '}' {
229            closed = true;
230            break;
231        }
232        content.push(nc);
233    }
234    if !closed {
235        return Err(TemplateError::UnterminatedField);
236    }
237    Ok(content.split('|').map(str::to_ascii_lowercase).collect())
238}
239
240fn parse_unbraced_name(chars: &mut Peekable<Chars>) -> String {
241    let mut name = String::new();
242    while let Some(&nc) = chars.peek() {
243        if is_field_char(nc) {
244            name.push(nc);
245            chars.next();
246        } else {
247            break;
248        }
249    }
250    name.to_ascii_lowercase()
251}
252
253fn collect_field_names(parts: &[Part], out: &mut BTreeSet<String>) {
254    for part in parts {
255        match part {
256            Part::Literal(_) => {}
257            Part::Field { names, .. } => {
258                for name in names {
259                    out.insert(name.clone());
260                }
261            }
262            Part::Section(inner) => collect_field_names(inner, out),
263        }
264    }
265}
266
267/// Render `parts`, returning the text, whether at least one referenced field
268/// was present, and whether every *top-level* field resolved (no
269/// `default_fallback` substitution was needed). `in_section` gates
270/// `default_fallback`: it is substituted only at the top level (outside any
271/// `[...]`), and only a top-level miss clears `top_complete`.
272fn render_parts(
273    parts: &[Part],
274    fields: &BTreeMap<String, &str>,
275    fallbacks: &BTreeMap<String, String>,
276    default_fallback: &str,
277    in_section: bool,
278) -> (String, bool, bool) {
279    let mut out = String::new();
280    let mut any_present = false;
281    let mut top_complete = true;
282    for part in parts {
283        match part {
284            Part::Literal(lit) => out.push_str(lit),
285            Part::Field { names, raw: false } => {
286                if let Some(value) = resolve_plain(names, fields, fallbacks) {
287                    sanitize_into(&mut out, value);
288                    any_present = true;
289                } else if !in_section {
290                    sanitize_into(&mut out, default_fallback);
291                    top_complete = false;
292                }
293            }
294            Part::Field { names, raw: true } => {
295                if let Some(path) = resolve_path(names, fields, fallbacks) {
296                    out.push_str(&path);
297                    any_present = true;
298                } else if !in_section {
299                    sanitize_into(&mut out, default_fallback);
300                    top_complete = false;
301                }
302            }
303            Part::Section(inner) => {
304                let (text, present, _) =
305                    render_parts(inner, fields, fallbacks, default_fallback, true);
306                if present {
307                    out.push_str(&text);
308                    any_present = true;
309                }
310            }
311        }
312    }
313    (out, any_present, top_complete)
314}
315
/// Walk the fallback chain `names` and return the first non-empty value found.
/// For each name the tag value in `fields` is tried first and the `fallbacks`
/// entry for that same name second; empty strings count as missing. Returns
/// `None` when no name yields a value.
fn resolve_plain<'a>(
    names: &[String],
    fields: &BTreeMap<String, &'a str>,
    fallbacks: &'a BTreeMap<String, String>,
) -> Option<&'a str> {
    names.iter().find_map(|name| {
        fields
            .get(name)
            .copied()
            .filter(|tag| !tag.is_empty())
            .or_else(|| {
                fallbacks
                    .get(name)
                    .map(String::as_str)
                    .filter(|fallback| !fallback.is_empty())
            })
    })
}
337
338/// First candidate that yields at least one surviving path segment, returned as
339/// the sanitized multi-segment path.
340fn resolve_path(
341    names: &[String],
342    fields: &BTreeMap<String, &str>,
343    fallbacks: &BTreeMap<String, String>,
344) -> Option<String> {
345    for name in names {
346        let value = fields
347            .get(name)
348            .copied()
349            .or_else(|| fallbacks.get(name).map(String::as_str));
350        if let Some(value) = value {
351            let path = sanitize_path(value);
352            if !path.is_empty() {
353                return Some(path);
354            }
355        }
356    }
357    None
358}
359
/// Append `value` to `out`, substituting '_' for every '/' and every control
/// character below U+0020, so the appended text cannot introduce a new path
/// separator. All other characters are copied unchanged. '/' separators that
/// come from the template itself are literals and never pass through here.
fn sanitize_into(out: &mut String, value: &str) {
    out.extend(
        value
            .chars()
            .map(|c| if c == '/' || c < '\u{20}' { '_' } else { c }),
    );
}
372
373/// Split `value` on '/', drop empty / `.` / `..` segments, sanitize each
374/// surviving segment, and rejoin with '/'. Guarantees no empty, `.`, `..`, or
375/// leading/trailing-slash components reach the virtual tree. Stops after
376/// `MAX_PATH_FIELD_SEGMENTS` surviving segments, bounding the depth a single
377/// hostile path-field value can amplify into (#303).
378fn sanitize_path(value: &str) -> String {
379    let mut out = String::new();
380    let mut count = 0usize;
381    for segment in value.split('/') {
382        if count == MAX_PATH_FIELD_SEGMENTS {
383            break;
384        }
385        if segment.is_empty() || segment == "." || segment == ".." {
386            continue;
387        }
388        if !out.is_empty() {
389            out.push('/');
390        }
391        sanitize_into(&mut out, segment);
392        count += 1;
393    }
394    out
395}
396
/// Whether `c` may appear in an unbraced `$field` name: an ASCII letter, an
/// ASCII digit, or '_'.
fn is_field_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
400
#[cfg(test)]
mod tests {
    use super::*;

    /// `referenced_fields` reports names from plain fields, path fields,
    /// sections and fallback chains, and nothing derived from literal text.
    #[test]
    fn referenced_fields_collects_plain_path_section_and_fallback_names() {
        let template = Template::parse("$artist/$!{beets_path}/[$disc - ]${title|name}")
            .expect("valid template");
        let referenced = template.referenced_fields();
        // The set iterates in sorted order, so comparing against the sorted
        // list checks both membership and the absence of spurious entries.
        let found: Vec<&str> = referenced.iter().map(String::as_str).collect();
        assert_eq!(
            found,
            vec!["artist", "beets_path", "disc", "name", "title"]
        );
    }

    /// Exactly `MAX_SECTION_DEPTH` nested sections parse; one more is refused
    /// with `NestingTooDeep` carrying the limit.
    #[test]
    fn nesting_at_limit_parses_one_past_limit_rejected() {
        let at_limit = format!(
            "{}{}",
            "[".repeat(MAX_SECTION_DEPTH),
            "]".repeat(MAX_SECTION_DEPTH)
        );
        assert!(
            Template::parse(&at_limit).is_ok(),
            "{MAX_SECTION_DEPTH} deep parses"
        );

        let past_limit = "[".repeat(MAX_SECTION_DEPTH + 1);
        assert_eq!(
            Template::parse(&past_limit).err(),
            Some(TemplateError::NestingTooDeep {
                limit: MAX_SECTION_DEPTH
            })
        );
    }
}
433}