Skip to main content

musefs_core/
template.rs

1use std::collections::BTreeMap;
2use std::collections::BTreeSet;
3use std::iter::Peekable;
4use std::str::Chars;
5use thiserror::Error;
6
/// Max surviving segments a single `$!{}` path field may expand into. A hostile
/// 256 KiB tag shaped `a/a/a/...` would otherwise build tens of thousands of
/// directory levels (depth amplification across the DB trust boundary, #303).
///
/// Applied by `sanitize_path`: once this many segments have been kept, the
/// rest of the value is silently discarded (truncation, not an error).
const MAX_PATH_FIELD_SEGMENTS: usize = 64;
11
/// Max `[...]` section nesting depth accepted by [`Template::parse`]. Beyond this
/// the parser rejects the template rather than recursing further (#304). Real
/// templates nest 2–3 deep; 64 is generous headroom that still bounds the
/// adversarial `[[[…` case.
///
/// The bound is inclusive: a template nested exactly this deep parses, one
/// level deeper fails with [`TemplateError::NestingTooDeep`].
const MAX_SECTION_DEPTH: usize = 64;
17
/// Why a template was rejected at parse time. Surfaced to the operator via
/// [`crate::CoreError::InvalidTemplate`] when `Musefs::open` parses a bad
/// `--template`.
///
/// Both variants are produced only by the parser (`parse_parts`); rendering
/// never fails.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum TemplateError {
    /// `[...]` sections nested deeper than `limit`. `limit` is always
    /// [`MAX_SECTION_DEPTH`], carried so the message can name the bound.
    #[error("template nesting exceeds the maximum depth of {limit}")]
    NestingTooDeep { limit: usize },
    /// A literal run contains a control byte (`< 0x20`, includes NUL), which is
    /// not a valid POSIX path-component byte. Only literal text is checked:
    /// characters inside a `${...}` / `$!{...}` name list are consumed as part
    /// of the field name and never reach this check.
    #[error("template literal contains control byte {byte:#04x}")]
    ControlByte { byte: u8 },
}
31
/// A parsed path template: literal runs, `$field` / `${field}` substitutions
/// (with optional `${a|b}` fallback chains and `$!{field}` slash-preserving path
/// fields), and `[...]` conditional sections. Parse once per mount; `render`
/// then costs one output `String` per call, with no re-parse.
///
/// Construct with [`Template::parse`]; the parsed form is immutable afterwards.
#[derive(Debug, Clone)]
pub struct Template {
    // Top-level parts in template order; sections nest their own part lists.
    parts: Vec<Part>,
}
40
/// One node of a parsed template. Sections hold nested parts, so a template
/// is a tree whose depth is bounded by `MAX_SECTION_DEPTH`.
#[derive(Debug, Clone)]
enum Part {
    /// Literal text, copied to the output verbatim at render time. Adjacent
    /// literal characters (including `$[` / `$]` escapes) are merged into one
    /// run by the parser.
    Literal(String),
    /// `names` is the `|`-separated fallback chain (length 1 for a plain field);
    /// `raw` marks a `$!{…}` path field whose '/' are kept as separators.
    /// Names are stored ASCII-lowercased.
    Field {
        names: Vec<String>,
        raw: bool,
    },
    /// A `[...]` conditional section: emitted only if at least one field
    /// referenced within it (transitively) is present.
    Section(Vec<Part>),
}
54
55impl Template {
56    /// Parse a beets-style template. Returns `Err` for a template that cannot
57    /// produce valid path components: control/NUL bytes in literal text
58    /// (#275) or `[...]` nesting deeper than [`MAX_SECTION_DEPTH`] (#304).
59    ///
60    /// - `$field` / `${field}` substitute a tag field; `${a|b|c}` is a fallback
61    ///   chain (first present wins). Names are matched case-insensitively.
62    /// - `$!{field}` is a path field: the value's '/' are kept as directory
63    ///   separators (each segment sanitized; empty / `.` / `..` dropped).
64    /// - `[...]` is a conditional section, suppressed when every field it
65    ///   references is empty. `$[` and `$]` emit literal brackets.
66    /// - A `$` not followed by a recognized form stays literal; an unterminated
67    ///   `${`/`$!{` consumes the rest as the name; an unterminated `[` runs to
68    ///   end of input.
69    pub fn parse(template: &str) -> Result<Template, TemplateError> {
70        let mut chars = template.chars().peekable();
71        let parts = parse_parts(&mut chars, 0)?;
72        Ok(Template { parts })
73    }
74
75    /// The set of field names this template references, across plain fields,
76    /// `$!{}` path fields, `|` fallback chains, and `[...]` sections. Names
77    /// are already ASCII-lowercased at parse time, matching `tags_to_fields`'s
78    /// key folding, so a key-filtered tag load (`Db::tags_grouped_for_keys`)
79    /// fetches exactly what rendering consumes.
80    pub fn referenced_fields(&self) -> BTreeSet<String> {
81        let mut out = BTreeSet::new();
82        collect_field_names(&self.parts, &mut out);
83        out
84    }
85
86    /// Render one track's path. Outside a section a missing field resolves
87    /// through `fallbacks` then `default_fallback`; inside a section a missing
88    /// field renders blank and drives suppression. The extension follows a '.'.
89    pub fn render(
90        &self,
91        fields: &BTreeMap<String, &str>,
92        fallbacks: &BTreeMap<String, String>,
93        default_fallback: &str,
94        ext: &str,
95    ) -> String {
96        let (mut out, _) = render_parts(&self.parts, fields, fallbacks, default_fallback, false);
97        out.push('.');
98        out.push_str(ext);
99        out
100    }
101}
102
103/// Parse parts until a closing `]` (when `depth > 0`) or end of input. `depth`
104/// is the current `[...]` nesting level (0 = top level).
105fn parse_parts(chars: &mut Peekable<Chars>, depth: usize) -> Result<Vec<Part>, TemplateError> {
106    let mut parts = Vec::new();
107    let mut literal = String::new();
108    while let Some(&c) = chars.peek() {
109        match c {
110            ']' if depth > 0 => {
111                chars.next(); // consume the closing ']'
112                break;
113            }
114            '[' => {
115                chars.next();
116                push_literal(&mut parts, &mut literal);
117                if depth + 1 > MAX_SECTION_DEPTH {
118                    return Err(TemplateError::NestingTooDeep {
119                        limit: MAX_SECTION_DEPTH,
120                    });
121                }
122                let inner = parse_parts(chars, depth + 1)?;
123                parts.push(Part::Section(inner));
124            }
125            '$' => {
126                chars.next(); // consume '$'
127                match chars.peek() {
128                    Some('[') => {
129                        chars.next();
130                        literal.push('[');
131                    }
132                    Some(']') => {
133                        chars.next();
134                        literal.push(']');
135                    }
136                    Some('{') => {
137                        chars.next();
138                        let names = parse_braced_names(chars);
139                        push_literal(&mut parts, &mut literal);
140                        parts.push(Part::Field { names, raw: false });
141                    }
142                    Some('!') => {
143                        chars.next(); // consume '!'
144                        if chars.peek() == Some(&'{') {
145                            chars.next(); // consume '{'
146                            let names = parse_braced_names(chars);
147                            push_literal(&mut parts, &mut literal);
148                            parts.push(Part::Field { names, raw: true });
149                        } else {
150                            literal.push('$');
151                            literal.push('!');
152                        }
153                    }
154                    Some(&nc) if is_field_char(nc) => {
155                        let name = parse_unbraced_name(chars);
156                        push_literal(&mut parts, &mut literal);
157                        parts.push(Part::Field {
158                            names: vec![name],
159                            raw: false,
160                        });
161                    }
162                    _ => literal.push('$'),
163                }
164            }
165            _ => {
166                if (c as u32) < 0x20 {
167                    return Err(TemplateError::ControlByte { byte: c as u8 });
168                }
169                literal.push(c);
170                chars.next();
171            }
172        }
173    }
174    push_literal(&mut parts, &mut literal);
175    Ok(parts)
176}
177
178fn push_literal(parts: &mut Vec<Part>, literal: &mut String) {
179    if !literal.is_empty() {
180        parts.push(Part::Literal(std::mem::take(literal)));
181    }
182}
183
/// Read the inside of a `${...}` / `$!{...}` reference: everything up to the
/// closing `}` (which is consumed too) or the end of input if the brace is
/// never closed. The text is split on `|` into the candidate names, each
/// ASCII-lowercased so lookups are case-insensitive.
fn parse_braced_names(chars: &mut Peekable<Chars>) -> Vec<String> {
    // `take_while` swallows the terminating '}' just like an explicit
    // read-then-break loop would.
    let inner: String = chars.by_ref().take_while(|&ch| ch != '}').collect();
    inner.split('|').map(str::to_ascii_lowercase).collect()
}
196
197fn parse_unbraced_name(chars: &mut Peekable<Chars>) -> String {
198    let mut name = String::new();
199    while let Some(&nc) = chars.peek() {
200        if is_field_char(nc) {
201            name.push(nc);
202            chars.next();
203        } else {
204            break;
205        }
206    }
207    name.to_ascii_lowercase()
208}
209
210fn collect_field_names(parts: &[Part], out: &mut BTreeSet<String>) {
211    for part in parts {
212        match part {
213            Part::Literal(_) => {}
214            Part::Field { names, .. } => {
215                for name in names {
216                    out.insert(name.clone());
217                }
218            }
219            Part::Section(inner) => collect_field_names(inner, out),
220        }
221    }
222}
223
224/// Render `parts`, returning the text and whether at least one referenced field
225/// was present. `in_section` gates `default_fallback`: it is substituted only at
226/// the top level (outside any `[...]`).
227fn render_parts(
228    parts: &[Part],
229    fields: &BTreeMap<String, &str>,
230    fallbacks: &BTreeMap<String, String>,
231    default_fallback: &str,
232    in_section: bool,
233) -> (String, bool) {
234    let mut out = String::new();
235    let mut any_present = false;
236    for part in parts {
237        match part {
238            Part::Literal(lit) => out.push_str(lit),
239            Part::Field { names, raw: false } => {
240                if let Some(value) = resolve_plain(names, fields, fallbacks) {
241                    sanitize_into(&mut out, value);
242                    any_present = true;
243                } else if !in_section {
244                    sanitize_into(&mut out, default_fallback);
245                }
246            }
247            Part::Field { names, raw: true } => {
248                if let Some(path) = resolve_path(names, fields, fallbacks) {
249                    out.push_str(&path);
250                    any_present = true;
251                } else if !in_section {
252                    sanitize_into(&mut out, default_fallback);
253                }
254            }
255            Part::Section(inner) => {
256                let (text, present) =
257                    render_parts(inner, fields, fallbacks, default_fallback, true);
258                if present {
259                    out.push_str(&text);
260                    any_present = true;
261                }
262            }
263        }
264    }
265    (out, any_present)
266}
267
/// Resolve a plain field's fallback chain. Candidates are tried in order;
/// for each one the track's own `fields` value is preferred and the
/// configured `fallbacks` value is used otherwise. Empty strings count as
/// missing in both maps. Returns `None` when no candidate has a value.
fn resolve_plain<'a>(
    names: &[String],
    fields: &BTreeMap<String, &'a str>,
    fallbacks: &'a BTreeMap<String, String>,
) -> Option<&'a str> {
    names.iter().find_map(|name| {
        let tagged = fields.get(name).copied().filter(|v| !v.is_empty());
        tagged.or_else(|| {
            fallbacks
                .get(name)
                .map(String::as_str)
                .filter(|v| !v.is_empty())
        })
    })
}
289
290/// First candidate that yields at least one surviving path segment, returned as
291/// the sanitized multi-segment path.
292fn resolve_path(
293    names: &[String],
294    fields: &BTreeMap<String, &str>,
295    fallbacks: &BTreeMap<String, String>,
296) -> Option<String> {
297    for name in names {
298        let value = fields
299            .get(name)
300            .copied()
301            .or_else(|| fallbacks.get(name).map(String::as_str));
302        if let Some(value) = value {
303            let path = sanitize_path(value);
304            if !path.is_empty() {
305                return Some(path);
306            }
307        }
308    }
309    None
310}
311
/// Append `value` to `out` as a single path component: every '/' and every
/// control character (code point below 0x20) is written as '_', all other
/// characters are copied unchanged. The '/' separators written in the
/// template itself are literals and never go through this function.
fn sanitize_into(out: &mut String, value: &str) {
    out.extend(value.chars().map(|ch| match ch {
        '/' | '\0'..='\x1f' => '_',
        keep => keep,
    }));
}
324
325/// Split `value` on '/', drop empty / `.` / `..` segments, sanitize each
326/// surviving segment, and rejoin with '/'. Guarantees no empty, `.`, `..`, or
327/// leading/trailing-slash components reach the virtual tree. Stops after
328/// `MAX_PATH_FIELD_SEGMENTS` surviving segments, bounding the depth a single
329/// hostile path-field value can amplify into (#303).
330fn sanitize_path(value: &str) -> String {
331    let mut out = String::new();
332    let mut count = 0usize;
333    for segment in value.split('/') {
334        if count == MAX_PATH_FIELD_SEGMENTS {
335            break;
336        }
337        if segment.is_empty() || segment == "." || segment == ".." {
338            continue;
339        }
340        if !out.is_empty() {
341            out.push('/');
342        }
343        sanitize_into(&mut out, segment);
344        count += 1;
345    }
346    out
347}
348
/// Whether `c` may appear in a bare `$field` name: an ASCII letter, an ASCII
/// digit, or an underscore.
fn is_field_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain fields, path fields, section contents and every member of a
    /// fallback chain are reported, and nothing else is.
    #[test]
    fn referenced_fields_collects_plain_path_section_and_fallback_names() {
        let template = Template::parse("$artist/$!{beets_path}/[$disc - ]${title|name}")
            .expect("valid template");
        let expected: BTreeSet<String> = ["artist", "beets_path", "disc", "title", "name"]
            .iter()
            .copied()
            .map(String::from)
            .collect();
        // Comparing whole sets also proves literals add no spurious entries.
        assert_eq!(template.referenced_fields(), expected);
    }

    /// The nesting bound is inclusive: exactly `MAX_SECTION_DEPTH` levels
    /// parse, one more is rejected with the limit reported in the error.
    #[test]
    fn nesting_at_limit_parses_one_past_limit_rejected() {
        let open_brackets = |levels: usize| "[".repeat(levels);

        assert!(
            Template::parse(&open_brackets(MAX_SECTION_DEPTH)).is_ok(),
            "{MAX_SECTION_DEPTH} deep parses"
        );

        let too_deep = Template::parse(&open_brackets(MAX_SECTION_DEPTH + 1));
        assert_eq!(
            too_deep.err(),
            Some(TemplateError::NestingTooDeep {
                limit: MAX_SECTION_DEPTH
            })
        );
    }
}
385}