Skip to main content

fallow_config/workspace/
pnpm_catalog.rs

//! Parser for the `catalog:` and `catalogs:` sections of `pnpm-workspace.yaml`.
//!
//! pnpm supports two catalog forms:
//! - the top-level `catalog:` map (the "default" catalog)
//! - the top-level `catalogs:` map of named catalogs
//!
//! ```yaml
//! catalog:
//!   react: ^18.2.0
//!   "@scope/lib": ^1.0.0
//!
//! catalogs:
//!   react17:
//!     react: ^17.0.2
//!     react-dom: ^17.0.2
//! ```
//!
//! Workspace packages reference catalog entries from their `dependencies`
//! (and the other dependency fields) with the `catalog:` protocol:
//!
//! ```json
//! { "dependencies": { "react": "catalog:", "old-react": "catalog:react17" } }
//! ```
//!
//! For the unused-catalog-entry detector we need both the structured catalog
//! map and the 1-based line number of each entry in the source, so findings
//! can point users to the exact line. `serde_yaml_ng` gives us the structural
//! parse; a second, targeted scan over the raw source recovers the line
//! numbers.
30
31/// Structured catalog data extracted from a `pnpm-workspace.yaml` file.
32#[derive(Debug, Clone, Default)]
33pub struct PnpmCatalogData {
34    /// Catalogs found in the file. The default catalog (top-level `catalog:`)
35    /// always appears first with `name = "default"` when present; named
36    /// catalogs follow in YAML source order.
37    pub catalogs: Vec<PnpmCatalog>,
38    /// Named catalogs under `catalogs:` that declare no package entries.
39    ///
40    /// The top-level `catalog:` map is intentionally not represented here:
41    /// some repos keep it as a stable hook even when currently empty.
42    pub empty_named_catalog_groups: Vec<PnpmCatalogGroup>,
43}
44
45/// A single catalog (the default or a named one).
46#[derive(Debug, Clone)]
47pub struct PnpmCatalog {
48    /// Catalog name. `"default"` for the top-level `catalog:` map, or the
49    /// named catalog key for entries declared under `catalogs.<name>:`.
50    pub name: String,
51    /// Entries declared in this catalog, in source order.
52    pub entries: Vec<PnpmCatalogEntry>,
53}
54
/// One package declaration inside a catalog.
#[derive(Clone, Debug)]
pub struct PnpmCatalogEntry {
    /// The package name used as the catalog key (e.g. `"react"` or
    /// `"@scope/lib"`).
    pub package_name: String,
    /// Line of the entry in the source file, counted from 1.
    pub line: u32,
}
63
/// A named catalog under `catalogs:` that has no package entries.
#[derive(Clone, Debug)]
pub struct PnpmCatalogGroup {
    /// Name of the group (e.g. `"react17"` for `catalogs.react17`).
    pub name: String,
    /// Line of the group header in the source file, counted from 1.
    pub line: u32,
}
72
73/// Parse the catalog sections of a `pnpm-workspace.yaml` file.
74///
75/// Returns an empty `PnpmCatalogData` when the file has no catalog data, when
76/// the YAML is malformed, or when the catalog sections are present but empty.
77/// All non-catalog top-level keys (`packages`, `catalog`, `catalogs`, etc.)
78/// are ignored.
79#[must_use]
80pub fn parse_pnpm_catalog_data(source: &str) -> PnpmCatalogData {
81    let value: serde_yaml_ng::Value = match serde_yaml_ng::from_str(source) {
82        Ok(v) => v,
83        Err(_) => return PnpmCatalogData::default(),
84    };
85    let Some(mapping) = value.as_mapping() else {
86        return PnpmCatalogData::default();
87    };
88
89    let line_index = build_line_index(source);
90    let mut catalogs = Vec::new();
91    let mut empty_named_catalog_groups = Vec::new();
92
93    if let Some(default_value) = mapping.get("catalog")
94        && let Some(default_map) = default_value.as_mapping()
95    {
96        let entries = collect_entries(default_map, &line_index, "default");
97        if !entries.is_empty() {
98            catalogs.push(PnpmCatalog {
99                name: "default".to_string(),
100                entries,
101            });
102        }
103    }
104
105    if let Some(named_value) = mapping.get("catalogs")
106        && let Some(named_map) = named_value.as_mapping()
107    {
108        for (name_value, catalog_value) in named_map {
109            let Some(name) = name_value.as_str() else {
110                continue;
111            };
112            if let Some(catalog_map) = catalog_value.as_mapping() {
113                let entries = collect_entries(catalog_map, &line_index, name);
114                if entries.is_empty() {
115                    if let Some(line) = line_index.group_line_for(name) {
116                        empty_named_catalog_groups.push(PnpmCatalogGroup {
117                            name: name.to_string(),
118                            line,
119                        });
120                    }
121                } else {
122                    catalogs.push(PnpmCatalog {
123                        name: name.to_string(),
124                        entries,
125                    });
126                }
127            } else if catalog_value.is_null()
128                && let Some(line) = line_index.group_line_for(name)
129            {
130                empty_named_catalog_groups.push(PnpmCatalogGroup {
131                    name: name.to_string(),
132                    line,
133                });
134            }
135        }
136    }
137
138    PnpmCatalogData {
139        catalogs,
140        empty_named_catalog_groups,
141    }
142}
143
144fn collect_entries(
145    mapping: &serde_yaml_ng::Mapping,
146    line_index: &CatalogLineIndex,
147    catalog_name: &str,
148) -> Vec<PnpmCatalogEntry> {
149    mapping
150        .iter()
151        .filter_map(|(k, _)| {
152            let pkg = k.as_str()?;
153            let line = line_index.line_for(catalog_name, pkg)?;
154            Some(PnpmCatalogEntry {
155                package_name: pkg.to_string(),
156                line,
157            })
158        })
159        .collect()
160}
161
/// Lookup table from `(catalog_name, package_name)` to a 1-based source line.
///
/// The catalog name is `"default"` for entries under the top-level `catalog:`
/// key and the catalog's own key for entries under `catalogs.<name>:`.
struct CatalogLineIndex {
    /// `((catalog_name, package_name), line)` records, in scan order.
    entries: Vec<((String, String), u32)>,
    /// `(catalog_name, line)` records for named catalog headers, in scan order.
    groups: Vec<(String, u32)>,
}
170
171impl CatalogLineIndex {
172    fn line_for(&self, catalog_name: &str, package_name: &str) -> Option<u32> {
173        self.entries
174            .iter()
175            .find(|((cat, pkg), _)| cat == catalog_name && pkg == package_name)
176            .map(|(_, line)| *line)
177    }
178
179    fn group_line_for(&self, catalog_name: &str) -> Option<u32> {
180        self.groups
181            .iter()
182            .find(|(name, _)| name == catalog_name)
183            .map(|(_, line)| *line)
184    }
185}
186
187/// Walk the raw YAML source to map each catalog entry to its 1-based line
188/// number. This is a small section-aware scanner: it tracks whether the
189/// current line falls inside `catalog:` (the default catalog) or inside
190/// `catalogs.<name>:` (a named catalog), and records each key at the
191/// expected indentation level.
192fn build_line_index(source: &str) -> CatalogLineIndex {
193    let mut entries = Vec::new();
194    let mut groups = Vec::new();
195    let mut section: Section = Section::None;
196    let mut named_catalog: Option<(String, usize)> = None;
197
198    for (idx, raw_line) in source.lines().enumerate() {
199        let line_no = u32::try_from(idx).unwrap_or(u32::MAX).saturating_add(1);
200        let trimmed = strip_inline_comment(raw_line);
201        let trimmed_left = trimmed.trim_start();
202        let indent = trimmed.len() - trimmed_left.len();
203
204        if trimmed_left.is_empty() {
205            continue;
206        }
207
208        if indent == 0 {
209            section = if trimmed_left.starts_with("catalogs:") {
210                Section::NamedCatalogs
211            } else if trimmed_left.starts_with("catalog:") {
212                Section::DefaultCatalog
213            } else {
214                Section::None
215            };
216            named_catalog = None;
217            continue;
218        }
219
220        match section {
221            Section::None => {}
222            Section::DefaultCatalog => {
223                if let Some(name) = parse_key(trimmed_left) {
224                    entries.push((("default".to_string(), name), line_no));
225                }
226            }
227            Section::NamedCatalogs => {
228                if let Some(name) = parse_key(trimmed_left) {
229                    match &named_catalog {
230                        Some((_, existing_indent)) if indent > *existing_indent => {
231                            entries.push((
232                                (
233                                    named_catalog
234                                        .as_ref()
235                                        .map_or_else(String::new, |(n, _)| n.clone()),
236                                    name,
237                                ),
238                                line_no,
239                            ));
240                        }
241                        _ => {
242                            groups.push((name.clone(), line_no));
243                            named_catalog = Some((name, indent));
244                        }
245                    }
246                }
247            }
248        }
249    }
250
251    CatalogLineIndex { entries, groups }
252}
253
/// Which top-level block of the workspace file the line scanner is inside.
#[derive(Clone, Copy, Debug)]
enum Section {
    /// Any top-level block other than the two catalog blocks.
    None,
    /// The top-level `catalog:` block.
    DefaultCatalog,
    /// The top-level `catalogs:` block.
    NamedCatalogs,
}
260
261/// Strip an unquoted trailing `# ...` comment from a single line. Preserves
262/// `#` characters inside quoted strings so `"# in quotes": "value"` is left
263/// alone.
264pub(super) fn strip_inline_comment(line: &str) -> &str {
265    let bytes = line.as_bytes();
266    let mut in_single = false;
267    let mut in_double = false;
268    for (i, &b) in bytes.iter().enumerate() {
269        match b {
270            b'\'' if !in_double => in_single = !in_single,
271            b'"' if !in_single => in_double = !in_double,
272            b'#' if !in_single && !in_double => {
273                let head = &line[..i];
274                return head.trim_end();
275            }
276            _ => {}
277        }
278    }
279    line.trim_end()
280}
281
282/// Parse a key declaration of the form `key:` or `key: value`, returning just
283/// the (unquoted) key. Returns `None` when the line is not a key declaration
284/// (e.g., a list item `- foo`, a block scalar marker, or malformed).
285pub(super) fn parse_key(line: &str) -> Option<String> {
286    let bytes = line.as_bytes();
287    if bytes.is_empty() {
288        return None;
289    }
290    let first = bytes[0];
291    if first == b'-' || first == b'#' {
292        return None;
293    }
294
295    if first == b'"' || first == b'\'' {
296        let quote = first;
297        let mut i = 1;
298        while i < bytes.len() {
299            let b = bytes[i];
300            if b == b'\\' && i + 1 < bytes.len() {
301                i += 2;
302                continue;
303            }
304            if b == quote {
305                let key = &line[1..i];
306                let rest = &line[i + 1..];
307                let trimmed = rest.trim_start();
308                if trimmed.starts_with(':') {
309                    return Some(unescape_key(key));
310                }
311                return None;
312            }
313            i += 1;
314        }
315        return None;
316    }
317
318    let colon_pos = bytes.iter().position(|&b| b == b':')?;
319    let key = line[..colon_pos].trim();
320    if key.is_empty() {
321        return None;
322    }
323    if key.contains(['{', '[', '&', '*', '!']) {
324        return None;
325    }
326    Some(key.to_string())
327}
328
329fn unescape_key(raw: &str) -> String {
330    let mut out = String::with_capacity(raw.len());
331    let mut chars = raw.chars();
332    while let Some(c) = chars.next() {
333        if c == '\\'
334            && let Some(next) = chars.next()
335        {
336            match next {
337                'n' => out.push('\n'),
338                't' => out.push('\t'),
339                '"' => out.push('"'),
340                '\\' => out.push('\\'),
341                other => {
342                    out.push('\\');
343                    out.push(other);
344                }
345            }
346        } else {
347            out.push(c);
348        }
349    }
350    out
351}
352
#[cfg(test)]
mod tests {
    use super::*;

    /// `(package_name, line)` for every entry of `catalog`, in stored order.
    fn name_line_pairs(catalog: &PnpmCatalog) -> Vec<(&str, u32)> {
        catalog
            .entries
            .iter()
            .map(|entry| (entry.package_name.as_str(), entry.line))
            .collect()
    }

    #[test]
    fn parses_default_catalog() {
        let parsed = parse_pnpm_catalog_data(
            "packages:\n  - 'packages/*'\n\ncatalog:\n  react: ^18.2.0\n  is-even: ^1.0.0\n",
        );
        let [catalog] = parsed.catalogs.as_slice() else {
            panic!("expected exactly one catalog, got {}", parsed.catalogs.len());
        };
        assert_eq!(catalog.name, "default");
        assert_eq!(
            name_line_pairs(catalog),
            vec![("react", 5), ("is-even", 6)]
        );
    }

    #[test]
    fn parses_named_catalogs() {
        let parsed = parse_pnpm_catalog_data(
            "catalogs:\n  react17:\n    react: ^17.0.2\n    react-dom: ^17.0.2\n  ui:\n    headlessui: ^2.0.0\n",
        );
        let [react17, ui] = parsed.catalogs.as_slice() else {
            panic!("expected exactly two catalogs, got {}", parsed.catalogs.len());
        };

        assert_eq!(react17.name, "react17");
        assert_eq!(react17.entries.len(), 2);
        let first = &react17.entries[0];
        assert_eq!((first.package_name.as_str(), first.line), ("react", 3));

        assert_eq!(ui.name, "ui");
        let first = &ui.entries[0];
        assert_eq!((first.package_name.as_str(), first.line), ("headlessui", 6));

        assert!(parsed.empty_named_catalog_groups.is_empty());
    }

    #[test]
    fn handles_default_and_named_together() {
        let parsed = parse_pnpm_catalog_data(
            "catalog:\n  react: ^18\n\ncatalogs:\n  legacy:\n    react: ^17\n",
        );
        let [default, legacy] = parsed.catalogs.as_slice() else {
            panic!("expected exactly two catalogs, got {}", parsed.catalogs.len());
        };
        assert_eq!(default.name, "default");
        assert_eq!(default.entries[0].line, 2);
        assert_eq!(legacy.name, "legacy");
        assert_eq!(legacy.entries[0].line, 6);
    }

    #[test]
    fn handles_quoted_keys() {
        let parsed =
            parse_pnpm_catalog_data("catalog:\n  \"@scope/lib\": ^1.0.0\n  'my-pkg': ^2.0.0\n");
        let leading: Vec<_> = name_line_pairs(&parsed.catalogs[0])
            .into_iter()
            .take(2)
            .collect();
        assert_eq!(leading, vec![("@scope/lib", 2), ("my-pkg", 3)]);
    }

    #[test]
    fn handles_inline_comments() {
        let parsed = parse_pnpm_catalog_data(
            "catalog:\n  react: ^18  # pin until #1234\n  is-even: ^1.0\n",
        );
        let [react, is_even] = parsed.catalogs[0].entries.as_slice() else {
            panic!("expected exactly two entries");
        };
        assert_eq!(react.package_name, "react");
        assert_eq!(is_even.package_name, "is-even");
        assert_eq!(is_even.line, 3);
    }

    #[test]
    fn handles_four_space_indentation() {
        let parsed = parse_pnpm_catalog_data("catalog:\n    react: ^18.2.0\n    vue: ^3.4.0\n");
        let lines: Vec<u32> = parsed.catalogs[0]
            .entries
            .iter()
            .map(|entry| entry.line)
            .collect();
        assert_eq!(lines, vec![2, 3]);
    }

    #[test]
    fn empty_catalog_returns_no_catalogs() {
        let parsed = parse_pnpm_catalog_data("catalog: {}\n");
        assert!(parsed.catalogs.is_empty());
        assert!(parsed.empty_named_catalog_groups.is_empty());
    }

    #[test]
    fn tracks_empty_named_catalog_groups() {
        let parsed = parse_pnpm_catalog_data(
            "catalog:\n  react: ^18\n\ncatalogs:\n  react17: {}\n  legacy:\n    # retained note\n  vue3:\n    vue: ^3.4.0\n",
        );
        assert_eq!(parsed.catalogs.len(), 2);

        let mut empty_groups = Vec::new();
        for group in &parsed.empty_named_catalog_groups {
            empty_groups.push((group.name.as_str(), group.line));
        }
        assert_eq!(empty_groups, vec![("react17", 5), ("legacy", 6)]);
    }

    #[test]
    fn no_catalog_keys_returns_no_catalogs() {
        let parsed = parse_pnpm_catalog_data("packages:\n  - 'packages/*'\n");
        assert!(parsed.catalogs.is_empty());
    }

    #[test]
    fn malformed_yaml_returns_no_catalogs() {
        let parsed = parse_pnpm_catalog_data("{this is\nnot: valid: yaml: at: all");
        assert!(parsed.catalogs.is_empty());
    }

    #[test]
    fn empty_input_returns_no_catalogs() {
        assert!(parse_pnpm_catalog_data("").catalogs.is_empty());
    }

    #[test]
    fn handles_object_form_entries() {
        let parsed = parse_pnpm_catalog_data(
            "catalog:\n  react:\n    specifier: ^18.2.0\n  vue: ^3.4.0\n",
        );
        let entries = &parsed.catalogs[0].entries;
        assert_eq!(entries.len(), 2);
        for expected in ["react", "vue"] {
            assert!(entries.iter().any(|entry| entry.package_name == expected));
        }
    }

    #[test]
    fn skips_packages_section() {
        let parsed = parse_pnpm_catalog_data(
            "packages:\n  - 'apps/*'\n  - 'libs/*'\ncatalog:\n  react: ^18\n",
        );
        assert_eq!(parsed.catalogs.len(), 1);
        assert_eq!(parsed.catalogs[0].entries[0].line, 5);
    }

    #[test]
    fn strip_inline_comment_preserves_quoted_hash() {
        let cases = [
            ("foo: \"a#b\" # tail", "foo: \"a#b\""),
            ("# top-level", ""),
            ("plain: value", "plain: value"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_inline_comment(input), expected);
        }
    }

    #[test]
    fn parse_key_handles_simple_and_quoted() {
        let cases = [
            ("react: ^18", Some("react")),
            ("\"@scope/lib\": ^1", Some("@scope/lib")),
            ("'pkg': ^2", Some("pkg")),
            ("- item", None),
            ("", None),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_key(input), expected.map(str::to_string));
        }
    }
}