Skip to main content

fallow_config/workspace/
pnpm_catalog.rs

//! Parser for the `catalog:` and `catalogs:` sections of `pnpm-workspace.yaml`.
//!
//! pnpm supports two catalog forms:
//! - the top-level `catalog:` map (the "default" catalog)
//! - the top-level `catalogs:` map of named catalogs
//!
//! ```yaml
//! catalog:
//!   react: ^18.2.0
//!   "@scope/lib": ^1.0.0
//!
//! catalogs:
//!   react17:
//!     react: ^17.0.2
//!     react-dom: ^17.0.2
//! ```
//!
//! Workspace packages reference catalog entries from their `dependencies`
//! (and friends) with the `catalog:` protocol:
//!
//! ```json
//! { "dependencies": { "react": "catalog:", "old-react": "catalog:react17" } }
//! ```
//!
//! For the unused-catalog-entry detector we need both the structured catalog
//! map and the 1-based line number of each entry in the source so findings
//! can point users to the exact line. `serde_yaml_ng` gives us the structural
//! parse; a second targeted scan over the raw source recovers the line
//! numbers.
30
31/// Structured catalog data extracted from a `pnpm-workspace.yaml` file.
32#[derive(Debug, Clone, Default)]
33pub struct PnpmCatalogData {
34    /// Catalogs found in the file. The default catalog (top-level `catalog:`)
35    /// always appears first with `name = "default"` when present; named
36    /// catalogs follow in YAML source order.
37    pub catalogs: Vec<PnpmCatalog>,
38    /// Named catalogs under `catalogs:` that declare no package entries.
39    ///
40    /// The top-level `catalog:` map is intentionally not represented here:
41    /// some repos keep it as a stable hook even when currently empty.
42    pub empty_named_catalog_groups: Vec<PnpmCatalogGroup>,
43}
44
45/// A single catalog (the default or a named one).
46#[derive(Debug, Clone)]
47pub struct PnpmCatalog {
48    /// Catalog name. `"default"` for the top-level `catalog:` map, or the
49    /// named catalog key for entries declared under `catalogs.<name>:`.
50    pub name: String,
51    /// Entries declared in this catalog, in source order.
52    pub entries: Vec<PnpmCatalogEntry>,
53}
54
/// One package declaration inside a catalog.
#[derive(Debug, Clone)]
pub struct PnpmCatalogEntry {
    /// Name of the package as written in the catalog
    /// (for example `"react"` or `"@scope/lib"`).
    pub package_name: String,
    /// Line of the declaration in the source file, counted from 1.
    pub line: u32,
}
63
/// A named catalog under `catalogs:` that declares no package entries.
#[derive(Debug, Clone)]
pub struct PnpmCatalogGroup {
    /// Name of the catalog group (for example `"react17"` for
    /// `catalogs.react17`).
    pub name: String,
    /// Line of the group header in the source file, counted from 1.
    pub line: u32,
}
72
73/// Parse the catalog sections of a `pnpm-workspace.yaml` file.
74///
75/// Returns an empty `PnpmCatalogData` when the file has no catalog data, when
76/// the YAML is malformed, or when the catalog sections are present but empty.
77/// All non-catalog top-level keys (`packages`, `catalog`, `catalogs`, etc.)
78/// are ignored.
79#[must_use]
80pub fn parse_pnpm_catalog_data(source: &str) -> PnpmCatalogData {
81    let value: serde_yaml_ng::Value = match serde_yaml_ng::from_str(source) {
82        Ok(v) => v,
83        Err(_) => return PnpmCatalogData::default(),
84    };
85    let Some(mapping) = value.as_mapping() else {
86        return PnpmCatalogData::default();
87    };
88
89    let line_index = build_line_index(source);
90    let mut catalogs = Vec::new();
91    let mut empty_named_catalog_groups = Vec::new();
92
93    if let Some(default_value) = mapping.get("catalog")
94        && let Some(default_map) = default_value.as_mapping()
95    {
96        let entries = collect_entries(default_map, &line_index, "default");
97        if !entries.is_empty() {
98            catalogs.push(PnpmCatalog {
99                name: "default".to_string(),
100                entries,
101            });
102        }
103    }
104
105    if let Some(named_value) = mapping.get("catalogs")
106        && let Some(named_map) = named_value.as_mapping()
107    {
108        for (name_value, catalog_value) in named_map {
109            let Some(name) = name_value.as_str() else {
110                continue;
111            };
112            if let Some(catalog_map) = catalog_value.as_mapping() {
113                let entries = collect_entries(catalog_map, &line_index, name);
114                if entries.is_empty() {
115                    if let Some(line) = line_index.group_line_for(name) {
116                        empty_named_catalog_groups.push(PnpmCatalogGroup {
117                            name: name.to_string(),
118                            line,
119                        });
120                    }
121                } else {
122                    catalogs.push(PnpmCatalog {
123                        name: name.to_string(),
124                        entries,
125                    });
126                }
127            } else if catalog_value.is_null()
128                && let Some(line) = line_index.group_line_for(name)
129            {
130                empty_named_catalog_groups.push(PnpmCatalogGroup {
131                    name: name.to_string(),
132                    line,
133                });
134            }
135        }
136    }
137
138    PnpmCatalogData {
139        catalogs,
140        empty_named_catalog_groups,
141    }
142}
143
144fn collect_entries(
145    mapping: &serde_yaml_ng::Mapping,
146    line_index: &CatalogLineIndex,
147    catalog_name: &str,
148) -> Vec<PnpmCatalogEntry> {
149    mapping
150        .iter()
151        .filter_map(|(k, _)| {
152            let pkg = k.as_str()?;
153            let line = line_index.line_for(catalog_name, pkg)?;
154            Some(PnpmCatalogEntry {
155                package_name: pkg.to_string(),
156                line,
157            })
158        })
159        .collect()
160}
161
/// Lookup table from catalog keys to their 1-based source lines.
///
/// `entries` maps `(catalog_name, package_name)` to a line, where
/// `catalog_name` is `"default"` for the top-level `catalog:` map or the key
/// used under `catalogs.<name>:`. `groups` maps a named catalog to the line of
/// its header.
struct CatalogLineIndex {
    entries: Vec<((String, String), u32)>,
    groups: Vec<(String, u32)>,
}

impl CatalogLineIndex {
    /// Line of `package_name` inside `catalog_name`; the first recorded match
    /// wins. `None` when the pair was never recorded.
    fn line_for(&self, catalog_name: &str, package_name: &str) -> Option<u32> {
        self.entries.iter().find_map(|((catalog, package), line)| {
            (catalog == catalog_name && package == package_name).then_some(*line)
        })
    }

    /// Line of the header of the named catalog `catalog_name`; the first
    /// recorded match wins. `None` when no such header was recorded.
    fn group_line_for(&self, catalog_name: &str) -> Option<u32> {
        for (name, line) in &self.groups {
            if name == catalog_name {
                return Some(*line);
            }
        }
        None
    }
}
186
187/// Walk the raw YAML source to map each catalog entry to its 1-based line
188/// number. This is a small section-aware scanner: it tracks whether the
189/// current line falls inside `catalog:` (the default catalog) or inside
190/// `catalogs.<name>:` (a named catalog), and records each key at the
191/// expected indentation level.
192fn build_line_index(source: &str) -> CatalogLineIndex {
193    let mut entries = Vec::new();
194    let mut groups = Vec::new();
195    let mut section: Section = Section::None;
196    let mut named_catalog: Option<(String, usize)> = None;
197
198    for (idx, raw_line) in source.lines().enumerate() {
199        let line_no = u32::try_from(idx).unwrap_or(u32::MAX).saturating_add(1);
200        let trimmed = strip_inline_comment(raw_line);
201        let trimmed_left = trimmed.trim_start();
202        let indent = trimmed.len() - trimmed_left.len();
203
204        if trimmed_left.is_empty() {
205            continue;
206        }
207
208        if indent == 0 {
209            section = if trimmed_left.starts_with("catalogs:") {
210                Section::NamedCatalogs
211            } else if trimmed_left.starts_with("catalog:") {
212                Section::DefaultCatalog
213            } else {
214                Section::None
215            };
216            named_catalog = None;
217            continue;
218        }
219
220        match section {
221            Section::None => {}
222            Section::DefaultCatalog => {
223                if let Some(name) = parse_key(trimmed_left) {
224                    entries.push((("default".to_string(), name), line_no));
225                }
226            }
227            Section::NamedCatalogs => {
228                // Two indent levels are meaningful here:
229                // - level 1 (`  react17:`): a named catalog header
230                // - level 2 (`    react: ^17`): an entry inside the named catalog
231                if let Some(name) = parse_key(trimmed_left) {
232                    match &named_catalog {
233                        Some((_, existing_indent)) if indent > *existing_indent => {
234                            // Entry inside the active named catalog
235                            entries.push((
236                                (
237                                    named_catalog
238                                        .as_ref()
239                                        .map_or_else(String::new, |(n, _)| n.clone()),
240                                    name,
241                                ),
242                                line_no,
243                            ));
244                        }
245                        _ => {
246                            // New named catalog header (or first one seen)
247                            groups.push((name.clone(), line_no));
248                            named_catalog = Some((name, indent));
249                        }
250                    }
251                }
252            }
253        }
254    }
255
256    CatalogLineIndex { entries, groups }
257}
258
/// Top-level region of `pnpm-workspace.yaml` that the line scanner is
/// currently inside.
#[derive(Debug, Clone, Copy)]
enum Section {
    /// Not inside a catalog section.
    None,
    /// Inside the top-level `catalog:` map.
    DefaultCatalog,
    /// Inside the top-level `catalogs:` map.
    NamedCatalogs,
}
265
266/// Strip an unquoted trailing `# ...` comment from a single line. Preserves
267/// `#` characters inside quoted strings so `"# in quotes": "value"` is left
268/// alone.
269pub(super) fn strip_inline_comment(line: &str) -> &str {
270    let bytes = line.as_bytes();
271    let mut in_single = false;
272    let mut in_double = false;
273    for (i, &b) in bytes.iter().enumerate() {
274        match b {
275            b'\'' if !in_double => in_single = !in_single,
276            b'"' if !in_single => in_double = !in_double,
277            b'#' if !in_single && !in_double => {
278                let head = &line[..i];
279                return head.trim_end();
280            }
281            _ => {}
282        }
283    }
284    line.trim_end()
285}
286
287/// Parse a key declaration of the form `key:` or `key: value`, returning just
288/// the (unquoted) key. Returns `None` when the line is not a key declaration
289/// (e.g., a list item `- foo`, a block scalar marker, or malformed).
290pub(super) fn parse_key(line: &str) -> Option<String> {
291    let bytes = line.as_bytes();
292    if bytes.is_empty() {
293        return None;
294    }
295    let first = bytes[0];
296    if first == b'-' || first == b'#' {
297        return None;
298    }
299
300    if first == b'"' || first == b'\'' {
301        // Quoted key: find the matching quote, then expect `:` after it.
302        let quote = first;
303        let mut i = 1;
304        while i < bytes.len() {
305            let b = bytes[i];
306            if b == b'\\' && i + 1 < bytes.len() {
307                i += 2;
308                continue;
309            }
310            if b == quote {
311                // Found closing quote
312                let key = &line[1..i];
313                let rest = &line[i + 1..];
314                let trimmed = rest.trim_start();
315                if trimmed.starts_with(':') {
316                    return Some(unescape_key(key));
317                }
318                return None;
319            }
320            i += 1;
321        }
322        return None;
323    }
324
325    let colon_pos = bytes.iter().position(|&b| b == b':')?;
326    let key = line[..colon_pos].trim();
327    if key.is_empty() {
328        return None;
329    }
330    // Disallow YAML flow / anchor / tag indicators in unquoted keys (we only
331    // care about simple `pkg: version` shapes in catalog maps).
332    if key.contains(['{', '[', '&', '*', '!']) {
333        return None;
334    }
335    Some(key.to_string())
336}
337
/// Collapse the common backslash escapes in a quoted key.
///
/// This is not full YAML unescaping. `\n`, `\t`, `\"` and `\\` are replaced
/// by the character they stand for; any other escape is kept verbatim
/// (backslash included), as is a lone trailing backslash. That is enough for
/// quoted package names to match the form produced by the YAML parser.
fn unescape_key(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Backslash was the last character: keep it as-is.
            None => out.push('\\'),
        }
    }
    out
}
364
#[cfg(test)]
mod tests {
    use super::*;

    /// The default catalog is reported under the name `default` with
    /// 1-based entry lines.
    #[test]
    fn parses_default_catalog() {
        let source = "packages:\n  - 'packages/*'\n\ncatalog:\n  react: ^18.2.0\n  is-even: ^1.0.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert_eq!(parsed.catalogs.len(), 1);
        let catalog = &parsed.catalogs[0];
        assert_eq!(catalog.name, "default");
        let entries: Vec<_> = catalog
            .entries
            .iter()
            .map(|entry| (entry.package_name.as_str(), entry.line))
            .collect();
        assert_eq!(entries, vec![("react", 5), ("is-even", 6)]);
    }

    /// Named catalogs keep their own entries and line numbers.
    #[test]
    fn parses_named_catalogs() {
        let source = "catalogs:\n  react17:\n    react: ^17.0.2\n    react-dom: ^17.0.2\n  ui:\n    headlessui: ^2.0.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert_eq!(parsed.catalogs.len(), 2);

        let react17 = &parsed.catalogs[0];
        assert_eq!(react17.name, "react17");
        assert_eq!(react17.entries.len(), 2);
        assert_eq!(react17.entries[0].package_name, "react");
        assert_eq!(react17.entries[0].line, 3);

        let ui = &parsed.catalogs[1];
        assert_eq!(ui.name, "ui");
        assert_eq!(ui.entries[0].package_name, "headlessui");
        assert_eq!(ui.entries[0].line, 6);

        assert!(parsed.empty_named_catalog_groups.is_empty());
    }

    /// The default catalog comes first when both forms are present.
    #[test]
    fn handles_default_and_named_together() {
        let source = "catalog:\n  react: ^18\n\ncatalogs:\n  legacy:\n    react: ^17\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert_eq!(parsed.catalogs.len(), 2);
        assert_eq!(parsed.catalogs[0].name, "default");
        assert_eq!(parsed.catalogs[0].entries[0].line, 2);
        assert_eq!(parsed.catalogs[1].name, "legacy");
        assert_eq!(parsed.catalogs[1].entries[0].line, 6);
    }

    /// Double- and single-quoted keys are matched to their lines.
    #[test]
    fn handles_quoted_keys() {
        let source = "catalog:\n  \"@scope/lib\": ^1.0.0\n  'my-pkg': ^2.0.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        let entries = &parsed.catalogs[0].entries;
        assert_eq!(entries[0].package_name, "@scope/lib");
        assert_eq!(entries[0].line, 2);
        assert_eq!(entries[1].package_name, "my-pkg");
        assert_eq!(entries[1].line, 3);
    }

    /// Trailing comments do not disturb entry detection.
    #[test]
    fn handles_inline_comments() {
        let source = "catalog:\n  react: ^18  # pin until #1234\n  is-even: ^1.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        let entries = &parsed.catalogs[0].entries;
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].package_name, "react");
        assert_eq!(entries[1].package_name, "is-even");
        assert_eq!(entries[1].line, 3);
    }

    /// Indentation width is not assumed to be two spaces.
    #[test]
    fn handles_four_space_indentation() {
        let source = "catalog:\n    react: ^18.2.0\n    vue: ^3.4.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        let entries = &parsed.catalogs[0].entries;
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].line, 2);
        assert_eq!(entries[1].line, 3);
    }

    /// An empty default catalog is neither a catalog nor an empty group.
    #[test]
    fn empty_catalog_returns_no_catalogs() {
        let source = "catalog: {}\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert!(parsed.catalogs.is_empty());
        assert!(parsed.empty_named_catalog_groups.is_empty());
    }

    /// Empty named catalogs are reported with their header line.
    #[test]
    fn tracks_empty_named_catalog_groups() {
        let source = "catalog:\n  react: ^18\n\ncatalogs:\n  react17: {}\n  legacy:\n    # retained note\n  vue3:\n    vue: ^3.4.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert_eq!(parsed.catalogs.len(), 2);
        let empty_groups: Vec<_> = parsed
            .empty_named_catalog_groups
            .iter()
            .map(|group| (group.name.as_str(), group.line))
            .collect();
        assert_eq!(empty_groups, vec![("react17", 5), ("legacy", 6)]);
    }

    /// A workspace file without catalog keys yields nothing.
    #[test]
    fn no_catalog_keys_returns_no_catalogs() {
        let source = "packages:\n  - 'packages/*'\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert!(parsed.catalogs.is_empty());
    }

    /// Malformed YAML is tolerated and yields nothing.
    #[test]
    fn malformed_yaml_returns_no_catalogs() {
        let source = "{this is\nnot: valid: yaml: at: all";
        let parsed = parse_pnpm_catalog_data(source);
        assert!(parsed.catalogs.is_empty());
    }

    /// Empty input yields nothing.
    #[test]
    fn empty_input_returns_no_catalogs() {
        let parsed = parse_pnpm_catalog_data("");
        assert!(parsed.catalogs.is_empty());
    }

    /// pnpm 9.4+ supports object form for entries with specifier + extras.
    #[test]
    fn handles_object_form_entries() {
        let source = "catalog:\n  react:\n    specifier: ^18.2.0\n  vue: ^3.4.0\n";
        let parsed = parse_pnpm_catalog_data(source);
        let entries = &parsed.catalogs[0].entries;
        assert_eq!(entries.len(), 2);
        let names: Vec<_> = entries
            .iter()
            .map(|entry| entry.package_name.as_str())
            .collect();
        assert!(names.contains(&"react"));
        assert!(names.contains(&"vue"));
    }

    /// List items under `packages:` are not mistaken for catalog entries.
    #[test]
    fn skips_packages_section() {
        let source = "packages:\n  - 'apps/*'\n  - 'libs/*'\ncatalog:\n  react: ^18\n";
        let parsed = parse_pnpm_catalog_data(source);
        assert_eq!(parsed.catalogs.len(), 1);
        assert_eq!(parsed.catalogs[0].entries[0].line, 5);
    }

    /// `#` inside quotes survives; a real trailing comment is removed.
    #[test]
    fn strip_inline_comment_preserves_quoted_hash() {
        assert_eq!(strip_inline_comment("foo: \"a#b\" # tail"), "foo: \"a#b\"");
        assert_eq!(strip_inline_comment("# top-level"), "");
        assert_eq!(strip_inline_comment("plain: value"), "plain: value");
    }

    /// Plain and quoted keys parse; list items and empty lines do not.
    #[test]
    fn parse_key_handles_simple_and_quoted() {
        assert_eq!(parse_key("react: ^18"), Some("react".to_string()));
        assert_eq!(
            parse_key("\"@scope/lib\": ^1"),
            Some("@scope/lib".to_string())
        );
        assert_eq!(parse_key("'pkg': ^2"), Some("pkg".to_string()));
        assert_eq!(parse_key("- item"), None);
        assert_eq!(parse_key(""), None);
    }
}