Skip to main content

fallow_config/workspace/
pnpm_catalog.rs

1//! Parser for the `catalog:` and `catalogs:` sections of `pnpm-workspace.yaml`.
2//!
3//! pnpm supports two catalog forms:
4//! - the top-level `catalog:` map (the "default" catalog)
5//! - the top-level `catalogs:` map of named catalogs
6//!
7//! ```yaml
8//! catalog:
9//!   react: ^18.2.0
10//!   "@scope/lib": ^1.0.0
11//!
12//! catalogs:
13//!   react17:
14//!     react: ^17.0.2
15//!     react-dom: ^17.0.2
16//! ```
17//!
18//! Workspace packages reference catalog entries from their `dependencies`
19//! (and friends) with the `catalog:` protocol:
20//!
21//! ```json
22//! { "dependencies": { "react": "catalog:", "old-react": "catalog:react17" } }
23//! ```
24//!
25//! For the unused-catalog-entry detector we need both the structured catalog
26//! map and the 1-based line number of each entry in the source so findings
27//! can point users to the exact line. `serde_yaml_ng` gives us the structural
28//! parse; a second targeted scan over the raw source recovers the line
29//! numbers.
30
/// Structured catalog data extracted from a `pnpm-workspace.yaml` file.
///
/// The `Default` value holds no catalogs; the parser returns it whenever the
/// source cannot be parsed or declares no catalog entries.
#[derive(Debug, Clone, Default)]
pub struct PnpmCatalogData {
    /// Catalogs found in the file. The default catalog (top-level `catalog:`)
    /// always appears first with `name = "default"` when present; named
    /// catalogs follow in YAML source order. Catalogs for which no entry
    /// could be collected are not listed at all.
    pub catalogs: Vec<PnpmCatalog>,
}
39
/// A single catalog (the default or a named one).
///
/// The parser only emits catalogs that have at least one entry.
#[derive(Debug, Clone)]
pub struct PnpmCatalog {
    /// Catalog name. `"default"` for the top-level `catalog:` map, or the
    /// named catalog key for entries declared under `catalogs.<name>:`.
    pub name: String,
    /// Entries declared in this catalog, in source order.
    pub entries: Vec<PnpmCatalogEntry>,
}
49
/// A single entry inside a catalog.
///
/// Only the package name and its location are kept; the version specifier
/// declared for the entry is not stored.
#[derive(Debug, Clone)]
pub struct PnpmCatalogEntry {
    /// Package name declared in the catalog (e.g. `"react"`, `"@scope/lib"`).
    pub package_name: String,
    /// 1-based line number of the entry within the source file, recovered by
    /// scanning the raw YAML text.
    pub line: u32,
}
58
59/// Parse the catalog sections of a `pnpm-workspace.yaml` file.
60///
61/// Returns an empty `PnpmCatalogData` when the file has no catalog data, when
62/// the YAML is malformed, or when the catalog sections are present but empty.
63/// All non-catalog top-level keys (`packages`, `catalog`, `catalogs`, etc.)
64/// are ignored.
65#[must_use]
66pub fn parse_pnpm_catalog_data(source: &str) -> PnpmCatalogData {
67    let value: serde_yaml_ng::Value = match serde_yaml_ng::from_str(source) {
68        Ok(v) => v,
69        Err(_) => return PnpmCatalogData::default(),
70    };
71    let Some(mapping) = value.as_mapping() else {
72        return PnpmCatalogData::default();
73    };
74
75    let line_index = build_line_index(source);
76    let mut catalogs = Vec::new();
77
78    if let Some(default_value) = mapping.get("catalog")
79        && let Some(default_map) = default_value.as_mapping()
80    {
81        let entries = collect_entries(default_map, &line_index, "default");
82        if !entries.is_empty() {
83            catalogs.push(PnpmCatalog {
84                name: "default".to_string(),
85                entries,
86            });
87        }
88    }
89
90    if let Some(named_value) = mapping.get("catalogs")
91        && let Some(named_map) = named_value.as_mapping()
92    {
93        for (name_value, catalog_value) in named_map {
94            let Some(name) = name_value.as_str() else {
95                continue;
96            };
97            let Some(catalog_map) = catalog_value.as_mapping() else {
98                continue;
99            };
100            let entries = collect_entries(catalog_map, &line_index, name);
101            if !entries.is_empty() {
102                catalogs.push(PnpmCatalog {
103                    name: name.to_string(),
104                    entries,
105                });
106            }
107        }
108    }
109
110    PnpmCatalogData { catalogs }
111}
112
113fn collect_entries(
114    mapping: &serde_yaml_ng::Mapping,
115    line_index: &CatalogLineIndex,
116    catalog_name: &str,
117) -> Vec<PnpmCatalogEntry> {
118    mapping
119        .iter()
120        .filter_map(|(k, _)| {
121            let pkg = k.as_str()?;
122            let line = line_index.line_for(catalog_name, pkg)?;
123            Some(PnpmCatalogEntry {
124                package_name: pkg.to_string(),
125                line,
126            })
127        })
128        .collect()
129}
130
/// Lookup table from `(catalog_name, package_name)` to a 1-based source line.
///
/// `catalog_name` is `"default"` for entries under the top-level `catalog:`
/// key, or the named catalog key for entries under `catalogs.<name>:`.
struct CatalogLineIndex {
    /// Recorded `((catalog_name, package_name), line)` tuples, in the order
    /// they were pushed by the scanner.
    entries: Vec<((String, String), u32)>,
}

impl CatalogLineIndex {
    /// Return the line recorded for `package_name` inside `catalog_name`.
    ///
    /// The table is searched front to back, so the first recorded match wins
    /// when a pair was recorded more than once. `None` means the pair was
    /// never recorded.
    fn line_for(&self, catalog_name: &str, package_name: &str) -> Option<u32> {
        self.entries.iter().find_map(|((catalog, package), line)| {
            let is_match = catalog.as_str() == catalog_name && package.as_str() == package_name;
            is_match.then_some(*line)
        })
    }
}
147
148/// Walk the raw YAML source to map each catalog entry to its 1-based line
149/// number. This is a small section-aware scanner: it tracks whether the
150/// current line falls inside `catalog:` (the default catalog) or inside
151/// `catalogs.<name>:` (a named catalog), and records each key at the
152/// expected indentation level.
153fn build_line_index(source: &str) -> CatalogLineIndex {
154    let mut entries = Vec::new();
155    let mut section: Section = Section::None;
156    let mut named_catalog: Option<(String, usize)> = None;
157
158    for (idx, raw_line) in source.lines().enumerate() {
159        let line_no = u32::try_from(idx).unwrap_or(u32::MAX).saturating_add(1);
160        let trimmed = strip_inline_comment(raw_line);
161        let trimmed_left = trimmed.trim_start();
162        let indent = trimmed.len() - trimmed_left.len();
163
164        if trimmed_left.is_empty() {
165            continue;
166        }
167
168        if indent == 0 {
169            section = if trimmed_left.starts_with("catalogs:") {
170                Section::NamedCatalogs
171            } else if trimmed_left.starts_with("catalog:") {
172                Section::DefaultCatalog
173            } else {
174                Section::None
175            };
176            named_catalog = None;
177            continue;
178        }
179
180        match section {
181            Section::None => {}
182            Section::DefaultCatalog => {
183                if let Some(name) = parse_key(trimmed_left) {
184                    entries.push((("default".to_string(), name), line_no));
185                }
186            }
187            Section::NamedCatalogs => {
188                // Two indent levels are meaningful here:
189                // - level 1 (`  react17:`): a named catalog header
190                // - level 2 (`    react: ^17`): an entry inside the named catalog
191                if let Some(name) = parse_key(trimmed_left) {
192                    match &named_catalog {
193                        Some((_, existing_indent)) if indent > *existing_indent => {
194                            // Entry inside the active named catalog
195                            entries.push((
196                                (
197                                    named_catalog
198                                        .as_ref()
199                                        .map_or_else(String::new, |(n, _)| n.clone()),
200                                    name,
201                                ),
202                                line_no,
203                            ));
204                        }
205                        _ => {
206                            // New named catalog header (or first one seen)
207                            named_catalog = Some((name, indent));
208                        }
209                    }
210                }
211            }
212        }
213    }
214
215    CatalogLineIndex { entries }
216}
217
/// Top-level section of `pnpm-workspace.yaml` the line scanner is currently
/// inside. It is re-evaluated on every non-blank line at indentation 0.
#[derive(Debug, Clone, Copy)]
enum Section {
    /// Not inside a catalog section; indented lines are ignored.
    None,
    /// Inside the top-level `catalog:` map (the default catalog).
    DefaultCatalog,
    /// Inside the top-level `catalogs:` map of named catalogs.
    NamedCatalogs,
}
224
/// Strip an unquoted trailing `# ...` comment from a single line and trim
/// trailing whitespace from what remains.
///
/// Following YAML's rule, a `#` only starts a comment when it is the first
/// character of the line or is preceded by a space or tab. A `#` glued to
/// other text (as in `github:user/repo#sha`) is part of the value and is
/// kept. `#` characters inside single- or double-quoted strings are
/// preserved as well, so `"# in quotes": "value"` is left alone.
///
/// Returns a sub-slice of `line`; nothing is allocated.
fn strip_inline_comment(line: &str) -> &str {
    let mut in_single = false;
    let mut in_double = false;
    // Start of line counts as a valid comment boundary.
    let mut after_whitespace = true;

    for (i, b) in line.bytes().enumerate() {
        match b {
            b'\'' if !in_double => in_single = !in_single,
            b'"' if !in_single => in_double = !in_double,
            b'#' if !in_single && !in_double && after_whitespace => {
                // `#` is ASCII, so `i` is always a char boundary.
                return line[..i].trim_end();
            }
            _ => {}
        }
        after_whitespace = b == b' ' || b == b'\t';
    }
    line.trim_end()
}
245
/// Parse a key declaration of the form `key:` or `key: value`, returning just
/// the (unquoted) key. Returns `None` when the line is not a key declaration
/// (e.g., a list item `- foo`, a block scalar marker, or malformed).
///
/// `line` is expected to be already stripped of leading indentation and of
/// any trailing comment. Three key styles are understood, each with its own
/// YAML quoting rules:
///
/// - plain keys (`react: ^18`): the key ends at the first `:` that is
///   followed by a space, a tab, or the end of the line;
/// - double-quoted keys (`"@scope/lib": ^1`): backslash escapes are resolved;
/// - single-quoted keys (`'pkg': ^2`): backslashes are literal and `''`
///   stands for one `'`.
fn parse_key(line: &str) -> Option<String> {
    match *line.as_bytes().first()? {
        b'-' | b'#' => None,
        b'"' => parse_double_quoted_key(line),
        b'\'' => parse_single_quoted_key(line),
        _ => parse_plain_key(line),
    }
}

/// Parse an unquoted key. A `:` glued to the following character (as in
/// `foo:bar` or `http://host`) is part of the scalar, not a separator.
fn parse_plain_key(line: &str) -> Option<String> {
    let bytes = line.as_bytes();
    let colon_pos = bytes.iter().enumerate().position(|(i, &b)| {
        b == b':'
            && bytes
                .get(i + 1)
                .map_or(true, |&next| next == b' ' || next == b'\t')
    })?;
    let key = line[..colon_pos].trim();
    // Disallow YAML flow / anchor / tag indicators in unquoted keys (we only
    // care about simple `pkg: version` shapes in catalog maps).
    if key.is_empty() || key.contains(['{', '[', '&', '*', '!']) {
        return None;
    }
    Some(key.to_string())
}

/// Parse a key that starts with `"`: find the closing quote (skipping
/// backslash-escaped characters), then require a `:` after it.
fn parse_double_quoted_key(line: &str) -> Option<String> {
    let bytes = line.as_bytes();
    let mut i = 1;
    while i < bytes.len() {
        match bytes[i] {
            // Skip the escaped byte so `\"` does not terminate the key.
            b'\\' if i + 1 < bytes.len() => i += 2,
            b'"' => {
                // `"` is ASCII, so `i` is always a char boundary.
                let followed_by_colon = line[i + 1..].trim_start().starts_with(':');
                return followed_by_colon.then(|| unescape_key(&line[1..i]));
            }
            _ => i += 1,
        }
    }
    None
}

/// Parse a key that starts with `'`. Inside single quotes YAML has exactly
/// one escape, `''`, and treats backslashes as ordinary characters.
fn parse_single_quoted_key(line: &str) -> Option<String> {
    let body = &line[1..];
    let mut key = String::with_capacity(body.len());
    let mut chars = body.char_indices().peekable();
    while let Some((idx, c)) = chars.next() {
        if c != '\'' {
            key.push(c);
            continue;
        }
        if matches!(chars.peek(), Some(&(_, '\''))) {
            // Doubled quote: an escaped `'` inside the key.
            chars.next();
            key.push('\'');
            continue;
        }
        // Closing quote: the key is only valid when a `:` follows.
        let followed_by_colon = body[idx + 1..].trim_start().starts_with(':');
        return followed_by_colon.then_some(key);
    }
    None
}

/// Resolve backslash escapes in the raw text of a double-quoted key.
///
/// Catalog package names rarely need full YAML unescaping; only `\n`, `\t`,
/// `\"` and `\\` are collapsed so quoted scoped names match the form
/// produced by the YAML parser. Any other escape, and a lone trailing
/// backslash, is copied through unchanged.
fn unescape_key(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    let mut chars = raw.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            None => out.push('\\'),
        }
    }
    out
}
323
// Unit tests for the catalog parser: end-to-end cases through
// `parse_pnpm_catalog_data`, followed by direct checks of the line-scanner
// helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // The default catalog is found after an unrelated `packages:` section and
    // each entry reports its own 1-based line.
    #[test]
    fn parses_default_catalog() {
        let yaml = "packages:\n  - 'packages/*'\n\ncatalog:\n  react: ^18.2.0\n  is-even: ^1.0.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs.len(), 1);
        let default = &data.catalogs[0];
        assert_eq!(default.name, "default");
        assert_eq!(default.entries.len(), 2);
        assert_eq!(default.entries[0].package_name, "react");
        assert_eq!(default.entries[0].line, 5);
        assert_eq!(default.entries[1].package_name, "is-even");
        assert_eq!(default.entries[1].line, 6);
    }

    // Two named catalogs are reported separately, in source order, with
    // entries attributed to the right catalog.
    #[test]
    fn parses_named_catalogs() {
        let yaml = "catalogs:\n  react17:\n    react: ^17.0.2\n    react-dom: ^17.0.2\n  ui:\n    headlessui: ^2.0.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs.len(), 2);
        assert_eq!(data.catalogs[0].name, "react17");
        assert_eq!(data.catalogs[0].entries.len(), 2);
        assert_eq!(data.catalogs[0].entries[0].package_name, "react");
        assert_eq!(data.catalogs[0].entries[0].line, 3);
        assert_eq!(data.catalogs[1].name, "ui");
        assert_eq!(data.catalogs[1].entries[0].package_name, "headlessui");
        assert_eq!(data.catalogs[1].entries[0].line, 6);
    }

    // The same package name in the default and a named catalog resolves to
    // a different line in each.
    #[test]
    fn handles_default_and_named_together() {
        let yaml = "catalog:\n  react: ^18\n\ncatalogs:\n  legacy:\n    react: ^17\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs.len(), 2);
        assert_eq!(data.catalogs[0].name, "default");
        assert_eq!(data.catalogs[0].entries[0].line, 2);
        assert_eq!(data.catalogs[1].name, "legacy");
        assert_eq!(data.catalogs[1].entries[0].line, 6);
    }

    // Double- and single-quoted keys are reported without their quotes.
    #[test]
    fn handles_quoted_keys() {
        let yaml = "catalog:\n  \"@scope/lib\": ^1.0.0\n  'my-pkg': ^2.0.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        let default = &data.catalogs[0];
        assert_eq!(default.entries[0].package_name, "@scope/lib");
        assert_eq!(default.entries[0].line, 2);
        assert_eq!(default.entries[1].package_name, "my-pkg");
        assert_eq!(default.entries[1].line, 3);
    }

    // A trailing comment (itself containing a `#`) does not disturb the
    // entry it follows or the line numbering of later entries.
    #[test]
    fn handles_inline_comments() {
        let yaml = "catalog:\n  react: ^18  # pin until #1234\n  is-even: ^1.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs[0].entries.len(), 2);
        assert_eq!(data.catalogs[0].entries[0].package_name, "react");
        assert_eq!(data.catalogs[0].entries[1].package_name, "is-even");
        assert_eq!(data.catalogs[0].entries[1].line, 3);
    }

    // Entry detection does not assume two-space indentation.
    #[test]
    fn handles_four_space_indentation() {
        let yaml = "catalog:\n    react: ^18.2.0\n    vue: ^3.4.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs[0].entries.len(), 2);
        assert_eq!(data.catalogs[0].entries[0].line, 2);
        assert_eq!(data.catalogs[0].entries[1].line, 3);
    }

    // A catalog with no entries is omitted rather than reported empty.
    #[test]
    fn empty_catalog_returns_no_catalogs() {
        let yaml = "catalog: {}\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert!(data.catalogs.is_empty());
    }

    // A workspace file without any catalog key yields no catalogs.
    #[test]
    fn no_catalog_keys_returns_no_catalogs() {
        let yaml = "packages:\n  - 'packages/*'\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert!(data.catalogs.is_empty());
    }

    // Unparseable YAML degrades to an empty result instead of panicking.
    #[test]
    fn malformed_yaml_returns_no_catalogs() {
        let yaml = "{this is\nnot: valid: yaml: at: all";
        let data = parse_pnpm_catalog_data(yaml);
        assert!(data.catalogs.is_empty());
    }

    // The empty string is accepted and yields no catalogs.
    #[test]
    fn empty_input_returns_no_catalogs() {
        let data = parse_pnpm_catalog_data("");
        assert!(data.catalogs.is_empty());
    }

    // An entry whose value is a nested mapping still counts as one entry,
    // alongside a plain `name: version` sibling.
    #[test]
    fn handles_object_form_entries() {
        // pnpm 9.4+ supports object form for entries with specifier + extras
        let yaml = "catalog:\n  react:\n    specifier: ^18.2.0\n  vue: ^3.4.0\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs[0].entries.len(), 2);
        let names: Vec<_> = data.catalogs[0]
            .entries
            .iter()
            .map(|e| e.package_name.as_str())
            .collect();
        assert!(names.contains(&"react"));
        assert!(names.contains(&"vue"));
    }

    // List items under `packages:` are not mistaken for catalog entries and
    // do not shift the reported line of the first real entry.
    #[test]
    fn skips_packages_section() {
        let yaml = "packages:\n  - 'apps/*'\n  - 'libs/*'\ncatalog:\n  react: ^18\n";
        let data = parse_pnpm_catalog_data(yaml);
        assert_eq!(data.catalogs.len(), 1);
        assert_eq!(data.catalogs[0].entries[0].line, 5);
    }

    // Direct check of the comment stripper: quoted `#` survives, a
    // comment-only line becomes empty, comment-free lines are untouched.
    #[test]
    fn strip_inline_comment_preserves_quoted_hash() {
        assert_eq!(strip_inline_comment("foo: \"a#b\" # tail"), "foo: \"a#b\"");
        assert_eq!(strip_inline_comment("# top-level"), "");
        assert_eq!(strip_inline_comment("plain: value"), "plain: value");
    }

    // Direct check of the key parser for plain, double-quoted and
    // single-quoted keys, plus the non-key inputs it must reject.
    #[test]
    fn parse_key_handles_simple_and_quoted() {
        assert_eq!(parse_key("react: ^18"), Some("react".to_string()));
        assert_eq!(
            parse_key("\"@scope/lib\": ^1"),
            Some("@scope/lib".to_string())
        );
        assert_eq!(parse_key("'pkg': ^2"), Some("pkg".to_string()));
        assert_eq!(parse_key("- item"), None);
        assert_eq!(parse_key(""), None);
    }
}
465}