// schema_catalog/compiled.rs

use alloc::collections::BTreeMap;
use alloc::format;
use alloc::string::String;
use alloc::vec::Vec;

use glob_set::{Glob, GlobMap, GlobMapBuilder};

/// Details about a catalog entry, stored for detailed match lookups.
///
/// One value is kept per schema URL so that a URL returned by the glob map can
/// be expanded into a full [`SchemaMatch`].
#[derive(Debug, Clone)]
struct CatalogEntryInfo {
    /// Human-readable schema name from the catalog.
    name: String,
    /// Description of the schema, or `None` when the entry has none.
    description: Option<String>,
    /// Every `fileMatch` pattern declared by the entry, exactly as written.
    file_match: Vec<String>,
}

/// Information about how a schema was matched from a catalog.
///
/// Every field is a shared borrow, so the value is `Copy` and can be passed
/// around freely for as long as the borrowed data lives.
#[derive(Debug, Clone, Copy)]
pub struct SchemaMatch<'a> {
    /// The schema URL.
    pub url: &'a str,
    /// The specific glob pattern (or exact filename) that matched.
    pub matched_pattern: &'a str,
    /// All `fileMatch` globs from the catalog entry.
    pub file_match: &'a [String],
    /// Human-readable schema name from the catalog.
    pub name: &'a str,
    /// Description from the catalog entry, if present.
    pub description: Option<&'a str>,
}

/// A glob entry stored in the `GlobMap`, carrying the schema URL and the original pattern.
#[derive(Debug, Clone)]
struct GlobEntry {
    /// URL of the schema this glob belongs to.
    url: String,
    /// The pattern exactly as written in the catalog (before any `**/` prefix is added).
    pattern: String,
}

37/// Compiled catalog for fast filename matching.
38///
39/// Uses a single [`GlobMap`] with an optimized `MatchEngine` that automatically
40/// dispatches to the fastest strategy per pattern (literal hash, extension hash,
41/// prefix/suffix tries, Aho-Corasick pre-filter).
42pub struct CompiledCatalog {
43    map: GlobMap<GlobEntry>,
44    url_to_entry: BTreeMap<String, CatalogEntryInfo>,
45}
46
47impl CompiledCatalog {
48    /// Compile a catalog into a matcher.
49    ///
50    /// Entries with no `fileMatch` patterns are skipped.
51    /// Negation patterns (starting with `!`) are skipped.
52    /// Patterns without `/` are prepended with `**/` so they match at any depth.
53    ///
54    /// # Panics
55    ///
56    /// Panics if an empty `GlobMap` cannot be constructed (should never happen).
57    pub fn compile(catalog: &crate::Catalog) -> Self {
58        let mut builder = GlobMapBuilder::new();
59        let mut url_to_entry: BTreeMap<String, CatalogEntryInfo> = BTreeMap::new();
60
61        for schema in &catalog.schemas {
62            let description = if schema.description.is_empty() {
63                None
64            } else {
65                Some(schema.description.clone())
66            };
67
68            url_to_entry
69                .entry(schema.url.clone())
70                .or_insert_with(|| CatalogEntryInfo {
71                    name: schema.name.clone(),
72                    description,
73                    file_match: schema.file_match.clone(),
74                });
75
76            for pattern in &schema.file_match {
77                if pattern.starts_with('!') {
78                    continue;
79                }
80
81                let normalized = if pattern.contains('/') {
82                    pattern.clone()
83                } else {
84                    format!("**/{pattern}")
85                };
86
87                if let Ok(glob) = Glob::new(&normalized) {
88                    builder.insert(
89                        glob,
90                        GlobEntry {
91                            url: schema.url.clone(),
92                            pattern: pattern.clone(),
93                        },
94                    );
95                }
96            }
97        }
98
99        Self {
100            map: builder
101                .build()
102                .unwrap_or_else(|_| GlobMapBuilder::new().build().expect("empty map builds")),
103            url_to_entry,
104        }
105    }
106
107    /// Find the schema URL for a given file path.
108    ///
109    /// `path` is the full path string, `file_name` is the basename.
110    /// Returns the first matching schema URL, or `None`.
111    pub fn find_schema(&self, path: &str, _file_name: &str) -> Option<&str> {
112        let path = path.strip_prefix("./").unwrap_or(path);
113        self.map.get(path).map(|e| e.url.as_str())
114    }
115
116    /// Find the schema for a given file path, returning detailed match info.
117    ///
118    /// Returns the URL, the matched pattern, all `fileMatch` globs, the schema
119    /// name, and the description from the catalog entry.
120    pub fn find_schema_detailed<'a>(
121        &'a self,
122        path: &str,
123        _file_name: &'a str,
124    ) -> Option<SchemaMatch<'a>> {
125        let path = path.strip_prefix("./").unwrap_or(path);
126        let entry = self.map.get(path)?;
127        let info = self.url_to_entry.get(&entry.url)?;
128        Some(SchemaMatch {
129            url: &entry.url,
130            matched_pattern: &entry.pattern,
131            file_match: &info.file_match,
132            name: &info.name,
133            description: info.description.as_deref(),
134        })
135    }
136
137    /// Look up the human-readable schema name for a given URL.
138    pub fn schema_name(&self, url: &str) -> Option<&str> {
139        self.url_to_entry.get(url).map(|e| e.name.as_str())
140    }
141}
142
#[cfg(test)]
mod tests {
    extern crate alloc;

    use alloc::collections::BTreeMap;
    use alloc::vec;

    use super::*;
    use crate::{Catalog, SchemaEntry};

    /// Build a catalog entry with the given metadata and `fileMatch` patterns.
    fn entry(name: &str, url: &str, description: &str, file_match: &[&str]) -> SchemaEntry {
        SchemaEntry {
            name: name.into(),
            url: url.into(),
            description: description.into(),
            source_url: None,
            file_match: file_match.iter().map(|p| String::from(*p)).collect(),
            versions: BTreeMap::new(),
        }
    }

    /// Wrap entries in a version-1 catalog, leaving other fields at their defaults.
    fn catalog_of(schemas: Vec<SchemaEntry>) -> Catalog {
        Catalog {
            version: 1,
            schemas,
            ..Catalog::default()
        }
    }

    fn test_catalog() -> Catalog {
        catalog_of(vec![
            entry(
                "tsconfig",
                "https://json.schemastore.org/tsconfig.json",
                "",
                &["tsconfig.json", "tsconfig.*.json"],
            ),
            entry(
                "package.json",
                "https://json.schemastore.org/package.json",
                "",
                &["package.json"],
            ),
            entry("no-match", "https://example.com/no-match.json", "", &[]),
        ])
    }

    #[test]
    fn compile_and_match_basename() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema("tsconfig.json", "tsconfig.json"),
            Some("https://json.schemastore.org/tsconfig.json")
        );
    }

    #[test]
    fn compile_and_match_with_path() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema("project/tsconfig.json", "tsconfig.json"),
            Some("https://json.schemastore.org/tsconfig.json")
        );
    }

    #[test]
    fn compile_and_match_glob_pattern() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema("tsconfig.build.json", "tsconfig.build.json"),
            Some("https://json.schemastore.org/tsconfig.json")
        );
    }

    #[test]
    fn no_match_returns_none() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert!(
            compiled
                .find_schema("unknown.json", "unknown.json")
                .is_none()
        );
    }

    #[test]
    fn empty_file_match_skipped() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert!(
            compiled
                .find_schema("no-match.json", "no-match.json")
                .is_none()
        );
    }

    #[test]
    fn negation_patterns_are_skipped() {
        // The only pattern is a negation, so nothing is compiled for this entry.
        let catalog = catalog_of(vec![entry(
            "secret",
            "https://example.com/secret.json",
            "",
            &["!secret.json"],
        )]);
        let compiled = CompiledCatalog::compile(&catalog);

        assert!(
            compiled
                .find_schema("secret.json", "secret.json")
                .is_none()
        );
    }

    #[test]
    fn schema_name_lookup() {
        let catalog = test_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.schema_name("https://json.schemastore.org/package.json"),
            Some("package.json")
        );
        // Entries without any `fileMatch` are still registered by URL.
        assert_eq!(
            compiled.schema_name("https://example.com/no-match.json"),
            Some("no-match")
        );
        assert!(compiled.schema_name("https://example.com/unknown.json").is_none());
    }

    fn github_workflow_catalog() -> Catalog {
        catalog_of(vec![entry(
            "GitHub Workflow",
            "https://www.schemastore.org/github-workflow.json",
            "",
            &["**/.github/workflows/*.yml", "**/.github/workflows/*.yaml"],
        )])
    }

    #[test]
    fn github_workflow_matches_relative_path() {
        let catalog = github_workflow_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema(".github/workflows/ci.yml", "ci.yml"),
            Some("https://www.schemastore.org/github-workflow.json")
        );
    }

    #[test]
    fn github_workflow_matches_dot_slash_prefix() {
        let catalog = github_workflow_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema("./.github/workflows/ci.yml", "ci.yml"),
            Some("https://www.schemastore.org/github-workflow.json")
        );
    }

    #[test]
    fn github_workflow_matches_nested() {
        let catalog = github_workflow_catalog();
        let compiled = CompiledCatalog::compile(&catalog);

        assert_eq!(
            compiled.find_schema("myproject/.github/workflows/deploy.yaml", "deploy.yaml"),
            Some("https://www.schemastore.org/github-workflow.json")
        );
    }

    #[test]
    fn empty_description_becomes_none() {
        let catalog = catalog_of(vec![entry(
            "test",
            "https://example.com/test.json",
            "",
            &["test.json"],
        )]);
        let compiled = CompiledCatalog::compile(&catalog);
        let m = compiled
            .find_schema_detailed("test.json", "test.json")
            .expect("should match");
        assert!(m.description.is_none());
    }

    #[test]
    fn non_empty_description_preserved() {
        let catalog = catalog_of(vec![entry(
            "test",
            "https://example.com/test.json",
            "A test schema",
            &["test.json"],
        )]);
        let compiled = CompiledCatalog::compile(&catalog);
        let m = compiled
            .find_schema_detailed("test.json", "test.json")
            .expect("should match");
        assert_eq!(m.description, Some("A test schema"));
    }

    #[test]
    fn detailed_match_reports_original_pattern() {
        let catalog = catalog_of(vec![entry(
            "test",
            "https://example.com/test.json",
            "",
            &["test.json"],
        )]);
        let compiled = CompiledCatalog::compile(&catalog);
        let m = compiled
            .find_schema_detailed("dir/test.json", "test.json")
            .expect("should match");

        assert_eq!(m.url, "https://example.com/test.json");
        // The pattern is reported as written, without the implicit `**/` prefix.
        assert_eq!(m.matched_pattern, "test.json");
        assert_eq!(m.file_match.len(), 1);
        assert_eq!(m.file_match[0], "test.json");
        assert_eq!(m.name, "test");
    }
}