Skip to main content

oo_ide/schema/
mod.rs

1//! Schema provider system for structured file validation and completion.
2//!
3//! # Overview
4//!
5//! A [`SchemaProvider`] maps file paths to [`SchemaContent`] (JSON Schema
6//! strings).  Multiple providers live in a [`SchemaRegistry`]; the first
7//! matching provider wins when resolving a file.
8//!
9//! Extensions may declare packaged schemas in `extension.yaml` and include schema files inside the `.cyix` archive; the manager registers those schemas when the extension is enabled.  Built-in providers can be registered directly via [`SchemaRegistry::register`].
10//!
11//! # File matching
12//!
13//! Each provider describes the files it covers via [`FilePattern`]:
14//!
15//! | Pattern                       | Kind     | Matches                            |
16//! |-------------------------------|----------|------------------------------------|
17//! | `"Cargo.toml"`                | Filename | Any file named `Cargo.toml`        |
18//! | `"*.yaml"`                    | Glob     | Any YAML file (by name)            |
19//! | `"**/*.json"`                 | Glob     | Any JSON file anywhere             |
20//! | `".github/workflows/*.yml"`   | Glob     | GitHub Actions workflow files      |
21
22pub mod registry;
23pub mod deep_completion;
24pub mod context;
25pub mod format;
26pub use deep_completion::{completions_from_schema, completions_from_parsed_schema};
27
28pub use registry::SchemaRegistry;
29
30use std::path::Path;
31
32// ---------------------------------------------------------------------------
33// Core types
34// ---------------------------------------------------------------------------
35
/// A schema that has been resolved for some file.
///
/// Bundles the schema text with the identifier and display name it is
/// known by.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SchemaContent {
    /// Stable identifier of the schema, for example `"cargo-toml"` or
    /// `"github-actions"`.
    pub id: String,
    /// Display name presented to the user in the UI.
    pub name: String,
    /// Schema document serialised as JSON text (JSON Schema draft-07 or
    /// later).
    pub schema_json: String,
}
46
/// Describes the set of files a schema provider is responsible for.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FilePattern {
    /// Exact comparison against the **final path component** only; the
    /// directories leading up to the file are ignored.
    ///
    /// Example: `FilePattern::Filename("Cargo.toml".into())` covers both
    /// `/project/Cargo.toml` and `./Cargo.toml`.
    Filename(String),

    /// Glob-style pattern.
    ///
    /// Wildcards:
    /// - `*` stands for any run of characters inside one path segment (it
    ///   never spans a `/`).
    /// - `**` stands for zero or more whole path segments, separators
    ///   included.
    ///
    /// The pattern is tested against the complete path **and** against every
    /// suffix that begins right after a `/`.  That is why `"*.yaml"` covers
    /// `/project/config.yaml`, and `".github/workflows/*.yml"` covers
    /// `/project/.github/workflows/ci.yml`.
    Glob(String),
}
69
70impl FilePattern {
71    /// Returns `true` if `path` matches this pattern.
72    pub fn matches(&self, path: &Path) -> bool {
73        match self {
74            FilePattern::Filename(name) => path
75                .file_name()
76                .and_then(|n| n.to_str())
77                .is_some_and(|n| n == name),
78
79            FilePattern::Glob(glob) => {
80                // Normalise path separators so the matcher is consistent on
81                // all platforms.
82                let path_str = path.to_string_lossy();
83                let normalised = path_str.replace('\\', "/");
84                let pat = glob.as_bytes();
85
86                // Try the full normalised path first.
87                if glob_match(pat, normalised.as_bytes()) {
88                    return true;
89                }
90                // Then try each suffix after a `/` separator, so a pattern
91                // like `"*.yaml"` matches `/project/config.yaml` (suffix
92                // `"config.yaml"`) and `".github/workflows/*.yml"` matches
93                // `/project/.github/workflows/ci.yml` (suffix
94                // `".github/workflows/ci.yml"`).
95                for (i, _) in normalised.match_indices('/') {
96                    if glob_match(pat, &normalised.as_bytes()[i + 1..]) {
97                        return true;
98                    }
99                }
100                false
101            }
102        }
103    }
104}
105
106// ---------------------------------------------------------------------------
107// SchemaProvider trait
108// ---------------------------------------------------------------------------
109
110/// A type that can provide a JSON Schema for a given file path.
111///
112/// Implement this trait for built-in providers and register them via
113/// [`SchemaRegistry::register`].  Extensions use the `RegisterSchema`
114/// host-request to register schemas dynamically at runtime.
115pub trait SchemaProvider: Send + Sync {
116    /// Stable identifier for this provider (e.g. `"cargo-toml"`).
117    fn id(&self) -> &str;
118
119    /// Human-readable name (e.g. `"Cargo.toml schema"`).
120    fn name(&self) -> &str;
121
122    /// Returns the [`SchemaContent`] for `path` if this provider covers it,
123    /// or `None` otherwise.
124    fn schema_for(&self, path: &Path) -> Option<SchemaContent>;
125}
126
127// ---------------------------------------------------------------------------
128// Minimal glob matcher
129// ---------------------------------------------------------------------------
130
/// Recursively matches `pat` against the whole of `text` using shell-style
/// globs.
///
/// Both arguments are raw bytes and `/` is the only byte treated as a path
/// separator.  The match is anchored at both ends: the pattern has to account
/// for every byte of `text`.
///
/// Rules:
/// - `**` matches zero or more path segments (including `/`).  A single `/`
///   written directly after `**` is absorbed into the wildcard, so `**/x`
///   also matches a bare `x`.
/// - `*` matches zero or more characters within a single segment (no `/`).
/// - All other bytes must match literally.
///
/// Returns `true` when `text` matches `pat`, `false` otherwise.
fn glob_match(pat: &[u8], text: &[u8]) -> bool {
    match pat {
        // Empty pattern matches only empty text.
        [] => text.is_empty(),

        // `**` — consume zero or more complete path segments.
        [b'*', b'*', rest @ ..] => {
            // Strip an optional leading separator from the rest-of-pattern.
            let rest = rest.strip_prefix(b"/").unwrap_or(rest);
            // `**` with no remaining pattern matches any text unconditionally.
            if rest.is_empty() {
                return true;
            }
            // Try `rest` at the current position (zero segments consumed),
            // then at the start of every later segment.
            glob_match(rest, text)
                || text
                    .iter()
                    .enumerate()
                    .any(|(i, &byte)| byte == b'/' && glob_match(rest, &text[i + 1..]))
        }

        // `*` — match any run of non-separator characters.
        [b'*', rest @ ..] => {
            // `*` may swallow anything from nothing up to the whole current
            // segment, i.e. everything before the first `/` (or all of `text`
            // when there is none).
            let segment_end = text
                .iter()
                .position(|&byte| byte == b'/')
                .unwrap_or(text.len());
            // The range is inclusive so that `rest` is also tried *at* the
            // separator itself.  Without that, a `*` followed by `/` (as in
            // `src/*/mod.rs`) could never match.  `*` still never consumes a
            // `/`, because no offset beyond `segment_end` is attempted.
            (0..=segment_end).any(|i| glob_match(rest, &text[i..]))
        }

        // Literal byte — must match the head of `text`.
        [p, rest_p @ ..] => match text {
            [t, rest_t @ ..] if p == t => glob_match(rest_p, rest_t),
            _ => false,
        },
    }
}
184
185// ---------------------------------------------------------------------------
186// Tests
187// ---------------------------------------------------------------------------
188
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds an owned path from a string literal.
    fn p(s: &str) -> PathBuf {
        PathBuf::from(s)
    }

    /// Asserts that `pattern` matches every path in `paths`.
    fn assert_all_match(pattern: &FilePattern, paths: &[&str]) {
        for path in paths {
            assert!(
                pattern.matches(&p(path)),
                "{:?} should match {:?}",
                pattern,
                path
            );
        }
    }

    /// Asserts that `pattern` matches none of the paths in `paths`.
    fn assert_none_match(pattern: &FilePattern, paths: &[&str]) {
        for path in paths {
            assert!(
                !pattern.matches(&p(path)),
                "{:?} should not match {:?}",
                pattern,
                path
            );
        }
    }

    /// Runs the raw matcher on string inputs.
    fn raw(pat: &str, text: &str) -> bool {
        glob_match(pat.as_bytes(), text.as_bytes())
    }

    // --- FilePattern::Filename ---

    #[test]
    fn filename_matches_exact_name() {
        let cargo = FilePattern::Filename("Cargo.toml".into());
        assert_all_match(
            &cargo,
            &["Cargo.toml", "/project/Cargo.toml", "nested/dir/Cargo.toml"],
        );
    }

    #[test]
    fn filename_case_sensitive() {
        let cargo = FilePattern::Filename("Cargo.toml".into());
        assert_none_match(&cargo, &["cargo.toml", "CARGO.TOML"]);
    }

    #[test]
    fn filename_no_match_different_file() {
        let cargo = FilePattern::Filename("Cargo.toml".into());
        assert_none_match(&cargo, &["/project/package.json"]);
    }

    // --- FilePattern::Glob ---

    #[test]
    fn glob_star_matches_extension() {
        let yaml = FilePattern::Glob("*.yaml".into());
        assert_all_match(
            &yaml,
            &["config.yaml", "/project/config.yaml", "nested/deep/config.yaml"],
        );
    }

    #[test]
    fn glob_star_does_not_match_different_extension() {
        let yaml = FilePattern::Glob("*.yaml".into());
        assert_none_match(&yaml, &["config.json", "config.yaml.bak"]);
    }

    #[test]
    fn glob_double_star_matches_any_depth() {
        let json = FilePattern::Glob("**/*.json".into());
        assert_all_match(&json, &["top.json", "/project/a/b/c.json"]);
        assert_none_match(&json, &["/project/a/b/c.yaml"]);
    }

    #[test]
    fn glob_path_pattern_workflows() {
        let workflows = FilePattern::Glob(".github/workflows/*.yml".into());
        assert_all_match(
            &workflows,
            &[
                ".github/workflows/ci.yml",
                "/project/.github/workflows/release.yml",
            ],
        );
        // The `*` is limited to one segment, so a nested directory is out.
        assert_none_match(&workflows, &[".github/workflows/sub/ci.yml"]);
    }

    #[test]
    fn glob_literal_filename() {
        let pyproject = FilePattern::Glob("pyproject.toml".into());
        assert_all_match(&pyproject, &["pyproject.toml", "/project/pyproject.toml"]);
        assert_none_match(&pyproject, &["/project/other.toml"]);
    }

    // --- glob_match (unit tests for the internal function) ---

    #[test]
    fn glob_match_empty_matches_empty() {
        assert!(raw("", ""));
        assert!(!raw("", "x"));
    }

    #[test]
    fn glob_match_literal() {
        assert!(raw("hello", "hello"));
        assert!(!raw("hello", "world"));
        assert!(!raw("hello", "hell"));
    }

    #[test]
    fn glob_match_star_within_segment() {
        assert!(raw("*.rs", "main.rs"));
        // Nothing at all in front of the extension is acceptable.
        assert!(raw("*.rs", ".rs"));
        // A single `*` stays inside one segment.
        assert!(!raw("*.rs", "src/main.rs"));
    }

    #[test]
    fn glob_match_double_star() {
        for text in ["src/main.rs", "a/b/c/main.rs", "main.rs"] {
            assert!(raw("**/*.rs", text), "**/*.rs should match {:?}", text);
        }
        assert!(!raw("**/*.rs", "main.py"));
    }

    #[test]
    fn glob_match_double_star_only() {
        for text in ["", "a/b/c", "anything"] {
            assert!(raw("**", text), "** should match {:?}", text);
        }
    }
}