Skip to main content

ucp_schema/
loader.rs

1//! Schema loading from various sources.
2//!
3//! Handles loading schemas from files, strings, and HTTP URLs.
4
5use std::path::Path;
6
7use serde_json::Value;
8
9use crate::error::ResolveError;
10
11#[cfg(feature = "remote")]
12use std::time::Duration;
13
/// Timeout handed to the HTTP client builder for remote schema fetches
/// (10 seconds).
#[cfg(feature = "remote")]
const HTTP_TIMEOUT: Duration = Duration::from_millis(10_000);
17
18/// Load a schema from a file path.
19///
20/// # Errors
21///
22/// Returns `ResolveError::FileNotFound` if the file doesn't exist,
23/// or `ResolveError::InvalidJson` if the file isn't valid JSON.
24pub fn load_schema(path: &Path) -> Result<Value, ResolveError> {
25    if !path.exists() {
26        return Err(ResolveError::FileNotFound {
27            path: path.to_path_buf(),
28        });
29    }
30
31    let content = std::fs::read_to_string(path).map_err(|source| ResolveError::ReadError {
32        path: path.to_path_buf(),
33        source,
34    })?;
35
36    serde_json::from_str(&content).map_err(|source| ResolveError::InvalidJson { source })
37}
38
39/// Load a schema from a JSON string.
40///
41/// # Errors
42///
43/// Returns `ResolveError::InvalidJson` if the string isn't valid JSON.
44pub fn load_schema_str(content: &str) -> Result<Value, ResolveError> {
45    serde_json::from_str(content).map_err(|source| ResolveError::InvalidJson { source })
46}
47
/// Load a schema from an HTTP/HTTPS URL.
///
/// Requires the `remote` feature (enabled by default). The request uses a
/// blocking client configured with `HTTP_TIMEOUT`.
///
/// # Errors
///
/// Returns `ResolveError::NetworkError` if the client cannot be built, the
/// request fails, the server answers with an error status, or the body
/// cannot be read. Returns `ResolveError::InvalidJson` if the response body
/// isn't valid JSON.
#[cfg(feature = "remote")]
pub fn load_schema_url(url: &str) -> Result<Value, ResolveError> {
    // Every transport-level failure is reported the same way, tagged with the URL.
    let network_error = |source: reqwest::Error| ResolveError::NetworkError {
        url: url.to_string(),
        source,
    };

    let client = reqwest::blocking::Client::builder()
        .timeout(HTTP_TIMEOUT)
        .build()
        .map_err(&network_error)?;

    let body = client
        .get(url)
        .send()
        .map_err(&network_error)?
        // Check for HTTP errors before looking at the body.
        .error_for_status()
        .map_err(&network_error)?
        .bytes()
        .map_err(&network_error)?;

    // Parse separately from the transfer so a malformed body surfaces as
    // InvalidJson (as documented) rather than as a network failure.
    serde_json::from_slice(&body).map_err(|source| ResolveError::InvalidJson { source })
}
89
/// Report whether `s` begins with an `http://` or `https://` scheme prefix.
///
/// The comparison is a plain, case-sensitive prefix test; no further URL
/// validation is performed.
pub fn is_url(s: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|&prefix| s.starts_with(prefix))
}
94
95/// Navigate a JSON Pointer fragment (e.g., "#/$defs/foo" or "#/properties/bar").
96///
97/// Returns the value at the given JSON Pointer path within the schema.
98/// The fragment should start with '#' (e.g., "#/$defs/foo").
99pub fn navigate_fragment(schema: &Value, fragment: &str) -> Result<Value, ResolveError> {
100    // Remove leading # and split by /
101    let path = fragment.trim_start_matches('#').trim_start_matches('/');
102    if path.is_empty() {
103        return Ok(schema.clone());
104    }
105
106    let mut current = schema;
107    for part in path.split('/') {
108        // Unescape JSON Pointer encoding (~1 = /, ~0 = ~)
109        let key = part.replace("~1", "/").replace("~0", "~");
110        current = current.get(&key).ok_or_else(|| ResolveError::BundleError {
111            message: format!("fragment not found: {}", fragment),
112        })?;
113    }
114    Ok(current.clone())
115}
116
117/// Recursively resolve and inline external $ref pointers.
118///
119/// Walks the schema tree, finds `$ref` values pointing to external files,
120/// loads them, and replaces the $ref with the loaded content.
121/// Internal refs (`#/...`) in the root schema are left for the validator.
122/// Internal refs in loaded external files are resolved against that file.
123/// Self-root refs (`$ref: "#"`) are left as-is (recursive type definitions).
124///
125/// # Arguments
126/// * `schema` - The schema to process (modified in place)
127/// * `base_dir` - Base directory for resolving relative file paths
128pub fn bundle_refs(schema: &mut Value, base_dir: &Path) -> Result<(), ResolveError> {
129    // Snapshot root schema so internal #/$defs/ refs can resolve against it.
130    let root_snapshot = schema.clone();
131    bundle_refs_inner(
132        schema,
133        base_dir,
134        Some(&root_snapshot),
135        None,
136        None,
137        &mut std::collections::HashSet::new(),
138    )
139}
140
141/// Bundle external $ref pointers with URL-to-local-path mapping.
142///
143/// Like `bundle_refs`, but handles absolute URL refs by mapping them to local paths.
144/// When a ref starts with `remote_base`, that prefix is stripped and the remainder
145/// is joined to `local_base` to form the local file path.
146///
147/// # Example
148/// ```text
149/// remote_base = "https://ucp.dev/draft"
150/// local_base = Path::new("site")
151/// $ref = "https://ucp.dev/draft/schemas/ucp.json" -> "site/schemas/ucp.json"
152/// ```
153pub fn bundle_refs_with_url_mapping(
154    schema: &mut Value,
155    base_dir: &Path,
156    local_base: &Path,
157    remote_base: &str,
158) -> Result<(), ResolveError> {
159    let root_snapshot = schema.clone();
160    bundle_refs_inner(
161        schema,
162        base_dir,
163        Some(&root_snapshot),
164        Some(local_base),
165        Some(remote_base),
166        &mut std::collections::HashSet::new(),
167    )
168}
169
170fn bundle_refs_inner(
171    schema: &mut Value,
172    base_dir: &Path,
173    file_root: Option<&Value>, // Root of external file for resolving internal refs
174    url_local_base: Option<&Path>,
175    url_remote_base: Option<&str>,
176    visited: &mut std::collections::HashSet<String>,
177) -> Result<(), ResolveError> {
178    match schema {
179        Value::Object(obj) => {
180            // Check if this object has a $ref
181            if let Some(ref_val) = obj.get("$ref").and_then(|v| v.as_str()) {
182                if ref_val.starts_with('#') {
183                    // Internal ref - only resolve if we have a file_root context
184                    // Skip self-root refs ($ref: "#") - these are recursive type defs
185                    if ref_val == "#" {
186                        // Leave as-is - can't inline recursive self-reference
187                    } else if let Some(root) = file_root {
188                        let mut target = navigate_fragment(root, ref_val)?;
189                        // Recursively process (may have nested refs)
190                        bundle_refs_inner(
191                            &mut target,
192                            base_dir,
193                            file_root,
194                            url_local_base,
195                            url_remote_base,
196                            visited,
197                        )?;
198                        // Inline the resolved definition
199                        obj.remove("$ref");
200                        if let Value::Object(ref_obj) = target {
201                            for (k, v) in ref_obj {
202                                obj.entry(k).or_insert(v);
203                            }
204                        }
205                        return Ok(());
206                    }
207                    // No file_root context — leave as-is
208                } else {
209                    // External ref - may be relative path or absolute URL
210                    let (file_part, fragment) = match ref_val.find('#') {
211                        Some(idx) => (&ref_val[..idx], Some(&ref_val[idx..])),
212                        None => (ref_val, None),
213                    };
214
215                    // Resolve ref to local path, handling URL mapping if configured
216                    let ref_path =
217                        resolve_ref_to_path(file_part, base_dir, url_local_base, url_remote_base);
218
219                    // If local resolution fails and the ref is a URL, try HTTP fetch
220                    #[cfg(feature = "remote")]
221                    let (loaded, ref_dir_owned) = if !ref_path.exists() && is_url(file_part) {
222                        let fetched = load_schema_url(file_part)?;
223                        // Remote schemas have no local directory; use base_dir for
224                        // any relative refs within the fetched schema
225                        (fetched, base_dir.to_path_buf())
226                    } else {
227                        let schema = load_schema(&ref_path)?;
228                        let dir = ref_path.parent().unwrap_or(base_dir).to_path_buf();
229                        (schema, dir)
230                    };
231
232                    #[cfg(not(feature = "remote"))]
233                    let (loaded, ref_dir_owned) = {
234                        let schema = load_schema(&ref_path)?;
235                        let dir = ref_path.parent().unwrap_or(base_dir).to_path_buf();
236                        (schema, dir)
237                    };
238
239                    let canonical = ref_path.canonicalize().unwrap_or(ref_path.clone());
240                    let visit_key = format!("{}|{}", canonical.display(), fragment.unwrap_or(""));
241
242                    if visited.contains(&visit_key) {
243                        return Err(ResolveError::BundleError {
244                            message: format!("circular reference detected: {}", ref_val),
245                        });
246                    }
247
248                    let mut target = if let Some(frag) = fragment {
249                        navigate_fragment(&loaded, frag)?
250                    } else {
251                        loaded.clone()
252                    };
253
254                    visited.insert(visit_key.clone());
255                    // Pass loaded file as file_root so internal refs resolve against it
256                    bundle_refs_inner(
257                        &mut target,
258                        &ref_dir_owned,
259                        Some(&loaded),
260                        url_local_base,
261                        url_remote_base,
262                        visited,
263                    )?;
264                    visited.remove(&visit_key);
265
266                    obj.remove("$ref");
267                    if let Value::Object(ref_obj) = target {
268                        for (k, v) in ref_obj {
269                            obj.entry(k).or_insert(v);
270                        }
271                    }
272                    return Ok(());
273                }
274            }
275
276            // Recurse into all values
277            for value in obj.values_mut() {
278                bundle_refs_inner(
279                    value,
280                    base_dir,
281                    file_root,
282                    url_local_base,
283                    url_remote_base,
284                    visited,
285                )?;
286            }
287        }
288        Value::Array(arr) => {
289            for item in arr {
290                bundle_refs_inner(
291                    item,
292                    base_dir,
293                    file_root,
294                    url_local_base,
295                    url_remote_base,
296                    visited,
297                )?;
298            }
299        }
300        _ => {}
301    }
302    Ok(())
303}
304
/// Resolve a $ref value to a local file path.
///
/// If URL mapping is configured and the ref lies under the remote base,
/// strips the prefix and joins the remainder to `url_local_base`. Otherwise
/// the ref is joined to `base_dir` as a relative path.
///
/// The remote base must match on a path boundary: with a base of
/// `https://ucp.dev/draft`, `https://ucp.dev/draft/x.json` is mapped but
/// `https://ucp.dev/draft-v2/x.json` is not.
fn resolve_ref_to_path(
    ref_val: &str,
    base_dir: &Path,
    url_local_base: Option<&Path>,
    url_remote_base: Option<&str>,
) -> std::path::PathBuf {
    // Check if this is an absolute URL that matches our remote base
    if let (Some(local_base), Some(remote_base)) = (url_local_base, url_remote_base) {
        if let Some(remainder) = ref_val.strip_prefix(remote_base) {
            // A bare string prefix is not enough: the match has to end where a
            // path segment ends, otherwise sibling URLs that merely share the
            // leading characters would be redirected to local files.
            let on_boundary = remainder.is_empty()
                || remainder.starts_with('/')
                || remote_base.ends_with('/');
            if on_boundary {
                // Drop the leading slash so `join` appends instead of replacing.
                return local_base.join(remainder.trim_start_matches('/'));
            }
        }
    }

    // Default: treat as relative path from base_dir
    base_dir.join(ref_val)
}
327
/// Bundle external `$ref` pointers by fetching them over HTTP.
///
/// The remote counterpart of `bundle_refs`: external refs are resolved
/// against `base_url` and downloaded rather than read from local files, and
/// the fetched content is inlined in place of the `$ref`. Internal `#/...`
/// refs in the root are resolved against a snapshot of the root taken before
/// processing.
///
/// # Arguments
/// * `schema` - The schema to process (modified in place)
/// * `base_url` - Base URL for resolving relative refs (typically the schema's $id)
#[cfg(feature = "remote")]
pub fn bundle_refs_remote(schema: &mut Value, base_url: &str) -> Result<(), ResolveError> {
    let mut visited = std::collections::HashSet::new();
    // Untouched copy of the root, used to look up internal `#/...` refs.
    let root_snapshot = schema.clone();
    bundle_refs_remote_inner(schema, base_url, Some(&root_snapshot), &mut visited)
}
348
/// Recursive worker behind `bundle_refs_remote`.
///
/// Walks `schema` in place. For every object carrying a string `$ref`:
///
/// * `"#"` is left untouched (self-recursive definition).
/// * `"#/..."` is looked up in `file_root` (when present), bundled, and
///   merged into the object.
/// * anything else is resolved against `base_url`, fetched over HTTP,
///   bundled with the fetched document as the new `file_root` and its URL as
///   the new base, and merged into the object.
///
/// Merging removes `$ref` and inserts the target's keys without overwriting
/// keys already present next to the `$ref`. A target that is not a JSON
/// object contributes no keys. Sibling keywords next to the `$ref` are
/// themselves traversed so refs nested inside them are bundled too.
///
/// `visited` holds the refs currently being expanded on the active recursion
/// path; meeting one of them again is reported as a circular reference.
///
/// # Errors
///
/// Returns `ResolveError::BundleError` for circular references or missing
/// fragments, and propagates any error from fetching a referenced schema.
#[cfg(feature = "remote")]
fn bundle_refs_remote_inner(
    schema: &mut Value,
    base_url: &str,
    file_root: Option<&Value>,
    visited: &mut std::collections::HashSet<String>,
) -> Result<(), ResolveError> {
    match schema {
        Value::Object(obj) => {
            if let Some(ref_val) = obj.get("$ref").and_then(|v| v.as_str()) {
                if ref_val.starts_with('#') {
                    // Internal ref. A bare "#" is a self-reference, and without
                    // a file_root there is nothing to resolve against; both
                    // fall through untouched.
                    if ref_val != "#" {
                        if let Some(root) = file_root {
                            // Key on the root's address so identical fragments
                            // in different documents do not collide. Without
                            // this guard a definition that refers to itself
                            // would recurse until the stack overflows.
                            let visit_key = format!("internal@{:p}|{}", root, ref_val);
                            if !visited.insert(visit_key.clone()) {
                                return Err(ResolveError::BundleError {
                                    message: format!("circular reference detected: {}", ref_val),
                                });
                            }

                            let mut target = navigate_fragment(root, ref_val)?;
                            bundle_refs_remote_inner(&mut target, base_url, file_root, visited)?;
                            visited.remove(&visit_key);

                            obj.remove("$ref");
                            // Bundle the keywords that sat next to the $ref
                            // before merging the already-bundled target.
                            for value in obj.values_mut() {
                                bundle_refs_remote_inner(value, base_url, file_root, visited)?;
                            }
                            // Existing keys win over keys from the target.
                            if let Value::Object(ref_obj) = target {
                                for (k, v) in ref_obj {
                                    obj.entry(k).or_insert(v);
                                }
                            }
                            return Ok(());
                        }
                    }
                } else {
                    // External ref - split off the optional fragment
                    let (file_part, fragment) = match ref_val.find('#') {
                        Some(idx) => (&ref_val[..idx], Some(&ref_val[idx..])),
                        None => (ref_val, None),
                    };

                    // Resolve to absolute URL
                    let resolved_url = resolve_url(file_part, base_url);
                    let visit_key = format!("{}|{}", resolved_url, fragment.unwrap_or(""));

                    // Detect cycles before hitting the network.
                    if visited.contains(&visit_key) {
                        return Err(ResolveError::BundleError {
                            message: format!("circular reference detected: {}", ref_val),
                        });
                    }

                    // Fetch the referenced schema
                    let loaded = load_schema_url(&resolved_url)?;
                    let mut target = match fragment {
                        Some(frag) => navigate_fragment(&loaded, frag)?,
                        None => loaded.clone(),
                    };

                    visited.insert(visit_key.clone());
                    // Recursively bundle with the fetched document's URL as the new base
                    bundle_refs_remote_inner(&mut target, &resolved_url, Some(&loaded), visited)?;
                    visited.remove(&visit_key);

                    obj.remove("$ref");
                    // Siblings of the $ref belong to the current document, so
                    // they are bundled with the current base_url and file_root.
                    for value in obj.values_mut() {
                        bundle_refs_remote_inner(value, base_url, file_root, visited)?;
                    }
                    if let Value::Object(ref_obj) = target {
                        for (k, v) in ref_obj {
                            obj.entry(k).or_insert(v);
                        }
                    }
                    return Ok(());
                }
            }

            // No resolvable $ref here: recurse into all values
            for value in obj.values_mut() {
                bundle_refs_remote_inner(value, base_url, file_root, visited)?;
            }
        }
        Value::Array(arr) => {
            for item in arr {
                bundle_refs_remote_inner(item, base_url, file_root, visited)?;
            }
        }
        // Scalars cannot contain refs.
        _ => {}
    }
    Ok(())
}
429
/// Resolve a potentially relative URL against a base URL.
///
/// * An `http://` / `https://` reference is returned unchanged.
/// * Otherwise the reference is resolved against `base` with standard URL
///   joining, so bases without a path (`https://host`), root-relative
///   references (`/schemas/x.json`) and `..` segments are handled.
/// * If `base` cannot be parsed as a URL, falls back to replacing everything
///   after the last `/` of `base` with the reference; with no `/` in `base`
///   the reference is returned as-is.
#[cfg(feature = "remote")]
fn resolve_url(url: &str, base: &str) -> String {
    if is_url(url) {
        // Already absolute
        return url.to_string();
    }

    // Plain string slicing gets the authority-only and root-relative cases
    // wrong, so prefer the URL parser's join.
    if let Ok(resolved) = reqwest::Url::parse(base).and_then(|parsed| parsed.join(url)) {
        return resolved.to_string();
    }

    // Base is not a parseable URL: keep the simple "directory of base" rule.
    match base.rfind('/') {
        Some(idx) => format!("{}/{}", &base[..idx], url),
        None => url.to_string(),
    }
}
446
447/// Load a schema from a file path or URL.
448///
449/// Automatically detects whether the source is a URL or file path.
450/// URL loading requires the `remote` feature.
451///
452/// # Errors
453///
454/// Returns appropriate errors based on the source type.
455pub fn load_schema_auto(source: &str) -> Result<Value, ResolveError> {
456    if is_url(source) {
457        #[cfg(feature = "remote")]
458        {
459            load_schema_url(source)
460        }
461        #[cfg(not(feature = "remote"))]
462        {
463            Err(ResolveError::FileNotFound {
464                path: std::path::PathBuf::from(source),
465            })
466        }
467    } else {
468        load_schema(Path::new(source))
469    }
470}
471
/// Unit tests for schema loading, fragment navigation, ref-to-path mapping
/// and local bundling. Remote tests live in the nested `remote` module and
/// only build with the `remote` feature.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::io::Write;
    use tempfile::NamedTempFile;

    #[test]
    fn load_schema_valid_file() {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, r#"{{"type": "object"}}"#).unwrap();

        let schema = load_schema(file.path()).unwrap();
        assert_eq!(schema["type"], "object");
    }

    #[test]
    fn load_schema_file_not_found() {
        let result = load_schema(Path::new("/nonexistent/path.json"));
        assert!(matches!(result, Err(ResolveError::FileNotFound { .. })));
    }

    #[test]
    fn load_schema_invalid_json() {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, "not valid json").unwrap();

        let result = load_schema(file.path());
        assert!(matches!(result, Err(ResolveError::InvalidJson { .. })));
    }

    #[test]
    fn load_schema_str_valid() {
        let schema = load_schema_str(r#"{"type": "object"}"#).unwrap();
        assert_eq!(schema["type"], "object");
    }

    #[test]
    fn load_schema_str_invalid() {
        let result = load_schema_str("not json");
        assert!(matches!(result, Err(ResolveError::InvalidJson { .. })));
    }

    #[test]
    fn is_url_https() {
        assert!(is_url("https://example.com/schema.json"));
    }

    #[test]
    fn is_url_http() {
        assert!(is_url("http://example.com/schema.json"));
    }

    #[test]
    fn is_url_file_path() {
        assert!(!is_url("/path/to/schema.json"));
        assert!(!is_url("./schema.json"));
        assert!(!is_url("schema.json"));
    }

    #[test]
    fn load_schema_auto_file() {
        let mut file = NamedTempFile::new().unwrap();
        writeln!(file, r#"{{"type": "string"}}"#).unwrap();

        let schema = load_schema_auto(file.path().to_str().unwrap()).unwrap();
        assert_eq!(schema["type"], "string");
    }

    #[test]
    fn navigate_fragment_root_returns_whole_schema() {
        let schema = json!({"type": "object"});
        assert_eq!(navigate_fragment(&schema, "#").unwrap(), schema);
    }

    #[test]
    fn navigate_fragment_nested_key() {
        let schema = json!({"$defs": {"foo": {"type": "string"}}});
        let found = navigate_fragment(&schema, "#/$defs/foo").unwrap();
        assert_eq!(found, json!({"type": "string"}));
    }

    #[test]
    fn navigate_fragment_unescapes_pointer_tokens() {
        // ~1 decodes to '/', ~0 decodes to '~'.
        let schema = json!({
            "$defs": {
                "a/b": {"type": "integer"},
                "m~n": {"type": "boolean"}
            }
        });
        assert_eq!(
            navigate_fragment(&schema, "#/$defs/a~1b").unwrap()["type"],
            "integer"
        );
        assert_eq!(
            navigate_fragment(&schema, "#/$defs/m~0n").unwrap()["type"],
            "boolean"
        );
    }

    #[test]
    fn navigate_fragment_missing_key() {
        let schema = json!({"$defs": {}});
        let result = navigate_fragment(&schema, "#/$defs/missing");
        assert!(matches!(result, Err(ResolveError::BundleError { .. })));
    }

    #[test]
    fn bundle_refs_inlines_relative_file_ref() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("types.json"),
            r#"{"$defs": {"name": {"type": "string"}}}"#,
        )
        .unwrap();

        let mut schema = json!({
            "properties": {"name": {"$ref": "types.json#/$defs/name"}}
        });
        bundle_refs(&mut schema, dir.path()).unwrap();

        assert_eq!(schema["properties"]["name"], json!({"type": "string"}));
    }

    #[test]
    fn bundle_refs_keeps_self_root_ref() {
        let mut schema = json!({"properties": {"child": {"$ref": "#"}}});
        let expected = schema.clone();

        bundle_refs(&mut schema, Path::new(".")).unwrap();
        assert_eq!(schema, expected);
    }

    #[test]
    fn bundle_refs_detects_circular_file_refs() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(
            dir.path().join("a.json"),
            r#"{"properties": {"b": {"$ref": "b.json"}}}"#,
        )
        .unwrap();
        std::fs::write(
            dir.path().join("b.json"),
            r#"{"properties": {"a": {"$ref": "a.json"}}}"#,
        )
        .unwrap();

        let mut schema = json!({"$ref": "a.json"});
        let result = bundle_refs(&mut schema, dir.path());
        assert!(matches!(result, Err(ResolveError::BundleError { .. })));
    }

    #[test]
    fn resolve_ref_to_path_with_url_mapping() {
        let base_dir = Path::new("/some/dir");
        let local_base = Path::new("/local/schemas");
        let remote_base = "https://ucp.dev/draft";

        // URL matching remote base gets mapped to local
        let path = resolve_ref_to_path(
            "https://ucp.dev/draft/schemas/ucp.json",
            base_dir,
            Some(local_base),
            Some(remote_base),
        );
        assert_eq!(path, Path::new("/local/schemas/schemas/ucp.json"));
    }

    #[test]
    fn resolve_ref_to_path_url_not_matching_remote() {
        let base_dir = Path::new("/some/dir");
        let local_base = Path::new("/local/schemas");
        let remote_base = "https://ucp.dev/draft";

        // URL not matching remote base falls back to base_dir join
        let path = resolve_ref_to_path(
            "https://other.com/schemas/foo.json",
            base_dir,
            Some(local_base),
            Some(remote_base),
        );
        assert_eq!(
            path,
            Path::new("/some/dir/https://other.com/schemas/foo.json")
        );
    }

    #[test]
    fn resolve_ref_to_path_relative_ref() {
        let base_dir = Path::new("/some/dir");

        // Relative ref without URL mapping
        let path = resolve_ref_to_path("types/buyer.json", base_dir, None, None);
        assert_eq!(path, Path::new("/some/dir/types/buyer.json"));
    }

    #[test]
    fn resolve_ref_to_path_strips_leading_slash() {
        let base_dir = Path::new("/some/dir");
        let local_base = Path::new("/local");
        let remote_base = "https://ucp.dev/draft";

        // Stripping remote base leaves "/schemas/..." - leading slash should be trimmed
        let path = resolve_ref_to_path(
            "https://ucp.dev/draft/schemas/foo.json",
            base_dir,
            Some(local_base),
            Some(remote_base),
        );
        assert_eq!(path, Path::new("/local/schemas/foo.json"));
    }

    // Remote tests run against a local mockito server so they're deterministic
    // and offline — no dependency on a live third party. The connection-error
    // case uses a reserved `.invalid` host (RFC 2606), which fails to resolve
    // locally without touching the network.
    #[cfg(feature = "remote")]
    mod remote {
        use super::*;

        #[test]
        fn load_schema_url_valid() {
            // 200 + JSON body resolves to the parsed value.
            let mut server = mockito::Server::new();
            let mock = server
                .mock("GET", "/schema.json")
                .with_header("content-type", "application/json")
                .with_body(r#"{"type": "object"}"#)
                .create();

            let result = load_schema_url(&format!("{}/schema.json", server.url()));
            assert_eq!(result.unwrap()["type"], "object");
            mock.assert();
        }

        #[test]
        fn load_schema_url_404() {
            // Non-2xx status surfaces as NetworkError (via error_for_status).
            let mut server = mockito::Server::new();
            server
                .mock("GET", "/missing.json")
                .with_status(404)
                .create();

            let result = load_schema_url(&format!("{}/missing.json", server.url()));
            assert!(matches!(result, Err(ResolveError::NetworkError { .. })));
        }

        #[test]
        fn load_schema_url_invalid_host() {
            // Connection/DNS failure surfaces as NetworkError. `.invalid` (RFC
            // 2606) fails to resolve without network access.
            let result =
                load_schema_url("https://this-domain-does-not-exist-12345.invalid/schema.json");
            assert!(matches!(result, Err(ResolveError::NetworkError { .. })));
        }

        #[test]
        fn load_schema_auto_url() {
            // A URL source delegates to load_schema_url.
            let mut server = mockito::Server::new();
            let mock = server
                .mock("GET", "/schema.json")
                .with_header("content-type", "application/json")
                .with_body(r#"{"type": "string"}"#)
                .create();

            let result = load_schema_auto(&format!("{}/schema.json", server.url()));
            assert_eq!(result.unwrap()["type"], "string");
            mock.assert();
        }
    }
}