Skip to main content

forge_parser/
external.rs

1//! External `$ref` resolution.
2//!
3//! The parser's default `parse_str` path uses [`NoExternalResolver`],
4//! which rejects every external ref with `parser/E-EXTERNAL-REF`. The
5//! file-based [`parse_path`](crate::parse_path) entry installs a
6//! [`FileResolver`] that loads adjacent JSON documents (caching by
7//! canonical path) and refuses any path that escapes the input file's
8//! parent directory.
9//!
10//! The schema walker calls [`Resolver::load`] when it encounters a `$ref`
11//! whose path part is non-empty; it then walks the target component just
12//! like a local one, prefixed with the external document's stem so IDs
13//! stay globally unique inside the type pool.
14
15use std::collections::HashMap;
16use std::fmt;
17use std::path::{Path, PathBuf};
18use std::sync::Arc;
19
20use serde_json::Value;
21
22#[derive(Debug)]
23pub struct LoadedDoc {
24    pub canonical_path: PathBuf,
25    /// Shared handle to the loaded document. Reference-counted so repeated
26    /// loads of the same logical document are a refcount bump, not a deep
27    /// clone of the whole (potentially multi-hundred-KB) JSON tree.
28    pub root: Arc<Value>,
29}
30
/// Everything that can go wrong while resolving an external `$ref`.
///
/// Implements [`std::error::Error`], so it can be boxed as `dyn Error`,
/// attached as a `source`, or propagated with `?` into generic error types.
#[derive(Debug)]
pub enum ResolverError {
    /// The current resolver does not handle external `$ref`s. The default
    /// `parse_str` entry uses a no-op resolver; users who want external
    /// refs should call `parse_path`.
    NotConfigured { raw: String },
    /// URL refs (`http://`, `https://`, ...) — deferred behind a
    /// follow-up issue.
    UrlNotSupported { raw: String },
    /// The path canonicalised outside the allowed root.
    EscapesRoot { attempted: PathBuf, root: PathBuf },
    /// Filesystem error during load.
    Io { path: PathBuf, message: String },
    /// JSON parse error from the loaded file.
    InvalidJson { path: PathBuf, message: String },
}

impl fmt::Display for ResolverError {
    /// Render the user-facing, single-line description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ResolverError::NotConfigured { raw } => write!(
                f,
                "external `$ref` `{raw}` requires a file-based resolver; \
                 call `parse_path` instead of `parse_str`"
            ),
            ResolverError::UrlNotSupported { raw } => write!(
                f,
                "URL `$ref` `{raw}` is not yet supported (file-relative refs only)"
            ),
            ResolverError::EscapesRoot { attempted, root } => write!(
                f,
                "external `$ref` resolves to `{}`, which is outside the input file's directory `{}`",
                attempted.display(),
                root.display()
            ),
            ResolverError::Io { path, message } => {
                write!(f, "failed to read `{}`: {message}", path.display())
            }
            ResolverError::InvalidJson { path, message } => {
                write!(f, "failed to parse `{}`: {message}", path.display())
            }
        }
    }
}

// The underlying I/O and JSON errors are stored as pre-rendered strings, so
// there is no inner error to expose; the default `source()` (None) is correct.
impl std::error::Error for ResolverError {}
75
76pub trait Resolver: fmt::Debug + Send {
77    /// Load the document referenced by `raw_ref`, resolved relative to
78    /// `current_doc`. Implementations cache by canonical path; repeated
79    /// loads of the same logical document return a shared `Arc` handle to
80    /// the same `Value` without re-reading or re-cloning the JSON tree.
81    fn load(&mut self, raw_ref: &str, current_doc: &Path) -> Result<LoadedDoc, ResolverError>;
82}
83
/// Resolver that refuses every external `$ref`; its `load` always reports
/// `ResolverError::NotConfigured`.
#[derive(Debug, Default)]
pub struct NoExternalResolver;
86
87impl Resolver for NoExternalResolver {
88    fn load(&mut self, raw_ref: &str, _: &Path) -> Result<LoadedDoc, ResolverError> {
89        Err(ResolverError::NotConfigured {
90            raw: raw_ref.to_string(),
91        })
92    }
93}
94
95#[derive(Debug)]
96pub struct FileResolver {
97    /// Canonical path to the directory enclosing the input spec. Every
98    /// loaded path must canonicalise under this root.
99    root: PathBuf,
100    cache: HashMap<PathBuf, Arc<Value>>,
101}
102
103impl FileResolver {
104    pub fn new(spec_path: &Path) -> std::io::Result<Self> {
105        let canonical = spec_path.canonicalize()?;
106        let root = canonical
107            .parent()
108            .map(Path::to_path_buf)
109            .unwrap_or(canonical);
110        Ok(Self {
111            root,
112            cache: HashMap::new(),
113        })
114    }
115
116    pub fn root(&self) -> &Path {
117        &self.root
118    }
119}
120
121impl Resolver for FileResolver {
122    fn load(&mut self, raw_ref: &str, current_doc: &Path) -> Result<LoadedDoc, ResolverError> {
123        let (file_part, _fragment) = split_ref(raw_ref);
124        if is_url(file_part) {
125            return Err(ResolverError::UrlNotSupported {
126                raw: raw_ref.to_string(),
127            });
128        }
129        let base = current_doc.parent().unwrap_or(current_doc);
130        let candidate = base.join(file_part);
131        let canonical = candidate.canonicalize().map_err(|e| ResolverError::Io {
132            path: candidate.clone(),
133            message: e.to_string(),
134        })?;
135        if !canonical.starts_with(&self.root) {
136            return Err(ResolverError::EscapesRoot {
137                attempted: canonical,
138                root: self.root.clone(),
139            });
140        }
141        if let Some(cached) = self.cache.get(&canonical) {
142            return Ok(LoadedDoc {
143                canonical_path: canonical,
144                root: Arc::clone(cached),
145            });
146        }
147        let text = std::fs::read_to_string(&canonical).map_err(|e| ResolverError::Io {
148            path: canonical.clone(),
149            message: e.to_string(),
150        })?;
151        let value: Value = serde_json::from_str(&text).map_err(|e| ResolverError::InvalidJson {
152            path: canonical.clone(),
153            message: e.to_string(),
154        })?;
155        let value = Arc::new(value);
156        self.cache.insert(canonical.clone(), Arc::clone(&value));
157        Ok(LoadedDoc {
158            canonical_path: canonical,
159            root: value,
160        })
161    }
162}
163
/// Break a `$ref` string at its first `#` into `(path_part, fragment)`.
///
/// The `#` itself is dropped. A ref without any `#` is returned whole as
/// the path part, paired with an empty fragment.
pub(crate) fn split_ref(raw: &str) -> (&str, &str) {
    raw.split_once('#').unwrap_or((raw, ""))
}
171
/// Report whether `s` begins with an `http://`, `https://` or `ftp://`
/// scheme prefix.
///
/// URI schemes are case-insensitive, so the prefix is compared ignoring
/// ASCII case: `HTTPS://host/a.json` is a URL just like its lowercase
/// spelling, and must not fall through to be treated as a relative file
/// path.
pub(crate) fn is_url(s: &str) -> bool {
    const URL_PREFIXES: [&str; 3] = ["http://", "https://", "ftp://"];
    // Compare on bytes: the prefixes are pure ASCII, and byte slicing cannot
    // panic on a multi-byte character boundary the way `&s[..n]` could.
    let bytes = s.as_bytes();
    URL_PREFIXES.iter().any(|prefix| {
        bytes.len() >= prefix.len()
            && bytes[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
    })
}
175
176/// Walk an RFC 6901 JSON pointer fragment against `root`. Returns the
177/// pointed-at value, or `None` if any token doesn't match. The fragment
178/// is expected without its leading `#`; an empty fragment addresses the
179/// root.
180pub(crate) fn resolve_pointer<'a>(root: &'a Value, fragment: &str) -> Option<&'a Value> {
181    if fragment.is_empty() {
182        return Some(root);
183    }
184    let trimmed = fragment.strip_prefix('/').unwrap_or(fragment);
185    if trimmed.is_empty() {
186        return Some(root);
187    }
188    let mut cur = root;
189    for token in trimmed.split('/') {
190        let decoded = decode_pointer_token(token);
191        cur = match cur {
192            Value::Object(map) => map.get(&decoded)?,
193            Value::Array(items) => {
194                let idx: usize = decoded.parse().ok()?;
195                items.get(idx)?
196            }
197            _ => return None,
198        };
199    }
200    Some(cur)
201}
202
/// Unescape one RFC 6901 reference token: `~1` becomes `/` and `~0`
/// becomes `~`.
///
/// Done as a single left-to-right scan. Consuming each two-character escape
/// as a unit gives the same result as replacing every `~1` first and every
/// `~0` second, so a literal `~01` still decodes to `~1` (never `/`). A `~`
/// that is not followed by `0` or `1` is copied through unchanged.
fn decode_pointer_token(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        let upcoming = chars.peek().copied();
        let unescaped = match (ch, upcoming) {
            ('~', Some('1')) => Some('/'),
            ('~', Some('0')) => Some('~'),
            _ => None,
        };
        match unescaped {
            Some(plain) => {
                // Swallow the second character of the escape pair.
                chars.next();
                decoded.push(plain);
            }
            None => decoded.push(ch),
        }
    }
    decoded
}
208
209/// Last token of a JSON pointer fragment. Used to derive a schema name
210/// from `/AIAgent` or `/components/schemas/Pet`.
211pub(crate) fn fragment_last_token(fragment: &str) -> Option<String> {
212    let trimmed = fragment.strip_prefix('/').unwrap_or(fragment);
213    if trimmed.is_empty() {
214        return None;
215    }
216    trimmed.rsplit('/').next().map(decode_pointer_token)
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The empty fragment and a lone `/` both address the document root.
    #[test]
    fn resolve_pointer_root() {
        let doc = json!({"a": 1});
        let via_empty = resolve_pointer(&doc, "");
        let via_slash = resolve_pointer(&doc, "/");
        assert_eq!(via_empty, Some(&doc));
        assert_eq!(via_slash, Some(&doc));
    }

    /// A multi-segment pointer descends through nested objects.
    #[test]
    fn resolve_pointer_walks_objects() {
        let doc = json!({"components": {"schemas": {"Pet": {"type": "object"}}}});
        let pet = resolve_pointer(&doc, "/components/schemas/Pet")
            .expect("nested pointer should resolve");
        assert_eq!(pet["type"], "object");
    }

    /// Schemas stored directly at the document root are reachable too.
    #[test]
    fn resolve_pointer_flat_root_schema() {
        let doc = json!({"AIAgent": {"type": "object"}});
        let agent = resolve_pointer(&doc, "/AIAgent").expect("flat pointer should resolve");
        assert_eq!(agent["type"], "object");
    }

    /// `~1` in a token stands for a literal `/` inside the key.
    #[test]
    fn resolve_pointer_decodes_escape() {
        let doc = json!({"/api/v1/users": {"get": {}}});
        let path_item =
            resolve_pointer(&doc, "/~1api~1v1~1users").expect("escaped pointer should resolve");
        assert!(path_item.get("get").is_some());
    }

    /// Numeric tokens index into arrays.
    #[test]
    fn resolve_pointer_walks_arrays() {
        let doc = json!({"items": [10, 20, 30]});
        let second = resolve_pointer(&doc, "/items/1").expect("array pointer should resolve");
        assert_eq!(*second, json!(20));
    }

    /// The last token is extracted and unescaped; an empty fragment has none.
    #[test]
    fn fragment_last_token_works() {
        let cases = [
            ("/components/schemas/Pet", Some("Pet")),
            ("/AIAgent", Some("AIAgent")),
            ("/~1api~1v1~1users", Some("/api/v1/users")),
            ("", None),
        ];
        for (fragment, expected) in cases.iter() {
            assert_eq!(
                fragment_last_token(fragment).as_deref(),
                *expected,
                "fragment {fragment:?}"
            );
        }
    }
}