Skip to main content

forge_parser/
external.rs

1//! External `$ref` resolution.
2//!
3//! The parser's default `parse_str` path uses [`NoExternalResolver`],
4//! which rejects every external ref with `parser/E-EXTERNAL-REF`. The
5//! file-based [`parse_path`](crate::parse_path) entry installs a
6//! [`FileResolver`] that loads adjacent JSON documents (caching by
7//! canonical path) and refuses any path that escapes the input file's
8//! parent directory.
9//!
10//! The schema walker calls [`Resolver::load`] when it encounters a `$ref`
11//! whose path part is non-empty; it then walks the target component just
12//! like a local one, prefixed with the external document's stem so IDs
13//! stay globally unique inside the type pool.
14
15use std::collections::HashMap;
16use std::fmt;
17use std::path::{Path, PathBuf};
18
19use serde_json::Value;
20
21#[derive(Debug)]
22pub struct LoadedDoc {
23    pub canonical_path: PathBuf,
24    pub root: Value,
25}
26
/// Everything that can go wrong while resolving an external `$ref`.
///
/// Implements [`std::error::Error`] so it composes with `Box<dyn Error>`
/// and `?`-based conversions in callers.
#[derive(Debug)]
pub enum ResolverError {
    /// The current resolver does not handle external `$ref`s. The default
    /// `parse_str` entry uses a no-op resolver; users who want external
    /// refs should call `parse_path`.
    NotConfigured { raw: String },
    /// URL refs (`http://`, `https://`, ...) — deferred behind a
    /// follow-up issue.
    UrlNotSupported { raw: String },
    /// The path canonicalised outside the allowed root.
    EscapesRoot { attempted: PathBuf, root: PathBuf },
    /// Filesystem error during load.
    Io { path: PathBuf, message: String },
    /// JSON parse error from the loaded file.
    InvalidJson { path: PathBuf, message: String },
}

impl fmt::Display for ResolverError {
    /// Render a single-line, user-facing description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ResolverError::NotConfigured { raw } => write!(
                f,
                "external `$ref` `{raw}` requires a file-based resolver; \
                 call `parse_path` instead of `parse_str`"
            ),
            ResolverError::UrlNotSupported { raw } => write!(
                f,
                "URL `$ref` `{raw}` is not yet supported (file-relative refs only)"
            ),
            ResolverError::EscapesRoot { attempted, root } => write!(
                f,
                "external `$ref` resolves to `{}`, which is outside the input file's directory `{}`",
                attempted.display(),
                root.display()
            ),
            ResolverError::Io { path, message } => {
                write!(f, "failed to read `{}`: {message}", path.display())
            }
            ResolverError::InvalidJson { path, message } => {
                write!(f, "failed to parse `{}`: {message}", path.display())
            }
        }
    }
}

// The underlying I/O and JSON errors are stored as rendered strings, so
// there is no `source()` to forward; the default implementation suffices.
impl std::error::Error for ResolverError {}
71
72pub trait Resolver: fmt::Debug + Send {
73    /// Load the document referenced by `raw_ref`, resolved relative to
74    /// `current_doc`. Implementations cache by canonical path; repeated
75    /// loads of the same logical document return the same `Value` clone
76    /// without re-reading from disk.
77    fn load(&mut self, raw_ref: &str, current_doc: &Path) -> Result<LoadedDoc, ResolverError>;
78}
79
80#[derive(Debug, Default)]
81pub struct NoExternalResolver;
82
83impl Resolver for NoExternalResolver {
84    fn load(&mut self, raw_ref: &str, _: &Path) -> Result<LoadedDoc, ResolverError> {
85        Err(ResolverError::NotConfigured {
86            raw: raw_ref.to_string(),
87        })
88    }
89}
90
91#[derive(Debug)]
92pub struct FileResolver {
93    /// Canonical path to the directory enclosing the input spec. Every
94    /// loaded path must canonicalise under this root.
95    root: PathBuf,
96    cache: HashMap<PathBuf, Value>,
97}
98
99impl FileResolver {
100    pub fn new(spec_path: &Path) -> std::io::Result<Self> {
101        let canonical = spec_path.canonicalize()?;
102        let root = canonical
103            .parent()
104            .map(Path::to_path_buf)
105            .unwrap_or(canonical);
106        Ok(Self {
107            root,
108            cache: HashMap::new(),
109        })
110    }
111
112    pub fn root(&self) -> &Path {
113        &self.root
114    }
115}
116
117impl Resolver for FileResolver {
118    fn load(&mut self, raw_ref: &str, current_doc: &Path) -> Result<LoadedDoc, ResolverError> {
119        let (file_part, _fragment) = split_ref(raw_ref);
120        if is_url(file_part) {
121            return Err(ResolverError::UrlNotSupported {
122                raw: raw_ref.to_string(),
123            });
124        }
125        let base = current_doc.parent().unwrap_or(current_doc);
126        let candidate = base.join(file_part);
127        let canonical = candidate.canonicalize().map_err(|e| ResolverError::Io {
128            path: candidate.clone(),
129            message: e.to_string(),
130        })?;
131        if !canonical.starts_with(&self.root) {
132            return Err(ResolverError::EscapesRoot {
133                attempted: canonical,
134                root: self.root.clone(),
135            });
136        }
137        if let Some(cached) = self.cache.get(&canonical) {
138            return Ok(LoadedDoc {
139                canonical_path: canonical,
140                root: cached.clone(),
141            });
142        }
143        let text = std::fs::read_to_string(&canonical).map_err(|e| ResolverError::Io {
144            path: canonical.clone(),
145            message: e.to_string(),
146        })?;
147        let value: Value = serde_json::from_str(&text).map_err(|e| ResolverError::InvalidJson {
148            path: canonical.clone(),
149            message: e.to_string(),
150        })?;
151        self.cache.insert(canonical.clone(), value.clone());
152        Ok(LoadedDoc {
153            canonical_path: canonical,
154            root: value,
155        })
156    }
157}
158
/// Break a `$ref` string at its first `#` into
/// `(path_part, fragment_without_hash)`. A ref with no `#` is all path
/// and gets an empty fragment.
pub(crate) fn split_ref(raw: &str) -> (&str, &str) {
    raw.split_once('#').unwrap_or((raw, ""))
}
166
/// Report whether `s` starts with one of the URL schemes this module
/// recognises (`http://`, `https://`, `ftp://`).
///
/// The scheme is compared ASCII case-insensitively, because URI schemes
/// are case-insensitive: `HTTPS://host/x.json` is a URL, not a relative
/// file path.
pub(crate) fn is_url(s: &str) -> bool {
    const SCHEMES: [&str; 3] = ["http://", "https://", "ftp://"];
    SCHEMES.iter().any(|scheme| {
        // `get` yields `None` when `s` is too short or the cut would land
        // inside a multi-byte character, so this never panics.
        matches!(s.get(..scheme.len()), Some(head) if head.eq_ignore_ascii_case(scheme))
    })
}
170
171/// Walk an RFC 6901 JSON pointer fragment against `root`. Returns the
172/// pointed-at value, or `None` if any token doesn't match. The fragment
173/// is expected without its leading `#`; an empty fragment addresses the
174/// root.
175pub(crate) fn resolve_pointer<'a>(root: &'a Value, fragment: &str) -> Option<&'a Value> {
176    if fragment.is_empty() {
177        return Some(root);
178    }
179    let trimmed = fragment.strip_prefix('/').unwrap_or(fragment);
180    if trimmed.is_empty() {
181        return Some(root);
182    }
183    let mut cur = root;
184    for token in trimmed.split('/') {
185        let decoded = decode_pointer_token(token);
186        cur = match cur {
187            Value::Object(map) => map.get(&decoded)?,
188            Value::Array(items) => {
189                let idx: usize = decoded.parse().ok()?;
190                items.get(idx)?
191            }
192            _ => return None,
193        };
194    }
195    Some(cur)
196}
197
/// Decode a single RFC 6901 token: `~1` → `/`, `~0` → `~`.
///
/// Done in one left-to-right pass so each escape is consumed exactly
/// once: a literal `~01` becomes `~1`, never `/`. A `~` that is not
/// followed by `0` or `1` is copied through unchanged.
fn decode_pointer_token(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    let mut rest = s.chars().peekable();
    while let Some(ch) = rest.next() {
        if ch != '~' {
            decoded.push(ch);
            continue;
        }
        match rest.peek().copied() {
            Some('1') => {
                rest.next();
                decoded.push('/');
            }
            Some('0') => {
                rest.next();
                decoded.push('~');
            }
            _ => decoded.push('~'),
        }
    }
    decoded
}
203
204/// Last token of a JSON pointer fragment. Used to derive a schema name
205/// from `/AIAgent` or `/components/schemas/Pet`.
206pub(crate) fn fragment_last_token(fragment: &str) -> Option<String> {
207    let trimmed = fragment.strip_prefix('/').unwrap_or(fragment);
208    if trimmed.is_empty() {
209        return None;
210    }
211    trimmed.rsplit('/').next().map(decode_pointer_token)
212}
213
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The empty fragment and a bare `/` both address the document root.
    #[test]
    fn resolve_pointer_root() {
        let doc = json!({"a": 1});
        for fragment in ["", "/"] {
            assert_eq!(resolve_pointer(&doc, fragment), Some(&doc));
        }
    }

    /// Nested object keys are followed one token at a time.
    #[test]
    fn resolve_pointer_walks_objects() {
        let doc = json!({"components": {"schemas": {"Pet": {"type": "object"}}}});
        let target = resolve_pointer(&doc, "/components/schemas/Pet").unwrap();
        assert_eq!(target["type"], "object");
    }

    /// A schema sitting directly under the root is reachable by one token.
    #[test]
    fn resolve_pointer_flat_root_schema() {
        let doc = json!({"AIAgent": {"type": "object"}});
        let target = resolve_pointer(&doc, "/AIAgent").unwrap();
        assert_eq!(target["type"], "object");
    }

    /// `~1` inside a token is decoded to `/` before the key lookup.
    #[test]
    fn resolve_pointer_decodes_escape() {
        let doc = json!({"/api/v1/users": {"get": {}}});
        let target = resolve_pointer(&doc, "/~1api~1v1~1users").unwrap();
        assert!(target.get("get").is_some());
    }

    /// Numeric tokens index into arrays.
    #[test]
    fn resolve_pointer_walks_arrays() {
        let doc = json!({"items": [10, 20, 30]});
        let element = resolve_pointer(&doc, "/items/1").unwrap();
        assert_eq!(*element, json!(20));
    }

    /// The last token is returned decoded; an empty fragment has none.
    #[test]
    fn fragment_last_token_works() {
        let cases = [
            ("/components/schemas/Pet", Some("Pet")),
            ("/AIAgent", Some("AIAgent")),
            ("/~1api~1v1~1users", Some("/api/v1/users")),
            ("", None),
        ];
        for (fragment, expected) in cases {
            assert_eq!(fragment_last_token(fragment), expected.map(str::to_string));
        }
    }
}