Skip to main content

lex_extension/
schema.rs

1//! Schema types — the read-only structs a YAML loader produces.
2//!
3//! The loader itself lives in `lex-extension-host` (PR 4); this module
4//! defines the types both the loader and consumers (registry, host, editors)
5//! share. The types are `serde`-derived so they can also be hand-built in
6//! Rust code without a YAML round-trip (useful for embedders).
7
8use std::collections::BTreeMap;
9
10use serde::{Deserialize, Serialize};
11
12use crate::wire::DiagnosticSeverity;
13
14/// One label's schema. Mirrors the YAML format documented in the *Extending
15/// Lex* proposal §13.2.
16///
17/// Schemas are strict on deserialise: unknown fields are rejected. Forward
18/// compatibility lives at the `wire_version` axis, not at the schema-format
19/// level — a schema with a field this version doesn't know about is
20/// malformed by definition. The schema loader (`lex-extension-host`)
21/// surfaces this as a precise `SchemaError`.
22#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
23#[serde(deny_unknown_fields)]
24pub struct Schema {
25    /// Schema-format version. Currently `1`.
26    pub schema_version: u32,
27    /// Fully-qualified label, e.g. `"acme.commenting"`.
28    pub label: String,
29    #[serde(default, skip_serializing_if = "Option::is_none")]
30    pub description: Option<String>,
31    /// Declared parameters, keyed by name.
32    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
33    pub params: BTreeMap<String, ParamSpec>,
34    /// Permitted host node kinds.
35    #[serde(default, skip_serializing_if = "Vec::is_empty")]
36    pub attaches_to: Vec<String>,
37    /// Body shape when the label is used as an annotation.
38    #[serde(default)]
39    pub body: BodyShape,
40    /// Whether the label is also legal as a verbatim block closing.
41    #[serde(default)]
42    pub verbatim_label: bool,
43    /// Declared OS-level capabilities the handler needs. Honoured once
44    /// sandboxing is in place; see proposal §8.
45    #[serde(default)]
46    pub capabilities: Capabilities,
47    /// Hooks the label participates in.
48    #[serde(default)]
49    pub hooks: HookSet,
50    /// Optional handler delivery info. Schema-only labels (validation +
51    /// editor UX from the schema alone) omit this.
52    #[serde(default, skip_serializing_if = "Option::is_none")]
53    pub handler: Option<HandlerSpec>,
54    /// Diagnostic codes this label's handler can emit. Declaring them
55    /// lets the host schema-validate `[diagnostics.rules]` entries
56    /// against the resolved registry — a `<namespace>.<code>` rule
57    /// whose `<code>` matches nothing declared here is a dead letter
58    /// the host can flag — and lets `config`/editor tooling surface the
59    /// available codes with their descriptions and default severity.
60    #[serde(default, skip_serializing_if = "Vec::is_empty")]
61    pub diagnostics: Vec<DiagnosticDecl>,
62}
63
64/// One diagnostic code a namespace's handler can emit, declared in the
65/// label's schema.
66///
67/// The [`code`](Self::code) is the bare leaf (e.g.
68/// `task-due-date-missing`) — exactly what a handler stamps on the
69/// `code` field of an emitted `Diagnostic`. Combined with the owning
70/// namespace it forms the on-the-wire `<namespace>.<code>` key the user
71/// writes under `[diagnostics.rules]`.
72#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
73#[serde(deny_unknown_fields)]
74pub struct DiagnosticDecl {
75    /// Bare leaf code, matching `Diagnostic.code` set by the handler.
76    pub code: String,
77    /// Human-readable summary, surfaced in config templates and editor
78    /// hover.
79    #[serde(default, skip_serializing_if = "Option::is_none")]
80    pub description: Option<String>,
81    /// Declared intrinsic severity. This is *declaration metadata* —
82    /// surfaced by config-generation and editor tooling so authors and
83    /// users see a code's intended level. It is **not** yet read by the
84    /// runtime diagnostic pipeline: a handler-emitted diagnostic's own
85    /// `Diagnostic.severity` still determines its intrinsic severity,
86    /// and `[diagnostics.rules]` overrides apply on top of that.
87    /// Defaults to `warning`.
88    ///
89    /// Parsed strictly (unlike the permissive wire
90    /// [`DiagnosticSeverity`] deserializer): an unknown value is a
91    /// schema error, consistent with the schema loader's
92    /// `deny_unknown_fields` contract, rather than silently degrading to
93    /// `info`.
94    #[serde(
95        default = "default_decl_severity",
96        deserialize_with = "deserialize_strict_severity"
97    )]
98    pub default_severity: DiagnosticSeverity,
99}
100
101fn default_decl_severity() -> DiagnosticSeverity {
102    DiagnosticSeverity::Warning
103}
104
105/// Strict `default_severity` parser: accepts exactly the four known
106/// severities and rejects anything else, so a typo (`warn`, `erorr`)
107/// fails the schema load instead of deserialising to `info` the way the
108/// wire [`DiagnosticSeverity`] deserializer intentionally does for
109/// forward-compatible handler payloads.
110fn deserialize_strict_severity<'de, D>(deserializer: D) -> Result<DiagnosticSeverity, D::Error>
111where
112    D: serde::Deserializer<'de>,
113{
114    use serde::de::{Error, Unexpected};
115    let s = String::deserialize(deserializer)?;
116    match s.as_str() {
117        "error" => Ok(DiagnosticSeverity::Error),
118        "warning" => Ok(DiagnosticSeverity::Warning),
119        "info" => Ok(DiagnosticSeverity::Info),
120        "hint" => Ok(DiagnosticSeverity::Hint),
121        _ => Err(D::Error::invalid_value(
122            Unexpected::Str(&s),
123            &"one of: error, warning, info, hint",
124        )),
125    }
126}
127
128/// One parameter declaration.
129#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
130#[serde(deny_unknown_fields)]
131pub struct ParamSpec {
132    #[serde(rename = "type")]
133    pub ty: ParamType,
134    #[serde(default)]
135    pub required: bool,
136    #[serde(default, skip_serializing_if = "Option::is_none")]
137    pub default: Option<serde_json::Value>,
138    #[serde(default, skip_serializing_if = "Option::is_none")]
139    pub description: Option<String>,
140    #[serde(default, skip_serializing_if = "Option::is_none")]
141    pub pattern: Option<String>,
142    /// Allowed values when `ty == Enum`.
143    #[serde(default, skip_serializing_if = "Vec::is_empty")]
144    pub values: Vec<EnumValue>,
145}
146
147/// Allowed parameter types.
148///
149/// Forward compatibility: unlike the wire-format enums, schema loaders
150/// *reject* unknown types — schema-format versioning is independent of
151/// `wire_version` and a schema with an unknown `type` is malformed by
152/// definition. The `#[non_exhaustive]` attribute keeps adding new variants
153/// non-breaking on the Rust side.
154#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
155#[serde(rename_all = "lowercase")]
156#[non_exhaustive]
157pub enum ParamType {
158    String,
159    Bool,
160    Int,
161    Float,
162    Enum,
163}
164
165/// One legal value of an enum-typed parameter.
166#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
167#[serde(deny_unknown_fields)]
168pub struct EnumValue {
169    pub name: String,
170    #[serde(default, skip_serializing_if = "Option::is_none")]
171    pub description: Option<String>,
172}
173
174/// Body shape for annotation-form usage.
175#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
176#[serde(deny_unknown_fields)]
177pub struct BodyShape {
178    #[serde(default = "BodyKind::default_kind")]
179    pub kind: BodyKind,
180    #[serde(default)]
181    pub presence: BodyPresence,
182    #[serde(default, skip_serializing_if = "Option::is_none")]
183    pub description: Option<String>,
184}
185
186impl Default for BodyShape {
187    fn default() -> Self {
188        Self {
189            kind: BodyKind::None,
190            presence: BodyPresence::Optional,
191            description: None,
192        }
193    }
194}
195
196#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
197#[serde(rename_all = "lowercase")]
198#[non_exhaustive]
199pub enum BodyKind {
200    None,
201    Text,
202    Lex,
203}
204
205impl BodyKind {
206    fn default_kind() -> Self {
207        Self::None
208    }
209}
210
211#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
212#[serde(rename_all = "lowercase")]
213#[non_exhaustive]
214pub enum BodyPresence {
215    Optional,
216    Required,
217}
218
219impl Default for BodyPresence {
220    fn default() -> Self {
221        Self::Optional
222    }
223}
224
225/// Declared capabilities. The subprocess transport will sandbox the handler
226/// to honour these once OS-level enforcement ships (see proposal §8).
227#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
228#[serde(deny_unknown_fields)]
229pub struct Capabilities {
230    #[serde(default)]
231    pub fs: bool,
232    #[serde(default)]
233    pub net: bool,
234}
235
236impl Capabilities {
237    /// True when the handler declares no privileged capabilities — the
238    /// "pure handler" classification used by the trust matrix in
239    /// proposal §8.
240    ///
241    /// Implementation note: this is exact equality with
242    /// [`Capabilities::default`] rather than an explicit
243    /// `!self.fs && !self.net`. As future capability fields are added
244    /// (e.g., `exec`, scoped network, …), they default to `false` and
245    /// participate in this check automatically — there is no second
246    /// place to remember to update.
247    pub fn is_pure(&self) -> bool {
248        *self == Self::default()
249    }
250}
251
252/// Hook participation. Each field defaults to "not implemented".
253///
254/// `resolve` and `ir_build` form the two lifecycle-phase hooks for
255/// content-substitution: `resolve` runs during the resolve phase and
256/// splices the returned wire node into the host AST (the canonical
257/// example is `lex.include`). `ir_build` runs while the host constructs
258/// its in-memory IR and produces a typed wire node consumed in IR-build
259/// position only — the canonical examples are `lex.tabular.table` and
260/// `lex.media.*`. Pair `ir_build` with `render` on the same schema to
261/// give one label both an IR shape and per-format serialization through
262/// one registration (the unified registry surface for #615).
263#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
264#[serde(deny_unknown_fields)]
265pub struct HookSet {
266    #[serde(default)]
267    pub label: bool,
268    #[serde(default)]
269    pub validate: bool,
270    #[serde(default)]
271    pub resolve: bool,
272    /// IR-build participation. When `true`, the host invokes
273    /// [`LexHandler::on_ir_build`](crate::handler::LexHandler::on_ir_build)
274    /// during IR construction (the verbatim/IR-hydration lifecycle).
275    /// Distinct from `resolve` (AST-substitution lifecycle) so a schema
276    /// can declare exactly the lifecycle phase it participates in.
277    #[serde(default)]
278    pub ir_build: bool,
279    #[serde(default)]
280    pub hover: bool,
281    #[serde(default)]
282    pub completion: bool,
283    #[serde(default)]
284    pub code_action: bool,
285    /// Render hooks declare which target formats they produce. An empty
286    /// vector means the label does not participate in rendering.
287    #[serde(default, skip_serializing_if = "Vec::is_empty")]
288    pub render: Vec<RenderHook>,
289}
290
291/// One render-format the label can produce.
292#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
293#[serde(transparent)]
294pub struct RenderHook(pub String);
295
296impl RenderHook {
297    pub fn new(format: impl Into<String>) -> Self {
298        Self(format.into())
299    }
300}
301
302/// Handler delivery info.
303#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
304#[serde(deny_unknown_fields)]
305pub struct HandlerSpec {
306    pub transport: HandlerTransport,
307    /// Argv for the subprocess transport. Variables in the form `${NAME}`
308    /// are expanded at spawn time. Ignored for native and WASM transports.
309    #[serde(default, skip_serializing_if = "Vec::is_empty")]
310    pub command: Vec<String>,
311    /// Per-request timeout. Defaults to 2000 ms in subprocess hosts.
312    #[serde(default, skip_serializing_if = "Option::is_none")]
313    pub timeout_ms: Option<u32>,
314}
315
316#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
317#[serde(rename_all = "lowercase")]
318#[non_exhaustive]
319pub enum HandlerTransport {
320    Native,
321    Subprocess,
322    Wasm,
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully-populated schema fixture exercising every optional
    /// field (params, body, hooks, handler, diagnostics).
    fn comment_schema() -> Schema {
        let mut params = BTreeMap::new();
        params.insert(
            "role".into(),
            ParamSpec {
                ty: ParamType::Enum,
                required: true,
                default: None,
                description: None,
                pattern: None,
                values: vec![
                    EnumValue {
                        name: "author".into(),
                        description: None,
                    },
                    EnumValue {
                        name: "editor".into(),
                        description: None,
                    },
                ],
            },
        );
        Schema {
            schema_version: 1,
            label: "acme.commenting".into(),
            description: Some("A comment thread.".into()),
            params,
            attaches_to: vec!["paragraph".into(), "session".into()],
            body: BodyShape {
                kind: BodyKind::Lex,
                presence: BodyPresence::Required,
                description: None,
            },
            verbatim_label: false,
            capabilities: Capabilities {
                fs: false,
                net: false,
            },
            hooks: HookSet {
                validate: true,
                hover: true,
                render: vec![RenderHook::new("html"), RenderHook::new("markdown")],
                ..HookSet::default()
            },
            handler: Some(HandlerSpec {
                transport: HandlerTransport::Subprocess,
                command: vec!["acme-comment-handler".into()],
                timeout_ms: Some(2000),
            }),
            diagnostics: vec![
                DiagnosticDecl {
                    code: "unresolved-thread".into(),
                    description: Some("A comment thread has no resolution.".into()),
                    default_severity: DiagnosticSeverity::Warning,
                },
                DiagnosticDecl {
                    code: "missing-author".into(),
                    description: None,
                    default_severity: DiagnosticSeverity::Error,
                },
            ],
        }
    }

    #[test]
    fn schema_round_trips_through_json() {
        let s = comment_schema();
        let serialised = serde_json::to_string(&s).unwrap();
        let back: Schema = serde_json::from_str(&serialised).unwrap();
        assert_eq!(back, s);
    }

    #[test]
    fn capabilities_is_pure_for_zero_fs_zero_net() {
        assert!(Capabilities::default().is_pure());
        assert!(!Capabilities {
            fs: true,
            net: false
        }
        .is_pure());
        assert!(!Capabilities {
            fs: false,
            net: true
        }
        .is_pure());
        // Both capabilities declared at once is impure as well.
        assert!(!Capabilities {
            fs: true,
            net: true
        }
        .is_pure());
    }

    #[test]
    fn hookset_default_is_all_off() {
        // Spell out every field with no `..` rest pattern: a newly added
        // hook then fails to compile here until this test covers it,
        // which keeps the "all off" claim honest.
        let all_off = HookSet {
            label: false,
            validate: false,
            resolve: false,
            ir_build: false,
            hover: false,
            completion: false,
            code_action: false,
            render: Vec::new(),
        };
        assert_eq!(HookSet::default(), all_off);
    }

    /// `ir_build` is a new field added with #615 (unified registry
    /// surface). Make sure it round-trips through JSON like every other
    /// hook flag, and that the default-omitted form deserialises with
    /// `ir_build = false` (back-compat for existing schemas authored
    /// before the field existed).
    #[test]
    fn hookset_ir_build_round_trips_through_json() {
        let hs = HookSet {
            ir_build: true,
            ..HookSet::default()
        };
        let serialised = serde_json::to_string(&hs).unwrap();
        assert!(
            serialised.contains("\"ir_build\":true"),
            "ir_build must serialise: {serialised}"
        );
        let back: HookSet = serde_json::from_str(&serialised).unwrap();
        assert!(back.ir_build);

        // Older schema JSON without the field deserialises to false —
        // the back-compat contract.
        let legacy = r#"{"label":false,"validate":false,"resolve":false,"hover":false,"completion":false,"code_action":false}"#;
        let parsed: HookSet = serde_json::from_str(legacy).unwrap();
        assert!(
            !parsed.ir_build,
            "legacy JSON must default ir_build to false"
        );
    }

    #[test]
    fn body_shape_default_is_none_optional() {
        let bs = BodyShape::default();
        assert_eq!(bs.kind, BodyKind::None);
        assert_eq!(bs.presence, BodyPresence::Optional);
    }

    #[test]
    fn schema_without_diagnostics_field_loads_empty() {
        // Schemas that don't declare diagnostics still load — the field
        // defaults to an empty vec, not an error.
        let s: Schema =
            serde_json::from_str(r#"{"schema_version": 1, "label": "acme.task"}"#).unwrap();
        assert!(s.diagnostics.is_empty());
    }

    #[test]
    fn diagnostic_decl_default_severity_is_warning() {
        // `default_severity` is optional; omitting it yields `warning`,
        // matching the doc contract.
        let s: Schema = serde_json::from_str(
            r#"{"schema_version": 1, "label": "acme.task",
                "diagnostics": [{"code": "due-date-missing"}]}"#,
        )
        .unwrap();
        assert_eq!(s.diagnostics.len(), 1);
        assert_eq!(s.diagnostics[0].code, "due-date-missing");
        assert_eq!(s.diagnostics[0].description, None);
        assert_eq!(
            s.diagnostics[0].default_severity,
            DiagnosticSeverity::Warning
        );
    }

    #[test]
    fn diagnostic_decl_explicit_severity_parses() {
        let s: Schema = serde_json::from_str(
            r#"{"schema_version": 1, "label": "acme.task",
                "diagnostics": [{"code": "due-date-missing",
                                 "description": "Task lacks a due date.",
                                 "default_severity": "error"}]}"#,
        )
        .unwrap();
        assert_eq!(s.diagnostics[0].default_severity, DiagnosticSeverity::Error);
        assert_eq!(
            s.diagnostics[0].description.as_deref(),
            Some("Task lacks a due date.")
        );
    }

    #[test]
    fn diagnostic_decl_rejects_unknown_field() {
        assert!(serde_json::from_str::<Schema>(
            r#"{"schema_version": 1, "label": "acme.task",
                "diagnostics": [{"code": "due-date-missing", "severty": "warn"}]}"#,
        )
        .is_err());
    }

    #[test]
    fn diagnostic_decl_rejects_unknown_severity_value() {
        // Strict, unlike the permissive wire deserializer: a typo'd
        // severity (`warn` instead of `warning`) is a schema error, not
        // a silent downgrade to `info`.
        for bad in [r#""warn""#, r#""erorr""#, r#""fatal""#] {
            let src = format!(
                r#"{{"schema_version": 1, "label": "acme.task",
                    "diagnostics": [{{"code": "x", "default_severity": {bad}}}]}}"#
            );
            assert!(
                serde_json::from_str::<Schema>(&src).is_err(),
                "expected `{bad}` to be rejected"
            );
        }
    }
}