sheetport_spec/
manifest.rs

1use std::collections::BTreeMap;
2use std::fmt;
3
4use regex::Regex;
5use schemars::json_schema;
6use schemars::{JsonSchema, SchemaGenerator};
7use semver::Version;
8use serde::de::{self, Deserializer, Visitor};
9use serde::ser::Serializer;
10use serde::{Deserialize, Serialize};
11use serde_json::Value as JsonValue;
12use std::borrow::Cow;
13
14use crate::validation::{ManifestIssue, ValidationError};
15
/// Current supported FIO specification version.
///
/// Must be valid semver: `Manifest::validate` parses it (panicking otherwise)
/// and compares only its major component with the manifest's `spec_version`.
pub const CURRENT_SPEC_VERSION: &str = "0.3.0";
/// Constant identifier for this spec.
///
/// `Manifest::validate` reports an issue when `Manifest::spec` differs from it.
pub const SPEC_IDENT: &str = "fio";
20
/// Conformance profile advertised by a manifest.
///
/// Profiles gate optional/forward-looking features so runtimes can safely reject
/// manifests that use selectors they don't support.
// Wire names are kebab-case (`core-v0`, `full-v0`), matching `profile_label`.
// Reviewer notes on this type use `//` so that the `///` text, which also
// feeds the derived JSON Schema descriptions, stays unchanged.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "kebab-case")]
pub enum Profile {
    /// Core profile: A1, named range, and layout selectors only.
    #[default]
    CoreV0,
    /// Full profile (reserved): enables structured refs and workbook table selectors.
    FullV0,
}
34
/// Optional capabilities block for feature gating.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Capabilities {
    // Conformance profile; falls back to `Profile::default()` when the key is omitted.
    #[serde(default)]
    pub profile: Profile,
    // Optional feature identifiers. `Manifest::normalize` sorts and deduplicates
    // them; nothing else in the visible part of this file interprets the values.
    #[serde(default)]
    pub features: Option<Vec<String>>,
}
44
45impl Default for Capabilities {
46    fn default() -> Self {
47        Self {
48            profile: Profile::CoreV0,
49            features: None,
50        }
51    }
52}
53
/// Canonical manifest representation.
// Unknown top-level keys are rejected at deserialization time
// (`deny_unknown_fields`); semantic checks live in `Manifest::validate`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[schemars(
    title = "Formualizer I/O Manifest (SheetPort)",
    description = "Specification that binds typed input/output ports to a spreadsheet so it can be treated as a pure function.",
    example = crate::manifest::example_data::supply_planning_example()
)]
#[serde(deny_unknown_fields)]
pub struct Manifest {
    /// Identifier for this specification (must be `fio`).
    pub spec: String,
    // Semver string; `validate` only requires its major component to match
    // `CURRENT_SPEC_VERSION`. The rename below repeats the field's own name.
    #[serde(rename = "spec_version")]
    pub spec_version: SpecVersion,
    #[serde(default)]
    /// Optional conformance capabilities for this manifest.
    pub capabilities: Option<Capabilities>,
    /// Human-facing metadata describing the manifest.
    pub manifest: ManifestMeta,
    /// Ordered list of typed ports.
    // Note: `normalize` re-sorts this list by port id.
    pub ports: Vec<Port>,
}
75
76impl Manifest {
    /// Construct a manifest by reading YAML from any reader.
    ///
    /// This only deserializes (via `serde_yaml::from_reader`); it does not call
    /// [`Manifest::validate`].
    ///
    /// # Errors
    /// Returns the `serde_yaml::Error` reported by the deserializer.
    pub fn from_yaml_reader<R: std::io::Read>(reader: R) -> Result<Self, serde_yaml::Error> {
        serde_yaml::from_reader(reader)
    }
81
    /// Construct a manifest from a YAML string slice.
    ///
    /// This only deserializes (via `serde_yaml::from_str`); it does not call
    /// [`Manifest::validate`].
    ///
    /// # Errors
    /// Returns the `serde_yaml::Error` reported by the deserializer.
    pub fn from_yaml_str(yaml: &str) -> Result<Self, serde_yaml::Error> {
        serde_yaml::from_str(yaml)
    }
86
    /// Serialize this manifest to YAML.
    ///
    /// The manifest is emitted as-is; call [`Manifest::normalize`] first when a
    /// deterministic ordering is needed.
    ///
    /// # Errors
    /// Returns the `serde_yaml::Error` reported by the serializer.
    pub fn to_yaml(&self) -> Result<String, serde_yaml::Error> {
        serde_yaml::to_string(self)
    }
91
92    /// Normalize the manifest in-place for deterministic comparison.
93    ///
94    /// - Ports are sorted lexicographically by id.
95    /// - Tags (if any) are sorted and deduplicated.
96    /// - Table keys (if any) are sorted and deduplicated.
97    /// - Enumerated constraint values are sorted and deduplicated.
98    pub fn normalize(&mut self) {
99        if let Some(tags) = &mut self.manifest.tags {
100            tags.sort();
101            tags.dedup();
102        }
103
104        if let Some(capabilities) = &mut self.capabilities
105            && let Some(features) = &mut capabilities.features
106        {
107            features.sort();
108            features.dedup();
109        }
110
111        self.ports.sort_by(|a, b| a.id.cmp(&b.id));
112
113        for port in &mut self.ports {
114            if let Some(constraints) = &mut port.constraints {
115                canonicalize_enum(&mut constraints.r#enum);
116            }
117
118            match &mut port.schema {
119                Schema::Record(record) => {
120                    for field in record.fields.values_mut() {
121                        if let Some(constraints) = &mut field.constraints {
122                            canonicalize_enum(&mut constraints.r#enum);
123                        }
124                    }
125                }
126                Schema::Table(table) => {
127                    if let Some(keys) = &mut table.keys {
128                        keys.sort();
129                        keys.dedup();
130                    }
131                }
132                _ => {}
133            }
134        }
135    }
136
    /// Return a normalized copy of the manifest.
    ///
    /// Consumes `self`, applies [`Manifest::normalize`], and returns the result;
    /// clone first if the original ordering must be kept.
    pub fn normalized(mut self) -> Self {
        self.normalize();
        self
    }
142
143    /// Return the effective conformance profile for this manifest.
144    ///
145    /// When capabilities are omitted, the manifest is treated as `core-v0`.
146    pub fn effective_profile(&self) -> Profile {
147        self.capabilities
148            .as_ref()
149            .map(|c| c.profile)
150            .unwrap_or_default()
151    }
152
153    /// Validate the manifest and return granular issues when invariants fail.
154    pub fn validate(&self) -> Result<(), ValidationError> {
155        let mut issues = Vec::new();
156
157        if self.spec != SPEC_IDENT {
158            issues.push(ManifestIssue::new(
159                "spec",
160                format!(
161                    "expected spec identifier `{}`, found `{}`",
162                    SPEC_IDENT, self.spec
163                ),
164            ));
165        }
166
167        let current_version = Version::parse(CURRENT_SPEC_VERSION)
168            .expect("CURRENT_SPEC_VERSION must be valid semver");
169        let spec_version = &self.spec_version.0;
170        if spec_version.major != current_version.major {
171            issues.push(ManifestIssue::new(
172                "spec_version",
173                format!(
174                    "incompatible major version `{}` (expected `{}`)",
175                    spec_version, current_version.major
176                ),
177            ));
178        }
179
180        let id_pattern = Regex::new(r"^[a-z0-9][a-z0-9-]{1,62}[a-z0-9]$")
181            .expect("manifest id regex must compile");
182        if !id_pattern.is_match(&self.manifest.id) {
183            issues.push(ManifestIssue::new(
184                "manifest.id",
185                "id must be lowercase alphanumeric with hyphens, 3-64 chars".to_string(),
186            ));
187        }
188
189        let mut seen_ids = std::collections::HashSet::new();
190        let port_id_pattern =
191            Regex::new(r"^[a-z0-9]+([_-][a-z0-9]+)*$").expect("port id regex must compile");
192
193        let profile = self.effective_profile();
194
195        for (idx, port) in self.ports.iter().enumerate() {
196            let path = format!("ports[{}].id", idx);
197            if !port_id_pattern.is_match(&port.id) {
198                issues.push(ManifestIssue::new(
199                    &path,
200                    "port id must contain lowercase alphanumeric characters optionally separated by '-' or '_'"
201                        .to_string(),
202                ));
203            }
204            if !seen_ids.insert(&port.id) {
205                issues.push(ManifestIssue::new(
206                    &path,
207                    format!("duplicate port id `{}`", port.id),
208                ));
209            }
210
211            validate_port_selector(profile, port, idx, &mut issues);
212
213            if port.dir == Direction::Out && port.default.is_some() {
214                issues.push(ManifestIssue::new(
215                    format!("ports[{}].default", idx),
216                    "defaults may only be defined on `in` ports".to_string(),
217                ));
218            }
219
220            if let Selector::Layout(layout) = &port.location
221                && matches!(layout.layout.terminate, LayoutTermination::UntilMarker)
222                && layout
223                    .layout
224                    .marker_text
225                    .as_deref()
226                    .map(str::trim)
227                    .unwrap_or_default()
228                    .is_empty()
229            {
230                issues.push(ManifestIssue::new(
231                    format!("ports[{}].location.layout.marker_text", idx),
232                    "marker_text must be provided when terminate == \"until_marker\"".to_string(),
233                ));
234            }
235
236            if let Some(constraints) = &port.constraints {
237                let value_type = match &port.schema {
238                    Schema::Scalar(schema) => Some(schema.value_type),
239                    Schema::Range(schema) => Some(schema.cell_type),
240                    _ => None,
241                };
242                validate_constraints(
243                    constraints,
244                    value_type,
245                    format!("ports[{}].constraints", idx),
246                    &mut issues,
247                );
248            }
249
250            if port.shape == Shape::Record {
251                if let Schema::Record(record) = &port.schema {
252                    if record.fields.is_empty() {
253                        issues.push(ManifestIssue::new(
254                            format!("ports[{}].schema.fields", idx),
255                            "record schema must define at least one field".to_string(),
256                        ));
257                    }
258                } else {
259                    issues.push(ManifestIssue::new(
260                        format!("ports[{}].schema", idx),
261                        "record shape must use a record schema".to_string(),
262                    ));
263                }
264            }
265
266            if port.shape == Shape::Table {
267                if let Schema::Table(table) = &port.schema {
268                    if table.columns.is_empty() {
269                        issues.push(ManifestIssue::new(
270                            format!("ports[{}].schema.columns", idx),
271                            "table schema must define at least one column".to_string(),
272                        ));
273                    }
274                    if let Some(keys) = &table.keys {
275                        for key in keys {
276                            if !table.columns.iter().any(|c| &c.name == key) {
277                                issues.push(ManifestIssue::new(
278                                    format!("ports[{}].schema.keys", idx),
279                                    format!(
280                                        "key `{}` not found among table columns ({:?})",
281                                        key,
282                                        table
283                                            .columns
284                                            .iter()
285                                            .map(|c| c.name.clone())
286                                            .collect::<Vec<_>>()
287                                    ),
288                                ));
289                            }
290                        }
291                    }
292                } else {
293                    issues.push(ManifestIssue::new(
294                        format!("ports[{}].schema", idx),
295                        "table shape must use a table schema".to_string(),
296                    ));
297                }
298            }
299
300            if let Schema::Record(record) = &port.schema {
301                for (field_name, field) in &record.fields {
302                    if profile == Profile::CoreV0
303                        && matches!(field.location, FieldSelector::StructRef(_))
304                    {
305                        issues.push(ManifestIssue::new(
306                            format!("ports[{}].schema.fields.{}.location", idx, field_name),
307                            format!(
308                                "structured references are not permitted under profile `{}`",
309                                profile_label(profile)
310                            ),
311                        ));
312                    }
313
314                    if let Some(constraints) = &field.constraints {
315                        validate_constraints(
316                            constraints,
317                            Some(field.value_type),
318                            format!("ports[{}].schema.fields.{}.constraints", idx, field_name),
319                            &mut issues,
320                        );
321                    }
322                }
323            }
324        }
325
326        if issues.is_empty() {
327            Ok(())
328        } else {
329            Err(ValidationError::new(issues))
330        }
331    }
332}
333
/// Manifest metadata block.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ManifestMeta {
    /// Stable identifier for the manifest (lowercase alphanumeric + hyphen).
    // `Manifest::validate` enforces the pattern and a 3-64 character length.
    pub id: String,
    /// Human readable manifest name.
    pub name: String,
    // Optional free-text description.
    #[serde(default)]
    pub description: Option<String>,
    // Optional tags; `Manifest::normalize` sorts and deduplicates them.
    #[serde(default)]
    pub tags: Option<Vec<String>>,
    // Optional workbook hints (see `WorkbookMeta`).
    #[serde(default)]
    pub workbook: Option<WorkbookMeta>,
    // Optional map of arbitrary JSON values; not interpreted in the visible
    // part of this file.
    #[serde(default)]
    pub metadata: Option<BTreeMap<String, JsonValue>>,
}
351
/// Optional workbook descriptors. These fields are advisory hints for runtimes and may be ignored unless a runtime explicitly documents support.
// None of these fields is checked by `Manifest::validate`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct WorkbookMeta {
    #[serde(default)]
    /// Optional workbook URI (sharepoint://, file://, etc.).
    pub uri: Option<String>,
    #[serde(default)]
    /// Locale hint for parsing numbers/dates.
    pub locale: Option<String>,
    #[serde(default)]
    /// Expected Excel date system (1900 or 1904).
    // NOTE(review): stored as a plain `i32`; the 1900/1904 restriction is not
    // enforced in the visible part of this file.
    pub date_system: Option<i32>,
    #[serde(default)]
    /// Time zone identifier for datetime interpretation.
    pub timezone: Option<String>,
}
369
/// Port direction.
// Wire names are lowercase: `in`, `out`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum Direction {
    /// Input port (values provided to the workbook).
    In,
    /// Output port (values read from the workbook).
    // `Manifest::validate` rejects a `default` on ports with this direction.
    Out,
}
379
/// Port shape.
// Wire names are lowercase: `scalar`, `record`, `range`, `table`.
// `validate_port_selector` decides which selector kinds each shape may use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum Shape {
    /// Scalar value (single cell).
    Scalar,
    /// Record of named scalar fields.
    Record,
    /// Rectangular range with uniform type.
    Range,
    /// Table with named columns.
    Table,
}
393
/// Port definition.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Port {
    /// Unique identifier for the port within the manifest.
    // Syntax and uniqueness are checked by `Manifest::validate`.
    pub id: String,
    /// Direction (input or output).
    pub dir: Direction,
    /// Structural shape of the port.
    pub shape: Shape,
    #[serde(default)]
    /// Optional documentation.
    pub description: Option<String>,
    #[serde(default = "default_true")]
    /// Whether the port value is required (defaults to true).
    pub required: bool,
    /// Selector binding the port to a workbook region.
    pub location: Selector,
    /// Type information for values carried by the port.
    pub schema: Schema,
    #[serde(default)]
    /// Optional constraints applied to values.
    // Type-checked by `validate` only for scalar and range schemas.
    pub constraints: Option<Constraints>,
    #[serde(default)]
    /// Optional units metadata.
    pub units: Option<Units>,
    #[serde(default)]
    /// Optional default value (inputs only).
    // `validate` reports an issue when this is set on an `out` port.
    pub default: Option<JsonValue>,
    #[serde(default)]
    /// Reserved hint for future partitioning/sharding semantics. No effect in `core-v0` runtimes.
    pub partition_key: Option<bool>,
}
427
/// Serde default provider for `Port::required`: an omitted key means `true`.
fn default_true() -> bool {
    true
}
431
/// Selector union.
// Untagged: serde tries the variants in declaration order. Each variant struct
// denies unknown fields and requires exactly one distinct key (`a1`, `name`,
// `table`, `struct_ref`, `layout`), so the key present picks the variant.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum Selector {
    A1(SelectorA1),
    Name(SelectorName),
    Table(SelectorTable),
    StructRef(SelectorStructRef),
    Layout(SelectorLayout),
}
442
// `a1` selector payload; used by both `Selector` and `FieldSelector`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SelectorA1 {
    /// Absolute A1-style reference to a cell or range (e.g., `Sheet1!A1:C10`).
    // The reference syntax is not parsed in the visible part of this file.
    pub a1: String,
}
449
// `name` selector payload; used by both `Selector` and `FieldSelector`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SelectorName {
    /// Workbook-defined name (global or sheet scoped).
    pub name: String,
}
456
// `struct_ref` selector payload; used by both `Selector` and `FieldSelector`.
// Validation rejects it unless the effective profile is `full-v0`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SelectorStructRef {
    /// Excel structured reference syntax (e.g., `TblOrders[Qty]`). Reserved in `core-v0`; requires `full-v0` profile.
    pub struct_ref: String,
}
463
// `table` selector payload. `validate_port_selector` accepts it only on
// table-shaped ports under the `full-v0` profile.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SelectorTable {
    /// Workbook table selector (by Excel table name). Reserved in `core-v0`; requires `full-v0` profile.
    pub table: TableSelector,
}
470
/// Selector for an Excel table. Reserved in `core-v0`; requires `full-v0` profile.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct TableSelector {
    /// Excel table name.
    pub name: String,
    #[serde(default)]
    /// Optional target area within the table.
    // Deserializes to `None` when omitted; no default area is substituted here.
    pub area: Option<TableArea>,
}
481
// Area of an Excel table targeted by a `TableSelector`.
// Wire names are lowercase: `header`, `body`, `totals`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum TableArea {
    /// Header row of the table.
    Header,
    /// Table body rows (default).
    // NOTE(review): this enum has no `Default` impl and `TableSelector::area`
    // is optional, so the "default" must be applied by consumers — confirm.
    Body,
    /// Totals row.
    Totals,
}
492
// `layout` selector payload. `validate_port_selector` accepts it for record,
// range, and table ports but not for scalar ports.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct SelectorLayout {
    /// Declarative layout descriptor for header-based regions.
    pub layout: LayoutDescriptor,
}
499
/// Layout resolution behavior for layout selectors.
// Currently a single variant, serialized as `header_contiguous_v1`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "snake_case")]
pub enum LayoutKind {
    /// Header-driven layout using contiguous columns starting at `anchor_col`.
    #[default]
    HeaderContiguousV1,
}
508
// Describes where a header-based region starts and how it ends.
// `Manifest::validate` checks only the `terminate`/`marker_text` pairing.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct LayoutDescriptor {
    #[serde(default)]
    /// Layout resolution behavior (defaults to `header_contiguous_v1`).
    pub kind: LayoutKind,
    /// Sheet containing the layout.
    pub sheet: String,
    /// 1-based index of the header row.
    pub header_row: u32,
    /// Column letter where the layout begins.
    pub anchor_col: String,
    /// Termination rule for the layout.
    pub terminate: LayoutTermination,
    #[serde(default)]
    /// Marker text required when `terminate` equals `until_marker`.
    // A missing or whitespace-only value is reported by `validate` in that case.
    pub marker_text: Option<String>,
}
527
// Termination rule for a layout region.
// Wire names are snake_case: `first_blank_row`, `sheet_end`, `until_marker`.
// How each rule is resolved against a sheet is not defined in this file.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum LayoutTermination {
    FirstBlankRow,
    SheetEnd,
    // Requires a non-blank `LayoutDescriptor::marker_text` (see `Manifest::validate`).
    UntilMarker,
}
535
/// Schema union.
// Untagged: serde tries the variants in declaration order. Each variant struct
// denies unknown fields and has a distinct required key (`type`, `fields`,
// `cell_type`, `columns`), so at most one variant can match a given mapping.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum Schema {
    Scalar(ScalarSchema),
    Record(RecordSchema),
    Range(RangeSchema),
    Table(TableSchema),
}
545
// Schema for a single value; selected by the presence of the `type` key.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct ScalarSchema {
    #[serde(rename = "type")]
    /// Scalar value type.
    pub value_type: ValueType,
    #[serde(default)]
    /// Optional format hint.
    pub format: Option<String>,
}
556
// Schema for a record of named fields; selected by the `fields` key.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct RecordSchema {
    // Optional literal tag; only `record` is accepted.
    #[serde(default)]
    pub kind: RecordKind,
    /// Mapping of field names to scalar schema definitions.
    // A `BTreeMap`, so iteration and serialization follow field-name order.
    // `validate` requires at least one field on record-shaped ports.
    pub fields: BTreeMap<String, RecordField>,
}
565
// Single-variant tag for `RecordSchema::kind`; serialized as `record`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "lowercase")]
pub enum RecordKind {
    #[default]
    Record,
}
572
// One named field of a record schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct RecordField {
    #[serde(rename = "type")]
    /// Scalar value type for the field.
    pub value_type: ValueType,
    /// Selector resolving to the field cell.
    pub location: FieldSelector,
    // Optional constraints; validated against `value_type` and canonicalized
    // by `Manifest::normalize`.
    #[serde(default)]
    pub constraints: Option<Constraints>,
    // Optional units metadata for the field.
    #[serde(default)]
    pub units: Option<Units>,
}
586
// Selector union for record fields: the `Selector` variants minus `table` and
// `layout`. Untagged; the key present (`a1`, `name`, `struct_ref`) picks the
// variant. `StructRef` is rejected by `Manifest::validate` under `core-v0`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum FieldSelector {
    A1(SelectorA1),
    Name(SelectorName),
    StructRef(SelectorStructRef),
}
594
// Schema for a uniformly typed range; selected by the `cell_type` key.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct RangeSchema {
    // Optional literal tag; only `range` is accepted.
    #[serde(default)]
    pub kind: RangeKind,
    /// Value type enforced for each cell.
    pub cell_type: ValueType,
    // Optional format hint.
    #[serde(default)]
    pub format: Option<String>,
}
605
// Single-variant tag for `RangeSchema::kind`; serialized as `range`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "lowercase")]
pub enum RangeKind {
    #[default]
    Range,
}
612
// Schema for a table with named columns; selected by the `columns` key.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct TableSchema {
    // Optional literal tag; only `table` is accepted.
    #[serde(default)]
    pub kind: TableKind,
    /// Column definitions.
    // `validate` requires at least one column on table-shaped ports.
    pub columns: Vec<TableColumn>,
    #[serde(default)]
    /// Optional list of column names forming a logical primary key.
    // Each key must name an existing column (`validate`); `normalize` sorts
    // and deduplicates the list.
    pub keys: Option<Vec<String>>,
}
624
// Single-variant tag for `TableSchema::kind`; serialized as `table`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, Default)]
#[serde(rename_all = "lowercase")]
pub enum TableKind {
    #[default]
    Table,
}
631
// One column of a table schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct TableColumn {
    /// Column name exposed to clients.
    // Table `keys` are matched against this name by `Manifest::validate`.
    pub name: String,
    #[serde(rename = "type")]
    /// Scalar type for column cells.
    pub value_type: ValueType,
    #[serde(default)]
    /// Optional column letter hint when using layout selectors.
    pub col: Option<String>,
    // Optional format hint.
    #[serde(default)]
    pub format: Option<String>,
    // Optional units metadata for the column.
    #[serde(default)]
    pub units: Option<Units>,
}
648
/// Scalar value types.
// Wire names are lowercase (`string`, `number`, `integer`, `boolean`, `date`,
// `datetime`). Only `Number` and `Integer` count as numeric for `min`/`max`
// constraints (see `is_numeric_type`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ValueType {
    String,
    Number,
    Integer,
    Boolean,
    Date,
    Datetime,
}
660
/// Constraints applied to a port or field.
// Consistency rules are enforced by `validate_constraints`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Constraints {
    #[serde(default)]
    /// Minimum allowed numeric value.
    // Must not exceed `max`; requires a numeric value type when the type is known.
    pub min: Option<f64>,
    #[serde(default)]
    /// Maximum allowed numeric value.
    // Requires a numeric value type when the type is known.
    pub max: Option<f64>,
    #[serde(default)]
    /// Enumerated set of allowed categorical values. Entries are compared by exact JSON equality after type checking; numeric values are not normalized (e.g., 5 != 5.0).
    // Must be non-empty when present; `Manifest::normalize` sorts entries by
    // their JSON serialization and removes duplicates.
    pub r#enum: Option<Vec<JsonValue>>,
    #[serde(default)]
    /// Regular expression pattern string.
    // Validation compiles it with the `regex` crate and reports syntax errors.
    pub pattern: Option<String>,
    #[serde(default)]
    /// Whether null/blank values are permitted.
    pub nullable: Option<bool>,
}
681
/// Units metadata (extensible).
// Unknown keys are currently rejected (`deny_unknown_fields`), so extending
// this block means adding fields here.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct Units {
    #[serde(default)]
    /// Currency code (ISO 4217).
    // The code is not checked in the visible part of this file.
    pub currency: Option<String>,
}
690
/// Wrapper around semver::Version for serde compatibility.
///
/// Serializes to and deserializes from the semver string form, and exposes a
/// hand-written `JsonSchema` (a pattern-constrained string). Comparison and
/// ordering delegate to the wrapped `Version`.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct SpecVersion(pub Version);
694
impl SpecVersion {
    /// Wrap an already-parsed semver `Version`.
    pub fn new(version: Version) -> Self {
        Self(version)
    }
}
700
701impl Serialize for SpecVersion {
702    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
703    where
704        S: Serializer,
705    {
706        serializer.serialize_str(&self.0.to_string())
707    }
708}
709
710impl<'de> Deserialize<'de> for SpecVersion {
711    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
712    where
713        D: Deserializer<'de>,
714    {
715        struct VersionVisitor;
716
717        impl<'de> Visitor<'de> for VersionVisitor {
718            type Value = SpecVersion;
719
720            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
721                formatter.write_str("semantic version string (e.g. 0.3.0)")
722            }
723
724            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
725            where
726                E: de::Error,
727            {
728                Version::parse(v)
729                    .map(SpecVersion)
730                    .map_err(|err| de::Error::custom(format!("invalid spec_version: {err}")))
731            }
732        }
733
734        deserializer.deserialize_str(VersionVisitor)
735    }
736}
737
738impl JsonSchema for SpecVersion {
739    fn schema_name() -> Cow<'static, str> {
740        Cow::Borrowed("SpecVersion")
741    }
742
743    fn json_schema(_gen: &mut SchemaGenerator) -> schemars::Schema {
744        json_schema!({
745            "type": "string",
746            "pattern": r"^[0-9]+\.[0-9]+\.[0-9]+(?:-[0-9A-Za-z-.]+)?(?:\+[0-9A-Za-z-.]+)?$"
747        })
748    }
749}
750
/// Enables `str::parse::<Manifest>()`; the input is treated as YAML.
impl std::str::FromStr for Manifest {
    type Err = serde_yaml::Error;

    /// Parse a YAML document by delegating to `Manifest::from_yaml_str`.
    /// No validation is performed.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Manifest::from_yaml_str(s)
    }
}
758
759fn profile_label(profile: Profile) -> &'static str {
760    match profile {
761        Profile::CoreV0 => "core-v0",
762        Profile::FullV0 => "full-v0",
763    }
764}
765
766fn profile_allows_struct_ref(profile: Profile) -> bool {
767    matches!(profile, Profile::FullV0)
768}
769
770fn profile_allows_table(profile: Profile) -> bool {
771    matches!(profile, Profile::FullV0)
772}
773
774fn validate_port_selector(
775    profile: Profile,
776    port: &Port,
777    idx: usize,
778    issues: &mut Vec<ManifestIssue>,
779) {
780    let path = format!("ports[{}].location", idx);
781    match port.shape {
782        Shape::Scalar => match &port.location {
783            Selector::A1(_) | Selector::Name(_) => {}
784            Selector::StructRef(_) if profile_allows_struct_ref(profile) => {}
785            Selector::StructRef(_) => issues.push(ManifestIssue::new(
786                &path,
787                format!(
788                    "structured references are not permitted under profile `{}`",
789                    profile_label(profile)
790                ),
791            )),
792            Selector::Layout(_) | Selector::Table(_) => issues.push(ManifestIssue::new(
793                &path,
794                "scalar ports may only use `a1`, `name`, or `struct_ref` selectors".to_string(),
795            )),
796        },
797        Shape::Record | Shape::Range => match &port.location {
798            Selector::A1(_) | Selector::Name(_) | Selector::Layout(_) => {}
799            Selector::StructRef(_) if profile_allows_struct_ref(profile) => {}
800            Selector::StructRef(_) => issues.push(ManifestIssue::new(
801                &path,
802                format!(
803                    "structured references are not permitted under profile `{}`",
804                    profile_label(profile)
805                ),
806            )),
807            Selector::Table(_) => issues.push(ManifestIssue::new(
808                &path,
809                "record/range ports may not use `table` selectors".to_string(),
810            )),
811        },
812        Shape::Table => match &port.location {
813            Selector::Layout(_) => {}
814            Selector::Table(_) if profile_allows_table(profile) => {}
815            Selector::Table(_) => issues.push(ManifestIssue::new(
816                &path,
817                format!(
818                    "`table` selectors are reserved and not permitted under profile `{}`",
819                    profile_label(profile)
820                ),
821            )),
822            Selector::A1(_) | Selector::Name(_) | Selector::StructRef(_) => {
823                issues.push(ManifestIssue::new(
824                    &path,
825                    "table ports must use `layout` selectors (or `table` selectors under full-v0)"
826                        .to_string(),
827                ))
828            }
829        },
830    }
831}
832
833fn canonicalize_enum(values: &mut Option<Vec<JsonValue>>) {
834    if let Some(list) = values {
835        list.sort_by_key(value_sort_key);
836        list.dedup();
837    }
838}
839
/// Render a JSON value as its compact serialization.
///
/// Used as the ordering key in `canonicalize_enum` and to display offending
/// values in enum validation messages. A serialization failure yields an empty
/// string instead of an error.
fn value_sort_key(value: &JsonValue) -> String {
    serde_json::to_string(value).unwrap_or_default()
}
843
844fn validate_constraints(
845    constraints: &Constraints,
846    value_type: Option<ValueType>,
847    base_path: String,
848    issues: &mut Vec<ManifestIssue>,
849) {
850    if let (Some(min), Some(max)) = (constraints.min, constraints.max)
851        && min > max
852    {
853        issues.push(ManifestIssue::new(
854            format!("{}.min", base_path),
855            format!("`min` value {min} exceeds `max` value {max}"),
856        ));
857    }
858
859    if let Some(vt) = value_type {
860        if constraints.min.is_some() && !is_numeric_type(vt) {
861            issues.push(ManifestIssue::new(
862                format!("{}.min", base_path),
863                format!("`min` constraint requires numeric type, found `{vt:?}`"),
864            ));
865        }
866        if constraints.max.is_some() && !is_numeric_type(vt) {
867            issues.push(ManifestIssue::new(
868                format!("{}.max", base_path),
869                format!("`max` constraint requires numeric type, found `{vt:?}`"),
870            ));
871        }
872    }
873
874    if let Some(enum_values) = &constraints.r#enum {
875        if enum_values.is_empty() {
876            issues.push(ManifestIssue::new(
877                format!("{}.enum", base_path),
878                "enumerated values must contain at least one entry".to_string(),
879            ));
880        } else if let Some(vt) = value_type {
881            for (i, candidate) in enum_values.iter().enumerate() {
882                if let Err(message) = validate_enum_candidate(vt, candidate) {
883                    issues.push(ManifestIssue::new(
884                        format!("{}.enum[{}]", base_path, i),
885                        message,
886                    ));
887                }
888            }
889        }
890    }
891
892    if let Some(pattern) = &constraints.pattern
893        && let Err(err) = Regex::new(pattern)
894    {
895        issues.push(ManifestIssue::new(
896            format!("{}.pattern", base_path),
897            format!("invalid regex pattern `{pattern}`: {err}"),
898        ));
899    }
900}
901
902fn is_numeric_type(vt: ValueType) -> bool {
903    matches!(vt, ValueType::Number | ValueType::Integer)
904}
905
906fn validate_enum_candidate(vt: ValueType, candidate: &JsonValue) -> Result<(), String> {
907    use serde_json::Value as J;
908    match vt {
909        ValueType::String => match candidate {
910            J::String(_) => Ok(()),
911            other => Err(format!(
912                "enum value `{}` is not a string",
913                value_sort_key(other)
914            )),
915        },
916        ValueType::Boolean => match candidate {
917            J::Bool(_) => Ok(()),
918            other => Err(format!(
919                "enum value `{}` is not a boolean",
920                value_sort_key(other)
921            )),
922        },
923        ValueType::Number => match candidate {
924            J::Number(n) if n.as_f64().is_some() => Ok(()),
925            other => Err(format!(
926                "enum value `{}` is not numeric",
927                value_sort_key(other)
928            )),
929        },
930        ValueType::Integer => match candidate {
931            J::Number(n) => {
932                if n.as_i64().is_some() {
933                    Ok(())
934                } else if let Some(f) = n.as_f64() {
935                    if (f - f.trunc()).abs() < f64::EPSILON {
936                        Ok(())
937                    } else {
938                        Err(format!(
939                            "enum value `{}` is not an integer",
940                            value_sort_key(candidate)
941                        ))
942                    }
943                } else {
944                    Err(format!(
945                        "enum value `{}` is not numeric",
946                        value_sort_key(candidate)
947                    ))
948                }
949            }
950            other => Err(format!(
951                "enum value `{}` is not numeric",
952                value_sort_key(other)
953            )),
954        },
955        ValueType::Date => match candidate {
956            J::String(s) if parse_date_string(s) => Ok(()),
957            other => Err(format!(
958                "enum value `{}` is not a valid date",
959                value_sort_key(other)
960            )),
961        },
962        ValueType::Datetime => match candidate {
963            J::String(s) if parse_datetime_string(s) => Ok(()),
964            other => Err(format!(
965                "enum value `{}` is not a valid datetime",
966                value_sort_key(other)
967            )),
968        },
969    }
970}
971
972fn parse_date_string(raw: &str) -> bool {
973    chrono::NaiveDate::parse_from_str(raw, "%Y-%m-%d").is_ok()
974}
975
976fn parse_datetime_string(raw: &str) -> bool {
977    chrono::DateTime::parse_from_rfc3339(raw).is_ok()
978        || chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S").is_ok()
979        || chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S").is_ok()
980}
981
982pub(crate) mod example_data {
983    use super::*;
984
985    pub fn supply_planning_example() -> Manifest {
986        serde_json::from_value(serde_json::json!({
987            "spec": SPEC_IDENT,
988            "spec_version": CURRENT_SPEC_VERSION,
989            "capabilities": { "profile": "core-v0" },
990            "manifest": {
991                "id": "supply-planning-io",
992                "name": "Supply Planning I/O",
993                "description": "Expose the workbook as a function that ingests inventory data and produces restock recommendations.",
994                "workbook": {
995                    "uri": "file://Samples/SupplyPlan.xlsx",
996                    "locale": "en-US",
997                    "date_system": 1900
998                }
999            },
1000            "ports": [
1001                {
1002                    "id": "warehouse_code",
1003                    "dir": "in",
1004                    "shape": "scalar",
1005                    "description": "Warehouse identifier used for restock planning.",
1006                    "location": { "a1": "Inputs!B2" },
1007                    "schema": { "type": "string" },
1008                    "constraints": { "pattern": "^[A-Z]{2}-\\d{3}$" }
1009                },
1010                {
1011                    "id": "planning_window",
1012                    "dir": "in",
1013                    "shape": "record",
1014                    "description": "Planning horizon (month and year).",
1015                    "location": { "a1": "Inputs!B1:C1" },
1016                    "schema": {
1017                        "kind": "record",
1018                        "fields": {
1019                            "month": {
1020                                "type": "integer",
1021                                "location": { "a1": "Inputs!B1" },
1022                                "constraints": { "min": 1, "max": 12 }
1023                            },
1024                            "year": {
1025                                "type": "integer",
1026                                "location": { "a1": "Inputs!C1" }
1027                            }
1028                        }
1029                    }
1030                },
1031                {
1032                    "id": "sku_inventory",
1033                    "dir": "in",
1034                    "shape": "table",
1035                    "description": "Current inventory snapshot by SKU.",
1036                    "location": {
1037                        "layout": {
1038                            "sheet": "Inventory",
1039                            "header_row": 1,
1040                            "anchor_col": "A",
1041                            "terminate": "first_blank_row"
1042                        }
1043                    },
1044                    "schema": {
1045                        "kind": "table",
1046                        "columns": [
1047                            { "name": "sku", "type": "string", "col": "A" },
1048                            { "name": "description", "type": "string", "col": "B" },
1049                            { "name": "on_hand", "type": "integer", "col": "C" },
1050                            { "name": "safety_stock", "type": "integer", "col": "D" },
1051                            { "name": "lead_time_days", "type": "integer", "col": "E" }
1052                        ],
1053                        "keys": ["sku"]
1054                    }
1055                },
1056                {
1057                    "id": "restock_summary",
1058                    "dir": "out",
1059                    "shape": "record",
1060                    "description": "High-level metrics summarizing the recommended restock.",
1061                    "location": { "a1": "Outputs!B2:B6" },
1062                    "schema": {
1063                        "kind": "record",
1064                        "fields": {
1065                            "total_skus": { "type": "integer", "location": { "a1": "Outputs!B2" } },
1066                            "units_to_order": { "type": "integer", "location": { "a1": "Outputs!B3" } },
1067                            "estimated_cost": { "type": "number", "location": { "a1": "Outputs!B4" }, "units": { "currency": "USD" } },
1068                            "next_restock_date": { "type": "date", "location": { "a1": "Outputs!B5" } }
1069                        }
1070                    }
1071                }
1072            ]
1073        }))
1074        .expect("example manifest should be valid")
1075    }
1076}