// cortex_runtime/compiler/models.rs

//! Core data types for the Web Compiler's compiled schema output.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6/// A fully compiled schema for a single website domain.
7#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct CompiledSchema {
9    /// The domain this schema was compiled from.
10    pub domain: String,
11    /// When this schema was compiled.
12    pub compiled_at: DateTime<Utc>,
13    /// Discovered typed data models (Product, Article, etc.).
14    pub models: Vec<DataModel>,
15    /// Compiled HTTP actions (search, add_to_cart, etc.).
16    pub actions: Vec<CompiledAction>,
17    /// Relationships between models (belongs_to, has_many, etc.).
18    pub relationships: Vec<ModelRelationship>,
19    /// Compilation statistics.
20    pub stats: SchemaStats,
21}
22
23/// A typed data model inferred from structured data on the site.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct DataModel {
26    /// Human-readable name: "Product", "Article", etc.
27    pub name: String,
28    /// The Schema.org `@type` value this model corresponds to.
29    pub schema_org_type: String,
30    /// Fields discovered across all instances.
31    pub fields: Vec<ModelField>,
32    /// How many nodes matched this model type.
33    pub instance_count: usize,
34    /// Example URLs of this type (first 5).
35    pub example_urls: Vec<String>,
36    /// Search action for this model, if one exists.
37    pub search_action: Option<CompiledAction>,
38    /// Listing page URL for this type, if discovered.
39    pub list_url: Option<String>,
40}
41
42/// A single field within a data model.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ModelField {
45    /// Field name: "price", "rating", "name", etc.
46    pub name: String,
47    /// Inferred type.
48    pub field_type: FieldType,
49    /// Where this field was discovered.
50    pub source: FieldSource,
51    /// Confidence in the field's type and value (0.0-1.0).
52    pub confidence: f32,
53    /// Whether this field is absent on some instances.
54    pub nullable: bool,
55    /// Example values seen (first 5 unique).
56    pub example_values: Vec<String>,
57    /// Feature vector dimension this field maps to, if any.
58    pub feature_dim: Option<usize>,
59}
60
61/// Inferred type for a model field.
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
63pub enum FieldType {
64    String,
65    Float,
66    Integer,
67    Bool,
68    DateTime,
69    Url,
70    Enum(Vec<String>),
71    Object(String),
72    Array(Box<FieldType>),
73}
74
75impl FieldType {
76    /// Convert to Python type annotation string.
77    pub fn to_python_type(&self) -> String {
78        match self {
79            Self::String => "str".to_string(),
80            Self::Float => "float".to_string(),
81            Self::Integer => "int".to_string(),
82            Self::Bool => "bool".to_string(),
83            Self::DateTime => "datetime".to_string(),
84            Self::Url => "str".to_string(),
85            Self::Enum(variants) => {
86                let joined = variants
87                    .iter()
88                    .map(|v| format!("\"{v}\""))
89                    .collect::<Vec<_>>()
90                    .join(", ");
91                format!("Literal[{joined}]")
92            }
93            Self::Object(name) => format!("'{name}'"),
94            Self::Array(inner) => format!("List[{}]", inner.to_python_type()),
95        }
96    }
97
98    /// Convert to TypeScript type annotation string.
99    pub fn to_ts_type(&self) -> String {
100        match self {
101            Self::String | Self::Url | Self::DateTime => "string".to_string(),
102            Self::Float | Self::Integer => "number".to_string(),
103            Self::Bool => "boolean".to_string(),
104            Self::Enum(variants) => {
105                let joined = variants
106                    .iter()
107                    .map(|v| format!("'{v}'"))
108                    .collect::<Vec<_>>()
109                    .join(" | ");
110                joined
111            }
112            Self::Object(name) => name.clone(),
113            Self::Array(inner) => format!("{}[]", inner.to_ts_type()),
114        }
115    }
116
117    /// Convert to OpenAPI schema JSON type.
118    pub fn to_openapi_type(&self) -> serde_json::Value {
119        match self {
120            Self::String | Self::Url | Self::DateTime => {
121                serde_json::json!({"type": "string"})
122            }
123            Self::Float => serde_json::json!({"type": "number"}),
124            Self::Integer => serde_json::json!({"type": "integer"}),
125            Self::Bool => serde_json::json!({"type": "boolean"}),
126            Self::Enum(variants) => {
127                serde_json::json!({"type": "string", "enum": variants})
128            }
129            Self::Object(name) => {
130                serde_json::json!({"$ref": format!("#/components/schemas/{name}")})
131            }
132            Self::Array(inner) => {
133                serde_json::json!({"type": "array", "items": inner.to_openapi_type()})
134            }
135        }
136    }
137
138    /// Convert to GraphQL type name.
139    pub fn to_graphql_type(&self) -> String {
140        match self {
141            Self::String | Self::Url | Self::DateTime => "String".to_string(),
142            Self::Float => "Float".to_string(),
143            Self::Integer => "Int".to_string(),
144            Self::Bool => "Boolean".to_string(),
145            Self::Enum(variants) => {
146                // GraphQL enums need a name; we'll use context to generate one
147                let _ = variants;
148                "String".to_string()
149            }
150            Self::Object(name) => name.clone(),
151            Self::Array(inner) => format!("[{}]", inner.to_graphql_type()),
152        }
153    }
154}
155
156/// Source of a discovered field.
157#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
158pub enum FieldSource {
159    /// From JSON-LD structured data (highest confidence: 0.99).
160    JsonLd,
161    /// From data-* HTML attributes (confidence: 0.95).
162    DataAttribute,
163    /// From meta tags (OG, Twitter, etc.) (confidence: 0.90).
164    MetaTag,
165    /// From CSS pattern engine selectors (confidence: 0.85).
166    CssPattern,
167    /// From ARIA labels (confidence: 0.80).
168    AriaLabel,
169    /// Inferred from feature vector values (confidence: 0.70).
170    Inferred,
171}
172
173impl FieldSource {
174    /// Default confidence for this source type.
175    pub fn default_confidence(&self) -> f32 {
176        match self {
177            Self::JsonLd => 0.99,
178            Self::DataAttribute => 0.95,
179            Self::MetaTag => 0.90,
180            Self::CssPattern => 0.85,
181            Self::AriaLabel => 0.80,
182            Self::Inferred => 0.70,
183        }
184    }
185}
186
187/// A compiled HTTP action (method) on a model.
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct CompiledAction {
190    /// Method name: "add_to_cart", "search", "checkout", etc.
191    pub name: String,
192    /// Which model this action belongs to: "Product", "Cart", "Site".
193    pub belongs_to: String,
194    /// True if this action requires a specific node (instance method).
195    pub is_instance_method: bool,
196    /// HTTP method: "GET", "POST", etc.
197    pub http_method: String,
198    /// URL endpoint template: "/cart/add.js", "/s?k={query}".
199    pub endpoint_template: String,
200    /// Parameters for this action.
201    pub params: Vec<ActionParam>,
202    /// Whether authentication is required.
203    pub requires_auth: bool,
204    /// Execution path: "http", "websocket", "webmcp", "browser".
205    pub execution_path: String,
206    /// Confidence in the compiled action (0.0-1.0).
207    pub confidence: f32,
208}
209
210/// A parameter for a compiled action.
211#[derive(Debug, Clone, Serialize, Deserialize)]
212pub struct ActionParam {
213    /// Parameter name.
214    pub name: String,
215    /// Inferred type.
216    pub param_type: FieldType,
217    /// Whether this parameter is required.
218    pub required: bool,
219    /// Default value, if known.
220    pub default_value: Option<String>,
221    /// Where this parameter appears: "form_field", "url_param", "json_body", "path_param".
222    pub source: String,
223}
224
225/// A relationship between two models.
226#[derive(Debug, Clone, Serialize, Deserialize)]
227pub struct ModelRelationship {
228    /// Source model name.
229    pub from_model: String,
230    /// Target model name.
231    pub to_model: String,
232    /// Relationship name: "sold_by", "has_reviews", "similar_to".
233    pub name: String,
234    /// Cardinality.
235    pub cardinality: Cardinality,
236    /// Number of edges backing this relationship.
237    pub edge_count: usize,
238    /// Hint for graph traversal to navigate this relationship.
239    pub traversal_hint: TraversalHint,
240}
241
242/// Cardinality of a model relationship.
243#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
244pub enum Cardinality {
245    /// Many-to-one.
246    BelongsTo,
247    /// One-to-many.
248    HasMany,
249    /// One-to-one.
250    HasOne,
251    /// Many-to-many.
252    ManyToMany,
253}
254
255/// Hint for how to traverse a relationship in the graph.
256#[derive(Debug, Clone, Serialize, Deserialize)]
257pub struct TraversalHint {
258    /// Edge types to follow.
259    pub edge_types: Vec<String>,
260    /// Whether to follow forward edges (from → to).
261    pub forward: bool,
262}
263
264/// Compilation statistics.
265#[derive(Debug, Clone, Serialize, Deserialize)]
266pub struct SchemaStats {
267    /// Number of models discovered.
268    pub total_models: usize,
269    /// Total fields across all models.
270    pub total_fields: usize,
271    /// Total node instances that matched models.
272    pub total_instances: usize,
273    /// Average confidence across all fields.
274    pub avg_confidence: f32,
275}
276
277/// Files generated by the code generator.
278#[derive(Debug, Clone, Serialize, Deserialize)]
279pub struct GeneratedFiles {
280    /// Paths of generated files.
281    pub files: Vec<GeneratedFile>,
282}
283
284/// A single generated file.
285#[derive(Debug, Clone, Serialize, Deserialize)]
286pub struct GeneratedFile {
287    /// Relative filename: "client.py", "openapi.yaml", etc.
288    pub filename: String,
289    /// Size in bytes.
290    pub size: usize,
291    /// Content of the file.
292    pub content: String,
293}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Python annotations for every `FieldType` variant, including nesting.
    #[test]
    fn test_field_type_python_conversion() {
        assert_eq!(FieldType::String.to_python_type(), "str");
        assert_eq!(FieldType::Float.to_python_type(), "float");
        assert_eq!(FieldType::Integer.to_python_type(), "int");
        assert_eq!(FieldType::Bool.to_python_type(), "bool");
        assert_eq!(FieldType::Url.to_python_type(), "str");
        assert_eq!(FieldType::DateTime.to_python_type(), "datetime");
        assert_eq!(
            FieldType::Enum(vec!["a".into(), "b".into()]).to_python_type(),
            "Literal[\"a\", \"b\"]"
        );
        assert_eq!(
            FieldType::Array(Box::new(FieldType::String)).to_python_type(),
            "List[str]"
        );
        // Objects are emitted as quoted forward references.
        assert_eq!(
            FieldType::Object("Product".into()).to_python_type(),
            "'Product'"
        );
        assert_eq!(
            FieldType::Array(Box::new(FieldType::Object("Product".into()))).to_python_type(),
            "List['Product']"
        );
    }

    /// TypeScript annotations for every `FieldType` variant.
    #[test]
    fn test_field_type_typescript_conversion() {
        assert_eq!(FieldType::String.to_ts_type(), "string");
        assert_eq!(FieldType::Url.to_ts_type(), "string");
        assert_eq!(FieldType::DateTime.to_ts_type(), "string");
        assert_eq!(FieldType::Float.to_ts_type(), "number");
        assert_eq!(FieldType::Integer.to_ts_type(), "number");
        assert_eq!(FieldType::Bool.to_ts_type(), "boolean");
        assert_eq!(
            FieldType::Enum(vec!["a".into(), "b".into()]).to_ts_type(),
            "'a' | 'b'"
        );
        assert_eq!(FieldType::Object("Product".into()).to_ts_type(), "Product");
        assert_eq!(
            FieldType::Array(Box::new(FieldType::String)).to_ts_type(),
            "string[]"
        );
    }

    /// Every source tier reports its documented default confidence.
    #[test]
    fn test_field_source_confidence() {
        assert_eq!(FieldSource::JsonLd.default_confidence(), 0.99);
        assert_eq!(FieldSource::DataAttribute.default_confidence(), 0.95);
        assert_eq!(FieldSource::MetaTag.default_confidence(), 0.90);
        assert_eq!(FieldSource::CssPattern.default_confidence(), 0.85);
        assert_eq!(FieldSource::AriaLabel.default_confidence(), 0.80);
        assert_eq!(FieldSource::Inferred.default_confidence(), 0.70);
    }

    /// OpenAPI schema fragments, including `$ref` objects and array items.
    #[test]
    fn test_field_type_openapi_conversion() {
        let val = FieldType::Float.to_openapi_type();
        assert_eq!(val["type"], "number");

        let val = FieldType::Enum(vec!["x".into(), "y".into()]).to_openapi_type();
        assert_eq!(val["type"], "string");
        assert_eq!(val["enum"][0], "x");
        assert_eq!(val["enum"][1], "y");

        let val = FieldType::Object("Product".into()).to_openapi_type();
        assert_eq!(val["$ref"], "#/components/schemas/Product");

        let val = FieldType::Array(Box::new(FieldType::Integer)).to_openapi_type();
        assert_eq!(val["type"], "array");
        assert_eq!(val["items"]["type"], "integer");
    }

    /// GraphQL type names, including the string fallback for enums.
    #[test]
    fn test_field_type_graphql_conversion() {
        assert_eq!(FieldType::String.to_graphql_type(), "String");
        assert_eq!(FieldType::Url.to_graphql_type(), "String");
        assert_eq!(FieldType::DateTime.to_graphql_type(), "String");
        assert_eq!(FieldType::Float.to_graphql_type(), "Float");
        assert_eq!(FieldType::Integer.to_graphql_type(), "Int");
        assert_eq!(FieldType::Bool.to_graphql_type(), "Boolean");
        assert_eq!(
            FieldType::Enum(vec!["a".into(), "b".into()]).to_graphql_type(),
            "String"
        );
        assert_eq!(
            FieldType::Object("Product".into()).to_graphql_type(),
            "Product"
        );
        assert_eq!(
            FieldType::Array(Box::new(FieldType::Integer)).to_graphql_type(),
            "[Int]"
        );
    }
}