chasm/schema/types.rs
1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Core schema type definitions
4//!
5//! Defines the vocabulary for describing AI chat provider database schemas
6//! in a machine-readable, version-aware format.
7
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11// ============================================================================
12// Schema Version Identifier
13// ============================================================================
14
15/// Unique identifier for a provider schema version.
16///
17/// Format: `{provider}-{format}-v{version}`
18/// Examples: `copilot-json-v3`, `copilot-jsonl-v1`, `cursor-json-v1`
19#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub struct SchemaVersion {
21 /// Provider identifier (e.g., "copilot", "cursor", "claude-code")
22 pub provider: String,
23 /// Format identifier (e.g., "json", "jsonl", "sqlite", "markdown")
24 pub format: FormatType,
25 /// Schema version number (monotonically increasing per provider+format)
26 pub version: u32,
27 /// Human-readable label
28 pub label: String,
29}
30
31impl SchemaVersion {
32 /// Create a new schema version identifier
33 pub fn new(provider: &str, format: FormatType, version: u32, label: &str) -> Self {
34 Self {
35 provider: provider.to_string(),
36 format,
37 version,
38 label: label.to_string(),
39 }
40 }
41
42 /// Get the canonical string ID: `{provider}-{format}-v{version}`
43 pub fn id(&self) -> String {
44 format!(
45 "{}-{}-v{}",
46 self.provider,
47 self.format.as_str(),
48 self.version
49 )
50 }
51}
52
53impl std::fmt::Display for SchemaVersion {
54 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55 write!(f, "{}", self.id())
56 }
57}
58
59// ============================================================================
60// Format & Storage Types
61// ============================================================================
62
63/// Session file format
64#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
65#[serde(rename_all = "kebab-case")]
66pub enum FormatType {
67 /// Single JSON object per file
68 Json,
69 /// JSON Lines (one event per line, event-sourced)
70 Jsonl,
71 /// SQLite database
72 Sqlite,
73 /// Markdown text files
74 Markdown,
75 /// Binary / proprietary format
76 Binary,
77 /// OpenAI API-compatible JSON
78 OpenAiApi,
79}
80
81impl FormatType {
82 pub fn as_str(&self) -> &'static str {
83 match self {
84 Self::Json => "json",
85 Self::Jsonl => "jsonl",
86 Self::Sqlite => "sqlite",
87 Self::Markdown => "markdown",
88 Self::Binary => "binary",
89 Self::OpenAiApi => "openai-api",
90 }
91 }
92}
93
94impl std::fmt::Display for FormatType {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 write!(f, "{}", self.as_str())
97 }
98}
99
100/// Where session data is stored
101#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
102#[serde(rename_all = "kebab-case")]
103pub enum StorageType {
104 /// Flat files in a directory (one file per session)
105 FilePerSession,
106 /// SQLite database (state.vscdb or custom)
107 SqliteDb,
108 /// SQLite key-value store (VS Code ItemTable pattern)
109 SqliteKeyValue,
110 /// Cloud API (no local storage, fetched on demand)
111 CloudApi,
112 /// Hybrid: files on disk + metadata in SQLite
113 Hybrid,
114}
115
116// ============================================================================
117// Storage Location
118// ============================================================================
119
120/// Platform-aware storage location descriptor
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct StorageLocation {
123 /// Description of where data lives
124 pub description: String,
125 /// Path pattern with platform placeholders
126 /// e.g., `{APPDATA}/Code/User/workspaceStorage/{hash}/chatSessions/`
127 pub path_pattern: String,
128 /// Platform-specific path overrides
129 #[serde(default)]
130 pub platform_paths: HashMap<String, String>,
131 /// Storage mechanism
132 pub storage_type: StorageType,
133 /// File extension filter (e.g., ".jsonl", ".json")
134 #[serde(default)]
135 pub file_extensions: Vec<String>,
136}
137
138// ============================================================================
139// Provider Schema (top-level)
140// ============================================================================
141
142/// Complete schema definition for one provider at one version.
143///
144/// This is the primary unit of the schema registry — it fully describes
145/// how a provider stores, structures, and indexes chat session data.
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct ProviderSchema {
148 /// Unique version identifier
149 pub version: SchemaVersion,
150
151 /// Extension/application version range this schema applies to
152 /// e.g., "0.25.0" .. "0.36.99" for Copilot JSON
153 pub extension_version_min: Option<String>,
154 pub extension_version_max: Option<String>,
155
156 /// Minimum host application version (e.g., VS Code 1.98.0)
157 pub host_version_min: Option<String>,
158
159 /// When this schema was first observed / introduced
160 pub introduced: Option<String>,
161 /// When this schema was deprecated (superseded by a newer version)
162 pub deprecated: Option<String>,
163
164 /// Where session data is stored
165 pub storage: StorageLocation,
166
167 /// Session file/record schema
168 pub session_schema: SessionFormatSchema,
169
170 /// Database keys and their schemas (for SQLite key-value stores like state.vscdb)
171 #[serde(default)]
172 pub db_keys: Vec<DbKeySchema>,
173
174 /// Human-readable notes about this schema version
175 #[serde(default)]
176 pub notes: Vec<String>,
177
178 /// Known breaking changes from the previous version
179 #[serde(default)]
180 pub breaking_changes: Vec<String>,
181
182 /// Tags for ontology classification
183 #[serde(default)]
184 pub tags: Vec<String>,
185}
186
187impl ProviderSchema {
188 /// Get the total number of fields in the session schema
189 pub fn field_count(&self) -> usize {
190 self.session_schema.fields.len()
191 }
192
193 /// Get the schema ID
194 pub fn id(&self) -> String {
195 self.version.id()
196 }
197}
198
199// ============================================================================
200// Session Format Schema
201// ============================================================================
202
203/// Schema for the session file/record format
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct SessionFormatSchema {
206 /// Top-level description
207 pub description: String,
208 /// The format type
209 pub format: FormatType,
210 /// List of fields with types and constraints
211 pub fields: Vec<FieldSchema>,
212 /// Nested object schemas (e.g., "request", "message", "response")
213 #[serde(default)]
214 pub nested_objects: HashMap<String, Vec<FieldSchema>>,
215 /// Example JSON for this format
216 #[serde(default)]
217 pub example: Option<serde_json::Value>,
218}
219
220// ============================================================================
221// Field Schema
222// ============================================================================
223
224/// Schema for a single field in a session record
225#[derive(Debug, Clone, Serialize, Deserialize)]
226pub struct FieldSchema {
227 /// Field name (as it appears in the JSON/data)
228 pub name: String,
229 /// camelCase name used in serialization
230 #[serde(default)]
231 pub serialized_name: Option<String>,
232 /// Data type
233 pub data_type: DataType,
234 /// Whether this field is required
235 #[serde(default)]
236 pub required: bool,
237 /// Default value (as JSON)
238 #[serde(default)]
239 pub default_value: Option<serde_json::Value>,
240 /// Human-readable description
241 pub description: String,
242 /// Constraints (value ranges, patterns, enums)
243 #[serde(default)]
244 pub constraints: Vec<FieldConstraint>,
245 /// Semantic tag for ontology mapping
246 #[serde(default)]
247 pub semantic_tag: Option<String>,
248 /// Version this field was introduced
249 #[serde(default)]
250 pub since_version: Option<String>,
251 /// Version this field was removed
252 #[serde(default)]
253 pub removed_in: Option<String>,
254}
255
256/// Supported data types
257#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
258#[serde(rename_all = "kebab-case")]
259pub enum DataType {
260 String,
261 Integer,
262 Float,
263 Boolean,
264 Timestamp,
265 Uuid,
266 Json,
267 Array(Box<DataType>),
268 Object(std::string::String),
269 Enum(Vec<std::string::String>),
270 Uri,
271 Base64,
272 Optional(Box<DataType>),
273}
274
275impl std::fmt::Display for DataType {
276 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277 match self {
278 Self::String => write!(f, "string"),
279 Self::Integer => write!(f, "integer"),
280 Self::Float => write!(f, "float"),
281 Self::Boolean => write!(f, "boolean"),
282 Self::Timestamp => write!(f, "timestamp"),
283 Self::Uuid => write!(f, "uuid"),
284 Self::Json => write!(f, "json"),
285 Self::Array(inner) => write!(f, "array<{}>", inner),
286 Self::Object(name) => write!(f, "object<{}>", name),
287 Self::Enum(variants) => write!(f, "enum({})", variants.join("|")),
288 Self::Uri => write!(f, "uri"),
289 Self::Base64 => write!(f, "base64"),
290 Self::Optional(inner) => write!(f, "optional<{}>", inner),
291 }
292 }
293}
294
295/// Constraints on field values
296#[derive(Debug, Clone, Serialize, Deserialize)]
297#[serde(tag = "type")]
298pub enum FieldConstraint {
299 /// Minimum value (inclusive)
300 #[serde(rename = "min")]
301 Min { value: serde_json::Value },
302 /// Maximum value (inclusive)
303 #[serde(rename = "max")]
304 Max { value: serde_json::Value },
305 /// Allowed values
306 #[serde(rename = "enum")]
307 Enum { values: Vec<serde_json::Value> },
308 /// Regex pattern
309 #[serde(rename = "pattern")]
310 Pattern { pattern: String },
311 /// Reference to another entity
312 #[serde(rename = "foreign_key")]
313 ForeignKey { entity: String, field: String },
314}
315
316// ============================================================================
317// Database Key Schema (for SQLite KV stores)
318// ============================================================================
319
320/// Schema for a key in a SQLite key-value store (like VS Code's state.vscdb)
321#[derive(Debug, Clone, Serialize, Deserialize)]
322pub struct DbKeySchema {
323 /// The key name (e.g., "chat.ChatSessionStore.index")
324 pub key: String,
325 /// Human-readable description
326 pub description: String,
327 /// The data type of the value (typically JSON)
328 pub value_type: DataType,
329 /// Schema of the JSON value (if value_type is Json/Object)
330 #[serde(default)]
331 pub value_fields: Vec<FieldSchema>,
332 /// Whether this key is required for the provider to function
333 #[serde(default)]
334 pub required: bool,
335 /// Version this key was introduced
336 #[serde(default)]
337 pub since_version: Option<String>,
338 /// Version this key was removed/renamed
339 #[serde(default)]
340 pub removed_in: Option<String>,
341 /// If renamed, the new key name
342 #[serde(default)]
343 pub renamed_to: Option<String>,
344}