Skip to main content

chasm/schema/
types.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Core schema type definitions
4//!
5//! Defines the vocabulary for describing AI chat provider database schemas
6//! in a machine-readable, version-aware format.
7
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11// ============================================================================
12// Schema Version Identifier
13// ============================================================================
14
/// Unique identifier for a provider schema version.
///
/// Format: `{provider}-{format}-v{version}`
/// Examples: `copilot-json-v3`, `copilot-jsonl-v1`, `cursor-json-v1`
///
/// `label` is descriptive only and does not appear in the canonical ID
/// returned by `id()`. It is, however, included in the derived
/// `PartialEq`/`Eq`/`Hash` impls, so two values that differ only in `label`
/// compare unequal.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SchemaVersion {
    /// Provider identifier (e.g., "copilot", "cursor", "claude-code")
    pub provider: String,
    /// Format of the session data; rendered into the ID via `FormatType::as_str`
    /// (e.g., "json", "jsonl", "sqlite", "markdown")
    pub format: FormatType,
    /// Schema version number (monotonically increasing per provider+format)
    pub version: u32,
    /// Human-readable label (not part of the canonical ID)
    pub label: String,
}
30
31impl SchemaVersion {
32    /// Create a new schema version identifier
33    pub fn new(provider: &str, format: FormatType, version: u32, label: &str) -> Self {
34        Self {
35            provider: provider.to_string(),
36            format,
37            version,
38            label: label.to_string(),
39        }
40    }
41
42    /// Get the canonical string ID: `{provider}-{format}-v{version}`
43    pub fn id(&self) -> String {
44        format!(
45            "{}-{}-v{}",
46            self.provider,
47            self.format.as_str(),
48            self.version
49        )
50    }
51}
52
53impl std::fmt::Display for SchemaVersion {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        write!(f, "{}", self.id())
56    }
57}
58
59// ============================================================================
60// Format & Storage Types
61// ============================================================================
62
63/// Session file format
64#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
65#[serde(rename_all = "kebab-case")]
66pub enum FormatType {
67    /// Single JSON object per file
68    Json,
69    /// JSON Lines (one event per line, event-sourced)
70    Jsonl,
71    /// SQLite database
72    Sqlite,
73    /// Markdown text files
74    Markdown,
75    /// Binary / proprietary format
76    Binary,
77    /// OpenAI API-compatible JSON
78    OpenAiApi,
79}
80
81impl FormatType {
82    pub fn as_str(&self) -> &'static str {
83        match self {
84            Self::Json => "json",
85            Self::Jsonl => "jsonl",
86            Self::Sqlite => "sqlite",
87            Self::Markdown => "markdown",
88            Self::Binary => "binary",
89            Self::OpenAiApi => "openai-api",
90        }
91    }
92}
93
94impl std::fmt::Display for FormatType {
95    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96        write!(f, "{}", self.as_str())
97    }
98}
99
/// Where session data is stored
///
/// Serialized with kebab-case variant names (e.g. `FilePerSession` becomes
/// "file-per-session", `SqliteKeyValue` becomes "sqlite-key-value").
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum StorageType {
    /// Flat files in a directory (one file per session)
    FilePerSession,
    /// SQLite database (state.vscdb or custom)
    SqliteDb,
    /// SQLite key-value store (VS Code ItemTable pattern)
    SqliteKeyValue,
    /// Cloud API (no local storage, fetched on demand)
    CloudApi,
    /// Hybrid: files on disk + metadata in SQLite
    Hybrid,
}
115
116// ============================================================================
117// Storage Location
118// ============================================================================
119
/// Platform-aware storage location descriptor
///
/// `description`, `path_pattern` and `storage_type` must be present when
/// deserializing; `platform_paths` and `file_extensions` fall back to empty
/// collections when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageLocation {
    /// Description of where data lives
    pub description: String,
    /// Path pattern with platform placeholders
    /// e.g., `{APPDATA}/Code/User/workspaceStorage/{hash}/chatSessions/`
    pub path_pattern: String,
    /// Platform-specific path overrides (empty when omitted)
    // NOTE(review): the key vocabulary (presumably platform/OS names) is not
    // defined in this file — confirm against the code that resolves paths.
    #[serde(default)]
    pub platform_paths: HashMap<String, String>,
    /// Storage mechanism
    pub storage_type: StorageType,
    /// File extension filter (e.g., ".jsonl", ".json"); empty when omitted
    #[serde(default)]
    pub file_extensions: Vec<String>,
}
137
138// ============================================================================
139// Provider Schema (top-level)
140// ============================================================================
141
/// Complete schema definition for one provider at one version.
///
/// This is the primary unit of the schema registry — it fully describes
/// how a provider stores, structures, and indexes chat session data.
///
/// When deserializing, the list fields (`db_keys`, `notes`,
/// `breaking_changes`, `tags`) default to empty if omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProviderSchema {
    /// Unique version identifier
    pub version: SchemaVersion,

    /// Lower bound of the extension/application version range this schema
    /// applies to, e.g., "0.25.0" (of "0.25.0" .. "0.36.99") for Copilot JSON
    pub extension_version_min: Option<String>,
    /// Upper bound of the extension/application version range this schema
    /// applies to, e.g., "0.36.99" (of "0.25.0" .. "0.36.99") for Copilot JSON
    pub extension_version_max: Option<String>,

    /// Minimum host application version (e.g., VS Code 1.98.0)
    pub host_version_min: Option<String>,

    /// When this schema was first observed / introduced
    // NOTE(review): stored as a free-form string; whether this is a date or a
    // version is not specified in this file — confirm.
    pub introduced: Option<String>,
    /// When this schema was deprecated (superseded by a newer version)
    pub deprecated: Option<String>,

    /// Where session data is stored
    pub storage: StorageLocation,

    /// Session file/record schema
    pub session_schema: SessionFormatSchema,

    /// Database keys and their schemas (for SQLite key-value stores like state.vscdb)
    #[serde(default)]
    pub db_keys: Vec<DbKeySchema>,

    /// Human-readable notes about this schema version
    #[serde(default)]
    pub notes: Vec<String>,

    /// Known breaking changes from the previous version
    #[serde(default)]
    pub breaking_changes: Vec<String>,

    /// Tags for ontology classification
    #[serde(default)]
    pub tags: Vec<String>,
}
186
187impl ProviderSchema {
188    /// Get the total number of fields in the session schema
189    pub fn field_count(&self) -> usize {
190        self.session_schema.fields.len()
191    }
192
193    /// Get the schema ID
194    pub fn id(&self) -> String {
195        self.version.id()
196    }
197}
198
199// ============================================================================
200// Session Format Schema
201// ============================================================================
202
/// Schema for the session file/record format
///
/// `fields` holds the top-level field definitions; field lists for nested
/// objects are kept separately in `nested_objects`, keyed by object name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SessionFormatSchema {
    /// Top-level description
    pub description: String,
    /// The format type
    pub format: FormatType,
    /// List of fields with types and constraints
    pub fields: Vec<FieldSchema>,
    /// Nested object schemas (e.g., "request", "message", "response");
    /// empty when omitted
    #[serde(default)]
    pub nested_objects: HashMap<String, Vec<FieldSchema>>,
    /// Example JSON for this format (`None` when omitted)
    #[serde(default)]
    pub example: Option<serde_json::Value>,
}
219
220// ============================================================================
221// Field Schema
222// ============================================================================
223
/// Schema for a single field in a session record
///
/// Only `name`, `data_type` and `description` are mandatory when
/// deserializing; every other member carries `#[serde(default)]` and falls
/// back to `None`, `false`, or an empty list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldSchema {
    /// Field name (as it appears in the JSON/data)
    pub name: String,
    /// camelCase name used in serialization
    // NOTE(review): how this relates to `name` (which is also described as the
    // name appearing in the data) is not shown in this file — confirm.
    #[serde(default)]
    pub serialized_name: Option<String>,
    /// Data type
    pub data_type: DataType,
    /// Whether this field is required (defaults to `false` when omitted)
    #[serde(default)]
    pub required: bool,
    /// Default value (as JSON)
    #[serde(default)]
    pub default_value: Option<serde_json::Value>,
    /// Human-readable description
    pub description: String,
    /// Constraints (value ranges, patterns, enums); empty when omitted
    #[serde(default)]
    pub constraints: Vec<FieldConstraint>,
    /// Semantic tag for ontology mapping
    #[serde(default)]
    pub semantic_tag: Option<String>,
    /// Version this field was introduced
    #[serde(default)]
    pub since_version: Option<String>,
    /// Version this field was removed
    #[serde(default)]
    pub removed_in: Option<String>,
}
255
/// Supported data types
///
/// Variant names are serialized in kebab-case. The type is recursive:
/// `Array` and `Optional` wrap another boxed `DataType`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum DataType {
    /// Displayed as `string`
    String,
    /// Displayed as `integer`
    Integer,
    /// Displayed as `float`
    Float,
    /// Displayed as `boolean`
    Boolean,
    /// Displayed as `timestamp` (the concrete encoding is not specified here)
    Timestamp,
    /// Displayed as `uuid`
    Uuid,
    /// Displayed as `json`
    Json,
    /// Array of the boxed element type; displayed as `array<inner>`
    Array(Box<DataType>),
    /// Named object type; displayed as `object<name>`
    // NOTE(review): the name presumably refers to a key in
    // `SessionFormatSchema::nested_objects` — confirm.
    Object(std::string::String),
    /// Enumeration over the listed variant names; displayed as `enum(a|b|...)`
    Enum(Vec<std::string::String>),
    /// Displayed as `uri`
    Uri,
    /// Displayed as `base64`
    Base64,
    /// Optional wrapper around the boxed type; displayed as `optional<inner>`
    Optional(Box<DataType>),
}
274
275impl std::fmt::Display for DataType {
276    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
277        match self {
278            Self::String => write!(f, "string"),
279            Self::Integer => write!(f, "integer"),
280            Self::Float => write!(f, "float"),
281            Self::Boolean => write!(f, "boolean"),
282            Self::Timestamp => write!(f, "timestamp"),
283            Self::Uuid => write!(f, "uuid"),
284            Self::Json => write!(f, "json"),
285            Self::Array(inner) => write!(f, "array<{}>", inner),
286            Self::Object(name) => write!(f, "object<{}>", name),
287            Self::Enum(variants) => write!(f, "enum({})", variants.join("|")),
288            Self::Uri => write!(f, "uri"),
289            Self::Base64 => write!(f, "base64"),
290            Self::Optional(inner) => write!(f, "optional<{}>", inner),
291        }
292    }
293}
294
/// Constraints on field values
///
/// Serialized as an internally tagged enum: the variant is selected by a
/// `"type"` key that sits alongside the variant's own fields, e.g.
/// `{"type": "min", "value": 0}` or
/// `{"type": "foreign_key", "entity": "...", "field": "..."}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum FieldConstraint {
    /// Minimum value (inclusive)
    #[serde(rename = "min")]
    Min { value: serde_json::Value },
    /// Maximum value (inclusive)
    #[serde(rename = "max")]
    Max { value: serde_json::Value },
    /// Allowed values
    #[serde(rename = "enum")]
    Enum { values: Vec<serde_json::Value> },
    /// Regex pattern
    // NOTE(review): the regex dialect and anchoring rules are not specified
    // in this file — confirm against whatever evaluates the pattern.
    #[serde(rename = "pattern")]
    Pattern { pattern: String },
    /// Reference to another entity (`entity`) and the field within it (`field`)
    #[serde(rename = "foreign_key")]
    ForeignKey { entity: String, field: String },
}
315
316// ============================================================================
317// Database Key Schema (for SQLite KV stores)
318// ============================================================================
319
/// Schema for a key in a SQLite key-value store (like VS Code's state.vscdb)
///
/// `key`, `description` and `value_type` are mandatory when deserializing;
/// the remaining members default to empty / `false` / `None` when omitted.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DbKeySchema {
    /// The key name (e.g., "chat.ChatSessionStore.index")
    pub key: String,
    /// Human-readable description
    pub description: String,
    /// The data type of the value (typically JSON)
    pub value_type: DataType,
    /// Schema of the JSON value (if value_type is Json/Object); empty when omitted
    #[serde(default)]
    pub value_fields: Vec<FieldSchema>,
    /// Whether this key is required for the provider to function
    /// (defaults to `false` when omitted)
    #[serde(default)]
    pub required: bool,
    /// Version this key was introduced
    #[serde(default)]
    pub since_version: Option<String>,
    /// Version this key was removed/renamed
    #[serde(default)]
    pub removed_in: Option<String>,
    /// If renamed, the new key name
    #[serde(default)]
    pub renamed_to: Option<String>,
}