Skip to main content

chasm/schema/
ontology.rs

1// Copyright (c) 2024-2026 Nervosys LLC
2// SPDX-License-Identifier: AGPL-3.0-only
3//! Ontology layer for AI agent discoverability.
4//!
5//! Provides a structured, machine-readable description of:
6//! - Entity types across all providers (session, message, tool_call, etc.)
7//! - Semantic tags that map provider-specific field names to universal concepts
8//! - Cross-provider field mappings (field A in provider X ≡ field B in provider Y)
9//! - Migration paths between schema versions
10//! - Relationship graphs between entities
11//!
12//! AI agents can query the ontology to understand what data exists,
13//! where it lives, how to access it, and how to translate between providers.
14
15use crate::schema::types::*;
16use serde::{Deserialize, Serialize};
17use std::collections::HashMap;
18
19// ============================================================================
20// Semantic Tags
21// ============================================================================
22
23/// A semantic tag represents a universal concept that may appear under
24/// different field names in different providers.
25///
26/// Example: The concept "session creation time" is stored as:
27///   - `creationDate` in Copilot Chat
28///   - `dateCreated` in Continue.dev
29///   - `timestamp` (of first message) in Claude Code
30///
31/// The semantic tag `created_at` unifies all of these.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SemanticTag {
34    /// Canonical tag name (e.g., "created_at", "session_id", "message_text")
35    pub tag: String,
36    /// Human-readable description
37    pub description: String,
38    /// The universal data type for this concept
39    pub canonical_type: DataType,
40    /// Entity type this tag belongs to
41    pub entity: EntityType,
42    /// Related tags (e.g., "created_at" is related to "updated_at")
43    #[serde(default)]
44    pub related_tags: Vec<String>,
45}
46
47// ============================================================================
48// Entity Types
49// ============================================================================
50
51/// High-level entity types that appear across providers
52#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
53#[serde(rename_all = "snake_case")]
54pub enum EntityType {
55    /// A chat session / conversation
56    Session,
57    /// A single message (user or assistant turn)
58    Message,
59    /// A request-response pair (Copilot Chat's "request" unit)
60    RequestResponse,
61    /// An AI model
62    Model,
63    /// A tool/function call
64    ToolCall,
65    /// An AI agent
66    Agent,
67    /// A workspace / project context
68    Workspace,
69    /// Session index / registry metadata
70    SessionIndex,
71    /// Cache / UI state metadata
72    UiState,
73}
74
75impl std::fmt::Display for EntityType {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        match self {
78            Self::Session => write!(f, "session"),
79            Self::Message => write!(f, "message"),
80            Self::RequestResponse => write!(f, "request_response"),
81            Self::Model => write!(f, "model"),
82            Self::ToolCall => write!(f, "tool_call"),
83            Self::Agent => write!(f, "agent"),
84            Self::Workspace => write!(f, "workspace"),
85            Self::SessionIndex => write!(f, "session_index"),
86            Self::UiState => write!(f, "ui_state"),
87        }
88    }
89}
90
91// ============================================================================
92// Entity Relationships
93// ============================================================================
94
95/// A relationship between two entity types
96#[derive(Debug, Clone, Serialize, Deserialize)]
97pub struct EntityRelationship {
98    /// Source entity type
99    pub from: EntityType,
100    /// Target entity type
101    pub to: EntityType,
102    /// Relationship kind
103    pub kind: RelationshipKind,
104    /// Description of the relationship
105    pub description: String,
106}
107
108/// Kinds of relationships between entities
109#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
110#[serde(rename_all = "snake_case")]
111pub enum RelationshipKind {
112    /// One-to-many containment (session contains messages)
113    Contains,
114    /// Many-to-one reference (message belongs to session)
115    BelongsTo,
116    /// Many-to-many association (session uses models)
117    References,
118    /// One-to-one equivalence (session <-> index entry)
119    MapsTo,
120}
121
122// ============================================================================
123// Cross-Provider Mapping
124// ============================================================================
125
126/// A mapping between fields in two different provider schemas.
127///
128/// Used to translate data between providers (e.g., migrating sessions
129/// from Copilot to Cursor, or merging sessions from multiple providers).
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub struct CrossProviderMapping {
132    /// Source schema version ID
133    pub source_schema: String,
134    /// Source field path (dot-separated, e.g., "session.creationDate")
135    pub source_field: String,
136    /// Target schema version ID
137    pub target_schema: String,
138    /// Target field path
139    pub target_field: String,
140    /// Transformation required (if any)
141    pub transform: Option<FieldTransform>,
142    /// Confidence level (0.0 – 1.0)
143    pub confidence: f64,
144    /// Semantic tag that links these fields
145    pub semantic_tag: String,
146}
147
148/// Transformation to apply when mapping a field between providers
149#[derive(Debug, Clone, Serialize, Deserialize)]
150#[serde(tag = "type")]
151pub enum FieldTransform {
152    /// Direct copy (no transformation needed)
153    #[serde(rename = "identity")]
154    Identity,
155    /// Rename only (same value, different key)
156    #[serde(rename = "rename")]
157    Rename,
158    /// Type conversion (e.g., epoch ms → ISO 8601)
159    #[serde(rename = "type_convert")]
160    TypeConvert { from_type: String, to_type: String },
161    /// Value mapping (e.g., "model" → "assistant" for Gemini roles)
162    #[serde(rename = "value_map")]
163    ValueMap { mapping: HashMap<String, String> },
164    /// Structural transformation (e.g., flatten nested object)
165    #[serde(rename = "restructure")]
166    Restructure { description: String },
167    /// Custom transformation requiring code
168    #[serde(rename = "custom")]
169    Custom { description: String },
170}
171
172// ============================================================================
173// Migration Path
174// ============================================================================
175
176/// Describes how to migrate data from one schema version to another.
177#[derive(Debug, Clone, Serialize, Deserialize)]
178pub struct MigrationPath {
179    /// Source schema version ID
180    pub from_schema: String,
181    /// Target schema version ID
182    pub to_schema: String,
183    /// Whether the migration is lossless
184    pub lossless: bool,
185    /// Ordered list of field mappings to apply
186    pub mappings: Vec<CrossProviderMapping>,
187    /// Fields that will be lost in migration
188    #[serde(default)]
189    pub data_loss: Vec<String>,
190    /// Fields that will be added (with defaults)
191    #[serde(default)]
192    pub new_fields: Vec<String>,
193    /// Human-readable migration notes
194    #[serde(default)]
195    pub notes: Vec<String>,
196}
197
198// ============================================================================
199// Ontology (aggregate)
200// ============================================================================
201
202/// The complete ontology over all provider schemas.
203///
204/// AI agents can query this to:
205/// 1. Discover what data is available across providers
206/// 2. Understand field semantics and relationships
207/// 3. Find equivalent fields across providers
208/// 4. Plan data migrations and merges
209#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct Ontology {
211    /// Version of the ontology specification
212    pub version: String,
213    /// All known semantic tags (universal field concepts)
214    pub semantic_tags: Vec<SemanticTag>,
215    /// Entity type relationships
216    pub relationships: Vec<EntityRelationship>,
217    /// Cross-provider field mappings
218    pub mappings: Vec<CrossProviderMapping>,
219    /// Known migration paths between schema versions
220    pub migration_paths: Vec<MigrationPath>,
221    /// Provider capability matrix (provider → set of capabilities)
222    pub capabilities: HashMap<String, Vec<String>>,
223}
224
225impl Ontology {
226    /// Build the default ontology from all known schemas
227    pub fn build() -> Self {
228        Self {
229            version: "1.0.0".into(),
230            semantic_tags: build_semantic_tags(),
231            relationships: build_relationships(),
232            mappings: build_cross_provider_mappings(),
233            migration_paths: build_migration_paths(),
234            capabilities: build_capability_matrix(),
235        }
236    }
237
238    /// Find all mappings between two schemas
239    pub fn cross_provider_mappings(
240        &self,
241        source_schema: &str,
242        target_schema: &str,
243    ) -> Vec<&CrossProviderMapping> {
244        self.mappings
245            .iter()
246            .filter(|m| m.source_schema == source_schema && m.target_schema == target_schema)
247            .collect()
248    }
249
250    /// Find all fields matching a semantic tag across all schemas
251    pub fn find_by_semantic_tag(&self, tag: &str) -> Vec<&CrossProviderMapping> {
252        self.mappings
253            .iter()
254            .filter(|m| m.semantic_tag == tag)
255            .collect()
256    }
257
258    /// Get migration path between two schema versions
259    pub fn migration_path(&self, from: &str, to: &str) -> Option<&MigrationPath> {
260        self.migration_paths
261            .iter()
262            .find(|p| p.from_schema == from && p.to_schema == to)
263    }
264
265    /// Get capabilities for a specific provider
266    pub fn provider_capabilities(&self, provider: &str) -> Option<&Vec<String>> {
267        self.capabilities.get(provider)
268    }
269
270    /// Get all entity types used in the ontology
271    pub fn entity_types(&self) -> Vec<&EntityType> {
272        let mut types: Vec<&EntityType> = self
273            .semantic_tags
274            .iter()
275            .map(|t| &t.entity)
276            .collect::<std::collections::HashSet<_>>()
277            .into_iter()
278            .collect();
279        types.sort_by_key(|e| format!("{}", e));
280        types
281    }
282}
283
284// ============================================================================
285// Default Ontology Builders
286// ============================================================================
287
288fn build_semantic_tags() -> Vec<SemanticTag> {
289    vec![
290        // Session-level tags
291        SemanticTag {
292            tag: "session_id".into(),
293            description: "Unique identifier for a chat session".into(),
294            canonical_type: DataType::Uuid,
295            entity: EntityType::Session,
296            related_tags: vec![],
297        },
298        SemanticTag {
299            tag: "title".into(),
300            description: "Human-readable session title".into(),
301            canonical_type: DataType::String,
302            entity: EntityType::Session,
303            related_tags: vec![],
304        },
305        SemanticTag {
306            tag: "created_at".into(),
307            description: "When the session was created (timestamp)".into(),
308            canonical_type: DataType::Timestamp,
309            entity: EntityType::Session,
310            related_tags: vec!["updated_at".into()],
311        },
312        SemanticTag {
313            tag: "updated_at".into(),
314            description: "When the session was last modified (timestamp)".into(),
315            canonical_type: DataType::Timestamp,
316            entity: EntityType::Session,
317            related_tags: vec!["created_at".into()],
318        },
319        SemanticTag {
320            tag: "is_imported".into(),
321            description: "Whether the session was imported from another source".into(),
322            canonical_type: DataType::Boolean,
323            entity: EntityType::Session,
324            related_tags: vec![],
325        },
326        SemanticTag {
327            tag: "session_location".into(),
328            description: "Where in the IDE the session was initiated (panel, terminal, etc.)"
329                .into(),
330            canonical_type: DataType::String,
331            entity: EntityType::Session,
332            related_tags: vec![],
333        },
334        // Message-level tags
335        SemanticTag {
336            tag: "message_role".into(),
337            description: "The role of a message sender (user, assistant, system, tool)".into(),
338            canonical_type: DataType::Enum(vec![
339                "user".into(),
340                "assistant".into(),
341                "system".into(),
342                "tool".into(),
343            ]),
344            entity: EntityType::Message,
345            related_tags: vec!["message_text".into()],
346        },
347        SemanticTag {
348            tag: "message_text".into(),
349            description: "The text content of a message".into(),
350            canonical_type: DataType::String,
351            entity: EntityType::Message,
352            related_tags: vec!["message_role".into(), "message_parts".into()],
353        },
354        SemanticTag {
355            tag: "message_timestamp".into(),
356            description: "When a message was sent/received".into(),
357            canonical_type: DataType::Timestamp,
358            entity: EntityType::Message,
359            related_tags: vec![],
360        },
361        SemanticTag {
362            tag: "message_parts".into(),
363            description: "Multi-part message content (multimodal: text, images, code)".into(),
364            canonical_type: DataType::Array(Box::new(DataType::Json)),
365            entity: EntityType::Message,
366            related_tags: vec!["message_text".into()],
367        },
368        SemanticTag {
369            tag: "user_message".into(),
370            description: "The user's input message in a request-response pair".into(),
371            canonical_type: DataType::Json,
372            entity: EntityType::RequestResponse,
373            related_tags: vec!["assistant_response".into()],
374        },
375        SemanticTag {
376            tag: "assistant_response".into(),
377            description: "The AI's response in a request-response pair".into(),
378            canonical_type: DataType::Json,
379            entity: EntityType::RequestResponse,
380            related_tags: vec!["user_message".into()],
381        },
382        // Model-level tags
383        SemanticTag {
384            tag: "model_id".into(),
385            description: "Identifier of the AI model used (e.g., 'gpt-4o', 'claude-3.5-sonnet')"
386                .into(),
387            canonical_type: DataType::String,
388            entity: EntityType::Model,
389            related_tags: vec![],
390        },
391        // Agent/tool tags
392        SemanticTag {
393            tag: "agent".into(),
394            description: "AI agent metadata (for agentic sessions)".into(),
395            canonical_type: DataType::Json,
396            entity: EntityType::Agent,
397            related_tags: vec!["tools".into()],
398        },
399        SemanticTag {
400            tag: "tools".into(),
401            description: "Available tools/functions for the session".into(),
402            canonical_type: DataType::Array(Box::new(DataType::Json)),
403            entity: EntityType::ToolCall,
404            related_tags: vec!["tool_calls".into()],
405        },
406        SemanticTag {
407            tag: "tool_calls".into(),
408            description: "Tool/function invocations made by the assistant".into(),
409            canonical_type: DataType::Array(Box::new(DataType::Json)),
410            entity: EntityType::ToolCall,
411            related_tags: vec!["tools".into()],
412        },
413        // Context tags
414        SemanticTag {
415            tag: "context".into(),
416            description: "Context data provided to the model (files, selections, terminal)".into(),
417            canonical_type: DataType::Json,
418            entity: EntityType::RequestResponse,
419            related_tags: vec![],
420        },
421        // State tags
422        SemanticTag {
423            tag: "response_state".into(),
424            description: "State of the response: Pending, Complete, Cancelled, Failed, NeedsInput"
425                .into(),
426            canonical_type: DataType::Enum(vec![
427                "pending".into(),
428                "complete".into(),
429                "cancelled".into(),
430                "failed".into(),
431                "needs_input".into(),
432            ]),
433            entity: EntityType::RequestResponse,
434            related_tags: vec!["is_canceled".into()],
435        },
436        SemanticTag {
437            tag: "is_canceled".into(),
438            description: "Whether a request was canceled by the user".into(),
439            canonical_type: DataType::Boolean,
440            entity: EntityType::RequestResponse,
441            related_tags: vec!["response_state".into()],
442        },
443        // Cost/performance tags
444        SemanticTag {
445            tag: "cost".into(),
446            description: "Monetary cost of the request/response".into(),
447            canonical_type: DataType::Float,
448            entity: EntityType::RequestResponse,
449            related_tags: vec!["latency".into()],
450        },
451        SemanticTag {
452            tag: "latency".into(),
453            description: "Time taken for the request to complete (milliseconds)".into(),
454            canonical_type: DataType::Integer,
455            entity: EntityType::RequestResponse,
456            related_tags: vec!["cost".into()],
457        },
458        // Schema metadata
459        SemanticTag {
460            tag: "schema_version".into(),
461            description: "Version number of the session format schema".into(),
462            canonical_type: DataType::Integer,
463            entity: EntityType::Session,
464            related_tags: vec![],
465        },
466        SemanticTag {
467            tag: "event_type".into(),
468            description: "Type of event in event-sourced formats (snapshot, update)".into(),
469            canonical_type: DataType::String,
470            entity: EntityType::Session,
471            related_tags: vec!["event_data".into()],
472        },
473        SemanticTag {
474            tag: "event_data".into(),
475            description: "Payload of an event in event-sourced formats".into(),
476            canonical_type: DataType::Json,
477            entity: EntityType::Session,
478            related_tags: vec!["event_type".into()],
479        },
480        // UI state tags
481        SemanticTag {
482            tag: "resource_uri".into(),
483            description: "URI identifying a session resource in the IDE".into(),
484            canonical_type: DataType::Uri,
485            entity: EntityType::UiState,
486            related_tags: vec![],
487        },
488        SemanticTag {
489            tag: "is_empty".into(),
490            description: "Whether a session has no messages/requests".into(),
491            canonical_type: DataType::Boolean,
492            entity: EntityType::SessionIndex,
493            related_tags: vec![],
494        },
495        SemanticTag {
496            tag: "timing".into(),
497            description: "Session timing metadata (created, last request, last response)".into(),
498            canonical_type: DataType::Object("Timing".into()),
499            entity: EntityType::SessionIndex,
500            related_tags: vec!["created_at".into(), "updated_at".into()],
501        },
502        SemanticTag {
503            tag: "user_name".into(),
504            description: "Display name of the human user".into(),
505            canonical_type: DataType::String,
506            entity: EntityType::Session,
507            related_tags: vec!["assistant_name".into()],
508        },
509        SemanticTag {
510            tag: "assistant_name".into(),
511            description: "Display name of the AI assistant".into(),
512            canonical_type: DataType::String,
513            entity: EntityType::Session,
514            related_tags: vec!["user_name".into()],
515        },
516        SemanticTag {
517            tag: "workspace_id".into(),
518            description: "Identifier of the workspace/project".into(),
519            canonical_type: DataType::String,
520            entity: EntityType::Workspace,
521            related_tags: vec![],
522        },
523        SemanticTag {
524            tag: "model_state".into(),
525            description: "Model processing state (Pending/Complete/Cancelled)".into(),
526            canonical_type: DataType::Object("ModelState".into()),
527            entity: EntityType::RequestResponse,
528            related_tags: vec!["response_state".into()],
529        },
530        SemanticTag {
531            tag: "completed_at".into(),
532            description: "Timestamp when model finished processing".into(),
533            canonical_type: DataType::Timestamp,
534            entity: EntityType::RequestResponse,
535            related_tags: vec!["message_timestamp".into()],
536        },
537        SemanticTag {
538            tag: "completion_state".into(),
539            description: "Numeric completion state (0=Pending, 1=Complete, 2=Cancelled)".into(),
540            canonical_type: DataType::Integer,
541            entity: EntityType::RequestResponse,
542            related_tags: vec!["response_state".into(), "model_state".into()],
543        },
544        SemanticTag {
545            tag: "streaming".into(),
546            description: "Whether the response should be streamed".into(),
547            canonical_type: DataType::Boolean,
548            entity: EntityType::RequestResponse,
549            related_tags: vec![],
550        },
551        SemanticTag {
552            tag: "temperature".into(),
553            description: "Sampling temperature for model generation".into(),
554            canonical_type: DataType::Float,
555            entity: EntityType::RequestResponse,
556            related_tags: vec![],
557        },
558        SemanticTag {
559            tag: "request_id".into(),
560            description: "Unique identifier for a request within a session".into(),
561            canonical_type: DataType::Uuid,
562            entity: EntityType::RequestResponse,
563            related_tags: vec!["session_id".into()],
564        },
565        SemanticTag {
566            tag: "session_index".into(),
567            description: "Index/registry of all session IDs and their metadata".into(),
568            canonical_type: DataType::Json,
569            entity: EntityType::SessionIndex,
570            related_tags: vec![],
571        },
572        SemanticTag {
573            tag: "index_version".into(),
574            description: "Version of the session index format".into(),
575            canonical_type: DataType::Integer,
576            entity: EntityType::SessionIndex,
577            related_tags: vec!["schema_version".into()],
578        },
579        SemanticTag {
580            tag: "last_read".into(),
581            description: "Timestamp when a session was last read by the user".into(),
582            canonical_type: DataType::Timestamp,
583            entity: EntityType::UiState,
584            related_tags: vec![],
585        },
586        SemanticTag {
587            tag: "messages".into(),
588            description: "Collection of messages/requests in a session".into(),
589            canonical_type: DataType::Array(Box::new(DataType::Json)),
590            entity: EntityType::Session,
591            related_tags: vec!["message_text".into(), "message_role".into()],
592        },
593    ]
594}
595
596fn build_relationships() -> Vec<EntityRelationship> {
597    vec![
598        EntityRelationship {
599            from: EntityType::Session,
600            to: EntityType::RequestResponse,
601            kind: RelationshipKind::Contains,
602            description: "A session contains zero or more request-response pairs".into(),
603        },
604        EntityRelationship {
605            from: EntityType::Session,
606            to: EntityType::Message,
607            kind: RelationshipKind::Contains,
608            description: "A session contains an ordered sequence of messages".into(),
609        },
610        EntityRelationship {
611            from: EntityType::RequestResponse,
612            to: EntityType::Message,
613            kind: RelationshipKind::Contains,
614            description: "Each request-response pair contains a user message and assistant reply"
615                .into(),
616        },
617        EntityRelationship {
618            from: EntityType::RequestResponse,
619            to: EntityType::ToolCall,
620            kind: RelationshipKind::Contains,
621            description: "A request may invoke zero or more tool calls".into(),
622        },
623        EntityRelationship {
624            from: EntityType::Session,
625            to: EntityType::Model,
626            kind: RelationshipKind::References,
627            description: "A session may use one or more AI models".into(),
628        },
629        EntityRelationship {
630            from: EntityType::Session,
631            to: EntityType::Agent,
632            kind: RelationshipKind::References,
633            description: "An agentic session references an AI agent identity".into(),
634        },
635        EntityRelationship {
636            from: EntityType::Session,
637            to: EntityType::Workspace,
638            kind: RelationshipKind::BelongsTo,
639            description: "A session belongs to a workspace/project".into(),
640        },
641        EntityRelationship {
642            from: EntityType::Session,
643            to: EntityType::SessionIndex,
644            kind: RelationshipKind::MapsTo,
645            description: "Each session has a corresponding index entry for UI display".into(),
646        },
647        EntityRelationship {
648            from: EntityType::SessionIndex,
649            to: EntityType::UiState,
650            kind: RelationshipKind::MapsTo,
651            description: "Index entries map to UI state (cache, read timestamps)".into(),
652        },
653        EntityRelationship {
654            from: EntityType::Workspace,
655            to: EntityType::Session,
656            kind: RelationshipKind::Contains,
657            description: "A workspace contains multiple sessions".into(),
658        },
659    ]
660}
661
662fn build_cross_provider_mappings() -> Vec<CrossProviderMapping> {
663    let mut mappings = Vec::new();
664
665    // Copilot JSON v3 → Copilot JSONL v1
666    mappings.extend(copilot_json_to_jsonl_mappings());
667
668    // Copilot JSON v3 → Cursor v1
669    mappings.extend(copilot_to_cursor_mappings());
670
671    // Copilot JSON v3 → OpenAI API v1
672    mappings.extend(copilot_to_openai_mappings());
673
674    // Copilot JSON v3 → Claude Code v1
675    mappings.extend(copilot_to_claude_code_mappings());
676
677    // Claude Code v1 → OpenAI API v1
678    mappings.extend(claude_code_to_openai_mappings());
679
680    mappings
681}
682
683// --- Copilot JSON v3 → Copilot JSONL v1 ---
684
685fn copilot_json_to_jsonl_mappings() -> Vec<CrossProviderMapping> {
686    vec![
687        CrossProviderMapping {
688            source_schema: "copilot-json-v3".into(),
689            source_field: "session.version".into(),
690            target_schema: "copilot-jsonl-v1".into(),
691            target_field: "data.version".into(),
692            transform: Some(FieldTransform::Restructure {
693                description: "Wrapped inside kind:0 event envelope".into(),
694            }),
695            confidence: 1.0,
696            semantic_tag: "schema_version".into(),
697        },
698        CrossProviderMapping {
699            source_schema: "copilot-json-v3".into(),
700            source_field: "session.sessionId".into(),
701            target_schema: "copilot-jsonl-v1".into(),
702            target_field: "data.sessionId".into(),
703            transform: Some(FieldTransform::Identity),
704            confidence: 1.0,
705            semantic_tag: "session_id".into(),
706        },
707        CrossProviderMapping {
708            source_schema: "copilot-json-v3".into(),
709            source_field: "session.creationDate".into(),
710            target_schema: "copilot-jsonl-v1".into(),
711            target_field: "data.creationDate".into(),
712            transform: Some(FieldTransform::Identity),
713            confidence: 1.0,
714            semantic_tag: "created_at".into(),
715        },
716        CrossProviderMapping {
717            source_schema: "copilot-json-v3".into(),
718            source_field: "session.requests".into(),
719            target_schema: "copilot-jsonl-v1".into(),
720            target_field: "data.requests".into(),
721            transform: Some(FieldTransform::Restructure {
722                description: "Response format changed from {value:[{value:text}]} to [{kind:\"\",value:text}]".into(),
723            }),
724            confidence: 0.9,
725            semantic_tag: "messages".into(),
726        },
727    ]
728}
729
730// --- Copilot JSON v3 → Cursor v1 ---
731
732fn copilot_to_cursor_mappings() -> Vec<CrossProviderMapping> {
733    vec![
734        CrossProviderMapping {
735            source_schema: "copilot-json-v3".into(),
736            source_field: "session.sessionId".into(),
737            target_schema: "cursor-json-v1".into(),
738            target_field: "session.sessionId".into(),
739            transform: Some(FieldTransform::Identity),
740            confidence: 1.0,
741            semantic_tag: "session_id".into(),
742        },
743        CrossProviderMapping {
744            source_schema: "copilot-json-v3".into(),
745            source_field: "session.creationDate".into(),
746            target_schema: "cursor-json-v1".into(),
747            target_field: "session.creationDate".into(),
748            transform: Some(FieldTransform::Identity),
749            confidence: 1.0,
750            semantic_tag: "created_at".into(),
751        },
752        CrossProviderMapping {
753            source_schema: "copilot-json-v3".into(),
754            source_field: "session.requests".into(),
755            target_schema: "cursor-json-v1".into(),
756            target_field: "session.requests".into(),
757            transform: Some(FieldTransform::Identity),
758            confidence: 0.95,
759            semantic_tag: "messages".into(),
760        },
761    ]
762}
763
764// --- Copilot JSON v3 → OpenAI API v1 ---
765
766fn copilot_to_openai_mappings() -> Vec<CrossProviderMapping> {
767    vec![
768        CrossProviderMapping {
769            source_schema: "copilot-json-v3".into(),
770            source_field: "request.message.text".into(),
771            target_schema: "openai-api-openai-api-v1".into(),
772            target_field: "messages[].content".into(),
773            transform: Some(FieldTransform::Restructure {
774                description: "Extract text from ChatMessage and set role='user'".into(),
775            }),
776            confidence: 0.9,
777            semantic_tag: "message_text".into(),
778        },
779        CrossProviderMapping {
780            source_schema: "copilot-json-v3".into(),
781            source_field: "request.response".into(),
782            target_schema: "openai-api-openai-api-v1".into(),
783            target_field: "messages[].content".into(),
784            transform: Some(FieldTransform::Custom {
785                description: "Extract text from response value array and set role='assistant'"
786                    .into(),
787            }),
788            confidence: 0.85,
789            semantic_tag: "assistant_response".into(),
790        },
791        CrossProviderMapping {
792            source_schema: "copilot-json-v3".into(),
793            source_field: "request.modelId".into(),
794            target_schema: "openai-api-openai-api-v1".into(),
795            target_field: "model".into(),
796            transform: Some(FieldTransform::Identity),
797            confidence: 0.95,
798            semantic_tag: "model_id".into(),
799        },
800    ]
801}
802
803// --- Copilot JSON v3 → Claude Code v1 ---
804
805fn copilot_to_claude_code_mappings() -> Vec<CrossProviderMapping> {
806    vec![
807        CrossProviderMapping {
808            source_schema: "copilot-json-v3".into(),
809            source_field: "request.message.text".into(),
810            target_schema: "claude-code-jsonl-v1".into(),
811            target_field: "message.content".into(),
812            transform: Some(FieldTransform::Restructure {
813                description: "Set type='human' and wrap in message object".into(),
814            }),
815            confidence: 0.85,
816            semantic_tag: "message_text".into(),
817        },
818        CrossProviderMapping {
819            source_schema: "copilot-json-v3".into(),
820            source_field: "request.timestamp".into(),
821            target_schema: "claude-code-jsonl-v1".into(),
822            target_field: "timestamp".into(),
823            transform: Some(FieldTransform::TypeConvert {
824                from_type: "epoch_ms".into(),
825                to_type: "iso8601".into(),
826            }),
827            confidence: 0.9,
828            semantic_tag: "message_timestamp".into(),
829        },
830    ]
831}
832
833// --- Claude Code v1 → OpenAI API v1 ---
834
835fn claude_code_to_openai_mappings() -> Vec<CrossProviderMapping> {
836    vec![
837        CrossProviderMapping {
838            source_schema: "claude-code-jsonl-v1".into(),
839            source_field: "type".into(),
840            target_schema: "openai-api-openai-api-v1".into(),
841            target_field: "messages[].role".into(),
842            transform: Some(FieldTransform::ValueMap {
843                mapping: HashMap::from([
844                    ("human".into(), "user".into()),
845                    ("assistant".into(), "assistant".into()),
846                    ("system".into(), "system".into()),
847                    ("tool_use".into(), "assistant".into()),
848                    ("tool_result".into(), "tool".into()),
849                ]),
850            }),
851            confidence: 0.9,
852            semantic_tag: "message_role".into(),
853        },
854        CrossProviderMapping {
855            source_schema: "claude-code-jsonl-v1".into(),
856            source_field: "message.content".into(),
857            target_schema: "openai-api-openai-api-v1".into(),
858            target_field: "messages[].content".into(),
859            transform: Some(FieldTransform::Identity),
860            confidence: 0.95,
861            semantic_tag: "message_text".into(),
862        },
863    ]
864}
865
866// ============================================================================
867// Migration Paths
868// ============================================================================
869
870fn build_migration_paths() -> Vec<MigrationPath> {
871    vec![
872        // Copilot JSON v3 → JSONL v1 (the big format transition)
873        MigrationPath {
874            from_schema: "copilot-json-v3".into(),
875            to_schema: "copilot-jsonl-v1".into(),
876            lossless: false,
877            mappings: copilot_json_to_jsonl_mappings(),
878            data_loss: vec![
879                "Response format changes (legacy value array → typed parts array)".into(),
880            ],
881            new_fields: vec![
882                "modelState (required for VS Code to show session)".into(),
883                "timeSpentWaiting".into(),
884                "Event envelope (kind, data)".into(),
885            ],
886            notes: vec![
887                "Major format transition from single JSON to event-sourced JSONL".into(),
888                "Response extraction logic must change".into(),
889                "Index format changes from UUID array to UUID→entry map".into(),
890                "Model cache (agentSessions.model.cache) must be populated".into(),
891                "File extension changes from .json to .jsonl".into(),
892            ],
893        },
894        // Copilot JSONL v1 → JSON v3 (reverse migration for backwards compat)
895        MigrationPath {
896            from_schema: "copilot-jsonl-v1".into(),
897            to_schema: "copilot-json-v3".into(),
898            lossless: false,
899            mappings: vec![], // Reverse of json_to_jsonl
900            data_loss: vec![
901                "modelState field dropped".into(),
902                "timeSpentWaiting field dropped".into(),
903                "Event history lost (only kind:0 snapshot preserved)".into(),
904                "Incremental updates (kind:1, kind:2) discarded".into(),
905            ],
906            new_fields: vec![],
907            notes: vec![
908                "Reverse migration for backwards compatibility with older VS Code versions".into(),
909                "Compact JSONL to single snapshot first, then unwrap event envelope".into(),
910                "Response parts array must be converted back to legacy format".into(),
911            ],
912        },
913    ]
914}
915
916// ============================================================================
917// Capability Matrix
918// ============================================================================
919
/// Builds the capability matrix: provider name → list of capability names.
///
/// The data is a fixed table; each provider's capabilities keep the order in
/// which they are listed below.
fn build_capability_matrix() -> HashMap<String, Vec<String>> {
    // (provider, capabilities) rows; converted to owned strings on return.
    const PROVIDER_CAPABILITIES: &[(&str, &[&str])] = &[
        (
            "copilot",
            &[
                "session_storage",
                "session_index",
                "model_cache",
                "state_cache",
                "event_sourcing",
                "agent_mode",
                "tool_calling",
                "multi_model",
                "mcp",
                "checkpoints",
            ],
        ),
        (
            "cursor",
            &["session_storage", "multi_model", "agent_mode", "tool_calling"],
        ),
        (
            "claude-code",
            &[
                "session_storage",
                "tool_calling",
                "agent_mode",
                "cost_tracking",
                "mcp",
            ],
        ),
        (
            "codex-cli",
            &["session_storage", "tool_calling", "agent_mode"],
        ),
        (
            "gemini-cli",
            &["session_storage", "tool_calling", "agent_mode", "multi_modal"],
        ),
        (
            "continue-dev",
            &["session_storage", "multi_model", "multi_provider"],
        ),
        (
            "openai-api",
            &[
                "chat_completions",
                "tool_calling",
                "streaming",
                "multi_model",
                "embeddings",
            ],
        ),
    ];

    PROVIDER_CAPABILITIES
        .iter()
        .map(|&(provider, capabilities)| {
            let owned: Vec<String> = capabilities.iter().map(|&cap| cap.to_owned()).collect();
            (provider.to_owned(), owned)
        })
        .collect()
}