sochdb_core/
catalog.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Schema Catalog for SochDB
16//!
17//! Manages table schemas, indexes, operations, and database metadata.
18//! The catalog itself is stored as a TOON document.
19//!
20//! ## MCP Integration (Task 7)
21//!
22//! Operations stored in the catalog can be exposed as MCP tools:
23//! - Input/output schemas defined as TOON schemas
24//! - Built-in, SOCH-QL, or external implementations
25//! - Token savings: ~60% via TOON responses
26
27use crate::soch::SochSchema;
28use serde::{Deserialize, Serialize};
29use std::collections::HashMap;
30
31/// Catalog entry type
32#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33pub enum CatalogEntryType {
34    Table,
35    Index,
36    View,
37    Sequence,
38    /// MCP-compatible operation (Task 7)
39    Operation,
40}
41
42/// Operation implementation type
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub enum OperationImpl {
45    /// Built-in operation (e.g., range_scan, semantic_search)
46    BuiltIn(String),
47    /// Stored procedure as SOCH-QL query
48    SochQL(String),
49    /// External function reference
50    External(String),
51}
52
53/// A catalog entry (table, index, operation, etc.)
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct CatalogEntry {
56    /// Entry name
57    pub name: String,
58    /// Entry type
59    pub entry_type: CatalogEntryType,
60    /// Schema definition (for tables)
61    pub schema: Option<SochSchema>,
62    /// Input schema (for operations)
63    pub input_schema: Option<SochSchema>,
64    /// Output schema (for operations)
65    pub output_schema: Option<SochSchema>,
66    /// Operation implementation (for operations)
67    pub implementation: Option<OperationImpl>,
68    /// Description (for MCP tool generation)
69    pub description: Option<String>,
70    /// Root page/block ID for data storage
71    pub root_id: u64,
72    /// Creation timestamp
73    pub created_us: u64,
74    /// Last modified timestamp
75    pub modified_us: u64,
76    /// Row count estimate
77    pub row_count: u64,
78    /// Additional metadata
79    pub metadata: HashMap<String, String>,
80}
81
82impl CatalogEntry {
83    pub fn new_table(name: impl Into<String>, schema: SochSchema, root_id: u64) -> Self {
84        let now = now_micros();
85        Self {
86            name: name.into(),
87            entry_type: CatalogEntryType::Table,
88            schema: Some(schema),
89            input_schema: None,
90            output_schema: None,
91            implementation: None,
92            description: None,
93            root_id,
94            created_us: now,
95            modified_us: now,
96            row_count: 0,
97            metadata: HashMap::new(),
98        }
99    }
100
101    pub fn new_index(
102        name: impl Into<String>,
103        table_name: impl Into<String>,
104        fields: Vec<String>,
105        unique: bool,
106        root_id: u64,
107    ) -> Self {
108        let now = now_micros();
109        let mut metadata = HashMap::new();
110        metadata.insert("table".to_string(), table_name.into());
111        metadata.insert("fields".to_string(), fields.join(","));
112        metadata.insert("unique".to_string(), unique.to_string());
113
114        Self {
115            name: name.into(),
116            entry_type: CatalogEntryType::Index,
117            schema: None,
118            input_schema: None,
119            output_schema: None,
120            implementation: None,
121            description: None,
122            root_id,
123            created_us: now,
124            modified_us: now,
125            row_count: 0,
126            metadata,
127        }
128    }
129
130    /// Create a new operation entry (Task 7: MCP Integration)
131    pub fn new_operation(
132        name: impl Into<String>,
133        input_schema: SochSchema,
134        output_schema: SochSchema,
135        implementation: OperationImpl,
136        description: impl Into<String>,
137    ) -> Self {
138        let now = now_micros();
139        Self {
140            name: name.into(),
141            entry_type: CatalogEntryType::Operation,
142            schema: None,
143            input_schema: Some(input_schema),
144            output_schema: Some(output_schema),
145            implementation: Some(implementation),
146            description: Some(description.into()),
147            root_id: 0,
148            created_us: now,
149            modified_us: now,
150            row_count: 0,
151            metadata: HashMap::new(),
152        }
153    }
154
155    /// Generate MCP tool descriptor from operation entry
156    pub fn to_mcp_tool(&self) -> Option<McpToolDescriptor> {
157        if self.entry_type != CatalogEntryType::Operation {
158            return None;
159        }
160
161        Some(McpToolDescriptor {
162            name: self.name.clone(),
163            description: self.description.clone().unwrap_or_default(),
164            input_schema: self.input_schema.as_ref()?.clone(),
165            output_schema: self.output_schema.as_ref()?.clone(),
166        })
167    }
168}
169
170/// MCP Tool Descriptor for LLM tool calling
171#[derive(Debug, Clone, Serialize, Deserialize)]
172pub struct McpToolDescriptor {
173    /// Tool name
174    pub name: String,
175    /// Tool description
176    pub description: String,
177    /// Input parameter schema
178    pub input_schema: SochSchema,
179    /// Output result schema
180    pub output_schema: SochSchema,
181}
182
183/// The Schema Catalog
184#[derive(Debug, Clone, Serialize, Deserialize)]
185pub struct Catalog {
186    /// Database name
187    pub name: String,
188    /// Version number
189    pub version: u64,
190    /// All catalog entries
191    pub entries: HashMap<String, CatalogEntry>,
192    /// Next auto-increment ID for each table
193    pub auto_increment: HashMap<String, u64>,
194}
195
196impl Catalog {
197    pub fn new(name: impl Into<String>) -> Self {
198        Self {
199            name: name.into(),
200            version: 1,
201            entries: HashMap::new(),
202            auto_increment: HashMap::new(),
203        }
204    }
205
206    /// Create a new table
207    pub fn create_table(&mut self, schema: SochSchema, root_id: u64) -> Result<(), String> {
208        if self.entries.contains_key(&schema.name) {
209            return Err(format!("Table '{}' already exists", schema.name));
210        }
211
212        let name = schema.name.clone();
213        let entry = CatalogEntry::new_table(&name, schema, root_id);
214        self.entries.insert(name.clone(), entry);
215        self.auto_increment.insert(name, 0);
216        self.version += 1;
217        Ok(())
218    }
219
220    /// Drop a table
221    pub fn drop_table(&mut self, name: &str) -> Result<CatalogEntry, String> {
222        // Remove associated indexes first
223        let indexes_to_remove: Vec<String> = self
224            .entries
225            .iter()
226            .filter(|(_, e)| {
227                e.entry_type == CatalogEntryType::Index
228                    && e.metadata.get("table") == Some(&name.to_string())
229            })
230            .map(|(k, _)| k.clone())
231            .collect();
232
233        for idx in indexes_to_remove {
234            self.entries.remove(&idx);
235        }
236
237        self.auto_increment.remove(name);
238        self.entries
239            .remove(name)
240            .ok_or_else(|| format!("Table '{}' not found", name))
241    }
242
243    /// Get a table schema
244    pub fn get_table(&self, name: &str) -> Option<&CatalogEntry> {
245        self.entries
246            .get(name)
247            .filter(|e| e.entry_type == CatalogEntryType::Table)
248    }
249
250    /// Get a mutable table entry
251    pub fn get_table_mut(&mut self, name: &str) -> Option<&mut CatalogEntry> {
252        self.entries
253            .get_mut(name)
254            .filter(|e| e.entry_type == CatalogEntryType::Table)
255    }
256
257    /// List all tables
258    pub fn list_tables(&self) -> Vec<&str> {
259        self.entries
260            .iter()
261            .filter(|(_, e)| e.entry_type == CatalogEntryType::Table)
262            .map(|(k, _)| k.as_str())
263            .collect()
264    }
265
266    /// Create an index
267    pub fn create_index(
268        &mut self,
269        name: impl Into<String>,
270        table_name: &str,
271        fields: Vec<String>,
272        unique: bool,
273        root_id: u64,
274    ) -> Result<(), String> {
275        let name = name.into();
276
277        if !self.entries.contains_key(table_name) {
278            return Err(format!("Table '{}' not found", table_name));
279        }
280
281        if self.entries.contains_key(&name) {
282            return Err(format!("Index '{}' already exists", name));
283        }
284
285        // Validate fields exist in table
286        if let Some(entry) = self.get_table(table_name)
287            && let Some(schema) = &entry.schema
288        {
289            for field in &fields {
290                if !schema.fields.iter().any(|f| &f.name == field) {
291                    return Err(format!(
292                        "Field '{}' not found in table '{}'",
293                        field, table_name
294                    ));
295                }
296            }
297        }
298
299        let entry = CatalogEntry::new_index(&name, table_name, fields, unique, root_id);
300        self.entries.insert(name, entry);
301        self.version += 1;
302        Ok(())
303    }
304
305    /// Drop an index
306    pub fn drop_index(&mut self, name: &str) -> Result<CatalogEntry, String> {
307        if let Some(entry) = self.entries.get(name)
308            && entry.entry_type != CatalogEntryType::Index
309        {
310            return Err(format!("'{}' is not an index", name));
311        }
312        self.entries
313            .remove(name)
314            .ok_or_else(|| format!("Index '{}' not found", name))
315    }
316
317    /// Get indexes for a table
318    pub fn get_indexes(&self, table_name: &str) -> Vec<&CatalogEntry> {
319        self.entries
320            .values()
321            .filter(|e| {
322                e.entry_type == CatalogEntryType::Index
323                    && e.metadata.get("table") == Some(&table_name.to_string())
324            })
325            .collect()
326    }
327
328    /// Get next auto-increment value
329    pub fn next_auto_increment(&mut self, table_name: &str) -> u64 {
330        let value = self
331            .auto_increment
332            .entry(table_name.to_string())
333            .or_insert(0);
334        *value += 1;
335        *value
336    }
337
338    /// Update row count for a table
339    pub fn update_row_count(&mut self, table_name: &str, count: u64) {
340        if let Some(entry) = self.entries.get_mut(table_name) {
341            entry.row_count = count;
342            entry.modified_us = now_micros();
343        }
344    }
345
346    /// Serialize catalog to bytes
347    pub fn to_bytes(&self) -> Result<Vec<u8>, String> {
348        bincode::serialize(self).map_err(|e| e.to_string())
349    }
350
351    /// Deserialize catalog from bytes
352    pub fn from_bytes(data: &[u8]) -> Result<Self, String> {
353        bincode::deserialize(data).map_err(|e| e.to_string())
354    }
355
356    /// Format catalog as TOON
357    pub fn to_toon(&self) -> String {
358        let mut lines = Vec::new();
359        lines.push(format!(
360            "soch_catalog[{}]{{name,type,fields,root,rows}}:",
361            self.entries.len()
362        ));
363
364        for (name, entry) in &self.entries {
365            let entry_type = match entry.entry_type {
366                CatalogEntryType::Table => "table",
367                CatalogEntryType::Index => "index",
368                CatalogEntryType::View => "view",
369                CatalogEntryType::Sequence => "sequence",
370                CatalogEntryType::Operation => "operation",
371            };
372
373            let fields = if let Some(schema) = &entry.schema {
374                schema
375                    .fields
376                    .iter()
377                    .map(|f| format!("{}:{}", f.name, f.field_type))
378                    .collect::<Vec<_>>()
379                    .join(";")
380            } else if let Some(input) = &entry.input_schema {
381                // For operations, show input schema
382                input
383                    .fields
384                    .iter()
385                    .map(|f| format!("{}:{}", f.name, f.field_type))
386                    .collect::<Vec<_>>()
387                    .join(";")
388            } else {
389                entry.metadata.get("fields").cloned().unwrap_or_default()
390            };
391
392            lines.push(format!(
393                "{},{},\"{}\",{},{}",
394                name, entry_type, fields, entry.root_id, entry.row_count
395            ));
396        }
397
398        lines.join("\n")
399    }
400
401    /// Create an operation (Task 7: MCP Integration)
402    pub fn create_operation(
403        &mut self,
404        name: impl Into<String>,
405        input_schema: SochSchema,
406        output_schema: SochSchema,
407        implementation: OperationImpl,
408        description: impl Into<String>,
409    ) -> Result<(), String> {
410        let name = name.into();
411
412        if self.entries.contains_key(&name) {
413            return Err(format!("Operation '{}' already exists", name));
414        }
415
416        let entry = CatalogEntry::new_operation(
417            &name,
418            input_schema,
419            output_schema,
420            implementation,
421            description,
422        );
423        self.entries.insert(name, entry);
424        self.version += 1;
425        Ok(())
426    }
427
428    /// Get an operation
429    pub fn get_operation(&self, name: &str) -> Option<&CatalogEntry> {
430        self.entries
431            .get(name)
432            .filter(|e| e.entry_type == CatalogEntryType::Operation)
433    }
434
435    /// List all operations (for MCP tool discovery)
436    pub fn list_operations(&self) -> Vec<&CatalogEntry> {
437        self.entries
438            .values()
439            .filter(|e| e.entry_type == CatalogEntryType::Operation)
440            .collect()
441    }
442
443    /// Generate all MCP tool descriptors
444    pub fn generate_mcp_tools(&self) -> Vec<McpToolDescriptor> {
445        self.list_operations()
446            .iter()
447            .filter_map(|e| e.to_mcp_tool())
448            .collect()
449    }
450}
451
/// Returns the current wall-clock time in microseconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch, and
/// saturates at `u64::MAX` rather than silently truncating if the
/// microsecond count does not fit in 64 bits.
fn now_micros() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::SystemTime::UNIX_EPOCH)
        // `as_micros` returns a u128; convert with a checked, saturating
        // narrowing instead of an `as` cast.
        .map(|elapsed| u64::try_from(elapsed.as_micros()).unwrap_or(u64::MAX))
        .unwrap_or(0)
}
458
#[cfg(test)]
mod tests {
    use super::*;
    use crate::soch::SochType;

    /// Builds a `test_db` catalog holding the single table described by
    /// `schema`, rooted at block 1.
    fn catalog_with_table(schema: SochSchema) -> Catalog {
        let mut catalog = Catalog::new("test_db");
        catalog.create_table(schema, 1).unwrap();
        catalog
    }

    /// Schema of a `users` table with just `id` and `email` columns.
    fn users_id_email() -> SochSchema {
        SochSchema::new("users")
            .field("id", SochType::UInt)
            .field("email", SochType::Text)
    }

    /// Adds the unique `idx_users_email` index (root block 2) on `users.email`.
    fn add_email_index(catalog: &mut Catalog) {
        catalog
            .create_index(
                "idx_users_email",
                "users",
                vec![String::from("email")],
                true,
                2,
            )
            .unwrap();
    }

    /// A created table is retrievable and is the only table listed.
    #[test]
    fn test_create_table() {
        let catalog = catalog_with_table(
            SochSchema::new("users")
                .field("id", SochType::UInt)
                .field("name", SochType::Text)
                .field("email", SochType::Text)
                .primary_key("id"),
        );

        assert!(catalog.get_table("users").is_some());
        assert_eq!(catalog.list_tables(), vec!["users"]);
    }

    /// An index created on a table is reported by `get_indexes`.
    #[test]
    fn test_create_index() {
        let mut catalog = catalog_with_table(users_id_email());
        add_email_index(&mut catalog);

        let indexes = catalog.get_indexes("users");
        assert_eq!(indexes.len(), 1);
        assert_eq!(indexes[0].name, "idx_users_email");
    }

    /// Auto-increment values start at 1 and increase by one per call.
    #[test]
    fn test_auto_increment() {
        let mut catalog =
            catalog_with_table(SochSchema::new("users").field("id", SochType::UInt));

        for expected in 1..=3u64 {
            assert_eq!(catalog.next_auto_increment("users"), expected);
        }
    }

    /// Dropping a table also removes the indexes defined on it.
    #[test]
    fn test_drop_table_removes_indexes() {
        let mut catalog = catalog_with_table(users_id_email());
        add_email_index(&mut catalog);

        catalog.drop_table("users").unwrap();

        assert!(catalog.get_table("users").is_none());
        assert!(catalog.get_indexes("users").is_empty());
    }

    /// A catalog survives a `to_bytes` / `from_bytes` round trip.
    #[test]
    fn test_catalog_serialization() {
        let catalog = catalog_with_table(
            SochSchema::new("users")
                .field("id", SochType::UInt)
                .field("name", SochType::Text),
        );

        let bytes = catalog.to_bytes().expect("Failed to serialize catalog");
        let restored = Catalog::from_bytes(&bytes).unwrap();

        assert_eq!(restored.name, "test_db");
        assert!(restored.get_table("users").is_some());
    }
}