Skip to main content

sochdb_core/
catalog.rs

// SPDX-License-Identifier: AGPL-3.0-or-later
// SochDB - LLM-Optimized Embedded Database
// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
//! Schema Catalog for SochDB
//!
//! Manages table schemas, indexes, operations, and database metadata.
//! The catalog itself is stored as a TOON document.
//!
//! ## MCP Integration (Task 7)
//!
//! Operations stored in the catalog can be exposed as MCP tools:
//! - Input/output schemas defined as TOON schemas
//! - Built-in, SOCH-QL, or external implementations
//! - Token savings: ~60% via TOON responses
29
30use crate::soch::SochSchema;
31use serde::{Deserialize, Serialize};
32use std::collections::HashMap;
33
34/// Catalog entry type
35#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
36pub enum CatalogEntryType {
37    Table,
38    Index,
39    View,
40    Sequence,
41    /// MCP-compatible operation (Task 7)
42    Operation,
43}
44
45/// Operation implementation type
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub enum OperationImpl {
48    /// Built-in operation (e.g., range_scan, semantic_search)
49    BuiltIn(String),
50    /// Stored procedure as SOCH-QL query
51    SochQL(String),
52    /// External function reference
53    External(String),
54}
55
56/// A catalog entry (table, index, operation, etc.)
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct CatalogEntry {
59    /// Entry name
60    pub name: String,
61    /// Entry type
62    pub entry_type: CatalogEntryType,
63    /// Schema definition (for tables)
64    pub schema: Option<SochSchema>,
65    /// Input schema (for operations)
66    pub input_schema: Option<SochSchema>,
67    /// Output schema (for operations)
68    pub output_schema: Option<SochSchema>,
69    /// Operation implementation (for operations)
70    pub implementation: Option<OperationImpl>,
71    /// Description (for MCP tool generation)
72    pub description: Option<String>,
73    /// Root page/block ID for data storage
74    pub root_id: u64,
75    /// Creation timestamp
76    pub created_us: u64,
77    /// Last modified timestamp
78    pub modified_us: u64,
79    /// Row count estimate
80    pub row_count: u64,
81    /// Additional metadata
82    pub metadata: HashMap<String, String>,
83}
84
85impl CatalogEntry {
86    pub fn new_table(name: impl Into<String>, schema: SochSchema, root_id: u64) -> Self {
87        let now = now_micros();
88        Self {
89            name: name.into(),
90            entry_type: CatalogEntryType::Table,
91            schema: Some(schema),
92            input_schema: None,
93            output_schema: None,
94            implementation: None,
95            description: None,
96            root_id,
97            created_us: now,
98            modified_us: now,
99            row_count: 0,
100            metadata: HashMap::new(),
101        }
102    }
103
104    pub fn new_index(
105        name: impl Into<String>,
106        table_name: impl Into<String>,
107        fields: Vec<String>,
108        unique: bool,
109        root_id: u64,
110    ) -> Self {
111        let now = now_micros();
112        let mut metadata = HashMap::new();
113        metadata.insert("table".to_string(), table_name.into());
114        metadata.insert("fields".to_string(), fields.join(","));
115        metadata.insert("unique".to_string(), unique.to_string());
116
117        Self {
118            name: name.into(),
119            entry_type: CatalogEntryType::Index,
120            schema: None,
121            input_schema: None,
122            output_schema: None,
123            implementation: None,
124            description: None,
125            root_id,
126            created_us: now,
127            modified_us: now,
128            row_count: 0,
129            metadata,
130        }
131    }
132
133    /// Create a new operation entry (Task 7: MCP Integration)
134    pub fn new_operation(
135        name: impl Into<String>,
136        input_schema: SochSchema,
137        output_schema: SochSchema,
138        implementation: OperationImpl,
139        description: impl Into<String>,
140    ) -> Self {
141        let now = now_micros();
142        Self {
143            name: name.into(),
144            entry_type: CatalogEntryType::Operation,
145            schema: None,
146            input_schema: Some(input_schema),
147            output_schema: Some(output_schema),
148            implementation: Some(implementation),
149            description: Some(description.into()),
150            root_id: 0,
151            created_us: now,
152            modified_us: now,
153            row_count: 0,
154            metadata: HashMap::new(),
155        }
156    }
157
158    /// Generate MCP tool descriptor from operation entry
159    pub fn to_mcp_tool(&self) -> Option<McpToolDescriptor> {
160        if self.entry_type != CatalogEntryType::Operation {
161            return None;
162        }
163
164        Some(McpToolDescriptor {
165            name: self.name.clone(),
166            description: self.description.clone().unwrap_or_default(),
167            input_schema: self.input_schema.as_ref()?.clone(),
168            output_schema: self.output_schema.as_ref()?.clone(),
169        })
170    }
171}
172
173/// MCP Tool Descriptor for LLM tool calling
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub struct McpToolDescriptor {
176    /// Tool name
177    pub name: String,
178    /// Tool description
179    pub description: String,
180    /// Input parameter schema
181    pub input_schema: SochSchema,
182    /// Output result schema
183    pub output_schema: SochSchema,
184}
185
186/// The Schema Catalog
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct Catalog {
189    /// Database name
190    pub name: String,
191    /// Version number
192    pub version: u64,
193    /// All catalog entries
194    pub entries: HashMap<String, CatalogEntry>,
195    /// Next auto-increment ID for each table
196    pub auto_increment: HashMap<String, u64>,
197}
198
199impl Catalog {
200    pub fn new(name: impl Into<String>) -> Self {
201        Self {
202            name: name.into(),
203            version: 1,
204            entries: HashMap::new(),
205            auto_increment: HashMap::new(),
206        }
207    }
208
209    /// Create a new table
210    pub fn create_table(&mut self, schema: SochSchema, root_id: u64) -> Result<(), String> {
211        if self.entries.contains_key(&schema.name) {
212            return Err(format!("Table '{}' already exists", schema.name));
213        }
214
215        let name = schema.name.clone();
216        let entry = CatalogEntry::new_table(&name, schema, root_id);
217        self.entries.insert(name.clone(), entry);
218        self.auto_increment.insert(name, 0);
219        self.version += 1;
220        Ok(())
221    }
222
223    /// Drop a table
224    pub fn drop_table(&mut self, name: &str) -> Result<CatalogEntry, String> {
225        // Remove associated indexes first
226        let indexes_to_remove: Vec<String> = self
227            .entries
228            .iter()
229            .filter(|(_, e)| {
230                e.entry_type == CatalogEntryType::Index
231                    && e.metadata.get("table") == Some(&name.to_string())
232            })
233            .map(|(k, _)| k.clone())
234            .collect();
235
236        for idx in indexes_to_remove {
237            self.entries.remove(&idx);
238        }
239
240        self.auto_increment.remove(name);
241        self.entries
242            .remove(name)
243            .ok_or_else(|| format!("Table '{}' not found", name))
244    }
245
246    /// Get a table schema
247    pub fn get_table(&self, name: &str) -> Option<&CatalogEntry> {
248        self.entries
249            .get(name)
250            .filter(|e| e.entry_type == CatalogEntryType::Table)
251    }
252
253    /// Get a mutable table entry
254    pub fn get_table_mut(&mut self, name: &str) -> Option<&mut CatalogEntry> {
255        self.entries
256            .get_mut(name)
257            .filter(|e| e.entry_type == CatalogEntryType::Table)
258    }
259
260    /// List all tables
261    pub fn list_tables(&self) -> Vec<&str> {
262        self.entries
263            .iter()
264            .filter(|(_, e)| e.entry_type == CatalogEntryType::Table)
265            .map(|(k, _)| k.as_str())
266            .collect()
267    }
268
269    /// Create an index
270    pub fn create_index(
271        &mut self,
272        name: impl Into<String>,
273        table_name: &str,
274        fields: Vec<String>,
275        unique: bool,
276        root_id: u64,
277    ) -> Result<(), String> {
278        let name = name.into();
279
280        if !self.entries.contains_key(table_name) {
281            return Err(format!("Table '{}' not found", table_name));
282        }
283
284        if self.entries.contains_key(&name) {
285            return Err(format!("Index '{}' already exists", name));
286        }
287
288        // Validate fields exist in table
289        if let Some(entry) = self.get_table(table_name)
290            && let Some(schema) = &entry.schema
291        {
292            for field in &fields {
293                if !schema.fields.iter().any(|f| &f.name == field) {
294                    return Err(format!(
295                        "Field '{}' not found in table '{}'",
296                        field, table_name
297                    ));
298                }
299            }
300        }
301
302        let entry = CatalogEntry::new_index(&name, table_name, fields, unique, root_id);
303        self.entries.insert(name, entry);
304        self.version += 1;
305        Ok(())
306    }
307
308    /// Drop an index
309    pub fn drop_index(&mut self, name: &str) -> Result<CatalogEntry, String> {
310        if let Some(entry) = self.entries.get(name)
311            && entry.entry_type != CatalogEntryType::Index
312        {
313            return Err(format!("'{}' is not an index", name));
314        }
315        self.entries
316            .remove(name)
317            .ok_or_else(|| format!("Index '{}' not found", name))
318    }
319
320    /// Get indexes for a table
321    pub fn get_indexes(&self, table_name: &str) -> Vec<&CatalogEntry> {
322        self.entries
323            .values()
324            .filter(|e| {
325                e.entry_type == CatalogEntryType::Index
326                    && e.metadata.get("table") == Some(&table_name.to_string())
327            })
328            .collect()
329    }
330
331    /// Get next auto-increment value
332    pub fn next_auto_increment(&mut self, table_name: &str) -> u64 {
333        let value = self
334            .auto_increment
335            .entry(table_name.to_string())
336            .or_insert(0);
337        *value += 1;
338        *value
339    }
340
341    /// Update row count for a table
342    pub fn update_row_count(&mut self, table_name: &str, count: u64) {
343        if let Some(entry) = self.entries.get_mut(table_name) {
344            entry.row_count = count;
345            entry.modified_us = now_micros();
346        }
347    }
348
349    /// Serialize catalog to bytes
350    pub fn to_bytes(&self) -> Result<Vec<u8>, String> {
351        bincode::serialize(self).map_err(|e| e.to_string())
352    }
353
354    /// Deserialize catalog from bytes
355    pub fn from_bytes(data: &[u8]) -> Result<Self, String> {
356        bincode::deserialize(data).map_err(|e| e.to_string())
357    }
358
359    /// Format catalog as TOON
360    pub fn to_toon(&self) -> String {
361        let mut lines = Vec::new();
362        lines.push(format!(
363            "soch_catalog[{}]{{name,type,fields,root,rows}}:",
364            self.entries.len()
365        ));
366
367        for (name, entry) in &self.entries {
368            let entry_type = match entry.entry_type {
369                CatalogEntryType::Table => "table",
370                CatalogEntryType::Index => "index",
371                CatalogEntryType::View => "view",
372                CatalogEntryType::Sequence => "sequence",
373                CatalogEntryType::Operation => "operation",
374            };
375
376            let fields = if let Some(schema) = &entry.schema {
377                schema
378                    .fields
379                    .iter()
380                    .map(|f| format!("{}:{}", f.name, f.field_type))
381                    .collect::<Vec<_>>()
382                    .join(";")
383            } else if let Some(input) = &entry.input_schema {
384                // For operations, show input schema
385                input
386                    .fields
387                    .iter()
388                    .map(|f| format!("{}:{}", f.name, f.field_type))
389                    .collect::<Vec<_>>()
390                    .join(";")
391            } else {
392                entry.metadata.get("fields").cloned().unwrap_or_default()
393            };
394
395            lines.push(format!(
396                "{},{},\"{}\",{},{}",
397                name, entry_type, fields, entry.root_id, entry.row_count
398            ));
399        }
400
401        lines.join("\n")
402    }
403
404    /// Create an operation (Task 7: MCP Integration)
405    pub fn create_operation(
406        &mut self,
407        name: impl Into<String>,
408        input_schema: SochSchema,
409        output_schema: SochSchema,
410        implementation: OperationImpl,
411        description: impl Into<String>,
412    ) -> Result<(), String> {
413        let name = name.into();
414
415        if self.entries.contains_key(&name) {
416            return Err(format!("Operation '{}' already exists", name));
417        }
418
419        let entry = CatalogEntry::new_operation(
420            &name,
421            input_schema,
422            output_schema,
423            implementation,
424            description,
425        );
426        self.entries.insert(name, entry);
427        self.version += 1;
428        Ok(())
429    }
430
431    /// Get an operation
432    pub fn get_operation(&self, name: &str) -> Option<&CatalogEntry> {
433        self.entries
434            .get(name)
435            .filter(|e| e.entry_type == CatalogEntryType::Operation)
436    }
437
438    /// List all operations (for MCP tool discovery)
439    pub fn list_operations(&self) -> Vec<&CatalogEntry> {
440        self.entries
441            .values()
442            .filter(|e| e.entry_type == CatalogEntryType::Operation)
443            .collect()
444    }
445
446    /// Generate all MCP tool descriptors
447    pub fn generate_mcp_tools(&self) -> Vec<McpToolDescriptor> {
448        self.list_operations()
449            .iter()
450            .filter_map(|e| e.to_mcp_tool())
451            .collect()
452    }
453}
454
/// Current wall-clock time in whole microseconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn now_micros() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        // `as_micros` returns u128; the cast keeps the low 64 bits.
        Ok(elapsed) => elapsed.as_micros() as u64,
        Err(_) => 0,
    }
}
461
#[cfg(test)]
mod tests {
    use super::*;
    use crate::soch::SochType;

    /// Empty catalog shared by all tests.
    fn empty_catalog() -> Catalog {
        Catalog::new("test_db")
    }

    /// `users` table schema with an `id` and an `email` column.
    fn users_with_email() -> SochSchema {
        SochSchema::new("users")
            .field("id", SochType::UInt)
            .field("email", SochType::Text)
    }

    /// Catalog holding the `users` table plus a unique index on `email`.
    fn catalog_with_email_index() -> Catalog {
        let mut db = empty_catalog();
        db.create_table(users_with_email(), 1).unwrap();
        db.create_index("idx_users_email", "users", vec!["email".into()], true, 2)
            .unwrap();
        db
    }

    // A created table is retrievable by name and appears in the table list.
    #[test]
    fn test_create_table() {
        let mut db = empty_catalog();
        let users = SochSchema::new("users")
            .field("id", SochType::UInt)
            .field("name", SochType::Text)
            .field("email", SochType::Text)
            .primary_key("id");

        db.create_table(users, 1).unwrap();

        assert!(db.get_table("users").is_some());
        assert_eq!(db.list_tables(), vec!["users"]);
    }

    // An index created on a table is reported by `get_indexes` for that table.
    #[test]
    fn test_create_index() {
        let db = catalog_with_email_index();

        let found = db.get_indexes("users");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].name, "idx_users_email");
    }

    // Auto-increment values start at 1 and grow by one per call.
    #[test]
    fn test_auto_increment() {
        let mut db = empty_catalog();
        db.create_table(SochSchema::new("users").field("id", SochType::UInt), 1)
            .unwrap();

        assert_eq!(db.next_auto_increment("users"), 1);
        assert_eq!(db.next_auto_increment("users"), 2);
        assert_eq!(db.next_auto_increment("users"), 3);
    }

    // Dropping a table also removes the indexes defined on it.
    #[test]
    fn test_drop_table_removes_indexes() {
        let mut db = catalog_with_email_index();

        db.drop_table("users").unwrap();

        assert!(db.get_table("users").is_none());
        assert!(db.get_indexes("users").is_empty());
    }

    // A catalog survives a to_bytes/from_bytes round trip.
    #[test]
    fn test_catalog_serialization() {
        let mut db = empty_catalog();
        let users = SochSchema::new("users")
            .field("id", SochType::UInt)
            .field("name", SochType::Text);
        db.create_table(users, 1).unwrap();

        let encoded = db.to_bytes().expect("Failed to serialize catalog");
        let decoded = Catalog::from_bytes(&encoded).unwrap();

        assert_eq!(decoded.name, "test_db");
        assert!(decoded.get_table("users").is_some());
    }
}