hanzo_database/
models.rs

//! # Data Models and Schemas for Hanzo DB
//!
//! Defines the data models for all Hanzo Node tables.
//! Arrow schemas are only available when the `backend-lancedb` feature is enabled.
5
6use anyhow::Result;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9use uuid::Uuid;
10
11#[cfg(feature = "backend-lancedb")]
12use arrow_array::{
13    Array, BinaryArray, Float32Array, Int32Array, Int64Array, RecordBatch, StringArray,
14    TimestampMillisecondArray, BooleanArray, FixedSizeListArray,
15};
16
17#[cfg(feature = "backend-lancedb")]
18use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
19
20#[cfg(feature = "backend-lancedb")]
21use std::sync::Arc;
22
23/// Vector dimension for embeddings
24#[cfg(feature = "backend-lancedb")]
25const EMBEDDING_DIM: i32 = 1536; // OpenAI ada-002 dimension
26
27/// Create Arrow schema for users table
28#[cfg(feature = "backend-lancedb")]
29pub fn user_schema() -> Arc<ArrowSchema> {
30    Arc::new(ArrowSchema::new(vec![
31        Field::new("id", DataType::Utf8, false),
32        Field::new("profile_name", DataType::Utf8, false),
33        Field::new("identity_type", DataType::Utf8, false),
34        Field::new("identity_public_key", DataType::Utf8, true),
35        Field::new("encryption_public_key", DataType::Utf8, true),
36        Field::new("signature_public_key", DataType::Utf8, true),
37        Field::new("node_signature_public_key", DataType::Utf8, true),
38        Field::new("node_encryption_public_key", DataType::Utf8, true),
39        Field::new("permission_type", DataType::Utf8, true),
40        Field::new("wallet_id", DataType::Utf8, true),
41        Field::new("metadata", DataType::Utf8, true), // JSON string
42        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
43        Field::new("updated_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
44        Field::new("is_active", DataType::Boolean, false),
45    ]))
46}
47
48/// Create Arrow schema for tools table
49#[cfg(feature = "backend-lancedb")]
50pub fn tool_schema() -> Arc<ArrowSchema> {
51    Arc::new(ArrowSchema::new(vec![
52        Field::new("id", DataType::Utf8, false),
53        Field::new("name", DataType::Utf8, false),
54        Field::new("description", DataType::Utf8, true),
55        Field::new("version", DataType::Utf8, false),
56        Field::new("tool_type", DataType::Utf8, false), // rust, js, python, mcp
57        Field::new("parameters_schema", DataType::Utf8, true), // JSON schema
58        Field::new("enabled", DataType::Boolean, false),
59        Field::new("config", DataType::Utf8, true), // JSON config
60        Field::new("usage_count", DataType::Int64, false),
61        Field::new("avg_execution_time_ms", DataType::Float32, true),
62        Field::new("last_used_at", DataType::Timestamp(TimeUnit::Millisecond, None), true),
63        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
64        Field::new("updated_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
65    ]))
66}
67
68/// Create Arrow schema for agents table
69#[cfg(feature = "backend-lancedb")]
70pub fn agent_schema() -> Arc<ArrowSchema> {
71    Arc::new(ArrowSchema::new(vec![
72        Field::new("id", DataType::Utf8, false),
73        Field::new("name", DataType::Utf8, false),
74        Field::new("description", DataType::Utf8, true),
75        Field::new("model", DataType::Utf8, false),
76        Field::new("system_prompt", DataType::Utf8, true),
77        Field::new("temperature", DataType::Float32, true),
78        Field::new("max_tokens", DataType::Int32, true),
79        Field::new("tools", DataType::Utf8, true), // JSON array of tool IDs
80        Field::new("created_by", DataType::Utf8, false),
81        Field::new("is_public", DataType::Boolean, false),
82        Field::new("usage_count", DataType::Int64, false),
83        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
84        Field::new("updated_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
85    ]))
86}
87
88/// Create Arrow schema for embeddings table
89#[cfg(feature = "backend-lancedb")]
90pub fn embedding_schema() -> Arc<ArrowSchema> {
91    Arc::new(ArrowSchema::new(vec![
92        Field::new("id", DataType::Utf8, false),
93        Field::new("content", DataType::Utf8, false),
94        Field::new("content_hash", DataType::Utf8, false),
95        Field::new(
96            "vector",
97            DataType::FixedSizeList(
98                Arc::new(Field::new("item", DataType::Float32, false)),
99                EMBEDDING_DIM,
100            ),
101            false,
102        ),
103        Field::new("model", DataType::Utf8, false),
104        Field::new("source_type", DataType::Utf8, false), // document, chat, tool_output
105        Field::new("source_id", DataType::Utf8, true),
106        Field::new("metadata", DataType::Utf8, true), // JSON metadata
107        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
108    ]))
109}
110
111/// Create Arrow schema for jobs table
112#[cfg(feature = "backend-lancedb")]
113pub fn job_schema() -> Arc<ArrowSchema> {
114    Arc::new(ArrowSchema::new(vec![
115        Field::new("id", DataType::Utf8, false),
116        Field::new("job_type", DataType::Utf8, false),
117        Field::new("status", DataType::Utf8, false), // pending, running, completed, failed
118        Field::new("priority", DataType::Int32, false),
119        Field::new("payload", DataType::Utf8, false), // JSON payload
120        Field::new("result", DataType::Utf8, true), // JSON result
121        Field::new("error", DataType::Utf8, true),
122        Field::new("created_by", DataType::Utf8, false),
123        Field::new("agent_id", DataType::Utf8, true),
124        Field::new("started_at", DataType::Timestamp(TimeUnit::Millisecond, None), true),
125        Field::new("completed_at", DataType::Timestamp(TimeUnit::Millisecond, None), true),
126        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
127        Field::new("updated_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
128    ]))
129}
130
131/// Create Arrow schema for sessions table
132#[cfg(feature = "backend-lancedb")]
133pub fn session_schema() -> Arc<ArrowSchema> {
134    Arc::new(ArrowSchema::new(vec![
135        Field::new("id", DataType::Utf8, false),
136        Field::new("user_id", DataType::Utf8, false),
137        Field::new("agent_id", DataType::Utf8, true),
138        Field::new("messages", DataType::Utf8, false), // JSON array of messages
139        Field::new("context", DataType::Utf8, true), // JSON context
140        Field::new("token_count", DataType::Int64, false),
141        Field::new("cost", DataType::Float32, true),
142        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
143        Field::new("updated_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
144    ]))
145}
146
147/// Create Arrow schema for multimodal table
148#[cfg(feature = "multimodal")]
149pub fn multimodal_schema() -> Arc<ArrowSchema> {
150    Arc::new(ArrowSchema::new(vec![
151        Field::new("id", DataType::Utf8, false),
152        Field::new("media_type", DataType::Utf8, false), // image, audio, video
153        Field::new("mime_type", DataType::Utf8, false),
154        Field::new("data", DataType::Binary, false), // Raw binary data
155        Field::new("thumbnail", DataType::Binary, true), // Optional thumbnail
156        Field::new("width", DataType::Int32, true),
157        Field::new("height", DataType::Int32, true),
158        Field::new("duration_ms", DataType::Int64, true), // For audio/video
159        Field::new(
160            "embedding",
161            DataType::FixedSizeList(
162                Arc::new(Field::new("item", DataType::Float32, false)),
163                EMBEDDING_DIM,
164            ),
165            true,
166        ),
167        Field::new("text_content", DataType::Utf8, true), // OCR or transcription
168        Field::new("metadata", DataType::Utf8, true), // JSON metadata
169        Field::new("created_by", DataType::Utf8, false),
170        Field::new("created_at", DataType::Timestamp(TimeUnit::Millisecond, None), false),
171    ]))
172}
173
174/// User model
175#[derive(Debug, Clone, Serialize, Deserialize)]
176pub struct User {
177    pub id: Uuid,
178    pub profile_name: String,
179    pub identity_type: String,
180    pub identity_public_key: Option<String>,
181    pub encryption_public_key: Option<String>,
182    pub signature_public_key: Option<String>,
183    pub node_signature_public_key: Option<String>,
184    pub node_encryption_public_key: Option<String>,
185    pub permission_type: Option<String>,
186    pub wallet_id: Option<String>,
187    pub metadata: Option<serde_json::Value>,
188    pub created_at: DateTime<Utc>,
189    pub updated_at: DateTime<Utc>,
190    pub is_active: bool,
191}
192
193/// Tool model
194#[derive(Debug, Clone, Serialize, Deserialize)]
195pub struct Tool {
196    pub id: Uuid,
197    pub name: String,
198    pub description: Option<String>,
199    pub version: String,
200    pub tool_type: ToolType,
201    pub parameters_schema: Option<serde_json::Value>,
202    pub enabled: bool,
203    pub config: Option<serde_json::Value>,
204    pub usage_count: i64,
205    pub avg_execution_time_ms: Option<f32>,
206    pub last_used_at: Option<DateTime<Utc>>,
207    pub created_at: DateTime<Utc>,
208    pub updated_at: DateTime<Utc>,
209}
210
211/// Tool type enum
212#[derive(Debug, Clone, Serialize, Deserialize)]
213#[serde(rename_all = "lowercase")]
214pub enum ToolType {
215    Rust,
216    JavaScript,
217    Python,
218    MCP,
219}
220
221/// Agent model
222#[derive(Debug, Clone, Serialize, Deserialize)]
223pub struct Agent {
224    pub id: Uuid,
225    pub name: String,
226    pub description: Option<String>,
227    pub model: String,
228    pub system_prompt: Option<String>,
229    pub temperature: Option<f32>,
230    pub max_tokens: Option<i32>,
231    pub tools: Vec<String>,
232    pub created_by: String,
233    pub is_public: bool,
234    pub usage_count: i64,
235    pub created_at: DateTime<Utc>,
236    pub updated_at: DateTime<Utc>,
237}
238
239/// Embedding model
240#[derive(Debug, Clone, Serialize, Deserialize)]
241pub struct Embedding {
242    pub id: Uuid,
243    pub content: String,
244    pub content_hash: String,
245    pub vector: Vec<f32>,
246    pub model: String,
247    pub source_type: String,
248    pub source_id: Option<String>,
249    pub metadata: Option<serde_json::Value>,
250    pub created_at: DateTime<Utc>,
251}
252
253/// Job model
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub struct Job {
256    pub id: Uuid,
257    pub job_type: String,
258    pub status: JobStatus,
259    pub priority: i32,
260    pub payload: serde_json::Value,
261    pub result: Option<serde_json::Value>,
262    pub error: Option<String>,
263    pub created_by: String,
264    pub agent_id: Option<String>,
265    pub started_at: Option<DateTime<Utc>>,
266    pub completed_at: Option<DateTime<Utc>>,
267    pub created_at: DateTime<Utc>,
268    pub updated_at: DateTime<Utc>,
269}
270
271/// Job status enum
272#[derive(Debug, Clone, Serialize, Deserialize)]
273#[serde(rename_all = "lowercase")]
274pub enum JobStatus {
275    Pending,
276    Running,
277    Completed,
278    Failed,
279    Cancelled,
280}
281
282/// Session model
283#[derive(Debug, Clone, Serialize, Deserialize)]
284pub struct Session {
285    pub id: Uuid,
286    pub user_id: String,
287    pub agent_id: Option<String>,
288    pub messages: Vec<Message>,
289    pub context: Option<serde_json::Value>,
290    pub token_count: i64,
291    pub cost: Option<f32>,
292    pub created_at: DateTime<Utc>,
293    pub updated_at: DateTime<Utc>,
294}
295
296/// Message model for sessions
297#[derive(Debug, Clone, Serialize, Deserialize)]
298pub struct Message {
299    pub role: String,
300    pub content: String,
301    pub timestamp: DateTime<Utc>,
302    pub tool_calls: Option<Vec<ToolCall>>,
303}
304
305/// Tool call model
306#[derive(Debug, Clone, Serialize, Deserialize)]
307pub struct ToolCall {
308    pub id: String,
309    pub name: String,
310    pub arguments: serde_json::Value,
311    pub result: Option<serde_json::Value>,
312}
313
314/// Multimodal content model
315#[cfg(feature = "multimodal")]
316#[derive(Debug, Clone, Serialize, Deserialize)]
317pub struct MultimodalContent {
318    pub id: Uuid,
319    pub media_type: MediaType,
320    pub mime_type: String,
321    pub data: Vec<u8>,
322    pub thumbnail: Option<Vec<u8>>,
323    pub width: Option<i32>,
324    pub height: Option<i32>,
325    pub duration_ms: Option<i64>,
326    pub embedding: Option<Vec<f32>>,
327    pub text_content: Option<String>,
328    pub metadata: Option<serde_json::Value>,
329    pub created_by: String,
330    pub created_at: DateTime<Utc>,
331}
332
333/// Media type enum
334#[cfg(feature = "multimodal")]
335#[derive(Debug, Clone, Serialize, Deserialize)]
336#[serde(rename_all = "lowercase")]
337pub enum MediaType {
338    Image,
339    Audio,
340    Video,
341    Document,
342}
343
344/// Helper to convert models to RecordBatch
345#[cfg(feature = "backend-lancedb")]
346pub trait ToRecordBatch {
347    fn to_record_batch(&self) -> Result<RecordBatch>;
348}
349
350/// Helper to convert from RecordBatch to models
351#[cfg(feature = "backend-lancedb")]
352pub trait FromRecordBatch: Sized {
353    fn from_record_batch(batch: &RecordBatch, row: usize) -> Result<Self>;
354}
355
356#[cfg(all(test, feature = "backend-lancedb"))]
357mod tests {
358    use super::*;
359
360    #[test]
361    fn test_user_schema() {
362        let schema = user_schema();
363        assert_eq!(schema.fields().len(), 14);
364        assert_eq!(schema.field(0).name(), "id");
365        assert_eq!(schema.field(0).data_type(), &DataType::Utf8);
366    }
367
368    #[test]
369    fn test_embedding_schema() {
370        let schema = embedding_schema();
371        let vector_field = schema.field(3);
372        assert_eq!(vector_field.name(), "vector");
373        
374        if let DataType::FixedSizeList(_, size) = vector_field.data_type() {
375            assert_eq!(*size, EMBEDDING_DIM);
376        } else {
377            panic!("Expected FixedSizeList for vector field");
378        }
379    }
380
381    #[test]
382    fn test_tool_type_serialization() {
383        let tool_type = ToolType::JavaScript;
384        let json = serde_json::to_string(&tool_type).unwrap();
385        assert_eq!(json, r#""javascript""#);
386        
387        let deserialized: ToolType = serde_json::from_str(&json).unwrap();
388        assert!(matches!(deserialized, ToolType::JavaScript));
389    }
390
391    #[test]
392    fn test_job_status_serialization() {
393        let status = JobStatus::Running;
394        let json = serde_json::to_string(&status).unwrap();
395        assert_eq!(json, r#""running""#);
396        
397        let deserialized: JobStatus = serde_json::from_str(&json).unwrap();
398        assert!(matches!(deserialized, JobStatus::Running));
399    }
400}