dataforge/db/
schema.rs

//! Schema parser module.
//!
//! Provides automatic inference of table structures and type mapping.
4
5use crate::error::{DataForgeError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9/// 数据类型
10#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11pub enum DataType {
12    /// 字符串类型
13    String { max_length: Option<usize> },
14    /// 整数类型
15    Integer { min: Option<i64>, max: Option<i64> },
16    /// 浮点数类型
17    Float { min: Option<f64>, max: Option<f64>, precision: Option<usize> },
18    /// 布尔类型
19    Boolean,
20    /// 日期时间类型
21    DateTime { format: Option<String> },
22    /// 日期类型
23    Date { format: Option<String> },
24    /// 时间类型
25    Time { format: Option<String> },
26    /// UUID类型
27    Uuid,
28    /// 邮箱类型
29    Email,
30    /// 电话号码类型
31    Phone { country: Option<String> },
32    /// URL类型
33    Url,
34    /// JSON类型
35    Json,
36    /// 数组类型
37    Array { item_type: Box<DataType>, min_items: Option<usize>, max_items: Option<usize> },
38    /// 对象类型
39    Object { fields: HashMap<String, FieldSchema> },
40    /// 枚举类型
41    Enum { values: Vec<String> },
42    /// 自定义类型
43    Custom { type_name: String, generator: String },
44}
45
46/// 字段约束
47#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
48pub struct FieldConstraints {
49    /// 是否可为空
50    pub nullable: bool,
51    /// 是否唯一
52    pub unique: bool,
53    /// 默认值
54    pub default: Option<serde_json::Value>,
55    /// 正则表达式约束
56    pub pattern: Option<String>,
57    /// 最小值
58    pub min: Option<serde_json::Value>,
59    /// 最大值
60    pub max: Option<serde_json::Value>,
61}
62
63impl Default for FieldConstraints {
64    fn default() -> Self {
65        Self {
66            nullable: false,
67            unique: false,
68            default: None,
69            pattern: None,
70            min: None,
71            max: None,
72        }
73    }
74}
75
76/// 字段生成器类型
77#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
78pub enum FieldGeneratorType {
79    /// 默认生成器(基于数据类型自动选择)
80    Default,
81    /// 自定义生成器(指定生成器名称)
82    Custom(String),
83    /// 随机字符串
84    RandomString,
85    /// 随机整数
86    RandomInteger,
87    /// 随机浮点数
88    RandomFloat,
89    /// 随机布尔值
90    RandomBoolean,
91    /// 当前时间戳
92    CurrentTimestamp,
93    /// 随机日期
94    RandomDate,
95    /// 随机日期时间
96    RandomDateTime,
97    /// 随机邮箱
98    RandomEmail,
99    /// 随机电话号码
100    RandomPhone,
101    /// 随机URL
102    RandomUrl,
103    /// UUID
104    Uuid,
105    /// 姓名
106    Name,
107    /// 公司名称
108    CompanyName,
109    /// 地址
110    Address,
111    /// 产品名称
112    ProductName,
113    /// 订单状态
114    OrderStatus,
115}
116
117/// 字段Schema
118#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
119pub struct FieldSchema {
120    /// 字段名
121    pub name: String,
122    /// 数据类型
123    pub data_type: DataType,
124    /// 约束条件
125    pub constraints: FieldConstraints,
126    /// 描述
127    pub description: Option<String>,
128    /// 生成器配置
129    pub generator_config: Option<HashMap<String, serde_json::Value>>,
130    /// 字段生成器类型
131    pub generator_type: Option<FieldGeneratorType>,
132}
133
134impl FieldSchema {
135    /// 创建新的字段Schema
136    pub fn new(name: String, data_type: DataType) -> Self {
137        Self {
138            name,
139            data_type,
140            constraints: FieldConstraints::default(),
141            description: None,
142            generator_config: None,
143            generator_type: None,
144        }
145    }
146    
147    /// 设置字段生成器类型
148    pub fn with_generator_type(mut self, generator_type: FieldGeneratorType) -> Self {
149        self.generator_type = Some(generator_type);
150        self
151    }
152    
153    /// 设置字段约束
154    pub fn with_constraints(mut self, constraints: FieldConstraints) -> Self {
155        self.constraints = constraints;
156        self
157    }
158    
159    /// 设置字段描述
160    pub fn with_description(mut self, description: String) -> Self {
161        self.description = Some(description);
162        self
163    }
164    
165    /// 设置生成器配置
166    pub fn with_generator_config(mut self, config: HashMap<String, serde_json::Value>) -> Self {
167        self.generator_config = Some(config);
168        self
169    }
170}
171
172/// 表Schema
173#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
174pub struct TableSchema {
175    /// 表名
176    pub name: String,
177    /// 字段列表
178    pub fields: Vec<FieldSchema>,
179    /// 主键字段
180    pub primary_key: Option<Vec<String>>,
181    /// 索引定义
182    pub indexes: Vec<IndexSchema>,
183    /// 表描述
184    pub description: Option<String>,
185}
186
187impl TableSchema {
188    /// 创建新的表Schema
189    pub fn new(name: String) -> Self {
190        Self {
191            name,
192            fields: Vec::new(),
193            primary_key: None,
194            indexes: Vec::new(),
195            description: None,
196        }
197    }
198    
199    /// 添加字段
200    pub fn add_field(&mut self, field: FieldSchema) {
201        self.fields.push(field);
202    }
203    
204    /// 设置主键
205    pub fn with_primary_key(mut self, primary_key: Vec<String>) -> Self {
206        self.primary_key = Some(primary_key);
207        self
208    }
209    
210    /// 添加索引
211    pub fn add_index(&mut self, index: IndexSchema) {
212        self.indexes.push(index);
213    }
214    
215    /// 设置表描述
216    pub fn with_description(mut self, description: String) -> Self {
217        self.description = Some(description);
218        self
219    }
220}
221
222/// 索引Schema
223#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
224pub struct IndexSchema {
225    /// 索引名
226    pub name: String,
227    /// 索引字段
228    pub fields: Vec<String>,
229    /// 是否唯一索引
230    pub unique: bool,
231}
232
233/// Schema解析器
234pub struct SchemaParser {
235    /// 类型映射规则
236    type_mappings: HashMap<String, DataType>,
237    /// 模式识别规则
238    pattern_rules: Vec<PatternRule>,
239}
240
241/// 模式识别规则
242#[derive(Debug, Clone)]
243pub struct PatternRule {
244    /// 字段名模式
245    pub field_pattern: regex::Regex,
246    /// 对应的数据类型
247    pub data_type: DataType,
248    /// 优先级
249    pub priority: u32,
250}
251
252impl SchemaParser {
253    /// 创建新的Schema解析器
254    pub fn new() -> Self {
255        let mut parser = Self {
256            type_mappings: HashMap::new(),
257            pattern_rules: Vec::new(),
258        };
259        
260        parser.init_default_mappings();
261        parser.init_default_patterns();
262        parser
263    }
264
265    /// 初始化默认类型映射
266    fn init_default_mappings(&mut self) {
267        // 基础类型映射
268        self.type_mappings.insert("string".to_string(), DataType::String { max_length: Some(255) });
269        self.type_mappings.insert("text".to_string(), DataType::String { max_length: None });
270        self.type_mappings.insert("varchar".to_string(), DataType::String { max_length: Some(255) });
271        self.type_mappings.insert("char".to_string(), DataType::String { max_length: Some(1) });
272        
273        self.type_mappings.insert("int".to_string(), DataType::Integer { min: None, max: None });
274        self.type_mappings.insert("integer".to_string(), DataType::Integer { min: None, max: None });
275        self.type_mappings.insert("bigint".to_string(), DataType::Integer { min: None, max: None });
276        self.type_mappings.insert("smallint".to_string(), DataType::Integer { min: Some(-32768), max: Some(32767) });
277        
278        self.type_mappings.insert("float".to_string(), DataType::Float { min: None, max: None, precision: None });
279        self.type_mappings.insert("double".to_string(), DataType::Float { min: None, max: None, precision: None });
280        self.type_mappings.insert("decimal".to_string(), DataType::Float { min: None, max: None, precision: Some(2) });
281        
282        self.type_mappings.insert("boolean".to_string(), DataType::Boolean);
283        self.type_mappings.insert("bool".to_string(), DataType::Boolean);
284        
285        self.type_mappings.insert("datetime".to_string(), DataType::DateTime { format: None });
286        self.type_mappings.insert("timestamp".to_string(), DataType::DateTime { format: None });
287        self.type_mappings.insert("date".to_string(), DataType::Date { format: None });
288        self.type_mappings.insert("time".to_string(), DataType::Time { format: None });
289        
290        self.type_mappings.insert("uuid".to_string(), DataType::Uuid);
291        self.type_mappings.insert("json".to_string(), DataType::Json);
292    }
293
294    /// 初始化默认模式规则
295    fn init_default_patterns(&mut self) {
296        // ID字段
297        if let Ok(regex) = regex::Regex::new(r"(?i)^.*id$") {
298            self.pattern_rules.push(PatternRule {
299                field_pattern: regex,
300                data_type: DataType::Integer { min: Some(1), max: None },
301                priority: 100,
302            });
303        }
304
305        // UUID字段
306        if let Ok(regex) = regex::Regex::new(r"(?i)^.*uuid$") {
307            self.pattern_rules.push(PatternRule {
308                field_pattern: regex,
309                data_type: DataType::Uuid,
310                priority: 90,
311            });
312        }
313
314        // 邮箱字段
315        if let Ok(regex) = regex::Regex::new(r"(?i)^.*(email|mail).*$") {
316            self.pattern_rules.push(PatternRule {
317                field_pattern: regex,
318                data_type: DataType::Email,
319                priority: 80,
320            });
321        }
322
323        // 电话字段
324        if let Ok(regex) = regex::Regex::new(r"(?i)^.*(phone|tel|mobile).*$") {
325            self.pattern_rules.push(PatternRule {
326                field_pattern: regex,
327                data_type: DataType::Phone { country: Some("CN".to_string()) },
328                priority: 80,
329            });
330        }
331
332        // 姓名字段
333        if let Ok(regex) = regex::Regex::new(r"(?i)^.*(name|username).*$") {
334            self.pattern_rules.push(PatternRule {
335                field_pattern: regex,
336                data_type: DataType::String { max_length: Some(100) },
337                priority: 70,
338            });
339        }
340
341        // 时间字段
342        if let Ok(regex) = regex::Regex::new(r"(?i)^.*(created_at|updated_at|timestamp).*$") {
343            self.pattern_rules.push(PatternRule {
344                field_pattern: regex,
345                data_type: DataType::DateTime { format: None },
346                priority: 75,
347            });
348        }
349
350        // 地址字段
351        if let Ok(regex) = regex::Regex::new(r"(?i)^.*(address|addr).*$") {
352            self.pattern_rules.push(PatternRule {
353                field_pattern: regex,
354                data_type: DataType::String { max_length: Some(500) },
355                priority: 60,
356            });
357        }
358    }
359
360    /// 从JSON Schema推断表结构
361    pub fn infer_from_json(&self, json_schema: &serde_json::Value) -> Result<TableSchema> {
362        match json_schema {
363            serde_json::Value::Object(obj) => {
364                let table_name = obj.get("title")
365                    .and_then(|v| v.as_str())
366                    .unwrap_or("generated_table")
367                    .to_string();
368
369                let mut fields = Vec::new();
370
371                if let Some(properties) = obj.get("properties").and_then(|v| v.as_object()) {
372                    for (field_name, field_schema) in properties {
373                        let field = self.parse_field_schema(field_name, field_schema)?;
374                        fields.push(field);
375                    }
376                }
377
378                Ok(TableSchema {
379                    name: table_name,
380                    fields,
381                    primary_key: None,
382                    indexes: Vec::new(),
383                    description: obj.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()),
384                })
385            }
386            _ => Err(DataForgeError::validation("Invalid JSON schema format")),
387        }
388    }
389
390    /// 解析字段Schema
391    fn parse_field_schema(&self, field_name: &str, schema: &serde_json::Value) -> Result<FieldSchema> {
392        let data_type = self.infer_data_type(field_name, schema)?;
393        
394        let constraints = FieldConstraints {
395            nullable: !schema.get("required").unwrap_or(&serde_json::Value::Bool(false)).as_bool().unwrap_or(false),
396            unique: schema.get("unique").and_then(|v| v.as_bool()).unwrap_or(false),
397            default: schema.get("default").cloned(),
398            pattern: schema.get("pattern").and_then(|v| v.as_str()).map(|s| s.to_string()),
399            min: schema.get("minimum").cloned(),
400            max: schema.get("maximum").cloned(),
401        };
402
403        Ok(FieldSchema {
404            name: field_name.to_string(),
405            data_type,
406            constraints,
407            description: schema.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()),
408            generator_config: None,
409            generator_type: None, // 默认使用自动推断的生成器类型
410        })
411    }
412
413    /// 推断数据类型
414    pub fn infer_data_type(&self, field_name: &str, schema: &serde_json::Value) -> Result<DataType> {
415        // 首先使用模式匹配推断更具体的类型
416        let mut best_match: Option<&PatternRule> = None;
417        for rule in &self.pattern_rules {
418            if rule.field_pattern.is_match(field_name) {
419                if best_match.is_none() || rule.priority > best_match.unwrap().priority {
420                    best_match = Some(rule);
421                }
422            }
423        }
424
425        if let Some(rule) = best_match {
426            return Ok(rule.data_type.clone());
427        }
428
429        // 然后检查是否有明确的类型定义
430        if let Some(type_str) = schema.get("type").and_then(|v| v.as_str()) {
431            if let Some(data_type) = self.type_mappings.get(type_str) {
432                return Ok(data_type.clone());
433            }
434        }
435
436        // 默认为字符串类型
437        Ok(DataType::String { max_length: Some(255) })
438    }
439
440    /// 从数据库表结构推断Schema
441    pub fn infer_from_database_table(&self, table_info: &DatabaseTableInfo) -> Result<TableSchema> {
442        let mut fields = Vec::new();
443
444        for column in &table_info.columns {
445            let data_type = self.map_database_type(&column.data_type)?;
446            
447            let constraints = FieldConstraints {
448                nullable: column.nullable,
449                unique: column.unique,
450                default: column.default_value.clone(),
451                pattern: None,
452                min: None,
453                max: None,
454            };
455
456            fields.push(FieldSchema {
457                name: column.name.clone(),
458                data_type,
459                constraints,
460                description: column.comment.clone(),
461                generator_config: None,
462                generator_type: None, // 默认使用自动推断的生成器类型
463            });
464        }
465
466        Ok(TableSchema {
467            name: table_info.name.clone(),
468            fields,
469            primary_key: Some(table_info.primary_key.clone()),
470            indexes: table_info.indexes.iter().map(|idx| IndexSchema {
471                name: idx.name.clone(),
472                fields: idx.columns.clone(),
473                unique: idx.unique,
474            }).collect(),
475            description: table_info.comment.clone(),
476        })
477    }
478
479    /// 映射数据库类型到内部类型
480    pub fn map_database_type(&self, db_type: &str) -> Result<DataType> {
481        let normalized_type = db_type.to_lowercase();
482        
483        // 处理带参数的类型,如 VARCHAR(255)
484        let base_type = if let Some(pos) = normalized_type.find('(') {
485            &normalized_type[..pos]
486        } else {
487            &normalized_type
488        };
489
490        self.type_mappings.get(base_type)
491            .cloned()
492            .ok_or_else(|| DataForgeError::validation(&format!("Unsupported database type: {}", db_type)))
493    }
494
495    /// 添加自定义类型映射
496    pub fn add_type_mapping(&mut self, db_type: String, data_type: DataType) {
497        self.type_mappings.insert(db_type, data_type);
498    }
499
500    /// 添加模式规则
501    pub fn add_pattern_rule(&mut self, pattern: &str, data_type: DataType, priority: u32) -> Result<()> {
502        let regex = regex::Regex::new(pattern)
503            .map_err(|e| DataForgeError::validation(&format!("Invalid regex pattern: {}", e)))?;
504        
505        self.pattern_rules.push(PatternRule {
506            field_pattern: regex,
507            data_type,
508            priority,
509        });
510
511        // 按优先级排序
512        self.pattern_rules.sort_by(|a, b| b.priority.cmp(&a.priority));
513        
514        Ok(())
515    }
516}
517
518/// 数据库表信息
519#[derive(Debug, Clone)]
520pub struct DatabaseTableInfo {
521    pub name: String,
522    pub columns: Vec<DatabaseColumnInfo>,
523    pub primary_key: Vec<String>,
524    pub indexes: Vec<DatabaseIndexInfo>,
525    pub comment: Option<String>,
526}
527
528/// 数据库列信息
529#[derive(Debug, Clone)]
530pub struct DatabaseColumnInfo {
531    pub name: String,
532    pub data_type: String,
533    pub nullable: bool,
534    pub unique: bool,
535    pub default_value: Option<serde_json::Value>,
536    pub comment: Option<String>,
537}
538
/// Database index metadata.
#[derive(Debug, Clone)]
pub struct DatabaseIndexInfo {
    /// Index name.
    pub name: String,

    /// Names of the indexed columns.
    pub columns: Vec<String>,

    /// Whether this is a unique index.
    pub unique: bool,
}
546
547impl Default for SchemaParser {
548    fn default() -> Self {
549        Self::new()
550    }
551}
552
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A fresh parser comes with default mappings and pattern rules.
    #[test]
    fn test_schema_parser_creation() {
        let parser = SchemaParser::new();

        assert!(!parser.type_mappings.is_empty());
        assert!(!parser.pattern_rules.is_empty());
    }

    /// A JSON schema with three properties yields a table with three fields.
    #[test]
    fn test_infer_from_json() {
        let json_schema = json!({
            "title": "User",
            "type": "object",
            "properties": {
                "id": { "type": "integer" },
                "name": { "type": "string" },
                "email": { "type": "string" }
            }
        });

        let result = SchemaParser::new().infer_from_json(&json_schema);
        assert!(result.is_ok());

        let schema = result.unwrap();
        assert_eq!(schema.name, "User");
        assert_eq!(schema.fields.len(), 3);
    }

    /// Field names alone are enough for the default pattern rules to pick a type.
    #[test]
    fn test_pattern_matching() {
        let parser = SchemaParser::new();
        let no_schema = json!({});

        // ID field recognition.
        let id_type = parser.infer_data_type("user_id", &no_schema).unwrap();
        assert!(
            matches!(id_type, DataType::Integer { .. }),
            "Expected Integer type for ID field"
        );

        // E-mail field recognition.
        let email_type = parser.infer_data_type("user_email", &no_schema).unwrap();
        assert!(
            matches!(email_type, DataType::Email),
            "Expected Email type for email field"
        );
    }

    /// Database type names are matched case-insensitively and with parameters stripped.
    #[test]
    fn test_database_type_mapping() {
        let parser = SchemaParser::new();

        let varchar_type = parser.map_database_type("VARCHAR(255)").unwrap();
        assert!(
            matches!(varchar_type, DataType::String { max_length: Some(255) }),
            "Expected String type with max_length for VARCHAR"
        );

        let int_type = parser.map_database_type("INTEGER").unwrap();
        assert!(
            matches!(int_type, DataType::Integer { .. }),
            "Expected Integer type for INTEGER"
        );
    }

    /// A mapping added at runtime is returned by `map_database_type`.
    #[test]
    fn test_custom_type_mapping() {
        let mut parser = SchemaParser::new();
        let custom = DataType::Custom {
            type_name: "CustomType".to_string(),
            generator: "custom_generator".to_string(),
        };
        parser.add_type_mapping("custom_type".to_string(), custom);

        let custom_type = parser.map_database_type("custom_type").unwrap();
        if let DataType::Custom { type_name, .. } = custom_type {
            assert_eq!(type_name, "CustomType");
        } else {
            panic!("Expected Custom type");
        }
    }

    /// A user rule with a higher priority overrides the built-in ID rule.
    #[test]
    fn test_pattern_rule_priority() {
        let mut parser = SchemaParser::new();

        // Add a high-priority rule.
        parser
            .add_pattern_rule(r"(?i)^special_.*$", DataType::Uuid, 200)
            .unwrap();

        let special_type = parser.infer_data_type("special_id", &json!({})).unwrap();
        assert!(
            matches!(special_type, DataType::Uuid),
            "Expected Uuid type for special field with high priority rule"
        );
    }
}