1use crate::error::{DataForgeError, Result};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8
9#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11pub enum DataType {
12 String { max_length: Option<usize> },
14 Integer { min: Option<i64>, max: Option<i64> },
16 Float { min: Option<f64>, max: Option<f64>, precision: Option<usize> },
18 Boolean,
20 DateTime { format: Option<String> },
22 Date { format: Option<String> },
24 Time { format: Option<String> },
26 Uuid,
28 Email,
30 Phone { country: Option<String> },
32 Url,
34 Json,
36 Array { item_type: Box<DataType>, min_items: Option<usize>, max_items: Option<usize> },
38 Object { fields: HashMap<String, FieldSchema> },
40 Enum { values: Vec<String> },
42 Custom { type_name: String, generator: String },
44}
45
46#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
48pub struct FieldConstraints {
49 pub nullable: bool,
51 pub unique: bool,
53 pub default: Option<serde_json::Value>,
55 pub pattern: Option<String>,
57 pub min: Option<serde_json::Value>,
59 pub max: Option<serde_json::Value>,
61}
62
63impl Default for FieldConstraints {
64 fn default() -> Self {
65 Self {
66 nullable: false,
67 unique: false,
68 default: None,
69 pattern: None,
70 min: None,
71 max: None,
72 }
73 }
74}
75
76#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
78pub enum FieldGeneratorType {
79 Default,
81 Custom(String),
83 RandomString,
85 RandomInteger,
87 RandomFloat,
89 RandomBoolean,
91 CurrentTimestamp,
93 RandomDate,
95 RandomDateTime,
97 RandomEmail,
99 RandomPhone,
101 RandomUrl,
103 Uuid,
105 Name,
107 CompanyName,
109 Address,
111 ProductName,
113 OrderStatus,
115}
116
117#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
119pub struct FieldSchema {
120 pub name: String,
122 pub data_type: DataType,
124 pub constraints: FieldConstraints,
126 pub description: Option<String>,
128 pub generator_config: Option<HashMap<String, serde_json::Value>>,
130 pub generator_type: Option<FieldGeneratorType>,
132}
133
134impl FieldSchema {
135 pub fn new(name: String, data_type: DataType) -> Self {
137 Self {
138 name,
139 data_type,
140 constraints: FieldConstraints::default(),
141 description: None,
142 generator_config: None,
143 generator_type: None,
144 }
145 }
146
147 pub fn with_generator_type(mut self, generator_type: FieldGeneratorType) -> Self {
149 self.generator_type = Some(generator_type);
150 self
151 }
152
153 pub fn with_constraints(mut self, constraints: FieldConstraints) -> Self {
155 self.constraints = constraints;
156 self
157 }
158
159 pub fn with_description(mut self, description: String) -> Self {
161 self.description = Some(description);
162 self
163 }
164
165 pub fn with_generator_config(mut self, config: HashMap<String, serde_json::Value>) -> Self {
167 self.generator_config = Some(config);
168 self
169 }
170}
171
172#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
174pub struct TableSchema {
175 pub name: String,
177 pub fields: Vec<FieldSchema>,
179 pub primary_key: Option<Vec<String>>,
181 pub indexes: Vec<IndexSchema>,
183 pub description: Option<String>,
185}
186
187impl TableSchema {
188 pub fn new(name: String) -> Self {
190 Self {
191 name,
192 fields: Vec::new(),
193 primary_key: None,
194 indexes: Vec::new(),
195 description: None,
196 }
197 }
198
199 pub fn add_field(&mut self, field: FieldSchema) {
201 self.fields.push(field);
202 }
203
204 pub fn with_primary_key(mut self, primary_key: Vec<String>) -> Self {
206 self.primary_key = Some(primary_key);
207 self
208 }
209
210 pub fn add_index(&mut self, index: IndexSchema) {
212 self.indexes.push(index);
213 }
214
215 pub fn with_description(mut self, description: String) -> Self {
217 self.description = Some(description);
218 self
219 }
220}
221
222#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
224pub struct IndexSchema {
225 pub name: String,
227 pub fields: Vec<String>,
229 pub unique: bool,
231}
232
233pub struct SchemaParser {
235 type_mappings: HashMap<String, DataType>,
237 pattern_rules: Vec<PatternRule>,
239}
240
241#[derive(Debug, Clone)]
243pub struct PatternRule {
244 pub field_pattern: regex::Regex,
246 pub data_type: DataType,
248 pub priority: u32,
250}
251
252impl SchemaParser {
253 pub fn new() -> Self {
255 let mut parser = Self {
256 type_mappings: HashMap::new(),
257 pattern_rules: Vec::new(),
258 };
259
260 parser.init_default_mappings();
261 parser.init_default_patterns();
262 parser
263 }
264
265 fn init_default_mappings(&mut self) {
267 self.type_mappings.insert("string".to_string(), DataType::String { max_length: Some(255) });
269 self.type_mappings.insert("text".to_string(), DataType::String { max_length: None });
270 self.type_mappings.insert("varchar".to_string(), DataType::String { max_length: Some(255) });
271 self.type_mappings.insert("char".to_string(), DataType::String { max_length: Some(1) });
272
273 self.type_mappings.insert("int".to_string(), DataType::Integer { min: None, max: None });
274 self.type_mappings.insert("integer".to_string(), DataType::Integer { min: None, max: None });
275 self.type_mappings.insert("bigint".to_string(), DataType::Integer { min: None, max: None });
276 self.type_mappings.insert("smallint".to_string(), DataType::Integer { min: Some(-32768), max: Some(32767) });
277
278 self.type_mappings.insert("float".to_string(), DataType::Float { min: None, max: None, precision: None });
279 self.type_mappings.insert("double".to_string(), DataType::Float { min: None, max: None, precision: None });
280 self.type_mappings.insert("decimal".to_string(), DataType::Float { min: None, max: None, precision: Some(2) });
281
282 self.type_mappings.insert("boolean".to_string(), DataType::Boolean);
283 self.type_mappings.insert("bool".to_string(), DataType::Boolean);
284
285 self.type_mappings.insert("datetime".to_string(), DataType::DateTime { format: None });
286 self.type_mappings.insert("timestamp".to_string(), DataType::DateTime { format: None });
287 self.type_mappings.insert("date".to_string(), DataType::Date { format: None });
288 self.type_mappings.insert("time".to_string(), DataType::Time { format: None });
289
290 self.type_mappings.insert("uuid".to_string(), DataType::Uuid);
291 self.type_mappings.insert("json".to_string(), DataType::Json);
292 }
293
294 fn init_default_patterns(&mut self) {
296 if let Ok(regex) = regex::Regex::new(r"(?i)^.*id$") {
298 self.pattern_rules.push(PatternRule {
299 field_pattern: regex,
300 data_type: DataType::Integer { min: Some(1), max: None },
301 priority: 100,
302 });
303 }
304
305 if let Ok(regex) = regex::Regex::new(r"(?i)^.*uuid$") {
307 self.pattern_rules.push(PatternRule {
308 field_pattern: regex,
309 data_type: DataType::Uuid,
310 priority: 90,
311 });
312 }
313
314 if let Ok(regex) = regex::Regex::new(r"(?i)^.*(email|mail).*$") {
316 self.pattern_rules.push(PatternRule {
317 field_pattern: regex,
318 data_type: DataType::Email,
319 priority: 80,
320 });
321 }
322
323 if let Ok(regex) = regex::Regex::new(r"(?i)^.*(phone|tel|mobile).*$") {
325 self.pattern_rules.push(PatternRule {
326 field_pattern: regex,
327 data_type: DataType::Phone { country: Some("CN".to_string()) },
328 priority: 80,
329 });
330 }
331
332 if let Ok(regex) = regex::Regex::new(r"(?i)^.*(name|username).*$") {
334 self.pattern_rules.push(PatternRule {
335 field_pattern: regex,
336 data_type: DataType::String { max_length: Some(100) },
337 priority: 70,
338 });
339 }
340
341 if let Ok(regex) = regex::Regex::new(r"(?i)^.*(created_at|updated_at|timestamp).*$") {
343 self.pattern_rules.push(PatternRule {
344 field_pattern: regex,
345 data_type: DataType::DateTime { format: None },
346 priority: 75,
347 });
348 }
349
350 if let Ok(regex) = regex::Regex::new(r"(?i)^.*(address|addr).*$") {
352 self.pattern_rules.push(PatternRule {
353 field_pattern: regex,
354 data_type: DataType::String { max_length: Some(500) },
355 priority: 60,
356 });
357 }
358 }
359
360 pub fn infer_from_json(&self, json_schema: &serde_json::Value) -> Result<TableSchema> {
362 match json_schema {
363 serde_json::Value::Object(obj) => {
364 let table_name = obj.get("title")
365 .and_then(|v| v.as_str())
366 .unwrap_or("generated_table")
367 .to_string();
368
369 let mut fields = Vec::new();
370
371 if let Some(properties) = obj.get("properties").and_then(|v| v.as_object()) {
372 for (field_name, field_schema) in properties {
373 let field = self.parse_field_schema(field_name, field_schema)?;
374 fields.push(field);
375 }
376 }
377
378 Ok(TableSchema {
379 name: table_name,
380 fields,
381 primary_key: None,
382 indexes: Vec::new(),
383 description: obj.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()),
384 })
385 }
386 _ => Err(DataForgeError::validation("Invalid JSON schema format")),
387 }
388 }
389
390 fn parse_field_schema(&self, field_name: &str, schema: &serde_json::Value) -> Result<FieldSchema> {
392 let data_type = self.infer_data_type(field_name, schema)?;
393
394 let constraints = FieldConstraints {
395 nullable: !schema.get("required").unwrap_or(&serde_json::Value::Bool(false)).as_bool().unwrap_or(false),
396 unique: schema.get("unique").and_then(|v| v.as_bool()).unwrap_or(false),
397 default: schema.get("default").cloned(),
398 pattern: schema.get("pattern").and_then(|v| v.as_str()).map(|s| s.to_string()),
399 min: schema.get("minimum").cloned(),
400 max: schema.get("maximum").cloned(),
401 };
402
403 Ok(FieldSchema {
404 name: field_name.to_string(),
405 data_type,
406 constraints,
407 description: schema.get("description").and_then(|v| v.as_str()).map(|s| s.to_string()),
408 generator_config: None,
409 generator_type: None, })
411 }
412
413 pub fn infer_data_type(&self, field_name: &str, schema: &serde_json::Value) -> Result<DataType> {
415 let mut best_match: Option<&PatternRule> = None;
417 for rule in &self.pattern_rules {
418 if rule.field_pattern.is_match(field_name) {
419 if best_match.is_none() || rule.priority > best_match.unwrap().priority {
420 best_match = Some(rule);
421 }
422 }
423 }
424
425 if let Some(rule) = best_match {
426 return Ok(rule.data_type.clone());
427 }
428
429 if let Some(type_str) = schema.get("type").and_then(|v| v.as_str()) {
431 if let Some(data_type) = self.type_mappings.get(type_str) {
432 return Ok(data_type.clone());
433 }
434 }
435
436 Ok(DataType::String { max_length: Some(255) })
438 }
439
440 pub fn infer_from_database_table(&self, table_info: &DatabaseTableInfo) -> Result<TableSchema> {
442 let mut fields = Vec::new();
443
444 for column in &table_info.columns {
445 let data_type = self.map_database_type(&column.data_type)?;
446
447 let constraints = FieldConstraints {
448 nullable: column.nullable,
449 unique: column.unique,
450 default: column.default_value.clone(),
451 pattern: None,
452 min: None,
453 max: None,
454 };
455
456 fields.push(FieldSchema {
457 name: column.name.clone(),
458 data_type,
459 constraints,
460 description: column.comment.clone(),
461 generator_config: None,
462 generator_type: None, });
464 }
465
466 Ok(TableSchema {
467 name: table_info.name.clone(),
468 fields,
469 primary_key: Some(table_info.primary_key.clone()),
470 indexes: table_info.indexes.iter().map(|idx| IndexSchema {
471 name: idx.name.clone(),
472 fields: idx.columns.clone(),
473 unique: idx.unique,
474 }).collect(),
475 description: table_info.comment.clone(),
476 })
477 }
478
479 pub fn map_database_type(&self, db_type: &str) -> Result<DataType> {
481 let normalized_type = db_type.to_lowercase();
482
483 let base_type = if let Some(pos) = normalized_type.find('(') {
485 &normalized_type[..pos]
486 } else {
487 &normalized_type
488 };
489
490 self.type_mappings.get(base_type)
491 .cloned()
492 .ok_or_else(|| DataForgeError::validation(&format!("Unsupported database type: {}", db_type)))
493 }
494
495 pub fn add_type_mapping(&mut self, db_type: String, data_type: DataType) {
497 self.type_mappings.insert(db_type, data_type);
498 }
499
500 pub fn add_pattern_rule(&mut self, pattern: &str, data_type: DataType, priority: u32) -> Result<()> {
502 let regex = regex::Regex::new(pattern)
503 .map_err(|e| DataForgeError::validation(&format!("Invalid regex pattern: {}", e)))?;
504
505 self.pattern_rules.push(PatternRule {
506 field_pattern: regex,
507 data_type,
508 priority,
509 });
510
511 self.pattern_rules.sort_by(|a, b| b.priority.cmp(&a.priority));
513
514 Ok(())
515 }
516}
517
518#[derive(Debug, Clone)]
520pub struct DatabaseTableInfo {
521 pub name: String,
522 pub columns: Vec<DatabaseColumnInfo>,
523 pub primary_key: Vec<String>,
524 pub indexes: Vec<DatabaseIndexInfo>,
525 pub comment: Option<String>,
526}
527
528#[derive(Debug, Clone)]
530pub struct DatabaseColumnInfo {
531 pub name: String,
532 pub data_type: String,
533 pub nullable: bool,
534 pub unique: bool,
535 pub default_value: Option<serde_json::Value>,
536 pub comment: Option<String>,
537}
538
/// Metadata of a single database index.
#[derive(Debug, Clone)]
pub struct DatabaseIndexInfo {
    /// Index name.
    pub name: String,
    /// Names of the columns the index covers.
    pub columns: Vec<String>,
    /// Whether the index is flagged as unique.
    pub unique: bool,
}
546
547impl Default for SchemaParser {
548 fn default() -> Self {
549 Self::new()
550 }
551}
552
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A fresh parser is pre-loaded with type mappings and pattern rules.
    #[test]
    fn test_schema_parser_creation() {
        let fresh = SchemaParser::new();
        let has_mappings = !fresh.type_mappings.is_empty();
        let has_rules = !fresh.pattern_rules.is_empty();
        assert!(has_mappings);
        assert!(has_rules);
    }

    /// A JSON schema with three properties yields a table with three fields.
    #[test]
    fn test_infer_from_json() {
        let json_schema = json!({
            "title": "User",
            "type": "object",
            "properties": {
                "id": { "type": "integer" },
                "name": { "type": "string" },
                "email": { "type": "string" }
            }
        });

        let outcome = SchemaParser::new().infer_from_json(&json_schema);
        assert!(outcome.is_ok());

        let schema = outcome.unwrap();
        assert_eq!(schema.name, "User");
        assert_eq!(schema.fields.len(), 3);
    }

    /// Field names alone drive inference when the schema declares no type.
    #[test]
    fn test_pattern_matching() {
        let parser = SchemaParser::new();
        let no_type = json!({});

        let id_type = parser.infer_data_type("user_id", &no_type).unwrap();
        assert!(
            matches!(id_type, DataType::Integer { .. }),
            "Expected Integer type for ID field"
        );

        let email_type = parser.infer_data_type("user_email", &no_type).unwrap();
        assert!(
            matches!(email_type, DataType::Email),
            "Expected Email type for email field"
        );
    }

    /// Database type strings are matched case-insensitively, ignoring `(...)`.
    #[test]
    fn test_database_type_mapping() {
        let parser = SchemaParser::new();

        let varchar_type = parser.map_database_type("VARCHAR(255)").unwrap();
        assert!(
            matches!(varchar_type, DataType::String { max_length: Some(255) }),
            "Expected String type with max_length for VARCHAR"
        );

        let int_type = parser.map_database_type("INTEGER").unwrap();
        assert!(
            matches!(int_type, DataType::Integer { .. }),
            "Expected Integer type for INTEGER"
        );
    }

    /// A user-registered mapping is returned by the database type lookup.
    #[test]
    fn test_custom_type_mapping() {
        let mut parser = SchemaParser::new();
        let custom = DataType::Custom {
            type_name: "CustomType".to_string(),
            generator: "custom_generator".to_string(),
        };
        parser.add_type_mapping("custom_type".to_string(), custom);

        let custom_type = parser.map_database_type("custom_type").unwrap();
        if let DataType::Custom { type_name, .. } = custom_type {
            assert_eq!(type_name, "CustomType");
        } else {
            panic!("Expected Custom type");
        }
    }

    /// A user rule with a higher priority beats the built-in `id` rule.
    #[test]
    fn test_pattern_rule_priority() {
        let mut parser = SchemaParser::new();
        parser
            .add_pattern_rule(r"(?i)^special_.*$", DataType::Uuid, 200)
            .unwrap();

        let special_type = parser.infer_data_type("special_id", &json!({})).unwrap();
        assert!(
            matches!(special_type, DataType::Uuid),
            "Expected Uuid type for special field with high priority rule"
        );
    }
}