1use indexmap::IndexMap;
2use logos::{Lexer, Logos};
3use serde::{Deserialize, Serialize};
4use serde_json::{json, Value};
5use std::str::FromStr;
6
/// Type names that are rendered as a JSON Schema `string` carrying a `format`
/// keyword when they are used as the item type of an array.
const FORMAT_NAMES: &[&str] = &[
    "Date",
    "Time",
    "DateTime",
    "Timestamp",
    "Interval",
    "Duration",
    "Email",
    "Ipv4",
    "Ipv6",
    "Uri",
    "Hostname",
    "Domainname",
    "Uuid",
    "UUID",
    "Ulid",
    "ULID",
    "Json",
    "JSON",
    "Xml",
    "XML",
    "Color",
    "Isbn",
    "ISBN",
    "Path",
    "S3Path",
    "SemVer",
    "PhoneNumber",
    "CreditCard",
    "Currency",
    "MimeType",
    "Language",
    "Locale",
    "Base64",
];

/// Fragments that, when contained in an untyped field name, make
/// `JsonSchemaEntry::revise` infer the `number` type.
const NUMBER_NAMES: &[&str] = &[
    "price", "rate", "height", "width", "weight", "amount", "total", "percent", "ratio",
];

/// Fragments that, when contained in an untyped field name, make
/// `JsonSchemaEntry::revise` infer the `integer` type.
const INTEGER_NAMES: &[&str] = &[
    "age", "year", "count", "size", "length", "delay", "time", "duration", "level", "index",
    "position", "order", "limit", "offset", "page", "quantity", "capacity", "interval",
    "retries", "max", "min",
];

/// Prefixes that, when an untyped field name starts with one of them, make
/// `JsonSchemaEntry::revise` infer the `boolean` type.
const BOOLEAN_NAMES: &[&str] = &[
    "has", "is", "does", "allow", "should", "if", "can", "may", "will", "must",
];
53
54fn array_type_callback(lex: &mut Lexer<Token>) -> (String, String, String) {
55 let complex_type = lex.slice().to_owned();
56 let offset = complex_type.find('<').unwrap();
57 let container_type = complex_type[..offset].to_owned();
58 let end_offset = complex_type.rfind('>').unwrap();
59 let item_type = complex_type[offset + 1..end_offset].to_owned();
60 let range_type = complex_type[end_offset + 1..].trim();
61 (container_type, item_type, range_type.to_string())
62}
63
64#[derive(Debug, Logos)]
65#[logos(skip r"[ \t\n\f]+")] pub enum ArrayToken {
67 #[token("(")]
68 ParenOpen,
69
70 #[token(")")]
71 ParenClose,
72
73 #[token("[")]
74 BracketOpen,
75
76 #[token("]")]
77 BracketClose,
78
79 #[token(",")]
80 Comma,
81
82 #[regex(r"-?(?:0|[1-9]\d*)?",
83 |lex| lex.slice().parse::<i64>().unwrap())]
84 Integer(i64),
85
86 #[regex(r"-?(?:0|[1-9]\d*)(?:\.\d+)(?:[eE][+-]?\d+)?",
87 |lex| lex.slice().parse::<f64>().unwrap())]
88 Number(f64),
89
90 #[regex(r#"'([^']*)'"#, |lex| lex.slice().to_owned())]
91 Text1(String),
92
93 #[regex(r#""([^"\\]|\\["\\bnfrt]|u[a-fA-F0-9]{4})*""#, |lex| lex.slice().to_owned())]
94 Text2(String),
95}
96
97#[derive(Debug, Logos)]
98#[logos(skip r"[ \t\n\f]+")] pub enum Token {
100 #[token("{")]
101 BraceOpen,
102
103 #[token("}")]
104 BraceClose,
105
106 #[token(":")]
107 Colon,
108
109 #[token(",")]
110 Comma,
111
112 #[token("...")]
113 Ellipsis,
114
115 #[regex(r#"(integer|int|long|bigint|number|float|double|real|decimal)\([^)]+\)"#,
116 |lex| lex.slice().to_owned())]
117 RangeType(String),
118
119 #[regex(r#"(string|bytes|varchar|String|Text)\([^)]+\)"#,
120 |lex| lex.slice().to_owned())]
121 StringLengthType(String),
122
123 #[regex(r#"\[[^]]+\]"#, |lex| lex.slice().to_owned())]
124 TupleType(String),
125
126 #[regex("integer|Integer|int|int32|int64|int96|int128|long|bigint|serial|bigserial|number|Number|float|double|real|decimal|boolean|Boolean|bool|string|bytes|bytea|varchar|String|Text",
127 |lex| lex.slice().to_owned())]
128 PrimitiveType(String),
129
130 #[regex("Ulid|ULID|Color|Isbn|ISBN|Path|S3Path|SemVer|PhoneNumber|CreditCard|Currency|MimeType|Language|Locale|Base64",
131 |lex| lex.slice().to_owned())]
132 ExtraType(String),
133
134 #[regex("Date|Time|DateTime|Duration|Email|Ipv4|Ipv6|Uri|Hostname|Domainname|Uuid|UUID",
135 |lex| lex.slice().to_owned())]
136 FormatType(String),
137
138 #[regex(r#"(List|list|Set|set|Array|array)<(integer|Integer|int|long|bigint|number|Number|float|double|real|decimal|boolean|Boolean|bool|string|bytes|bytea|varchar|String|Text|Date|Time|DateTime|Timestamp|Interval|Duration|Email|Ipv4|Ipv6|Uri|Hostname|Domainname|Uuid|UUID|Ulid|ULID|Color|Isbn|ISBN|Path|S3Path|SemVer|PhoneNumber|CreditCard|Currency|MimeType|Language|Locale|Base64)>(\([^)]+\))?"#,
139 array_type_callback
140 )]
141 ArrayType((String, String, String)),
142
143 #[regex("(integer|Integer|int|int32|int64|int96|int128|long|bigint|number|Number|float|double|real|decimal|boolean|Boolean|bool|string|bytes|bytea|varchar|String|Text|Date|Time|DateTime|Timestamp|Interval|Duration|Email|Ipv4|Ipv6|Uri|Hostname|Uuid|UUID)([|](integer|Integer|int|int32|int64|int96|int128|long|bigint|number|Number|float|double|real|decimal|boolean|Boolean|bool|string|bytes|bytea|varchar|String|Text|Date|Time|DateTime|Timestamp|Interval|Duration|Email|Ipv4|Ipv6|Uri|Hostname|Domainname|Uuid|UUID|Ulid|ULID|Color|Isbn|ISBN|Path|S3Path|SemVer|PhoneNumber|CreditCard|Currency|MimeType|Language|Locale|Base64))+",
144 |lex| lex.slice().to_owned())]
145 AnyOf(String),
146
147 #[regex(r#"enum\([^)]+\)"#, |lex| lex.slice().to_owned())]
148 EnumType(String),
149
150 #[regex(r#"regex\(((['][^']+['])|([\"][^\"]+[\"]))\)"#, |lex| lex.slice().to_owned())]
151 RegexType(String),
152
153 #[regex("[A-Z][a-zA-Z0-9_]+", |lex| lex.slice().to_owned())]
154 ObjectName(String),
155
156 #[regex(r#"[a-z0-9][a-zA-Z0-9_]+[\?]?"#, |lex| lex.slice().to_owned())]
157 FieldName(String),
158}
159
160#[derive(Serialize, Deserialize, Debug)]
161pub struct JsonSchema {
162 #[serde(skip_serializing_if = "Option::is_none")]
163 #[serde(rename = "$schema")]
164 pub version: Option<String>,
165 #[serde(skip_serializing_if = "Option::is_none")]
166 #[serde(rename = "$id")]
167 pub id: Option<String>,
168 pub title: String,
169 #[serde(skip_serializing_if = "Option::is_none")]
170 pub description: Option<String>,
171 #[serde(rename = "type")]
172 pub type_name: String,
173 #[serde(skip_serializing_if = "Option::is_none")]
174 pub properties: Option<IndexMap<String, JsonSchemaEntry>>,
175 #[serde(skip_serializing_if = "Option::is_none")]
176 pub required: Option<Vec<String>>,
177 #[serde(skip_serializing_if = "Option::is_none")]
178 #[serde(rename = "additionalProperties")]
179 pub additional_properties: Option<bool>,
180}
181
182#[derive(Serialize, Deserialize, Debug, Default, Clone)]
183pub struct JsonSchemaEntry {
184 #[serde(skip)]
185 pub name: String,
186 #[serde(skip)]
187 pub optional: bool,
188 #[serde(rename = "type")]
189 #[serde(skip_serializing_if = "String::is_empty")]
190 pub type_name: String,
191 #[serde(skip_serializing_if = "Option::is_none")]
192 pub description: Option<String>,
193 #[serde(skip_serializing_if = "Option::is_none")]
194 pub format: Option<String>,
195 #[serde(skip_serializing_if = "Option::is_none")]
196 pub pattern: Option<String>,
197 #[serde(skip_serializing_if = "Option::is_none")]
198 #[serde(rename = "anyOf")]
199 pub any_of: Option<Vec<JsonSchemaEntry>>,
200 #[serde(skip_serializing_if = "Option::is_none")]
201 pub items: Option<Value>,
202 #[serde(skip_serializing_if = "Option::is_none")]
203 #[serde(rename = "minItems")]
204 pub min_items: Option<u32>,
205 #[serde(skip_serializing_if = "Option::is_none")]
206 #[serde(rename = "maxItems")]
207 pub max_items: Option<u32>,
208 #[serde(skip_serializing_if = "Option::is_none")]
209 #[serde(rename = "enum")]
210 pub enums: Option<Vec<Value>>,
211 #[serde(skip_serializing_if = "Option::is_none")]
212 pub minimum: Option<Value>,
213 #[serde(skip_serializing_if = "Option::is_none")]
214 pub maximum: Option<Value>,
215 #[serde(skip_serializing_if = "Option::is_none")]
216 #[serde(rename = "minLength")]
217 pub min_length: Option<u32>,
218 #[serde(skip_serializing_if = "Option::is_none")]
219 #[serde(rename = "maxLength")]
220 pub max_length: Option<u32>,
221 #[serde(skip_serializing_if = "Option::is_none")]
222 #[serde(rename = "uniqueItems")]
223 pub unique_items: Option<bool>,
224 #[serde(skip_serializing_if = "Option::is_none")]
225 pub properties: Option<IndexMap<String, JsonSchemaEntry>>,
226 #[serde(skip_serializing_if = "Option::is_none")]
227 pub required: Option<Vec<String>>,
228 #[serde(skip_serializing_if = "Option::is_none")]
229 pub additional_properties: Option<bool>,
230}
231
232impl JsonSchema {
233 pub fn new(title: &str) -> Self {
234 JsonSchema {
235 version: None,
236 id: None,
237 title: title.to_string(),
238 description: None,
239 type_name: "object".to_owned(),
240 properties: Some(IndexMap::new()),
241 required: None,
242 additional_properties: None,
243 }
244 }
245 pub fn version_2020(title: &str) -> Self {
246 JsonSchema {
247 version: Some("https://json-schema.org/draft/2020-12/schema".to_owned()),
248 id: None,
249 title: title.to_string(),
250 description: None,
251 type_name: "object".to_owned(),
252 properties: Some(IndexMap::new()),
253 required: None,
254 additional_properties: None,
255 }
256 }
257}
258
259impl JsonSchemaEntry {
260 pub fn new(type_name: &str) -> Self {
261 JsonSchemaEntry {
262 type_name: convert_to_json_type(type_name),
263 ..Default::default()
264 }
265 }
266
267 pub fn format(format_name: &str) -> Self {
268 JsonSchemaEntry {
269 type_name: "string".to_string(),
270 format: Some(convert_to_json_format(format_name)),
271 ..Default::default()
272 }
273 }
274
275 pub fn revise(&mut self) {
276 if self.type_name.is_empty() {
277 let field_name = &self.name;
278 if field_name.contains("time") || field_name.contains("_at") {
279 self.type_name = "string".to_owned();
280 self.format = Some("date-time".to_owned());
281 } else if field_name.contains("date") {
282 self.type_name = "string".to_owned();
283 self.format = Some("date".to_owned());
284 } else if BOOLEAN_NAMES
285 .iter()
286 .any(|&item| field_name.starts_with(item))
287 {
288 self.type_name = "boolean".to_owned();
289 } else if NUMBER_NAMES.iter().any(|&item| field_name.contains(item)) {
290 self.type_name = "number".to_owned();
291 } else if INTEGER_NAMES.iter().any(|&item| field_name.contains(item)) {
292 self.type_name = "integer".to_owned();
293 } else {
294 self.type_name = "string".to_owned();
295 }
296 }
297 }
298
299 pub fn add_entry(&mut self, name: &str, type_name: &str) {
300 if self.properties.is_none() {
301 self.properties = Some(IndexMap::new());
302 }
303 if let Some(ref mut properties) = self.properties {
304 let entry = JsonSchemaEntry {
305 type_name: type_name.to_string(),
306 ..Default::default()
307 };
308 properties.insert(name.to_string(), entry);
309 }
310 }
311}
312
313pub fn to_json_schema(struct_text: &str) -> Result<JsonSchema, String> {
314 let offset = struct_text.find('{').unwrap();
315 let title = struct_text[0..offset].trim();
316 let lexer_text = &struct_text[offset..].trim().trim_matches(&['{', '}']);
317 let mut json_schema = JsonSchema::version_2020(title);
318 let mut entries: IndexMap<String, JsonSchemaEntry> = IndexMap::new();
319 let mut entry: JsonSchemaEntry = Default::default();
320 let mut parent_entries: Option<IndexMap<String, JsonSchemaEntry>> = None;
321 let mut parent_entry: Option<JsonSchemaEntry> = None;
322 let mut lexer = Token::lexer(lexer_text);
323 while let Some(result) = lexer.next() {
324 if let Ok(token) = result {
325 match token {
326 Token::BraceOpen => {
327 parent_entry = Some(entry.clone());
329 parent_entries = Some(entries.clone());
330 entry = Default::default();
332 entries = IndexMap::new();
333 }
334 Token::BraceClose => {
335 if !entry.name.is_empty() {
336 entry.revise();
337 entries.insert(entry.name.clone(), entry.clone());
338 }
339 let additional_properties = entry.additional_properties.clone();
340 entry = parent_entry.clone().unwrap();
342 entry.required = find_required_fields(&entries);
343 entry.properties = Some(entries.clone());
344 entry.additional_properties = additional_properties;
345 entries = parent_entries.clone().unwrap();
346 entries.insert(entry.name.clone(), entry.clone());
347 entry = Default::default();
349 parent_entries = None;
350 parent_entry = None;
351 }
352 Token::Colon => {}
353 Token::Comma => {
354 if !entry.name.is_empty() {
355 entry.revise();
356 entries.insert(entry.name.clone(), entry.clone());
357 }
358 entry = Default::default();
359 }
360 Token::PrimitiveType(type_name) => {
361 entry.type_name = convert_to_json_type(&type_name);
362 }
363 Token::ExtraType(type_name) => {
364 entry.type_name = convert_to_json_type(&type_name);
365 }
366 Token::FormatType(format_name) => {
367 entry.type_name = "string".to_owned();
368 entry.format = Some(convert_to_json_format(&format_name));
369 }
370 Token::ArrayType(array) => {
371 entry.type_name = "array".to_owned();
372 let container_type = array.0;
373 if container_type.to_lowercase().starts_with("set") {
374 entry.unique_items = Some(true);
375 }
376 let item_type = array.1;
377 let item_entry = if FORMAT_NAMES.contains(&item_type.as_str()) {
378 let format = convert_to_json_format(&item_type);
379 json!({
380 "type": "string",
381 "format": format
382 })
383 } else {
384 json!({
385 "type": "string"
386 })
387 };
388 let range_type = array.2;
389 if range_type.starts_with('(') {
390 let items_text = range_type.trim_matches(&['(', ')']).trim();
391 if !items_text.contains(",") {
392 entry.min_items = Some(items_text.parse().unwrap());
393 entry.max_items = Some(items_text.parse().unwrap());
394 } else if items_text.starts_with(",") {
395 entry.max_items = Some(items_text[1..].parse().unwrap());
397 } else if items_text.ends_with(",") {
398 entry.min_items =
400 Some(items_text[..items_text.len() - 1].parse().unwrap());
401 } else {
402 let items = items_text.split(',').collect::<Vec<&str>>();
403 if items.len() == 2 {
404 entry.min_items = Some(items[0].parse().unwrap());
405 entry.max_items = Some(items[1].parse().unwrap());
406 }
407 }
408 }
409 entry.items = Some(item_entry);
410 }
411 Token::AnyOf(any_of) => {
412 let mut types: Vec<JsonSchemaEntry> = vec![];
413 for type_name in any_of.split('|') {
414 types.push(JsonSchemaEntry::new(type_name));
415 }
416 entry.any_of = Some(types);
417 }
418 Token::EnumType(enum_type) => {
419 let items_text = enum_type[4..].trim();
420 let items = parse_array(items_text);
421 if !items.is_empty() {
422 entry.enums = Some(items);
423 }
424 }
425 Token::TupleType(tuple_type) => {
426 entry.type_name = "array".to_owned();
427 let items_text = tuple_type.trim_matches(&['[', ']']).trim();
428 let values = items_text
429 .split(',')
430 .map(|item| {
431 json!({
432 "type": convert_to_json_type(item.trim())
433 })
434 })
435 .collect::<Vec<Value>>();
436 entry.items = Some(Value::from(values));
437 }
438 Token::RangeType(range_type) => {
439 let offset = range_type.find('(').unwrap();
440 let type_name = convert_to_json_type(range_type[..offset].trim());
441 entry.type_name = type_name;
442 let items_text = range_type[offset..].trim_matches(&['(', ')']).trim();
443 if items_text.starts_with(",") {
444 entry.maximum = Some(Value::from_str(items_text[1..].trim()).unwrap());
446 } else if items_text.ends_with(",") {
447 entry.minimum = Some(
449 Value::from_str(items_text[..items_text.len() - 1].trim()).unwrap(),
450 );
451 } else {
452 let items = items_text.split(',').collect::<Vec<&str>>();
453 if items.len() == 2 {
454 entry.minimum = Some(Value::from_str(items[0].trim()).unwrap());
455 entry.maximum = Some(Value::from_str(items[1].trim()).unwrap());
456 }
457 }
458 }
459 Token::StringLengthType(length_type) => {
460 let offset = length_type.find('(').unwrap();
461 let type_name = length_type[0..offset].trim().to_lowercase();
462 entry.type_name = "string".to_owned();
463 let items_text = length_type[offset..].trim_matches(&['(', ')']).trim();
464 if !items_text.contains(',') {
465 let length = items_text.parse().unwrap();
466 if type_name == "varchar" {
467 entry.max_length = Some(length);
468 } else {
469 entry.min_length = Some(length);
470 entry.max_length = Some(length);
471 }
472 } else if items_text.starts_with(",") {
473 entry.max_length = Some(items_text[1..].trim().parse().unwrap());
475 } else if items_text.ends_with(",") {
476 entry.min_length = Some(items_text[1..].trim().parse().unwrap());
478 } else {
479 let items = items_text.split(',').collect::<Vec<&str>>();
480 if items.len() == 2 {
481 entry.min_length = Some(items[0].trim().parse().unwrap());
482 entry.max_length = Some(items[1].trim().parse().unwrap());
483 }
484 }
485 }
486 Token::RegexType(regex_type) => {
487 let pattern = regex_type[5..]
488 .trim()
489 .trim_matches(&['(', ')'])
490 .trim()
491 .trim_matches(&['"', '\'']);
492 entry.pattern = Some(pattern.to_string());
493 }
494 Token::ObjectName(_object_name) => {
495 entry.type_name = "object".to_string();
496 }
497 Token::FieldName(field_name) => {
498 if field_name.ends_with('?') {
499 entry.optional = true;
500 entry.name = field_name[..field_name.len() - 1].to_string();
501 } else {
502 entry.name = field_name;
503 }
504 }
505 Token::Ellipsis => {
506 entry.additional_properties = Some(true);
507 }
508 }
509 } else {
510 return Err("Failed to parse struct".to_string());
511 }
512 }
513 if !entry.name.is_empty() {
514 entry.revise();
515 entries.insert(entry.name.clone(), entry.clone());
516 } else if entry.additional_properties.is_some() {
517 json_schema.additional_properties = entry.additional_properties.clone();
518 }
519 json_schema.required = find_required_fields(&entries);
520 json_schema.properties = Some(entries);
521
522 Ok(json_schema)
523}
524
525fn parse_array(text: &str) -> Vec<Value> {
526 let mut lexer = ArrayToken::lexer(text);
527 let mut items: Vec<Value> = vec![];
528 while let Some(result) = lexer.next() {
529 if let Ok(token) = result {
530 match token {
531 ArrayToken::Integer(value) => {
532 items.push(Value::from(value));
533 }
534 ArrayToken::Number(value) => {
535 items.push(Value::from(value));
536 }
537 ArrayToken::Text1(value) => {
538 let temp = value.trim_matches('\'').replace("\"", "\\\"");
539 let text2 = format!("\"{}\"", temp);
540 items.push(Value::from_str(&text2).unwrap());
541 }
542 ArrayToken::Text2(value) => {
543 items.push(Value::from_str(&value).unwrap());
544 }
545 _ => {}
546 }
547 }
548 }
549 items
550}
551
/// Maps a DSL type name (case-insensitive) to a JSON Schema `type`.
///
/// Every type keyword of the DSL resolves to one of `string`, `integer`,
/// `number` or `boolean`. Any other name is returned lower-cased.
fn convert_to_json_type(type_name: &str) -> String {
    let name = type_name.to_lowercase();
    let json_type = match name.as_str() {
        "varchar" | "text" | "bytes" | "bytea" => "string",
        "isbn" | "ulid" | "color" | "path" | "s3path" | "semver" | "phonenumber"
        | "creditcard" | "currency" | "mimetype" | "language" | "locale" | "base64" => "string",
        // Format types are strings; the format itself is a separate keyword.
        "date" | "time" | "datetime" | "timestamp" | "interval" | "duration" | "email"
        | "ipv4" | "ipv6" | "uri" | "hostname" | "domainname" | "uuid" => "string",
        "int" | "int32" | "int64" | "int96" | "int128" | "long" | "bigint" | "serial"
        | "bigserial" => "integer",
        "float" | "double" | "real" | "decimal" => "number",
        "bool" => "boolean",
        _ => return name,
    };
    json_type.to_string()
}
564
/// Maps a DSL format name (case-insensitive) to the value written into the
/// `format` keyword. Names without a dedicated mapping are returned lower-cased.
fn convert_to_json_format(format_name: &str) -> String {
    let lowered = format_name.to_lowercase();
    let mapped = match lowered.as_str() {
        "datetime" | "timestamp" => "date-time",
        "interval" => "duration",
        "domainname" => "hostname",
        "json" | "xml" => "string",
        _ => return lowered,
    };
    mapped.to_owned()
}
575
576fn find_required_fields(entries: &IndexMap<String, JsonSchemaEntry>) -> Option<Vec<String>> {
577 let mut required: Vec<String> = vec![];
578 for entry in entries {
579 if !entry.1.optional {
580 required.push(entry.0.clone());
581 }
582 }
583 if !required.is_empty() {
584 Some(required)
585 } else {
586 None
587 }
588}
589
#[cfg(test)]
mod tests {
    use super::*;

    /// Every piece of a simple struct definition must be a known token.
    #[test]
    fn test_lexer() {
        let text = r#"User { id: int, nick: string(6,64) }"#;
        for token in Token::lexer(text) {
            println!("{:?}", token);
            assert!(token.is_ok(), "unexpected lexer error in {:?}", text);
        }
    }

    /// Scalar and bounded array fields end up as typed, required properties.
    #[test]
    fn test_parse() {
        let text = r#"User { id: int, tags: list<string>(2,4) }"#;
        let json_schema = to_json_schema(text).unwrap();
        println!("{}", serde_json::to_string_pretty(&json_schema).unwrap());

        assert_eq!(json_schema.title, "User");
        assert_eq!(
            json_schema.required,
            Some(vec!["id".to_string(), "tags".to_string()])
        );
        let properties = json_schema.properties.as_ref().unwrap();
        assert_eq!(properties.get("id").unwrap().type_name, "integer");
        let tags = properties.get("tags").unwrap();
        assert_eq!(tags.type_name, "array");
        assert_eq!(tags.min_items, Some(2));
        assert_eq!(tags.max_items, Some(4));
        assert_eq!(tags.items, Some(json!({ "type": "string" })));
    }

    /// A nested block becomes an object property; `...` opens it for
    /// additional properties and `enum(...)` keeps its literal values.
    #[test]
    fn test_parse_nested() {
        let text = r#"User { id: int, contact: Contact { phone: string, email: string, ... }, status: enum('First',"Second", 1, 2) }"#;
        let json_schema = to_json_schema(text).unwrap();
        println!("{}", serde_json::to_string_pretty(&json_schema).unwrap());

        let properties = json_schema.properties.as_ref().unwrap();
        let names: Vec<&str> = properties.keys().map(String::as_str).collect();
        assert_eq!(names, ["id", "contact", "status"]);

        let contact = properties.get("contact").unwrap();
        assert_eq!(contact.type_name, "object");
        assert_eq!(contact.additional_properties, Some(true));
        assert_eq!(
            contact.required,
            Some(vec!["phone".to_string(), "email".to_string()])
        );
        let nested = contact.properties.as_ref().unwrap();
        assert_eq!(nested.get("phone").unwrap().type_name, "string");
        assert_eq!(nested.get("email").unwrap().type_name, "string");

        let status = properties.get("status").unwrap();
        assert_eq!(
            status.enums,
            Some(vec![json!("First"), json!("Second"), json!(1), json!(2)])
        );
    }

    /// A schema assembled by hand serializes with the JSON Schema key names.
    #[test]
    fn test_to_json() {
        let mut json_schema = JsonSchema::version_2020("User");
        let mut entries: IndexMap<String, JsonSchemaEntry> = IndexMap::new();
        entries.insert("nick".to_owned(), JsonSchemaEntry::new("string"));
        entries.insert("email".to_owned(), JsonSchemaEntry::format("Email"));
        json_schema.properties = Some(entries);
        println!("{}", serde_json::to_string_pretty(&json_schema).unwrap());

        let json = serde_json::to_value(&json_schema).unwrap();
        assert_eq!(json["$schema"], "https://json-schema.org/draft/2020-12/schema");
        assert_eq!(json["title"], "User");
        assert_eq!(json["type"], "object");
        assert_eq!(json["properties"]["nick"]["type"], "string");
        assert_eq!(json["properties"]["email"]["type"], "string");
        assert_eq!(json["properties"]["email"]["format"], "email");
    }

    /// Literal lists keep their order and their JSON types.
    #[test]
    fn test_array() {
        let text = r#"['First',"Second", -1, 2, 3.0]"#;
        let array = parse_array(text);
        println!("{}", serde_json::to_string_pretty(&array).unwrap());

        assert_eq!(
            array,
            vec![
                json!("First"),
                json!("Second"),
                json!(-1),
                json!(2),
                json!(3.0)
            ]
        );
    }
}