1pub mod error;
2mod instructions;
3
4use self::{
5 error::ParseError,
6 instructions::{add::AddInstruction, from::FromInstruction, remove::RemoveInstruction},
7};
8use crate::ocafile::error::InstructionError;
9use convert_case::{Case, Casing};
10pub use oca_ast_semantics::ast::OCAAst;
11use oca_ast_semantics::{
12 ast::{
13 self, recursive_attributes::NestedAttrTypeFrame, Command, CommandMeta, NestedAttrType,
14 RefValue,
15 },
16 validator::{OCAValidator, Validator},
17};
18use pest::Parser;
19use recursion::CollapsibleExt;
20
/// Parser for OCAfile source text; its implementation and the accompanying
/// `Rule` enum are derived by `pest_derive` from the `ocafile.pest` grammar.
#[derive(pest_derive::Parser)]
#[grammar = "ocafile.pest"]
pub struct OCAfileParser;
24
/// A pest token pair specialised to this grammar's `Rule` enum.
pub type Pair<'a> = pest::iterators::Pair<'a, Rule>;
26
/// Fallible conversion from a parsed grammar [`Pair`] into an AST [`Command`].
pub trait TryFromPair {
    /// Error produced when the pair cannot be turned into a command.
    type Error;
    /// Attempts to build a [`Command`] from `pair`.
    fn try_from_pair(pair: Pair<'_>) -> Result<Command, Self::Error>;
}
31
32impl TryFromPair for Command {
33 type Error = InstructionError;
34 fn try_from_pair(record: Pair) -> std::result::Result<Self, Self::Error> {
35 let instruction: Command = match record.as_rule() {
36 Rule::from => FromInstruction::from_record(record, 0)?,
37 Rule::add => AddInstruction::from_record(record, 0)?,
38 Rule::remove => RemoveInstruction::from_record(record, 0)?,
39 _ => return Err(InstructionError::UnexpectedToken(record.to_string())),
40 };
41 Ok(instruction)
42 }
43}
44
45pub fn parse_from_string(unparsed_file: String) -> Result<OCAAst, ParseError> {
46 let file = OCAfileParser::parse(Rule::file, &unparsed_file)
47 .map_err(|e| {
48 let (line_number, column_number) = match e.line_col {
49 pest::error::LineColLocation::Pos((line, column)) => (line, column),
50 pest::error::LineColLocation::Span((line, column), _) => (line, column),
51 };
52 ParseError::GrammarError {
53 line_number,
54 column_number,
55 raw_line: e.line().to_string(),
56 message: e.variant.to_string(),
57 }
58 })?
59 .next()
60 .unwrap();
61
62 let mut oca_ast = OCAAst::new();
63
64 let validator = OCAValidator {};
65
66 for (n, line) in file.into_inner().enumerate() {
67 if let Rule::EOI = line.as_rule() {
68 continue;
69 }
70 if let Rule::comment = line.as_rule() {
71 continue;
72 }
73 if let Rule::meta_comment = line.as_rule() {
74 let mut key = "".to_string();
75 let mut value = "".to_string();
76 for attr in line.into_inner() {
77 match attr.as_rule() {
78 Rule::meta_attr_key => {
79 key = attr.as_str().to_string();
80 }
81 Rule::meta_attr_value => {
82 value = attr.as_str().to_string();
83 }
84 _ => {
85 return Err(ParseError::MetaError(attr.as_str().to_string()));
86 }
87 }
88 }
89 if key.is_empty() {
90 return Err(ParseError::MetaError("key is empty".to_string()));
91 }
92 if value.is_empty() {
93 return Err(ParseError::MetaError("value is empty".to_string()));
94 }
95 oca_ast.meta.insert(key, value);
96 continue;
97 }
98 if let Rule::empty_line = line.as_rule() {
99 continue;
100 }
101
102 match Command::try_from_pair(line.clone()) {
103 Ok(command) => match validator.validate(&oca_ast, command.clone()) {
104 Ok(_) => {
105 oca_ast.commands.push(command);
106 oca_ast.commands_meta.insert(
107 oca_ast.commands.len() - 1,
108 CommandMeta {
109 line_number: n + 1,
110 raw_line: line.as_str().to_string(),
111 },
112 );
113 }
114 Err(e) => {
115 return Err(ParseError::Custom(format!(
116 "Error validating instruction: {}",
117 e
118 )));
119 }
120 },
121 Err(e) => {
122 return Err(ParseError::InstructionError(e));
123 }
124 };
125 }
126 Ok(oca_ast)
127}
128
129fn format_reference(ref_value: RefValue) -> String {
131 match ref_value {
132 RefValue::Said(said) => format!("refs:{}", said),
133 _ => panic!("Unsupported reference type: {:?}", ref_value),
134 }
135}
136
137fn oca_file_format(nested: NestedAttrType) -> String {
139 nested.collapse_frames(|frame| match frame {
140 NestedAttrTypeFrame::Reference(ref_value) => format_reference(ref_value),
141 NestedAttrTypeFrame::Value(value) => {
142 format!("{}", value)
143 }
144 NestedAttrTypeFrame::Array(arr) => {
146 format!("Array[{}]", arr)
147 }
148 NestedAttrTypeFrame::Null => "".to_string(),
149 })
150}
151
152pub fn generate_from_ast(ast: &OCAAst) -> String {
160 let mut ocafile = String::new();
161
162 ast.commands.iter().for_each(|command| {
163 let mut line = String::new();
164
165 match command.kind {
166 ast::CommandType::Add => {
167 line.push_str("ADD ");
168 match &command.object_kind {
169 ast::ObjectKind::CaptureBase(content) => {
170 if let Some(attributes) = &content.attributes {
171 line.push_str("ATTRIBUTE");
172 for (key, value) in attributes {
173 line.push_str(&format!(" {}=", key));
174 let out = oca_file_format(value.clone());
176 line.push_str(&out);
177 }
178 }
179 if let Some(properties) = &content.properties {
180 for (prop_name, prop_value) in properties {
181 if prop_name.eq("classification") {
182 if let ast::NestedValue::Value(value) = prop_value {
183 line.push_str(format!("CLASSIFICATION {}", value).as_str());
184 }
185 }
186 }
187 }
188 }
189 ast::ObjectKind::Overlay(o_type, _) => match o_type {
190 ast::OverlayType::Meta(_) => {
191 line.push_str("META ");
192 if let Some(content) = command.object_kind.overlay_content() {
193 if let Some(ref properties) = content.properties {
194 let mut properties = properties.clone();
195 if let Some(ast::NestedValue::Value(lang)) =
196 properties.remove("lang")
197 {
198 line.push_str(format!("{} ", lang).as_str());
199 }
200 if !properties.is_empty() {
201 line.push_str("PROPS ");
202 properties.iter().for_each(|(key, value)| {
203 if let ast::NestedValue::Value(value) = value {
204 line.push_str(
205 format!(" {}=\"{}\"", key, value).as_str(),
206 );
207 }
208 });
209 }
210 }
211 };
212 }
213 ast::OverlayType::Unit(_) => {
214 line.push_str("UNIT ");
215 if let Some(content) = command.object_kind.overlay_content() {
216 if let Some(ref attributes) = content.attributes {
217 line.push_str("ATTRS");
218 attributes.iter().for_each(|(key, value)| {
219 if let ast::NestedValue::Value(value) = value {
220 line.push_str(
221 format!(" {}=\"{}\"", key, value).as_str(),
222 );
223 }
224 });
225 }
226 };
227 }
228 ast::OverlayType::EntryCode(_) => {
229 line.push_str("ENTRY_CODE ");
230 if let Some(content) = command.object_kind.overlay_content() {
231 if let Some(ref properties) = content.properties {
232 if !properties.is_empty() {
233 line.push_str("PROPS ");
234 properties.iter().for_each(|(key, value)| {
235 if let ast::NestedValue::Value(value) = value {
236 line.push_str(
237 format!(" {}={}", key, value).as_str(),
238 );
239 }
240 });
241 }
242 }
243 if let Some(ref attributes) = content.attributes {
244 line.push_str("ATTRS");
245 attributes.iter().for_each(|(key, value)| {
246 if let ast::NestedValue::Array(values) = value {
247 let codes = values
248 .iter()
249 .filter_map(|value| {
250 if let ast::NestedValue::Value(value) = value {
251 Some(format!("\"{}\"", value))
252 } else {
253 None
254 }
255 })
256 .collect::<Vec<String>>()
257 .join(", ");
258 line.push_str(format!(" {}=[{}]", key, codes).as_str());
259 } else if let ast::NestedValue::Object(values) = value {
260 let group_codes = values
261 .iter()
262 .filter_map(|(group, value)| {
263 if let ast::NestedValue::Array(value) = value {
264
265 let codes = value
266 .iter()
267 .filter_map(|value| {
268 if let ast::NestedValue::Value(value) = value {
269 Some(format!("\"{}\"", value))
270 } else {
271 None
272 }
273 })
274 .collect::<Vec<String>>()
275 .join(", ");
276 Some(format!("\"{}\": [{}]", group, codes))
277 } else {
278 None
279 }
280 })
281 .collect::<Vec<String>>()
282 .join(", ");
283 line.push_str(format!(" {}={{{}}}", key, group_codes).as_str());
284 } else if let ast::NestedValue::Value(said) = value {
285 line.push_str(
286 format!(" {}=\"{}\"", key, said).as_str(),
287 );
288 }
289 });
290 }
291 };
292 }
293 ast::OverlayType::Entry(_) => {
294 line.push_str("ENTRY ");
295 if let Some(content) = command.object_kind.overlay_content() {
296 if let Some(ref properties) = content.properties {
297 let mut properties = properties.clone();
298 if let Some(ast::NestedValue::Value(lang)) =
299 properties.remove("lang")
300 {
301 line.push_str(format!("{} ", lang).as_str());
302 }
303 if !properties.is_empty() {
304 line.push_str("PROPS ");
305 properties.iter().for_each(|(key, value)| {
306 if let ast::NestedValue::Value(value) = value {
307 line.push_str(
308 format!(" {}={}", key, value).as_str(),
309 );
310 }
311 });
312 }
313 if let Some(ref attributes) = content.attributes {
314 line.push_str("ATTRS ");
315 attributes.iter().for_each(|(key, value)| {
316 if let ast::NestedValue::Object(values) = value {
318 let codes = values
319 .iter()
320 .filter_map(|(code, label)| {
321 if let ast::NestedValue::Value(label) =
323 label
324 {
325 Some(format!(
326 "\"{}\": \"{}\"",
327 code, label
328 ))
329 } else {
330 None
331 }
332 })
333 .collect::<Vec<String>>()
334 .join(", ");
335 line.push_str(
336 format!("{}={{{}}}", key, codes).as_str(),
337 );
338 } else if let ast::NestedValue::Value(value) = value {
339 line.push_str(
340 format!(" {}=\"{}\"", key, value).as_str(),
341 );
342 }
343 });
344 }
345 }
346 };
347 }
348 ast::OverlayType::Conditional(_) => {
349 line.push_str("CONDITION ");
350 if let Some(content) = command.object_kind.overlay_content() {
351 if let Some(ref properties) = content.properties {
352 if !properties.is_empty() {
353 line.push_str("PROPS ");
354 properties.iter().for_each(|(key, value)| {
355 if let ast::NestedValue::Value(value) = value {
356 line.push_str(
357 format!(" {}={}", key, value).as_str(),
358 );
359 }
360 });
361 }
362 }
363 if let Some(ref attributes) = content.attributes {
364 line.push_str("ATTRS");
365 attributes.iter().for_each(|(key, value)| {
366 if let ast::NestedValue::Value(value) = value {
367 line.push_str(
368 format!(" {}=\"{}\"", key, value).as_str(),
369 );
370 }
371 });
372 }
373 };
374 }
375 ast::OverlayType::Link(_) => {
376 line.push_str("LINK ");
377 if let Some(content) = command.object_kind.overlay_content() {
378 if let Some(ref properties) = content.properties {
379 let target = properties.get("target");
380 if let Some(ast::NestedValue::Reference(RefValue::Said(target_said))) = target {
381 line.push_str(
382 format!("refs:{} ", target_said).as_str(),
383 );
384 }
385 }
386 if let Some(ref attributes) = content.attributes {
387 line.push_str("ATTRS");
388 attributes.iter().for_each(|(key, value)| {
389 if let ast::NestedValue::Value(value) = value {
390 line.push_str(
391 format!(" {}=\"{}\"", key, value).as_str(),
392 );
393 }
394 });
395 }
396 };
397 }
398 ast::OverlayType::AttributeFraming(_) => {
399 line.push_str("ATTR_FRAMING \\\n");
400 if let Some(content) = command.object_kind.overlay_content() {
401 if let Some(ref properties) = content.properties {
402 for (prop_name, prop_value) in properties {
403 let key = prop_name.replace("frame_", "");
404 if let ast::NestedValue::Value(value) = prop_value {
405 line.push_str(format!(" {}=\"{}\" \\\n", key, value).as_str());
406 }
407 }
408 }
409 if let Some(ref attributes) = content.attributes {
410 line.push_str(" ATTRS \\");
411 attributes.iter().for_each(|(key, value)| {
412 if let ast::NestedValue::Object(object) = value {
413 let mut frames_str = "".to_string();
414 for (f_key, f_value) in object.iter() {
415 let mut frame_str = "\n ".to_string();
416 frame_str.push_str(
417 format!(
418 "\"{}\": {{",
419 f_key
420 ).as_str()
421 );
422
423 if let ast::NestedValue::Object(frame) = f_value {
424 frame.iter().for_each(|(frame_key, frame_value)| {
425 if let ast::NestedValue::Value(frame_value) = frame_value {
426 frame_str.push_str(
427 format!(
428 "\n \"{}\": \"{}\",",
429 frame_key,
430 frame_value
431 ).as_str()
432 );
433 }
434
435 });
436 }
437
438 frame_str.push_str("\n },");
439
440 frames_str.push_str(frame_str.as_str());
441 }
442 line.push_str(
443 format!("\n {}={{{}\n }}", key, frames_str).as_str(),
444 );
445 }
446 });
447 }
448 };
449 }
450 _ => {
451 line.push_str(
452 format!("{} ", o_type.to_string().to_case(Case::UpperSnake))
453 .as_str(),
454 );
455
456 if let Some(content) = command.object_kind.overlay_content() {
457 if let Some(ref properties) = content.properties {
458 let mut properties = properties.clone();
459 if let Some(ast::NestedValue::Value(lang)) =
460 properties.remove("lang")
461 {
462 line.push_str(format!("{} ", lang).as_str());
463 }
464 if !properties.is_empty() {
465 line.push_str("PROPS ");
466 properties.iter().for_each(|(key, value)| {
467 if let ast::NestedValue::Value(value) = value {
468 line.push_str(
469 format!(" {}=\"{}\"", key, value).as_str(),
470 );
471 }
472 });
473 }
474 }
475 if let Some(ref attributes) = content.attributes {
476 line.push_str("ATTRS");
477 attributes.iter().for_each(|(key, value)| {
478 if let ast::NestedValue::Value(value) = value {
479 line.push_str(
480 format!(" {}=\"{}\"", key, value).as_str(),
481 );
482 }
483 });
484 }
485 };
486 }
487 },
488 _ => {}
489 }
490 }
491 ast::CommandType::Remove => match &command.object_kind {
492 ast::ObjectKind::CaptureBase(content) => {
493 line.push_str("REMOVE ");
494 if let Some(attributes) = &content.attributes {
495 line.push_str("ATTRIBUTE");
496 for (key, _) in attributes {
497 line.push_str(&format!(" {}", key));
498 }
499 }
500 if let Some(properties) = &content.properties {
501 for (prop_name, _) in properties {
502 if prop_name.eq("classification") {
503 line.push_str("CLASSIFICATION");
504 }
505 }
506 }
507 }
508 ast::ObjectKind::Overlay(_o_type, _) => {
509 todo!()
510 }
511 _ => {}
512 },
513 ast::CommandType::From => {
514 line.push_str("FROM ");
515 }
516 ast::CommandType::Modify => todo!(),
517 }
518
519 ocafile.push_str(format!("{}\n", line).as_str());
520 });
521
522 ocafile
523}
524
// Unit tests for parsing OCAfile text and for generating it back from the AST.
#[cfg(test)]
mod tests {
    use oca_ast_semantics::ast::AttributeType;
    use said::derivation::{HashFunction, HashFunctionCode};

    use super::{error::ExtractingAttributeError, *};

    // Parses a file exercising many instruction kinds and checks the meta entries.
    #[test]
    fn parse_from_string_valid() {
        let _ = env_logger::builder().is_test(true).try_init();

        let unparsed_file = r#"
-- version=0.0.1
-- name=プラスウルトラ
ADD ATTRIBUTE remove=Text
ADD ATTRIBUTE name=Text age=Numeric car=Array[refs:EJeWVGxkqxWrdGi0efOzwg1YQK8FrA-ZmtegiVEtAVcu]
REMOVE ATTRIBUTE remove
ADD ATTRIBUTE incidentals_spare_parts=Array[Array[refs:EJeWVGxkqxWrdGi0efOzwg1YQK8FrA-ZmtegiVEtAVcu]]
ADD ATTRIBUTE d=Text i=Text passed=Boolean
ADD META en PROPS description="Entrance credential" name="Entrance credential"
ADD CHARACTER_ENCODING ATTRS d="utf-8" i="utf-8" passed="utf-8"
ADD CONFORMANCE ATTRS d="M" i="M" passed="M"
ADD LABEL en ATTRS d="Schema digest" i="Credential Issuee" passed="Passed"
ADD INFORMATION en ATTRS d="Name" i="Credential Issuee" passed="Enables or disables passing"
ADD FORMAT ATTRS d="image/jpeg"
ADD UNIT ATTRS i=m^2 d=°
ADD ATTRIBUTE list=Array[Text] el=Text
ADD CARDINALITY ATTRS list="1-2"
ADD ENTRY_CODE ATTRS list="entry_code_said" el=["o1", "o2", "o3"]
ADD ENTRY en ATTRS list="entry_said" el={"o1": "o1_label", "o2": "o2_label", "o3": "o3_label"}
ADD FLAGGED_ATTRIBUTES name age
ADD ATTR_FRAMING \
 id=SNOMEDCT \
 label="Systematized Nomenclature of Medicine Clinical Terms" \
 location="https://bioportal.bioontology.org/ontologies/SNOMEDCT" \
 version=2023AA \
 ATTRS \
 name = {
 "http://purl.bioontology.org/ontology/SNOMEDCT/703503000": {
 "Predicate_id": "skos:exactMatch",
 "Framing_justification": "semapv:ManualMappingCuration"
 }
 }
 age = {
 "http://purl.bioontology.org/ontology/SNOMEDCT/397669002": {
 "Predicate_id": "skos:exactMatch",
 "Framing_justification": "semapv:ManualMappingCuration"
 }
 }
"#;
        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();
        assert_eq!(oca_ast.meta.get("version").unwrap(), "0.0.1");
        assert_eq!(oca_ast.meta.get("name").unwrap(), "プラスウルトラ");
    }

    // Checks that meta comments are collected into `meta`.
    #[test]
    fn parse_meta_from_string_valid() {
        let unparsed_file = r#"
-- version=0.0.1
-- name=Objekt
ADD attribute name=Text age=Numeric
"#;

        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();
        assert_eq!(oca_ast.meta.get("version").unwrap(), "0.0.1");
        assert_eq!(oca_ast.meta.get("name").unwrap(), "Objekt");
    }

    // Round trip: parsing the text and generating it back must reproduce it exactly.
    #[test]
    fn test_deserialization_ast_to_ocafile() {
        let unparsed_file = r#"ADD ATTRIBUTE name=Text age=Numeric radio=Text list=Text
ADD LABEL eo ATTRS name="Nomo" age="aĝo" radio="radio"
ADD INFORMATION en ATTRS name="Object" age="Object"
ADD CHARACTER_ENCODING ATTRS name="utf-8" age="utf-8"
ADD ENTRY_CODE ATTRS radio=["o1", "o2", "o3"]
ADD ENTRY eo ATTRS radio={"o1": "etikedo1", "o2": "etikedo2", "o3": "etikiedo3"}
ADD ENTRY pl ATTRS radio={"o1": "etykieta1", "o2": "etykieta2", "o3": "etykieta3"}
ADD CONDITION ATTRS radio="${age} > 18"
ADD ENTRY_CODE ATTRS list={"g1": ["el1"], "g2": ["el2", "el3"]}
ADD ENTRY pl ATTRS list={"el1": "element1", "el2": "element2", "el3": "element3", "g1": "grupa1", "g2": "grupa2"}
ADD LINK refs:EJeWVGxkqxWrdGi0efOzwg1YQK8FrA-ZmtegiVEtAVcu ATTRS name="n"
ADD ATTR_FRAMING \
 id="SNOMEDCT" \
 label="Systematized Nomenclature of Medicine Clinical Terms" \
 location="https://bioportal.bioontology.org/ontologies/SNOMEDCT" \
 version="2023AA" \
 ATTRS \
 name={
 "http://purl.bioontology.org/ontology/snomedct/703503000": {
 "predicate_id": "skos:exactMatch",
 "framing_justification": "semapv:ManualMappingCuration",
 },
 "http://purl.bioontology.org/ontology/snomedct/703503001": {
 "predicate_id": "skos:exactMatch",
 "framing_justification": "semapv:ManualMappingCuration",
 },
 }
 age={
 "http://purl.bioontology.org/ontology/snomedct/397669002": {
 "predicate_id": "skos:exactMatch",
 "framing_justification": "semapv:ManualMappingCuration",
 },
 }
"#;
        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();

        let ocafile = generate_from_ast(&oca_ast);
        assert_eq!(
            ocafile, unparsed_file,
            "left:\n{} \n right:\n {}",
            ocafile, unparsed_file
        );
    }

    // Attribute names containing dots must survive a parse/generate round trip.
    #[test]
    fn test_attributes_with_special_names() {
        let unparsed_file = r#"ADD ATTRIBUTE person.name=Text Experiment...Range..original.values.=Array[Text]
"#;
        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();

        let ocafile = generate_from_ast(&oca_ast);
        assert_eq!(
            ocafile, unparsed_file,
            "left:\n{} \n right:\n {}",
            ocafile, unparsed_file
        );
    }

    // Round trip for plain and array-typed attribute declarations.
    #[test]
    fn test_attributes_from_ast_to_ocafile() {
        let unparsed_file = r#"ADD ATTRIBUTE name=Text age=Numeric
ADD ATTRIBUTE list=Array[Text] el=Text
"#;
        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();

        let ocafile = generate_from_ast(&oca_ast);
        assert_eq!(
            ocafile, unparsed_file,
            "left:\n{} \n right:\n {}",
            ocafile, unparsed_file
        );
    }

    // Round trip for nested arrays and arrays of references.
    #[test]
    fn test_nested_attributes_from_ocafile_to_ast() {
        let unparsed_file = r#"ADD ATTRIBUTE name=Text age=Numeric car=Array[Array[Text]]
ADD ATTRIBUTE incidentals_spare_parts=Array[refs:EJVVlVSZJqVNnuAMLHLkeSQgwfxYLWTKBELi9e8j1PW0]
"#;
        let oca_ast = parse_from_string(unparsed_file.to_string()).unwrap();

        let ocafile = generate_from_ast(&oca_ast);
        assert_eq!(
            ocafile, unparsed_file,
            "left:\n{} \n right:\n {}",
            ocafile, unparsed_file
        );
    }

    // A malformed SAID in a reference must surface as a `SaidError`.
    #[test]
    fn test_wrong_said() {
        let unparsed_file = r#"ADD ATTRIBUTE said=refs:digest"#;
        let oca_ast = parse_from_string(unparsed_file.to_string());
        match oca_ast.unwrap_err() {
            ParseError::InstructionError(InstructionError::ExtractError(
                ExtractingAttributeError::SaidError(e),
            )) => {
                assert_eq!(e.to_string(), "Unknown code")
            }
            _ => unreachable!(),
        }
    }

    // Checks the textual rendering of plain, reference and nested array types.
    #[test]
    fn test_oca_file_format() {
        let text_type = NestedAttrType::Value(AttributeType::Text);
        assert_eq!(oca_file_format(text_type), "Text");

        let numeric_type = NestedAttrType::Value(AttributeType::Numeric);
        assert_eq!(oca_file_format(numeric_type), "Numeric");

        let ref_type = NestedAttrType::Reference(RefValue::Said(
            HashFunction::from(HashFunctionCode::Blake3_256).derive("example".as_bytes()),
        ));

        let attr = NestedAttrType::Array(Box::new(NestedAttrType::Array(Box::new(ref_type))));

        let out = oca_file_format(attr);
        assert_eq!(
            out,
            "Array[Array[refs:EJeWVGxkqxWrdGi0efOzwg1YQK8FrA-ZmtegiVEtAVcu]]"
        );
    }
}