1use phlow_sdk::prelude::*;
2use regex::Regex;
3use serde_yaml::{Mapping, Value};
4use std::collections::HashMap;
5use std::fs;
6use std::path::Path;
7use crate::settings::PrintOutput;
8
9pub fn preprocessor(
10 phlow: &str,
11 base_path: &Path,
12 print_phlow: bool,
13 print_output: PrintOutput,
14) -> Result<String, Vec<String>> {
15 let (phlow, errors) = preprocessor_directives(phlow, base_path);
16
17 if !errors.is_empty() {
18 eprintln!("❌ YAML Transformation Errors:");
19 for (i, error) in errors.iter().enumerate() {
20 eprintln!(" {}. {}", i + 1, error);
21 }
22 eprintln!();
23 return Err(errors);
24 }
25
26 let phlow = preprocessor_markdown_string_blocks(&phlow);
28 let phlow = processor_transform_phs_hidden_object_and_arrays(&phlow);
30 let phlow = preprocessor_transform_phs_hidden(&phlow);
31 let phlow = preprocessor_eval(&phlow);
32 let phlow = preprocessor_modules(&phlow)?;
33
34 if print_phlow {
35 match print_output {
36 PrintOutput::Yaml => {
37 println!("");
38 println!("# PHLOW TRANSFORMED");
39 println!("#####################################################################");
40 println!("{}", phlow);
41 println!("#####################################################################");
42 println!("");
43 }
44 PrintOutput::Json => {
45 match serde_yaml::from_str::<serde_yaml::Value>(&phlow) {
46 Ok(value) => match serde_json::to_string_pretty(&value) {
47 Ok(json) => println!("{}", json),
48 Err(err) => {
49 eprintln!("❌ Failed to serialize JSON output: {}", err);
50 }
51 },
52 Err(err) => {
53 eprintln!("❌ Failed to parse transformed YAML for JSON output: {}", err);
54 }
55 }
56 }
57 }
58 }
59
60 Ok(phlow)
61}
62
63fn preprocessor_directives(phlow: &str, base_path: &Path) -> (String, Vec<String>) {
64 let mut errors = Vec::new();
65 let include_block_regex = match Regex::new(r"(?m)^(\s*)!include\s+([^\s]+)(.*)") {
66 Ok(re) => re,
67 Err(_) => return (phlow.to_string(), errors),
68 };
69 let include_inline_regex = match Regex::new(r"(?m)^([^\n]*?)!include\s+([^\s]+)(.*)") {
75 Ok(re) => re,
76 Err(_) => return (phlow.to_string(), errors),
77 };
78 let import_inline_regex = match Regex::new(r"!import\s+(\S+)") {
79 Ok(re) => re,
80 Err(_) => return (phlow.to_string(), errors),
81 };
82
83 let with_block_includes = include_block_regex.replace_all(&phlow, |caps: ®ex::Captures| {
84 let indent = &caps[1];
85 let rel_path = &caps[2];
86 let args_str = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
87 let args = parse_include_args(args_str);
88 let full_path = base_path.join(rel_path);
89 match process_include_file(&full_path, &args) {
90 Ok(json_str) => json_str
91 .lines()
92 .map(|line| format!("{}{}", indent, line))
93 .collect::<Vec<_>>()
94 .join("\n"),
95 Err(e) => {
96 errors.push(format!("Error including file {}: {}", rel_path, e));
97 format!("{}<!-- Error including file: {} -->", indent, rel_path)
98 }
99 }
100 });
101
102 let with_inline_includes =
103 include_inline_regex.replace_all(&with_block_includes, |caps: ®ex::Captures| {
104 let prefix = &caps[1];
105 let rel_path = &caps[2];
106 let args_str = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
107 let args = parse_include_args(args_str);
108 let full_path = base_path.join(rel_path);
109
110 match process_include_file(&full_path, &args) {
111 Ok(json_str) => {
112 let continuation_indent: String = prefix
114 .chars()
115 .map(|ch| if ch.is_whitespace() { ch } else { ' ' })
116 .collect();
117
118 let mut lines = json_str.lines();
119 if let Some(first) = lines.next() {
120 let mut out = String::new();
121 out.push_str(prefix);
122 out.push_str(first);
123 for line in lines {
124 out.push('\n');
125 out.push_str(&continuation_indent);
126 out.push_str(line);
127 }
128 out
129 } else {
130 prefix.to_string()
132 }
133 }
134 Err(e) => {
135 errors.push(format!("Error including file {}: {}", rel_path, e));
136 format!("{}<!-- Error including file: {} -->", prefix, rel_path)
137 }
138 }
139 });
140
141 let result = import_inline_regex
142 .replace_all(&with_inline_includes, |caps: ®ex::Captures| {
143 let rel_path = &caps[1];
144 let full_path = base_path.join(rel_path);
145 let extension = full_path
146 .extension()
147 .and_then(|e| e.to_str())
148 .unwrap_or("")
149 .to_lowercase();
150
151 match fs::read_to_string(&full_path) {
152 Ok(contents) => {
153 if extension == "phs" {
154 let one_line = contents
155 .lines()
156 .map(str::trim)
157 .collect::<Vec<_>>()
158 .join(" ")
159 .replace('"', "\\\"");
160
161 format!(r#""{{{{ {} }}}}""#, one_line)
165 } else {
166 let content = contents.to_value().to_json_inline();
167 format!(r#"{}"#, content)
168 }
169 }
170 Err(_) => {
171 errors.push(format!("Error importing file {}: file not found", rel_path));
172 format!("<!-- Error importing file: {} -->", rel_path)
173 }
174 }
175 })
176 .to_string();
177
178 (result, errors)
179}
180
/// Net change in nesting depth that `text` contributes for the delimiter pair
/// `open`/`close`. Delimiters inside double-quoted strings (with backslash
/// escapes) are ignored.
fn hidden_literal_depth_delta(text: &str, open: char, close: char) -> i32 {
    let mut delta = 0;
    let mut in_string = false;
    let mut escaped = false;

    for ch in text.chars() {
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' if in_string => escaped = true,
            '"' => in_string = !in_string,
            c if !in_string && c == open => delta += 1,
            c if !in_string && c == close => delta -= 1,
            _ => {}
        }
    }

    delta
}

/// Number of lines from `following` that belong to the literal opened by
/// `first`.
///
/// The literal ends on the first line that both ends with `close` and brings
/// the nesting depth back to zero. When the delimiters never balance, the
/// previous rule is applied unchanged: a first line ending with `close` is
/// complete on its own, otherwise the literal ends at the first following line
/// ending with `close` (or at end of input).
fn hidden_literal_extra_lines(first: &str, following: &[&str], open: char, close: char) -> usize {
    let mut depth = hidden_literal_depth_delta(first, open, close);
    if first.ends_with(close) && depth <= 0 {
        return 0;
    }

    for (offset, candidate) in following.iter().enumerate() {
        let candidate = candidate.trim();
        depth += hidden_literal_depth_delta(candidate, open, close);
        if candidate.ends_with(close) && depth <= 0 {
            return offset + 1;
        }
    }

    // Unbalanced input: keep the historical behaviour.
    if first.ends_with(close) {
        return 0;
    }
    following
        .iter()
        .position(|candidate| candidate.trim().ends_with(close))
        .map_or(following.len(), |index| index + 1)
}

/// Rewrites `key: { ... }` and `key: [ ... ]` values (single- or multi-line)
/// into a one-line `key: !phs ${ ... }` entry.
///
/// Continuation lines are trimmed and joined with single spaces. Nested
/// literals are followed to their matching closer, so an inner line such as
/// `"a": {"x": 1}` no longer terminates the outer literal early. All other
/// lines are copied through unchanged; the result has no trailing newline.
fn processor_transform_phs_hidden_object_and_arrays(phlow: &str) -> String {
    let lines: Vec<&str> = phlow.lines().collect();
    let mut result = String::with_capacity(phlow.len());
    let mut cursor = 0;

    while cursor < lines.len() {
        let line = lines[cursor];
        cursor += 1;

        let trimmed_line = line.trim_start();

        if let Some(colon_pos) = trimmed_line.find(':') {
            let key = trimmed_line[..colon_pos].trim();
            let value = trimmed_line[colon_pos + 1..].trim();

            let delimiters = if value.starts_with('{') {
                Some(('{', '}'))
            } else if value.starts_with('[') {
                Some(('[', ']'))
            } else {
                None
            };

            if let Some((open, close)) = delimiters {
                let indent = &line[..line.len() - trimmed_line.len()];
                let extra = hidden_literal_extra_lines(value, &lines[cursor..], open, close);

                let mut parts: Vec<&str> = Vec::with_capacity(extra + 1);
                parts.push(value);
                parts.extend(lines[cursor..cursor + extra].iter().map(|part| part.trim()));
                cursor += extra;

                result.push_str(&format!(
                    "{}{}: !phs ${{ {} }}\n",
                    indent,
                    key,
                    parts.join(" ")
                ));
                continue;
            }
        }

        result.push_str(line);
        result.push('\n');
    }

    if result.ends_with('\n') {
        result.pop();
    }
    result
}
230
/// Adds an explicit `!phs` tag to values that look like script code.
///
/// A `key: value` line is tagged when the first word of the value (up to the
/// first `.`) is a single-backtick template, starts with `${`, is a reserved
/// keyword or operator, or when the unquoted value contains an operator
/// surrounded by spaces. A `- item` line without a colon follows the same
/// rules, except that the first word is additionally cut at `[` and `${` is
/// not checked. Lines already tagged with `!phs` and everything else are
/// copied through unchanged. The result has no trailing newline.
fn preprocessor_transform_phs_hidden(phlow: &str) -> String {
    const OPERATORS: &[&str] = &[
        "+", "-", "*", "/", "%", "==", "!=", "<", ">", "<=", ">=", "&&", "||", "??", "?:", "!",
    ];
    const KEYWORDS: &[&str] = &[
        "if", "else", "for", "while", "loop", "match", "let", "const", "fn", "return", "switch",
        "case", "default", "try", "catch", "throw", "when", "payload", "input", "steps", "main",
        "setup", "envs", "tests",
    ];

    let is_reserved = |word: &str| KEYWORDS.contains(&word) || OPERATORS.contains(&word);
    // An operator only counts when it is space-delimited and the text is not
    // a quoted string.
    let reads_as_expression = |text: &str| {
        OPERATORS
            .iter()
            .any(|op| text.contains(&format!(" {} ", op)))
            && !text.starts_with('"')
            && !text.starts_with('\'')
    };
    // A single backtick opens a template; a triple backtick is a fenced block.
    let is_template = |word: &str| word.starts_with('`') && !word.starts_with("```");

    let rewrite = |line: &str| -> String {
        let body = line.trim_start();
        let indent = &line[..line.len() - body.len()];

        if let Some(colon) = body.find(':') {
            let key = body[..colon].trim();
            let value = body[colon + 1..].trim();
            let head = value
                .split_whitespace()
                .next()
                .unwrap_or("")
                .split('.')
                .next()
                .unwrap_or("");

            let needs_tag = head != "!phs"
                && (is_template(head)
                    || head.starts_with("${")
                    || is_reserved(head)
                    || reads_as_expression(value));
            if needs_tag {
                return format!("{}{}: !phs {}", indent, key, value);
            }
        } else if body.starts_with('-') {
            let item = body[1..].trim_start();
            let head = item
                .split_whitespace()
                .next()
                .unwrap_or("")
                .split('.')
                .next()
                .unwrap_or("")
                .split('[')
                .next()
                .unwrap_or("");

            let needs_tag = head != "!phs"
                && (is_template(head) || is_reserved(head) || reads_as_expression(item));
            if needs_tag {
                return format!("{}- !phs {}", indent, item);
            }
        }

        line.to_string()
    };

    phlow.lines().map(rewrite).collect::<Vec<_>>().join("\n")
}
335
/// Reports whether byte offset `idx` of `s` lies inside a double-quoted span.
///
/// Scans the characters that start before `idx`, toggling on every unescaped
/// `"`; a backslash escapes the character that follows it. `idx` is a byte
/// offset (as returned by `str::find`), so the scan uses `char_indices` rather
/// than a character counter. With a counter, any multi-byte character before
/// `idx` made the scan run past the requested position.
fn is_inside_double_quotes(s: &str, idx: usize) -> bool {
    let mut in_quotes = false;
    let mut escaped = false;

    for (offset, ch) in s.char_indices() {
        if offset >= idx {
            break;
        }
        if escaped {
            escaped = false;
            continue;
        }
        match ch {
            '\\' => escaped = true,
            '"' => in_quotes = !in_quotes,
            _ => {}
        }
    }

    in_quotes
}
357
358fn preprocessor_eval(phlow: &str) -> String {
359 let mut result = String::new();
360 let mut lines = phlow.lines().peekable();
361
362 while let Some(line) = lines.next() {
363 let mut search_start = 0usize;
365 let mut found_pos: Option<usize> = None;
366 while let Some(rel) = line[search_start..].find("!phs") {
367 let pos = search_start + rel;
368 if !is_inside_double_quotes(line, pos) {
369 found_pos = Some(pos);
370 break;
371 }
372 search_start = pos + 4; }
374
375 if let Some(pos) = found_pos {
376 let before_eval = &line[..pos];
377 let after_eval = if line.len() > pos + 4 {
378 line[pos + 4..].trim()
379 } else {
380 ""
381 };
382 let indent = " ".repeat(pos);
383 if after_eval.starts_with("```") {
385 let mut block_lines = vec![];
387
388 if after_eval == "```" {
389 while let Some(next_line) = lines.next() {
390 if next_line.trim() == "```" {
391 break;
392 }
393 block_lines.push(next_line.trim().to_string());
394 }
395 } else if let Some(end_pos) = after_eval[3..].find("```") {
396 let mut inner_code = after_eval[3..3 + end_pos].trim().to_string();
397 if let Some(space_idx) = inner_code.find(' ') {
399 let (first, rest) = inner_code.split_at(space_idx);
400 if !first.is_empty() {
401 inner_code = rest.trim_start().to_string();
402 }
403 }
404 block_lines.push(inner_code);
405 }
406
407 let single_line = block_lines.join(" ");
408 let escaped = single_line.replace('"', "\\\"");
409
410 if before_eval.trim().is_empty() {
412 result.push_str(&format!("{}\"{}\"\n", indent, escaped));
413 } else {
414 result.push_str(&format!("{}\"{}\"\n", before_eval, escaped));
415 }
416 }
417 else if after_eval.starts_with("${") {
419 let mut block_content = String::new();
421 let mut brace_count = 0;
422 let mut dollar_brace_started = false;
423
424 let mut chars = after_eval.chars().peekable();
426 while let Some(ch) = chars.next() {
427 block_content.push(ch);
428
429 if ch == '$' && chars.peek() == Some(&'{') {
430 if let Some(next_ch) = chars.next() {
432 block_content.push(next_ch);
433 brace_count += 1;
434 dollar_brace_started = true;
435 }
436 } else if ch == '{' && dollar_brace_started {
437 brace_count += 1;
438 } else if ch == '}' && dollar_brace_started {
439 brace_count -= 1;
440 if brace_count == 0 {
441 break;
442 }
443 }
444 }
445
446 while brace_count > 0 {
448 if let Some(next_line) = lines.next() {
449 for ch in next_line.chars() {
450 block_content.push(ch);
451 if ch == '{' {
452 brace_count += 1;
453 } else if ch == '}' {
454 brace_count -= 1;
455 if brace_count == 0 {
456 break;
457 }
458 }
459 }
460 } else {
461 break;
462 }
463 }
464
465 let inner_content =
467 if block_content.starts_with("${") && block_content.ends_with('}') {
468 &block_content[2..block_content.len() - 1]
469 } else {
470 &block_content
471 };
472
473 let single_line = inner_content
475 .lines()
476 .map(|line| line.trim())
477 .filter(|line| !line.is_empty())
478 .collect::<Vec<_>>()
479 .join(" ");
480
481 let escaped = single_line.replace('"', "\\\"");
483
484 if before_eval.trim().is_empty() {
485 result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", indent, escaped));
486 } else {
487 result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
488 }
489 }
490 else if after_eval.starts_with('`') && after_eval.ends_with('`') {
492 let inner_content = &after_eval[1..after_eval.len() - 1];
494 let escaped = inner_content.replace('"', "\\\"");
495 result.push_str(&format!("{}\"{{{{ `{}` }}}}\"\n", before_eval, escaped));
496 }
497 else if !after_eval.is_empty() {
499 let escaped = after_eval.replace('"', "\\\"");
500 result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
501 }
502 else {
504 let mut block_lines = vec![];
506 let current_line_indent = line.chars().take_while(|c| c.is_whitespace()).count();
507
508 while let Some(&next_line) = lines.peek() {
509 let line_indent = next_line.chars().take_while(|c| c.is_whitespace()).count();
510
511 if next_line.trim().is_empty() {
512 lines.next();
514 continue;
515 } else if line_indent > current_line_indent {
516 let content = next_line.trim().to_string();
518 if !content.is_empty() {
519 block_lines.push(content);
520 }
521 lines.next();
522 } else {
523 break;
524 }
525 }
526
527 let single_line = block_lines.join(" ");
528 let escaped = single_line.replace('"', "\\\"");
529
530 if !escaped.trim().is_empty() {
532 if before_eval.trim().is_empty() {
533 result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", indent, escaped));
534 } else {
535 result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
536 }
537 } else {
538 result.push_str(&format!("{}\n", line));
540 }
541 }
542 } else {
543 result.push_str(line);
544 result.push('\n');
545 }
546 }
547
548 result.pop();
549 result.to_string()
550}
551
/// Turns triple-backtick fenced values into ordinary double-quoted strings.
///
/// Handles `- <fence>` list items and `key: <fence>` mapping values. A fence
/// closed on the same line yields its inner text, minus a leading word when a
/// space follows it; otherwise the following lines up to one containing only
/// the fence are trimmed and joined with single spaces. Double quotes in the
/// content are backslash-escaped. Other lines are copied through unchanged and
/// the result has no trailing newline.
fn preprocessor_markdown_string_blocks(phlow: &str) -> String {
    /// Returns the escaped, single-line content of the fence whose opening
    /// text is `opener` (which must start with the three-backtick marker),
    /// pulling continuation lines from `rest` when the fence is not closed on
    /// the opening line.
    fn fence_content(opener: &str, rest: &mut std::str::Lines<'_>) -> String {
        let pieces: Vec<String> = match opener[3..].find("```") {
            Some(end) => {
                let mut inline = opener[3..3 + end].trim();
                if let Some(space) = inline.find(' ') {
                    if space > 0 {
                        inline = inline[space..].trim_start();
                    }
                }
                vec![inline.to_string()]
            }
            // The closing fence line is consumed but not collected.
            None => rest
                .by_ref()
                .take_while(|candidate| candidate.trim() != "```")
                .map(|candidate| candidate.trim().to_string())
                .collect(),
        };

        pieces.join(" ").replace('"', "\\\"")
    }

    let mut output: Vec<String> = Vec::new();
    let mut lines = phlow.lines();

    while let Some(line) = lines.next() {
        let body = line.trim_start();
        let indent = &line[..line.len() - body.len()];

        let rewritten = if body.starts_with('-') {
            let item = body[1..].trim_start();
            if item.starts_with("```") {
                Some(format!("{}- \"{}\"", indent, fence_content(item, &mut lines)))
            } else {
                None
            }
        } else if let Some(colon) = body.find(':') {
            let key = body[..colon].trim();
            let value = body[colon + 1..].trim();
            if value.starts_with("```") {
                Some(format!(
                    "{}{}: \"{}\"",
                    indent,
                    key,
                    fence_content(value, &mut lines)
                ))
            } else {
                None
            }
        } else {
            None
        };

        output.push(rewritten.unwrap_or_else(|| line.to_string()));
    }

    output.join("\n")
}
648
649fn parse_include_args(args_str: &str) -> HashMap<String, String> {
650 let mut args = HashMap::new();
651
652 if args_str.trim().is_empty() {
653 return args;
654 }
655
656 let arg_regex = match Regex::new(r#"(\w+)=(?:'([^']*)'|"([^"]*)"|([^\s]+))"#) {
658 Ok(re) => re,
659 Err(_) => return args,
660 };
661
662 for caps in arg_regex.captures_iter(args_str) {
663 let key = caps[1].to_string();
664 let value = caps
665 .get(2)
666 .or(caps.get(3))
667 .or(caps.get(4))
668 .map(|m| m.as_str().to_string())
669 .unwrap_or_default();
670 args.insert(key, value);
671 }
672
673 args
674}
675
676fn process_args_in_content(content: &str, args: &HashMap<String, String>) -> (String, Vec<String>) {
677 let mut errors = Vec::new();
678 let arg_regex = match Regex::new(r"!arg\s+(\w+)") {
679 Ok(re) => re,
680 Err(_) => return (content.to_string(), errors),
681 };
682
683 let result = arg_regex
684 .replace_all(content, |caps: ®ex::Captures| {
685 let arg_name = &caps[1];
686 match args.get(arg_name) {
687 Some(value) => value.clone(),
688 None => {
689 errors.push(format!("Missing required argument: '{}'", arg_name));
690 format!("<!-- Error: argument '{}' not found -->", arg_name)
691 }
692 }
693 })
694 .to_string();
695
696 (result, errors)
697}
698
699fn process_include_file(path: &Path, args: &HashMap<String, String>) -> Result<String, String> {
700 let original_extension = path
702 .extension()
703 .and_then(|e| e.to_str())
704 .map(|s| s.to_lowercase());
705
706 let path = if original_extension.is_none() {
707 let mut new_path = path.to_path_buf();
708 new_path.set_extension("phlow");
709 new_path
710 } else {
711 path.to_path_buf()
712 };
713
714 let raw = fs::read_to_string(&path).map_err(|e| e.to_string())?;
715
716 let (with_args, arg_errors) = process_args_in_content(&raw, args);
718
719 if !arg_errors.is_empty() {
721 return Err(arg_errors.join("; "));
722 }
723
724 if let Some(ext) = original_extension {
727 if ext != "phlow" {
728 return Ok(with_args);
729 }
730 }
731
732 let parent = path.parent().unwrap_or_else(|| Path::new("."));
733 let (transformed, errors) = preprocessor_directives(&with_args, parent);
736
737 if !errors.is_empty() {
738 return Err(errors.join("; "));
739 }
740
741 Ok(transformed)
742}
743
744fn preprocessor_modules(phlow: &str) -> Result<String, Vec<String>> {
745 let escaped_phlow = escape_yaml_exclamation_values(phlow);
747
748 let parsed: Value = match serde_yaml::from_str(&escaped_phlow) {
750 Ok(val) => val,
751 Err(_) => return Ok(phlow.to_string()), };
753
754 let mut available_modules = std::collections::HashSet::new();
755
756 if let Some(modules) = parsed.get("modules") {
758 if let Some(modules_array) = modules.as_sequence() {
759 for module in modules_array {
760 if let Some(module_map) = module.as_mapping() {
761 if let Some(module_name) = module_map
763 .get("module")
764 .or_else(|| module_map.get("name"))
765 .and_then(|v| v.as_str())
766 {
767 let clean_name = if module_name.starts_with("./modules/") {
769 &module_name[10..] } else if module_name.contains('/') {
771 module_name.split('/').last().unwrap_or(module_name)
773 } else {
774 module_name
775 };
776 available_modules.insert(clean_name.to_string());
777 }
778 }
779 }
780 }
781 }
782
783 if available_modules.is_empty() {
784 return Ok(phlow.to_string()); }
786
787 let mut parsed_mut: Value = match serde_yaml::from_str(&escaped_phlow) {
789 Ok(val) => val,
790 Err(_) => return Ok(phlow.to_string()),
791 };
792
793 let original_parsed = parsed_mut.clone();
795
796 transform_value(
797 &mut parsed_mut,
798 &available_modules,
799 false, );
801
802 if parsed_mut == original_parsed {
804 return Ok(phlow.to_string());
805 }
806
807 match serde_yaml::to_string(&parsed_mut) {
809 Ok(result) => Ok(unescape_yaml_exclamation_values(&result)),
810 Err(_) => Ok(phlow.to_string()),
811 }
812}
813
/// Step keys that `transform_value` never treats as a module name, even when
/// the key (or the part before its first `.`) matches a declared module.
const EXCLUSIVE_PROPERTIES: &[&str] = &[
    "use", "to", "id", "label", "assert", "assert_eq", "condition", "return", "payload", "input",
    "log", "then", "else", "steps",
];
830
831fn transform_value(
833 value: &mut Value,
834 available_modules: &std::collections::HashSet<String>,
835 is_in_transformable_context: bool,
836) {
837 match value {
838 Value::Mapping(map) => {
839 let mut transformations: Vec<(Value, Value, Option<(String, Vec<String>)>)> =
841 Vec::new();
842
843 for (key, val) in map.iter() {
844 if let Some(key_str) = key.as_str() {
845 if is_in_transformable_context {
847 if key_str.contains('.') {
849 let parts: Vec<&str> = key_str.split('.').collect();
850 if parts.len() >= 2 {
851 let module_name = parts[0];
852 let action_name = parts[1].to_string();
853 let extra_args: Vec<String> =
854 parts.iter().skip(2).map(|s| s.to_string()).collect();
855
856 if !EXCLUSIVE_PROPERTIES.contains(&module_name)
858 && (available_modules.contains(module_name)
859 || !available_modules.is_empty())
860 {
861 transformations.push((
862 key.clone(),
863 val.clone(),
864 Some((action_name, extra_args)),
865 ));
866 }
867 }
868 } else {
869 if !EXCLUSIVE_PROPERTIES.contains(&key_str)
871 && available_modules.contains(key_str)
872 {
873 transformations.push((key.clone(), val.clone(), None));
874 }
875 }
876 }
877 }
878 }
879
880 for (key, old_val, action_and_args) in transformations {
882 map.remove(&key);
883
884 let mut new_entry = Mapping::new();
885
886 let module_name = if let Some(key_str) = key.as_str() {
888 if key_str.contains('.') {
889 key_str.split('.').next().unwrap_or(key_str)
890 } else {
891 key_str
892 }
893 } else {
894 ""
895 };
896
897 new_entry.insert(
898 Value::String("use".to_string()),
899 Value::String(module_name.to_string()),
900 );
901
902 let input_value = if let Some((action_name, args_vec)) = action_and_args {
904 if let Value::Mapping(old_map) = old_val {
906 let mut new_input = Mapping::new();
907 new_input.insert(
908 Value::String("action".to_string()),
909 Value::String(action_name),
910 );
911
912 if !args_vec.is_empty() {
914 let args_seq = Value::Sequence(
915 args_vec
916 .into_iter()
917 .map(Value::String)
918 .collect::<Vec<Value>>(),
919 );
920 new_input.insert(Value::String("args".to_string()), args_seq);
921 }
922
923 for (old_key, old_value) in old_map.iter() {
925 new_input.insert(old_key.clone(), old_value.clone());
926 }
927
928 Value::Mapping(new_input)
929 } else {
930 let mut new_input = Mapping::new();
932 new_input.insert(
933 Value::String("action".to_string()),
934 Value::String(action_name),
935 );
936
937 if !args_vec.is_empty() {
938 let args_seq = Value::Sequence(
939 args_vec
940 .into_iter()
941 .map(Value::String)
942 .collect::<Vec<Value>>(),
943 );
944 new_input.insert(Value::String("args".to_string()), args_seq);
945 }
946 Value::Mapping(new_input)
947 }
948 } else {
949 old_val
951 };
952
953 new_entry.insert(Value::String("input".to_string()), input_value);
954
955 for (new_key, new_val) in new_entry.iter() {
957 map.insert(new_key.clone(), new_val.clone());
958 }
959 }
960
961 for (key, val) in map.iter_mut() {
963 let key_str = key.as_str().unwrap_or("");
964
965 let next_is_transformable =
967 key_str == "steps" || key_str == "then" || key_str == "else";
968
969 transform_value(val, available_modules, next_is_transformable);
970 }
971 }
972 Value::Sequence(seq) => {
973 for item in seq.iter_mut() {
974 transform_value(item, available_modules, is_in_transformable_context);
975 }
976 }
977 _ => {}
978 }
979}
980
981fn escape_yaml_exclamation_values(yaml: &str) -> String {
983 let regex = match Regex::new(r"((?::\s*|-\s+\w+:\s*))(!\w.*?)\s*$") {
984 Ok(re) => re,
985 Err(_) => return yaml.to_string(),
986 };
987
988 let result = regex
989 .replace_all(yaml, |caps: ®ex::Captures| {
990 let prefix = &caps[1];
991 let exclamation_value = &caps[2];
992 format!(r#"{} "__PHLOW_ESCAPE__{}""#, prefix, exclamation_value)
993 })
994 .to_string();
995
996 result
997}
998
999fn unescape_yaml_exclamation_values(yaml: &str) -> String {
1001 let regex = match Regex::new(r"__PHLOW_ESCAPE__(!\w[^\s]*)") {
1002 Ok(re) => re,
1003 Err(_) => return yaml.to_string(),
1004 };
1005
1006 let result = regex
1007 .replace_all(yaml, |caps: ®ex::Captures| {
1008 let exclamation_value = &caps[1];
1009 exclamation_value.to_string()
1010 })
1011 .to_string();
1012
1013 result
1014}
1015
1016#[cfg(test)]
1017mod tests {
1018 use super::*;
1019
1020 #[test]
1021 fn test_preprocessor_transform_phs_hidden_object_and_arrays() {
1022 let input = r#"
1023 key1: {
1024 "name": "value",
1025 "list": [1, 2, 3]
1026 }
1027 key2: normal_value
1028 "#;
1029
1030 let transformed = processor_transform_phs_hidden_object_and_arrays(input);
1031
1032 assert!(
1033 transformed.contains("key1: !phs ${ { \"name\": \"value\", \"list\": [1, 2, 3] } }")
1034 );
1035 assert!(!transformed.contains("key2: !phs normal_value"));
1036 }
1037
1038 #[test]
1039 fn test_preprocessor_transform_phs_hidden() {
1040 let input = r#"
1041 key1: if condition { do_something() }
1042 key2: "normal string"
1043 - for item in list { process(item) }
1044 "#;
1045
1046 let transformed = preprocessor_transform_phs_hidden(input);
1047 assert!(transformed.contains("key1: !phs if condition { do_something() }"));
1048 assert!(transformed.contains("- !phs for item in list { process(item) }"));
1049 }
1050
1051 #[test]
1052 fn test_preprocessor_eval() {
1053 let input = r#"
1054 key1: !phs if condition { do_something() }
1055 key2: !phs ```
1056 multi_line_code();
1057 another_line();
1058 ```
1059 key3: !phs ${ for item in list { process(item) } }
1060 "#;
1061 let transformed = preprocessor_eval(input);
1062 assert!(transformed.contains("key1: \"{{ if condition { do_something() } }}\""));
1063 assert!(transformed.contains("key2: \"multi_line_code(); another_line();\""));
1065 assert!(transformed.contains("key3: \"{{ for item in list { process(item) } }}\""));
1066 }
1067
1068 #[test]
1069 fn test_preprocessor_modules() {
1070 let input = r#"
1071 modules:
1072 - module: test_module
1073
1074 steps:
1075 - test_module:
1076 param1: value1
1077 param2: value2
1078 - another_step:
1079 action: do_something
1080 - new_module.my_action:
1081 paramA: valueA
1082 "#;
1083
1084 let expected = r#"modules:
1085- module: test_module
1086steps:
1087- use: test_module
1088 input:
1089 param1: value1
1090 param2: value2
1091- another_step:
1092 action: do_something
1093- use: new_module
1094 input:
1095 action: my_action
1096 paramA: valueA
1097"#;
1098
1099 let transformed = preprocessor_modules(input).unwrap();
1100 println!("Transformed:\n{}", transformed);
1101 assert_eq!(transformed, expected);
1102 }
1103
1104 #[test]
1105 fn test_preprocessor_modules_with_action_args() {
1106 let input = r#"
1107 modules:
1108 - module: test_module
1109
1110 steps:
1111 - test_module.my_action.info.data:
1112 param1: value1
1113 "#;
1114
1115 let expected = r#"modules:
1116- module: test_module
1117steps:
1118- use: test_module
1119 input:
1120 action: my_action
1121 args:
1122 - info
1123 - data
1124 param1: value1
1125"#;
1126
1127 let transformed = preprocessor_modules(input).unwrap();
1128 println!("Transformed with args:\n{}", transformed);
1129 assert_eq!(transformed, expected);
1130 }
1131
1132 #[test]
1133 fn test_preprocessor_eval_triple_backtick_blocks_as_string() {
1134 let input_multiline = r#"
1136 key_md_block: !phs ```
1137 first_line();
1138 second_line();
1139 ```
1140 "#;
1141 let transformed_multiline = preprocessor_eval(input_multiline);
1142 assert!(transformed_multiline.contains("key_md_block: \"first_line(); second_line();\""));
1143
1144 let input_inline = r#"
1146 key_inline_block: !phs ```single_line();```
1147 "#;
1148 let transformed_inline = preprocessor_eval(input_inline);
1149 assert!(transformed_inline.contains("key_inline_block: \"single_line();\""));
1150
1151 let input_inline_lang = r#"
1153 key_inline_lang: !phs ```json {"a":1}```
1154 "#;
1155 let transformed_inline_lang = preprocessor_eval(input_inline_lang);
1156 assert!(transformed_inline_lang.contains("key_inline_lang: \"{\\\"a\\\":1}\""));
1157
1158 let input_list = r#"
1160 - !phs ```
1161 a();
1162 b();
1163 ```
1164 "#;
1165 let transformed_list = preprocessor_eval(input_list);
1166 assert!(transformed_list.contains("- \"a(); b();\""));
1167 }
1168
1169 #[test]
1170 fn test_preprocessor_markdown_string_blocks_with_language_labels() {
1171 let input_prop_inline = r#"
1173 prop: ```js doThing();```
1174 "#;
1175 let out_prop_inline = preprocessor_markdown_string_blocks(input_prop_inline);
1176 assert!(out_prop_inline.contains("prop: \"doThing();\""));
1177
1178 let input_prop_multiline = r#"
1180 prompt: ```md
1181 Hello
1182 World
1183 ```
1184 "#;
1185 let out_prop_multi = preprocessor_markdown_string_blocks(input_prop_multiline);
1186 assert!(out_prop_multi.contains("prompt: \"Hello World\""));
1187
1188 let input_list_inline = r#"
1190 - ```json {"x":2}```
1191 "#;
1192 let out_list_inline = preprocessor_markdown_string_blocks(input_list_inline);
1193 println!("out_list_inline=<<<{}>>>", out_list_inline);
1194 assert!(out_list_inline.contains("- \"{\\\"x\\\":2}\""));
1195
1196 let input_list_multi = r#"
1198 - ```sql
1199 select 1;
1200 select 2;
1201 ```
1202 "#;
1203 let out_list_multi = preprocessor_markdown_string_blocks(input_list_multi);
1204 assert!(out_list_multi.contains("- \"select 1; select 2;\""));
1205 }
1206
1207 fn temporary_included_file() -> std::io::Result<()> {
1208 let content = r#"
1209 {
1210 "included_key1": "!arg arg1",
1211 "included_key2": "!arg arg2"
1212 }
1213 "#;
1214
1215 fs::write("included_file.phlow", content)
1216 }
1217
1218 fn remove_temporary_included_file() -> std::io::Result<()> {
1219 fs::remove_file("included_file.phlow")
1220 }
1221
1222 fn temporary_included_inline_file() -> std::io::Result<()> {
1223 let content = r#"{
1224 "a": 1,
1225 "b": 2
1226}"#;
1227 fs::write("included_inline.phlow", content)
1228 }
1229
1230 fn remove_temporary_included_inline_file() -> std::io::Result<()> {
1231 fs::remove_file("included_inline.phlow")
1232 }
1233
1234 fn temporary_included_non_phlow_inline_file() -> std::io::Result<()> {
1235 let content = r#"{
1236 "a": 1,
1237 "b": 2
1238}"#;
1239 fs::write("included_inline.json", content)
1240 }
1241
1242 fn remove_temporary_included_non_phlow_inline_file() -> std::io::Result<()> {
1243 fs::remove_file("included_inline.json")
1244 }
1245
/// End-to-end check of `preprocessor`.
///
/// The input combines an `!include` with two arguments, a bare expression
/// value (`key1`), an inline object value (`key2`), a fenced multi-line block
/// (`key3`) and plain `modules`/`steps` sections. The transformed text is
/// compared byte-for-byte against `expected`, so whitespace inside both raw
/// strings is significant.
///
/// NOTE(review): the fixture is removed only after `assert_eq!`; when that
/// assertion panics, `included_file.phlow` is left behind in the working
/// directory.
1246 #[test]
1247 fn test_preprocessor() {
1248 temporary_included_file().unwrap();
1250
1251 let input = r#"
1252 !include included_file.phlow arg1='value1' arg2="value2"
1253
1254 key1: if condition { do_something() }
1255 key2: {
1256 "name": "value",
1257 "list": [1, 2, 3]
1258 }
1259 key3: ```
1260 multi_line_code();
1261 another_line();
1262 ```
1263 modules:
1264 - module: test_module
1265
1266 steps:
1267 - test_module:
1268 param1: value1
1269 param2: value2
1270 "#;
1271
1272 let expected: &str = r#"
1273
1274
1275 {
1276
1277 "included_key1": "value1",
1278
1279 "included_key2": "value2"
1280
1281 }
1282
1283
1284
1285 key1: "{{ if condition { do_something() } }}"
1286 key2: "{{ { \"name\": \"value\", \"list\": [1, 2, 3] } }}"
1287 key3: "multi_line_code(); another_line();"
1288 modules:
1289 - module: test_module
1290
1291 steps:
1292 - test_module:
1293 param1: value1
1294 param2: value2
1295 "#;
1296
// `print_phlow` is `false`, so `preprocessor` does not print the transformed
// document itself; `unwrap()` fails the test on any preprocessing error.
1297 let processed = preprocessor(
1298 input,
1299 &Path::new(".").to_path_buf(),
1300 false,
1301 crate::settings::PrintOutput::Yaml,
1302 )
1303 .unwrap();
1304 println!("Processed:\n{}", processed);
1305
// Exact string comparison: any whitespace difference fails the test.
1306 assert_eq!(processed, expected);
1307
1308 remove_temporary_included_file().unwrap();
1309 }
1310
/// Checks how `preprocessor_directives` lays out an inline `!include` that is
/// the value of a mapping key.
///
/// The included `.phlow` fixture spans several lines; the test pins the exact
/// text that must come back (first line appended after `key: `, remaining lines
/// indented as spelled out in `expected`) and requires that no errors are
/// reported.
#[test]
fn test_preprocessor_directives_inline_include_indentation_on_mapping_value() {
    temporary_included_inline_file().unwrap();

    let input = " key: !include included_inline.phlow";
    let (result, errors) = preprocessor_directives(input, Path::new("."));

    // `preprocessor_directives` returns owned data, so the fixture is no longer
    // needed. Delete it before asserting: a failed assertion panics, and cleanup
    // placed after the assertions would then never run and leave the file behind.
    remove_temporary_included_inline_file().unwrap();

    assert!(errors.is_empty(), "Errors found: {:?}", errors);

    let expected = " key: {\n \"a\": 1,\n \"b\": 2\n }";

    assert_eq!(
        result, expected,
        "Inline include indentation mismatch.\nGot:\n<<<{}>>>\nExpected:\n<<<{}>>>",
        result, expected
    );
}
1335
/// Same layout check as the `.phlow` inline-include test, but the included
/// file has a `.json` extension.
///
/// The test pins the exact text that `preprocessor_directives` must return for
/// an inline `!include` used as a mapping value and requires that no errors are
/// reported.
#[test]
fn test_preprocessor_directives_inline_include_indentation_on_mapping_value_non_phlow() {
    temporary_included_non_phlow_inline_file().unwrap();

    let input = " key: !include included_inline.json";
    let (result, errors) = preprocessor_directives(input, Path::new("."));

    // `preprocessor_directives` returns owned data, so the fixture is no longer
    // needed. Delete it before asserting: a failed assertion panics, and cleanup
    // placed after the assertions would then never run and leave the file behind.
    remove_temporary_included_non_phlow_inline_file().unwrap();

    assert!(errors.is_empty(), "Errors found: {:?}", errors);

    let expected = " key: {\n \"a\": 1,\n \"b\": 2\n }";

    assert_eq!(
        result, expected,
        "Inline include indentation mismatch for non-phlow file.\nGot:\n<<<{}>>>\nExpected:\n<<<{}>>>",
        result, expected
    );
}
1361
/// Runs the full `preprocessor` over a realistic pipeline definition whose
/// `openai.chat` prompt is one long double-quoted string containing `\#`,
/// escaped JSON and a Markdown code fence.
///
/// The only assertion is that the transformed output does not contain the
/// literal `#{ \"type\": \"string\" }`.
///
/// NOTE(review): judging by the test name, this presumably guards against the
/// preprocessor rewriting text that sits inside a quoted string — confirm
/// against the preprocessing passes.
1362 #[test]
1363 fn test_no_phs() {
1364 let input = r#"modules:
1365 - module: log
1366 - module: fs
1367 - module: openai
1368 with:
1369 api_key: envs.OPENAI_API_KEY
1370 - module: amqp
1371 with:
1372 vhost: "nixyz"
1373 queue_name: "queue.etl.carrefour.raw"
1374 max_concurrency: 1
1375 definition:
1376 vhosts:
1377 - name: "nixyz"
1378 exchanges:
1379 - name: "x.etl"
1380 type: direct
1381 durable: true
1382 vhost: "nixyz"
1383 auto_delete: true
1384 queues:
1385 - name: "queue.etl.carrefour.raw"
1386 vhost: "nixyz"
1387 durable: true
1388 bindings:
1389 - source: "x.extract"
1390 vhost: "nixyz"
1391 destination: "queue.etl.carrefour.raw"
1392 destination_type: queue
1393 routing_key: "mercado.carrefour.com.br.#"
1394 arguments: {}
1395 - module: aws
1396 with:
1397 region: envs.AWS_REGION
1398 endpoint_url: envs.AWS_S3_ENDPOINT_URL
1399 s3_force_path_style: true
1400 secret_access_key: envs.AWS_SECRET_ACCESS_KEY
1401 access_key_id: envs.AWS_ACCESS_KEY_ID
1402
1403main: amqp
1404
1405steps:
1406 - log.info:
1407 message: "Starting processing message"
1408
1409 - payload: main.parse()
1410
1411 - log.info:
1412 message: payload
1413
1414 - id: fetch_s3_object
1415 aws.s3.get_object:
1416 bucket: payload.bucket
1417 key: payload.key
1418
1419 - assert: payload.success != true
1420 then:
1421 - log.error:
1422 message: payload.error
1423 - return: false
1424
1425 - log.info:
1426 message: "Fetched object from S3"
1427
1428 - fs.write:
1429 path: ./input.json
1430 content: steps.fetch_s3_object
1431 force: true
1432
1433 - id: gpt
1434 openai.chat:
1435 model: "gpt-5-nano"
1436 messages:
1437 - role: user
1438 content: "\# Product Extraction Prompt with JSON Schema\n\nAnalyze an input HTML page, extract all relevant product information, and return this data as a single object strictly following the provided JSON schema for a \"Product\" as below.\n\n## JSON Schema for \"Product\"\n\n```json\n{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"title\": \"Product\",\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"id\": { \"type\": \"string\" },\n \"sku\": { \"type\": \"string\" },\n \"name\": { \"type\": \"string\" },\n \"description\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"originalPrice\": { \"type\": \"number\" },\n \"currency\": { \"type\": \"string\" },\n \"stock\": { \"type\": \"integer\" },\n \"availability\": { \"type\": \"string\" },\n \"brand\": { \"type\": \"string\" },\n \"category\": { \"type\": \"string\" },\n \"categories\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n \"tags\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n \"images\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n \"alt\": { \"type\": \"string\" }\n }\n }\n },\n \"videos\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n \"title\": { \"type\": \"string\" }\n }\n }\n },\n \"attributes\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"value\": { \"type\": [\"string\", \"number\", \"boolean\"] }\n }\n }\n },\n \"variants\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"id\": { \"type\": \"string\" },\n \"sku\": { \"type\": 
\"string\" },\n \"name\": { \"type\": \"string\" },\n \"price\": { \"type\": \"number\" },\n \"originalPrice\": { \"type\": \"number\" },\n \"currency\": { \"type\": \"string\" },\n \"stock\": { \"type\": \"integer\" },\n \"attributes\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"additionalProperties\": false,\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"value\": { \"type\": [\"string\", \"number\", \"boolean\"] }\n }\n }\n }\n }\n }\n },\n \"rating\": { \"type\": \"number\" },\n \"reviewCount\": { \"type\": \"integer\" },\n \"gtin\": { \"type\": \"string\" },\n \"mpn\": { \"type\": \"string\" },\n \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n \"language\": { \"type\": \"string\" },\n \"currencySymbol\": { \"type\": \"string\" },\n \"breadcrumbs\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n \"createdAt\": { \"type\": \"string\", \"format\": \"date-time\" },\n \"updatedAt\": { \"type\": \"string\", \"format\": \"date-time\" },\n \"metadata\": {\n \"type\": \"object\",\n \"additionalProperties\": {\n \"type\": [\"string\", \"number\", \"boolean\", \"null\"]\n }\n }\n }\n}\n```\n\n## Extraction Instructions\n\nCarefully identify and map as much product information as possible from the HTML content to the respective JSON fields. \nNormalize price, numbers, dates; do not hallucinate; omit missing values; output only one product.\n\n## Reasoning Requirements\n\nBefore outputting JSON, reason step-by-step about: \n- How each field can be detected \n- What normalization is needed \n- What cannot be extracted \n\n## Output Format\n\nReturn only **one JSON object**, no commentary or code block.\n"
1439 - role: user
1440 content: steps.fetch_s3_object.data
1441
1442 - fs.write:
1443 path: ./payload.json
1444 content: payload
1445 force: true
1446
1447 - log.info:
1448 message: steps.gpt.data.choices[0].message.content"#;
1449
// `unwrap()` doubles as an assertion that preprocessing this document succeeds.
1450 let transformed = preprocessor(
1451 input,
1452 &Path::new(".").to_path_buf(),
1453 false,
1454 crate::settings::PrintOutput::Yaml,
1455 )
1456 .unwrap();
1457
// The needle is a raw string, so the backslashes are matched literally.
1458 assert!(!transformed.contains(r#"#{ \"type\": \"string\" }"#));
1459 }
1460}