phlow_runtime/
preprocessor.rs

1use phlow_sdk::prelude::*;
2use regex::Regex;
3use serde_yaml::{Mapping, Value};
4use std::collections::HashMap;
5use std::fs;
6use std::path::Path;
7use crate::settings::PrintOutput;
8
9pub fn preprocessor(
10    phlow: &str,
11    base_path: &Path,
12    print_phlow: bool,
13    print_output: PrintOutput,
14) -> Result<String, Vec<String>> {
15    let (phlow, errors) = preprocessor_directives(phlow, base_path);
16
17    if !errors.is_empty() {
18        eprintln!("❌ YAML Transformation Errors:");
19        for (i, error) in errors.iter().enumerate() {
20            eprintln!("  {}. {}", i + 1, error);
21        }
22        eprintln!();
23        return Err(errors);
24    }
25
26    // Primeiro, converte blocos ``` em strings para evitar alterações indevidas dentro do bloco
27    let phlow = preprocessor_markdown_string_blocks(&phlow);
28    // Depois, aplica auto-transformações de objetos/arrays e PHS oculto
29    let phlow = processor_transform_phs_hidden_object_and_arrays(&phlow);
30    let phlow = preprocessor_transform_phs_hidden(&phlow);
31    let phlow = preprocessor_eval(&phlow);
32    let phlow = preprocessor_modules(&phlow)?;
33
34    if print_phlow {
35        match print_output {
36            PrintOutput::Yaml => {
37                println!("");
38                println!("# PHLOW TRANSFORMED");
39                println!("#####################################################################");
40                println!("{}", phlow);
41                println!("#####################################################################");
42                println!("");
43            }
44            PrintOutput::Json => {
45                match serde_yaml::from_str::<serde_yaml::Value>(&phlow) {
46                    Ok(value) => match serde_json::to_string_pretty(&value) {
47                        Ok(json) => println!("{}", json),
48                        Err(err) => {
49                            eprintln!("❌ Failed to serialize JSON output: {}", err);
50                        }
51                    },
52                    Err(err) => {
53                        eprintln!("❌ Failed to parse transformed YAML for JSON output: {}", err);
54                    }
55                }
56            }
57        }
58    }
59
60    Ok(phlow)
61}
62
63fn preprocessor_directives(phlow: &str, base_path: &Path) -> (String, Vec<String>) {
64    let mut errors = Vec::new();
65    let include_block_regex = match Regex::new(r"(?m)^(\s*)!include\s+([^\s]+)(.*)") {
66        Ok(re) => re,
67        Err(_) => return (phlow.to_string(), errors),
68    };
69    // Captura o prefixo da linha até o !include para poder aplicar a mesma
70    // tabulação nas linhas subsequentes do conteúdo incluído.
71    // Grupo 1: tudo desde o início da linha até antes de !include
72    // Grupo 2: caminho do arquivo
73    // Grupo 3: argumentos opcionais
74    let include_inline_regex = match Regex::new(r"(?m)^([^\n]*?)!include\s+([^\s]+)(.*)") {
75        Ok(re) => re,
76        Err(_) => return (phlow.to_string(), errors),
77    };
78    let import_inline_regex = match Regex::new(r"!import\s+(\S+)") {
79        Ok(re) => re,
80        Err(_) => return (phlow.to_string(), errors),
81    };
82
83    let with_block_includes = include_block_regex.replace_all(&phlow, |caps: &regex::Captures| {
84        let indent = &caps[1];
85        let rel_path = &caps[2];
86        let args_str = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
87        let args = parse_include_args(args_str);
88        let full_path = base_path.join(rel_path);
89        match process_include_file(&full_path, &args) {
90            Ok(json_str) => json_str
91                .lines()
92                .map(|line| format!("{}{}", indent, line))
93                .collect::<Vec<_>>()
94                .join("\n"),
95            Err(e) => {
96                errors.push(format!("Error including file {}: {}", rel_path, e));
97                format!("{}<!-- Error including file: {} -->", indent, rel_path)
98            }
99        }
100    });
101
102    let with_inline_includes =
103        include_inline_regex.replace_all(&with_block_includes, |caps: &regex::Captures| {
104            let prefix = &caps[1];
105            let rel_path = &caps[2];
106            let args_str = caps.get(3).map(|m| m.as_str()).unwrap_or("").trim();
107            let args = parse_include_args(args_str);
108            let full_path = base_path.join(rel_path);
109
110            match process_include_file(&full_path, &args) {
111                Ok(json_str) => {
112                    // Calcula a indentação de continuação com o mesmo comprimento do prefixo
113                    let continuation_indent: String = prefix
114                        .chars()
115                        .map(|ch| if ch.is_whitespace() { ch } else { ' ' })
116                        .collect();
117
118                    let mut lines = json_str.lines();
119                    if let Some(first) = lines.next() {
120                        let mut out = String::new();
121                        out.push_str(prefix);
122                        out.push_str(first);
123                        for line in lines {
124                            out.push('\n');
125                            out.push_str(&continuation_indent);
126                            out.push_str(line);
127                        }
128                        out
129                    } else {
130                        // Conteúdo vazio: mantém apenas o prefixo
131                        prefix.to_string()
132                    }
133                }
134                Err(e) => {
135                    errors.push(format!("Error including file {}: {}", rel_path, e));
136                    format!("{}<!-- Error including file: {} -->", prefix, rel_path)
137                }
138            }
139        });
140
141    let result = import_inline_regex
142        .replace_all(&with_inline_includes, |caps: &regex::Captures| {
143            let rel_path = &caps[1];
144            let full_path = base_path.join(rel_path);
145            let extension = full_path
146                .extension()
147                .and_then(|e| e.to_str())
148                .unwrap_or("")
149                .to_lowercase();
150
151            match fs::read_to_string(&full_path) {
152                Ok(contents) => {
153                    if extension == "phs" {
154                        let one_line = contents
155                            .lines()
156                            .map(str::trim)
157                            .collect::<Vec<_>>()
158                            .join(" ")
159                            .replace('"', "\\\"");
160
161                        // Somente arquivos com extensão .phs devem ser tratados como código PHS.
162                        // Demais extensões devem ser retornadas como string literal.
163
164                        format!(r#""{{{{ {} }}}}""#, one_line)
165                    } else {
166                        let content = contents.to_value().to_json_inline();
167                        format!(r#"{}"#, content)
168                    }
169                }
170                Err(_) => {
171                    errors.push(format!("Error importing file {}: file not found", rel_path));
172                    format!("<!-- Error importing file: {} -->", rel_path)
173                }
174            }
175        })
176        .to_string();
177
178    (result, errors)
179}
180
/// Finds `key: { ... }` / `key: [ ... ]` values that carry no `!phs` tag and rewrites them as
/// `key: !phs ${ ... }`, collapsing multi-line blocks into a single line.
///
/// A block is considered finished on the first line that both ends with the matching closer
/// (`}` for `{`, `]` for `[`) **and** leaves the bracket nesting balanced. Checking only the
/// last character would cut a block short as soon as a nested one-line object such as
/// `b: { c: 1 }` appears inside it. If no such line exists the rest of the input is consumed.
///
/// Lines without a `:` or whose value does not start with `{` / `[` are copied unchanged.
/// The result never ends with a line break.
fn processor_transform_phs_hidden_object_and_arrays(phlow: &str) -> String {
    /// Net change in `{`/`[` nesting produced by `text`, ignoring brackets that sit inside
    /// `"…"`, `'…'` or `` `…` `` on that line. When a quote is left open at the end of the
    /// line (typically an apostrophe in free text) the plain, quote-unaware count is used.
    fn nesting_delta(text: &str) -> i32 {
        let mut quote_aware = 0i32;
        let mut naive = 0i32;
        let mut quote: Option<char> = None;
        let mut escaped = false;

        for ch in text.chars() {
            let step = match ch {
                '{' | '[' => 1,
                '}' | ']' => -1,
                _ => 0,
            };
            naive += step;

            match quote {
                Some(open) => {
                    if escaped {
                        escaped = false;
                    } else if ch == '\\' {
                        escaped = true;
                    } else if ch == open {
                        quote = None;
                    }
                }
                None => {
                    if matches!(ch, '"' | '\'' | '`') {
                        quote = Some(ch);
                    } else {
                        quote_aware += step;
                    }
                }
            }
        }

        if quote.is_some() {
            naive
        } else {
            quote_aware
        }
    }

    let mut result = String::new();
    let mut lines = phlow.lines();

    while let Some(line) = lines.next() {
        let trimmed_line = line.trim_start();

        if let Some(colon_pos) = trimmed_line.find(':') {
            let key = trimmed_line[..colon_pos].trim();
            let value = trimmed_line[colon_pos + 1..].trim();
            let closer = match value.chars().next() {
                Some('{') => Some('}'),
                Some('[') => Some(']'),
                _ => None,
            };

            if let Some(closer) = closer {
                let indent = &line[..line.len() - trimmed_line.len()];
                let mut block_lines = vec![value.to_string()];
                let mut depth = nesting_delta(value);

                // Keep reading while the block is not closed on the line just seen.
                if !(value.ends_with(closer) && depth <= 0) {
                    for next_line in lines.by_ref() {
                        let piece = next_line.trim();
                        depth += nesting_delta(piece);
                        block_lines.push(piece.to_string());
                        if piece.ends_with(closer) && depth <= 0 {
                            break;
                        }
                    }
                }

                let single_line = block_lines.join(" ");
                result.push_str(&format!("{}{}: !phs ${{ {} }}\n", indent, key, single_line));
                continue;
            }
        }

        result.push_str(line);
        result.push('\n');
    }

    // Drop the trailing line break added after the last line, if any.
    if result.ends_with('\n') {
        result.pop();
    }
    result
}
230
/// Adds the `!phs` tag to property values and list items that look like PHS code.
///
/// Two line shapes are inspected: `key: value` (first `:` on the line) and `- item`
/// (a line starting with `-` that contains no `:`). The value/item is tagged when
///
/// * its first word starts with a single backtick (but not a ``` fence) or with `${`;
/// * its first word — cut at the first `.` or `[` — is a reserved PHS keyword or an operator;
/// * it contains an operator surrounded by spaces and does not start with `"` or `'`.
///
/// Values already starting with `!phs` and all other lines are copied unchanged.
/// Both line shapes share one classifier, so `key: payload[0]` and `- ${ x }` are treated
/// exactly like `- payload[0]` and `key: ${ x }`.
///
/// The result never ends with a line break.
fn preprocessor_transform_phs_hidden(phlow: &str) -> String {
    const OPERATORS: &[&str] = &[
        "+", "-", "*", "/", "%", "==", "!=", "<", ">", "<=", ">=", "&&", "||", "??", "?:", "!",
    ];
    const RESERVED_KEYWORDS: &[&str] = &[
        "if", "else", "for", "while", "loop", "match", "let", "const", "fn", "return", "switch",
        "case", "default", "try", "catch", "throw", "when", "payload", "input", "steps", "main",
        "setup", "envs", "tests",
    ];

    /// Decides whether `expr` (a property value or a list item) must receive the `!phs` tag.
    fn needs_phs_tag(expr: &str) -> bool {
        // First whitespace-delimited word, without any `.field` or `[index]` suffix.
        let first_word = expr
            .split_whitespace()
            .next()
            .unwrap_or("")
            .split('.')
            .next()
            .unwrap_or("")
            .split('[')
            .next()
            .unwrap_or("");

        if first_word == "!phs" {
            return false;
        }

        // Blocks starting with ``` are plain strings, never PHS.
        if (first_word.starts_with('`') && !first_word.starts_with("```"))
            || first_word.starts_with("${")
        {
            return true;
        }

        if RESERVED_KEYWORDS.contains(&first_word) || OPERATORS.contains(&first_word) {
            return true;
        }

        let quoted = expr.starts_with('"') || expr.starts_with('\'');
        !quoted
            && OPERATORS
                .iter()
                .any(|op| expr.contains(&format!(" {} ", op)))
    }

    let mut result = String::new();

    for line in phlow.lines() {
        let trimmed_line = line.trim_start();
        let indent = &line[..line.len() - trimmed_line.len()];

        if let Some(colon_pos) = trimmed_line.find(':') {
            let key = trimmed_line[..colon_pos].trim();
            let value = trimmed_line[colon_pos + 1..].trim();

            if needs_phs_tag(value) {
                result.push_str(&format!("{}{}: !phs {}\n", indent, key, value));
                continue;
            }
        } else if let Some(rest) = trimmed_line.strip_prefix('-') {
            let after_dash = rest.trim_start();

            if needs_phs_tag(after_dash) {
                result.push_str(&format!("{}- !phs {}\n", indent, after_dash));
                continue;
            }
        }

        result.push_str(line);
        result.push('\n');
    }

    // Every emitted line ends with '\n'; drop the last one (no-op for empty input).
    result.pop();
    result
}
335
/// Reports whether the byte offset `idx` of `s` lies inside a double-quoted section.
///
/// The text before `idx` is scanned from the start; every unescaped `"` toggles the state and
/// a backslash makes the following character literal. `idx` is a **byte** offset (as returned
/// by `str::find`), so positions are taken from `char_indices()`; counting characters instead
/// would read past `idx` whenever a multi-byte character precedes it.
fn is_inside_double_quotes(s: &str, idx: usize) -> bool {
    let mut in_quotes = false;
    let mut escaped = false;
    for (offset, ch) in s.char_indices() {
        if offset >= idx {
            break;
        }
        if escaped {
            escaped = false;
            continue;
        }
        if ch == '\\' {
            escaped = true;
            continue;
        }
        if ch == '"' {
            in_quotes = !in_quotes;
        }
    }
    in_quotes
}
357
358fn preprocessor_eval(phlow: &str) -> String {
359    let mut result = String::new();
360    let mut lines = phlow.lines().peekable();
361
362    while let Some(line) = lines.next() {
363        // Encontra um !phs fora de aspas na linha
364        let mut search_start = 0usize;
365        let mut found_pos: Option<usize> = None;
366        while let Some(rel) = line[search_start..].find("!phs") {
367            let pos = search_start + rel;
368            if !is_inside_double_quotes(line, pos) {
369                found_pos = Some(pos);
370                break;
371            }
372            search_start = pos + 4; // avança após !phs
373        }
374
375        if let Some(pos) = found_pos {
376            let before_eval = &line[..pos];
377            let after_eval = if line.len() > pos + 4 {
378                line[pos + 4..].trim()
379            } else {
380                ""
381            };
382            let indent = " ".repeat(pos);
383            // Verifica se o bloco é markdown-style ```
384            if after_eval.starts_with("```") {
385                // Bloco markdown-style interpretado como string literal
386                let mut block_lines = vec![];
387
388                if after_eval == "```" {
389                    while let Some(next_line) = lines.next() {
390                        if next_line.trim() == "```" {
391                            break;
392                        }
393                        block_lines.push(next_line.trim().to_string());
394                    }
395                } else if let Some(end_pos) = after_eval[3..].find("```") {
396                    let mut inner_code = after_eval[3..3 + end_pos].trim().to_string();
397                    // Remove rótulo de linguagem se existir no inline
398                    if let Some(space_idx) = inner_code.find(' ') {
399                        let (first, rest) = inner_code.split_at(space_idx);
400                        if !first.is_empty() {
401                            inner_code = rest.trim_start().to_string();
402                        }
403                    }
404                    block_lines.push(inner_code);
405                }
406
407                let single_line = block_lines.join(" ");
408                let escaped = single_line.replace('"', "\\\"");
409
410                // Saída como string simples, sem "{{ }}"
411                if before_eval.trim().is_empty() {
412                    result.push_str(&format!("{}\"{}\"\n", indent, escaped));
413                } else {
414                    result.push_str(&format!("{}\"{}\"\n", before_eval, escaped));
415                }
416            }
417            // Verifica se o bloco é delimitado por ${}
418            else if after_eval.starts_with("${") {
419                // Bloco de código delimitado por ${}
420                let mut block_content = String::new();
421                let mut brace_count = 0;
422                let mut dollar_brace_started = false;
423
424                // Primeiro, verifica se há conteúdo na mesma linha
425                let mut chars = after_eval.chars().peekable();
426                while let Some(ch) = chars.next() {
427                    block_content.push(ch);
428
429                    if ch == '$' && chars.peek() == Some(&'{') {
430                        // Consome o '{'
431                        if let Some(next_ch) = chars.next() {
432                            block_content.push(next_ch);
433                            brace_count += 1;
434                            dollar_brace_started = true;
435                        }
436                    } else if ch == '{' && dollar_brace_started {
437                        brace_count += 1;
438                    } else if ch == '}' && dollar_brace_started {
439                        brace_count -= 1;
440                        if brace_count == 0 {
441                            break;
442                        }
443                    }
444                }
445
446                // Se não fechou na mesma linha, continue lendo
447                while brace_count > 0 {
448                    if let Some(next_line) = lines.next() {
449                        for ch in next_line.chars() {
450                            block_content.push(ch);
451                            if ch == '{' {
452                                brace_count += 1;
453                            } else if ch == '}' {
454                                brace_count -= 1;
455                                if brace_count == 0 {
456                                    break;
457                                }
458                            }
459                        }
460                    } else {
461                        break;
462                    }
463                }
464
465                // Remove as chaves externas ${} e processa o conteúdo
466                let inner_content =
467                    if block_content.starts_with("${") && block_content.ends_with('}') {
468                        &block_content[2..block_content.len() - 1]
469                    } else {
470                        &block_content
471                    };
472
473                // Unifica em uma linha, removendo quebras de linha desnecessárias
474                let single_line = inner_content
475                    .lines()
476                    .map(|line| line.trim())
477                    .filter(|line| !line.is_empty())
478                    .collect::<Vec<_>>()
479                    .join(" ");
480
481                // Escape apenas aspas duplas
482                let escaped = single_line.replace('"', "\\\"");
483
484                if before_eval.trim().is_empty() {
485                    result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", indent, escaped));
486                } else {
487                    result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
488                }
489            }
490            // Verifica se o bloco é uma template string com crases
491            else if after_eval.starts_with('`') && after_eval.ends_with('`') {
492                // Template string com crases - converte para sintaxe de template string
493                let inner_content = &after_eval[1..after_eval.len() - 1];
494                let escaped = inner_content.replace('"', "\\\"");
495                result.push_str(&format!("{}\"{{{{ `{}` }}}}\"\n", before_eval, escaped));
496            }
497            // Verifica se o bloco é uma única linha
498            else if !after_eval.is_empty() {
499                let escaped = after_eval.replace('"', "\\\"");
500                result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
501            }
502            // Caso contrário, processa como um bloco indentado
503            else {
504                // Bloco indentado
505                let mut block_lines = vec![];
506                let current_line_indent = line.chars().take_while(|c| c.is_whitespace()).count();
507
508                while let Some(&next_line) = lines.peek() {
509                    let line_indent = next_line.chars().take_while(|c| c.is_whitespace()).count();
510
511                    if next_line.trim().is_empty() {
512                        // Pula linhas vazias sem adicionar conteúdo
513                        lines.next();
514                        continue;
515                    } else if line_indent > current_line_indent {
516                        // Esta linha é mais indentada que a linha do !phs, então faz parte do bloco
517                        let content = next_line.trim().to_string();
518                        if !content.is_empty() {
519                            block_lines.push(content);
520                        }
521                        lines.next();
522                    } else {
523                        break;
524                    }
525                }
526
527                let single_line = block_lines.join(" ");
528                let escaped = single_line.replace('"', "\\\"");
529
530                // Só adiciona se houver conteúdo válido
531                if !escaped.trim().is_empty() {
532                    if before_eval.trim().is_empty() {
533                        result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", indent, escaped));
534                    } else {
535                        result.push_str(&format!("{}\"{{{{ {} }}}}\"\n", before_eval, escaped));
536                    }
537                } else {
538                    // Se não há conteúdo válido, apenas preserva a linha original sem processamento
539                    result.push_str(&format!("{}\n", line));
540                }
541            }
542        } else {
543            result.push_str(line);
544            result.push('\n');
545        }
546    }
547
548    result.pop();
549    result.to_string()
550}
551
/// Turns values written as ``` fenced blocks into ordinary double-quoted strings, so they
/// need no `!phs` tag and are not altered by later passes.
///
/// Handled shapes are list items (`- ``` ... ``` `) and properties (`key: ``` ... ``` `).
/// List items are checked first, so a line starting with `-` is never treated as a property.
/// A fence closed on the same line yields its inner text, minus the first word when the text
/// contains a space (language label). Otherwise the following lines up to a line that is
/// exactly ``` are trimmed and joined with single spaces. `"` is escaped as `\"`.
///
/// All other lines are copied unchanged. The result never ends with a line break.
fn preprocessor_markdown_string_blocks(phlow: &str) -> String {
    /// Reads the fenced block that starts with `opening` (text beginning with ```), pulling
    /// further lines from `rest` when the fence is not closed inline, and returns its
    /// content as an escaped single line.
    fn fence_to_string<'a>(opening: &str, rest: &mut impl Iterator<Item = &'a str>) -> String {
        let after_fence = &opening[3..];

        let text = match after_fence.find("```") {
            // Closed on the same line: inline content, language label removed.
            Some(end) => {
                let inner = after_fence[..end].trim();
                match inner.split_once(' ') {
                    Some((label, code)) if !label.is_empty() => code.trim_start().to_string(),
                    _ => inner.to_string(),
                }
            }
            // Open fence (bare or followed by a label): gather lines up to the closing one.
            None => rest
                .by_ref()
                .take_while(|candidate| candidate.trim() != "```")
                .map(str::trim)
                .collect::<Vec<_>>()
                .join(" "),
        };

        text.replace('"', "\\\"")
    }

    let mut output = String::new();
    let mut remaining = phlow.lines();

    while let Some(line) = remaining.next() {
        let body = line.trim_start();
        let indent = &line[..line.len() - body.len()];

        let replacement = if let Some(item) = body.strip_prefix('-') {
            let item = item.trim_start();
            if item.starts_with("```") {
                let text = fence_to_string(item, &mut remaining);
                Some(format!("{}- \"{}\"\n", indent, text))
            } else {
                None
            }
        } else if let Some(colon) = body.find(':') {
            let name = body[..colon].trim();
            let value = body[colon + 1..].trim();
            if value.starts_with("```") {
                let text = fence_to_string(value, &mut remaining);
                Some(format!("{}{}: \"{}\"\n", indent, name, text))
            } else {
                None
            }
        } else {
            None
        };

        match replacement {
            Some(text) => output.push_str(&text),
            None => {
                output.push_str(line);
                output.push('\n');
            }
        }
    }

    // Every emitted line ends with '\n'; drop the last one (no-op for empty input).
    output.pop();
    output
}
648
649fn parse_include_args(args_str: &str) -> HashMap<String, String> {
650    let mut args = HashMap::new();
651
652    if args_str.trim().is_empty() {
653        return args;
654    }
655
656    // Parse arguments in format: key=value key2='value with spaces' key3="quoted value"
657    let arg_regex = match Regex::new(r#"(\w+)=(?:'([^']*)'|"([^"]*)"|([^\s]+))"#) {
658        Ok(re) => re,
659        Err(_) => return args,
660    };
661
662    for caps in arg_regex.captures_iter(args_str) {
663        let key = caps[1].to_string();
664        let value = caps
665            .get(2)
666            .or(caps.get(3))
667            .or(caps.get(4))
668            .map(|m| m.as_str().to_string())
669            .unwrap_or_default();
670        args.insert(key, value);
671    }
672
673    args
674}
675
676fn process_args_in_content(content: &str, args: &HashMap<String, String>) -> (String, Vec<String>) {
677    let mut errors = Vec::new();
678    let arg_regex = match Regex::new(r"!arg\s+(\w+)") {
679        Ok(re) => re,
680        Err(_) => return (content.to_string(), errors),
681    };
682
683    let result = arg_regex
684        .replace_all(content, |caps: &regex::Captures| {
685            let arg_name = &caps[1];
686            match args.get(arg_name) {
687                Some(value) => value.clone(),
688                None => {
689                    errors.push(format!("Missing required argument: '{}'", arg_name));
690                    format!("<!-- Error: argument '{}' not found -->", arg_name)
691                }
692            }
693        })
694        .to_string();
695
696    (result, errors)
697}
698
699fn process_include_file(path: &Path, args: &HashMap<String, String>) -> Result<String, String> {
700    // Preserve original extension info: if no extension provided, default to .phlow
701    let original_extension = path
702        .extension()
703        .and_then(|e| e.to_str())
704        .map(|s| s.to_lowercase());
705
706    let path = if original_extension.is_none() {
707        let mut new_path = path.to_path_buf();
708        new_path.set_extension("phlow");
709        new_path
710    } else {
711        path.to_path_buf()
712    };
713
714    let raw = fs::read_to_string(&path).map_err(|e| e.to_string())?;
715
716    // First, process !arg directives with the provided arguments
717    let (with_args, arg_errors) = process_args_in_content(&raw, args);
718
719    // If there are argument errors, return them immediately
720    if !arg_errors.is_empty() {
721        return Err(arg_errors.join("; "));
722    }
723
724    // If the included file had an explicit extension and it's NOT .phlow,
725    // do not run the preprocessor: include its content directly (respecting indentation)
726    if let Some(ext) = original_extension {
727        if ext != "phlow" {
728            return Ok(with_args);
729        }
730    }
731
732    let parent = path.parent().unwrap_or_else(|| Path::new("."));
733    // Para arquivos incluídos (com extensão .phlow), só aplicamos preprocessor_directives para processar outros !include
734    // mas não aplicamos modules, auto_phs e eval pois isso será feito no arquivo principal
735    let (transformed, errors) = preprocessor_directives(&with_args, parent);
736
737    if !errors.is_empty() {
738        return Err(errors.join("; "));
739    }
740
741    Ok(transformed)
742}
743
744fn preprocessor_modules(phlow: &str) -> Result<String, Vec<String>> {
745    // Pre-processa o YAML para escapar valores que começam com ! para evitar problemas de parsing
746    let escaped_phlow = escape_yaml_exclamation_values(phlow);
747
748    // Parse o YAML para extrair módulos disponíveis
749    let parsed: Value = match serde_yaml::from_str(&escaped_phlow) {
750        Ok(val) => val,
751        Err(_) => return Ok(phlow.to_string()), // Se não conseguir parsear, retorna o original
752    };
753
754    let mut available_modules = std::collections::HashSet::new();
755
756    // Extrai módulos da seção "modules"
757    if let Some(modules) = parsed.get("modules") {
758        if let Some(modules_array) = modules.as_sequence() {
759            for module in modules_array {
760                if let Some(module_map) = module.as_mapping() {
761                    // Verifica se existe "module" ou "name"
762                    if let Some(module_name) = module_map
763                        .get("module")
764                        .or_else(|| module_map.get("name"))
765                        .and_then(|v| v.as_str())
766                    {
767                        // Extrai o nome do módulo de paths locais (./modules/cognito -> cognito)
768                        let clean_name = if module_name.starts_with("./modules/") {
769                            &module_name[10..] // Remove "./modules/"
770                        } else if module_name.contains('/') {
771                            // Para outros paths, pega apenas o último segmento
772                            module_name.split('/').last().unwrap_or(module_name)
773                        } else {
774                            module_name
775                        };
776                        available_modules.insert(clean_name.to_string());
777                    }
778                }
779            }
780        }
781    }
782
783    if available_modules.is_empty() {
784        return Ok(phlow.to_string()); // Sem módulos para transformar
785    }
786
787    // Parse novamente para modificar, usando o YAML escapado
788    let mut parsed_mut: Value = match serde_yaml::from_str(&escaped_phlow) {
789        Ok(val) => val,
790        Err(_) => return Ok(phlow.to_string()),
791    };
792
793    // Mantém uma cópia para detectar se houve alterações
794    let original_parsed = parsed_mut.clone();
795
796    transform_value(
797        &mut parsed_mut,
798        &available_modules,
799        false, // Começa como false, só será true dentro de steps, then ou else
800    );
801
802    // Se nada mudou, preserva o YAML original (mantendo comentários e formatação)
803    if parsed_mut == original_parsed {
804        return Ok(phlow.to_string());
805    }
806
807    // Converte de volta para YAML e desfaz o escape
808    match serde_yaml::to_string(&parsed_mut) {
809        Ok(result) => Ok(unescape_yaml_exclamation_values(&result)),
810        Err(_) => Ok(phlow.to_string()),
811    }
812}
813
/// Step keys that are never treated as module shorthand.
///
/// `transform_value` checks a plain key, or the first segment of a dotted key, against
/// this list before rewriting it into a `use`/`input` entry.
const EXCLUSIVE_PROPERTIES: &[&str] = &[
    "use",
    "to",
    "id",
    "label",
    "assert",
    "assert_eq",
    "condition",
    "return",
    "payload",
    "input",
    "log",
    "then",
    "else",
    "steps",
];
830
831// Função recursiva para transformar o YAML
832fn transform_value(
833    value: &mut Value,
834    available_modules: &std::collections::HashSet<String>,
835    is_in_transformable_context: bool,
836) {
837    match value {
838        Value::Mapping(map) => {
839            // Collect pending transformations: (original key, original value, optional (action, args))
840            let mut transformations: Vec<(Value, Value, Option<(String, Vec<String>)>)> =
841                Vec::new();
842
843            for (key, val) in map.iter() {
844                if let Some(key_str) = key.as_str() {
845                    // Só transforma se estiver em um contexto transformável (raiz de steps, then ou else)
846                    if is_in_transformable_context {
847                        // Verifica se a chave contém um ponto (module.action)
848                        if key_str.contains('.') {
849                            let parts: Vec<&str> = key_str.split('.').collect();
850                            if parts.len() >= 2 {
851                                let module_name = parts[0];
852                                let action_name = parts[1].to_string();
853                                let extra_args: Vec<String> =
854                                    parts.iter().skip(2).map(|s| s.to_string()).collect();
855
856                                // Verifica se não é uma propriedade exclusiva e se o módulo está disponível
857                                if !EXCLUSIVE_PROPERTIES.contains(&module_name)
858                                    && (available_modules.contains(module_name)
859                                        || !available_modules.is_empty())
860                                {
861                                    transformations.push((
862                                        key.clone(),
863                                        val.clone(),
864                                        Some((action_name, extra_args)),
865                                    ));
866                                }
867                            }
868                        } else {
869                            // Se não é uma propriedade exclusiva e é um módulo disponível
870                            if !EXCLUSIVE_PROPERTIES.contains(&key_str)
871                                && available_modules.contains(key_str)
872                            {
873                                transformations.push((key.clone(), val.clone(), None));
874                            }
875                        }
876                    }
877                }
878            }
879
880            // Aplica as transformações
881            for (key, old_val, action_and_args) in transformations {
882                map.remove(&key);
883
884                let mut new_entry = Mapping::new();
885
886                // Extrai o nome do módulo (remove a ação se houver)
887                let module_name = if let Some(key_str) = key.as_str() {
888                    if key_str.contains('.') {
889                        key_str.split('.').next().unwrap_or(key_str)
890                    } else {
891                        key_str
892                    }
893                } else {
894                    ""
895                };
896
897                new_entry.insert(
898                    Value::String("use".to_string()),
899                    Value::String(module_name.to_string()),
900                );
901
902                // Cria o input com a ação como primeiro parâmetro, se houver
903                let input_value = if let Some((action_name, args_vec)) = action_and_args {
904                    // Se há uma ação, cria um novo mapeamento com action como primeiro item
905                    if let Value::Mapping(old_map) = old_val {
906                        let mut new_input = Mapping::new();
907                        new_input.insert(
908                            Value::String("action".to_string()),
909                            Value::String(action_name),
910                        );
911
912                        // Adiciona os argumentos extras, se houver
913                        if !args_vec.is_empty() {
914                            let args_seq = Value::Sequence(
915                                args_vec
916                                    .into_iter()
917                                    .map(Value::String)
918                                    .collect::<Vec<Value>>(),
919                            );
920                            new_input.insert(Value::String("args".to_string()), args_seq);
921                        }
922
923                        // Adiciona os outros parâmetros depois da ação
924                        for (old_key, old_value) in old_map.iter() {
925                            new_input.insert(old_key.clone(), old_value.clone());
926                        }
927
928                        Value::Mapping(new_input)
929                    } else {
930                        // Se old_val não é um mapeamento, cria um novo com apenas a ação
931                        let mut new_input = Mapping::new();
932                        new_input.insert(
933                            Value::String("action".to_string()),
934                            Value::String(action_name),
935                        );
936
937                        if !args_vec.is_empty() {
938                            let args_seq = Value::Sequence(
939                                args_vec
940                                    .into_iter()
941                                    .map(Value::String)
942                                    .collect::<Vec<Value>>(),
943                            );
944                            new_input.insert(Value::String("args".to_string()), args_seq);
945                        }
946                        Value::Mapping(new_input)
947                    }
948                } else {
949                    // Se não há ação, usa o valor original
950                    old_val
951                };
952
953                new_entry.insert(Value::String("input".to_string()), input_value);
954
955                // Adiciona a nova entrada transformada
956                for (new_key, new_val) in new_entry.iter() {
957                    map.insert(new_key.clone(), new_val.clone());
958                }
959            }
960
961            // Continua a transformação recursivamente
962            for (key, val) in map.iter_mut() {
963                let key_str = key.as_str().unwrap_or("");
964
965                // Determina se o próximo nível será transformável
966                let next_is_transformable =
967                    key_str == "steps" || key_str == "then" || key_str == "else";
968
969                transform_value(val, available_modules, next_is_transformable);
970            }
971        }
972        Value::Sequence(seq) => {
973            for item in seq.iter_mut() {
974                transform_value(item, available_modules, is_in_transformable_context);
975            }
976        }
977        _ => {}
978    }
979}
980
981// Função para escapar valores que começam com ! para evitar interpretação como tags YAML
982fn escape_yaml_exclamation_values(yaml: &str) -> String {
983    let regex = match Regex::new(r"((?::\s*|-\s+\w+:\s*))(!\w.*?)\s*$") {
984        Ok(re) => re,
985        Err(_) => return yaml.to_string(),
986    };
987
988    let result = regex
989        .replace_all(yaml, |caps: &regex::Captures| {
990            let prefix = &caps[1];
991            let exclamation_value = &caps[2];
992            format!(r#"{} "__PHLOW_ESCAPE__{}""#, prefix, exclamation_value)
993        })
994        .to_string();
995
996    result
997}
998
999// Função para desfazer o escape dos valores com !
1000fn unescape_yaml_exclamation_values(yaml: &str) -> String {
1001    let regex = match Regex::new(r"__PHLOW_ESCAPE__(!\w[^\s]*)") {
1002        Ok(re) => re,
1003        Err(_) => return yaml.to_string(),
1004    };
1005
1006    let result = regex
1007        .replace_all(yaml, |caps: &regex::Captures| {
1008            let exclamation_value = &caps[1];
1009            exclamation_value.to_string()
1010        })
1011        .to_string();
1012
1013    result
1014}
1015
#[cfg(test)]
mod tests {
    use super::*;

    /// Inline objects are wrapped in `!phs ${ ... }`; plain scalars are left alone.
    #[test]
    fn test_preprocessor_transform_phs_hidden_object_and_arrays() {
        let input = r#"
        key1: {
            "name": "value",
            "list": [1, 2, 3]
        }
        key2: normal_value
        "#;

        let transformed = processor_transform_phs_hidden_object_and_arrays(input);

        assert!(
            transformed.contains("key1: !phs ${ { \"name\": \"value\", \"list\": [1, 2, 3] } }")
        );
        assert!(!transformed.contains("key2: !phs normal_value"));
    }

    /// Values that look like code (`if ...`, `for ...`) receive an implicit `!phs` marker.
    #[test]
    fn test_preprocessor_transform_phs_hidden() {
        let input = r#"
        key1: if condition { do_something() }
        key2: "normal string"
        - for item in list { process(item) }
        "#;

        let transformed = preprocessor_transform_phs_hidden(input);
        assert!(transformed.contains("key1: !phs if condition { do_something() }"));
        assert!(transformed.contains("- !phs for item in list { process(item) }"));
    }

    /// `!phs` values become `"{{ ... }}"` strings, except triple-backtick blocks.
    #[test]
    fn test_preprocessor_eval() {
        let input = r#"
        key1: !phs if condition { do_something() }
        key2: !phs ```
        multi_line_code();
        another_line();
        ```
        key3: !phs ${ for item in list { process(item) } }
        "#;
        let transformed = preprocessor_eval(input);
        assert!(transformed.contains("key1: \"{{ if condition { do_something() } }}\""));
        // Triple-backtick blocks must be treated as plain strings
        assert!(transformed.contains("key2: \"multi_line_code(); another_line();\""));
        assert!(transformed.contains("key3: \"{{ for item in list { process(item) } }}\""));
    }

    /// Declared modules and dotted keys are expanded to `use`/`input`; other keys are kept.
    #[test]
    fn test_preprocessor_modules() {
        let input = r#"
        modules:
          - module: test_module

        steps:
          - test_module:
              param1: value1
              param2: value2
          - another_step:
              action: do_something
          - new_module.my_action:
              paramA: valueA
        "#;

        let expected = r#"modules:
- module: test_module
steps:
- use: test_module
  input:
    param1: value1
    param2: value2
- another_step:
    action: do_something
- use: new_module
  input:
    action: my_action
    paramA: valueA
"#;

        let transformed = preprocessor_modules(input).unwrap();
        println!("Transformed:\n{}", transformed);
        assert_eq!(transformed, expected);
    }

    /// Segments after `module.action` are collected into the `args` list of the input.
    #[test]
    fn test_preprocessor_modules_with_action_args() {
        let input = r#"
        modules:
          - module: test_module

        steps:
          - test_module.my_action.info.data:
              param1: value1
        "#;

        let expected = r#"modules:
- module: test_module
steps:
- use: test_module
  input:
    action: my_action
    args:
    - info
    - data
    param1: value1
"#;

        let transformed = preprocessor_modules(input).unwrap();
        println!("Transformed with args:\n{}", transformed);
        assert_eq!(transformed, expected);
    }

    /// `!phs` followed by a triple-backtick block yields a plain string in every layout.
    #[test]
    fn test_preprocessor_eval_triple_backtick_blocks_as_string() {
        // Multi-line block with the opening and closing fences on their own lines
        let input_multiline = r#"
        key_md_block: !phs ```
        first_line();
        second_line();
        ```
        "#;
        let transformed_multiline = preprocessor_eval(input_multiline);
        assert!(transformed_multiline.contains("key_md_block: \"first_line(); second_line();\""));

        // Inline block on a single line
        let input_inline = r#"
        key_inline_block: !phs ```single_line();```
        "#;
        let transformed_inline = preprocessor_eval(input_inline);
        assert!(transformed_inline.contains("key_inline_block: \"single_line();\""));

        // Inline block with a language label
        let input_inline_lang = r#"
        key_inline_lang: !phs ```json {"a":1}```
        "#;
        let transformed_inline_lang = preprocessor_eval(input_inline_lang);
        assert!(transformed_inline_lang.contains("key_inline_lang: \"{\\\"a\\\":1}\""));

        // List item holding a markdown block
        let input_list = r#"
        - !phs ```
        a();
        b();
        ```
        "#;
        let transformed_list = preprocessor_eval(input_list);
        assert!(transformed_list.contains("- \"a(); b();\""));
    }

    /// Language labels after the opening fence are dropped from the resulting string.
    #[test]
    fn test_preprocessor_markdown_string_blocks_with_language_labels() {
        // Property with a language label and inline content
        let input_prop_inline = r#"
        prop: ```js doThing();```
        "#;
        let out_prop_inline = preprocessor_markdown_string_blocks(input_prop_inline);
        assert!(out_prop_inline.contains("prop: \"doThing();\""));

        // Multi-line property with a language label
        let input_prop_multiline = r#"
        prompt: ```md
        Hello
        World
        ```
        "#;
        let out_prop_multi = preprocessor_markdown_string_blocks(input_prop_multiline);
        assert!(out_prop_multi.contains("prompt: \"Hello World\""));

        // Inline list item with a language label
        let input_list_inline = r#"
        - ```json {"x":2}```
        "#;
        let out_list_inline = preprocessor_markdown_string_blocks(input_list_inline);
        println!("out_list_inline=<<<{}>>>", out_list_inline);
        assert!(out_list_inline.contains("- \"{\\\"x\\\":2}\""));

        // Multi-line list item with a language label
        let input_list_multi = r#"
        - ```sql
        select 1;
        select 2;
        ```
        "#;
        let out_list_multi = preprocessor_markdown_string_blocks(input_list_multi);
        assert!(out_list_multi.contains("- \"select 1; select 2;\""));
    }

    /// Writes `included_file.phlow` (an object using `!arg` placeholders) to the working directory.
    fn temporary_included_file() -> std::io::Result<()> {
        let content = r#"
        {
            "included_key1": "!arg arg1",
            "included_key2": "!arg arg2"
        }
        "#;

        fs::write("included_file.phlow", content)
    }

    /// Deletes the file written by `temporary_included_file`.
    fn remove_temporary_included_file() -> std::io::Result<()> {
        fs::remove_file("included_file.phlow")
    }

    /// Writes `included_inline.phlow` (a small multi-line object) to the working directory.
    fn temporary_included_inline_file() -> std::io::Result<()> {
        let content = r#"{
    "a": 1,
    "b": 2
}"#;
        fs::write("included_inline.phlow", content)
    }

    /// Deletes the file written by `temporary_included_inline_file`.
    fn remove_temporary_included_inline_file() -> std::io::Result<()> {
        fs::remove_file("included_inline.phlow")
    }

    /// Writes `included_inline.json` (a small multi-line object) to the working directory.
    fn temporary_included_non_phlow_inline_file() -> std::io::Result<()> {
        let content = r#"{
    "a": 1,
    "b": 2
}"#;
        fs::write("included_inline.json", content)
    }

    /// Deletes the file written by `temporary_included_non_phlow_inline_file`.
    fn remove_temporary_included_non_phlow_inline_file() -> std::io::Result<()> {
        fs::remove_file("included_inline.json")
    }

    /// Runs the whole pipeline on a document that uses `!include`, hidden PHS and modules.
    #[test]
    fn test_preprocessor() {
        // Create the temporary included_file.phlow fixture
        temporary_included_file().unwrap();

        let input = r#"
        !include included_file.phlow arg1='value1' arg2="value2"

        key1: if condition { do_something() }
        key2: {
            "name": "value",
            "list": [1, 2, 3]
        }
        key3: ```
        multi_line_code();
        another_line();
        ```
        modules:
          - module: test_module

        steps:
          - test_module:
              param1: value1
              param2: value2
        "#;

        // Spelled out line by line because several lines consist of trailing whitespace
        // only, which is invisible (and easily stripped by editors) in a raw string.
        let expected = concat!(
            "\n",
            "        \n",
            "\n",
            "                {\n",
            "\n",
            "                    \"included_key1\": \"value1\",\n",
            "\n",
            "                    \"included_key2\": \"value2\"\n",
            "\n",
            "                }\n",
            "\n",
            "                \n",
            "\n",
            "        key1: \"{{ if condition { do_something() } }}\"\n",
            "        key2: \"{{ { \\\"name\\\": \\\"value\\\", \\\"list\\\": [1, 2, 3] } }}\"\n",
            "        key3: \"multi_line_code(); another_line();\"\n",
            "        modules:\n",
            "          - module: test_module\n",
            "\n",
            "        steps:\n",
            "          - test_module:\n",
            "              param1: value1\n",
            "              param2: value2\n",
            "        ",
        );

        let processed = preprocessor(
            input,
            Path::new("."),
            false,
            crate::settings::PrintOutput::Yaml,
        );

        // Remove the fixture before asserting so a failure does not leave it behind
        remove_temporary_included_file().unwrap();

        let processed = processed.unwrap();
        println!("Processed:\n{}", processed);

        assert_eq!(processed, expected);
    }

    /// An inline `!include` of a `.phlow` file indents the continuation lines of the content.
    #[test]
    fn test_preprocessor_directives_inline_include_indentation_on_mapping_value() {
        temporary_included_inline_file().unwrap();

        let input = "  key: !include included_inline.phlow";
        let (result, errors) = preprocessor_directives(input, Path::new("."));

        // Remove the fixture before asserting so a failure does not leave it behind
        remove_temporary_included_inline_file().unwrap();

        assert!(errors.is_empty(), "Errors found: {:?}", errors);

        // The line breaks of the included content are expected to receive the same
        // indentation as the prefix up to the !include (here 2 spaces + 'key: ' = 7 spaces).
        // Note: the included content has its own indentation (4 spaces). Since both the
        // indentation of the include and the inner indentation of the content are kept,
        // the inner lines end up with (prefix_indentation + content_indentation) = 7 + 4 = 11 spaces.
        let expected = "  key: {\n           \"a\": 1,\n           \"b\": 2\n       }";

        assert_eq!(
            result, expected,
            "Inline include indentation mismatch.\nGot:\n<<<{}>>>\nExpected:\n<<<{}>>>",
            result, expected
        );
    }

    /// Same as the `.phlow` case, but for an included file with a non-phlow extension.
    #[test]
    fn test_preprocessor_directives_inline_include_indentation_on_mapping_value_non_phlow() {
        // Create the temporary included_inline.json fixture
        temporary_included_non_phlow_inline_file().unwrap();

        let input = "  key: !include included_inline.json";
        let (result, errors) = preprocessor_directives(input, Path::new("."));

        // Remove the fixture before asserting so a failure does not leave it behind
        remove_temporary_included_non_phlow_inline_file().unwrap();

        assert!(errors.is_empty(), "Errors found: {:?}", errors);

        // The line breaks of the included content are expected to receive the same
        // indentation as the prefix up to the !include (here 2 spaces + 'key: ' = 7 spaces).
        // Note: the included content has its own indentation (4 spaces). Since both the
        // indentation of the include and the inner indentation of the content are kept,
        // the inner lines end up with (prefix_indentation + content_indentation) = 7 + 4 = 11 spaces.
        let expected = "  key: {\n           \"a\": 1,\n           \"b\": 2\n       }";

        assert_eq!(
            result, expected,
            "Inline include indentation mismatch for non-phlow file.\nGot:\n<<<{}>>>\nExpected:\n<<<{}>>>",
            result, expected
        );
    }

    /// Runs the whole pipeline on a realistic flow whose prompt embeds an escaped JSON
    /// schema and checks that the output does not contain `#{ \"type\": \"string\" }`.
    #[test]
    fn test_no_phs() {
        let input = r#"modules:
  - module: log
  - module: fs
  - module: openai
    with:
      api_key: envs.OPENAI_API_KEY
  - module: amqp
    with:
      vhost: "nixyz"
      queue_name: "queue.etl.carrefour.raw"
      max_concurrency: 1
      definition:
        vhosts:
          - name: "nixyz"
        exchanges:
          - name: "x.etl"
            type: direct
            durable: true
            vhost: "nixyz"
            auto_delete: true
        queues:
          - name: "queue.etl.carrefour.raw"
            vhost: "nixyz"
            durable: true
        bindings:
          - source: "x.extract"
            vhost: "nixyz"
            destination: "queue.etl.carrefour.raw"
            destination_type: queue
            routing_key: "mercado.carrefour.com.br.#"
            arguments: {}
  - module: aws
    with:
      region: envs.AWS_REGION
      endpoint_url: envs.AWS_S3_ENDPOINT_URL
      s3_force_path_style: true
      secret_access_key: envs.AWS_SECRET_ACCESS_KEY
      access_key_id: envs.AWS_ACCESS_KEY_ID

main: amqp

steps:
  - log.info:
      message: "Starting processing message"

  - payload: main.parse()

  - log.info:
      message: payload

  - id: fetch_s3_object
    aws.s3.get_object:
      bucket: payload.bucket
      key: payload.key

  - assert: payload.success != true
    then:
      - log.error:
          message: payload.error
      - return: false

  - log.info:
      message: "Fetched object from S3"

  - fs.write:
      path: ./input.json
      content: steps.fetch_s3_object
      force: true
  
  - id: gpt
    openai.chat:
      model: "gpt-5-nano"
      messages:
        - role: user
          content: "\# Product Extraction Prompt with JSON Schema\n\nAnalyze an input HTML page, extract all relevant product information, and return this data as a single object strictly following the provided JSON schema for a \"Product\" as below.\n\n## JSON Schema for \"Product\"\n\n```json\n{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"Product\",\n  \"type\": \"object\",\n  \"additionalProperties\": false,\n  \"properties\": {\n    \"id\": { \"type\": \"string\" },\n    \"sku\": { \"type\": \"string\" },\n    \"name\": { \"type\": \"string\" },\n    \"description\": { \"type\": \"string\" },\n    \"price\": { \"type\": \"number\" },\n    \"originalPrice\": { \"type\": \"number\" },\n    \"currency\": { \"type\": \"string\" },\n    \"stock\": { \"type\": \"integer\" },\n    \"availability\": { \"type\": \"string\" },\n    \"brand\": { \"type\": \"string\" },\n    \"category\": { \"type\": \"string\" },\n    \"categories\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n    \"tags\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n    \"images\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"additionalProperties\": false,\n        \"properties\": {\n          \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n          \"alt\": { \"type\": \"string\" }\n        }\n      }\n    },\n    \"videos\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"additionalProperties\": false,\n        \"properties\": {\n          \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n          \"title\": { \"type\": \"string\" }\n        }\n      }\n    },\n    \"attributes\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"additionalProperties\": false,\n        \"properties\": {\n          \"name\": { \"type\": \"string\" },\n          \"value\": { \"type\": [\"string\", 
\"number\", \"boolean\"] }\n        }\n      }\n    },\n    \"variants\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"object\",\n        \"additionalProperties\": false,\n        \"properties\": {\n          \"id\": { \"type\": \"string\" },\n          \"sku\": { \"type\": \"string\" },\n          \"name\": { \"type\": \"string\" },\n          \"price\": { \"type\": \"number\" },\n          \"originalPrice\": { \"type\": \"number\" },\n          \"currency\": { \"type\": \"string\" },\n          \"stock\": { \"type\": \"integer\" },\n          \"attributes\": {\n            \"type\": \"array\",\n            \"items\": {\n              \"type\": \"object\",\n              \"additionalProperties\": false,\n              \"properties\": {\n                \"name\": { \"type\": \"string\" },\n                \"value\": { \"type\": [\"string\", \"number\", \"boolean\"] }\n              }\n            }\n          }\n        }\n      }\n    },\n    \"rating\": { \"type\": \"number\" },\n    \"reviewCount\": { \"type\": \"integer\" },\n    \"gtin\": { \"type\": \"string\" },\n    \"mpn\": { \"type\": \"string\" },\n    \"url\": { \"type\": \"string\", \"format\": \"uri\" },\n    \"language\": { \"type\": \"string\" },\n    \"currencySymbol\": { \"type\": \"string\" },\n    \"breadcrumbs\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } },\n    \"createdAt\": { \"type\": \"string\", \"format\": \"date-time\" },\n    \"updatedAt\": { \"type\": \"string\", \"format\": \"date-time\" },\n    \"metadata\": {\n      \"type\": \"object\",\n      \"additionalProperties\": {\n        \"type\": [\"string\", \"number\", \"boolean\", \"null\"]\n      }\n    }\n  }\n}\n```\n\n## Extraction Instructions\n\nCarefully identify and map as much product information as possible from the HTML content to the respective JSON fields.  
\nNormalize price, numbers, dates; do not hallucinate; omit missing values; output only one product.\n\n## Reasoning Requirements\n\nBefore outputting JSON, reason step-by-step about:  \n- How each field can be detected  \n- What normalization is needed  \n- What cannot be extracted  \n\n## Output Format\n\nReturn only **one JSON object**, no commentary or code block.\n"
        - role: user
          content: steps.fetch_s3_object.data

  - fs.write:
      path: ./payload.json
      content: payload
      force: true

  - log.info:
      message: steps.gpt.data.choices[0].message.content"#;

        let transformed = preprocessor(
            input,
            Path::new("."),
            false,
            crate::settings::PrintOutput::Yaml,
        )
        .unwrap();

        assert!(!transformed.contains(r#"#{ \"type\": \"string\" }"#));
    }
}