phlow_runtime/
analyzer.rs

1use crate::loader::error::Error as LoaderError;
2use crate::preprocessor::preprocessor;
3use crate::settings::Settings;
4use phlow_sdk::prelude::*;
5use regex::Regex;
6use std::collections::HashSet;
7use std::fs;
8use std::future::Future;
9use std::path::Path;
10use std::pin::Pin;
11
/// Options controlling one analyzer run: which sections are gathered and printed,
/// which script is inspected, and how the result is rendered.
///
/// `Default` yields every flag off and an empty `script_target`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Analyzer {
    /// Copied from `Settings::analyzer` by `from_settings`; the methods in this file do not read it.
    pub enabled: bool,
    /// Gather and print the list of files.
    pub files: bool,
    /// Gather and print the declared modules.
    pub modules: bool,
    /// Gather and print the total number of steps.
    pub total_steps: bool,
    /// Gather and print the total number of pipelines.
    pub total_pipelines: bool,
    /// Print the result as indented JSON instead of the text report.
    pub json: bool,
    /// Path of the script handed to `analyze`.
    pub script_target: String,
    /// Turn on all four sections regardless of the individual flags.
    pub all: bool,
    /// Forwarded to `analyze` as its `include_inner` argument.
    pub inner: bool,
}
24
25impl Analyzer {
26    pub fn from_settings(settings: &Settings) -> Self {
27        Self {
28            enabled: settings.analyzer,
29            files: settings.analyzer_files,
30            modules: settings.analyzer_modules,
31            total_steps: settings.analyzer_total_steps,
32            total_pipelines: settings.analyzer_total_pipelines,
33            json: settings.analyzer_json,
34            script_target: settings.script_main_absolute_path.clone(),
35            all: settings.analyzer_all,
36            inner: settings.analyzer_inner, // Assuming this is how inner is set from settings
37        }
38    }
39
40    pub async fn run(&self) -> Result<Value, LoaderError> {
41        // If no specific analyzer flags were provided, show all
42        let mut af = self.files;
43        let mut am = self.modules;
44        let mut ats = self.total_steps;
45        let mut atp = self.total_pipelines;
46
47        if self.all {
48            af = true;
49            am = true;
50            ats = true;
51            atp = true;
52        }
53
54        let result = analyze(&self.script_target, af, am, ats, atp, self.inner).await?;
55        Ok(result)
56    }
57
58    pub fn display(&self, result: &Value) {
59        if self.json {
60            // print valu3 Value as pretty JSON
61            println!("{}", result.to_json(JsonMode::Indented));
62            return;
63        }
64
65        // text output similar to previous main.rs behavior
66        if self.files || self.all {
67            if let Some(files) = result.get("files") {
68                println!("Files:");
69                if let Some(arr) = files.as_array() {
70                    for f in &arr.values {
71                        println!("  - {}", f.as_string());
72                    }
73                }
74            }
75        }
76
77        if self.modules || self.all {
78            if let Some(mods) = result.get("modules") {
79                println!("Modules:");
80                if let Some(arr) = mods.as_array() {
81                    for m in &arr.values {
82                        let declared = m.get("declared").map(|v| v.as_string()).unwrap_or_default();
83                        let name = m.get("name").map(|v| v.as_string()).unwrap_or_default();
84                        let downloaded = m
85                            .get("downloaded")
86                            .map(|v| v.to_string())
87                            .unwrap_or_default();
88                        println!("  - {} ({}): downloaded={}", declared, name, downloaded);
89                    }
90                }
91            }
92        }
93
94        if self.total_steps || self.all {
95            if let Some(ts) = result.get("total_steps") {
96                println!("Total steps: {}", ts.to_string());
97            }
98        }
99
100        if self.total_pipelines || self.all {
101            if let Some(tp) = result.get("total_pipelines") {
102                println!("Total pipelines: {}", tp.to_string());
103            }
104        }
105    }
106}
107fn collect_includes_recursive(
108    path: &Path,
109    visited: &mut HashSet<String>,
110    result: &mut HashSet<String>,
111) {
112    let path_str = match path.canonicalize() {
113        Ok(p) => p.to_string_lossy().to_string(),
114        Err(_) => path.to_string_lossy().to_string(),
115    };
116
117    if visited.contains(&path_str) {
118        return;
119    }
120    visited.insert(path_str.clone());
121
122    if let Ok(content) = fs::read_to_string(path) {
123        result.insert(path_str.clone());
124
125        // find !include and !import occurrences
126        let include_re = Regex::new(r"!include\s+([^\s]+)").unwrap();
127        let import_re = Regex::new(r"!import\s+(\S+)").unwrap();
128
129        let base = path.parent().unwrap_or(Path::new("."));
130
131        for cap in include_re.captures_iter(&content) {
132            if let Some(rel) = cap.get(1) {
133                let mut full = base.join(rel.as_str());
134                if full.extension().is_none() {
135                    full.set_extension("phlow");
136                }
137                if full.exists() {
138                    collect_includes_recursive(&full, visited, result);
139                } else {
140                    // still add referenced path even if it does not exist
141                    result.insert(full.to_string_lossy().to_string());
142                }
143            }
144        }
145
146        for cap in import_re.captures_iter(&content) {
147            if let Some(rel) = cap.get(1) {
148                let full = base.join(rel.as_str());
149                if full.exists() {
150                    collect_includes_recursive(&full, visited, result);
151                } else {
152                    result.insert(full.to_string_lossy().to_string());
153                }
154            }
155        }
156    }
157}
158
/// Reduces a declared module reference to its short name.
///
/// * A `./modules/` prefix is stripped and the remainder returned unchanged
///   (`./modules/a/b` -> `a/b`).
/// * Otherwise the text after the last `/` is returned (`org/repo/http` -> `http`,
///   `trailing/` -> empty string).
/// * A name without `/` is returned as is.
fn normalize_module_name(module_name: &str) -> String {
    if let Some(rest) = module_name.strip_prefix("./modules/") {
        return rest.to_string();
    }

    // `rsplit` always yields at least one item; for a name without '/' it is the whole name.
    module_name
        .rsplit('/')
        .next()
        .unwrap_or(module_name)
        .to_string()
}
172
173fn count_pipelines_recursive(value: &Value) -> usize {
174    if value.is_object() {
175        let mut count = 1; // this object is a pipeline
176
177        if let Some(then) = value.get("then") {
178            count += count_pipelines_recursive(then);
179        }
180        if let Some(els) = value.get("else") {
181            count += count_pipelines_recursive(els);
182        }
183
184        if let Some(steps) = value.get("steps").and_then(|v| v.as_array()) {
185            for step in &steps.values {
186                if let Some(t) = step.get("then") {
187                    count += count_pipelines_recursive(t);
188                }
189                if let Some(e) = step.get("else") {
190                    count += count_pipelines_recursive(e);
191                }
192            }
193        }
194
195        count
196    } else if let Some(arr) = value.as_array() {
197        let mut count = 1; // this array is a pipeline
198        for step in &arr.values {
199            if let Some(t) = step.get("then") {
200                count += count_pipelines_recursive(t);
201            }
202            if let Some(e) = step.get("else") {
203                count += count_pipelines_recursive(e);
204            }
205        }
206        count
207    } else {
208        0
209    }
210}
211
212fn count_steps_recursive(value: &Value) -> usize {
213    if value.is_object() {
214        let mut steps_total = 0;
215        if let Some(steps) = value.get("steps").and_then(|v| v.as_array()) {
216            steps_total += steps.values.len();
217            for step in &steps.values {
218                if let Some(t) = step.get("then") {
219                    steps_total += count_steps_recursive(t);
220                }
221                if let Some(e) = step.get("else") {
222                    steps_total += count_steps_recursive(e);
223                }
224            }
225        }
226
227        if let Some(then) = value.get("then") {
228            steps_total += count_steps_recursive(then);
229        }
230        if let Some(els) = value.get("else") {
231            steps_total += count_steps_recursive(els);
232        }
233
234        steps_total
235    } else if let Some(arr) = value.as_array() {
236        let mut steps_total = 0;
237        steps_total += arr.values.len();
238        for step in &arr.values {
239            if let Some(t) = step.get("then") {
240                steps_total += count_steps_recursive(t);
241            }
242            if let Some(e) = step.get("else") {
243                steps_total += count_steps_recursive(e);
244            }
245        }
246        steps_total
247    } else {
248        0
249    }
250}
251
252fn analyze_internal<'a>(
253    script_target: &'a str,
254    include_files: bool,
255    include_modules: bool,
256    include_total_steps: bool,
257    include_total_pipelines: bool,
258    include_inner: bool,
259    visited: &'a mut HashSet<String>,
260) -> Pin<Box<dyn Future<Output = Result<Value, LoaderError>> + 'a>> {
261    Box::pin(async move {
262        // Try load with loader (preferred) and fallback to tolerant analysis on failure
263        let mut files_set: HashSet<String> = HashSet::new();
264        let mut modules_json: Vec<Value> = Vec::new();
265        let mut total_pipelines = 0usize;
266        let mut total_steps = 0usize;
267
268        // First, try to run the preprocessor to obtain the final transformed YAML
269        let target_path = Path::new(script_target);
270        let main_path = if target_path.is_dir() {
271            let mut base_path = target_path.to_path_buf();
272            base_path.set_extension("phlow");
273            if base_path.exists() {
274                base_path
275            } else {
276                let candidates = ["main.phlow", "mod.phlow", "module.phlow"];
277                let mut found = None;
278                for c in &candidates {
279                    let p = target_path.join(c);
280                    if p.exists() {
281                        found = Some(p);
282                        break;
283                    }
284                }
285                if let Some(p) = found {
286                    p
287                } else {
288                    return Err(LoaderError::MainNotFound(script_target.to_string()));
289                }
290            }
291        } else if target_path.exists() {
292            target_path.to_path_buf()
293        } else {
294            return Err(LoaderError::MainNotFound(script_target.to_string()));
295        };
296
297        // protect against recursion cycles
298        let canonical = match main_path.canonicalize() {
299            Ok(p) => p.to_string_lossy().to_string(),
300            Err(_) => main_path.to_string_lossy().to_string(),
301        };
302        if visited.contains(&canonical) {
303            // already analyzed -> return empty result
304            return Ok(
305                json!({"files": Vec::<String>::new(), "modules": Vec::<Value>::new(), "total_steps": 0, "total_pipelines": 0}),
306            );
307        }
308        visited.insert(canonical);
309
310        let raw = fs::read_to_string(&main_path)
311            .map_err(|_| LoaderError::ModuleLoaderError("Failed to read main file".to_string()))?;
312
313        // Try preprocessor (preferred). If it fails or the resulting YAML cannot be parsed,
314        // fall back to the tolerant heuristic analysis below.
315        let preprocessed = preprocessor(
316            &raw,
317            &main_path.parent().unwrap_or(Path::new(".")),
318            false,
319            crate::settings::PrintOutput::Yaml,
320        );
321
322        if let Ok(transformed) = preprocessed {
323            // parse the YAML into valu3::Value for analysis
324            match serde_yaml::from_str::<Value>(&transformed) {
325                Ok(root) => {
326                    if include_files {
327                        let mut visited: HashSet<String> = HashSet::new();
328                        collect_includes_recursive(&main_path, &mut visited, &mut files_set);
329                    }
330
331                    if include_modules {
332                        if let Some(mods) = root.get("modules").and_then(|v| v.as_array()) {
333                            for module in &mods.values {
334                                // declared: the literal declared value (module: ...)
335                                // name_raw: prefer `name` attribute, fallback to declared
336                                let (declared, name_raw) = if module.is_object() {
337                                    let declared = module
338                                        .get("module")
339                                        .map(|v| v.as_string())
340                                        .unwrap_or_default();
341                                    let name_raw = module
342                                        .get("name")
343                                        .map(|v| v.as_string())
344                                        .unwrap_or_else(|| declared.clone());
345                                    (declared, name_raw)
346                                } else {
347                                    let declared = module.as_string();
348                                    let name_raw = declared.clone();
349                                    (declared, name_raw)
350                                };
351
352                                let clean = normalize_module_name(&name_raw);
353
354                                // determine downloaded: phlow_packages/{clean} or local module base dir
355                                let mut downloaded = String::new();
356                                let pp_path = format!("phlow_packages/{}", clean);
357                                let pp = Path::new(&pp_path);
358                                if pp.exists() {
359                                    downloaded = pp.to_string_lossy().to_string();
360                                }
361
362                                if declared.starts_with('.') {
363                                    let base = main_path.parent().unwrap_or(Path::new("."));
364                                    let mut candidate = base.join(&declared);
365                                    if candidate.is_dir() {
366                                        for c in ["main.phlow", "mod.phlow", "module.phlow"] {
367                                            let p = candidate.join(c);
368                                            if p.exists() {
369                                                candidate = p;
370                                                break;
371                                            }
372                                        }
373                                    } else if candidate.extension().is_none() {
374                                        let mut with_ext = candidate.clone();
375                                        with_ext.set_extension("phlow");
376                                        if with_ext.exists() {
377                                            candidate = with_ext;
378                                        }
379                                    }
380
381                                    if candidate.exists() {
382                                        if candidate.is_dir() {
383                                            downloaded = candidate.to_string_lossy().to_string();
384                                        } else if let Some(p) = candidate.parent() {
385                                            downloaded = p.to_string_lossy().to_string();
386                                        }
387                                    }
388                                }
389
390                                modules_json.push(json!({"declared": declared, "name": clean, "downloaded": downloaded}));
391
392                                // If declared is local, try recursive analyze when it resolves to main.phlow
393                                // only perform recursive analysis when `include_inner` is true
394                                if declared.starts_with('.') && include_inner {
395                                    let base = main_path.parent().unwrap_or(Path::new("."));
396                                    let mut candidate = base.join(&declared);
397                                    if candidate.is_dir() {
398                                        for c in ["main.phlow", "mod.phlow", "module.phlow"] {
399                                            let p = candidate.join(c);
400                                            if p.exists() {
401                                                candidate = p;
402                                                break;
403                                            }
404                                        }
405                                    } else if candidate.extension().is_none() {
406                                        let mut with_ext = candidate.clone();
407                                        with_ext.set_extension("phlow");
408                                        if with_ext.exists() {
409                                            candidate = with_ext;
410                                        }
411                                    }
412
413                                    if candidate.exists() {
414                                        if let Some(fname) =
415                                            candidate.file_name().and_then(|s| s.to_str())
416                                        {
417                                            if fname == "main.phlow" {
418                                                if let Ok(nested) = analyze_internal(
419                                                    &candidate.to_string_lossy(),
420                                                    include_files,
421                                                    include_modules,
422                                                    include_total_steps,
423                                                    include_total_pipelines,
424                                                    include_inner,
425                                                    visited,
426                                                )
427                                                .await
428                                                {
429                                                    if let Some(nfiles) = nested
430                                                        .get("files")
431                                                        .and_then(|v| v.as_array())
432                                                    {
433                                                        for f in &nfiles.values {
434                                                            files_set.insert(f.to_string());
435                                                        }
436                                                    }
437                                                    if let Some(nmods) = nested
438                                                        .get("modules")
439                                                        .and_then(|v| v.as_array())
440                                                    {
441                                                        for m in &nmods.values {
442                                                            modules_json.push(m.clone());
443                                                        }
444                                                    }
445                                                    if let Some(ns) = nested.get("total_steps") {
446                                                        if let Ok(nv) =
447                                                            ns.to_string().parse::<usize>()
448                                                        {
449                                                            total_steps += nv;
450                                                        }
451                                                    }
452                                                    if let Some(np) = nested.get("total_pipelines")
453                                                    {
454                                                        if let Ok(nv) =
455                                                            np.to_string().parse::<usize>()
456                                                        {
457                                                            total_pipelines += nv;
458                                                        }
459                                                    }
460                                                }
461                                            }
462                                        }
463                                    }
464                                }
465                            }
466                        }
467                    }
468
469                    if include_total_pipelines || include_total_steps {
470                        if let Some(steps_val) = root.get("steps") {
471                            // add the pipelines/steps from the main root to any totals already accumulated from nested modules
472                            total_pipelines += count_pipelines_recursive(steps_val);
473                            total_steps += count_steps_recursive(steps_val);
474                        }
475                    }
476                }
477                Err(_) => {
478                    // parse failed: fallthrough to tolerant fallback below
479                }
480            }
481        }
482
483        // If after preprocessor/parse we still don't have results (e.g. preprocessor failed or parse failed),
484        // perform the original tolerant fallback directly against the raw file contents (best-effort).
485        if files_set.is_empty()
486            && modules_json.is_empty()
487            && total_pipelines == 0
488            && total_steps == 0
489        {
490            let content = raw;
491
492            if include_files {
493                // collect referenced include/import paths even if they don't exist
494                let include_re = Regex::new(r"!include\s+([^\s]+)").unwrap();
495                let import_re = Regex::new(r"!import\s+(\S+)").unwrap();
496                let base = main_path.parent().unwrap_or(Path::new("."));
497
498                for cap in include_re.captures_iter(&content) {
499                    if let Some(rel) = cap.get(1) {
500                        let mut full = base.join(rel.as_str());
501                        if full.extension().is_none() {
502                            full.set_extension("phlow");
503                        }
504                        files_set.insert(full.to_string_lossy().to_string());
505                    }
506                }
507
508                for cap in import_re.captures_iter(&content) {
509                    if let Some(rel) = cap.get(1) {
510                        let full = base.join(rel.as_str());
511                        files_set.insert(full.to_string_lossy().to_string());
512                    }
513                }
514            }
515
516            if include_modules {
517                let modules_re = Regex::new(r"module:\s*([^\n\r]+)").unwrap();
518                for cap in modules_re.captures_iter(&content) {
519                    if let Some(m) = cap.get(1) {
520                        let mn_str = m.as_str().trim().to_string();
521                        let clean = normalize_module_name(&mn_str);
522                        // determine downloaded: if local module (starts with '.') check resolved path, otherwise check phlow_packages
523                        let mut downloaded = String::new();
524                        let pp_path = format!("phlow_packages/{}", clean);
525                        let pp = Path::new(&pp_path);
526                        if pp.exists() {
527                            downloaded = pp.to_string_lossy().to_string();
528                        }
529                        if mn_str.starts_with('.') {
530                            let base = main_path.parent().unwrap_or(Path::new("."));
531                            let mut candidate = base.join(&mn_str);
532                            if candidate.is_dir() {
533                                let mut found = None;
534                                for c in ["main.phlow", "mod.phlow", "module.phlow"] {
535                                    let p = candidate.join(c);
536                                    if p.exists() {
537                                        found = Some(p);
538                                        break;
539                                    }
540                                }
541                                if let Some(p) = found {
542                                    candidate = p;
543                                }
544                            } else if candidate.extension().is_none() {
545                                let mut with_ext = candidate.clone();
546                                with_ext.set_extension("phlow");
547                                if with_ext.exists() {
548                                    candidate = with_ext;
549                                }
550                            }
551                            if candidate.exists() {
552                                if candidate.is_dir() {
553                                    downloaded = candidate.to_string_lossy().to_string();
554                                } else if let Some(p) = candidate.parent() {
555                                    downloaded = p.to_string_lossy().to_string();
556                                }
557                            }
558                        }
559                        modules_json.push(
560                            json!({"declared": mn_str, "name": clean, "downloaded": downloaded}),
561                        );
562                    }
563                }
564            }
565
566            if include_total_pipelines || include_total_steps {
567                if content.contains("steps:") {
568                    let parts: Vec<&str> = content.splitn(2, "steps:").collect();
569                    if parts.len() > 1 {
570                        let steps_block = parts[1];
571                        let steps_count = steps_block.matches("\n- ").count();
572                        total_steps += steps_count;
573                        total_pipelines += 1;
574                    }
575                }
576            }
577        }
578
579        let mut files_vec: Vec<String> = files_set.into_iter().collect();
580        files_vec.sort();
581        Ok(json!({
582            "files": files_vec,
583            "modules": modules_json,
584            "total_steps": total_steps,
585            "total_pipelines": total_pipelines
586        }))
587    })
588}
589
590pub async fn analyze(
591    script_target: &str,
592    include_files: bool,
593    include_modules: bool,
594    include_total_steps: bool,
595    include_total_pipelines: bool,
596    include_inner: bool,
597) -> Result<Value, LoaderError> {
598    let mut visited: HashSet<String> = HashSet::new();
599    analyze_internal(
600        script_target,
601        include_files,
602        include_modules,
603        include_total_steps,
604        include_total_pipelines,
605        include_inner,
606        &mut visited,
607    )
608    .await
609}