arkenar 1.0.0

A modern vulnerability scanner combining Katana and Nuclei.
use std::collections::HashSet;
use std::io::Write;
use std::process::Stdio;
use colored::*;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use serde_json::Value;
use url::Url;
use crate::utils;

fn katana_binary() -> String {
    match utils::get_binary_path("katana") {
        Some(path) => path,
        None => {
            eprint!("{}\r\n", "Error: 'katana' binary not found. Run the scanner once to auto-install.".red());
            std::process::exit(1);
        }
    }
}

pub async fn run_katana_crawler(
    target: &str,
    mode: &str,
    verbose: bool,
    scope: bool,
) -> anyhow::Result<Vec<String>> {
    let binary = katana_binary();
    let is_simple = mode != "advanced";
    let depth = if is_simple { "2" } else { "5" };
    let max_urls: Option<usize> = if is_simple { Some(20) } else { None };

    if verbose {
        print!("[*] Starting Katana on target: {} (depth: {})\r\n", target, depth);
    } else {
        print!("[*] Starting Katana on target: {}\r\n", target);
    }
    std::io::stdout().flush().ok();

    let mut args = vec!["-u", target, "-jsonl", "-silent", "-d", depth];

    if is_simple {
        args.extend_from_slice(&["-crawl-duration", "30"]);
    }

    let mut child = Command::new(binary)
        .args(&args)
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()?;

    let stdout = child.stdout.take().ok_or_else(|| anyhow::anyhow!("Failed to capture stdout from katana"))?;
    let reader = BufReader::new(stdout);
    let mut lines = reader.lines();
    let mut seen: HashSet<String> = HashSet::new();

    let target_domain = Url::parse(target)
        .ok()
        .and_then(|u| u.host_str().map(|h| h.to_lowercase()));

    while let Ok(Some(raw_line)) = lines.next_line().await {
        let line = raw_line.trim().to_string();
        if line.is_empty() { continue; }

        let parsed: Value = match serde_json::from_str(&line) {
            Ok(v) => v,
            Err(_) => continue,
        };

        let extracted = parsed
            .get("endpoint")
            .or_else(|| parsed.get("url"))
            .and_then(|val| val.as_str());

        let extracted = extracted.or_else(|| {
            parsed
                .get("request")
                .and_then(|req| req.get("endpoint").or_else(|| req.get("url")))
                .and_then(|val| val.as_str())
        });

        if let Some(url_str) = extracted {
            if scope {
                if let Ok(parsed_url) = Url::parse(url_str) {
                    let url_domain = parsed_url.host_str().map(|h| h.to_lowercase());
                    if url_domain != target_domain {
                        continue;
                    }
                }
            }

            if seen.insert(url_str.to_string()) {
                if verbose {
                    print!("[+] Discovered: {}\r\n", url_str);
                    std::io::stdout().flush().ok();
                }

                if let Some(limit) = max_urls {
                    if seen.len() >= limit {
                        print!("{}\r\n", format!("[*] Reached URL cap ({}) for simple mode. Stopping crawler.", limit).yellow());
                        std::io::stdout().flush().ok();
                        child.kill().await.ok();
                        break;
                    }
                }
            }
        }
    }

    let _ = child.wait().await;
    print!("[*] Katana finished. Total unique URLs: {}\r\n", seen.len());
    std::io::stdout().flush().ok();

    Ok(seen.into_iter().collect())
}