use rakers::{HttpConfig, diff_html, pretty_print, render, select_html, set_verbose, to_json};
use std::time::Duration;
use clap::Parser;
use std::{
fs,
io::{self, Read, Write},
path::Path,
};
// Command-line interface of the `rakers` binary, parsed by clap's derive API.
//
// NOTE: the field notes below are plain `//` comments on purpose. clap's derive
// turns `///` doc comments into `--help` text, so converting them would change
// the program's output.
#[derive(Parser)]
#[command(
name = "rakers",
about = "Render JavaScript into HTML using Servo's HTML parser (html5ever)"
)]
// Each bool below is an independent command-line switch, so the lint's advice
// to fold them into an enum/state machine does not apply.
#[allow(clippy::struct_excessive_bools)]
struct Cli {
// Positional input. `main` treats a value starting with `http://` or
// `https://` as a URL to download and anything else as a file path; when it
// is omitted the document is read from stdin.
input: Option<String>,
// File the final result is written to; stdout is used when absent.
#[arg(short, long, value_name = "FILE")]
output: Option<String>,
// Copied verbatim into `HttpConfig::user_agent`.
#[arg(short = 'A', long, value_name = "UA")]
user_agent: Option<String>,
// Extra request headers in `Name: Value` form; each entry is split at its
// first `:` and both halves are trimmed by `http_config_from_cli`.
#[arg(short = 'H', long = "header", value_name = "HEADER")]
headers: Vec<String>,
// Forwarded unchanged to `rakers::render`; the exact clean-up it performs is
// defined by the library — TODO confirm and describe here.
#[arg(long)]
clean: bool,
// Run the rendered HTML through `pretty_print`. Has no effect together with
// `--diff`, which takes a separate branch in `main`.
#[arg(long)]
pretty: bool,
// Emit `to_json(input.len(), &html)` instead of raw HTML. Has no effect
// together with `--diff`.
#[arg(long)]
json: bool,
// Forwarded unchanged to `rakers::render`; presumably an upper bound on the
// number of scripts processed — verify against the library.
#[arg(long, value_name = "N")]
max_scripts: Option<usize>,
// Output `diff_html(input, rendered)` rather than the rendered document.
#[arg(long)]
diff: bool,
// Passed to `set_verbose` before any other work happens.
#[arg(long)]
verbose: bool,
// Timeout in (possibly fractional) seconds handed to `render`. `main`
// rejects values <= 0 and falls back to 30 seconds when neither this flag
// nor `--no-timeout` is given.
#[arg(long, value_name = "SECS", conflicts_with = "no_timeout")]
timeout: Option<f64>,
// Makes `main` pass `None` as the timeout to `render`.
#[arg(long)]
no_timeout: bool,
// When set, the rendered HTML is narrowed with `select_html` before any
// diff / pretty / JSON post-processing.
#[arg(long, value_name = "SELECTOR")]
selector: Option<String>,
// Copied verbatim into `HttpConfig::proxy`.
#[arg(long, value_name = "URL")]
proxy: Option<String>,
// Copied into `HttpConfig::forward_headers`; how the library uses it is not
// visible from this file — TODO confirm.
#[arg(long)]
forward_headers: bool,
}
/// Reports whether `s` names an HTTP(S) resource rather than a local path.
///
/// The `http://` / `https://` prefix is matched ASCII case-insensitively, so
/// `HTTP://example.com` is recognised as well: URI schemes are
/// case-insensitive, and treating such input as a file name would only produce
/// a confusing "file not found" error.
fn is_url(s: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|scheme| {
        // `get` yields `None` when `s` is shorter than the prefix or when the
        // cut would land inside a multi-byte character, so no input can panic.
        s.get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme))
    })
}
/// Builds the [`HttpConfig`] used for all network access from the parsed CLI.
///
/// Every `--header` value is split at its first `:`; the name and the value
/// are trimmed of surrounding whitespace. The user agent, proxy and
/// `forward_headers` settings are copied over unchanged.
///
/// # Errors
///
/// Fails on the first header that contains no `:` separator.
fn http_config_from_cli(cli: &Cli) -> anyhow::Result<HttpConfig> {
    let headers = cli
        .headers
        .iter()
        .map(|raw| {
            raw.split_once(':')
                .map(|(name, value)| (name.trim().to_owned(), value.trim().to_owned()))
                .ok_or_else(|| anyhow::anyhow!("invalid header {raw:?}: expected \"Name: Value\""))
        })
        // Collecting into `Result` stops at the first malformed header.
        .collect::<anyhow::Result<Vec<_>>>()?;

    Ok(HttpConfig {
        user_agent: cli.user_agent.clone(),
        headers,
        proxy: cli.proxy.clone(),
        forward_headers: cli.forward_headers,
    })
}
fn fetch(input: &str, cfg: &HttpConfig) -> anyhow::Result<(String, bool)> {
if is_url(input) {
let body = cfg.apply(cfg.agent().get(input)).call()?.into_string()?;
Ok((body, false))
} else {
let content = fs::read_to_string(input)?;
let is_js = Path::new(input).extension().is_some_and(|e| e == "js");
Ok((content, is_js))
}
}
/// Program entry point: parse arguments, load the input, render it, and emit
/// the result.
///
/// Steps, in order:
/// 1. Parse the CLI, set verbosity, and build the HTTP configuration.
/// 2. Load the input via [`fetch`] (URL or file) or from stdin when no
///    positional argument was given.
/// 3. Resolve the timeout passed to `render`: none with `--no-timeout`, the
///    `--timeout` value when given, 30 seconds otherwise.
/// 4. Render, then optionally narrow the output with `--selector`.
/// 5. Produce either a diff against the original input (`--diff`) or the
///    HTML, optionally pretty-printed and/or wrapped as JSON.
/// 6. Write to `--output`, or to stdout.
///
/// # Errors
///
/// Returns an error for malformed headers, I/O or network failures, a
/// `--timeout` that is not a positive, finite, representable number of
/// seconds, and any failure reported by `render` or `select_html`.
fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    set_verbose(cli.verbose);
    let cfg = http_config_from_cli(&cli)?;

    // Only a URL input yields a page URL for `render`; files and stdin pass `None`.
    let page_url = cli.input.as_deref().filter(|s| is_url(s));
    let (input, is_js) = if let Some(src) = &cli.input {
        fetch(src, &cfg)?
    } else {
        let mut s = String::new();
        io::stdin().read_to_string(&mut s)?;
        (s, false)
    };

    let script_timeout = if cli.no_timeout {
        None
    } else if let Some(secs) = cli.timeout {
        if secs <= 0.0 {
            anyhow::bail!(
                "--timeout must be greater than zero (use --no-timeout to remove the cap)"
            );
        }
        // `f64` parsing accepts "nan", "inf" and values such as 1e30, all of
        // which slip past the `<= 0.0` guard and make `Duration::from_secs_f64`
        // panic. The fallible constructor turns them into a normal CLI error.
        let limit = Duration::try_from_secs_f64(secs)
            .map_err(|err| anyhow::anyhow!("invalid --timeout value {secs}: {err}"))?;
        Some(limit)
    } else {
        // Default cap when neither --timeout nor --no-timeout is given.
        Some(Duration::from_secs(30))
    };

    let rendered = render(
        &input,
        is_js,
        page_url,
        &cfg,
        cli.clean,
        cli.max_scripts,
        script_timeout,
    )?;
    let rendered = match &cli.selector {
        Some(sel) => select_html(&rendered, sel)?,
        None => rendered,
    };

    // `--diff` takes precedence: `--pretty` and `--json` only apply to plain HTML output.
    let result = if cli.diff {
        diff_html(&input, &rendered)
    } else {
        let html = if cli.pretty {
            pretty_print(&rendered)
        } else {
            rendered
        };
        if cli.json {
            to_json(input.len(), &html)
        } else {
            html
        }
    };

    match &cli.output {
        Some(path) => fs::write(path, &result)?,
        None => io::stdout().write_all(result.as_bytes())?,
    }
    Ok(())
}