use std::io::Write;
use std::process::{Command, Stdio};
/// Runs the `iriq` binary under test with `args`, writes `stdin_data` to its
/// standard input, and waits for it to exit.
///
/// Returns `(stdout, stderr, success)`, where `success` is whether the
/// process reported a successful exit status.
///
/// # Panics
///
/// Panics if the process cannot be spawned or waited on, if writing to its
/// stdin fails for any reason other than the child having closed the pipe,
/// or if either captured stream is not valid UTF-8.
fn run_full(args: &[&str], stdin_data: &str) -> (String, String, bool) {
    let mut child = Command::new(env!("CARGO_BIN_EXE_iriq"))
        .args(args)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("spawn iriq");

    {
        // Scoped so our end of the pipe is dropped (and therefore closed)
        // before waiting on the child.
        let mut stdin = child.stdin.take().expect("child stdin was piped");
        match stdin.write_all(stdin_data.as_bytes()) {
            Ok(()) => {}
            // The child may exit (for example on an argument error) without
            // draining stdin. Let the caller inspect the exit status and
            // stderr instead of panicking here with a broken-pipe error.
            Err(e) if e.kind() == std::io::ErrorKind::BrokenPipe => {}
            Err(e) => panic!("write iriq stdin: {e}"),
        }
    }

    let output = child.wait_with_output().expect("wait iriq");
    (
        String::from_utf8(output.stdout).expect("iriq stdout is not valid UTF-8"),
        String::from_utf8(output.stderr).expect("iriq stderr is not valid UTF-8"),
        output.status.success(),
    )
}
/// Runs `iriq` through `run_full` and returns its stdout.
///
/// # Panics
///
/// Panics with the arguments and the captured stderr when the process does
/// not exit successfully.
fn run(args: &[&str], stdin_data: &str) -> String {
    match run_full(args, stdin_data) {
        (stdout, _, true) => stdout,
        (_, err, false) => panic!("iriq {args:?} failed: {err}"),
    }
}
/// Checks that `-n` produces output for a line while stdin is still open.
///
/// One URL is written and flushed without closing stdin. A helper thread
/// blocks on the first stdout line and hands it back over a channel so the
/// wait can be bounded to five seconds.
#[test]
fn sections_stream_before_stdin_closes() {
    use std::io::{BufRead, BufReader};
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    let mut iriq = Command::new(env!("CARGO_BIN_EXE_iriq"))
        .arg("-n")
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .expect("spawn iriq");

    // Keep the write end alive: the point of the test is that output shows
    // up before this handle is dropped.
    let mut input = iriq.stdin.take().unwrap();
    input.write_all(b"https://foo.com/users/1\n").unwrap();
    input.flush().unwrap();

    let output = iriq.stdout.take().unwrap();
    let (sender, receiver) = mpsc::channel();
    thread::spawn(move || {
        let mut first = String::new();
        // A read error is ignored on purpose; it surfaces as an empty line
        // in the assertion below.
        let _ = BufReader::new(output).read_line(&mut first);
        let _ = sender.send(first);
    });

    let line = receiver
        .recv_timeout(Duration::from_secs(5))
        .expect("no output before stdin closed — -n is not streaming");
    assert!(line.contains("{user_id}"), "got: {line:?}");

    // Closing stdin lets the child finish; its exit status is not checked.
    drop(input);
    let _ = iriq.wait();
}
/// Runs `iriq --corpus <path>` with `urls` on stdin and returns `<path>`.
///
/// The path is in the system temp directory and is named from the current
/// process id and `tag`. Any existing file at that path is removed first
/// (removal errors are ignored). Presumably the run populates the corpus file
/// at that path; callers pass the returned path to later `--corpus` runs.
///
/// # Panics
///
/// Panics if the temp path is not valid UTF-8 or if the `iriq` run fails.
fn corpus_with(tag: &str, urls: &str) -> String {
    let file_name = format!("iriq_e2e_{}_{}.json", std::process::id(), tag);
    let path = std::env::temp_dir().join(file_name);
    let _ = std::fs::remove_file(&path);

    let corpus = path.to_str().unwrap().to_owned();
    run(&["--corpus", corpus.as_str()], urls);
    corpus
}
/// Contrasts `-c` and `-n` on the same input: `-c` is expected to print the
/// URL with lowercase scheme and host and without the `:80` port, while `-n`
/// is expected to replace the numeric `/pull/42` segment with a placeholder.
#[test]
fn canonical_vs_normalize() {
    const INPUT: &str = "HTTP://Foo.com:80/pull/42";

    let canonical = run(&["-c", INPUT], "");
    assert_eq!(canonical.trim(), "http://foo.com/pull/42");

    let norm = run(&["-n", INPUT], "");
    let has_placeholder = norm.contains("/pull/{");
    assert!(has_placeholder, "expected a placeholder, got: {norm}");
    let keeps_literal = norm.contains("/pull/42");
    assert!(
        !keeps_literal,
        "normalize should not keep the literal: {norm}"
    );
}
/// With no mode flag, the output for a single URL is expected to contain
/// both a `# parse` and a `# normalize` heading plus the normalized URL.
#[test]
fn default_emits_parse_and_normalize() {
    let out = run(&["https://foo.com/users/123"], "");
    for needle in [
        "# parse",
        "# normalize",
        "https://foo.com/users/{user_id}",
    ] {
        assert!(out.contains(needle), "{out}");
    }
}
/// `-e` output is expected to mention the `{user_id}` placeholder, a `path:`
/// label, and the word `integer`.
#[test]
fn explain_shows_per_segment_trace() {
    let out = run(&["-e", "https://foo.com/users/123"], "");
    let all_present = ["{user_id}", "path:", "integer"]
        .iter()
        .all(|needle| out.contains(*needle));
    assert!(all_present, "{out}");
}
/// `-p -j` output must be a single JSON document exposing `kind`, `host`,
/// and `path_segments` for the parsed URL.
#[test]
fn parse_json_shape() {
    use serde_json::{json, Value};

    let out = run(&["-p", "-j", "https://foo.com/users/123"], "");
    let parsed = serde_json::from_str::<Value>(&out).expect("valid json");

    assert_eq!(parsed["kind"], "url");
    assert_eq!(parsed["host"], "foo.com");
    assert_eq!(parsed["path_segments"], json!(["users", "123"]));
}
/// `-pn -j` output must be one JSON document with a `parse` entry and a
/// `normalize` entry holding the normalized URL.
///
/// NOTE(review): despite the name, this only checks that both keys exist;
/// the relative order of the keys in the output is not asserted.
#[test]
fn multi_section_json_has_ordered_keys() {
    use serde_json::Value;

    let out = run(&["-pn", "-j", "https://foo.com/users/123"], "");
    let doc = serde_json::from_str::<Value>(&out).expect("valid json");

    let has_parse = doc.get("parse").is_some();
    assert!(has_parse, "{out}");
    assert_eq!(doc["normalize"], "https://foo.com/users/{user_id}");
}
/// With `-J -n`, every non-empty stdout line must parse as JSON on its own,
/// and the output must contain the `{user_id}` placeholder.
#[test]
fn ndjson_emits_one_object_per_line() {
    use serde_json::Value;

    let out = run(&["-J", "-n", "https://foo.com/users/123"], "");
    out.lines()
        .filter(|line| !line.is_empty())
        .for_each(|line| {
            serde_json::from_str::<Value>(line).expect("each line is json");
        });
    assert!(out.contains("{user_id}"), "{out}");
}
/// Free text on stdin is expected to yield one URL per output line; the bare
/// `b.com/y` is expected to come back with an `https://` scheme.
#[test]
fn batch_lists_extracted_urls() {
    let expected = vec!["https://a.com/x", "https://b.com/y"];

    let out = run(&[], "see https://a.com/x and b.com/y\n");
    let mut lines: Vec<&str> = Vec::new();
    lines.extend(out.lines().filter(|l| !l.is_empty()));

    assert_eq!(lines, expected);
}
/// Twelve same-shaped URLs on stdin are expected to be reported as a single
/// `[12] foo.com` cluster with the `/users/{user_id}` shape.
#[test]
fn large_batch_switches_to_cluster_view() {
    let mut urls = String::new();
    for i in 1..=12 {
        urls.push_str(&format!("https://foo.com/users/{i}\n"));
    }

    let out = run(&[], &urls);
    let one_cluster = out.contains("[12] foo.com");
    assert!(one_cluster, "expected one cluster of 12: {out}");
    assert!(out.contains("/users/{user_id}"), "{out}");
}
/// Feeds URLs from `api.foo.com` and `app.foo.com` with `--host registrable`
/// and then checks that `--stats` on the same corpus file mentions `foo.com`
/// but not `api.foo.com`.
///
/// The corpus file is a per-process temp file, removed before and after the
/// run (removal errors are ignored).
#[test]
fn host_registrable_collapses_subdomains() {
    let mut urls = String::new();
    for i in 1..=6 {
        urls.push_str(&format!(
            "https://api.foo.com/users/{i}\nhttps://app.foo.com/users/{i}\n"
        ));
    }

    let corpus_path =
        std::env::temp_dir().join(format!("iriq_e2e_{}_reg.json", std::process::id()));
    let _ = std::fs::remove_file(&corpus_path);
    let cps = corpus_path.to_str().unwrap();

    run(&["--host", "registrable", "--corpus", cps], &urls);
    let stats = run(&["--corpus", cps, "--stats"], "");

    assert!(stats.contains("foo.com"), "{stats}");
    let mentions_subdomain = stats.contains("api.foo.com");
    assert!(!mentions_subdomain, "subdomains should collapse: {stats}");

    let _ = std::fs::remove_file(&corpus_path);
}
/// `--stats` on a corpus built from twelve URLs is expected to print the
/// `observations:`, `clusters:`, and `top shapes:` labels.
#[test]
fn stats_reports_observations_and_shapes() {
    let urls = (1..=12).fold(String::new(), |mut acc, i| {
        acc.push_str(&format!("https://foo.com/users/{i}\n"));
        acc
    });
    let cp = corpus_with("stats", &urls);

    let out = run(&["--corpus", &cp, "--stats"], "");
    for label in ["observations:", "clusters:", "top shapes:"] {
        assert!(out.contains(label), "{out}");
    }

    let _ = std::fs::remove_file(&cp);
}
/// `--propose-recognizers` on a corpus built from twelve URLs must exit
/// successfully and mention "proposal" (case-insensitively) on stdout.
#[test]
fn propose_recognizers_runs() {
    let urls: String = (1..=12)
        .map(|i| format!("https://foo.com/users/{i}\n"))
        .collect();
    let cp = corpus_with("propose", &urls);

    // Go through `run` rather than a bare `assert!(ok)` so that a non-zero
    // exit reports the arguments and the captured stderr.
    let out = run(&["--corpus", &cp, "--propose-recognizers"], "");
    assert!(out.to_lowercase().contains("proposal"), "{out}");

    let _ = std::fs::remove_file(&cp);
}
/// `--cross-host-shapes --min-hosts 2` on a corpus built from three hosts
/// sharing the `/users/<n>` path is expected to print output containing
/// `hosts`.
#[test]
fn cross_host_shapes_runs() {
    let urls = "https://a.com/users/1\nhttps://b.com/users/2\nhttps://c.com/users/3\n";
    let cp = corpus_with("crosshost", urls);

    let args = ["--corpus", cp.as_str(), "--cross-host-shapes", "--min-hosts", "2"];
    let out = run(&args, "");
    assert!(out.contains("hosts"), "{out}");

    let _ = std::fs::remove_file(&cp);
}
/// `--reinfer` on a corpus built from twelve URLs is expected to print
/// output containing `reinferred`.
#[test]
fn reinfer_runs() {
    let urls = (1..=12)
        .map(|i| format!("https://foo.com/users/{i}\n"))
        .collect::<Vec<String>>()
        .concat();
    let cp = corpus_with("reinfer", &urls);

    let out = run(&["--corpus", cp.as_str(), "--reinfer"], "");
    let reported = out.contains("reinferred");
    assert!(reported, "{out}");

    let _ = std::fs::remove_file(&cp);
}
/// The argument `%%%` must make the process exit unsuccessfully and print
/// `parse error` on stderr.
#[test]
fn bad_input_exits_nonzero() {
    let (_stdout, err, succeeded) = run_full(&["%%%"], "");
    if succeeded {
        panic!("expected failure exit");
    }
    let mentions_parse_error = err.contains("parse error");
    assert!(mentions_parse_error, "{err}");
}
/// `--version` output, with surrounding whitespace trimmed, must equal the
/// library's `iriq::VERSION`.
#[test]
fn version_matches_crate() {
    let reported = run(&["--version"], "");
    assert_eq!(reported.trim(), iriq::VERSION);
}
/// `--help` must exit successfully and print a `Usage: iriq` line as well as
/// the `--normalize` flag.
#[test]
fn help_shows_usage() {
    let out = run(&["--help"], "");
    for expected in ["Usage: iriq", "--normalize"] {
        assert!(out.contains(expected), "{out}");
    }
}