use crawlex::scheduler::{
frontier_weight, InterArrivalJitter, JitterProfile, SessionDecision, SessionDepthTracker,
WeightedFrontier, DEFAULT_FRONTIER_WEIGHTS,
};
#[test]
fn inter_arrival_distribution_matches_log_normal() {
    // Draw a large batch of raw delays and check the empirical shape:
    // a human-like cadence has a sub-10s median, a wide spread between
    // the 10th and 90th percentiles (heavy right tail), and a
    // non-trivial count of multi-second pauses.
    let jitter = InterArrivalJitter::new(JitterProfile::Human);
    let mut delays_ms: Vec<u64> = Vec::with_capacity(2000);
    for _ in 0..2000 {
        delays_ms.push(jitter.sample_raw().as_millis() as u64);
    }
    delays_ms.sort_unstable();
    let n = delays_ms.len();
    let (p10, median, p90) = (delays_ms[n / 10], delays_ms[n / 2], delays_ms[n * 9 / 10]);
    assert!(
        (400..=6000).contains(&median),
        "median {median} ms outside expected human cadence band"
    );
    assert!(
        p90 > p10.saturating_mul(3),
        "distribution too tight p10={p10} p90={p90} — not log-normal"
    );
    let above_5s = delays_ms.iter().copied().filter(|&ms| ms >= 5000).count();
    assert!(
        above_5s >= 50,
        "tail too thin: only {above_5s} samples ≥ 5s"
    );
}
#[test]
fn inter_arrival_off_is_noop() {
    // With the Off profile, both the raw sampler and the per-session
    // delay path must always produce a zero duration.
    let jitter = InterArrivalJitter::new(JitterProfile::Off);
    (0..64).for_each(|_| {
        assert!(jitter.sample_raw().is_zero());
        assert!(jitter.delay_for_next("s1").is_zero());
    });
}
#[test]
fn session_depth_caps_are_pareto_distributed() {
    // Drive 200 independent sessions to completion and record how many
    // observations each took before the tracker ended it. The depth
    // distribution should look Pareto-like (median in a plausible band,
    // a real tail above the median) while still being hard-capped.
    let tracker = SessionDepthTracker::new(15);
    let mut depths: Vec<usize> = Vec::with_capacity(200);
    for session in 0..200 {
        let key = format!("s{session}");
        let mut steps = 0;
        let depth = loop {
            steps += 1;
            if matches!(tracker.observe(&key), SessionDecision::EndSession) {
                break steps;
            }
            // Safety valve so a broken cap can't hang the test forever.
            assert!(steps <= 500, "session {key} never ended — cap broken");
        };
        depths.push(depth);
    }
    depths.sort_unstable();
    let median = depths[depths.len() / 2];
    let p90 = depths[depths.len() * 9 / 10];
    let max = depths[depths.len() - 1];
    assert!(
        (4..=30).contains(&median),
        "median session depth {median} outside Pareto expectation"
    );
    assert!(p90 > median, "no tail: p90={p90} median={median} — not Pareto");
    assert!(max <= 31, "cap truncation violated: max={max}");
}
#[test]
fn session_depth_zero_cap_disables() {
    // A cap of zero means "unlimited": the tracker must keep answering
    // Continue no matter how many observations arrive for the session.
    let tracker = SessionDepthTracker::new(0);
    (0..100).for_each(|_| assert_eq!(tracker.observe("s1"), SessionDecision::Continue));
}
#[test]
fn frontier_weights_decay_with_depth() {
    // The built-in depth→weight table must be non-increasing, and
    // frontier_weight must saturate to the table's final entry for any
    // depth past the end of the table.
    for (idx, pair) in DEFAULT_FRONTIER_WEIGHTS.windows(2).enumerate() {
        let (prev, w, d) = (pair[0], pair[1], idx + 1);
        assert!(
            w <= prev,
            "weight curve not monotone at depth {d}: prev={prev} cur={w}"
        );
    }
    let tail = DEFAULT_FRONTIER_WEIGHTS[DEFAULT_FRONTIER_WEIGHTS.len() - 1];
    assert_eq!(frontier_weight(99), tail);
}
#[test]
fn weighted_frontier_biases_toward_hub() {
    // Build many frontiers holding one depth-0 hub and nine depth-4
    // pages, and count how often the hub pops first. Weighting should
    // lift the hub well above the uniform 10% baseline without starving
    // the deep pages entirely.
    let trials = 400;
    let hub_first = (0..trials)
        .filter(|_| {
            let frontier = WeightedFrontier::default();
            frontier.push("hub".into(), 0);
            for i in 0..9 {
                frontier.push(format!("deep{i}"), 4);
            }
            frontier.pop_weighted().as_deref() == Some("hub")
        })
        .count();
    let pct = (hub_first as f64 / trials as f64) * 100.0;
    assert!(
        pct > 20.0,
        "hub-first rate {pct:.1}% too low — weighting not applied"
    );
    assert!(
        pct < 80.0,
        "hub-first rate {pct:.1}% too high — deep pages never fire"
    );
}
#[test]
fn frontier_histogram_shape_is_hub_spoke() {
    // Seed a frontier shaped like a hub-and-spoke crawl (one hub,
    // widening to depth 2, then tapering off) and check that the depth
    // histogram reflects that shape.
    let f = WeightedFrontier::default();
    // Push `count` synthetic URLs at the given depth.
    let seed = |prefix: &str, depth, count| {
        for i in 0..count {
            f.push(format!("{prefix}{i}"), depth);
        }
    };
    f.push("hub".into(), 0);
    seed("d1_", 1, 8);
    seed("d2_", 2, 20);
    seed("d3_", 3, 5);
    seed("d4_", 4, 2);
    let hist = f.depth_histogram();
    assert!(hist[2] > hist[0], "depth-2 should exceed depth-0 (hub)");
    assert!(
        hist[2] >= hist[3],
        "out-degree must not grow beyond depth 2"
    );
    assert!(hist[3] >= hist[4], "tail must taper");
}
#[test]
fn partitioned_cookie_isolation_across_top_level_sites() {
    // The same CDN origin, embedded under two different top-level sites,
    // must keep its cookies in separate partitions (CHIPS): cookies set
    // under siteA are never visible under siteB and vice versa. Also
    // verifies that a Partitioned cookie lacking `Secure` is counted as
    // invalid instead of being stored.
    use crawlex::http::cookies::PartitionedCookieStore;
    use http::HeaderMap;
    use url::Url;
    // Build a response HeaderMap carrying a single Set-Cookie line.
    let response_with = |cookie: &str| {
        let mut headers = HeaderMap::new();
        headers.append("set-cookie", cookie.parse().unwrap());
        headers
    };
    let store = PartitionedCookieStore::new();
    let cdn = Url::parse("https://embed.cdn/").unwrap();
    store.ingest(
        "https://siteA.test/",
        &cdn,
        &response_with("k=A; Path=/; Secure; SameSite=None; Partitioned"),
    );
    store.ingest(
        "https://siteB.test/",
        &cdn,
        &response_with("k=B; Path=/; Secure; SameSite=None; Partitioned"),
    );
    let a = store
        .cookie_header("https://siteA.test/", &cdn)
        .expect("partition A has the cookie");
    let b = store
        .cookie_header("https://siteB.test/", &cdn)
        .expect("partition B has the cookie");
    // Fixed failure message: this assertion fires when partition A's own
    // cookie is absent (missing), not when something leaked into it —
    // mirrors the "partition B missing" message below.
    assert!(a.contains("k=A"), "partition A missing: {a}");
    assert!(!a.contains("k=B"), "partition B leaked into A: {a}");
    assert!(b.contains("k=B"), "partition B missing: {b}");
    assert!(!b.contains("k=A"), "partition A leaked into B: {b}");
    // Per CHIPS, Partitioned requires Secure; this cookie must be rejected
    // and tallied rather than stored.
    store.ingest(
        "https://siteA.test/",
        &cdn,
        &response_with("bad=1; Path=/; SameSite=None; Partitioned"),
    );
    assert!(store.invalid_partitioned_count() >= 1);
}