use serde_json::Value;
use std::collections::BTreeMap;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
/// Produces a short, single-line description of the shape of a JSON value.
///
/// * Objects yield `keys=[...]` listing at most the first eight keys, followed by
///   ` data:[N]` when `"data"` is an array of `N` items or ` data:{obj}` when it is
///   an object, and ` included:[N]` when `"included"` is an array of `N` items.
/// * Arrays yield `array[N]`.
/// * Strings, numbers and booleans yield `scalar`; `null` yields `null`.
pub fn summarize_json(body: &Value) -> String {
    // Handle every non-object root up front so the rest deals with objects only.
    let fields = match body {
        Value::Null => return String::from("null"),
        Value::Bool(_) | Value::Number(_) | Value::String(_) => return String::from("scalar"),
        Value::Array(items) => return format!("array[{}]", items.len()),
        Value::Object(fields) => fields,
    };

    // Cap the key listing so huge objects still summarize to one short line.
    let keys: Vec<&str> = fields.keys().take(8).map(String::as_str).collect();

    let data_part = match fields.get("data") {
        Some(data) if data.is_object() => String::from(" data:{obj}"),
        Some(data) => data
            .as_array()
            .map(|items| format!(" data:[{}]", items.len()))
            .unwrap_or_default(),
        None => String::new(),
    };

    let included_part = fields
        .get("included")
        .and_then(Value::as_array)
        .map(|items| format!(" included:[{}]", items.len()))
        .unwrap_or_default();

    format!("keys={keys:?}{data_part}{included_part}")
}
/// Writes JSON response bodies to individual fixture files inside one directory
/// and keeps an in-memory index of what was written.
///
/// The index is only persisted when `finalize` is called.
pub struct FixtureWriter {
/// Directory that receives the fixture files and the index file.
dir: PathBuf,
/// Index rows keyed by fixture file name; a later write to the same file name
/// replaces the earlier row.
index: BTreeMap<String, IndexEntry>,
}
/// One row of the fixture index serialized by `FixtureWriter::finalize`.
///
/// Field order is the order in which the fields are serialized.
#[derive(Debug, serde::Serialize)]
struct IndexEntry {
/// The key exactly as it was passed to `FixtureWriter::write`.
key: String,
/// The `source_page` label passed to `FixtureWriter::write`.
source_page: String,
/// File name (not the full path) of the fixture inside the writer's directory.
fixture_file: String,
/// Length of the body's top-level `"data"` array, or 0 when it is absent or not an array.
data_array_len: usize,
/// Byte length of the body when serialized compactly with `serde_json::to_string`.
body_size_bytes: usize,
}
/// Name of the index file that [`FixtureWriter::finalize`] writes into the fixture directory.
///
/// Fixture files must never use this name, otherwise `finalize` would overwrite them.
const FIXTURE_INDEX_FILE_NAME: &str = "_index.json";

impl FixtureWriter {
    /// Creates a writer rooted at `dir`, creating the directory and any missing parents.
    ///
    /// The in-memory index starts empty, even when `dir` already contains files.
    ///
    /// # Errors
    ///
    /// Returns the error reported by [`fs::create_dir_all`].
    pub fn new(dir: impl AsRef<Path>) -> io::Result<Self> {
        let dir = dir.as_ref().to_path_buf();
        fs::create_dir_all(&dir)?;
        Ok(Self {
            dir,
            index: BTreeMap::new(),
        })
    }

    /// Writes `body` as pretty-printed JSON to a file derived from `key` and records it in the
    /// index together with `source_page`.
    ///
    /// The file name is the slugified path part of `key` (everything before the first `?`),
    /// plus `__q<hash>` of the query part when one is present. If that name is already used by
    /// a *different* key, or is the name reserved for the index file, a `__k<hash-of-key>`
    /// suffix (and, if still needed, a numeric `_<n>` suffix) is appended. Writing the same
    /// key again reuses its file name, so the last write wins.
    ///
    /// Returns the full path of the written fixture.
    ///
    /// # Errors
    ///
    /// Returns the error reported by [`fs::write`]; the index is left untouched in that case.
    ///
    /// # Panics
    ///
    /// Panics if `body` cannot be serialized, which is not expected for a `serde_json::Value`.
    pub fn write(&mut self, key: &str, body: &Value, source_page: &str) -> io::Result<PathBuf> {
        let fname = self.unique_file_name(key);
        let dst = self.dir.join(&fname);

        let pretty =
            serde_json::to_string_pretty(body).expect("serde_json::Value is always serializable");
        fs::write(&dst, &pretty)?;

        let data_array_len = body
            .get("data")
            .and_then(|d| d.as_array())
            .map(|a| a.len())
            .unwrap_or(0);
        // The recorded size is that of the compact form, not of the pretty file on disk.
        let body_size_bytes = serde_json::to_string(body)
            .expect("serde_json::Value is always serializable")
            .len();

        self.index.insert(
            fname.clone(),
            IndexEntry {
                key: key.to_string(),
                source_page: source_page.to_string(),
                fixture_file: fname,
                data_array_len,
                body_size_bytes,
            },
        );
        Ok(dst)
    }

    /// Number of distinct fixture files recorded in the index.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns `true` when nothing has been recorded in the index yet.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Consumes the writer and writes the index as a pretty-printed JSON array to
    /// `_index.json` in the fixture directory, returning that file's path.
    ///
    /// Rows appear in ascending order of fixture file name.
    ///
    /// # Errors
    ///
    /// Returns the error reported by [`fs::write`].
    pub fn finalize(self) -> io::Result<PathBuf> {
        let path = self.dir.join(FIXTURE_INDEX_FILE_NAME);
        let entries: Vec<&IndexEntry> = self.index.values().collect();
        let json =
            serde_json::to_string_pretty(&entries).expect("IndexEntry is always serializable");
        fs::write(&path, json)?;
        Ok(path)
    }

    /// Picks the fixture file name for `key`, disambiguating it from names that are
    /// unavailable (see [`Self::is_unavailable`]).
    fn unique_file_name(&self, key: &str) -> String {
        let (path_part, query_part) = key.split_once('?').unwrap_or((key, ""));
        let safe = slugify_path(path_part);
        let qhash = if query_part.is_empty() {
            String::new()
        } else {
            format!("__q{}", short_hash(query_part))
        };
        let base = format!("{safe}{qhash}");

        let mut fname = format!("{base}.json");
        if self.is_unavailable(&fname, key) {
            let key_hash = short_hash(key);
            fname = format!("{base}__k{key_hash}.json");
            let mut n = 2usize;
            while self.is_unavailable(&fname, key) {
                fname = format!("{base}__k{key_hash}_{n}.json");
                n += 1;
            }
        }
        fname
    }

    /// Returns `true` when `fname` must not be used for `key`: it is either the reserved
    /// index file name or already recorded for a different key.
    ///
    /// The index name is compared ASCII-case-insensitively so the fixture cannot be
    /// clobbered by `finalize` on case-insensitive file systems either.
    fn is_unavailable(&self, fname: &str, key: &str) -> bool {
        fname.eq_ignore_ascii_case(FIXTURE_INDEX_FILE_NAME)
            || self.index.get(fname).is_some_and(|entry| entry.key != key)
    }
}
/// Turns a URL-like path into a string usable as a file-name stem.
///
/// Leading and trailing `/` and `\` are dropped. Remaining `/`, `\` and `:` become `-`;
/// `[ ] < > " | ? *` and ASCII control characters become `_`; every other character is
/// kept. An empty result becomes `_`, and a result that `is_windows_reserved_basename`
/// flags gets a leading `_`.
fn slugify_path(path: &str) -> String {
    let trimmed = path.trim_matches(|c| c == '/' || c == '\\');

    // Every replacement is a single ASCII byte, so the slug never outgrows the input.
    let mut slug = String::with_capacity(trimmed.len());
    for ch in trimmed.chars() {
        let replacement = if matches!(ch, '/' | '\\' | ':') {
            '-'
        } else if matches!(ch, '[' | ']' | '<' | '>' | '"' | '|' | '?' | '*')
            || ch.is_ascii_control()
        {
            '_'
        } else {
            ch
        };
        slug.push(replacement);
    }

    // A path made only of separators (e.g. "/") still needs a non-empty stem.
    if slug.is_empty() {
        slug.push('_');
    }
    if is_windows_reserved_basename(&slug) {
        slug.insert(0, '_');
    }
    slug
}
/// Reports whether `name` should be treated as a reserved device name.
///
/// Only the part of `name` before the first `.` is considered, with trailing spaces
/// removed, compared case-insensitively (ASCII). It matches `CON`, `PRN`, `AUX`, `NUL`,
/// and `COM`/`LPT` followed by a number that parses as a `u8` in `1..=9` (so padded
/// forms such as `COM01` match as well).
fn is_windows_reserved_basename(name: &str) -> bool {
    let stem = name.split('.').next().unwrap_or(name);
    let device = stem.trim_end_matches([' ', '.']).to_ascii_uppercase();

    // Shared check for the numbered device families.
    let is_numbered_port = |prefix: &str| {
        device
            .strip_prefix(prefix)
            .and_then(|digits| digits.parse::<u8>().ok())
            .is_some_and(|port| matches!(port, 1..=9))
    };

    match device.as_str() {
        "CON" | "PRN" | "AUX" | "NUL" => true,
        _ => is_numbered_port("COM") || is_numbered_port("LPT"),
    }
}
/// Returns an 8-character lowercase hex digest of `s`.
///
/// The digest is the low 32 bits of a 64-bit hash that, for each byte, XORs the byte
/// into the state and then multiplies by a fixed prime (wrapping on overflow). The
/// result is deterministic, so file names derived from it are stable across runs.
fn short_hash(s: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;
    const LOW_32_BITS: u64 = 0xFFFF_FFFF;

    let digest = s.bytes().fold(OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    });
    format!("{:08x}", digest & LOW_32_BITS)
}
/// Helpers that need a live browser session; compiled only with the `browser` feature.
#[cfg(feature = "browser")]
pub mod browser {
    use crate::TailFinError;
    use night_fury_core::BrowserSession;
    use serde_json::Value;
    use std::collections::BTreeMap;

    /// Cookie names that [`delete_perimeterx_cookies`] attempts to delete.
    pub const PX_COOKIE_NAMES: &[&str] = &["_px3", "_pxvid", "pxcts", "_pxde"];

    /// Attempts to delete every cookie in [`PX_COOKIE_NAMES`] for both `.{site_domain}`
    /// and `site_domain`.
    ///
    /// Deletion is best-effort: individual failures are ignored and the function always
    /// returns `Ok`. The returned number counts the delete calls that reported success
    /// (whether that implies the cookie actually existed depends on the session
    /// implementation — TODO confirm).
    pub async fn delete_perimeterx_cookies(
        session: &BrowserSession,
        site_domain: &str,
    ) -> Result<usize, TailFinError> {
        let candidate_domains = [format!(".{site_domain}"), site_domain.to_string()];
        let mut removed = 0usize;
        for cookie_name in PX_COOKIE_NAMES {
            for domain in &candidate_domains {
                let succeeded = session.delete_cookie(cookie_name, domain).await.is_ok();
                removed += usize::from(succeeded);
            }
        }
        Ok(removed)
    }

    /// Fetches the session's captured responses and merges the JSON ones into `seen`.
    ///
    /// Each response body is parsed as JSON; bodies that fail to parse are skipped, as
    /// are responses for which `key_fn(url, body)` returns `None`. A response is stored
    /// as `(source_page, body)` only when its key is not yet in `seen`, so the first
    /// body recorded for a key is kept.
    ///
    /// Returns how many entries were newly inserted.
    ///
    /// # Errors
    ///
    /// Returns [`TailFinError::Api`] when fetching the captured responses fails.
    pub async fn drain_captured_responses_as_map<K, F>(
        session: &BrowserSession,
        source_page: &str,
        seen: &mut BTreeMap<K, (String, Value)>,
        key_fn: F,
    ) -> Result<usize, TailFinError>
    where
        K: Ord,
        F: Fn(&str, &Value) -> Option<K>,
    {
        use std::collections::btree_map::Entry;

        let responses = session
            .get_captured_responses()
            .await
            .map_err(|e| TailFinError::Api(format!("get_captured_responses: {e}")))?;

        let mut inserted = 0;
        for response in responses {
            let Ok(body) = serde_json::from_str::<Value>(&response.body) else {
                continue;
            };
            let Some(key) = key_fn(&response.url, &body) else {
                continue;
            };
            if let Entry::Vacant(slot) = seen.entry(key) {
                slot.insert((source_page.to_string(), body));
                inserted += 1;
            }
        }
        Ok(inserted)
    }
}
// Unit tests for `summarize_json`, `FixtureWriter` and the file-name helpers.
// The writer tests run against a fresh temporary directory per test.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// An object root lists its keys and reports the length of a `data` array.
#[test]
fn summarize_json_object_with_data_array() {
let s = summarize_json(&json!({"data": [1, 2, 3], "meta": {}}));
assert!(s.contains("\"data\""), "got {s}");
assert!(s.contains("\"meta\""), "got {s}");
assert!(s.contains("data:[3]"), "got {s}");
}
// An `included` array is reported alongside the `data` array.
#[test]
fn summarize_json_object_with_included() {
let s = summarize_json(&json!({
"data": [{"id": 1}],
"included": [{"a": 1}, {"b": 2}, {"c": 3}],
}));
assert!(s.contains("data:[1]"), "got {s}");
assert!(s.contains("included:[3]"), "got {s}");
}
// A `data` value that is an object (not an array) is reported as `data:{obj}`.
#[test]
fn summarize_json_object_with_data_object_not_array() {
let s = summarize_json(&json!({"data": {"id": "146"}, "meta": {}}));
assert!(s.contains("data:{obj}"), "got {s}");
}
// An array root is summarized by its length only.
#[test]
fn summarize_json_array_root() {
let s = summarize_json(&json!([1, 2, 3, 4, 5]));
assert_eq!(s, "array[5]");
}
// Numbers, strings and booleans all collapse to "scalar"; null stays "null".
#[test]
fn summarize_json_scalars_and_null() {
assert_eq!(summarize_json(&json!(42)), "scalar");
assert_eq!(summarize_json(&json!("hi")), "scalar");
assert_eq!(summarize_json(&json!(true)), "scalar");
assert_eq!(summarize_json(&json!(null)), "null");
}
// With ten keys, only the first eight are listed: "a" is present, "j" is not.
#[test]
fn summarize_json_keys_capped_at_eight() {
let body = json!({
"a": 1, "b": 2, "c": 3, "d": 4,
"e": 5, "f": 6, "g": 7, "h": 8,
"i": 9, "j": 10,
});
let s = summarize_json(&body);
assert!(s.contains("\"a\""), "got {s}");
assert!(!s.contains("\"j\""), "got {s}");
}
// Two distinct keys produce two files and two index rows carrying the
// per-body `data_array_len` and the `source_page` label.
#[test]
fn fixture_writer_writes_files_and_index() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let p1 = fw
.write(
"/api/v3/symbols/AAPL/ratings",
&json!({"data": [{"id": 1}]}),
"symbol_root",
)
.unwrap();
let p2 = fw
.write(
"/api/v3/symbols/AAPL/dividend_history",
&json!({"data": [{"a": 1}, {"b": 2}]}),
"symbol_root",
)
.unwrap();
assert!(p1.exists());
assert!(p2.exists());
assert_eq!(fw.len(), 2);
let idx_path = fw.finalize().unwrap();
let idx_raw = std::fs::read_to_string(&idx_path).unwrap();
let idx: serde_json::Value = serde_json::from_str(&idx_raw).unwrap();
let arr = idx.as_array().unwrap();
assert_eq!(arr.len(), 2);
// Look rows up by key so the assertions do not depend on index ordering.
let by_key: std::collections::HashMap<&str, &serde_json::Value> = arr
.iter()
.map(|e| (e["key"].as_str().unwrap(), e))
.collect();
assert_eq!(by_key["/api/v3/symbols/AAPL/ratings"]["data_array_len"], 1);
assert_eq!(
by_key["/api/v3/symbols/AAPL/dividend_history"]["data_array_len"],
2
);
assert_eq!(arr[0]["source_page"], "symbol_root");
}
// Writing the same key twice reuses the file name: the index keeps a single
// row, and that row describes the second (most recent) write.
#[test]
fn fixture_writer_dedupes_index_on_duplicate_filename() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
fw.write(
"/api/v3/symbols/AAPL/ratings",
&json!({"first": true}),
"src1",
)
.unwrap();
fw.write(
"/api/v3/symbols/AAPL/ratings",
&json!({"second": true}),
"src2",
)
.unwrap();
let idx_path = fw.finalize().unwrap();
let idx_raw = std::fs::read_to_string(&idx_path).unwrap();
let idx: serde_json::Value = serde_json::from_str(&idx_raw).unwrap();
assert_eq!(idx.as_array().unwrap().len(), 1, "dedup'd to single row");
assert_eq!(
idx[0]["source_page"], "src2",
"last write wins (matches the body now on disk)"
);
}
// A colon in the key must not survive into the file name.
#[test]
fn slugify_replaces_colons() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let p = fw.write("typesense:portfolio", &json!({}), "p").unwrap();
let fname = p.file_name().unwrap().to_str().unwrap();
assert!(!fname.contains(':'), "no colons in filename: {fname}");
}
// Same path with different query strings must map to different files.
#[test]
fn fixture_writer_disambiguates_query_strings() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let p1 = fw
.write(
"/api/v3/symbols/AAPL/estimates?period_type=quarterly",
&json!({"x": 1}),
"p",
)
.unwrap();
let p2 = fw
.write(
"/api/v3/symbols/AAPL/estimates?period_type=annual",
&json!({"x": 2}),
"p",
)
.unwrap();
assert_ne!(
p1.file_name(),
p2.file_name(),
"different queries must yield different files"
);
}
// "/api/foo:bar" and "/api/foo/bar" both slugify to "api-foo-bar"; the second
// key must receive a distinct file name and its own index row.
#[test]
fn fixture_writer_disambiguates_distinct_keys_with_same_slug() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let p1 = fw
.write("/api/foo:bar", &json!({"colon": true}), "p1")
.unwrap();
let p2 = fw
.write("/api/foo/bar", &json!({"slash": true}), "p2")
.unwrap();
assert_ne!(
p1.file_name(),
p2.file_name(),
"distinct keys that slugify identically must not overwrite each other"
);
let idx_path = fw.finalize().unwrap();
let idx_raw = std::fs::read_to_string(&idx_path).unwrap();
let idx: serde_json::Value = serde_json::from_str(&idx_raw).unwrap();
assert_eq!(idx.as_array().unwrap().len(), 2);
}
// The second key is crafted so that its natural file name equals the first
// fallback name (`<base>__k<hash>.json`) of `colliding_key`. The third write
// must therefore fall through to the numbered `_2` variant.
#[test]
fn fixture_writer_disambiguates_fallback_filename_collision() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let colliding_key = "/api/foo/bar";
let hash = short_hash(colliding_key);
fw.write("/api/foo:bar", &json!({"base": true}), "p1")
.unwrap();
fw.write(
&format!("/api/foo-bar__k{hash}"),
&json!({"natural_suffix": true}),
"p2",
)
.unwrap();
let p3 = fw
.write(colliding_key, &json!({"fallback": true}), "p3")
.unwrap();
assert_eq!(
p3.file_name().unwrap().to_str().unwrap(),
format!("api-foo-bar__k{hash}_2.json")
);
let idx_path = fw.finalize().unwrap();
let idx_raw = std::fs::read_to_string(&idx_path).unwrap();
let idx: serde_json::Value = serde_json::from_str(&idx_raw).unwrap();
assert_eq!(idx.as_array().unwrap().len(), 3);
}
// Pins the digest of one input so that file names stay stable across changes.
#[test]
fn short_hash_is_stable() {
assert_eq!(short_hash("period_type=quarterly"), "265d2857");
}
// Every character that `slugify_path` is meant to replace must be absent
// from the slug, including an ASCII control character (BEL).
#[test]
fn slugify_replaces_all_forbidden_filename_chars() {
let slug = slugify_path("/a\\b:c[d]e<f>g\"h|i?j*k\u{0007}/");
for forbidden in [
'/', '\\', ':', '[', ']', '<', '>', '"', '|', '?', '*', '\u{0007}',
] {
assert!(
!slug.contains(forbidden),
"slug contains forbidden char {forbidden:?}: {slug}"
);
}
}
// End-to-end variant of the check above: the final file name produced by
// `write` (slug plus query hash) contains none of the forbidden characters.
#[test]
fn fixture_writer_filename_is_filesystem_safe() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let p = fw
.write(
"/api/v3/symbols/AAPL/sec:filings\\bad<name>|x?y*\u{0007}?include=form_type&page[size]=10",
&json!({}),
"p",
)
.unwrap();
let fname = p.file_name().unwrap().to_str().unwrap();
for forbidden in [
'/', '\\', ':', '[', ']', '<', '>', '"', '|', '?', '*', '\u{0007}',
] {
assert!(
!fname.contains(forbidden),
"filename contains forbidden char {forbidden:?}: {fname}"
);
}
}
// The root path "/" becomes "_.json", and reserved device names (in any
// ASCII case) get a leading underscore.
#[test]
fn fixture_writer_handles_root_and_windows_reserved_names() {
let tmp = tempfile::tempdir().unwrap();
let mut fw = FixtureWriter::new(tmp.path()).unwrap();
let root = fw.write("/", &json!({}), "root").unwrap();
assert_eq!(root.file_name().unwrap().to_str().unwrap(), "_.json");
let con = fw.write("/CON", &json!({}), "reserved").unwrap();
assert_eq!(con.file_name().unwrap().to_str().unwrap(), "_CON.json");
let com1 = fw.write("/com1", &json!({}), "reserved").unwrap();
assert_eq!(com1.file_name().unwrap().to_str().unwrap(), "_com1.json");
let prn = fw.write("/PRN", &json!({}), "reserved").unwrap();
assert_eq!(prn.file_name().unwrap().to_str().unwrap(), "_PRN.json");
let lpt3 = fw.write("/lpt3", &json!({}), "reserved").unwrap();
assert_eq!(lpt3.file_name().unwrap().to_str().unwrap(), "_lpt3.json");
}
// Creating a second writer on an already existing nested directory must
// succeed, and both writers must be able to write into it.
#[test]
fn fixture_writer_create_dir_is_idempotent() {
let tmp = tempfile::tempdir().unwrap();
let nested = tmp.path().join("a/b/c/discovered");
let mut fw = FixtureWriter::new(&nested).unwrap();
fw.write("/api/x", &json!({"data": []}), "src").unwrap();
let mut fw2 = FixtureWriter::new(&nested).unwrap();
fw2.write("/api/y", &json!({"data": []}), "src").unwrap();
fw2.finalize().unwrap();
}
}