use std::path::{Path, PathBuf};
use axum::{
http::{header, HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use serde::Deserialize;
/// Renderer names accepted for a `static` geography that is marked `available`
/// (checked in `Registry::validate`).
const SHIPPED_RENDERERS: &[&str] = &["svg"];
/// Values accepted in the `map` field of a `builtin` geography
/// (checked in `Registry::validate`).
const BUILTIN_MAPS: &[&str] = &["states", "counties", "countries"];
/// Top-level registry document as deserialized from JSON.
///
/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Registry {
/// Schema version; `Registry::validate` accepts only `1`.
pub version: u32,
/// The geography entries listed in the registry.
pub geographies: Vec<Geography>,
}
/// One geography entry in the registry.
///
/// Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
/// Which optional fields are required depends on `kind`; those rules are
/// enforced by `Registry::validate`, not by deserialization.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Geography {
/// Identifier; `Registry::validate` rejects duplicates within one registry.
pub id: String,
/// Required string; not interpreted in this file (presumably a display label).
pub label: String,
/// Required string; not interpreted in this file (presumably a UI grouping).
pub group: String,
/// Either `"builtin"` or `"static"`; any other value fails validation.
pub kind: String, #[serde(default)]
/// Defaults to `false` when omitted. Not interpreted in this file
/// (presumably marks US geographies; confirm against the consumer).
pub us: bool,
/// Whether the entry counts as "live" in `Registry::summary`. Must be `true`
/// for `builtin` entries; for `static` entries it triggers extra checks.
pub available: bool,
/// Free-form note; defaults to the empty string when omitted.
#[serde(default)]
pub note: String,
/// Required for `builtin` entries, where it must be one of `BUILTIN_MAPS`.
#[serde(default)]
pub map: Option<String>,
/// Required for `static` entries; when the entry is `available` it must be
/// one of `SHIPPED_RENDERERS`.
#[serde(default)]
pub renderer: Option<String>,
/// Required for `static` entries.
#[serde(default)]
pub object: Option<String>,
/// Required for `static` entries.
#[serde(default)]
pub key_kind: Option<String>,
/// Optional number; not validated in this file (presumably a feature count).
#[serde(default)]
pub features: Option<u64>,
/// Required for `static` entries that are `available`.
#[serde(default)]
pub topojson: Option<String>,
/// Must be non-empty for `static` entries that are `available`; defaults to
/// an empty list when omitted.
#[serde(default)]
pub families: Vec<Family>,
}
/// One entry of a geography's `families` list.
///
/// All three fields are required strings and unknown keys are rejected; their
/// contents are not validated in this file.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Family {
/// Identifier of the family.
pub id: String,
/// Presumably a display label; not interpreted in this file.
pub label: String,
/// Presumably the location of the family's data file; not checked in this file.
pub file: String,
}
impl Registry {
pub fn parse(bytes: &[u8]) -> Result<Registry, String> {
let reg: Registry = serde_json::from_slice(bytes)
.map_err(|e| format!("registry is not valid JSON: {e}"))?;
match reg.validate() {
Ok(()) => Ok(reg),
Err(errs) => Err(format!("invalid registry:\n - {}", errs.join("\n - "))),
}
}
pub fn validate(&self) -> Result<(), Vec<String>> {
let mut errs = Vec::new();
if self.version != 1 {
errs.push(format!(
"unknown registry version {} (expected 1)",
self.version
));
}
let mut seen = std::collections::HashSet::new();
for g in &self.geographies {
let id = &g.id;
if !seen.insert(id) {
errs.push(format!("duplicate geography id {id:?}"));
}
match g.kind.as_str() {
"builtin" => {
match &g.map {
Some(m) if BUILTIN_MAPS.contains(&m.as_str()) => {}
Some(m) => errs.push(format!(
"{id:?}: builtin map {m:?} is not one the server injects {BUILTIN_MAPS:?}"
)),
None => errs.push(format!("{id:?}: builtin geography needs a \"map\"")),
}
if !g.available {
errs.push(format!("{id:?}: builtin maps are always available"));
}
}
"static" => {
if g.object.is_none() {
errs.push(format!("{id:?}: static geography needs an \"object\""));
}
if g.key_kind.is_none() {
errs.push(format!("{id:?}: static geography needs a \"key_kind\""));
}
match &g.renderer {
Some(_) => {}
None => errs.push(format!("{id:?}: static geography needs a \"renderer\"")),
}
if g.available {
if let Some(r) = &g.renderer {
if !SHIPPED_RENDERERS.contains(&r.as_str()) {
errs.push(format!(
"{id:?}: available but renderer {r:?} isn't shipped {SHIPPED_RENDERERS:?}"
));
}
}
if g.topojson.is_none() {
errs.push(format!(
"{id:?}: available static layer has no \"topojson\""
));
}
if g.families.is_empty() {
errs.push(format!(
"{id:?}: available static layer has no metric families"
));
}
}
}
other => errs.push(format!("{id:?}: unknown kind {other:?} (builtin|static)")),
}
}
if errs.is_empty() {
Ok(())
} else {
Err(errs)
}
}
pub fn summary(&self) -> String {
let (live, grey): (Vec<_>, Vec<_>) = self.geographies.iter().partition(|g| g.available);
format!(
"geo registry: {} live ({}), {} greyed",
live.len(),
live.iter()
.map(|g| g.id.as_str())
.collect::<Vec<_>>()
.join(", "),
grey.len()
)
}
}
/// Process-wide geo asset directory, resolved at most once.
///
/// Holds `None` when no directory is configured.
static GEO_DIR: std::sync::OnceLock<Option<PathBuf>> = std::sync::OnceLock::new();

/// Returns the configured geo asset directory, if any.
///
/// If `GEO_DIR` has not been set yet, the first call initializes it from the
/// `TAXA_GEO_DIR` environment variable. A variable that is set but empty is
/// treated as unset: an empty `PathBuf` would make `dir.join(file)` resolve
/// against the process working directory, silently serving files from
/// wherever the server happened to be started.
fn geo_dir() -> Option<&'static Path> {
    GEO_DIR
        .get_or_init(|| {
            std::env::var_os("TAXA_GEO_DIR")
                .filter(|raw| !raw.is_empty())
                .map(PathBuf::from)
        })
        .as_deref()
}
/// Sets the geo asset directory explicitly.
///
/// Only the first initialization of `GEO_DIR` takes effect: if it was already
/// set, or already resolved by an earlier `geo_dir` call, this call changes
/// nothing and reports no error.
pub fn set_geo_dir(dir: impl Into<PathBuf>) {
// `OnceLock::set` returns `Err` when a value is already stored; the result is discarded.
let _ = GEO_DIR.set(Some(dir.into()));
}
/// Public accessor for the configured geo asset directory; `None` when no
/// directory is configured. Delegates to the private `geo_dir`.
pub fn geo_dir_path() -> Option<&'static Path> {
geo_dir()
}
/// Serves `rel` out of the configured geo directory.
///
/// Returns `None` when no geo directory is configured or when
/// `read_geo_asset` returns `None` for this request.
pub(crate) fn serve_geo_asset(rel: &str, req_headers: &HeaderMap) -> Option<Response> {
    geo_dir().and_then(|dir| read_geo_asset(dir, rel, req_headers))
}
/// Serves one file from `dir` for a request path of the form `vendor/geo/<file>`.
///
/// Returns `None` when `rel` lacks the `vendor/geo/` prefix, when `<file>` is
/// not a plain relative path, or when it does not resolve to a readable
/// regular file. Otherwise returns `304 Not Modified` if the request's
/// `If-None-Match` matches the file's weak ETag, or a `200` response carrying
/// the file bytes, a guessed `Content-Type`, `Cache-Control: no-cache` and the
/// ETag.
fn read_geo_asset(dir: &Path, rel: &str, req_headers: &HeaderMap) -> Option<Response> {
    let file = rel.strip_prefix("vendor/geo/")?;
    if file.is_empty() || file.split('/').any(|seg| seg.is_empty() || seg == "..") {
        return None;
    }
    // The check above only understands `/`. `Path::join` replaces `dir`
    // entirely when handed an absolute or prefixed path, and on Windows `\`
    // is a separator too, so `..\x` or `C:\x` would slip through. Re-check
    // with the platform's own path parser and accept only ordinary names.
    let plain_relative = Path::new(file).components().all(|part| {
        matches!(
            part,
            std::path::Component::Normal(_) | std::path::Component::CurDir
        )
    });
    if !plain_relative {
        return None;
    }

    let path = dir.join(file);
    let meta = std::fs::metadata(&path).ok()?;
    if !meta.is_file() {
        return None;
    }

    let etag = weak_etag(&meta);
    let if_none_match = req_headers
        .get(header::IF_NONE_MATCH)
        .and_then(|v| v.to_str().ok());
    if if_none_match.is_some_and(|v| crate::etag_matches(v, &etag)) {
        // A 304 must repeat the caching headers the 200 would have carried,
        // so `Cache-Control` is sent alongside the ETag.
        return Some(
            (
                StatusCode::NOT_MODIFIED,
                [
                    (header::CACHE_CONTROL, "no-cache"),
                    (header::ETAG, etag.as_str()),
                ],
            )
                .into_response(),
        );
    }

    let bytes = std::fs::read(&path).ok()?;
    let mime = mime_guess::from_path(&path).first_or_octet_stream();
    Some(
        (
            [
                (header::CONTENT_TYPE, mime.as_ref()),
                (header::CACHE_CONTROL, "no-cache"),
                (header::ETAG, etag.as_str()),
            ],
            bytes,
        )
            .into_response(),
    )
}
/// Builds a weak ETag of the form `W/"<len>-<mtime>"` from file metadata.
///
/// `<mtime>` is the modification time in whole seconds since the Unix epoch,
/// or `0` when the platform cannot report it or it lies before the epoch.
fn weak_etag(meta: &std::fs::Metadata) -> String {
    let since_epoch = meta
        .modified()
        .ok()
        .and_then(|when| when.duration_since(std::time::UNIX_EPOCH).ok());
    let mtime_secs = since_epoch.map_or(0, |elapsed| elapsed.as_secs());
    let size = meta.len();
    format!("W/\"{}-{}\"", size, mtime_secs)
}
/// Loads and validates `registry.json` from `dir`.
///
/// Returns `Ok(None)` when the file does not exist, and `Ok(Some(summary))`
/// with the text of [`Registry::summary`] when it parses and validates.
///
/// # Errors
/// Returns a message when the file exists but cannot be read, or when
/// [`Registry::parse`] rejects its contents.
pub fn check_registry(dir: &Path) -> Result<Option<String>, String> {
    let path = dir.join("registry.json");
    let bytes = match std::fs::read(&path) {
        Ok(bytes) => bytes,
        // A missing registry is not an error; there is simply nothing to check.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(format!("cannot read {}: {e}", path.display())),
    };
    let registry = Registry::parse(&bytes)?;
    Ok(Some(registry.summary()))
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Removes the wrapped directory when dropped, so a failing assertion in a
    /// test does not leave files behind in the system temp directory.
    struct TempDirGuard(std::path::PathBuf);

    impl Drop for TempDirGuard {
        fn drop(&mut self) {
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    /// Serializes `v` and runs it through `Registry::parse`.
    fn parse(v: serde_json::Value) -> Result<Registry, String> {
        Registry::parse(v.to_string().as_bytes())
    }

    /// A `static`, available geography entry that passes validation as-is;
    /// individual tests mutate it to trigger specific violations.
    fn good_static() -> serde_json::Value {
        json!({
            "id": "tracts", "label": "Census tracts", "group": "High-resolution",
            "kind": "static", "us": true, "renderer": "svg", "object": "tracts",
            "key_kind": "geoid11", "features": 85000,
            "topojson": "/static/vendor/geo/tracts.json",
            "families": [{"id": "acs", "label": "ACS", "file": "/static/vendor/geo/tracts__acs.json"}],
            "available": true, "note": ""
        })
    }

    #[test]
    fn accepts_a_well_formed_registry() {
        let reg = parse(json!({
            "version": 1,
            "geographies": [
                {"id": "counties", "label": "Counties", "group": "National",
                 "kind": "builtin", "map": "counties", "us": true, "available": true},
                good_static(),
                {"id": "blocks", "label": "Blocks", "group": "High-resolution",
                 "kind": "static", "us": true, "renderer": "webgl", "object": "blocks",
                 "key_kind": "geoid15", "features": 8100000, "topojson": null,
                 "families": [], "available": false, "note": "needs WebGL"}
            ]
        }))
        .expect("valid registry");
        assert_eq!(reg.geographies.len(), 3);
    }

    #[test]
    fn rejects_unknown_fields() {
        let err = parse(json!({
            "version": 1,
            "geographies": [{"id": "counties", "label": "C", "group": "N",
                "kind": "builtin", "map": "counties", "available": true, "avaliable": true}]
        }))
        .unwrap_err();
        assert!(
            err.contains("avaliable") || err.contains("unknown field"),
            "{err}"
        );
    }

    #[test]
    fn rejects_available_static_without_geometry_or_families() {
        let mut g = good_static();
        g["topojson"] = json!(null);
        g["families"] = json!([]);
        let err = parse(json!({"version": 1, "geographies": [g]})).unwrap_err();
        assert!(err.contains("no \"topojson\""), "{err}");
        assert!(err.contains("no metric families"), "{err}");
    }

    #[test]
    fn rejects_available_layer_with_unshipped_renderer() {
        let mut g = good_static();
        g["renderer"] = json!("webgl");
        let err = parse(json!({"version": 1, "geographies": [g]})).unwrap_err();
        assert!(err.contains("isn't shipped"), "{err}");
    }

    #[test]
    fn rejects_duplicate_ids_and_bad_kinds_and_bad_builtin_map() {
        let err = parse(json!({
            "version": 1,
            "geographies": [
                {"id": "x", "label": "X", "group": "G", "kind": "builtin", "map": "nope", "available": true},
                {"id": "x", "label": "X2", "group": "G", "kind": "weird", "available": true}
            ]
        }))
        .unwrap_err();
        assert!(err.contains("duplicate geography id"), "{err}");
        assert!(err.contains("not one the server injects"), "{err}");
        assert!(err.contains("unknown kind"), "{err}");
    }

    #[test]
    fn serves_geo_file_with_weak_etag_and_304() {
        use axum::http::HeaderValue;

        // Per-process directory name: a fixed name in the shared temp dir can
        // collide with another test run happening at the same time.
        let guard = TempDirGuard(
            std::env::temp_dir().join(format!("taxa_geo_test_serve_{}", std::process::id())),
        );
        let dir: &Path = &guard.0;
        std::fs::create_dir_all(dir).expect("create temp geo dir");
        std::fs::write(
            dir.join("registry.json"),
            br#"{"version":1,"geographies":[]}"#,
        )
        .unwrap();

        // First request: full response with a weak validator and `no-cache`.
        let resp = read_geo_asset(dir, "vendor/geo/registry.json", &HeaderMap::new()).unwrap();
        assert_eq!(resp.status(), StatusCode::OK);
        let etag = resp
            .headers()
            .get(header::ETAG)
            .unwrap()
            .to_str()
            .unwrap()
            .to_string();
        assert!(etag.starts_with("W/\""), "weak validator: {etag}");
        assert_eq!(
            resp.headers().get(header::CACHE_CONTROL).unwrap(),
            "no-cache"
        );

        // Revalidation with the same ETag yields 304.
        let mut h = HeaderMap::new();
        h.insert(header::IF_NONE_MATCH, HeaderValue::from_str(&etag).unwrap());
        let resp = read_geo_asset(dir, "vendor/geo/registry.json", &h).unwrap();
        assert_eq!(resp.status(), StatusCode::NOT_MODIFIED);

        // Traversal, missing files and foreign prefixes are all declined.
        assert!(read_geo_asset(dir, "vendor/geo/../secrets", &HeaderMap::new()).is_none());
        assert!(read_geo_asset(dir, "vendor/geo/nope.json", &HeaderMap::new()).is_none());
        assert!(read_geo_asset(dir, "viz/app.js", &HeaderMap::new()).is_none());
    }

    #[test]
    fn reports_all_violations_at_once() {
        let err = parse(json!({
            "version": 2,
            "geographies": [{"id": "t", "label": "T", "group": "G", "kind": "static",
                "renderer": "svg", "available": false}]
        }))
        .unwrap_err();
        assert!(err.contains("version"), "{err}");
        assert!(err.contains("needs an \"object\""), "{err}");
    }
}