import json
import math
import sys
from pathlib import Path
# Root of the crate: the directory two levels above this script's resolved
# location (symlinks are resolved first).
CRATE_ROOT = Path(__file__).resolve().parent.parent

# Directory searched for the per-dataset ``<slug>.csv`` inputs.
PROCESSED = CRATE_ROOT.joinpath("data", "processed")

# Directory that receives the ``<slug>_budget.json`` bundles. It is created
# when the module is imported, so later writes can assume it exists.
OUT_DIR = CRATE_ROOT.joinpath("audit", "uncertainty")
OUT_DIR.mkdir(parents=True, exist_ok=True)
# Per-dataset Type-B uncertainty inputs, keyed by dataset slug.
#
# The slug doubles as the stem of the input CSV (``<slug>.csv`` under
# PROCESSED) and of the emitted bundle (``<slug>_budget.json`` under OUT_DIR).
#
# Each value is a 5-tuple, unpacked positionally by emit_one:
#   (sensor_label, u_sensor, model_label, u_model, ref)
#   - sensor_label: human-readable label for the measurement component
#   - u_sensor:     value reported as the "u_meas" component
#   - model_label:  human-readable label for the model component
#   - u_model:      value reported as the "u_model" component
#   - ref:          source identifier attached to BOTH components above
#
# NOTE(review): units of the numeric values are not stated in this file --
# presumably they match each dataset's residual norm; confirm against the
# cited sources.
# NOTE(review): ``ref`` strings mix several identifier styles (bare DOI,
# "doi:", "arXiv:", "huggingface:", short venue tags); they are emitted
# verbatim into the bundles.
COMPONENTS = {
    "cwru": ("CWRU IRIG accelerometer noise floor", 0.005, "BPFI envelope nominal-amplitude scatter (Bechhoefer 2014)", 0.020, "10.1109/TIE.2014.2386299"),
    "ims": ("IMS PCB accelerometer noise floor", 0.003, "Health-index nominal-window stddev (Lee 2007)", 0.015, "doi:10.1115/IMECE2007-43127"),
    "kuka_lwr": ("KUKA LWR-IV+ joint torque sensor", 0.05, "Jubien 2014 Gautier-style ID residual stddev", 0.30, "doi:10.3182/20140824-6-ZA-1003.02485"),
    "femto_st": ("PRONOSTIA accelerometer noise floor", 0.004, "vib-HI nominal-window stddev (Nectoux 2012)", 0.010, "ieee-phm-2012"),
    "panda_gaz": ("Panda motor-current torque estimate", 0.10, "Gaz 2019 Table III identified residual stddev", 0.40, "10.1109/LRA.2019.2931248"),
    "dlr_justin": ("DLR-class link-side joint torque", 0.03, "Giacomuzzo 2024 measurement-vs-model residual stddev", 0.25, "zenodo:12516500"),
    "ur10_kufieta": ("UR10 motor-current torque estimate", 0.20, "Polydoros 2015 ID residual stddev", 0.50, "iros-2015"),
    "cheetah3": ("Mini-Cheetah F/T contact estimate", 0.50, "MPC stance-tracking residual stddev (Katz 2019)", 1.0, "10.1109/ICRA.2019.8793865"),
    "icub_pushrecovery":("ergoCub foot F/T sensor", 0.10, "Centroidal-momentum tracking error stddev (Romualdi 2024)", 0.50, "humanoids-2024"),
    "droid": ("Panda 7-DoF joint encoder", 0.001, "DROID per-trajectory state nominal-window stddev", 0.020, "arXiv:2403.12945"),
    "openx": ("Open X aggregated joint encoder", 0.002, "Open X per-episode state nominal-window stddev", 0.030, "arXiv:2310.08864"),
    "anymal_parkour":("ANYmal-C joint encoder + IMU", 0.005, "GrandTour outdoor-terrain stance-residual stddev", 0.10, "10.1126/scirobotics.abk2822"),
    "unitree_g1": ("Unitree G1 joint encoder", 0.005, "G1 whole-body teleop residual stddev", 0.10, "huggingface:Makolon0321/unitree_g1_block_stack"),
    "aloha_static": ("ALOHA ViperX joint encoder", 0.002, "ALOHA fine-bimanual residual stddev", 0.05, "arXiv:2304.13705"),
    "icub3_sorrentino":("ergoCub foot F/T sensor (Sorrentino slate)",0.10,"Sorrentino 2025 RAL whole-body torque-control residual stddev", 0.40, "ral-2025"),
    "mobile_aloha": ("Mobile ALOHA arm + base encoder", 0.003, "Mobile-ALOHA wipe-wine residual stddev (Fu 2024)", 0.06, "arXiv:2401.02117"),
    "so100": ("SO-100 Dynamixel servo position", 0.010, "SO-100 servo amplitude characteristic (LeRobot 2024)", 0.30, "huggingface:lerobot/so100"),
    "aloha_static_tape":("ALOHA ViperX joint encoder", 0.002, "tape-attachment fine-bimanual residual stddev", 0.05, "huggingface:lerobot/aloha_static_tape"),
    "aloha_static_screw_driver":("ALOHA ViperX joint encoder",0.002, "screw-driver tool-use residual stddev", 0.05, "huggingface:lerobot/aloha_static_screw_driver"),
    "aloha_static_pingpong_test":("ALOHA ViperX joint encoder",0.002,"ping-pong rhythmic-transfer residual stddev", 0.05, "huggingface:lerobot/aloha_static_pingpong_test"),
}
def calibration_sem(csv_path: Path) -> dict:
    """Summarise the calibration window of a processed residual CSV.

    The first line of the file is always treated as a header. Samples are
    read from the ``residual_norm`` column, or from the first column when no
    header cell carries that name (surrounding whitespace in header cells is
    ignored). Rows that are too short, do not parse as a float, or parse to a
    non-finite value (``nan``/``inf``) are skipped.

    The calibration window is the leading fifth of the usable samples, and
    always contains at least one sample.

    Args:
        csv_path: Location of the processed CSV.

    Returns:
        ``{"available": False, "reason": ...}`` when the file is missing or
        yields no usable sample. Otherwise a dict with ``available`` (True),
        ``n_calibration_samples``, ``n_total_samples``, ``mean``, ``stddev``
        (sample standard deviation with an ``n - 1`` denominator; ``0.0`` for
        a single-sample window) and ``standard_error_of_mean``.
    """
    # Function-scope import so this block does not depend on the file header.
    import csv

    if not csv_path.exists():
        return {
            "available": False,
            "reason": f"{csv_path} absent; run scripts/preprocess_datasets.py",
        }

    samples = []
    # Explicit encoding keeps parsing independent of the host locale; a BOM
    # is tolerated and undecodable bytes are replaced rather than raising,
    # since only the numeric column matters here. newline="" is what the csv
    # module requires for correct handling of quoted fields.
    with csv_path.open(
        "r", encoding="utf-8-sig", errors="replace", newline=""
    ) as fh:
        reader = csv.reader(fh)
        header = [cell.strip() for cell in next(reader, [])]
        try:
            col = header.index("residual_norm")
        except ValueError:
            # No named column: fall back to the first one.
            col = 0
        for parts in reader:
            if col >= len(parts):
                continue
            try:
                value = float(parts[col])
            except ValueError:
                continue
            # float() happily parses "nan"/"inf"; letting them through would
            # poison every statistic below and leak NaN into the JSON bundle.
            if math.isfinite(value):
                samples.append(value)

    n = len(samples)
    if n == 0:
        return {"available": False, "reason": "empty CSV"}

    cal = samples[: max(1, n // 5)]
    mu = sum(cal) / len(cal)
    # Sample variance; the max() guard makes a one-sample window yield 0.
    var = sum((x - mu) ** 2 for x in cal) / max(1, len(cal) - 1)
    sd = math.sqrt(var)
    sem = sd / math.sqrt(len(cal))
    return {
        "available": True,
        "n_calibration_samples": len(cal),
        "n_total_samples": n,
        "mean": mu,
        "stddev": sd,
        "standard_error_of_mean": sem,
    }
def emit_one(slug: str) -> Path:
    """Write the uncertainty-budget bundle for one dataset.

    Looks up the dataset's two literature (Type-B) components in
    ``COMPONENTS``, derives the Type-A component from the calibration window
    of ``<slug>.csv`` under ``PROCESSED``, combines the three by
    root-sum-square, and serialises the result as indented JSON to
    ``<slug>_budget.json`` under ``OUT_DIR``.

    Args:
        slug: Dataset key; must be present in ``COMPONENTS``.

    Returns:
        Path of the JSON file that was written.

    Raises:
        KeyError: If ``slug`` is not a key of ``COMPONENTS``.
    """
    sensor_label, u_sensor, model_label, u_model, ref = COMPONENTS[slug]
    window = calibration_sem(PROCESSED / f"{slug}.csv")

    # Without a usable calibration CSV the Type-A term contributes nothing.
    if window["available"]:
        u_cal = window.get("standard_error_of_mean", 0.0)
    else:
        u_cal = 0.0

    u_combined = math.sqrt(u_sensor ** 2 + u_model ** 2 + u_cal ** 2)

    measurement_entry = {
        "name": "u_meas",
        "label": sensor_label,
        "value": u_sensor,
        "type_per_GUM": "B (literature)",
        "source": ref,
    }
    model_entry = {
        "name": "u_model",
        "label": model_label,
        "value": u_model,
        "type_per_GUM": "B (literature)",
        "source": ref,
    }
    calibration_entry = {
        "name": "u_cal",
        "label": "calibration-window SEM (Stage III §3)",
        "value": u_cal,
        "type_per_GUM": "A (computed in-tree)",
        "source": "data/processed/{}.csv".format(slug),
    }
    notes = [
        "Type-A component is the standard error of the calibration-window mean, computed in-tree.",
        "Type-B components reflect publicly reported sensor / model residual standard deviations from the cited source.",
        "Combined uncertainty is the root-sum-square per JCGM 100:2008 §5.1, assuming uncorrelated components.",
        "DSFB bounds claims to the residual-emergence grammar; this budget supports interpreting numerical magnitudes, not new performance claims.",
    ]

    # Key order is part of the output: the bundle is dumped unsorted.
    bundle = {
        "$schema": "GUM-JCGM-100-2008",
        "dataset": slug,
        "scope": "single-output residual norm uncertainty",
        "components": [measurement_entry, model_entry, calibration_entry],
        "combined_standard_uncertainty": u_combined,
        "expanded_uncertainty_k2": 2.0 * u_combined,
        "calibration_window": window,
        "notes": notes,
    }

    out_path = OUT_DIR / f"{slug}_budget.json"
    with out_path.open("w") as sink:
        json.dump(bundle, sink, indent=2, sort_keys=False)
        sink.write("\n")  # terminate the file with a newline
    return out_path
def main() -> int:
    """Emit one budget bundle per entry in ``COMPONENTS``.

    Prints an ``OK`` line for each file as it is written, followed by a
    summary line with the number of bundles and the output directory.

    Returns:
        Always ``0``.
    """
    emitted = 0
    for slug in COMPONENTS:
        bundle_path = emit_one(slug)
        print(f"OK {bundle_path.name}")
        emitted += 1
    print(f"\nemitted {emitted} bundles → {OUT_DIR}")
    return 0
# Script entry point: run main() and use its return value as the process
# exit status. Nothing runs when the module is merely imported.
if __name__ == "__main__":
    raise SystemExit(main())