import json
import os
import statistics
import time
import urllib.request
# Base URL of the server under test; override with the MR_SERVER_URL
# environment variable.
BASE = os.getenv("MR_SERVER_URL", "http://localhost:8181")
# Namespace id that this script creates, populates, queries and deletes.
NS = "test-e-edges"
def req(method, path, body=None, ns=None, timeout=10):
    """Send a JSON request to the server at ``BASE`` and decode the reply.

    Args:
        method: HTTP verb, e.g. ``"POST"`` or ``"DELETE"``.
        path: URL path appended verbatim to ``BASE``.
        body: JSON-serialisable payload; when ``None`` no request body is sent.
        ns: Namespace id; when truthy it is sent as the ``X-Namespace-ID``
            header.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        The parsed JSON response, or ``{}`` when the response body is empty.

    Errors raised by ``urlopen`` or ``json.loads`` are not caught here and
    propagate to the caller.
    """
    payload = None if body is None else json.dumps(body).encode()

    headers = {"Content-Type": "application/json"}
    if ns:
        headers["X-Namespace-ID"] = ns

    request = urllib.request.Request(
        f"{BASE}{path}",
        data=payload,
        headers=headers,
        method=method,
    )
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        raw = resp.read()

    if not raw:
        return {}
    return json.loads(raw)
def setup():
    """Recreate the test namespace and load its intents and L1 edges.

    The sequence of requests is: delete the namespace (best effort), create
    it, register each intent with its seed phrases, then post each edge.
    """
    namespace = {"namespace_id": NS}

    # Best-effort removal of whatever a previous run may have left behind;
    # any failure of this request is deliberately ignored.
    try:
        req("DELETE", "/api/namespaces", namespace)
    except Exception:
        pass

    req(
        "POST",
        "/api/namespaces",
        {**namespace, "description": "Option E edge-case test"},
    )

    # Intent id -> seed phrases. Five intents share the word "cancel".
    seed_phrases = {
        "cancel_order": ["cancel my order", "cancel the order", "cancel order"],
        "cancel_subscription": [
            "cancel subscription",
            "cancel my subscription",
            "stop subscription",
        ],
        "cancel_meeting": [
            "cancel meeting",
            "cancel the meeting",
            "cancel calendar event",
        ],
        "cancel_shipment": [
            "cancel shipment",
            "cancel delivery",
            "cancel my package",
        ],
        "cancel_invoice": ["cancel invoice", "void invoice"],
        "fire_employee": [
            "terminate the employee",
            "fire the worker",
            "let the employee go",
        ],
    }
    for intent_id in seed_phrases:
        req(
            "POST",
            "/api/intents",
            {"id": intent_id, "phrases": seed_phrases[intent_id]},
            ns=NS,
        )

    # Each tuple is (from, to, kind, weight), posted in this order.
    edge_fields = ("from", "to", "kind", "weight")
    synonym_edges = [
        ("kill", "cancel", "synonym", 0.90),
        ("abort", "cancel", "synonym", 0.92),
        ("scrap", "cancel", "synonym", 0.88),
        ("nuke", "cancel", "synonym", 0.95),
        ("terminate", "cancel", "synonym", 0.91),
        ("foo", "bar", "synonym", 0.95),
    ]
    for edge in synonym_edges:
        req("POST", "/api/layers/l1/edges", dict(zip(edge_fields, edge)), ns=NS)
def route(query, grounded_l1=False):
    """Resolve ``query`` in the test namespace via ``POST /api/resolve``.

    NOTE(review): ``grounded_l1`` is accepted but never forwarded to the
    server, so both values currently produce an identical request. Confirm
    whether the flag should be sent (and under which field name).

    Returns:
        A ``(top, intents, routing_us)`` tuple: the ``id`` of the first
        entry in the response's ``intents`` list (``"(none)"`` when that
        list is missing or empty), the list itself, and the response's
        ``routing_us`` value (``0`` when absent).
    """
    response = req(
        "POST",
        "/api/resolve",
        {"query": query, "log": False},
        ns=NS,
    )
    ranked = response.get("intents") or []
    if ranked:
        top_id = ranked[0]["id"]
    else:
        top_id = "(none)"
    return top_id, ranked, response.get("routing_us", 0)
def check(label, query, expected, grounded_l1=True):
    """Route ``query`` and report whether the top intent equals ``expected``.

    Prints a pass/fail line with the label and query, then the expected and
    actual intent ids; on a mismatch it also prints the ids of the first
    three ranked intents.

    Returns:
        ``True`` when the top intent id equals ``expected``, else ``False``.
    """
    top, ranked, _routing_us = route(query, grounded_l1)
    matched = top == expected

    flag = "✗"
    if matched:
        flag = "✓"

    print(f" {flag} {label:<48s} {query!r}")
    print(f" expected: {expected:<25s} got: {top}")
    if matched:
        return True

    # Show the runners-up only when the expectation was missed.
    print(f" top-3: {[m['id'] for m in ranked[:3]]}")
    return False
def test_pollution():
    """Check that queries using synonyms of 'cancel' still pick the right intent.

    Returns:
        ``True`` only when every case routes to its expected intent.
    """
    print("\n=== TEST 1: Pollution — many intents share the canonical word ===")
    print("'kill' is OOV; substitutes to 'cancel' which appears in 5 intents.")
    print("Distinguishing word in query (order/meeting/etc.) must still win.\n")

    # (label, query, expected intent id)
    cases = (
        ("kill the order", "kill the order", "cancel_order"),
        ("kill the meeting", "kill that meeting", "cancel_meeting"),
        ("nuke the invoice", "nuke this invoice", "cancel_invoice"),
        ("abort the shipment", "abort the shipment now", "cancel_shipment"),
    )
    # Build the full list first so every case runs and prints, even after
    # an earlier one has failed.
    outcomes = [check(label, query, expected) for label, query, expected in cases]
    return all(outcomes)
def test_distinctive_preservation():
    """Check that 'terminate the employee' still routes to ``fire_employee``.

    Returns:
        The result of the single ``check`` call.
    """
    banner = (
        "\n=== TEST 2: Distinctive vocabulary preserved ===",
        "'terminate' IS in fire_employee seeds (in L2 vocab).",
        "Even though there's a terminate→cancel synonym, must NOT substitute.\n",
    )
    for line in banner:
        print(line)

    outcome = check("terminate the employee", "terminate the employee", "fire_employee")
    return outcome
def test_threshold():
    """Route 'scrap the order' and expect ``cancel_order`` as the top intent.

    The 'scrap' -> 'cancel' edge is registered with weight 0.88, which the
    banner describes as below the 0.90 threshold.

    Returns:
        ``True`` when the top intent id is ``"cancel_order"``.
    """
    print("\n=== TEST 3: Synonym weight threshold (0.90) ===")
    print("'scrap'→'cancel' has weight 0.88 — below threshold. Should NOT fire.\n")

    routed = route("scrap the order", grounded_l1=True)
    top = routed[0]
    print(f" 'scrap the order' → {top}")
    return top == "cancel_order"
def test_case_sensitivity():
    """Check that capitalised and upper-case queries route like lower-case ones.

    Returns:
        ``True`` only when both cases route to their expected intent.
    """
    print("\n=== TEST 4: Case sensitivity ===")
    print("'Kill' (capitalized) should behave same as 'kill'.\n")

    # Both checks are evaluated before combining, so both always print.
    outcomes = [
        check("Kill (capitalized)", "Kill the order", "cancel_order"),
        check("KILL (uppercase)", "KILL THE MEETING", "cancel_meeting"),
    ]
    return all(outcomes)
def test_dead_synonym():
    """Route 'foo the order' and expect ``cancel_order`` as the top intent.

    The 'foo' -> 'bar' edge points at a word that appears in no seed phrase.

    Returns:
        ``True`` when the top intent id is ``"cancel_order"``.
    """
    print("\n=== TEST 5: Synonym to non-L2 target → no-op ===")
    print("'foo'→'bar' but 'bar' isn't in any seed. Substitution must not happen.\n")

    routed = route("foo the order", grounded_l1=True)
    top = routed[0]
    print(f" 'foo the order' → {top}")
    return top == "cancel_order"
def test_latency_overhead():
    """Compare median server-reported ``routing_us`` for the two flag values.

    Five queries are repeated 20 times; each is routed once with
    ``grounded_l1=False`` and once with ``grounded_l1=True``.

    NOTE(review): ``route`` does not forward ``grounded_l1`` to the server,
    so both series are measured on identical requests — confirm intent.

    Returns:
        ``True`` when the two medians differ by less than 50 µs.
    """
    print("\n=== TEST 6: Latency overhead ===")
    print("Compare server-reported routing_us with and without grounded_l1.\n")

    sample = [
        "cancel my order",
        "kill the meeting",
        "abort the shipment",
        "cancel subscription",
        "nuke this invoice",
    ]
    queries = sample * 20

    off = []
    on = []
    for q in queries:
        # Alternate off/on per query, keeping only the reported timing.
        off.append(route(q, grounded_l1=False)[2])
        on.append(route(q, grounded_l1=True)[2])

    p50_off = statistics.median(off)
    p50_on = statistics.median(on)
    print(f" grounded_l1=false p50 = {p50_off:.0f} µs")
    print(f" grounded_l1=true p50 = {p50_on:.0f} µs")
    print(f" delta = {p50_on - p50_off:+.0f} µs")

    delta = p50_on - p50_off
    return abs(delta) < 50
def main():
    """Run every edge-case test against the server and print a summary.

    The test namespace is removed afterwards even when ``setup()`` or one of
    the tests raises, so a failed run does not leave the namespace behind.
    Any such exception still propagates once cleanup has been attempted.

    Returns:
        ``0`` when every test passed, ``1`` otherwise. The ``__main__`` guard
        passes this value to ``sys.exit``.
    """
    try:
        setup()
        # Dict literal values are evaluated top to bottom, so the tests run
        # in the order listed here.
        results = {
            "pollution": test_pollution(),
            "distinctive": test_distinctive_preservation(),
            "threshold": test_threshold(),
            "case": test_case_sensitivity(),
            "dead_synonym": test_dead_synonym(),
            "latency": test_latency_overhead(),
        }
        print("\n══ Summary ══════════════════════════════")
        for name, ok in results.items():
            print(f" {'✓' if ok else '✗'} {name}")
        passed = sum(1 for v in results.values() if v)
        total = len(results)
        print(f"\n {passed} / {total} passed")
        return 0 if passed == total else 1
    finally:
        # Best-effort cleanup: a failure to delete the namespace must not
        # mask the test outcome or an exception raised above.
        try:
            req("DELETE", "/api/namespaces", {"namespace_id": NS})
        except Exception:
            pass
if __name__ == "__main__":
    # Propagate main()'s pass/fail status as the process exit code.
    import sys

    exit_code = main()
    sys.exit(exit_code)