import torch
import math
import sys
from typing import Dict, Tuple
# Prepend the hard-coded directory to the import search path so the import
# below is resolved from it first (presumably where ising_empathy_module
# lives -- verify on the target machine).
sys.path.insert(0, '/home/worm/Prime-directive')

try:
    from ising_empathy_module import IsingGPU, IsingEmpathyModule
except ImportError:
    # Nothing in this script can run without the module: report and stop
    # with exit status 1.
    print("❌ Cannot import Ising module")
    raise SystemExit(1)
class Phase2Diagnostic:
    """Console diagnostics for ``IsingEmpathyModule.compute_empathy``.

    Each ``test_*`` method builds a pair of ``IsingGPU`` agents, prints a
    human-readable report to stdout and returns a summary dict.
    ``run_all_tests`` runs them in sequence and combines the verdicts.
    """

    # Annealing step counts swept by the tabulated tests.
    ANNEAL_STEP_SWEEP = (10, 30, 50, 100, 200)
    # Seed passed to every compute_empathy call.
    EMPATHY_SEED = 12345

    _TABLE_HEADER = "Steps | Empathy | Overlap | Energy Error | Coupling Sim | Issue?"
    _TABLE_RULE = "------|---------|---------|--------------|--------------|-------"

    def __init__(self):
        """Create the CPU device and the empathy module under test."""
        self.device = torch.device('cpu')
        self.empathy = IsingEmpathyModule(device=self.device)
        # NOTE(review): initialised here but not written by any method of
        # this class.
        self.results = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _print_banner(title: str, rule: str = "=" * 80, lead: str = "\n") -> None:
        """Print ``title`` framed by two rule lines; ``lead`` prefixes the first."""
        print(lead + rule)
        print(title)
        print(rule)

    @staticmethod
    def _mean(rows, key: str):
        """Return the arithmetic mean of ``row[key]`` over ``rows``."""
        return sum(row[key] for row in rows) / len(rows)

    @staticmethod
    def _opposite_state_issues(row: Dict) -> list:
        """Return the warnings for one sweep row of the opposite-state test."""
        issues = []
        if row['overlap'] > 0.3:
            issues.append("overlap HIGH (should be ~0)")
        if row['energy_error'] > 2.0:
            issues.append("energy_err HIGH")
        if row['empathy'] > 0.6:
            issues.append("empathy HIGH (expect 0.3-0.5)")
        return issues

    @staticmethod
    def _identical_coupling_issues(row: Dict) -> list:
        """Return the warnings for one sweep row of the identical-coupling test."""
        issues = []
        if row['coupling_sim'] < 0.95:
            issues.append("coupling_sim LOW (should be ~1.0)")
        if row['empathy'] < 0.8:
            issues.append("empathy LOW (expect 1.0)")
        return issues

    def _sweep_anneal_steps(self, agent_a, agent_b, flag_issues) -> list:
        """Tabulate ``compute_empathy`` for every entry of ``ANNEAL_STEP_SWEEP``.

        Args:
            agent_a: First agent, passed through to ``compute_empathy``.
            agent_b: Second agent, passed through to ``compute_empathy``.
            flag_issues: Callable taking the row dict (without ``'issues'``)
                and returning a list of warning strings for that row.

        Returns:
            One dict per step count with keys ``steps``, ``empathy``,
            ``overlap``, ``energy_error``, ``coupling_sim`` and ``issues``.
        """
        print(self._TABLE_HEADER)
        print(self._TABLE_RULE)
        rows = []
        for anneal_steps in self.ANNEAL_STEP_SWEEP:
            outcome = self.empathy.compute_empathy(
                agent_a, agent_b,
                anneal_steps=anneal_steps,
                seed=self.EMPATHY_SEED,
            )
            row = {
                'steps': anneal_steps,
                'empathy': outcome['empathy_score'],
                'overlap': outcome['state_overlap'],
                'energy_error': outcome['energy_error'],
                'coupling_sim': outcome['coupling_similarity'],
            }
            row['issues'] = flag_issues(row)
            issue_str = " | ".join(row['issues']) if row['issues'] else "OK"
            print(f"{row['steps']:5d} | {row['empathy']:7.1%} | {row['overlap']:7.1%} | "
                  f"{row['energy_error']:12.3f} | {row['coupling_sim']:12.3f} | {issue_str}")
            rows.append(row)
        return rows

    # ------------------------------------------------------------------
    # Diagnostics
    # ------------------------------------------------------------------
    def test_c1_001_opposite_empathy(self) -> Dict:
        """Measure empathy between two agents forced into opposite spin states.

        Returns:
            Dict with ``test_name``, the per-step ``results`` rows,
            ``avg_empathy``, the ``expected`` (low, high) range and ``pass``
            (True when the average lies inside that range).
        """
        self._print_banner("TEST C1_001: Opposite Agent Empathy")
        print("\nSetup: Two agents with opposite states")

        # Both agents must have the same size: the forced spin vectors
        # below have exactly n_spins entries each.
        n_spins = 20
        agent_a = IsingGPU(n=n_spins, seed=42, device=self.device)
        agent_b = IsingGPU(n=n_spins, seed=43, device=self.device)

        print("Forcing opposite states for testing...")
        agent_a.spins = torch.ones(n_spins, device=self.device)
        agent_b.spins = -torch.ones(n_spins, device=self.device)
        print(f"Agent A state: {agent_a.spins[:5].tolist()} ... (all +1)")
        print(f"Agent B state: {agent_b.spins[:5].tolist()} ... (all -1)")
        print(f"Agent A magnetization: {agent_a.magnetization():.3f}")
        print(f"Agent B magnetization: {agent_b.magnetization():.3f}")

        print("\nTesting empathy with different annealing steps:")
        results = self._sweep_anneal_steps(agent_a, agent_b, self._opposite_state_issues)

        self._print_banner("ANALYSIS:", rule="-" * 80)
        avg_empathy = self._mean(results, 'empathy')
        print(f"Average empathy: {avg_empathy:.1%}")
        print("Expected: 0.3-0.5 (30-50%)")
        print(f"Actual: {avg_empathy:.1%}")

        expected = (0.3, 0.5)
        passed = expected[0] <= avg_empathy <= expected[1]
        # The printed verdict follows the same range as the returned 'pass'
        # flag, so a too-low average is no longer reported as OK.
        if passed:
            print("✅ OK: Empathy in expected range")
        elif avg_empathy > expected[1]:
            print("❌ PROBLEM: Empathy too high for opposite states")
            print(" Root causes:")
            print(" 1. Theory of Mind simulation may not properly separate agents")
            print(" 2. Coupling similarity might override state overlap")
            print(" 3. Energy error calculation might be incorrect")
        else:
            print("❌ PROBLEM: Empathy below expected range for opposite states")

        return {
            'test_name': 'C1_001_opposite_empathy',
            'results': results,
            'avg_empathy': avg_empathy,
            'expected': expected,
            'pass': passed
        }

    def test_c1_002_identical_coupling(self) -> Dict:
        """Measure empathy between two agents sharing one coupling matrix and field.

        Agent B's ``coupling`` and ``field`` are replaced by clones of
        agent A's, then both agents are annealed with the same seed before
        the sweep.

        Returns:
            Dict with ``test_name``, the per-step ``results`` rows,
            ``avg_empathy``, ``expected`` (1.0) and ``pass`` (True when the
            average is at least 0.8).
        """
        self._print_banner("TEST C1_002: Identical Coupling Empathy")
        print("\nSetup: Two agents with IDENTICAL couplings")
        agent_a = IsingGPU(n=20, seed=42, device=self.device)
        agent_b = IsingGPU(n=20, seed=43, device=self.device)

        print("Making couplings identical (agent_b.J = agent_a.J)...")
        agent_b.coupling = agent_a.coupling.clone()
        agent_b.field = agent_a.field.clone()
        print(f"Agent A coupling[0,1:5]: {agent_a.coupling[0, 1:5].tolist()}")
        print(f"Agent B coupling[0,1:5]: {agent_b.coupling[0, 1:5].tolist()}")
        print(f"Couplings identical: {torch.allclose(agent_a.coupling, agent_b.coupling)}")

        print("\nAnnealing both agents with identical couplings...")
        agent_a.anneal(100, seed=999)
        agent_b.anneal(100, seed=999)
        print(f"Agent A final state: {agent_a.spins[:5].tolist()}")
        print(f"Agent B final state: {agent_b.spins[:5].tolist()}")
        print(f"Agent A magnetization: {agent_a.magnetization():.3f}")
        print(f"Agent B magnetization: {agent_b.magnetization():.3f}")

        print("\nTesting empathy with identical couplings:")
        results = self._sweep_anneal_steps(agent_a, agent_b, self._identical_coupling_issues)

        self._print_banner("ANALYSIS:", rule="-" * 80)
        avg_empathy = self._mean(results, 'empathy')
        avg_coupling_sim = self._mean(results, 'coupling_sim')
        print(f"Average empathy: {avg_empathy:.1%}")
        print("Expected: 1.0 (perfect)")
        print(f"Actual: {avg_empathy:.1%}")
        print(f"\nAverage coupling similarity: {avg_coupling_sim:.1%}")
        print("Expected: 1.0 (identical)")

        if avg_empathy < 0.8:
            print("❌ PROBLEM: Empathy too low for identical couplings")
            print(" Root causes:")
            print(" 1. Coupling similarity calculation might be wrong")
            print(" 2. State overlap might not account for Z2 symmetry properly")
            print(" 3. Energy error might not handle identical physics")
        else:
            print("✅ OK: Empathy high for identical couplings")

        return {
            'test_name': 'C1_002_identical_coupling',
            'results': results,
            'avg_empathy': avg_empathy,
            'expected': 1.0,
            'pass': avg_empathy >= 0.8
        }

    def test_analytical_two_agent_baseline(self) -> Dict:
        """Run the two-spin ferromagnetic baseline case.

        Only the ferromagnetic case (coupling entries set to 1.0) is
        executed; the antiferromagnetic case is described in the printed
        preamble but has no code here.

        Returns:
            Dict with ``test_name`` and ``results``, a list holding one
            entry per executed case (``case``, ``empathy``, ``overlap``,
            ``expected``, ``pass``).
        """
        self._print_banner("TEST ANALYTICAL BASELINE: Two-Agent System")
        print("\nAnalytical case: Two spins with coupling J")
        print("Expected ground states:")
        print(" J > 0 (ferromagnetic): s_A = s_B (both up or both down)")
        print(" J < 0 (antiferromagnetic): s_A ≠ s_B (opposite)")
        results = []

        self._print_banner("Case 1: Ferromagnetic coupling (J > 0)", rule="-" * 80)
        agent_a = IsingGPU(n=2, seed=42, device=self.device)
        agent_b = IsingGPU(n=2, seed=43, device=self.device)
        # Write both off-diagonal entries so each 2x2 coupling stays symmetric.
        for agent in (agent_a, agent_b):
            agent.coupling[0, 1] = 1.0
            agent.coupling[1, 0] = 1.0
        agent_a.anneal(100, seed=999)
        agent_b.anneal(100, seed=999)
        print(f"Agent A state: {agent_a.spins.tolist()}")
        print(f"Agent B state: {agent_b.spins.tolist()}")
        print(f"Magnetizations: A={agent_a.magnetization():.3f}, B={agent_b.magnetization():.3f}")

        empathy_result = self.empathy.compute_empathy(
            agent_a, agent_b, anneal_steps=100, seed=self.EMPATHY_SEED
        )
        overlap = empathy_result['state_overlap']
        coupling_sim = empathy_result['coupling_similarity']
        empathy = empathy_result['empathy_score']
        print(f"\nEmpathy score: {empathy:.1%}")
        print(f" State overlap: {overlap:.1%}")
        print(f" Coupling similarity: {coupling_sim:.1%}")
        print(f"Analysis: {'✅ HIGH' if empathy > 0.7 else '❌ LOW'} (expect high for same coupling)")
        results.append({
            'case': 'Ferromagnetic',
            'empathy': empathy,
            'overlap': overlap,
            'expected': 'high',
            'pass': empathy > 0.7
        })

        return {
            'test_name': 'analytical_baseline',
            'results': results
        }

    def run_all_tests(self) -> Dict:
        """Run every diagnostic, print a summary and return the combined result.

        Returns:
            Dict with ``tests`` (the three per-test dicts keyed ``C1_001``,
            ``C1_002`` and ``analytical``) and ``overall_pass``, which is the
            conjunction of the C1_001 and C1_002 verdicts. The analytical
            baseline is reported but does not affect ``overall_pass``.
        """
        self._print_banner("PHASE 2 DIAGNOSTIC: Empirical Simulation Accuracy", lead="\n\n")
        test_c1_001 = self.test_c1_001_opposite_empathy()
        test_c1_002 = self.test_c1_002_identical_coupling()
        test_analytical = self.test_analytical_two_agent_baseline()

        self._print_banner("SUMMARY", lead="\n\n")
        print("\nC1_001 (Opposite agents):")
        print(f" Result: {test_c1_001['avg_empathy']:.1%}")
        print(" Expected: 0.3-0.5")
        print(f" Status: {'✅ PASS' if test_c1_001['pass'] else '❌ FAIL'}")
        print("\nC1_002 (Identical coupling):")
        print(f" Result: {test_c1_002['avg_empathy']:.1%}")
        print(" Expected: 1.0")
        print(f" Status: {'✅ PASS' if test_c1_002['pass'] else '❌ FAIL'}")
        print("\nAnalytical baseline:")
        for result in test_analytical['results']:
            print(f" {result['case']}: {result['empathy']:.1%} (expect {result['expected']})")

        return {
            'tests': {
                'C1_001': test_c1_001,
                'C1_002': test_c1_002,
                'analytical': test_analytical
            },
            'overall_pass': test_c1_001['pass'] and test_c1_002['pass']
        }
if __name__ == "__main__":
    # Script entry point: run every diagnostic, then print the follow-up
    # checklist only when the combined verdict is a failure.
    diagnostic = Phase2Diagnostic()
    results = diagnostic.run_all_tests()

    if not results['overall_pass']:
        rule = "=" * 80
        follow_up = (
            "\n\n" + rule,
            "NEXT STEPS FOR PHASE 2",
            rule,
            "\nIdentified issues. Recommended fixes:",
            "1. Check empathy score weighting (0.4/0.3/0.3)",
            "2. Verify state overlap calculation (handle Z2 symmetry)",
            "3. Test with clean analytical cases",
            "4. Increase MCMC iterations",
            "5. Validate coupling similarity calculation",
        )
        for line in follow_up:
            print(line)