import os

import numpy as np
from safetensors.numpy import save_file


def relu(x):
    return np.maximum(0, x)


def relu_deriv(x):
    # Subgradient of ReLU: 1 where the pre-activation is positive, else 0.
    return (x > 0).astype(np.float32)


def softmax(x):
    # Shift by the row max before exponentiating for numerical stability.
    exps = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exps / np.sum(exps, axis=-1, keepdims=True)


class AdamOptimizer:
    """Adam with bias-corrected first- and second-moment estimates."""

    def __init__(self, params, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.m = {k: np.zeros_like(v) for k, v in params.items()}
        self.v = {k: np.zeros_like(v) for k, v in params.items()}
        self.t = 0

    def step(self, params, grads):
        self.t += 1
        for k in params.keys():
            # Exponential moving averages of the gradient and its square.
            self.m[k] = self.beta1 * self.m[k] + (1 - self.beta1) * grads[k]
            self.v[k] = self.beta2 * self.v[k] + (1 - self.beta2) * (grads[k] ** 2)
            # Bias correction compensates for the zero initialisation of m and v.
            m_hat = self.m[k] / (1 - self.beta1 ** self.t)
            v_hat = self.v[k] / (1 - self.beta2 ** self.t)
            params[k] -= self.lr * m_hat / (np.sqrt(v_hat) + self.eps)
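

# Illustrative sketch, not called anywhere in this script: the optimizer's
# dict-of-arrays interface minimising f(x) = x^2. The helper name _adam_demo
# is ours, not part of the original pipeline.
def _adam_demo(steps=200):
    params = {'x': np.array([5.0], dtype=np.float32)}
    opt = AdamOptimizer(params, lr=0.1)
    for _ in range(steps):
        grads = {'x': 2.0 * params['x']}  # analytic gradient of x^2
        opt.step(params, grads)
    return params['x']  # approaches 0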


class NumpyMLP:
    """Plain-NumPy MLP with ReLU hidden layers and a linear output layer."""

    def __init__(self, layer_sizes, prefix="lin"):
        self.layer_sizes = layer_sizes
        self.weights = {}
        self.prefix = prefix
        for i in range(len(layer_sizes) - 1):
            n_in = layer_sizes[i]
            n_out = layer_sizes[i + 1]
            # Uniform init in [-1/sqrt(n_in), 1/sqrt(n_in)], matching the
            # default nn.Linear initialisation in PyTorch.
            bound = np.sqrt(1.0 / n_in)
            self.weights[f'{prefix}{i+1}.weight'] = np.random.uniform(
                -bound, bound, (n_out, n_in)).astype(np.float32)
            self.weights[f'{prefix}{i+1}.bias'] = np.random.uniform(
                -bound, bound, (n_out,)).astype(np.float32)

    def forward(self, x):
        activations = [x]
        zs = []
        for i in range(len(self.layer_sizes) - 1):
            w = self.weights[f'{self.prefix}{i+1}.weight']
            b = self.weights[f'{self.prefix}{i+1}.bias']
            z = np.dot(activations[-1], w.T) + b
            zs.append(z)
            if i < len(self.layer_sizes) - 2:
                activations.append(relu(z))
            else:
                # The last layer stays linear; the loss applies softmax if needed.
                activations.append(z)
        return activations, zs

    def backward(self, activations, zs, y_grad):
        grads = {}
        delta = y_grad
        for i in reversed(range(len(self.layer_sizes) - 1)):
            w = self.weights[f'{self.prefix}{i+1}.weight']
            grads[f'{self.prefix}{i+1}.weight'] = np.dot(delta.T, activations[i])
            grads[f'{self.prefix}{i+1}.bias'] = np.sum(delta, axis=0)
            if i > 0:
                # Propagate through the linear map, then through the ReLU.
                delta = np.dot(delta, w) * relu_deriv(zs[i - 1])
        return grads
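

# Optional finite-difference check for NumpyMLP.backward, a minimal sketch
# (the helper name, default key, and epsilon are ours). Compares the analytic
# gradient of the MSE loss at one weight entry against a central difference;
# with float32 weights, expect agreement to a few decimal places only.
def gradient_check(model, X, Y, key=None, idx=(0, 0), eps=1e-4):
    key = key or f'{model.prefix}1.weight'

    def mse():
        acts, _ = model.forward(X)
        return np.mean((acts[-1] - Y) ** 2)

    activations, zs = model.forward(X)
    y_grad = 2 * (activations[-1] - Y) / Y.size  # gradient of the mean
    analytic = model.backward(activations, zs, y_grad)[key][idx]
    w = model.weights[key]
    orig = w[idx]
    w[idx] = orig + eps
    loss_plus = mse()
    w[idx] = orig - eps
    loss_minus = mse()
    w[idx] = orig  # restore the weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    return analytic, numeric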


def train_mlp_model(name, input_dim, hidden_dims, output_dim, X, Y,
                    task='regression', epochs=100, lr=0.01):
    print(f"Training {name}...")
    layer_sizes = [input_dim] + hidden_dims + [output_dim]
    model = NumpyMLP(layer_sizes)
    optimizer = AdamOptimizer(model.weights, lr=lr)
    for epoch in range(epochs):
        activations, zs = model.forward(X)
        pred = activations[-1]
        if task == 'classification':
            probs = softmax(pred)
            # Cross-entropy against one-hot targets; the epsilon guards log(0).
            loss = -np.mean(np.sum(Y * np.log(probs + 1e-10), axis=1))
            # Combined gradient of softmax + cross-entropy w.r.t. the logits.
            y_grad = (probs - Y) / X.shape[0]
        else:
            loss = np.mean((pred - Y) ** 2)
            # Divide by Y.size (not just the batch size) so this is the exact
            # gradient of the mean-squared-error reported above.
            y_grad = 2 * (pred - Y) / Y.size
        grads = model.backward(activations, zs, y_grad)
        optimizer.step(model.weights, grads)
        if epoch % 10 == 0:
            print(f"  Epoch {epoch}, Loss: {loss:.6f}")
    return model.weights


def train_graph_sage(X, adj, Y, epochs=100, lr=0.01):
    """Two-layer GraphSAGE-style model with mean aggregation, trained on MSE."""
    print("Training GraphSAGE...")
    weights = {
        'conv1.lin_l.weight': np.random.uniform(-0.1, 0.1, (64, 10)).astype(np.float32),
        'conv1.lin_l.bias': np.zeros(64, dtype=np.float32),
        'conv1.lin_r.weight': np.random.uniform(-0.1, 0.1, (64, 10)).astype(np.float32),
        'conv2.lin_l.weight': np.random.uniform(-0.1, 0.1, (64, 64)).astype(np.float32),
        'conv2.lin_l.bias': np.zeros(64, dtype=np.float32),
        'conv2.lin_r.weight': np.random.uniform(-0.1, 0.1, (64, 64)).astype(np.float32),
    }
    optimizer = AdamOptimizer(weights, lr=lr)
    # Row-normalising the adjacency matrix implements mean aggregation over
    # neighbours; it is constant, so compute it once outside the loop.
    row_sums = adj.sum(axis=1, keepdims=True)
    norm_adj = adj / (row_sums + 1e-10)
    for epoch in range(epochs):
        # Forward pass: each layer combines a self transform (lin_l, with
        # bias) and a transform of the aggregated neighbour features (lin_r).
        x_neigh1 = np.dot(norm_adj, X)
        z1 = (np.dot(X, weights['conv1.lin_l.weight'].T) + weights['conv1.lin_l.bias'] +
              np.dot(x_neigh1, weights['conv1.lin_r.weight'].T))
        h1 = relu(z1)
        x_neigh2 = np.dot(norm_adj, h1)
        z2 = (np.dot(h1, weights['conv2.lin_l.weight'].T) + weights['conv2.lin_l.bias'] +
              np.dot(x_neigh2, weights['conv2.lin_r.weight'].T))
        h2 = relu(z2)
        loss = np.mean((h2 - Y) ** 2)
        # Backward pass through the MSE loss and both conv layers, feeding the
        # Adam optimizer real gradients rather than leaving it unused.
        dz2 = 2 * (h2 - Y) / Y.size * relu_deriv(z2)
        grads = {
            'conv2.lin_l.weight': np.dot(dz2.T, h1),
            'conv2.lin_l.bias': np.sum(dz2, axis=0),
            'conv2.lin_r.weight': np.dot(dz2.T, x_neigh2),
        }
        # h1 receives gradient directly and through the second aggregation.
        dh1 = (np.dot(dz2, weights['conv2.lin_l.weight']) +
               np.dot(norm_adj.T, np.dot(dz2, weights['conv2.lin_r.weight'])))
        dz1 = dh1 * relu_deriv(z1)
        grads['conv1.lin_l.weight'] = np.dot(dz1.T, X)
        grads['conv1.lin_l.bias'] = np.sum(dz1, axis=0)
        grads['conv1.lin_r.weight'] = np.dot(dz1.T, x_neigh1)
        optimizer.step(weights, grads)
        if epoch % 10 == 0:
            print(f"  Epoch {epoch}, Loss: {loss:.6f}")
    return weights


if __name__ == "__main__":
    os.makedirs("models", exist_ok=True)

    # Solver selector: 5-way classifier over synthetic 28-dim features with
    # random one-hot labels.
    X = np.random.randn(100, 28).astype(np.float32)
    Y = np.zeros((100, 5), dtype=np.float32)
    Y[np.arange(100), np.random.randint(0, 5, 100)] = 1.0
    selector_weights = train_mlp_model(
        "Solver Selector", 28, [128, 64], 5, X, Y, task='classification'
    )
    save_file(selector_weights, "models/solver_selector.safetensors")

    # Quality predictor: regression onto two target scores.
    Y_q = np.random.rand(100, 2).astype(np.float32)
    quality_weights = train_mlp_model(
        "Quality Predictor", 28, [64, 32], 2, X, Y_q, task='regression'
    )
    save_file(quality_weights, "models/quality_predictor.safetensors")

    # AutoML predictor: regression onto five target scores.
    Y_a = np.random.rand(100, 5).astype(np.float32)
    automl_weights = train_mlp_model(
        "AutoML Predictor", 28, [64], 5, X, Y_a, task='regression'
    )
    save_file(automl_weights, "models/automl.safetensors")

    # Graph embedder: 50 nodes, 10 features, roughly 10% edge density.
    X_g = np.random.randn(50, 10).astype(np.float32)
    adj = (np.random.rand(50, 50) > 0.9).astype(np.float32)
    Y_g = np.random.randn(50, 64).astype(np.float32)
    sage_weights = train_graph_sage(X_g, adj, Y_g)
    save_file(sage_weights, "models/graph_embed.safetensors")

    # Move scorer: scalar regression over 16-dim move features.
    X_m = np.random.randn(200, 16).astype(np.float32)
    Y_m = np.random.randn(200, 1).astype(np.float32)
    move_weights = train_mlp_model(
        "Move Scorer", 16, [32, 16], 1, X_m, Y_m, task='regression'
    )
    save_file(move_weights, "models/move_scorer.safetensors")

    print("\nTraining complete. Models exported to models/ directory.")