```python
# Vanilla Gradient Descent
while True:
    weights_grad = evaluate_gradient(loss_fun, data, weights)
    weights += -step_size * weights_grad  # perform parameter update
```
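
As a concrete illustration, here is a minimal runnable sketch of the same loop on a toy least-squares problem; the data, the gradient formula, and the step size are placeholders invented for this example rather than part of the original snippet, and the infinite loop is replaced by a fixed step budget:

```python
import numpy as np

def evaluate_gradient(loss_fun, data, weights):
    # Analytic gradient of the placeholder loss (1/N) * ||X @ w - y||^2
    X, y = data
    return 2 * X.T @ (X @ weights - y) / len(y)

X = np.random.randn(100, 3)
y = X @ np.array([1.0, -2.0, 0.5])       # synthetic targets
loss_fun = lambda w: np.mean((X @ w - y) ** 2)
weights = np.zeros(3)
step_size = 0.1

for _ in range(200):                     # fixed budget instead of `while True`
    weights_grad = evaluate_gradient(loss_fun, (X, y), weights)
    weights += -step_size * weights_grad
```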
```python
# Adam (almost)
import numpy as np

first_moment = 0
second_moment = 0
while True:
    dx = compute_gradient(x)
    # momentum-like running mean of gradients
    first_moment = beta1 * first_moment + (1 - beta1) * dx
    # RMSProp-like running mean of squared gradients
    second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
    x -= learning_rate * first_moment / (np.sqrt(second_moment) + 1e-7)
```
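
The "almost" is the bias correction of the full Adam update (Kingma & Ba, 2015), which compensates for `first_moment` and `second_moment` being initialized at zero. A self-contained sketch of the complete update on a toy quadratic loss; the parameter vector, gradient function, step count, and hyperparameter values here are assumptions chosen to make the example runnable:

```python
import numpy as np

x = np.random.randn(3)                    # toy parameters; stand-in for real weights
compute_gradient = lambda x: 2 * x        # gradient of the toy loss ||x||^2
beta1, beta2, learning_rate = 0.9, 0.999, 1e-2
num_steps = 2000

first_moment = 0
second_moment = 0
for t in range(1, num_steps + 1):         # t starts at 1 so the corrections are defined
    dx = compute_gradient(x)
    first_moment = beta1 * first_moment + (1 - beta1) * dx
    second_moment = beta2 * second_moment + (1 - beta2) * dx * dx
    first_unbias = first_moment / (1 - beta1 ** t)    # bias-corrected first moment
    second_unbias = second_moment / (1 - beta2 ** t)  # bias-corrected second moment
    x -= learning_rate * first_unbias / (np.sqrt(second_unbias) + 1e-7)
```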
```python
# Keep a moving average of the weights and use it at test time (Polyak averaging)
while True:
    data_batch = dataset.sample_data_batch()
    loss = network.forward(data_batch)
    dx = network.backward()
    x += -learning_rate * dx
    x_test = 0.995 * x_test + 0.005 * x  # use for test set
```
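
In this loop the test-time parameters `x_test` are an exponential moving average of the training iterates, which tends to generalize a bit better than the raw final weights. A self-contained sketch on a toy problem, with the "network" reduced to a single weight vector purely for illustration:

```python
import numpy as np

x = np.random.randn(3)          # training weights
x_test = x.copy()               # averaged copy used for evaluation
learning_rate, decay = 0.1, 0.995

for _ in range(1000):
    dx = 2 * x                                  # gradient of the toy loss ||x||^2
    x += -learning_rate * dx
    x_test = decay * x_test + (1 - decay) * x   # exponential moving average of weights

# Evaluate the model with x_test rather than x.
```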