print("the function will be y_data="+str(b)+"+"+str(w)+"*x_data")
error=0.0 for i inrange(10): error+=abs(y_data[i]-(b+w*x_data[i])) average_error=error/10 print("the average error is "+str(average_error))
the function will be y_data=-inf+nan*x_data
the average error is nan
2. Trying Professor Hung-yi Lee's (李宏毅) demo
Import the required libraries:
```python
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
%matplotlib inline
import random as random
import numpy as np
import csv
```
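The cells below use the x_data/y_data lists and a getGrad helper that were defined in the earlier part of this post and are not repeated here. For reference, a minimal sketch of what they look like, assuming the ten CP pairs from Lee's demo and reconstructing getGrad from the squared-error loss used below (the exact body of getGrad is an assumption, not the original cell):

```python
# Assumed training data: the ten (x, y) CP pairs from Lee's demo,
# as used in the earlier part of this post.
x_data = [338., 333., 328., 207., 226., 25., 179., 60., 208., 606.]
y_data = [640., 633., 619., 393., 428., 27., 193., 66., 226., 1591.]

def getGrad(b, w):
    # Reconstructed helper (assumption): gradients of the summed squared
    # error sum_n (y_n - (b + w * x_n))^2 with respect to b and w.
    b_grad = 0.0
    w_grad = 0.0
    for n in range(len(x_data)):
        residual = y_data[n] - (b + w * x_data[n])
        b_grad += -2.0 * residual              # d/db of the squared error
        w_grad += -2.0 * residual * x_data[n]  # d/dw of the squared error
    return b_grad, w_grad
```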
Prepare the grid of b, w, and loss values for plotting:
```python
# Generate a grid of b and w values so the gradient descent trajectory can be marked on it
x = np.arange(-200, -100, 1)    # bias
y = np.arange(-5, 5, 0.1)       # weight
Z = np.zeros((len(x), len(y)))  # color
X, Y = np.meshgrid(x, y)
for i in range(len(x)):
    for j in range(len(y)):
        b = x[i]
        w = y[j]
        # Z[j][i] stores the loss for this (b, w) pair
        Z[j][i] = 0
        for n in range(len(x_data)):
            Z[j][i] = Z[j][i] + (y_data[n] - (b + w * x_data[n]))**2
        Z[j][i] = Z[j][i] / len(x_data)
```
```python
# y_data = b + w * x_data
b = -120           # initial b
w = -4             # initial w
lr = 0.0000001     # learning rate
iteration = 100000 # fix the number of iterations instead of running until b_grad and w_grad are both 0 (which turns out to be impractical)

# Store the initial values so the whole descent path can be plotted later
b_history = [b]
w_history = [w]

# iterations
for i in range(iteration):
    # get new b_grad and w_grad
    b_grad, w_grad = getGrad(b, w)
    # update b and w
    b -= lr * b_grad
    w -= lr * w_grad
    # store parameters for plotting
    b_history.append(b)
    w_history.append(w)
```
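The b_history and w_history lists exist only so the descent path can be drawn over the loss surface computed above; the original post shows the resulting contour figures as images. A minimal plotting sketch, assuming the grid x, y, Z from the earlier cell and marking the rough optimum (b ≈ -188.4, w ≈ 2.67) that the final run converges to:

```python
# Draw the loss surface as filled contours and overlay the recorded path.
plt.contourf(x, y, Z, 50, alpha=0.5, cmap=plt.get_cmap('jet'))
# Rough optimum taken from the final run's output (assumed target marker).
plt.plot([-188.4], [2.67], 'x', ms=12, markeredgewidth=3, color='orange')
# Gradient descent trajectory stored in b_history / w_history.
plt.plot(b_history, w_history, 'o-', ms=3, lw=1.5, color='black')
plt.xlim(-200, -100)
plt.ylim(-5, 5)
plt.xlabel(r'$b$', fontsize=16)
plt.ylabel(r'$w$', fontsize=16)
plt.show()
```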
```python
# y_data = b + w * x_data
b = -120           # initial b
w = -4             # initial w
lr = 0.000001      # learning rate, 10x larger
iteration = 100000 # fixed number of iterations, as before

# Store the initial values for plotting
b_history = [b]
w_history = [w]

# iterations
for i in range(iteration):
    # get new b_grad and w_grad
    b_grad, w_grad = getGrad(b, w)
    # update b and w
    b -= lr * b_grad
    w -= lr * w_grad
    # store parameters for plotting
    b_history.append(b)
    w_history.append(w)
```
```python
# y_data = b + w * x_data
b = -120           # initial b
w = -4             # initial w
lr = 0.00001       # learning rate, another 10x larger
iteration = 100000 # fixed number of iterations, as before

# Store the initial values for plotting
b_history = [b]
w_history = [w]

# iterations
for i in range(iteration):
    # get new b_grad and w_grad
    b_grad, w_grad = getGrad(b, w)
    # update b and w
    b -= lr * b_grad
    w -= lr * w_grad
    # store parameters for plotting
    b_history.append(b)
    w_history.append(w)
```
```python
# y_data = b + w * x_data
b = -120           # initial b
w = -4             # initial w
lr = 1             # learning rate; Adagrad below adapts it per parameter
iteration = 100000 # fixed number of iterations, as before

# Store the initial values for plotting
b_history = [b]
w_history = [w]

lr_b = 0
lr_w = 0

# iterations
for i in range(iteration):
    # get new b_grad and w_grad
    b_grad, w_grad = getGrad(b, w)
    # accumulate squared gradients to get separate learning rates for b and w
    lr_b = lr_b + b_grad ** 2
    lr_w = lr_w + w_grad ** 2
    # this trick is called Adagrad; it will be explained in detail later
    # update b and w with their adapted learning rates
    b -= lr / np.sqrt(lr_b) * b_grad
    w -= lr / np.sqrt(lr_w) * w_grad
    # store parameters for plotting
    b_history.append(b)
    w_history.append(w)
    # output b, w, b_grad, w_grad
    # print("b: "+str(b)+"\t\t\t w: "+str(w)+"\n"+"b_grad: "+str(b_grad)+"\t\t w_grad: "+str(w_grad)+"\n")

# output the final function and its error
print("the function will be y_data=" + str(b) + "+" + str(w) + "*x_data")
error = 0.0
for i in range(10):
    print("error " + str(i) + " is: " + str(np.abs(y_data[i] - (b + w * x_data[i]))) + " ")
    error += np.abs(y_data[i] - (b + w * x_data[i]))
average_error = error / 10
print("the average error is " + str(average_error))
```
the function will be y_data=-188.3668387495323+2.6692640713379903*x_data
error 0 is: 73.84441736270833
error 1 is: 67.4980970060185
error 2 is: 68.15177664932844
error 3 is: 28.8291759825683
error 4 is: 13.113158627146447
error 5 is: 148.63523696608252
error 6 is: 96.43143001996799
error 7 is: 94.21099446925288
error 8 is: 140.84008808876973
error 9 is: 161.7928115187101
the average error is 89.33471866905532
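The per-parameter learning rates accumulated in lr_b and lr_w are Adagrad, which the post promises to explain in detail later. For reference, the update the last run applies to w (and analogously to b) is, writing $g^{i}$ for the gradient at iteration $i$:

$$
w^{t+1} = w^{t} - \frac{\eta}{\sqrt{\sum_{i=0}^{t}\left(g^{i}\right)^{2}}}\, g^{t}
$$

With this adaptive step size the run converges to roughly b = -188.37 and w = 2.67, with an average error of about 89.3, as the output above shows.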