欠拟合和过拟合现象：多项式方法解决欠拟合

tech2022-08-31 207

numpy.reshape(a,(-1,1))的含义

https://blog.csdn.net/m0_38052384/article/details/102692708

用一个例子感受一下欠拟合：注意升维

# Underfitting demo: fit a straight line to the non-linear target y = x * sin(x).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# np.linspace(start, stop, num) builds an arithmetic sequence:
# here 50 evenly spaced samples on [1, 10].
x = np.linspace(1, 10, 50)
y = x * np.sin(x)

# Use a font with CJK glyphs so the Chinese labels render.
plt.rcParams["font.family"] = "SimHei"
plt.rcParams["font.size"] = 12
# CJK fonts such as SimHei typically lack the Unicode minus sign (U+2212);
# fall back to the ASCII hyphen so negative tick labels stay readable.
plt.rcParams["axes.unicode_minus"] = False

# scikit-learn expects a 2-D (n_samples, n_features) matrix.
# Equivalent to x[:, np.newaxis].
X = x.reshape(-1, 1)

# Plain linear regression on the single raw feature.
lr = LinearRegression()
lr.fit(X, y)

# Scatter the samples and overlay the fitted line.
plt.scatter(x, y, c='g', label='样本数据')
plt.plot(X, lr.predict(X), c="r", label='拟合线')
plt.legend()
# lr.score(X, y) is the coefficient of determination R^2.
plt.title(f"$R^2$ :{lr.score(X,y):.3f}")

复习一下多项式特征：多项式扩展之后依旧可以应用于线性模型

# Review of polynomial feature expansion with PolynomialFeatures.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

x = np.array([[1, 2], [3, 4]])
print(x)
print("================================")

# With the default include_bias=True the output contains the degree-0 term
# (exponents [0, 0], i.e. a constant column of ones).  When the result feeds
# a linear regression, include_bias=False is the usual choice: the model
# already fits an intercept, so the constant column only adds computation.
pr = PolynomialFeatures(degree=2)
res = pr.fit_transform(x)
print(res)

# Exponent matrix: row j holds the power applied to each input feature
# to produce output feature j.
print(pr.powers_)
print("================================")

# `n_input_features_` was removed in scikit-learn 1.2; `n_features_in_` is
# its replacement.  Fall back to the old name for older releases.
n_in = getattr(pr, "n_features_in_", None)
if n_in is None:
    n_in = pr.n_input_features_
print("输入的特征数量", n_in)
print("输出的特征数量", pr.n_output_features_)

# The same features can be produced by hand: apply every row of the
# exponent matrix to each sample ([1, 2] and [3, 4]) in turn.
for x1, x2 in x:
    for e1, e2 in pr.powers_:
        print(x1**e1 * x2**e2, end='\t')
    print("")

# Apply polynomial features to the underfitting example and compare how well
# linear regression fits at different polynomial degrees.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# np.linspace(start, stop, num): 50 evenly spaced samples on [0, 10].
x = np.linspace(0, 10, 50)
y = x * np.sin(x)

# Use a font with CJK glyphs so the Chinese labels render.
plt.rcParams["font.family"] = "SimHei"
plt.rcParams["font.size"] = 12
# CJK fonts such as SimHei typically lack the Unicode minus sign (U+2212);
# fall back to the ASCII hyphen so negative tick labels stay readable.
plt.rcParams["axes.unicode_minus"] = False

# Examine degrees 1 through 6, one subplot per degree on a 2x3 grid.
fig, ax = plt.subplots(2, 3)
X = x.reshape(-1, 1)
fig.set_size_inches(18, 10)
ax = ax.ravel()  # flatten the 2x3 grid so it can be walked in order

for i, axis in enumerate(ax, start=1):
    # Expand the single feature to powers 1..i; no bias column because
    # LinearRegression fits its own intercept.
    pr = PolynomialFeatures(degree=i, include_bias=False)
    X_new = pr.fit_transform(X)

    lr = LinearRegression()
    lr.fit(X_new, y)

    # Scatter the samples and overlay the fitted curve.
    axis.scatter(x, y, c='g', label='样本数据')
    axis.plot(x, lr.predict(X_new), c="r", label='拟合线')
    axis.legend()
    # lr.score(X_new, y) is the coefficient of determination R^2.
    axis.set_title(f"{i} 阶$R^2$ :{lr.score(X_new,y):.3f}")

流水线：

# Pipeline demo: chain polynomial expansion and linear regression.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# np.linspace(start, stop, num): 50 evenly spaced samples on [1, 10].
x = np.linspace(1, 10, 50)
y = x * np.sin(x)

# Use a font with CJK glyphs so the Chinese labels render.
plt.rcParams["font.family"] = "SimHei"
plt.rcParams["font.size"] = 12
# CJK fonts such as SimHei typically lack the Unicode minus sign (U+2212);
# fall back to the ASCII hyphen so negative tick labels stay readable.
plt.rcParams["axes.unicode_minus"] = False

# scikit-learn expects a 2-D (n_samples, n_features) matrix.
# Equivalent to x[:, np.newaxis].
X = x.reshape(-1, 1)

# A pipeline is described by a list of (step_name, estimator) tuples:
#   [(name_1, estimator_1), (name_2, estimator_2), ..., (name_n, estimator_n)]
# Estimator parameters can be given here or set later via set_params.
steps = [
    ("ploy", PolynomialFeatures(include_bias=False)),
    ("lr", LinearRegression()),
]
pr = Pipeline(steps)
# Fill in a step's parameter afterwards using <step_name>__<param_name>.
pr.set_params(ploy__degree=8)

# fit(): every step but the last is fit_transform-ed in order, and the
# final estimator is then fit on the transformed data.
pr.fit(X, y)

plt.scatter(x, y, c='g', label='样本数据')
# predict()/score(): the raw X is passed in; the earlier steps apply
# transform (without re-fitting) before the final estimator is called.
plt.plot(X, pr.predict(X), c="r", label='拟合线')
plt.legend()
# pr.score(X, y) is the coefficient of determination R^2.
plt.title(f"$R^2$ :{pr.score(X,y):.3f}")

演示一下过拟合：

# Overfitting demo: compare linear regression on polynomial features of
# increasing degree, using slightly jittered sample positions.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# 50 evenly spaced points on [0, 10] with Gaussian jitter (std 0.1).
# The jitter is comparable to the grid spacing (~0.2), so neighbouring points
# can swap order; sort so the fitted curve is drawn left-to-right instead of
# doubling back on itself.
x = np.sort(np.linspace(0, 10, 50) + np.random.randn(50) * 0.1)
y = x * np.sin(x)

# Use a font with CJK glyphs so the Chinese labels render.
plt.rcParams["font.family"] = "SimHei"
plt.rcParams["font.size"] = 12
# CJK fonts such as SimHei typically lack the Unicode minus sign (U+2212);
# fall back to the ASCII hyphen so negative tick labels stay readable.
plt.rcParams["axes.unicode_minus"] = False

# scikit-learn expects a 2-D (n_samples, n_features) matrix.
X = x.reshape(-1, 1)
plt.figure(figsize=(15, 15))

degrees = [3, 8, 15, 35]
for index, values in enumerate(degrees):
    # Expand the single feature to powers 1..values; no bias column because
    # LinearRegression fits its own intercept.
    pr = PolynomialFeatures(degree=values, include_bias=False)
    X_new = pr.fit_transform(X)

    lr = LinearRegression()
    lr.fit(X_new, y)

    # One panel per degree on a 2x2 grid.
    plt.subplot(2, 2, index + 1)
    plt.scatter(x, y, c='g', label='样本数据')
    plt.plot(X, lr.predict(X_new), c="r", label='拟合线')
    plt.legend()
    # lr.score(X_new, y) is the coefficient of determination R^2.
    plt.title(f"{values} 阶$R^2$ :{lr.score(X_new,y):.3f}")

最新回复(0)