Machine Learning Implementations in Python


Reference

python大战机器学习

Table of Contents

01. Linear Models
02. Ridge Regression
03. Lasso Regression
04. ElasticNet Regression
05. Logistic Regression
06. Decision Tree Regression
07. Classification Decision Tree
08. Bayesian Classifiers
09. K-Nearest Neighbors
10. Dimensionality Reduction

01. Linear Models

import numpy as np
from sklearn import datasets, linear_model, model_selection

# Load the diabetes dataset and return (X_train, X_test, y_train, y_test)
def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

# Ordinary least-squares linear regression
def test_LinearRegression(*data):
    X_train, X_test, y_train, y_test = data
    # Create and fit the model
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)
    # Learned coefficients and intercept
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    # Mean squared error on the test set
    print('Mean squared error: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    # R^2 score on the test set
    print('Score: %.2f' % regr.score(X_test, y_test))

X_train, X_test, y_train, y_test = load_data()
test_LinearRegression(X_train, X_test, y_train, y_test)

02. Ridge Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

# Load the diabetes dataset and return (X_train, X_test, y_train, y_test)
def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

# Ridge regression with the default regularization strength
def test_Ridge(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.Ridge()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Mean squared error: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

# Effect of alpha on test performance
def test_Ridge_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:
        regr = linear_model.Ridge(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    # Plot score versus alpha with a log-scaled x axis
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_xscale('log')
    ax.set_title("Ridge")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_Ridge(X_train, X_test, y_train, y_test)
test_Ridge_alpha(X_train, X_test, y_train, y_test)
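The alpha sweep above scores each candidate on the test set; the selection can instead be handed to built-in cross-validation. A minimal sketch using linear_model.RidgeCV (not from the book; the alpha grid here is illustrative):

import numpy as np
from sklearn import datasets, linear_model, model_selection

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=0)

# RidgeCV cross-validates over the supplied alpha grid during fit()
regr = linear_model.RidgeCV(alphas=np.logspace(-2, 3, num=50))
regr.fit(X_train, y_train)
print('best alpha: %s' % regr.alpha_)
print('test score: %.2f' % regr.score(X_test, y_test))

Selecting alpha this way keeps the test set out of the tuning loop, which the manual sweep does not.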

03. Lasso Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

# Load the diabetes dataset and return (X_train, X_test, y_train, y_test)
def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

# Lasso regression with the default regularization strength
def test_Lasso(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.Lasso()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Mean squared error: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

# Effect of alpha on test performance
def test_Lasso_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:
        regr = linear_model.Lasso(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    # Use a log scale for the x axis
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_Lasso(X_train, X_test, y_train, y_test)
test_Lasso_alpha(X_train, X_test, y_train, y_test)

04. ElasticNet Regression

import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib
from sklearn import datasets, linear_model, model_selection

# Load the diabetes dataset and return (X_train, X_test, y_train, y_test)
def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

# ElasticNet regression with default parameters
def test_ElasticNet(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Mean squared error: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

# Sweep alpha and the L1 ratio rho, then plot the score surface
def test_ElasticNet_alpha_rho(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 2)
    rhos = np.linspace(0.01, 1)
    scores = []
    for alpha in alphas:        # outer loop over alpha
        for rho in rhos:        # inner loop over rho
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    # scores is alpha-major, so reshape to (alpha, rho) and transpose to match
    # meshgrid's (rho, alpha) layout
    score_grid = np.array(scores).reshape(len(alphas), len(rhos)).T
    alpha_grid, rho_grid = np.meshgrid(alphas, rhos)
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in recent matplotlib
    surf = ax.plot_surface(alpha_grid, rho_grid, score_grid, rstride=1, cstride=1,
                           cmap=cm.jet, linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_ElasticNet(X_train, X_test, y_train, y_test)
test_ElasticNet_alpha_rho(X_train, X_test, y_train, y_test)

05. Logistic Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

# Load the iris dataset with a stratified train/test split
def load_data():
    iris = datasets.load_iris()
    X_train = iris.data
    y_train = iris.target
    return model_selection.train_test_split(X_train, y_train, test_size=0.25,
                                            random_state=0, stratify=y_train)

# Logistic regression with default parameters
def test_LogisticRegression(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.LogisticRegression()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %s' % (regr.coef_, regr.intercept_))
    print('Score: %.2f' % regr.score(X_test, y_test))

# Multinomial (softmax) logistic regression
def test_LogisticRegression_multinomial(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs')
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %s' % (regr.coef_, regr.intercept_))
    print('Score: %.2f' % regr.score(X_test, y_test))

# Effect of the inverse regularization strength C
def test_LogisticRegression_C(*data):
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-2, 4, num=100)
    scores = []
    for C in Cs:
        regr = linear_model.LogisticRegression(C=C)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, scores)
    ax.set_xlabel("C")
    ax.set_ylabel("score")
    ax.set_xscale('log')
    ax.set_title("LogisticRegression")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_LogisticRegression(X_train, X_test, y_train, y_test)
test_LogisticRegression_multinomial(X_train, X_test, y_train, y_test)
test_LogisticRegression_C(X_train, X_test, y_train, y_test)
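Besides the accuracy score, a fitted logistic regression can report per-class probabilities. A minimal sketch reusing the load_data helper above; inspecting the first test sample is an arbitrary illustrative choice:

X_train, X_test, y_train, y_test = load_data()
regr = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs')
regr.fit(X_train, y_train)
# predict_proba returns one probability per class; each row sums to 1
print('classes:', regr.classes_)
print('probabilities:', regr.predict_proba(X_test[:1]))
print('predicted label:', regr.predict(X_test[:1]))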

06. Decision Tree Regression

import matplotlib.pyplot as plt
import numpy as np
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor

# Generate a noisy sine dataset: X uniform in [0, 5), noise added to every fifth target
def create_data(n):
    np.random.seed(0)
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    noise_num = n // 5
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    return model_selection.train_test_split(X, y, test_size=0.25, random_state=1)

# Decision tree regression with default parameters
def test_DecisionTreeRegression(*data):
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("Training score: %f" % regr.score(X_train, y_train))
    print("Testing score: %f" % regr.score(X_test, y_test))
    # Plot the samples and the fitted piecewise-constant prediction curve
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample", c='g')
    ax.scatter(X_test, y_test, label="test sample", c='r')
    ax.plot(X, Y, label="predict_value", linewidth=2, alpha=0.5)
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()

# Compare random splits against best splits
def test_DecisionTreeRegression_splitter(*data):
    X_train, X_test, y_train, y_test = data
    splitters = ['random', 'best']
    for splitter in splitters:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter %s" % splitter)
        print("Training score: %f" % regr.score(X_train, y_train))
        print("Testing score: %f" % regr.score(X_test, y_test))

# Effect of the maximum tree depth
def test_DecisionTreeRegression_depth(*data, maxdepth):
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    # Plot training and testing scores against depth
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(depths, training_scores, label="training score")
    ax.plot(depths, testing_scores, label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()

X_train, X_test, y_train, y_test = create_data(100)
test_DecisionTreeRegression(X_train, X_test, y_train, y_test)
test_DecisionTreeRegression_splitter(X_train, X_test, y_train, y_test)
test_DecisionTreeRegression_depth(X_train, X_test, y_train, y_test, maxdepth=20)

07. Classification Decision Tree

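A minimal sketch of a classification decision tree on the iris dataset, following the same pattern as the regression example in section 06. The load_data helper and the comparison of the two impurity criteria are illustrative choices, not taken from the book:

from sklearn import datasets, model_selection
from sklearn.tree import DecisionTreeClassifier

# Load the iris dataset with a stratified train/test split
def load_data():
    iris = datasets.load_iris()
    return model_selection.train_test_split(iris.data, iris.target,
                                            test_size=0.25, random_state=0,
                                            stratify=iris.target)

# Fit a classification tree and compare the two impurity criteria
def test_DecisionTreeClassifier(*data):
    X_train, X_test, y_train, y_test = data
    for criterion in ['gini', 'entropy']:
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        print("Criterion %s" % criterion)
        print("Training score: %f" % clf.score(X_train, y_train))
        print("Testing score: %f" % clf.score(X_test, y_test))

X_train, X_test, y_train, y_test = load_data()
test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)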

08. Bayesian Classifiers

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, model_selection, naive_bayes

# Show the first 25 digit images and the flattened vector of image 0
def show_digits():
    digits = datasets.load_digits()
    fig = plt.figure()
    print("vector from image 0:", digits.data[0])
    for i in range(25):
        ax = fig.add_subplot(5, 5, i + 1)
        ax.imshow(digits.images[i], cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()

# Load the digits dataset
def load_data():
    digits = datasets.load_digits()
    return model_selection.train_test_split(digits.data, digits.target,
                                            test_size=0.25, random_state=0)

# Gaussian naive Bayes classifier
def test_GaussianNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.GaussianNB()
    cls.fit(X_train, y_train)
    print('Training score: %f' % cls.score(X_train, y_train))
    print('Testing score: %f' % cls.score(X_test, y_test))

# Multinomial naive Bayes classifier
def test_MultinomialNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.MultinomialNB()
    cls.fit(X_train, y_train)
    print('Training score: %.2f' % cls.score(X_train, y_train))
    print('Testing score: %.2f' % cls.score(X_test, y_test))

# Effect of the smoothing parameter alpha on MultinomialNB
def test_MultinomialNB_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_score = []
    test_score = []
    for alpha in alphas:
        cls = naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_score, label="Training score")
    ax.plot(alphas, test_score, label="Testing score")
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.0)
    ax.set_title("MultinomialNB")
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

# Bernoulli naive Bayes classifier
def test_BernoulliNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.BernoulliNB()
    cls.fit(X_train, y_train)
    print('Training score: %.2f' % cls.score(X_train, y_train))
    print('Testing score: %.2f' % cls.score(X_test, y_test))

# Effect of alpha on BernoulliNB
def test_BernoulliNB_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_score = []
    test_score = []
    for alpha in alphas:
        cls = naive_bayes.BernoulliNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_score, label="Training score")
    ax.plot(alphas, test_score, label="Testing score")
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_title("BernoulliNB")
    ax.set_ylim(0, 1.0)
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

# Effect of the binarize threshold on BernoulliNB
def test_BernoulliNB_binarize(*data):
    X_train, X_test, y_train, y_test = data
    min_x = min(np.min(X_train.ravel()), np.min(X_test.ravel())) - 0.1
    max_x = max(np.max(X_train.ravel()), np.max(X_test.ravel())) + 0.1
    binarizes = np.linspace(min_x, max_x, endpoint=True, num=100)
    train_score = []
    test_score = []
    for binarize in binarizes:
        cls = naive_bayes.BernoulliNB(binarize=binarize)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(binarizes, train_score, label="Training score")
    ax.plot(binarizes, test_score, label="Testing score")
    ax.set_xlabel("binarize")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.0)
    ax.set_xlim(min_x - 1, max_x + 1)
    ax.set_title("BernoulliNB")
    ax.legend(loc="best")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_GaussianNB(X_train, X_test, y_train, y_test)
test_MultinomialNB(X_train, X_test, y_train, y_test)
test_MultinomialNB_alpha(X_train, X_test, y_train, y_test)
test_BernoulliNB(X_train, X_test, y_train, y_test)
test_BernoulliNB_alpha(X_train, X_test, y_train, y_test)
test_BernoulliNB_binarize(X_train, X_test, y_train, y_test)

09. K-Nearest Neighbors

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, model_selection, neighbors

# Load the digits dataset with a stratified split for classification
def load_classification_data():
    digits = datasets.load_digits()
    X_train = digits.data
    y_train = digits.target
    return model_selection.train_test_split(X_train, y_train, test_size=0.25,
                                            random_state=0, stratify=y_train)

# Generate a noisy sine dataset for regression
def create_regression_data(n):
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    y[::5] += 1 * (0.5 - np.random.rand(n // 5))
    return model_selection.train_test_split(X, y, test_size=0.25, random_state=0)

# KNN classification with default parameters
def test_KNeighborsClassifier(*data):
    X_train, X_test, y_train, y_test = data
    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)
    print("Training score: %f" % clf.score(X_train, y_train))
    print("Testing score: %f" % clf.score(X_test, y_test))

# Effect of k and the voting weights on classification performance
def test_KNeighborsClassifier_k_w(*data):
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_score = []
        testing_score = []
        for K in Ks:
            clf = neighbors.KNeighborsClassifier(weights=weight, n_neighbors=K)
            clf.fit(X_train, y_train)
            training_score.append(clf.score(X_train, y_train))
            testing_score.append(clf.score(X_test, y_test))
        ax.plot(Ks, training_score, label="training score: weight= %s" % weight)
        ax.plot(Ks, testing_score, label="testing score: weight= %s" % weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

# Effect of p (the Minkowski distance exponent) on classification performance
def test_KNeighborsClassifier_k_p(*data):
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for P in Ps:
        training_score = []
        testing_score = []
        for K in Ks:
            clf = neighbors.KNeighborsClassifier(p=P, n_neighbors=K)
            clf.fit(X_train, y_train)
            training_score.append(clf.score(X_train, y_train))
            testing_score.append(clf.score(X_test, y_test))
        ax.plot(Ks, training_score, label="training score: p= %d" % P)
        ax.plot(Ks, testing_score, label="testing score: p= %d" % P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

# KNN regression with default parameters
def test_KNeighborsRegressor(*data):
    X_train, X_test, y_train, y_test = data
    regr = neighbors.KNeighborsRegressor()
    regr.fit(X_train, y_train)
    print("Training score: %f" % regr.score(X_train, y_train))
    print("Testing score: %f" % regr.score(X_test, y_test))

# Effect of k and the voting weights on regression performance
def test_KNeighborsRegressor_k_w(*data):
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for K in Ks:
            regr = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=K)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(Ks, training_scores, label="training score: weight= %s" % weight)
        ax.plot(Ks, testing_scores, label="testing score: weight= %s" % weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

# Effect of p on regression performance
def test_KNeighborsRegressor_k_p(*data):
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for P in Ps:
        training_scores = []
        testing_scores = []
        for K in Ks:
            regr = neighbors.KNeighborsRegressor(p=P, n_neighbors=K)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(Ks, training_scores, label="training score: p= %d" % P)
        ax.plot(Ks, testing_scores, label="testing score: p= %d" % P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

#X_train, X_test, y_train, y_test = load_classification_data()
#test_KNeighborsClassifier(X_train, X_test, y_train, y_test)
#test_KNeighborsClassifier_k_w(X_train, X_test, y_train, y_test)
#test_KNeighborsClassifier_k_p(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor_k_w(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = create_regression_data(1000)
test_KNeighborsRegressor_k_p(X_train, X_test, y_train, y_test)
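The manual sweeps over k, the voting weights, and p above can be folded into a single cross-validated grid search. A minimal sketch using model_selection.GridSearchCV; the parameter grid and cv=5 are illustrative choices, not from the book:

from sklearn import datasets, model_selection, neighbors

X, y = datasets.load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y)

# 5-fold cross-validation over k, the voting scheme, and the Minkowski exponent
param_grid = {'n_neighbors': [1, 3, 5, 10, 20],
              'weights': ['uniform', 'distance'],
              'p': [1, 2]}
search = model_selection.GridSearchCV(neighbors.KNeighborsClassifier(), param_grid, cv=5)
search.fit(X_train, y_train)
print('best params:', search.best_params_)
print('test score: %.3f' % search.score(X_test, y_test))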

10. Dimensionality Reduction

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, decomposition, manifold

# Load the iris dataset and return (data, labels)
def load_data():
    iris = datasets.load_iris()
    return iris.data, iris.target

PCA

# Fit PCA keeping all components and report the explained variance ratios
def test_PCA(*data):
    X, y = data
    pca = decomposition.PCA(n_components=None)
    pca.fit(X)
    print('explained variance ratio: %s' % str(pca.explained_variance_ratio_))

# Project onto the first two principal components and plot by class
def plot_PCA(*data):
    X, y = data
    pca = decomposition.PCA(n_components=2)
    pca.fit(X)
    X_r = pca.transform(X)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for label, color in zip(np.unique(y), colors):
        position = y == label
        ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
    ax.set_xlabel("X[0]")
    ax.set_ylabel("X[1]")
    ax.legend(loc="best")
    ax.set_title("PCA")
    plt.show()
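A common rule for choosing n_components is to keep enough components to explain a target fraction of the variance. A minimal sketch on the same iris data; the 95% threshold is an illustrative choice:

import numpy as np
from sklearn import datasets, decomposition

X, y = datasets.load_iris(return_X_y=True)
pca = decomposition.PCA(n_components=None).fit(X)
cumulative = np.cumsum(pca.explained_variance_ratio_)
# Smallest number of components whose cumulative ratio reaches 95%
n_components = int(np.searchsorted(cumulative, 0.95) + 1)
print('cumulative ratios:', cumulative)
print('components for 95%% variance: %d' % n_components)

Passing a float in (0, 1) as n_components, e.g. PCA(n_components=0.95), selects this count automatically.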

KPCA

# KernelPCA: try each kernel and print the eigenvalues of the centered kernel matrix
def test_KPCA(*data):
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(X)
        # lambdas_ was renamed eigenvalues_ in newer scikit-learn releases
        print('kernel= %s ----> lambdas: %s' % (kernel, kpca.lambdas_))

# Scatter plots of the samples after 2-D kernel PCA, one subplot per kernel
def plot_KPCA(*data):
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    # iris has only three classes, so only the first three colors are used
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel= %s" % kernel)
    plt.suptitle("KPCA")
    plt.show()

Effect of the polynomial (poly) kernel parameters

# Effect of the polynomial kernel parameters; the kernel is (gamma * x.z + coef0) ** degree
def plot_KPCA_poly(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    # (degree, gamma, coef0) combinations
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$(%s (x \cdot z)+%s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()

Effect of the Gaussian (RBF) kernel parameter

# Effect of the RBF kernel parameter gamma
def plot_KPCA_rbf(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Gammas = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gammas):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()

Effect of the sigmoid kernel parameters

# Effect of the sigmoid kernel parameters (gamma, coef0)
def plot_KPCA_sigmoid(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid',
                                       gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z)+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()

Invoking the functions

#X, y = load_data()
#test_PCA(X, y)
#X, y = load_data()
#plot_PCA(X, y)
#X, y = load_data()
#test_KPCA(X, y)
#X, y = load_data()
#plot_KPCA(X, y)
#X, y = load_data()
#plot_KPCA_poly(X, y)
#X, y = load_data()
#plot_KPCA_rbf(X, y)
X, y = load_data()
plot_KPCA_sigmoid(X, y)

MDS

# MDS: report the stress for several target dimensions
def test_MDS(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        mds = manifold.MDS(n_components=n)
        mds.fit(X)
        print('stress(n_components=%d): %s' % (n, str(mds.stress_)))

# Scatter plot of the samples after 2-D MDS
def plot_MDS(*data):
    X, y = data
    mds = manifold.MDS(n_components=2)
    X_r = mds.fit_transform(X)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for label, color in zip(np.unique(y), colors):
        position = y == label
        ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
    ax.set_xlabel("X[0]")
    ax.set_ylabel("X[1]")
    ax.legend(loc="best")
    ax.set_title("MDS")
    plt.show()

Isomap

# Isomap: report the reconstruction error for several target dimensions
def test_Isomap(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        isomap = manifold.Isomap(n_components=n)
        isomap.fit(X)
        print('reconstruction_error(n_components= %d): %s' % (n, isomap.reconstruction_error()))

# Scatter plots after 2-D Isomap for several neighborhood sizes k
def plot_Isomap_k(*data):
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        isomap = manifold.Isomap(n_components=2, n_neighbors=k)
        X_r = isomap.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("Isomap")
    plt.show()

# Compress the features directly down to one dimension
def plot_Isomap_k_d1(*data):
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        isomap = manifold.Isomap(n_components=1, n_neighbors=k)
        X_r = isomap.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            # Plot the 1-D embedding along the x axis, with y fixed at zero
            ax.scatter(X_r[position, 0], np.zeros_like(X_r[position, 0]),
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("Isomap")
    plt.show()

LLE

# LLE: report the reconstruction error for several target dimensions
def test_LocallyLinearEmbedding(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        lle = manifold.LocallyLinearEmbedding(n_components=n)
        lle.fit(X)
        print('reconstruction_error(n_components= %d): %s' % (n, lle.reconstruction_error_))

# Scatter plots after 2-D LLE for several neighborhood sizes k
def plot_LocallyLinearEmbedding_k(*data):
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        lle = manifold.LocallyLinearEmbedding(n_components=2, n_neighbors=k)
        X_r = lle.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()

# Compress the features directly down to one dimension
def plot_LocallyLinearEmbedding_k_d1(*data):
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        lle = manifold.LocallyLinearEmbedding(n_components=1, n_neighbors=k)
        X_r = lle.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            # Plot the 1-D embedding along the x axis, with y fixed at zero
            ax.scatter(X_r[position, 0], np.zeros_like(X_r[position, 0]),
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()