Reference: the book "Python大战机器学习" (Python vs. Machine Learning).

Contents

01. Linear Models
02. Ridge Regression
03. Lasso Regression
04. ElasticNet Regression
05. Logistic Regression
06. Decision Tree Regression
07. Classification Decision Trees
08. Naive Bayes Classifiers
09. K-Nearest Neighbors
10. Dimensionality Reduction
01. Linear Models
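Ordinary least squares fits a linear model by minimizing the squared training error. The script below trains sklearn's LinearRegression on the diabetes dataset and reports the learned coefficients, the mean squared residual on the test set, and the test score (R^2).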
import numpy as np
from sklearn import datasets, linear_model, model_selection

def load_data():
    # 75/25 train/test split of the diabetes regression dataset.
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

def test_LinearRegression(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.LinearRegression()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Residual sum of squares: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

X_train, X_test, y_train, y_test = load_data()
test_LinearRegression(X_train, X_test, y_train, y_test)
02. Ridge Regression
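Ridge regression adds an L2 penalty, alpha * ||w||^2, to the least-squares objective, shrinking the coefficients toward zero. The script fits a default Ridge model, then sweeps alpha over several orders of magnitude and plots the test score on a log axis.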
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

def test_Ridge(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.Ridge()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Residual sum of squares: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_Ridge_alpha(*data):
    # Sweep the regularization strength and plot the test score against alpha.
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:
        regr = linear_model.Ridge(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_xscale('log')
    ax.set_title("Ridge")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_Ridge(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_Ridge_alpha(X_train, X_test, y_train, y_test)
03. Lasso Regression
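Lasso uses an L1 penalty, alpha * ||w||_1, instead, which drives some coefficients exactly to zero and so performs feature selection. The alpha sweep below mirrors the ridge one.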
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

def test_Lasso(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.Lasso()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Residual sum of squares: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_Lasso_alpha(*data):
    # Sweep the L1 regularization strength and plot the test score against alpha.
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:
        regr = linear_model.Lasso(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_Lasso(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_Lasso_alpha(X_train, X_test, y_train, y_test)
04. ElasticNet Regression
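ElasticNet mixes the two penalties: alpha sets the overall regularization strength and l1_ratio (rho here) the balance between L1 and L2. The script sweeps both and draws the test score as a 3-D surface.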
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from sklearn import datasets, linear_model, model_selection

def load_data():
    diabetes = datasets.load_diabetes()
    return model_selection.train_test_split(diabetes.data, diabetes.target,
                                            test_size=0.25, random_state=0)

def test_ElasticNet(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.ElasticNet()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %.2f' % (regr.coef_, regr.intercept_))
    print('Residual sum of squares: %.2f' % np.mean((regr.predict(X_test) - y_test) ** 2))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_ElasticNet_alpha_rho(*data):
    # Grid-search alpha and the L1 ratio rho, then plot the test score as a surface.
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 2)   # 50 values by default
    rhos = np.linspace(0.01, 1)   # 50 values by default
    scores = []
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    # meshgrid is rho-major while the loop above is alpha-major, so transpose the scores.
    alphas_grid, rhos_grid = np.meshgrid(alphas, rhos)
    scores = np.array(scores).reshape(len(alphas), len(rhos)).T
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # the old Axes3D(fig) style is deprecated
    surf = ax.plot_surface(alphas_grid, rhos_grid, scores, rstride=1, cstride=1,
                           cmap=cm.jet, linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_ElasticNet(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_ElasticNet_alpha_rho(X_train, X_test, y_train, y_test)
05. Logistic Regression
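Logistic regression fits a linear model of class probabilities. The script classifies the iris dataset three ways: with the defaults, with a true multinomial (softmax) model, and across a sweep of the inverse regularization strength C.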
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model, model_selection

def load_data():
    # Stratified 75/25 split of the iris classification dataset.
    iris = datasets.load_iris()
    X_train = iris.data
    y_train = iris.target
    return model_selection.train_test_split(X_train, y_train, test_size=0.25,
                                            random_state=0, stratify=y_train)

def test_LogisticRegression(*data):
    X_train, X_test, y_train, y_test = data
    regr = linear_model.LogisticRegression()
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %s' % (regr.coef_, regr.intercept_))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_LogisticRegression_multinomial(*data):
    # Use a true multinomial (softmax) model instead of one-vs-rest.
    X_train, X_test, y_train, y_test = data
    regr = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs')
    regr.fit(X_train, y_train)
    print('Coefficients: %s, intercept: %s' % (regr.coef_, regr.intercept_))
    print('Score: %.2f' % regr.score(X_test, y_test))

def test_LogisticRegression_C(*data):
    # Sweep the inverse regularization strength C and plot the test score.
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-2, 4, num=100)
    scores = []
    for C in Cs:
        regr = linear_model.LogisticRegression(C=C)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, scores)
    ax.set_xlabel("C")
    ax.set_ylabel("score")
    ax.set_xscale('log')
    ax.set_title("LogisticRegression")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_LogisticRegression(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_LogisticRegression_multinomial(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_LogisticRegression_C(X_train, X_test, y_train, y_test)
06. Decision Tree Regression
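A decision tree regressor fits a piecewise-constant function by recursively splitting the input space. The script visualizes the fit on noisy sin(x) data, compares the 'best' and 'random' split strategies, and sweeps max_depth to show how an unconstrained tree overfits the noise.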
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn import model_selection
import matplotlib.pyplot as plt

def creat_data(n):
    # y = sin(x) on [0, 5) with noise added to every fifth sample.
    np.random.seed(0)
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    noise_num = int(n / 5)
    y[::5] += 3 * (0.5 - np.random.rand(noise_num))
    return model_selection.train_test_split(X, y, test_size=0.25, random_state=1)

def test_DecisionTreeRegression(*data):
    X_train, X_test, y_train, y_test = data
    regr = DecisionTreeRegressor()
    regr.fit(X_train, y_train)
    print("Training score: %f" % regr.score(X_train, y_train))
    print("Testing score: %f" % regr.score(X_test, y_test))
    # Plot the fitted regression curve against the train/test samples.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    X = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
    Y = regr.predict(X)
    ax.scatter(X_train, y_train, label="train sample", c='g')
    ax.scatter(X_test, y_test, label="test sample", c='r')
    ax.plot(X, Y, label="predict_value", linewidth=2, alpha=0.5)
    ax.set_xlabel("data")
    ax.set_ylabel("target")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()

def test_DecisionTreeRegression_splitter(*data):
    # Compare the 'random' and 'best' split strategies.
    X_train, X_test, y_train, y_test = data
    splitters = ['random', 'best']
    for splitter in splitters:
        regr = DecisionTreeRegressor(splitter=splitter)
        regr.fit(X_train, y_train)
        print("Splitter %s" % splitter)
        print("Training score: %f" % regr.score(X_train, y_train))
        print("Testing score: %f" % regr.score(X_test, y_test))

def test_DecisionTreeRegression_depth(*data, maxdepth):
    # Sweep max_depth and plot train/test scores to expose overfitting.
    X_train, X_test, y_train, y_test = data
    depths = np.arange(1, maxdepth)
    training_scores = []
    testing_scores = []
    for depth in depths:
        regr = DecisionTreeRegressor(max_depth=depth)
        regr.fit(X_train, y_train)
        training_scores.append(regr.score(X_train, y_train))
        testing_scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(depths, training_scores, label="training score")
    ax.plot(depths, testing_scores, label="testing score")
    ax.set_xlabel("maxdepth")
    ax.set_ylabel("score")
    ax.set_title("Decision Tree Regression")
    ax.legend(framealpha=0.5)
    plt.show()

X_train, X_test, y_train, y_test = creat_data(100)
test_DecisionTreeRegression(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = creat_data(100)
test_DecisionTreeRegression_splitter(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = creat_data(100)
test_DecisionTreeRegression_depth(X_train, X_test, y_train, y_test, maxdepth=20)
07. Classification Decision Trees
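A classification tree predicts a discrete label at each leaf rather than a constant value. What follows is a minimal sketch of the classification counterpart of the previous section, assuming the iris dataset and sklearn's DecisionTreeClassifier; the criterion comparison plays the role the splitter comparison plays in section 06.

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets, model_selection

def load_classification_data():
    # Stratified 75/25 split of the iris dataset, mirroring load_data() elsewhere.
    iris = datasets.load_iris()
    return model_selection.train_test_split(iris.data, iris.target,
                                            test_size=0.25, random_state=0,
                                            stratify=iris.target)

def test_DecisionTreeClassifier(*data):
    X_train, X_test, y_train, y_test = data
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    print("Training score: %f" % clf.score(X_train, y_train))
    print("Testing score: %f" % clf.score(X_test, y_test))

def test_DecisionTreeClassifier_criterion(*data):
    # Compare the two impurity criteria supported by the classifier.
    X_train, X_test, y_train, y_test = data
    for criterion in ['gini', 'entropy']:
        clf = DecisionTreeClassifier(criterion=criterion)
        clf.fit(X_train, y_train)
        print("criterion: %s" % criterion)
        print("Training score: %f" % clf.score(X_train, y_train))
        print("Testing score: %f" % clf.score(X_test, y_test))

X_train, X_test, y_train, y_test = load_classification_data()
test_DecisionTreeClassifier(X_train, X_test, y_train, y_test)
test_DecisionTreeClassifier_criterion(X_train, X_test, y_train, y_test)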
08. Naive Bayes Classifiers
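Naive Bayes classifiers assume the features are conditionally independent given the class; the Gaussian, multinomial, and Bernoulli variants differ only in the per-feature likelihood model. The script evaluates all three on the handwritten digits dataset and sweeps the smoothing parameter alpha and, for BernoulliNB, the binarization threshold.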
from sklearn import datasets, model_selection, naive_bayes
import numpy as np
import matplotlib.pyplot as plt

def show_digits():
    # Display the first 25 handwritten digits and print the first feature vector.
    digits = datasets.load_digits()
    fig = plt.figure()
    print("vector from image 0:", digits.data[0])
    for i in range(25):
        ax = fig.add_subplot(5, 5, i + 1)
        ax.imshow(digits.images[i], cmap=plt.cm.gray_r, interpolation='nearest')
    plt.show()

def load_data():
    digits = datasets.load_digits()
    return model_selection.train_test_split(digits.data, digits.target,
                                            test_size=0.25, random_state=0)

def test_GaussianNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.GaussianNB()
    cls.fit(X_train, y_train)
    print('Training score: %f' % cls.score(X_train, y_train))
    print('Testing score: %f' % cls.score(X_test, y_test))

def test_MultinomialNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.MultinomialNB()
    cls.fit(X_train, y_train)
    print('Training score: %.2f' % cls.score(X_train, y_train))
    print('Testing score: %.2f' % cls.score(X_test, y_test))

def test_MultinomialNB_alpha(*data):
    # Sweep the smoothing parameter alpha and plot train/test scores.
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_score = []
    test_score = []
    for alpha in alphas:
        cls = naive_bayes.MultinomialNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_score, label="Training score")
    ax.plot(alphas, test_score, label="Testing score")
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.0)
    ax.set_title("MultinomialNB")
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

def test_BernoulliNB(*data):
    X_train, X_test, y_train, y_test = data
    cls = naive_bayes.BernoulliNB()
    cls.fit(X_train, y_train)
    print('Training score: %.2f' % cls.score(X_train, y_train))
    print('Testing score: %.2f' % cls.score(X_test, y_test))

def test_BernoulliNB_alpha(*data):
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 5, num=200)
    train_score = []
    test_score = []
    for alpha in alphas:
        cls = naive_bayes.BernoulliNB(alpha=alpha)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, train_score, label="Training score")
    ax.plot(alphas, test_score, label="Testing score")
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel("score")
    ax.set_title("BernoulliNB")
    ax.set_ylim(0, 1.0)
    ax.set_xscale("log")
    ax.legend(loc="best")
    plt.show()

def test_BernoulliNB_binarize(*data):
    # Sweep the binarization threshold over the observed feature range.
    X_train, X_test, y_train, y_test = data
    min_x = min(np.min(X_train.ravel()), np.min(X_test.ravel())) - 0.1
    max_x = max(np.max(X_train.ravel()), np.max(X_test.ravel())) + 0.1
    binarizes = np.linspace(min_x, max_x, endpoint=True, num=100)
    train_score = []
    test_score = []
    for binarize in binarizes:
        cls = naive_bayes.BernoulliNB(binarize=binarize)
        cls.fit(X_train, y_train)
        train_score.append(cls.score(X_train, y_train))
        test_score.append(cls.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(binarizes, train_score, label="Training score")
    ax.plot(binarizes, test_score, label="Testing score")
    ax.set_xlabel("binarize")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.0)
    ax.set_xlim(min_x - 1, max_x + 1)
    ax.set_title("BernoulliNB")
    ax.legend(loc="best")
    plt.show()

X_train, X_test, y_train, y_test = load_data()
test_GaussianNB(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_MultinomialNB(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_MultinomialNB_alpha(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB_alpha(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_data()
test_BernoulliNB_binarize(X_train, X_test, y_train, y_test)
09. K-Nearest Neighbors
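k-nearest-neighbor methods predict by voting (classification) or averaging (regression) over the K closest training samples. The script sweeps K under uniform and distance weighting and under several Minkowski exponents p, for a digits classifier and a noisy sin(x) regressor.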
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors, datasets, model_selection

def load_classification_data():
    # Stratified split of the digits dataset for the classifier tests.
    digits = datasets.load_digits()
    X_train = digits.data
    y_train = digits.target
    return model_selection.train_test_split(X_train, y_train, test_size=0.25,
                                            random_state=0, stratify=y_train)

def creat_regression_data(n):
    # y = sin(x) with noise on every fifth sample.
    X = 5 * np.random.rand(n, 1)
    y = np.sin(X).ravel()
    y[::5] += 1 * (0.5 - np.random.rand(int(n / 5)))
    return model_selection.train_test_split(X, y, test_size=0.25, random_state=0)

def test_KNeighborsClassifier(*data):
    X_train, X_test, y_train, y_test = data
    clf = neighbors.KNeighborsClassifier()
    clf.fit(X_train, y_train)
    print("Training score: %f" % clf.score(X_train, y_train))
    print("Testing score: %f" % clf.score(X_test, y_test))

def test_KNeighborsClassifier_k_w(*data):
    # Sweep the neighbor count K under both weighting schemes.
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_score = []
        testing_score = []
        for K in Ks:
            clf = neighbors.KNeighborsClassifier(weights=weight, n_neighbors=K)
            clf.fit(X_train, y_train)
            training_score.append(clf.score(X_train, y_train))
            testing_score.append(clf.score(X_test, y_test))
        ax.plot(Ks, training_score, label="training score: weight= %s" % weight)
        ax.plot(Ks, testing_score, label="testing score: weight= %s" % weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

def test_KNeighborsClassifier_k_p(*data):
    # Sweep K for several Minkowski distance exponents p.
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for P in Ps:
        training_score = []
        testing_score = []
        for K in Ks:
            clf = neighbors.KNeighborsClassifier(p=P, n_neighbors=K)
            clf.fit(X_train, y_train)
            training_score.append(clf.score(X_train, y_train))
            testing_score.append(clf.score(X_test, y_test))
        ax.plot(Ks, training_score, label="training score: p= %d" % P)
        ax.plot(Ks, testing_score, label="testing score: p= %d" % P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsClassifier")
    plt.show()

def test_KNeighborsRegressor(*data):
    X_train, X_test, y_train, y_test = data
    regr = neighbors.KNeighborsRegressor()
    regr.fit(X_train, y_train)
    print("Training score: %f" % regr.score(X_train, y_train))
    print("Testing score: %f" % regr.score(X_test, y_test))

def test_KNeighborsRegressor_k_w(*data):
    # Same K/weights sweep as the classifier version.
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    weights = ['uniform', 'distance']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for weight in weights:
        training_scores = []
        testing_scores = []
        for K in Ks:
            regr = neighbors.KNeighborsRegressor(weights=weight, n_neighbors=K)
            regr.fit(X_train, y_train)
            testing_scores.append(regr.score(X_test, y_test))
            training_scores.append(regr.score(X_train, y_train))
        ax.plot(Ks, testing_scores, label="testing score: weight= %s" % weight)
        ax.plot(Ks, training_scores, label="training score: weight= %s" % weight)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

def test_KNeighborsRegressor_k_p(*data):
    # Same K/p sweep as the classifier version.
    X_train, X_test, y_train, y_test = data
    Ks = np.linspace(1, y_train.size, num=100, endpoint=False, dtype='int')
    Ps = [1, 2, 10]
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for P in Ps:
        training_scores = []
        testing_scores = []
        for K in Ks:
            regr = neighbors.KNeighborsRegressor(p=P, n_neighbors=K)
            regr.fit(X_train, y_train)
            training_scores.append(regr.score(X_train, y_train))
            testing_scores.append(regr.score(X_test, y_test))
        ax.plot(Ks, training_scores, label="training score: p= %d" % P)
        ax.plot(Ks, testing_scores, label="testing score: p= %d" % P)
    ax.legend(loc='best')
    ax.set_xlabel("K")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.set_title("KNeighborsRegressor")
    plt.show()

X_train, X_test, y_train, y_test = creat_regression_data(1000)
test_KNeighborsRegressor(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = creat_regression_data(1000)
test_KNeighborsRegressor_k_w(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = creat_regression_data(1000)
test_KNeighborsRegressor_k_p(X_train, X_test, y_train, y_test)
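Only the regression tests are invoked above; a sketch of the matching classifier drivers, using the load_classification_data() helper defined in this section:

X_train, X_test, y_train, y_test = load_classification_data()
test_KNeighborsClassifier(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_classification_data()
test_KNeighborsClassifier_k_w(X_train, X_test, y_train, y_test)
X_train, X_test, y_train, y_test = load_classification_data()
test_KNeighborsClassifier_k_p(X_train, X_test, y_train, y_test)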
10. Dimensionality Reduction
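This final section reduces the 4-dimensional iris data with PCA, kernel PCA (linear, poly, rbf, and sigmoid kernels), MDS, Isomap, and locally linear embedding, plotting each 2-D (or 1-D) embedding colored by class.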
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, decomposition, manifold

def load_data():
    iris = datasets.load_iris()
    return iris.data, iris.target
PCA
def test_PCA(*data):
    X, y = data
    pca = decomposition.PCA(n_components=None)
    pca.fit(X)
    print('explained variance ratio: %s' % str(pca.explained_variance_ratio_))

def plot_PCA(*data):
    # Project to the first two principal components and plot by class.
    X, y = data
    pca = decomposition.PCA(n_components=2)
    pca.fit(X)
    X_r = pca.transform(X)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for label, color in zip(np.unique(y), colors):
        position = y == label
        ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                   color=color)
    ax.set_xlabel("X[0]")
    ax.set_ylabel("X[1]")
    ax.legend(loc="best")
    ax.set_title("PCA")
    plt.show()
KPCA
def test_KPCA(*data):
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    for kernel in kernels:
        kpca = decomposition.KernelPCA(n_components=None, kernel=kernel)
        kpca.fit(X)
        # lambdas_ holds the kernel-matrix eigenvalues
        # (renamed eigenvalues_ in newer scikit-learn releases).
        print('kernel= %s ----> lambdas: %s' % (kernel, kpca.lambdas_))

def plot_KPCA(*data):
    # 2-D kernel PCA embeddings, one subplot per kernel.
    X, y = data
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for i, kernel in enumerate(kernels):
        kpca = decomposition.KernelPCA(n_components=2, kernel=kernel)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("kernel= %s" % kernel)
    plt.suptitle("KPCA")
    plt.show()
Examining the effect of the polynomial (poly) kernel's parameters:
def plot_KPCA_poly(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    # (degree p, gamma, coef0 r) combinations for the poly kernel.
    Params = [(3, 1, 1), (3, 10, 1), (3, 1, 10), (3, 10, 10),
              (10, 1, 1), (10, 10, 1), (10, 1, 10), (10, 10, 10)]
    for i, (p, gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='poly',
                                       gamma=gamma, degree=p, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 4, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$(%s (x \cdot z) + %s)^{%s}$" % (gamma, r, p))
    plt.suptitle("KPCA-Poly")
    plt.show()
Examining the effect of the RBF (Gaussian) kernel's parameter:
def plot_KPCA_rbf(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Gamma = [0.5, 1, 4, 10]
    for i, gamma in enumerate(Gamma):
        kpca = decomposition.KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\exp(-%s||x-z||^2)$" % gamma)
    plt.suptitle("KPCA-rbf")
    plt.show()
Examining the effect of the sigmoid kernel's parameters:
def plot_KPCA_sigmoid(*data):
    X, y = data
    fig = plt.figure()
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    Params = [(0.01, 0.1), (0.01, 0.2), (0.1, 0.1), (0.1, 0.2), (0.2, 0.1), (0.2, 0.2)]
    for i, (gamma, r) in enumerate(Params):
        kpca = decomposition.KernelPCA(n_components=2, kernel='sigmoid',
                                       gamma=gamma, coef0=r)
        kpca.fit(X)
        X_r = kpca.transform(X)
        ax = fig.add_subplot(3, 2, i + 1)
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title(r"$\tanh(%s(x\cdot z )+%s)$" % (gamma, r))
    plt.suptitle("KPCA-sigmoid")
    plt.show()
Calling the functions:

X, y = load_data()
plot_KPCA_sigmoid(X, y)
MDS
def test_MDS(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        mds = manifold.MDS(n_components=n)
        mds.fit(X)
        print('stress(n_components=%d): %s' % (n, str(mds.stress_)))

def plot_MDS(*data):
    X, y = data
    mds = manifold.MDS(n_components=2)
    X_r = mds.fit_transform(X)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
              (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
    for label, color in zip(np.unique(y), colors):
        position = y == label
        ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                   color=color)
    ax.set_xlabel("X[0]")
    ax.set_ylabel("X[1]")
    ax.legend(loc="best")
    ax.set_title("MDS")
    plt.show()
Isomap
def test_Isomap(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        isomap = manifold.Isomap(n_components=n)
        isomap.fit(X)
        print('reconstruction_error(n_components= %d): %s'
              % (n, isomap.reconstruction_error()))

def plot_Isomap_k(*data):
    # 2-D Isomap embeddings for several neighborhood sizes k.
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        isomap = manifold.Isomap(n_components=2, n_neighbors=k)
        X_r = isomap.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("Isomap")
    plt.show()

def plot_Isomap_k_d1(*data):
    # 1-D Isomap embeddings, drawn along the x-axis (all y values are zero).
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        isomap = manifold.Isomap(n_components=1, n_neighbors=k)
        X_r = isomap.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], np.zeros_like(X_r[position, 0]),
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("Isomap")
    plt.show()
LLE
def test_LocallyLinearEmbedding(*data):
    X, y = data
    for n in [4, 3, 2, 1]:
        lle = manifold.LocallyLinearEmbedding(n_components=n)
        lle.fit(X)
        print('reconstruction_error(n_components= %d): %s'
              % (n, lle.reconstruction_error_))

def plot_LocallyLinearEmbedding_k(*data):
    # 2-D LLE embeddings for several neighborhood sizes k.
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        lle = manifold.LocallyLinearEmbedding(n_components=2, n_neighbors=k)
        X_r = lle.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], X_r[position, 1], label="target= %d" % label,
                       color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("X[1]")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()

def plot_LocallyLinearEmbedding_k_d1(*data):
    # 1-D LLE embeddings, drawn along the x-axis (all y values are zero).
    X, y = data
    Ks = [1, 5, 25, y.size - 1]
    fig = plt.figure()
    for i, k in enumerate(Ks):
        lle = manifold.LocallyLinearEmbedding(n_components=1, n_neighbors=k)
        X_r = lle.fit_transform(X)
        ax = fig.add_subplot(2, 2, i + 1)
        colors = ((1, 0, 0), (0, 1, 0), (0, 0, 1), (0.5, 0, 0.5), (0, 0.5, 0.5), (0.5, 0, 0.5),
                  (0.4, 0, 0.6), (0.6, 0.4, 0), (0, 0.6, 0.4), (0.5, 0.3, 0.2))
        for label, color in zip(np.unique(y), colors):
            position = y == label
            ax.scatter(X_r[position, 0], np.zeros_like(X_r[position, 0]),
                       label="target= %d" % label, color=color)
        ax.set_xlabel("X[0]")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        ax.set_title("k= %d" % k)
    plt.suptitle("LocallyLinearEmbedding")
    plt.show()
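Only the sigmoid KPCA plot is invoked above; the remaining functions in this section can be driven the same way. A sketch, reusing the load_data() helper defined at the top of this section (the k=1 neighborhoods in the Isomap and LLE plots can be degenerate on some scikit-learn versions):

X, y = load_data()
test_PCA(X, y)
plot_PCA(X, y)
test_KPCA(X, y)
plot_KPCA(X, y)
plot_KPCA_poly(X, y)
plot_KPCA_rbf(X, y)
test_MDS(X, y)
plot_MDS(X, y)
test_Isomap(X, y)
plot_Isomap_k(X, y)
plot_Isomap_k_d1(X, y)
test_LocallyLinearEmbedding(X, y)
plot_LocallyLinearEmbedding_k(X, y)
plot_LocallyLinearEmbedding_k_d1(X, y)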