from sklearn.linear_model import LogisticRegression  # logistic regression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis  # linear discriminant analysis
from sklearn.neighbors import KNeighborsClassifier  # k-nearest neighbors
from sklearn.tree import DecisionTreeClassifier  # decision tree (CART)
from sklearn.naive_bayes import GaussianNB  # naive Bayes
from sklearn.svm import SVC  # support vector machine
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Spot-check the algorithms (note: each estimator must be imported from sklearn before use)
models = {}
models['LR'] = LogisticRegression()
models['LDA'] = LinearDiscriminantAnalysis()
models['KNN'] = KNeighborsClassifier()
models['CART'] = DecisionTreeClassifier()
models['NB'] = GaussianNB()
models['SVM'] = SVC()
# Evaluate each algorithm with 10-fold cross-validation
results = []
for key in models:
    # shuffle=True is required when random_state is set in recent scikit-learn versions;
    # `seed` is assumed to have been defined earlier in the tutorial
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = cross_val_score(models[key], X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    print('%s: %f (%f)' % (key, cv_results.mean(), cv_results.std()))
# Validate on the hold-out set and compare the classification results of two algorithms
lr = LogisticRegression()
lr.fit(X=X_train, y=Y_train)
predictions = lr.predict(X_validation)
print(accuracy_score(Y_validation, predictions))        # accuracy
print(confusion_matrix(Y_validation, predictions))      # confusion matrix
print(classification_report(Y_validation, predictions))  # precision/recall/F1 report
# Repeat the evaluation with Gaussian naive Bayes for comparison
nb = GaussianNB()
nb.fit(X=X_train, y=Y_train)
predictions = nb.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
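
The cross-validation scores collected in `results` can also be compared visually. A minimal sketch, assuming matplotlib is available (it is not used elsewhere in this listing):

# Optional: compare the score distributions of the six models with a boxplot
import matplotlib.pyplot as plt
fig = plt.figure()
fig.suptitle('Algorithm comparison')
ax = fig.add_subplot(111)
ax.boxplot(results)                      # one box per model, in insertion order
ax.set_xticklabels(list(models.keys()))  # label boxes with the model abbreviations
plt.show()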