# NOTE(review): removed copy-paste residue (stray line numbers 1-37 and a lone "|")
# that preceded the code; the "|" was a Python syntax error.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis  # LDA
from sklearn.linear_model import LogisticRegression  # Logistic regression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB  # Gaussian Naive Bayes
from sklearn.neighbors import KNeighborsClassifier  # KNN
from sklearn.svm import SVC  # Support vector machine
from sklearn.tree import DecisionTreeClassifier  # CART
# Algorithm comparison (before using a particular algorithm, import it from the sklearn library)
# Candidate classifiers keyed by a short display name.
models = {}
models['LR'] = LogisticRegression()
models['LDA'] = LinearDiscriminantAnalysis()
models['KNN'] = KNeighborsClassifier()
models['CART'] = DecisionTreeClassifier()
models['NB'] = GaussianNB()
models['SVM'] = SVC()
# Evaluate every algorithm with ten-fold cross-validation on the training split.
# Assumes `seed`, `X_train` and `Y_train` are defined earlier in the file — TODO confirm.
results = []
for key in models:
    # shuffle=True is required by scikit-learn whenever random_state is given;
    # the original call (random_state alone) raises ValueError on current sklearn.
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = cross_val_score(models[key], X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    # Report mean accuracy and its standard deviation across the 10 folds.
    print('%s: %f (%f)' % (key, cv_results.mean(), cv_results.std()))
# Validate logistic regression on the hold-out set: fit on the training split,
# predict the validation split, then print accuracy, the confusion matrix,
# and the full per-class precision/recall/F1 report.
lr = LogisticRegression()
lr.fit(X=X_train, y=Y_train)
predictions = lr.predict(X_validation)
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))
# Repeat the hold-out evaluation with Gaussian Naive Bayes so the two
# classifiers can be compared on identical validation data.
nb = GaussianNB()
nb.fit(X=X_train, y=Y_train)
predictions = nb.predict(X_validation)
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(Y_validation, predictions))
# NOTE(review): removed trailing "|" paste artifact (Python syntax error).