# SVM
import pandas
from sklearn import svm
import numpy as np
# Names of the 35 feature columns (Feature1 .. Feature35) used as model inputs.
predictors = ['Feature{}'.format(i) for i in range(1, 36)]


def prediction_accuracy(predicted, actual):
    """Return the fraction of predictions that equal the true labels.

    Args:
        predicted: 1-D sequence/array of predicted class labels.
        actual: 1-D sequence/array of ground-truth labels, same length.

    Returns:
        A float in [0, 1]: (number of matching positions) / (number of samples).

    Raises:
        ValueError: if the inputs differ in shape or contain no samples.
    """
    predicted = np.asarray(predicted)
    actual = np.asarray(actual)
    if predicted.shape != actual.shape:
        raise ValueError(
            'predicted and actual must have the same shape, got {} and {}'.format(
                predicted.shape, actual.shape))
    if predicted.size == 0:
        raise ValueError('cannot compute accuracy of an empty prediction set')
    # Count matching positions. Summing the predicted *values* of the matching
    # rows instead would ignore every correctly predicted 0 label.
    return float(np.mean(predicted == actual))


if __name__ == '__main__':
    # Load the training set and the test set.
    train_data = pandas.read_csv("GroundTruth_and_Features_train.csv")
    test_data = pandas.read_csv("GroundTruth_and_Features_test.csv")

    clf = svm.SVC(kernel='linear')  # SVM classifier with a linear kernel
    train_predictors = train_data[predictors]
    train_target = train_data['Label']

    # Train the classifier.
    clf.fit(train_predictors, train_target)

    # predict() already returns class labels, so they are compared with the
    # ground truth directly (no 0.5 thresholding is applied).
    predictions = clf.predict(test_data[predictors])
    accuracy = prediction_accuracy(predictions, test_data['Label'])

    # Prints e.g. "准确率为87.5%" ("accuracy is 87.5%").
    print('准确率为', accuracy * 100, '%', sep='')
# Keras deep neural network
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import RMSprop
import pandas as pd
import numpy as np
import os
# Set TensorFlow's native log level variable to '2'.
# NOTE(review): this runs after the keras imports above; TensorFlow generally
# wants this variable set before it is first imported — confirm it still has
# the intended effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Names of the 35 feature columns (Feature1 .. Feature35) used as model inputs.
predictors = ['Feature{}'.format(i) for i in range(1, 36)]

if __name__ == '__main__':
    # Load the training and test data.
    train_data = pd.read_csv("GroundTruth_and_Features_train.csv")
    test_data = pd.read_csv("GroundTruth_and_Features_test.csv")

    # Feature matrices as NumPy arrays; `.values` already yields an ndarray,
    # so no round trip through Python lists is needed.
    train_features = train_data[predictors].values
    test_features = test_data[predictors].values

    # Turn the class vectors into one-hot class matrices (two classes).
    train_labels = np_utils.to_categorical(train_data['Label'].values, num_classes=2)
    test_labels = np_utils.to_categorical(test_data['Label'].values, num_classes=2)

    # Network: features -> Dense(32) + ReLU -> Dense(2) + softmax.
    model = Sequential([
        Dense(32, input_dim=len(predictors)),  # one input per feature column
        Activation('relu'),
        Dense(2),  # two output units for binary classification
        Activation('softmax'),  # softmax over the two classes
    ])

    # Optimizer with explicit learning rate, rho and epsilon.
    # `decay` is intentionally not passed: 0.0 means "no decay" and newer
    # Keras optimizers no longer accept that argument.
    rmsprop = RMSprop(
        learning_rate=0.001,
        rho=0.9,
        epsilon=1e-08,
    )

    # Configure training.
    model.compile(
        optimizer=rmsprop,
        loss='categorical_crossentropy',  # matches the one-hot labels
        metrics=['accuracy'],  # metric reported by evaluate()
    )

    print("训练中")
    # Train with fit()'s default settings (no epochs/batch_size given).
    model.fit(train_features, train_labels)

    print("测试中")
    # Evaluate on the held-out test set.
    loss, accuracy = model.evaluate(test_features, test_labels)
    print("准确率为:", accuracy)
    model.summary()