这里一开始先对数据进行预处理,导入了之前预先写好的DataHandling.m文件。
使用自己写的getKFoldData来进行每一折数据的划分,然后就跟普通训练一样:k从1到10循环去训练、测试、计算准确率,再对每一折的准确率求和并除以10,求得10-fold cross-validation的平均准确率。
import DataHandling.*
%% Part A
% Loads the data through DataHandling, makes a fixed hold-out split,
% trains a linear SVM classifier, plots the training data with its
% support vectors, and reports test-set results.
%
% NOTE(review): DataHandling is defined outside this file. From the names,
% the c* outputs are presumably the classification features/labels and the
% r* outputs the regression features/labels -- confirm against DataHandling.m.
normalise = true;
[cFtrs, cLbls, rFtrs, rLbls] = DataHandling(normalise);
% Hold-out split by row position (no shuffling is done here):
% rows 1..1500 of the c* data are used for training, the rest for testing.
cTrFtrs = cFtrs(1:1500,:);
cTeFtrs = cFtrs(1501:end,:);
% Rows 1..1000 of the r* data are used for training, the rest for testing.
rTrFtrs = rFtrs(1:1000,:);
rTeFtrs = rFtrs(1001:end,:);
% Labels are split with the same row ranges as their features.
cTrLbls = cLbls(1:1500,:);
cTeLbls = cLbls(1501:end,:);
rTrLbls = rLbls(1:1000,:);
rTeLbls = rLbls(1001:end,:);
% Classification
% Train an SVM with a linear kernel and box constraint 1 on the training split.
cMdl = fitcsvm(cTrFtrs, cTrLbls, 'KernelFunction', 'linear', 'BoxConstraint', 1);
% Predict on the test split; score holds one column per class.
[preds, score] = predict(cMdl, cTeFtrs);
% Class order of the trained model (per the fitcsvm documentation the score
% columns follow this order). Not used again in this section.
classOrder = cMdl.ClassNames;
sv = cMdl.SupportVectors;
% Scatter the first two feature columns of the training data, grouped by
% label, and circle the support vectors (also first two columns only).
figure
gscatter(cTrFtrs(:,1),cTrFtrs(:,2),cTrLbls)
hold on
plot(sv(:,1),sv(:,2),'ko','MarkerSize',5)
% NOTE(review): the legend text is hard-coded; it assumes gscatter draws the
% 'Obesity' group first and the 'Not Obesity' group second -- confirm
% against the label encoding returned by DataHandling.
legend('Obesity','Not Obesity','Support Vector')
hold off
% for test set
% f1Score is a helper defined outside this file; the missing semicolon
% echoes its result to the command window.
f1Score(preds, cTeLbls)
% Show the first 10 test rows: true label, predicted label, and the score
% in the second score column. Requires at least 10 test rows.
table(cTeLbls(1:10),preds(1:10),score(1:10,2),'VariableNames', {'TrueLabel','PredictedLabel','Score'})
%% k-fold cross validation for classification
% Estimates the accuracy of the linear SVM with k-fold cross-validation:
% for each fold i, train on the other k-1 folds, test on fold i, and
% average the k per-fold accuracies.
k = 10;
sum_accuracy_svm = 0;
for i = 1:k
    % Store the fold's training data in its own variables. Writing it back
    % into cFtrs/cLbls would shrink the data set on every iteration, so
    % later folds would be cut from an already-reduced set instead of
    % from the full data.
    [foldTrFtrs, foldTrLbls, validFtrs, validLbls] = getKFoldData(cFtrs, cLbls, i, k);
    % Train the model on the k-1 training folds.
    classifer = fitcsvm(foldTrFtrs, foldTrLbls, 'KernelFunction', 'linear', 'BoxConstraint', 1);
    % Predict the held-out fold.
    predict_label = predict(classifer, validFtrs);
    % Fraction of correct predictions on the held-out fold; the statement is
    % left unsuppressed on purpose so each fold's accuracy is echoed.
    accuracy_svm = nnz(predict_label == validLbls) / numel(validLbls)
    sum_accuracy_svm = sum_accuracy_svm + accuracy_svm;
end
% Average accuracy over the k folds.
mean_accuracy_svm = sum_accuracy_svm / k;
disp('Classification: Average cross_validation accuracy :');
disp( mean_accuracy_svm);
function [trainFtrs, trainLbls, validFtrs, validLbls] = getKFoldData(ftrs, lbls, i, k)
%GETKFOLDDATA Split row-wise data into validation fold i and the other folds.
%   [trainFtrs, trainLbls, validFtrs, validLbls] = getKFoldData(ftrs, lbls, i, k)
%   cuts the rows of ftrs/lbls into k contiguous folds of equal size
%   floor(size(ftrs,1)/k), in their existing order (no shuffling).
%
%   Inputs:
%     ftrs - feature array, one sample per row.
%     lbls - label array with the same number of rows as ftrs.
%     i    - index of the fold to hold out, an integer in 1..k.
%     k    - number of folds, a positive integer.
%
%   Outputs:
%     trainFtrs, trainLbls - rows of the k-1 folds other than fold i,
%                            in their original order.
%     validFtrs, validLbls - rows of fold i.
%
%   Rows beyond k*foldSize (the remainder when the row count is not a
%   multiple of k) appear in neither output. Outputs keep the class of the
%   inputs, so non-double labels (logical, categorical, cell, ...) are
%   returned unchanged.
%
%   Errors if i or k is invalid, if ftrs and lbls have different row
%   counts, or if there are fewer than k rows.

    validateattributes(k, {'numeric'}, {'scalar', 'integer', 'positive'}, ...
        'getKFoldData', 'k');
    validateattributes(i, {'numeric'}, {'scalar', 'integer', '>=', 1, '<=', double(k)}, ...
        'getKFoldData', 'i');
    i = double(i);
    k = double(k);

    % Count samples along dimension 1. length() would return the largest
    % dimension and mis-size the folds for data with more columns than rows.
    numRows = size(ftrs, 1);
    if size(lbls, 1) ~= numRows
        error('getKFoldData:sizeMismatch', ...
            'ftrs has %d rows but lbls has %d rows.', numRows, size(lbls, 1));
    end

    foldSize = floor(numRows / k);
    if foldSize < 1
        error('getKFoldData:tooFewRows', ...
            'Need at least k = %d rows to build %d folds, got %d.', k, k, numRows);
    end

    % Rows of the held-out fold.
    validRows = (i - 1) * foldSize + (1:foldSize);

    % Training rows: everything inside the k full folds except fold i.
    % A logical mask avoids preallocating with zeros(), which would force
    % the outputs to double.
    isTrain = false(numRows, 1);
    isTrain(1:k * foldSize) = true;
    isTrain(validRows) = false;

    validFtrs = ftrs(validRows, :);
    validLbls = lbls(validRows, :);
    trainFtrs = ftrs(isTrain, :);
    trainLbls = lbls(isTrain, :);
end