sigmoid函数:
function g = sigmoid(z)
% SIGMOID Element-wise logistic function.
%   g = SIGMOID(z) returns 1 ./ (1 + exp(-z)), evaluated element by element,
%   so g has the same size as z (scalar, vector or matrix).
  denom = 1 + exp(-z);
  g = 1 ./ denom;
end
cost和gradient函数: 直接用向量化公式
function [J, grad] = costFunction(theta, X, y)
% COSTFUNCTION Logistic-regression cost and gradient (no regularization).
%   [J, grad] = COSTFUNCTION(theta, X, y) evaluates the vectorized
%   cross-entropy cost J and its gradient with respect to theta.
%
%   Expected shapes (taken from the dimension notes of the original code):
%     theta : (n+1) x 1 parameter vector
%     X     : m x (n+1) design matrix
%     y     : m x 1 label vector
%   Returns:
%     J     : 1 x 1 cost
%     grad  : (n+1) x 1 gradient
  m = length(y);

  % Hypothesis for every example, size m x 1. Computed once and reused for
  % both the cost and the gradient.
  h = sigmoid(X * theta);

  % y' * log(h) and (1 - y)' * log(1 - h) are both 1 x 1 inner products.
  J = -1 / m * (y' * log(h) + (1 - y)' * log(1 - h));

  % X' is (n+1) x m and (h - y) is m x 1, so grad is (n+1) x 1.
  grad = (X' * (h - y)) / m;
end
predict函数: 注意使用find()直接按条件批量处理
function p = predict(theta, X)
% PREDICT Classify each row of X as 0 or 1 using parameters theta.
%   p = PREDICT(theta, X) returns a column vector with one entry per row
%   of X: 1 where sigmoid(X * theta) >= 0.5, and 0 elsewhere.
  prob = sigmoid(X * theta);

  % Start from all zeros, then switch on the rows selected by find().
  p = zeros(size(X, 1), 1);
  p(find(prob >= 0.5)) = 1;
end
带Regularization(正则化)的cost和gradient函数: 直接使用向量化公式和切片计算, 注意分别计算grad(1)和grad(2:n+1)
function [J, grad] = costFunctionReg(theta, X, y, lambda)
% COSTFUNCTIONREG Regularized logistic-regression cost and gradient.
%   [J, grad] = COSTFUNCTIONREG(theta, X, y, lambda) evaluates the
%   vectorized cross-entropy cost plus an L2 penalty on theta(2:end), and
%   the matching gradient. theta(1) (the intercept term) is not penalized.
%
%   Expected shapes (taken from the dimension notes of the original code):
%     theta  : (n+1) x 1 parameter vector
%     X      : m x (n+1) design matrix
%     y      : m x 1 label vector
%     lambda : scalar regularization strength
%   Returns:
%     J      : 1 x 1 regularized cost
%     grad   : gradient, same size as theta
  m = length(y);
  n1 = length(theta);
  grad = zeros(size(theta));

  % Hypothesis (m x 1) and residual (m x 1), computed once and reused.
  h = sigmoid(X * theta);
  err = h - y;

  % L2 penalty over theta(2:n1) only. The slice is taken before squaring
  % because indexing an expression result directly, as in (a .* a)(idx),
  % is accepted by Octave but is a syntax error in MATLAB.
  penalty = lambda / (2 * m) * sum(theta(2:n1) .* theta(2:n1));

  % y' * log(h) is a 1 x 1 inner product.
  J = -1 / m * (y' * log(h) + (1 - y)' * log(1 - h)) + penalty;

  % Intercept component: X(:, 1)' is 1 x m and err is m x 1.
  % There is no regularization term for theta(1).
  grad(1) = X(:, 1)' * err / m;

  % Remaining components: X(:, 2:n1)' is n x m and err is m x 1, giving
  % n x 1, plus the derivative of the penalty.
  grad(2:n1) = (X(:, 2:n1)' * err) / m + lambda * theta(2:n1) / m;
end