前几天我们介绍了 如何利用 C# 对神经网络模型进行抽象,在这篇图文中,我们抽象了单个神经元 Neuron
,激活函数 IActivationFunction
,网络层 Layer
,网络结构 Network
,以及监督学习 ISupervisedLearning
和非监督学习 IUnsupervisedLearning
算法。通过这些抽象,我们可以得到关于神经网络的整体框架。
今天,我们就在此基础上来继承或实现这些抽象类和接口,构造最简单的一种神经网络模型 —— 单层感知器,即用感知器学习算法训练的单层神经网络。
感知器学习算法
该学习算法用于训练由采用阈值激活函数的神经元构成的单层神经网络。感知器是一种线性分类器,它根据将一组权重与特征向量相结合的线性预测函数进行预测。详细的算法可以参考维基百科中的相关部分:
https://en.wikipedia.org/wiki/Perceptron
上面,我们简要地介绍了感知器学习算法,接下来我们写相应的代码。
1. 实现激活函数 IActivationFunction
。
通常情况下感知器神经网络的激活函数有两种,第一种是阈值函数,第二种是符号函数。
阈值函数:
/// <summary>
/// Step activation function: maps non-negative input to 1 and everything else to 0.
/// </summary>
public class ThresholdFunction : IActivationFunction
{
    /// <summary>
    /// Evaluates the step function.
    /// </summary>
    /// <param name="x">Input value.</param>
    /// <returns>1 when <paramref name="x"/> is greater than or equal to 0; otherwise 0.</returns>
    public double Function(double x)
    {
        if (x >= 0)
        {
            return 1;
        }

        return 0;
    }

    /// <summary>
    /// Always returns 0, whatever the argument.
    /// </summary>
    public double Derivative(double x) => 0;

    /// <summary>
    /// Always returns 0, whatever the argument.
    /// </summary>
    public double Derivative2(double y) => 0;
}
符号函数:
/// <summary>
/// Sign activation function: maps non-negative input to 1 and everything else to -1.
/// </summary>
public class SignFunction : IActivationFunction
{
    /// <summary>
    /// Evaluates the sign function.
    /// </summary>
    /// <param name="x">Input value.</param>
    /// <returns>1 when <paramref name="x"/> is greater than or equal to 0; otherwise -1.</returns>
    public double Function(double x)
    {
        if (x >= 0)
        {
            return 1;
        }

        return -1;
    }

    /// <summary>
    /// Always returns 0, whatever the argument.
    /// </summary>
    public double Derivative(double x) => 0;

    /// <summary>
    /// Always returns 0, whatever the argument.
    /// </summary>
    public double Derivative2(double y) => 0;
}
2. 继承抽象神经元类 Neuron
。
/// <summary>
/// Neuron whose output is an activation function applied to the weighted sum
/// of its inputs plus a threshold term.
/// </summary>
public class ActivationNeuron : Neuron
{
    /// <summary>
    /// Threshold value added to the weighted sum before activation. Starts at 0.
    /// </summary>
    public double Threshold { get; set; } = 0.0;

    /// <summary>
    /// Activation function applied to the weighted sum in <see cref="Compute"/>.
    /// </summary>
    public IActivationFunction ActivationFunction { get; set; }

    /// <summary>
    /// Creates a neuron with the given number of inputs and activation function.
    /// </summary>
    /// <param name="inputs">Number of inputs, forwarded to the base constructor.</param>
    /// <param name="function">Activation function stored in <see cref="ActivationFunction"/>.</param>
    public ActivationNeuron(int inputs, IActivationFunction function)
        : base(inputs)
    {
        ActivationFunction = function;
    }

    /// <summary>
    /// Runs the base randomization, then draws a new threshold as
    /// <c>Rand.NextDouble() * RandRange.Length + RandRange.Min</c>.
    /// </summary>
    public override void Randomize()
    {
        base.Randomize();
        Threshold = Rand.NextDouble() * (RandRange.Length) + RandRange.Min;
    }

    /// <summary>
    /// Computes the neuron output for one input vector and stores it in <c>Output</c>.
    /// </summary>
    /// <param name="input">Input vector; its length must equal <c>InputsCount</c>.</param>
    /// <returns>The activation function applied to the weighted sum plus the threshold.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException">The input length differs from <c>InputsCount</c>.</exception>
    public override double Compute(double[] input)
    {
        // Reject null explicitly so the caller gets an argument exception
        // rather than a NullReferenceException from the length check below.
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (input.Length != InputsCount)
        {
            throw new ArgumentException("输入向量的长度错误。");
        }

        double sum = 0.0;
        for (int i = 0; i < Weights.Length; i++)
        {
            sum += Weights[i] * input[i];
        }

        sum += Threshold;

        double output = ActivationFunction.Function(sum);
        Output = output;
        return output;
    }
}
3. 继承抽象神经网络层类 Layer
。
该类的主要作用是:实例化该层中的每个神经元,并为每个神经元设置激活函数。
/// <summary>
/// Layer made up of <see cref="ActivationNeuron"/> instances that share one activation function.
/// </summary>
public class ActivationLayer : Layer
{
    /// <summary>
    /// Fills every slot of <c>Neurons</c> with a new <see cref="ActivationNeuron"/>.
    /// </summary>
    /// <param name="neuronsCount">Number of neurons, forwarded to the base constructor.</param>
    /// <param name="inputsCount">Number of inputs given to the base constructor and to each neuron.</param>
    /// <param name="function">Activation function handed to each neuron.</param>
    public ActivationLayer(int neuronsCount, int inputsCount, IActivationFunction function)
        : base(neuronsCount, inputsCount)
    {
        int slot = 0;
        while (slot < Neurons.Length)
        {
            Neurons[slot] = new ActivationNeuron(inputsCount, function);
            slot++;
        }
    }

    /// <summary>
    /// Replaces the activation function of every neuron in this layer.
    /// </summary>
    /// <param name="function">Activation function to assign.</param>
    public void SetActivationFunction(IActivationFunction function)
    {
        foreach (var neuron in Neurons)
        {
            ((ActivationNeuron)neuron).ActivationFunction = function;
        }
    }
}
4. 继承抽象的神经网络类 Network
。
/// <summary>
/// Network whose layers are all <see cref="ActivationLayer"/> instances.
/// </summary>
public class ActivationNetwork : Network
{
    /// <summary>
    /// Builds one <see cref="ActivationLayer"/> per entry of <paramref name="neuronsCount"/>.
    /// </summary>
    /// <param name="function">Activation function used by every layer.</param>
    /// <param name="inputsCount">Number of inputs of the network, and so of the first layer.</param>
    /// <param name="neuronsCount">Number of neurons in each layer, in order.</param>
    public ActivationNetwork(IActivationFunction function, int inputsCount, params int[] neuronsCount)
        : base(inputsCount, neuronsCount.Length)
    {
        // The first layer reads the network inputs; each later layer reads
        // the outputs of the layer before it.
        int previousSize = inputsCount;
        for (int layerIndex = 0; layerIndex < Layers.Length; layerIndex++)
        {
            int size = neuronsCount[layerIndex];
            Layers[layerIndex] = new ActivationLayer(size, previousSize, function);
            previousSize = size;
        }
    }

    /// <summary>
    /// Replaces the activation function of every layer in the network.
    /// </summary>
    /// <param name="function">Activation function to assign.</param>
    public void SetActivationFunction(IActivationFunction function)
    {
        foreach (var layer in Layers)
        {
            ((ActivationLayer)layer).SetActivationFunction(function);
        }
    }
}
写完这个激活网络的实体类 ActivationNetwork
之后,我们就可以构造神经网络的结构了。
// Two layers on three inputs: a hidden layer of four neurons followed by
// an output layer of one neuron, all using the sigmoid activation function.
ActivationNetwork network = new ActivationNetwork(
    function: new SigmoidFunction(),
    inputsCount: 3,
    neuronsCount: new int[] {4, 1});
5. 实现感知器学习规则。
由于感知器学习是有监督学习,所以要实现 ISupervisedLearning
接口。
/// <summary>
/// Perceptron learning rule for an <see cref="ActivationNetwork"/> that has exactly one layer.
/// </summary>
public class PerceptronLearning : ISupervisedLearning
{
    // Network being trained.
    private readonly ActivationNetwork _network;

    // Backing field for LearningRate.
    private double _learningRate = 0.1;

    /// <summary>
    /// Learning rate. Assigned values are clamped to the range [0, 1]; the default is 0.1.
    /// </summary>
    public double LearningRate
    {
        get { return _learningRate; }
        set { _learningRate = Math.Max(0.0, Math.Min(1.0, value)); }
    }

    /// <summary>
    /// Creates a teacher for the given network.
    /// </summary>
    /// <param name="network">Network to train; it must have exactly one layer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="network"/> is null.</exception>
    /// <exception cref="ArgumentException">The network does not have exactly one layer.</exception>
    public PerceptronLearning(ActivationNetwork network)
    {
        if (network == null)
        {
            throw new ArgumentNullException(nameof(network));
        }

        if (network.Layers.Length != 1)
        {
            throw new ArgumentException("无效的神经网络,它应该只有一层。");
        }

        _network = network;
    }

    /// <summary>
    /// Trains on a single sample.
    /// </summary>
    /// <param name="input">Input vector passed to the network.</param>
    /// <param name="output">Desired output, one value per neuron of the layer.</param>
    /// <returns>Sum of the absolute differences between desired and actual outputs.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="output"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="output"/> has fewer values than the layer has neurons.</exception>
    /// <exception cref="InvalidOperationException">A neuron of the layer is not an <see cref="ActivationNeuron"/>.</exception>
    public double Run(double[] input, double[] output)
    {
        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        double[] networkOutput = _network.Compute(input);
        Layer layer = _network.Layers[0];

        // Check the target length before touching any weight, so a short
        // vector cannot leave the layer partially updated.
        if (output.Length < layer.Neurons.Length)
        {
            throw new ArgumentException("期望输出向量的长度错误。");
        }

        double error = 0.0;
        for (int j = 0; j < layer.Neurons.Length; j++)
        {
            double e = output[j] - networkOutput[j];
            if (e == 0)
            {
                // This neuron already produces the desired value.
                continue;
            }

            ActivationNeuron perceptron = layer.Neurons[j] as ActivationNeuron;
            if (perceptron == null)
            {
                throw new InvalidOperationException("神经元为null。");
            }

            // Move each weight by rate * error * input, and the threshold by rate * error.
            for (int i = 0; i < perceptron.Weights.Length; i++)
            {
                perceptron.Weights[i] += _learningRate * e * input[i];
            }

            perceptron.Threshold += _learningRate * e;
            error += Math.Abs(e);
        }

        return error;
    }

    /// <summary>
    /// Trains once on every sample, in order.
    /// </summary>
    /// <param name="input">Input vectors, one per sample.</param>
    /// <param name="output">Desired outputs; <c>output[i]</c> belongs to <c>input[i]</c>.</param>
    /// <returns>Sum of the errors returned by <see cref="Run"/> for each sample.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> or <paramref name="output"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="output"/> has fewer samples than <paramref name="input"/>.</exception>
    public double RunEpoch(double[][] input, double[][] output)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (output == null)
        {
            throw new ArgumentNullException(nameof(output));
        }

        // Fail before training starts instead of partway through the epoch.
        if (output.Length < input.Length)
        {
            throw new ArgumentException("期望输出的样本数少于输入的样本数。");
        }

        double error = 0.0;
        for (int i = 0, n = input.Length; i < n; i++)
        {
            error += Run(input[i], output[i]);
        }

        return error;
    }
}
6. 感知器模型的应用
首先,我们利用感知器模型处理and
问题。
// Truth table of logical AND: only (1,1) maps to 1.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};
double[][] outputs =
{
    new double[] {0},
    new double[] {0},
    new double[] {0},
    new double[] {1}
};

// One layer with two inputs and a single threshold neuron.
ActivationNetwork network = new ActivationNetwork(new ThresholdFunction(), 2, 1);
PerceptronLearning teacher = new PerceptronLearning(network) { LearningRate = 0.1 };

// Repeat epochs until one finishes with a total error of zero.
int iteration = 0;
double error;
do
{
    iteration++;
    error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);
} while (error != 0);

Console.WriteLine();

// Print the learned parameters of the only neuron, to three decimals.
ActivationNeuron neuron = network.Layers[0].Neurons[0] as ActivationNeuron;
Console.WriteLine(@"Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine(@"Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine(@"Threshold:{0}", neuron.Threshold.ToString("F3"));
得到结果如下图所示:
其次,我们利用感知器模型处理or
问题。
// Truth table of logical OR: only (0,0) maps to 0.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};
double[][] outputs =
{
    new double[] {0},
    new double[] {1},
    new double[] {1},
    new double[] {1}
};

// One layer with two inputs and a single threshold neuron.
ActivationNetwork network = new ActivationNetwork(new ThresholdFunction(), 2, 1);
PerceptronLearning teacher = new PerceptronLearning(network) { LearningRate = 0.1 };

// Repeat epochs until one finishes with a total error of zero.
int iteration = 0;
bool converged = false;
while (!converged)
{
    iteration++;
    double error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);
    converged = error == 0;
}

Console.WriteLine();

// Print the learned parameters of the only neuron, to three decimals.
ActivationNeuron neuron = network.Layers[0].Neurons[0] as ActivationNeuron;
Console.WriteLine(@"Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine(@"Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine(@"Threshold:{0}", neuron.Threshold.ToString("F3"));
得到结果如下图所示:
最后,我们处理一个稍微复杂一些的问题,比如有以下三类数据:
第一类:(0.1,0.1);(0.2,0.3);(0.3,0.4);(0.1,0.3);(0.2,0.5)
第二类:(0.1,1.0);(0.2,1.1);(0.3,0.9);(0.4,0.8);(0.2,0.9)
第三类:(1.0,0.4);(0.9,0.5);(0.8,0.6);(0.9,0.4);(1.0,0.5)
我们用 ECharts 把这三类数据用不同的颜色表示:
通常情况下,这些数据会存储在文件中,这里为了演示方便,我们手动赋值了。
// Fifteen 2-D samples, five per class, listed class by class.
double[][] inputs =
{
    // Class 1
    new[] {0.1, 0.1},
    new[] {0.2, 0.3},
    new[] {0.3, 0.4},
    new[] {0.1, 0.3},
    new[] {0.2, 0.5},
    // Class 2
    new[] {0.1, 1.0},
    new[] {0.2, 1.1},
    new[] {0.3, 0.9},
    new[] {0.4, 0.8},
    new[] {0.2, 0.9},
    // Class 3
    new[] {1.0, 0.4},
    new[] {0.9, 0.5},
    new[] {0.8, 0.6},
    new[] {0.9, 0.4},
    new[] {1.0, 0.5}
};

// One-hot targets: samples 0-4 are {1,0,0}, 5-9 are {0,1,0}, 10-14 are {0,0,1}.
double[][] outputs = new double[15][];
for (int sample = 0; sample < outputs.Length; sample++)
{
    double[] oneHot = new double[3];
    oneHot[sample / 5] = 1;
    outputs[sample] = oneHot;
}
搭建感知器网络,输入数为2,输出层神经元个数为分类数3,并进行训练。
// One layer with two inputs and three threshold neurons, one per class.
ActivationNetwork network = new ActivationNetwork(new ThresholdFunction(), 2, 3);
PerceptronLearning teacher = new PerceptronLearning(network) { LearningRate = 0.1 };

// Repeat epochs until one finishes with a total error of zero.
int iteration = 0;
double error;
do
{
    iteration++;
    error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);
} while (error != 0);
输出感知器的权值和阈值,通过这两个值我们就能得到三条分割直线。
// Print both weights and the threshold of each of the three neurons.
ActivationLayer layer = network.Layers[0] as ActivationLayer;
int ordinal = 1;
while (ordinal <= 3)
{
    Console.WriteLine(@"神经元:{0}", ordinal);

    var current = layer.Neurons[ordinal - 1];
    Console.WriteLine(@"Weight 1:{0}", current.Weights[0]);
    Console.WriteLine(@"Weight 2:{0}", current.Weights[1]);

    // Threshold is declared on ActivationNeuron, hence the cast.
    Console.WriteLine(@"Threshold:{0}", ((ActivationNeuron) current).Threshold);

    ordinal++;
}
通过以上的讲解,我们就把感知器神经网络搞定了。我们可以看到该网络可以处理and
、or
等线性可分的问题,也可以处理一些简单的多分类问题。但对线性不可分的问题就无能为力了。后面我们会介绍误差反传网络,它可以处理线性不可分的分类问题。好了今天就到这里吧!See You!