如何利用 C# 实现神经网络的感知器模型？

前几天我们介绍了如何利用 C# 对神经网络模型进行抽象。在那篇图文中，我们抽象了单个神经元 Neuron、激活函数 IActivationFunction、网络层 Layer、网络结构 Network，以及监督学习 ISupervisedLearning 和非监督学习 IUnsupervisedLearning 算法。通过这些抽象，我们可以得到关于神经网络的整体框架。

今天，我们就在此基础上来继承或实现这些抽象类和接口，构造最简单的一种神经网络模型 —— 单层感知器，即用感知器学习算法训练的单层神经网络。

感知器学习算法

该学习算法用于训练由采用阈值激活函数的神经元构成的单层神经网络。感知器是一种线性分类器，它利用一组权重与特征向量线性组合得到的线性预测函数进行预测。详细的算法可以参考维基百科中的相关部分：

https://en.wikipedia.org/wiki/Perceptron

感知器

上面，我们简要地介绍了感知器学习算法，接下来我们写相应的代码。

1. 实现激活函数 IActivationFunction。

通常情况下感知器神经网络的激活函数有两种，第一种是阈值函数，第二种是符号函数。

阈值函数：

/// <summary>
/// Threshold (step) activation function: yields 1 for non-negative input and 0 otherwise.
/// </summary>
public class ThresholdFunction : IActivationFunction
{
    /// <summary>
    /// Evaluates the function at <paramref name="x"/>.
    /// </summary>
    /// <param name="x">Input value.</param>
    /// <returns>1 when <paramref name="x"/> is greater than or equal to zero; otherwise 0.</returns>
    public double Function(double x)
    {
        if (x >= 0)
        {
            return 1;
        }

        return 0;
    }

    /// <summary>
    /// Always returns 0, regardless of <paramref name="x"/>.
    /// </summary>
    public double Derivative(double x) => 0;

    /// <summary>
    /// Always returns 0, regardless of <paramref name="y"/>.
    /// </summary>
    public double Derivative2(double y) => 0;
}

符号函数：

/// <summary>
/// Sign activation function: yields 1 for non-negative input and -1 otherwise.
/// </summary>
public class SignFunction : IActivationFunction
{
    /// <summary>
    /// Evaluates the function at <paramref name="x"/>.
    /// </summary>
    /// <param name="x">Input value.</param>
    /// <returns>1 when <paramref name="x"/> is greater than or equal to zero; otherwise -1.</returns>
    public double Function(double x)
    {
        double sign = -1;
        if (x >= 0)
        {
            sign = 1;
        }

        return sign;
    }

    /// <summary>
    /// Always returns 0, regardless of <paramref name="x"/>.
    /// </summary>
    public double Derivative(double x) => 0;

    /// <summary>
    /// Always returns 0, regardless of <paramref name="y"/>.
    /// </summary>
    public double Derivative2(double y) => 0;
}

2. 继承抽象神经元类 Neuron。

/// <summary>
/// Neuron that adds a threshold to the weighted sum of its inputs and passes
/// the result through an activation function.
/// </summary>
public class ActivationNeuron : Neuron
{
    /// <summary>
    /// Threshold value added to the weighted input sum. Initialized to 0.
    /// </summary>
    public double Threshold { get; set; } = 0.0;

    /// <summary>
    /// Activation function applied to the weighted sum plus threshold.
    /// </summary>
    public IActivationFunction ActivationFunction { get; set; }

    /// <summary>
    /// Creates a neuron with the given number of inputs and activation function.
    /// </summary>
    /// <param name="inputs">Number of inputs, forwarded to the base constructor.</param>
    /// <param name="function">Activation function stored in <see cref="ActivationFunction"/>.</param>
    public ActivationNeuron(int inputs, IActivationFunction function)
        : base(inputs)
    {
        ActivationFunction = function;
    }

    /// <summary>
    /// Runs the base randomization, then draws a new threshold as
    /// <c>Rand.NextDouble() * RandRange.Length + RandRange.Min</c>.
    /// </summary>
    public override void Randomize()
    {
        base.Randomize();

        double scaled = Rand.NextDouble() * RandRange.Length;
        Threshold = scaled + RandRange.Min;
    }

    /// <summary>
    /// Computes the neuron output for the given input vector and stores it in <c>Output</c>.
    /// </summary>
    /// <param name="input">Input vector; its length must equal <c>InputsCount</c>.</param>
    /// <returns>The activation function applied to the weighted sum plus threshold.</returns>
    /// <exception cref="ArgumentException">The input length differs from <c>InputsCount</c>.</exception>
    public override double Compute(double[] input)
    {
        if (input.Length != InputsCount)
        {
            throw new ArgumentException("输入向量的长度错误。");
        }

        // Accumulate the weighted inputs first and add the threshold last,
        // so the floating-point summation order is fixed.
        double weighted = 0.0;
        int index = 0;
        while (index < Weights.Length)
        {
            weighted += Weights[index] * input[index];
            index++;
        }

        weighted += Threshold;

        double result = ActivationFunction.Function(weighted);
        Output = result;
        return result;
    }
}

3. 继承抽象神经网络层类 Layer。

该类的主要作用是：实例化该层中的每个神经元，并为每个神经元设置激活函数。

/// <summary>
/// Layer whose neurons are all <see cref="ActivationNeuron"/> instances.
/// </summary>
public class ActivationLayer : Layer
{
    /// <summary>
    /// Creates the layer and fills every neuron slot with a new <see cref="ActivationNeuron"/>.
    /// </summary>
    /// <param name="neuronsCount">Number of neurons, forwarded to the base constructor.</param>
    /// <param name="inputsCount">Number of inputs of the layer and of each neuron.</param>
    /// <param name="function">Activation function handed to every neuron.</param>
    public ActivationLayer(int neuronsCount, int inputsCount, IActivationFunction function)
        : base(neuronsCount, inputsCount)
    {
        // Fill the slots in ascending order.
        int slot = 0;
        while (slot < Neurons.Length)
        {
            Neurons[slot] = new ActivationNeuron(inputsCount, function);
            slot++;
        }
    }

    /// <summary>
    /// Assigns the given activation function to every neuron of the layer.
    /// </summary>
    /// <param name="function">Activation function to assign.</param>
    public void SetActivationFunction(IActivationFunction function)
    {
        // The typed foreach performs the same explicit cast to ActivationNeuron per element.
        foreach (ActivationNeuron neuron in Neurons)
        {
            neuron.ActivationFunction = function;
        }
    }
}

4. 继承抽象的神经网络类 Network。

/// <summary>
/// Network built from <see cref="ActivationLayer"/> layers that share one activation function.
/// </summary>
public class ActivationNetwork : Network
{
    /// <summary>
    /// Creates the network and instantiates each layer.
    /// </summary>
    /// <param name="function">Activation function handed to every layer.</param>
    /// <param name="inputsCount">Number of inputs of the network (and of the first layer).</param>
    /// <param name="neuronsCount">Number of neurons in each layer, in layer order.</param>
    public ActivationNetwork(IActivationFunction function, int inputsCount, params int[] neuronsCount)
        : base(inputsCount, neuronsCount.Length)
    {
        // The first layer reads the network inputs; every later layer reads
        // the outputs of the layer before it.
        int layerInputs = inputsCount;
        for (int index = 0; index < Layers.Length; index++)
        {
            int layerSize = neuronsCount[index];
            Layers[index] = new ActivationLayer(layerSize, layerInputs, function);
            layerInputs = layerSize;
        }
    }

    /// <summary>
    /// Assigns the given activation function to every layer of the network.
    /// </summary>
    /// <param name="function">Activation function to assign.</param>
    public void SetActivationFunction(IActivationFunction function)
    {
        // The typed foreach performs the same explicit cast to ActivationLayer per element.
        foreach (ActivationLayer layer in Layers)
        {
            layer.SetActivationFunction(function);
        }
    }
}

写完这个激活网络的实体类 ActivationNetwork 之后，我们就可以构造神经网络的结构了。

// Build a two-layer network with 3 inputs: a middle layer of 4 neurons and
// an output layer of 1 neuron, all using the sigmoid activation function.
IActivationFunction sigmoid = new SigmoidFunction();
int[] layerSizes = {4, 1};
ActivationNetwork network = new ActivationNetwork(sigmoid, 3, layerSizes);

5. 实现感知器学习规则。

由于感知器学习是有监督学习，所以要实现 ISupervisedLearning 接口。

/// <summary>
/// Perceptron learning rule for an <see cref="ActivationNetwork"/> that has exactly one layer.
/// </summary>
public class PerceptronLearning : ISupervisedLearning
{
    // Network being trained.
    private readonly ActivationNetwork _network;

    // Learning rate used when adjusting weights and thresholds.
    private double _learningRate = 0.1;

    /// <summary>
    /// Learning rate. Assigned values are clamped to the range [0, 1]; the initial value is 0.1.
    /// </summary>
    public double LearningRate
    {
        get { return _learningRate; }
        set { _learningRate = Math.Max(0.0, Math.Min(1.0, value)); }
    }

    /// <summary>
    /// Creates a teacher for the given network.
    /// </summary>
    /// <param name="network">Network to train; it must have exactly one layer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="network"/> is null.</exception>
    /// <exception cref="ArgumentException">The network does not have exactly one layer.</exception>
    public PerceptronLearning(ActivationNetwork network)
    {
        // Fail with a descriptive exception instead of a NullReferenceException
        // on the Layers access below.
        if (network == null)
        {
            throw new ArgumentNullException(nameof(network));
        }

        if (network.Layers.Length != 1)
        {
            throw new ArgumentException("无效的神经网络，它应该只有一层。");
        }

        _network = network;
    }

    /// <summary>
    /// Runs one learning step on a single training sample.
    /// </summary>
    /// <param name="input">Input vector of the sample.</param>
    /// <param name="output">Desired output, one value per neuron of the layer.</param>
    /// <returns>Sum of the absolute output errors over all neurons for this sample.</returns>
    /// <exception cref="InvalidOperationException">
    /// A neuron that needs updating is not an <see cref="ActivationNeuron"/>.
    /// </exception>
    public double Run(double[] input, double[] output)
    {
        double[] networkOutput = _network.Compute(input);
        Layer layer = _network.Layers[0];
        double error = 0.0;

        for (int j = 0; j < layer.Neurons.Length; j++)
        {
            double e = output[j] - networkOutput[j];

            // Neurons that already produce the desired output are left untouched.
            if (e == 0)
            {
                continue;
            }

            ActivationNeuron perceptron = layer.Neurons[j] as ActivationNeuron;
            if (perceptron == null)
            {
                // A specific exception type instead of the reserved base Exception;
                // it still derives from Exception, so existing catch blocks keep working.
                throw new InvalidOperationException("神经元为null。");
            }

            // Move each weight by learningRate * error * input, and the threshold
            // by learningRate * error.
            for (int i = 0; i < perceptron.Weights.Length; i++)
            {
                perceptron.Weights[i] += _learningRate * e * input[i];
            }

            perceptron.Threshold += _learningRate * e;

            error += Math.Abs(e);
        }

        return error;
    }

    /// <summary>
    /// Runs one learning step for every training sample, in order.
    /// </summary>
    /// <param name="input">Input vectors, one per sample.</param>
    /// <param name="output">Desired outputs, one per sample, in the same order as <paramref name="input"/>.</param>
    /// <returns>Sum of the errors returned by <see cref="Run"/> over all samples.</returns>
    public double RunEpoch(double[][] input, double[][] output)
    {
        double error = 0.0;
        for (int i = 0, n = input.Length; i < n; i++)
        {
            error += Run(input[i], output[i]);
        }

        return error;
    }
}

6. 感知器模型的应用

首先，我们利用感知器模型处理and问题。

// Training set for logical AND: only the sample (1,1) has the desired output 1.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};

double[][] outputs =
{
    new double[] {0},
    new double[] {0},
    new double[] {0},
    new double[] {1}
};

// Single-layer network: 2 inputs, 1 neuron with the threshold activation function.
ActivationNetwork network = new ActivationNetwork(
    new ThresholdFunction(), 2, 1);

PerceptronLearning teacher = new PerceptronLearning(network);
teacher.LearningRate = 0.1;

// Repeat epochs until the total error of an epoch is 0.
// Note: the only exit is error == 0, so this loop does not end if that never happens.
int iteration = 1;
while (true)
{
    double error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);

    if (error == 0)
    {
        break;
    }

    iteration++;
}

Console.WriteLine();

// Direct cast instead of an unchecked "as": an unexpected neuron type fails here
// with InvalidCastException rather than later with NullReferenceException.
ActivationNeuron neuron = (ActivationNeuron) network.Layers[0].Neurons[0];

Console.WriteLine(@"Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine(@"Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine(@"Threshold:{0}", neuron.Threshold.ToString("F3"));

得到结果如下图所示：

and

其次，我们利用感知器模型处理or问题。

// Training set for logical OR: only the sample (0,0) has the desired output 0.
double[][] inputs =
{
    new double[] {0, 0},
    new double[] {0, 1},
    new double[] {1, 0},
    new double[] {1, 1}
};

double[][] outputs =
{
    new double[] {0},
    new double[] {1},
    new double[] {1},
    new double[] {1}
};

// Single-layer network: 2 inputs, 1 neuron with the threshold activation function.
ActivationNetwork network = new ActivationNetwork(
    new ThresholdFunction(), 2, 1);

PerceptronLearning teacher = new PerceptronLearning(network);
teacher.LearningRate = 0.1;

// Repeat epochs until the total error of an epoch is 0.
// Note: the only exit is error == 0, so this loop does not end if that never happens.
int iteration = 1;
while (true)
{
    double error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);

    if (error == 0)
    {
        break;
    }

    iteration++;
}

Console.WriteLine();

// Direct cast instead of an unchecked "as": an unexpected neuron type fails here
// with InvalidCastException rather than later with NullReferenceException.
ActivationNeuron neuron = (ActivationNeuron) network.Layers[0].Neurons[0];

Console.WriteLine(@"Weight 1:{0}", neuron.Weights[0].ToString("F3"));
Console.WriteLine(@"Weight 2:{0}", neuron.Weights[1].ToString("F3"));
Console.WriteLine(@"Threshold:{0}", neuron.Threshold.ToString("F3"));

得到结果如下图所示：

最后，我们处理一个稍微复杂一些的问题，比如有以下三类数据：

第一类：(0.1,0.1);(0.2,0.3);(0.3,0.4);(0.1,0.3);(0.2,0.5)

第二类：(0.1,1.0);(0.2,1.1);(0.3,0.9);(0.4,0.8);(0.2,0.9)

第三类：(1.0,0.4);(0.9,0.5);(0.8,0.6);(0.9,0.4);(1.0,0.5)

我们用 ECharts 把这三类数据用不同的颜色表示：

原始数据

通常情况下，这些数据会存储在文件中，这里为了演示方便，我们手动赋值了。

// 15 two-dimensional samples in three classes of five samples each.
double[][] inputs =
{
    // Class 1: (0.1,0.1);(0.2,0.3);(0.3,0.4);(0.1,0.3);(0.2,0.5)
    new double[] {0.1, 0.1},
    new double[] {0.2, 0.3},
    new double[] {0.3, 0.4},
    new double[] {0.1, 0.3},
    new double[] {0.2, 0.5},

    // Class 2: (0.1,1.0);(0.2,1.1);(0.3,0.9);(0.4,0.8);(0.2,0.9)
    new double[] {0.1, 1.0},
    new double[] {0.2, 1.1},
    new double[] {0.3, 0.9},
    new double[] {0.4, 0.8},
    new double[] {0.2, 0.9},

    // Class 3: (1.0,0.4);(0.9,0.5);(0.8,0.6);(0.9,0.4);(1.0,0.5)
    new double[] {1.0, 0.4},
    new double[] {0.9, 0.5},
    new double[] {0.8, 0.6},
    new double[] {0.9, 0.4},
    new double[] {1.0, 0.5}
};

// Desired outputs, one-hot encoded: the position of the 1 identifies the class
// of the sample at the same index in inputs.
double[][] outputs =
{
    // Class 1
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},
    new double[] {1, 0, 0},

    // Class 2
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},
    new double[] {0, 1, 0},

    // Class 3
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1},
    new double[] {0, 0, 1}
};

搭建感知器网络，输入数为2，输出层神经元个数为分类数3，并进行训练。

// Single-layer network: 2 inputs, 3 neurons with the threshold activation function.
IActivationFunction threshold = new ThresholdFunction();
ActivationNetwork network = new ActivationNetwork(threshold, 2, 3);

PerceptronLearning teacher = new PerceptronLearning(network);
teacher.LearningRate = 0.1;

// Run epochs, reporting each one, until an epoch finishes with a total error of 0.
int iteration = 0;
double error;
do
{
    iteration++;
    error = teacher.RunEpoch(inputs, outputs);
    Console.WriteLine(@"迭代次数:{0},总体误差:{1}", iteration, error);
} while (error != 0);

迭代误差

输出感知器的权值和阈值，通过这两个值我们就能得到三条分割直线。

// Direct cast instead of an unchecked "as": an unexpected layer type fails here
// with InvalidCastException rather than later with NullReferenceException.
ActivationLayer layer = (ActivationLayer) network.Layers[0];

// Print the weights and threshold of every neuron in the layer; the bound comes
// from the layer itself rather than a hard-coded neuron count.
for (int i = 0; i < layer.Neurons.Length; i++)
{
    ActivationNeuron neuron = (ActivationNeuron) layer.Neurons[i];

    Console.WriteLine(@"神经元:{0}", i + 1);
    Console.WriteLine(@"Weight 1:{0}", neuron.Weights[0]);
    Console.WriteLine(@"Weight 2:{0}", neuron.Weights[1]);
    Console.WriteLine(@"Threshold:{0}", neuron.Threshold);
}

分割线

通过以上的讲解，我们就把感知器神经网络搞定了。我们可以看到，该网络可以处理 and、or 等线性可分的问题，也可以处理一些简单的多分类问题，但对线性不可分的问题就无能为力了。后面我们会介绍误差反传网络，它可以处理线性不可分的分类问题。好了，今天就到这里吧！See You！

如何利用 C# 实现神经网络的感知器模型？

推荐阅读更多精彩内容