练习:正态分布采样与参数估计
正态分布:
#!/usr/bin/python
# -*- coding:utf-8 -*-
import numpy as np
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
def residual(theta, x1, y1):
mu1 = theta[0]
sigma1 = theta[1]
y_hat = np.exp(-(x1-mu1)**2 / (2*sigma1**2)) / (math.sqrt(2*math.pi)*sigma1)#实际值
return y1 - y_hat#残差
if __name__ == "__main__":
data = np.random.randn(1000) * 3 + 2#随机采样1000个正态分布数。均值为2 标准差为3
y, x = np.histogram(data, bins=20, density=True)#直方图 等分20份
x = (x[1:] + x[:-1]) / 2 #使取值更加好看点 某些偏差较大的点 会因为这样被变得偏差变小
print(x)
print(y)
mu = 0
sigma = 1
t = leastsq(residual, (mu, sigma), args=(x, y))[0] #残差用最小二乘法去拟合期望和标准差
mu, sigma = t
print('期望和标准差的估计值:', mu, sigma)
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
plt.figure(facecolor='w')
plt.plot(x, y, 'go--', lw=2)
y_hat = np.exp(-(x-mu)**2 / (2*sigma**2)) / (math.sqrt(2*math.pi)*sigma)
plt.plot(x, y_hat, 'ro-', lw=2)
plt.hist(data, bins=20, normed=True, color='g', alpha=0.6)
plt.grid(b=True, ls=':')
plt.title('正态分布采样与参数估计', fontsize=20)
plt.xlabel('X', fontsize=16)
plt.ylabel('Y', fontsize=16)
plt.show()
#######################################################
[ -7.67035993 -6.70051393 -5.73066793 -4.76082192 -3.79097592
-2.82112992 -1.85128391 -0.88143791 0.08840809 1.0582541
2.0281001 2.9979461 3.96779211 4.93763811 5.90748411
6.87733012 7.84717612 8.81702212 9.78686813 10.75671413]
[ 0.00103109 0.00309327 0.00309327 0.00927982 0.01855965 0.0360882
0.06186549 0.08661169 0.11032679 0.12888644 0.1340419 0.12476208
0.10723352 0.08351841 0.05155458 0.02887056 0.02062183 0.0123731
0.00309327 0.00618655]
期望和标准差的估计值: 1.92740125788 2.95959806864