课程笔记来源于 tiger 的解密大数据课程。
python数据分析范式:
1. 安装anaconda
2. 安装 jupyter notebook
3. 切换路径为数据文件所在的路径
4. 启动jupyter notebook & 加载python包
5. 导入数据
6. 数据处理
7. 数据可视化
课上作业:
# coding: utf-8
# In[27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# In[38]:
# Configure inline plotting with the retina figure format.
# run_line_magic() is used instead of the deprecated InteractiveShell.magic().
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'retina'")
# In[29]:
# Load the IQ score table and preview its first three rows.
iq_data = pd.read_csv('IQscore.csv')
iq_data.head(3)
# In[30]:
# Number of rows in the table.
len(iq_data)
# In[31]:
# Mean of the 'IQ' column; `mean` is reused by the plotting cell.
iq = iq_data['IQ']
mean = iq.mean()
mean
# In[32]:
# Standard deviation of the 'IQ' column; `std` is reused by the plotting cell.
std = iq.std()
std
# In[33]:
def normfun(x, mu, sigma):
    """Return the normal (Gaussian) probability density evaluated at x.

    Computes exp(-(x - mu)^2 / (2 * sigma^2)) / (sigma * sqrt(2 * pi)).

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a NumPy
            array (the expression is evaluated element-wise).
        mu: Mean of the distribution.
        sigma: Standard deviation of the distribution.

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    pdf = np.exp(-((x - mu) ** 2) / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))
    return pdf
# In[34]:
# Overlay the fitted normal curve on a normalized histogram of the IQ scores.
x = np.arange(60, 150, 1)
y = normfun(x, mean, std)
plt.plot(x, y, color='g', linewidth=3)
# `density=True` replaces the `normed=True` keyword, which matplotlib removed;
# it scales the histogram to unit area so it is comparable with the pdf curve.
plt.hist(iq, bins=7, color='r', alpha=0.5, rwidth=0.9, density=True)
plt.title('IQ distribution')
plt.xlabel('IQ score')
plt.ylabel('Probability')
plt.show()
课下作业:
# coding: utf-8
# In[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# In[3]:
# Configure inline plotting with the retina figure format.
# run_line_magic() is used instead of the deprecated InteractiveShell.magic().
get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format='retina'")
# In[4]:
# Load the stakes table.
stakes_data = pd.read_csv('stakes.csv')
# In[5]:
# Preview the first three rows.
stakes_data.head(3)
# In[7]:
# Number of rows in the table.
len(stakes_data)
# In[10]:
# Mean of the 'time' column; `mean` is reused by the plotting cell.
stakes = stakes_data['time']
mean = stakes.mean()
mean
# In[11]:
# Standard deviation of the 'time' column; `std` is reused by the plotting cell.
std = stakes.std()
std
# In[12]:
def normfun(x, mu, sigema):
    """Return the normal (Gaussian) probability density evaluated at x.

    Computes exp(-(x - mu)^2 / (2 * sigema^2)) / (sigema * sqrt(2 * pi)).

    Args:
        x: Point(s) at which to evaluate the density; a scalar or a NumPy
            array (the expression is evaluated element-wise).
        mu: Mean of the distribution.
        sigema: Standard deviation (sigma) of the distribution. The
            parameter name is kept as-is so existing callers keep working.

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    pdf = np.exp(-(x - mu) ** 2 / (2 * sigema ** 2)) / (sigema * np.sqrt(2 * np.pi))
    return pdf
# In[21]:
# Overlay the fitted normal curve on a normalized histogram of the times.
x = np.arange(145, 155, 0.2)
y = normfun(x, mean, std)
plt.plot(x, y, color='g', linewidth=3)
# `density=True` replaces the `normed=True` keyword, which matplotlib removed;
# it scales the histogram to unit area so it is comparable with the pdf curve.
plt.hist(stakes, bins=10, color='r', alpha=0.5, rwidth=0.9, density=True)
plt.title('stakes distribution')
plt.xlabel('time')
plt.ylabel('probability')
plt.show()
# In[ ]: