一、三国人物Top10
import jieba
# The module is the all-lowercase ``wordcloud``; module names are
# case-sensitive, so ``wordCloud`` fails with ModuleNotFoundError.
from wordcloud import WordCloud
import imageio

# Read the whole text file into one string.
with open('./threekingdom.txt','r',encoding='utf-8') as f:
    words = f.read()

# token -> number of occurrences
counts={}
# Tokens removed from the ranking: frequent words that are not character
# names, plus the alias tokens that are folded into another key below.
excludes = {"将军", "却说", "丞相", "二人", "不可", "荆州", "不能", "如此", "商议",
            "如何", "主公", "军士", "军马", "左右", "次日", "引兵", "大喜", "天下",
            "东吴", "于是", "今日", "不敢", "魏兵", "陛下", "都督", "人马", "不知",
            "孔明曰", "玄德曰", "刘备", "云长"}
# Top-10 names, each repeated as many times as it was counted; this becomes
# the text handed to the word cloud.
li=[]

# Tokenise the text with jieba.
words_list = jieba.lcut(words)
print(words_list)
for word in words_list:
    # Single-character tokens are not counted.
    if len(word) <= 1:
        continue
    counts[word] = counts.get(word, 0) + 1
print(counts)

# Fold alias tokens into one key each. ``.get(..., 0)`` keeps this from
# raising KeyError when a token never occurs in the input text.
counts['孔明'] = counts.get('孔明', 0) + counts.get('孔明曰', 0)
counts['玄德'] = counts.get('玄德', 0) + counts.get('玄德曰', 0) + counts.get('刘备', 0)
counts['关公'] = counts.get('云长', 0) + counts.get('关公', 0)

# Drop the excluded tokens; ``pop`` with a default tolerates tokens that
# are not present (``del`` would raise KeyError).
for word in excludes:
    counts.pop(word, None)

# Sort (token, count) pairs by count, highest first.
items=list(counts.items())
print(items)
items.sort(key=lambda x:x[1],reverse=True)
print(items)

# Slicing instead of indexing range(10) avoids IndexError when fewer than
# ten distinct tokens remain.
for role, count in items[:10]:
    print(role,count)
    li.extend([role] * count)

# Image passed to WordCloud as its mask.
mask = imageio.imread('./china.jpg')
text=' '.join(li)
WordCloud(
    font_path='msyh.ttc',
    background_color='white',
    width=800,
    height=600,
    # Do not combine adjacent words into two-word collocations.
    collocations=False,
    mask=mask
).generate(text).to_file('Top10.png')
二、匿名函数
1、lambda表达式
1.1、形式:lambda x1, x2, ..., xn: 表达式。参数可以有任意多个,但表达式只能有一个
from random import randint

# A lambda bound to a name: takes two arguments and evaluates to their sum.
num = lambda x1, x2: x1 + x2
# Call it like any other function and show the result.
print(num(3, 3))
# Sorting lists and dicts with a lambda as the sort key.
# Named ``pairs`` rather than ``list`` so the builtin ``list`` type is not
# shadowed for the rest of the module.
pairs = [
    ('a', 32),
    ('b', 32),
    ('c', 56),
    ('d', 34),
]
# Order by the second tuple element, largest first (the sort is stable, so
# entries with equal values keep their original relative order).
pairs.sort(key=lambda x: x[1], reverse=True)
print(pairs)
# Records to be ordered by their "age" value.
info = [
    {"name": "xiaoxin", "age": 18},
    {"name": "xiao", "age": 19},
    {"name": "xin", "age": 10},
    {"name": "aoin", "age": 17},
]
# Ascending by "age"; slice assignment keeps the same list object, like
# an in-place sort.
info[:] = sorted(info, key=lambda person: person["age"])
print(info)
1.2、列表推导式
# Shape of a list comprehension:
#   [expression (the produced value) for temp_var in iterable <optional condition>]
print([n for n in range(10)])
1.3、列表解析
# Pick out every even number from range(10), two ways.

# Plain loop: append each value that divides evenly by 2.
li = []
for i in range(10):
    if i % 2 == 0:
        li.append(i)
print(li)

# Same result as a list comprehension with a filter condition.
print([i for i in range(10) if i % 2 == 0])
print("111")
三、画图
设置中文字体,使图上的中文和负号能正常显示(不乱码)
# ``plt`` and ``np`` are used by the plotting snippets in this section but
# are not imported anywhere in the visible file; import them here so the
# snippets do not fail with NameError.
import matplotlib.pyplot as plt
import numpy as np

# Use the SimHei font for sans-serif text so Chinese labels are not garbled.
plt.rcParams["font.sans-serif"] = ['SimHei']
# Draw the minus sign as an ASCII hyphen instead of the Unicode minus glyph,
# which the font selected above may not contain.
plt.rcParams['axes.unicode_minus'] = False
1、[0,2π]正弦曲线图
# 100 evenly spaced sample points covering [0, 2π].
x = np.linspace(0, 2 * np.pi, num=100)
print(x)
y = np.sin(x)
# Cosine over the same points, drawn in the same axes as the sine curve.
cosy = np.cos(x)
plt.plot(x, cosy)
# Sine curve: red, dashed, labelled for the legend.
plt.plot(x, y, color='r', linestyle='--', label='sin(x)')
# Axis captions.
plt.xlabel("时间(s)")
plt.ylabel('电压(V)')
# Was ``plt.table('xx表')``, which adds a data table built from the string's
# characters; a figure title is what this caption is for.
plt.title('xx表')
# Show the legend for the labelled curve.
plt.legend()
plt.show()
2、柱状图
import string

# Five category labels: the fixed prefix followed by the first five
# uppercase ASCII letters (taken with a slice).
x = list(map('猫{}'.format, string.ascii_uppercase[:5]))
# One random integer in [100, 1000] per bar.
y = [randint(100, 1000) for _ in range(5)]
plt.bar(x, y)
print(x)
plt.show()
3、饼状图
# Six random integers in [3500, 9000], one per slice.
counts = [randint(3500, 9000) for _ in range(6)]
# Slice labels: the fixed prefix followed by the first six lowercase letters.
labels = list(map('员工{}'.format, string.ascii_lowercase[:6]))
colors = ['red', 'purple', 'blue', 'yellow', 'gray', 'green']
# Offset of each slice from the centre; only the first slice is pulled out.
explode = [0.1] + [0] * 5
plt.pie(
    counts,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    shadow=True,
)
plt.legend(loc=2)
# Equal axis scaling.
plt.axis('equal')
plt.show()
4、散点图
# Scatter plot
# Arguments to normal(): mean, standard deviation, number of samples.
x = np.random.normal(loc=0, scale=1, size=100)
y = np.random.normal(loc=0, scale=1, size=100)
# alpha sets the transparency of the markers.
plt.scatter(x, y, alpha=0.1)
plt.show()