按特性对Python容器分类:
- 容器序列:list,tuple,deque(可存放任意类型)
- 扁平序列:str,bytes,bytearray,array.array(可for循环遍历,存放单一类型)
- 可变序列:list,deque,bytearray,array
- 不可变序列:str,tuple,bytes
abd继承关系
要自己实现一个具备某种功能的类,应该实现其功能对应的魔术方法:
if xxx in yyy: # __contains__
for i in xxx: # __iter__
a + b # __add__
a += b # __iadd__
a[::] # __getitem__
import numbers
class Group:
#支持切片操作
def __init__(self, group_name, company_name, staffs):
self.group_name = group_name
self.company_name = company_name
self.staffs = staffs
def __reversed__(self):
self.staffs.reverse()
# 实现切片功能
def __getitem__(self, item): # 传入的item实际上是一个slice对象,“:2”
cls = type(self) # 即Group
if isinstance(item, slice):
return cls(group_name=self.group_name, company_name=self.company_name, staffs=self.staffs[item])
elif isinstance(item, numbers.Integral):
return cls(group_name=self.group_name, company_name=self.company_name, staffs=[self.staffs[item]])
# 可以使用len函数获取其长度
def __len__(self):
return len(self.staffs)
# 可以迭代
def __iter__(self):
return iter(self.staffs)
# 可以使用in操作符判断元素是否在内
def __contains__(self, item):
if item in self.staffs:
return True
else:
return False
维护有序序列
在频繁插入数据、同时要求有序的场合,使用bisect、collections.deque效率比较高。
import bisect
from collections import deque
#用来处理已排序的序列,用来维持已排序的序列, 升序
#二分查找
inter_list = deque()
bisect.insort(inter_list, 3)
bisect.insort(inter_list, 2)
bisect.insort(inter_list, 5)
bisect.insort(inter_list, 1)
bisect.insort(inter_list, 6)
print(bisect.bisect_left(inter_list, 3))
print(inter_list) # 插入操作完成后即是一个有序序列
什么时候不应该使用列表?
array、deque:性能比list高很多(存储在一段连续的内存空间),但只能接收单一类型元素。
import array
my_array = array.array("i")
my_array.append(1)
my_array.append("abc") # 报错,只接收int类型
dict
dict属于MutableMapping类型(可修改映射),查看源码:
from collections.abc import Mapping, MutableMapping
from _collections_abc import __all__
a = {} # dict实现了MutableMapping中的魔法函数
print (isinstance(a, MutableMapping))
常用方法
a = {
"ywh1": {
"company": "aimei"
},
"ywh2": {
"company": "aimei2"
}
}
new_dict = a.copy() # 浅拷贝,如value中存放的是地址,则不会拷贝其所指的值
new_dict["ywh1"]["company"] = "aimei3"
new_list = ["ywh1", "ywh2"]
new_dict = dict.fromkeys(new_list, {"company": "aimei"})
new_dict.update((("ywh", "aimei"),))
new_dict.setdefault("hwy", {})
子类
- 不建议继承list和dict(这些是使用C实现的数据结构,可能不会执行继承后覆盖的父类方法);
- 如要扩展dict功能,可继承collections.UserDict。
from collections import UserDict
class Mydict(dict):
def __setitem__(self, key, value):
super().__setitem__(key, value*2)
my_dict = Mydict(one=1)
my_dict["one"] = 1
print (my_dict)
class Mydict(UserDict):
def __setitem__(self, key, value):
super().__setitem__(key, value*2)
my_dict = Mydict(one=1)
# my_dict["one"] = 1
print (my_dict)
from collections import defaultdict # 在找不到某个key时会调用__missing__方法,获取默认值
my_dict = defaultdict(dict)
my_value = my_dict["bobby"]
Set
常用方法
# set 集合 fronzenset (不可变集合) 无序, 不重复
# s = set('abcdee')
# s = set(['a','b','c','d','e'])
s = {'a','b', 'c'}
# s = frozenset("abcde") #frozenset 可以作为dict的key
# | & - #集合运算
another_set = set("cef")
re_set = s.difference(another_set)
re_set = s - another_set
re_set = s & another_set
re_set = s | another_set
# set性能很高
print(re_set)
print (s.issubset(re_set))
# if "c" in re_set:
# print ("i am in set")
性能测试
from random import randint
def load_list_data(total_nums, target_nums):
"""
从文件中读取数据,以list的方式返回
:param total_nums: 读取的数量
:param target_nums: 需要查询的数据的数量
"""
all_data = []
target_data = []
file_name = "G:/慕课网课程/AdvancePython/fbobject_idnew.txt"
with open(file_name, encoding="utf8", mode="r") as f_open:
for count, line in enumerate(f_open):
if count < total_nums:
all_data.append(line)
else:
break
for x in range(target_nums):
random_index = randint(0, total_nums)
if all_data[random_index] not in target_data:
target_data.append(all_data[random_index])
if len(target_data) == target_nums:
break
return all_data, target_data
def load_dict_data(total_nums, target_nums):
"""
从文件中读取数据,以dict的方式返回
:param total_nums: 读取的数量
:param target_nums: 需要查询的数据的数量
"""
all_data = {}
target_data = []
file_name = "G:/慕课网课程/AdvancePython/fbobject_idnew.txt"
with open(file_name, encoding="utf8", mode="r") as f_open:
for count, line in enumerate(f_open):
if count < total_nums:
all_data[line] = 0
else:
break
all_data_list = list(all_data)
for x in range(target_nums):
random_index = randint(0, total_nums-1)
if all_data_list[random_index] not in target_data:
target_data.append(all_data_list[random_index])
if len(target_data) == target_nums:
break
return all_data, target_data
def find_test(all_data, target_data):
#测试运行时间
test_times = 100
total_times = 0
import time
for i in range(test_times):
find = 0
start_time = time.time()
for data in target_data:
if data in all_data:
find += 1
last_time = time.time() - start_time
total_times += last_time
return total_times/test_times
if __name__ == "__main__":
# all_data, target_data = load_list_data(10000, 1000)
# all_data, target_data = load_list_data(100000, 1000)
# all_data, target_data = load_list_data(1000000, 1000)
# all_data, target_data = load_dict_data(10000, 1000)
# all_data, target_data = load_dict_data(100000, 1000)
# all_data, target_data = load_dict_data(1000000, 1000)
all_data, target_data = load_dict_data(2000000, 1000)
last_time = find_test(all_data, target_data)
#dict查找的性能远远大于list
#在list中随着list数据的增大 查找时间会增大
#在dict中查找元素不会随着dict的增大而增大
print(last_time)
# 1. dict的key或者set的值 都必须是可以hash的
#不可变对象 都是可hash的,str,fronzenset,tuple,自己实现的类 __hash__
# 2. dict的内存花销大,但是查询速度快, 自定义的对象 或者python内部的对象都是用dict包装的
# 3. dict的存储顺序和元素添加顺序有关
# 4. 添加数据有可能改变已有数据的顺序