解压序列赋值多个变量
data=['AC',51,29.1,(2012,12,21)]
name,shares,price,date=data
name, shares, price, (year, mon, day) = data
如果变量个数和序列元素的个数不匹配,会产生一个异常。
可以使用任意变量名去占位,到时候丢掉这些变量就行了
_, shares, price, _ = data
解压可迭代对象赋值给多个变量
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name,email,*phone_numbers=record
>>> phone_numbers
['773-555-1212', '847-555-1212']
迭代元素为可变长元祖的序列
records = [
('foo', 1, 2),
('bar', 'hello'),
('foo', 3, 4),
]
def do_foo(x, y):
print('foo', x, y)
def do_bar(s):
print('bar', s)
for tag, *args in records:
if tag == 'foo':
do_foo(*args)
elif tag == 'bar':
do_bar(*args)
星号解压语法在字符串操作
>>> line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
>>> uname, *fields, homedir, sh = line.split(':')
解压一些元素后丢弃,使用一个普通的废弃名称
>>> record = ('ACME', 50, 123.45, (12, 18, 2012))
>>> name, *_, (*_, year) = record
保留最后N个元素
collections.deque
使用 deque(maxlen=N) 构造函数会新建一个固定大小的队列。当新的元素加入并且这个队列已满的时候, 最老的元素会自动被移除掉。
from collections import deque
def search(lines, pattern, history=5):
previous_lines = deque(maxlen=history)
for line in lines:
if pattern in line:
yield line, previous_lines
previous_lines.append(line)
# Example use on a file
if __name__ == '__main__':
with open(r'../../cookbook/somefile.txt') as f:
for line, prevlines in search(f, 'python', 5):
for pline in prevlines:
print(pline, end='')
print(line, end='')
print('-' * 20)
q=deque()
q.append(1)
q.appendleft(4)
q.pop()
q.popleft()
查找最大或最小的N个元素
import heapq
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
在一个集合中查找最大或者最小的N个元素
>>> nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
>>> import heapq
>>> heap = list(nums)
>>> heapq.heapify(heap)
>>> heap
[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
heapq.heappop(head) 将弹出剩余最小的元素
>>> heapq.heappop(heap)
-4
>>> heapq.heappop(heap)
1
>>> heapq.heappop(heap)
2
- 当要查找元素个数相对较小的时候,函数nlargest()和nsmallest()很合适
- 查询唯一的最小或最大的元素max()和min()
- 如果N的大小和集合大小接近的时候。通常先排序在切片 sorted(items)[:N]
实现一个队列的优先级
import heapq
class PriorityQueue:
def __init__(self):
self._queue = []
self._index = 0
def push(self, item, priority):
heapq.heappush(self._queue, (-priority, self._index, item))
self._index += 1
def pop(self):
return heapq.heappop(self._queue)[-1]
>>> class Item:
... def __init__(self, name):
... self.name = name
... def __repr__(self):
... return 'Item({!r})'.format(self.name)
...
>>> q = PriorityQueue()
>>> q.push(Item('foo'), 1)
>>> q.push(Item('bar'), 5)
>>> q.push(Item('spam'), 4)
>>> q.push(Item('grok'), 1)
>>> q.pop()
Item('bar')
>>> q.pop()
Item('spam')
>>> q.pop()
Item('foo')
>>> q.pop()
Item('grok')
>>>
pop() 操作返回优先级最高的元素,如果二个有着相同优先级的元素,pop 操作按照他们的顺序被插入到队列的顺序返回。
_index 变量的作用是保证同等优先级元素的正确排序
原理:
1.假定Item实例不支持排序:
>>> a = Item('foo')
>>> b = Item('bar')
>>> a < b
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unorderable types: Item() < Item()
>>>
2.使用元祖(priority, item),只要二个元素的优先级不同就能比较,优先级一样的话,比较就会出错。
>>> a = (1, Item('foo'))
>>> b = (5, Item('bar'))
>>> a < b
True
>>> c = (1, Item('grok'))
>>> a < c
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: unorderable types: Item() < Item()
>>>
3.引入index 变量组成三元组 (priority, index, item),就能很好的避免错误,因为不可能有二个元素有相同的index值。
>>> a = (1, 0, Item('foo'))
>>> b = (5, 1, Item('bar'))
>>> c = (1, 2, Item('grok'))
>>> a < b
True
>>> a < c
True
>>>
字典中的键映射多个值(multidict)
collections 模块中的 defaultdict 来构造这样的字典。 defaultdict 的一个特征是它会自动初始化每个 key 刚开始对应的值,所以你只需要关注添加元素操作了。
from collections import defaultdict
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
d = {} # 一个普通的字典
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(4)
{'a': [1, 2], 'b': [4]}
字典排序
1.保持元素的插入顺序
from collections import OrderedDict
from collections import OrderedDict
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"
for key in d:
print(key, d[key])
OrderedDict 内部维护着一个根据键插入顺序排序的双向链表。
每次当一个新的元素插入进来的时候, 它会被放到链表的尾部。对于一个已经存在的键的重复赋值不会改变键的顺序。
字典的运算
zip()函数先将键和值反转过来
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
a=zip(prices.values(), prices.keys())
for i in a:
print(i)
(10.75, 'FB')
(45.23, 'ACME')
(205.55, 'IBM')
(612.78, 'AAPL')
(37.2, 'HPQ')
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')
可以使用 zip() 和 sorted() 函数来排列字典数据:
prices_sorted = sorted(zip(prices.values(), prices.keys()))
# prices_sorted is [(10.75, 'FB'), (37.2, 'HPQ'),
# (45.23, 'ACME'), (205.55, 'IBM'),
# (612.78, 'AAPL')]
执行这些计算的时候,需要注意的是 zip() 函数创建的是一个只能访问一次的迭代器
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names)) # OK
print(max(prices_and_names)) # ValueError: max() arg is an empty sequence
当多个实体拥有相同的值的时候,键会决定返回结果。 比如,在执行 min() 和 max() 操作的时候,如果恰巧最小或最大值有重复的,那么拥有最小或最大键的实体会返回:
>>> prices = { 'AAA' : 45.23, 'ZZZ': 45.23 }
>>> min(zip(prices.values(), prices.keys()))
(45.23, 'AAA')
>>> max(zip(prices.values(), prices.keys()))
(45.23, 'ZZZ')
>>>
查找俩字典的相同点(相同的键、相同的值)
a = {
'x' : 1,
'y' : 2,
'z' : 3
}
b = {
'w' : 10,
'x' : 11,
'y' : 2
}
# Find keys in common
a.keys() & b.keys() # { 'x', 'y' }
# Find keys in a that are not in b
a.keys() - b.keys() # { 'z' }
# Find (key,value) pairs in common
a.items() & b.items() # { ('y', 2) }
c = {key:a[key] for key in a.keys() - {'z', 'w'}}
# c is {'x': 1, 'y': 2}
删除序列相同元素并保持顺序