import functools
import re
import sys
# Compiled patterns used to pull links and headings out of HTML text.
#
# URL_RE captures the value of an href attribute in the named group "url".
# The closing quote must be the same character as the opening one
# (backreference to the "quote" group), so a URL may contain the other kind
# of quote. Inside a character class "\1" is NOT a backreference -- it is the
# octal escape for chr(1) -- so the URL body is matched with a lazy ".+?"
# under DOTALL instead, which stops at the first matching closing quote.
URL_RE = re.compile(
    r"href=(?P<quote>[\"'])(?P<url>.+?)(?P=quote)",
    re.IGNORECASE | re.DOTALL,
)

# Shared flags for the heading patterns: case-insensitive tags, and DOTALL so
# that a heading's text may span several lines.
flags = re.MULTILINE | re.IGNORECASE | re.DOTALL

# H1_RE / H2_RE capture the text between <h1>/<h2> tags in the named groups
# "h1" and "h2" respectively (lazy, so each heading is matched separately).
H1_RE = re.compile(r"<h1>(?P<h1>.+?)</h1>", flags)
H2_RE = re.compile(r"<h2>(?P<h2>.+?)</h2>", flags)
def coroutine(function):
    """Decorate a generator function so its generators come back primed.

    Calling the decorated function creates the generator, advances it once
    with ``next()`` so that it is suspended at its first ``yield``, and
    returns it. The caller can then use ``send(value)`` straight away.
    """

    @functools.wraps(function)
    def primed(*positional, **named):
        started = function(*positional, **named)
        # Run up to the first yield; the value yielded there is discarded.
        next(started)
        return started

    return primed
@coroutine
def reporter():
    """Coroutine sink that prints each regex match object sent to it.

    A match with a "url" group is printed unless the URL is one of a few
    ignored file names; otherwise a match with an "h1" or "h2" group is
    printed with the corresponding label. ``None`` is accepted and skipped.
    Trace lines are printed before and after every ``yield``.
    """
    skipped = frozenset(("style.css", "index.html", "favicon.png"))
    while True:
        print("reporter before")
        found = yield
        print("reporter after")
        if found is None:
            continue
        named = found.groupdict()
        if "url" in named and named["url"] not in skipped:
            print(" URL: ", named["url"])
        elif "h1" in named:
            print(" H1: ", named["h1"])
        elif "h2" in named:
            print(" H2: ", named["h2"])
@coroutine
def regex_matcher(reveiver, regex):
    """Coroutine that searches each text sent to it and forwards the matches.

    Args:
        reveiver: coroutine (anything with ``send``) that receives every
            match object found. The misspelled parameter name is kept so
            existing keyword callers keep working.
        regex: compiled pattern whose ``finditer`` is run on each text.

    Every match is sent to the receiver passed as an argument. Previously the
    body referred to a module-level ``receiver`` by accident, which ignored
    the argument and raised NameError when no such global existed.
    ``None`` is skipped, so advancing the coroutine with ``next()`` is
    harmless. Trace lines are printed before and after every ``yield``.
    """
    while True:
        print("regex before")
        text = yield
        print("regex after")
        if text is None:
            continue
        for match in regex.finditer(text):
            reveiver.send(match)
# Build the pipeline: three matchers (links, <h1>, <h2>) all feed one reporter.
receiver = reporter()
matchers = (
    regex_matcher(receiver, URL_RE),
    regex_matcher(receiver, H1_RE),
    regex_matcher(receiver, H2_RE),
)
try:
    # Every command-line argument is an HTML file to scan.
    for file in sys.argv[1:]:
        print(file)
        # Use a context manager so the file handle is closed promptly
        # instead of being left to the garbage collector.
        with open(file, encoding="utf-8") as source:
            html = source.read()
        for matcher in matchers:
            matcher.send(html)
finally:
    # Shut the coroutines down even if reading or matching failed:
    # producers first, then the shared consumer.
    for matcher in matchers:
        matcher.close()
    receiver.close()
迭代器
带 yield 的函数是生成器函数;调用它会返回一个生成器对象,而生成器是迭代器的一种
import sys
def iters():
    """Generator that prints a line for every value sent into it.

    "a" and "b" each get a dedicated message; any other value (including
    ``None``) is reported as "others [...] received". It must be advanced to
    its first ``yield`` before a non-None value can be sent.
    """
    while True:
        received = yield
        if received == "a":
            print("received a")
            continue
        if received == "b":
            print("received b")
            continue
        print(f"others [{received!s}] received")
# Demo: send ten messages, "<prefix>0" .. "<prefix>9", into the iters() generator.
args = sys.argv
# Optional prefix from the command line. Default to an empty string: with
# None, the concatenation in the loop below raises TypeError.
sender_text = args[1] if len(args) > 1 else ""
it = iters()
# A freshly created generator must first be advanced to its yield line with
# it.send(None) or next(it) before a real value can be sent. A decorator such
# as next_iterator can do this priming automatically.
it.send(None)
for i in range(10):
    it.send(sender_text + str(i))
# next(it) is the same as it.send(None), so the generator receives None here.
next(it)
启动一个生成器,要么执行 next(iterator),要么执行 iterator.send(None);这时生成器会运行到第一个 yield 所在的那一行并暂停等待。之后每次调用 next(iterator) 或 iterator.send(value) 时,它都会从这一行继续往下执行,直到再次遇到 yield
next(iterator) 其实就等价于 iterator.send(None)
@方法修饰器
def next_iterator(function):
    """Decorator that primes the generator returned by ``function``.

    The wrapped call builds the generator, advances it once so it is paused
    at its first ``yield``, and returns it, so callers do not have to call
    ``next()`` or ``send(None)`` themselves before sending values.
    """

    @functools.wraps(function)
    def start(*args, **kwargs):
        gen = function(*args, **kwargs)
        next(gen)  # same effect as gen.send(None) on a new generator
        return gen

    return start
# The iters() generator again, this time primed automatically by the decorator.
@next_iterator
def iters():
...... # body is the same as the iters() defined earlier in this file